<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">On Fri, Sep 27, 2013 at 6:36 AM, <span dir="ltr">&lt;<a href="mailto:praveen@multicorewareinc.com" target="_blank">praveen@multicorewareinc.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User praveen Tiwari<br>
# Date 1380281758 -19800<br>
# Node ID 7b29c24e499f2d746ce46347440f11919135cba7<br>
# Parent 4014edcf215747ba4ac8147b1168f8edc6f5d64c<br>
asm code for ipfilterH_pp, 4 tap filter<br>
<br>
diff -r 4014edcf2157 -r 7b29c24e499f source/common/x86/CMakeLists.txt<br>
--- a/source/common/x86/CMakeLists.txt Fri Sep 27 02:18:36 2013 -0500<br>
+++ b/source/common/x86/CMakeLists.txt Fri Sep 27 17:05:58 2013 +0530<br>
@@ -5,7 +5,7 @@<br>
add_definitions(-DHAVE_ALIGNED_STACK=0)<br>
endif()<br>
<br>
-set(ASMS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a2.asm)<br>
+set(ASMS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a2.asm ipfilter8.asm)<br>
if (X64)<br>
add_definitions(-DARCH_X86_64=1)<br>
else()<br>
diff -r 4014edcf2157 -r 7b29c24e499f source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Fri Sep 27 02:18:36 2013 -0500<br>
+++ b/source/common/x86/asm-primitives.cpp Fri Sep 27 17:05:58 2013 +0530<br>
@@ -37,6 +37,9 @@<br>
LOWRES(ssse3)<br>
LOWRES(avx)<br>
LOWRES(xop)<br>
+<br>
+extern "C" void x265_filterHorizontal_p_p_4_sse4(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, short const *coeff);<br></blockquote><div><br></div><div>This line is already inside an extern "C" { } block, so the extra extern "C" specifier is redundant (I'm a little surprised it compiled).</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br></blockquote><div><br></div><div>no need for a blank line</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
}<br>
<br>
bool hasXOP(void); // instr_detect.cpp<br>
@@ -370,6 +373,9 @@<br>
p.satd[PARTITION_12x32] = cmp<12, 32, 4, 16, x265_pixel_satd_4x16_sse4>;<br>
p.satd[PARTITION_12x48] = cmp<12, 48, 4, 16, x265_pixel_satd_4x16_sse4>;<br>
p.satd[PARTITION_12x64] = cmp<12, 64, 4, 16, x265_pixel_satd_4x16_sse4>;<br>
+<br>
+ p.ipfilter_pp[FILTER_H_P_P_4] = x265_filterHorizontal_p_p_4_sse4;<br>
+<br></blockquote><div><br></div><div>no need for this blank line</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
}<br>
if (cpuMask & (1 << 7))<br>
{<br>
diff -r 4014edcf2157 -r 7b29c24e499f source/common/x86/ipfilter8.asm<br>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000<br>
+++ b/source/common/x86/ipfilter8.asm Fri Sep 27 17:05:58 2013 +0530<br>
@@ -0,0 +1,136 @@<br>
+;*****************************************************************************<br>
+;* Copyright (C) 2013 x265 project<br>
+;*<br>
+;* Authors: Min Chen &lt;<a href="mailto:chenm003@163.com">chenm003@163.com</a>&gt;<br>
+;* Nabajit Deka &lt;<a href="mailto:nabajit@multicorewareinc.com">nabajit@multicorewareinc.com</a>&gt;<br>
+;* Praveen Kumar Tiwari &lt;<a href="mailto:praveen@multicorewareinc.com">praveen@multicorewareinc.com</a>&gt;<br>
+;*<br>
+;* This program is free software; you can redistribute it and/or modify<br>
+;* it under the terms of the GNU General Public License as published by<br>
+;* the Free Software Foundation; either version 2 of the License, or<br>
+;* (at your option) any later version.<br>
+;*<br>
+;* This program is distributed in the hope that it will be useful,<br>
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the<br>
+;* GNU General Public License for more details.<br>
+;*<br>
+;* You should have received a copy of the GNU General Public License<br>
+;* along with this program; if not, write to the Free Software<br>
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.<br>
+;*<br>
+;* This program is also available under a commercial proprietary license.<br>
+;* For more information, contact us at <a href="mailto:licensing@multicorewareinc.com">licensing@multicorewareinc.com</a>.<br>
+;*****************************************************************************/<br>
+<br>
+<br></blockquote><div><br></div><div>no need for two lines here</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+%include "x86inc.asm"<br>
+%include "x86util.asm"<br>
+<br>
+%if ARCH_X86_64 == 0<br>
+<br>
+SECTION_RODATA 32<br>
+tab_leftmask: db -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0<br>
+<br>
+tab_Tm: db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6<br>
+ db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10<br>
+<br>
+tab_c_512: times 8 dw 512<br>
+<br>
+SECTION .text<br>
+<br>
+%macro FILTER_H4 3<br>
+ movu %1, [src + col - 1]<br>
+ pshufb %2, %1, Tm4<br>
+ pmaddubsw %2, coef2<br>
+ pshufb %1, %1, Tm5<br>
+ pmaddubsw %1, coef2<br>
+ phaddw %2, %1<br>
+ pmulhrsw %2, %3<br>
+ packuswb %2, %2<br>
+%endmacro<br>
+<br>
+;-----------------------------------------------------------------------------<br>
+; void filterHorizontal_p_p_4(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int width, int height, short const *coeff)<br>
+;-----------------------------------------------------------------------------<br>
+INIT_XMM sse4<br>
+cglobal filterHorizontal_p_p_4, 0, 7, 8<br>
+%define src r0<br>
+%define dst r1<br>
+%define row r2<br>
+%define col r3<br>
+%define width r4<br>
+%define widthleft r5<br>
+%define mask_offset r6<br>
+%define coef2 m7<br>
+%define x3 m6<br>
+%define Tm5 m5<br>
+%define Tm4 m4<br>
+%define x2 m3<br>
+%define x1 m2<br>
+%define x0 m1<br>
+%define leftmask m0<br>
+%define tmp r0<br>
+%define tmp1 r1<br>
+<br>
+<br>
+ mov tmp, r6m<br>
+ movu coef2, [tmp]<br>
+ packsswb coef2, coef2<br>
+ pshufd coef2, coef2, 0<br>
+<br>
+ mova x3, [tab_c_512]<br>
+<br>
+ mov width, r4m<br>
+ mov widthleft, width<br>
+ and width, ~7<br>
+ and widthleft, 7<br>
+ mov mask_offset, widthleft<br>
+ neg mask_offset<br></blockquote><div><br></div><div>there are still tab stops here, and trailing white-space all over the file</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+ movq leftmask, [tab_leftmask + (7 + mask_offset)]<br>
+ mova Tm4, [tab_Tm]<br>
+ mova Tm5, [tab_Tm + 16]<br>
+<br>
+ mov src, r0m<br>
+ mov dst, r2m<br>
+ mov row, r5m<br>
+<br>
+_loop_row:<br>
+ xor col, col<br>
+ cmp width, 0<br>
+ je _end_col<br>
+<br>
+_loop_col:<br>
+ FILTER_H4 x0, x1, x3<br>
+ movh [dst + col], x1<br>
+<br>
+ add col, 8<br>
+<br>
+ cmp col, width<br>
+ jl _loop_col<br>
+<br>
+_end_col:<br>
+ test widthleft, widthleft<br>
+ jz _next_row<br>
+<br>
+ movq x2, [dst + col]<br>
+ FILTER_H4 x0, x1, x3<br>
+ pblendvb x2, x2, x1, leftmask<br>
+ movh [dst + col], x2<br>
+<br>
+_next_row:<br>
+ add src, r1m<br>
+ add dst, r3m<br>
+ dec row<br>
+<br>
+ test row, row<br>
+ jz _end_row<br>
+<br>
+ jmp _loop_row<br>
+<br>
+_end_row:<br>
+<br>
+ RET<br>
+<br>
+%endif ; ARCH_X86_64 == 0<br>
\ No newline at end of file<br></blockquote><div><br></div><div>This needs to be cleaned up too: the file should end with a newline.</div><div><br></div><div>All things considered, I'm thinking of pushing this to the default branch once the whitespace and other nits are cleaned up. Further patches should improve what is in the repo.</div>
</div><div><br></div>-- <br>Steve Borho
</div></div>