[x265] [PATCH] asm: use x264 code for sse2 sad[16xN] except for 16x64

Sumalatha Polureddy sumalatha at multicorewareinc.com
Mon May 4 10:40:19 CEST 2015


# HG changeset patch
# User Sumalatha Polureddy
# Date 1430728690 -19800
#      Mon May 04 14:08:10 2015 +0530
# Node ID d3fa8d99e44ff4edbf589595401a307d3f79ed9b
# Parent  4cf55e54fe3ec33f540b7678b02de34074c0527b
asm: use x264 code for sse2 sad[16xN] except for 16x64

diff -r 4cf55e54fe3e -r d3fa8d99e44f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Sat May 02 10:58:05 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp	Mon May 04 14:08:10 2015 +0530
@@ -262,7 +262,7 @@
     p.pu[LUMA_16x4].sad  = x265_pixel_sad_16x4_ ## cpu; \
     p.pu[LUMA_16x12].sad = x265_pixel_sad_16x12_ ## cpu; \
     p.pu[LUMA_16x32].sad = x265_pixel_sad_16x32_ ## cpu; \
-    p.pu[LUMA_16x64].sad = x265_pixel_sad_16x64_ ## cpu; \
+    /*p.pu[LUMA_16x64].sad = x265_pixel_sad_16x64_ ## cpu;*/ \
     p.pu[LUMA_32x8].sad  = x265_pixel_sad_32x8_ ## cpu; \
     p.pu[LUMA_32x16].sad = x265_pixel_sad_32x16_ ## cpu; \
     p.pu[LUMA_32x24].sad = x265_pixel_sad_32x24_ ## cpu; \
diff -r 4cf55e54fe3e -r d3fa8d99e44f source/common/x86/sad16-a.asm
--- a/source/common/x86/sad16-a.asm	Sat May 02 10:58:05 2015 -0500
+++ b/source/common/x86/sad16-a.asm	Mon May 04 14:08:10 2015 +0530
@@ -276,9 +276,8 @@
     ABSW2   m3, m4, m3, m4, m7, m5
     paddw   m1, m2
     paddw   m3, m4
-    paddw   m3, m1
-    pmaddwd m3, [pw_1]
-    paddd   m0, m3
+    paddw   m0, m1
+    paddw   m0, m3
 %else
     movu    m1, [r2]
     movu    m2, [r2+2*r3]
@@ -287,9 +286,8 @@
     ABSW2   m1, m2, m1, m2, m3, m4
     lea     r0, [r0+4*r1]
     lea     r2, [r2+4*r3]
-    paddw   m2, m1
-    pmaddwd m2, [pw_1]
-    paddd   m0, m2
+    paddw   m0, m1
+    paddw   m0, m2
 %endif
 %endmacro
 
@@ -309,8 +307,12 @@
     dec    r4d
     jg .loop
 %endif
-
+%if %2 == 32
+    HADDUWD m0, m1
     HADDD   m0, m1
+%else
+    HADDW   m0, m1
+%endif
     movd    eax, xm0
     RET
 %endmacro
@@ -321,7 +323,6 @@
 SAD  16, 12
 SAD  16, 16
 SAD  16, 32
-SAD  16, 64
 
 INIT_XMM sse2
 SAD  8,  4


More information about the x265-devel mailing list