[x265] [PATCH] asm: use x264 code for sse2 sad[16xN] except for 16x64
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Mon May 4 10:40:19 CEST 2015
# HG changeset patch
# User Sumalatha Polureddy
# Date 1430728690 -19800
# Mon May 04 14:08:10 2015 +0530
# Node ID d3fa8d99e44ff4edbf589595401a307d3f79ed9b
# Parent 4cf55e54fe3ec33f540b7678b02de34074c0527b
asm: use x264 code for sse2 sad[16xN] except for 16x64
diff -r 4cf55e54fe3e -r d3fa8d99e44f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sat May 02 10:58:05 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp Mon May 04 14:08:10 2015 +0530
@@ -262,7 +262,7 @@
p.pu[LUMA_16x4].sad = x265_pixel_sad_16x4_ ## cpu; \
p.pu[LUMA_16x12].sad = x265_pixel_sad_16x12_ ## cpu; \
p.pu[LUMA_16x32].sad = x265_pixel_sad_16x32_ ## cpu; \
- p.pu[LUMA_16x64].sad = x265_pixel_sad_16x64_ ## cpu; \
+ /*p.pu[LUMA_16x64].sad = x265_pixel_sad_16x64_ ## cpu;*/ \
p.pu[LUMA_32x8].sad = x265_pixel_sad_32x8_ ## cpu; \
p.pu[LUMA_32x16].sad = x265_pixel_sad_32x16_ ## cpu; \
p.pu[LUMA_32x24].sad = x265_pixel_sad_32x24_ ## cpu; \
diff -r 4cf55e54fe3e -r d3fa8d99e44f source/common/x86/sad16-a.asm
--- a/source/common/x86/sad16-a.asm Sat May 02 10:58:05 2015 -0500
+++ b/source/common/x86/sad16-a.asm Mon May 04 14:08:10 2015 +0530
@@ -276,9 +276,8 @@
ABSW2 m3, m4, m3, m4, m7, m5
paddw m1, m2
paddw m3, m4
- paddw m3, m1
- pmaddwd m3, [pw_1]
- paddd m0, m3
+ paddw m0, m1
+ paddw m0, m3
%else
movu m1, [r2]
movu m2, [r2+2*r3]
@@ -287,9 +286,8 @@
ABSW2 m1, m2, m1, m2, m3, m4
lea r0, [r0+4*r1]
lea r2, [r2+4*r3]
- paddw m2, m1
- pmaddwd m2, [pw_1]
- paddd m0, m2
+ paddw m0, m1
+ paddw m0, m2
%endif
%endmacro
@@ -309,8 +307,12 @@
dec r4d
jg .loop
%endif
-
+%if %2 == 32
+ HADDUWD m0, m1
HADDD m0, m1
+%else
+ HADDW m0, m1
+%endif
movd eax, xm0
RET
%endmacro
@@ -321,7 +323,6 @@
SAD 16, 12
SAD 16, 16
SAD 16, 32
-SAD 16, 64
INIT_XMM sse2
SAD 8, 4
More information about the x265-devel mailing list