[x265] [PATCH] asm: assembly code for pixel_satd_24x32

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Wed Nov 13 08:38:38 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1384328283 -19800
#      Wed Nov 13 13:08:03 2013 +0530
# Node ID 2ffe634ebd71a74e0af2749b1b9de894a191d1d0
# Parent  c4ca80d19105ccf1ba2ec14dd65915f2820a660d
asm: assembly code for pixel_satd_24x32

diff -r c4ca80d19105 -r 2ffe634ebd71 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Nov 12 19:10:23 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Nov 13 13:08:03 2013 +0530
@@ -60,7 +60,7 @@
 
 #define HEVC_SATD(cpu) \
     p.satd[LUMA_32x32] = cmp<32, 32, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
-    p.satd[LUMA_24x32] = cmp<24, 32, 8, 16, x265_pixel_satd_8x16_ ## cpu>; \
+    p.satd[LUMA_24x32] = x265_pixel_satd_24x32_ ## cpu; \
     p.satd[LUMA_64x64] = cmp<64, 64, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
     p.satd[LUMA_64x32] = cmp<64, 32, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
     p.satd[LUMA_32x64] = cmp<32, 64, 16, 16, x265_pixel_satd_16x16_ ## cpu>; \
@@ -359,7 +359,6 @@
         INIT8(sad_x3, _mmx2);
         INIT8(sad_x4, _mmx2);
         INIT8(satd, _mmx2);
-        HEVC_SATD(mmx2);
         p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
         p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse2;
         p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
@@ -588,10 +587,17 @@
     {
         INIT2(sad_x4, _avx2);
         INIT4(satd, _avx2);
-        HEVC_SATD(avx2);
         INIT2_NAME(sse_pp, ssd, _avx2);
         p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_avx2;
         SA8D_INTER_FROM_BLOCK8(avx2);
+        p.satd[LUMA_32x32] = cmp<32, 32, 16, 16, x265_pixel_satd_16x16_avx2>;
+        p.satd[LUMA_24x32] = cmp<24, 32, 8, 16, x265_pixel_satd_8x16_avx2>;
+        p.satd[LUMA_64x64] = cmp<64, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
+        p.satd[LUMA_64x32] = cmp<64, 32, 16, 16, x265_pixel_satd_16x16_avx2>;
+        p.satd[LUMA_32x64] = cmp<32, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
+        p.satd[LUMA_64x48] = cmp<64, 48, 16, 16, x265_pixel_satd_16x16_avx2>;
+        p.satd[LUMA_48x64] = cmp<48, 64, 16, 16, x265_pixel_satd_16x16_avx2>;
+        p.satd[LUMA_64x16] = cmp<64, 16, 16, 16, x265_pixel_satd_16x16_avx2>;
 
         p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2;
         p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
diff -r c4ca80d19105 -r 2ffe634ebd71 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Tue Nov 12 19:10:23 2013 +0530
+++ b/source/common/x86/pixel-a.asm	Wed Nov 13 13:08:03 2013 +0530
@@ -2051,6 +2051,54 @@
     RET
 %endif
 
+%if WIN64
+cglobal pixel_satd_24x32, 4,8,8
+    SATD_START_SSE2 m6, m7
+    mov r6, r0
+    mov r7, r2
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 8]
+    lea r2, [r7 + 8]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    lea r0, [r6 + 16]
+    lea r2, [r7 + 16]
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6
+%else
+cglobal pixel_satd_24x32, 4,6,8
+    SATD_START_SSE2 m6, m7
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    mov r0, r0mp
+    mov r2, r2mp
+    add r0, 8
+    add r2, 8
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    mov r0, r0mp
+    mov r2, r2mp
+    add r0, 16
+    add r2, 16
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    call pixel_satd_8x8_internal
+    SATD_END_SSE2 m6
+%endif    ;WIN64
+
 cglobal pixel_satd_8x32, 4,6,8
     SATD_START_SSE2 m6, m7
 %if vertical


More information about the x265-devel mailing list