[x265] [PATCH 4 of 4] asm: AVX2 version sa8d[16x16], 1913c(AVX) -> 1620c(AVX2)

Min Chen chenm003 at 163.com
Thu May 14 01:53:20 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1431561179 25200
# Node ID 851b3d841543a59796361f83d2cdeef976c35549
# Parent  10e71f621d70b0185deeddae9974df54ebc63dc4
asm: AVX2 version sa8d[16x16], 1913c(AVX) -> 1620c(AVX2)
---
 source/common/x86/asm-primitives.cpp |    2 +
 source/common/x86/pixel-a.asm        |   37 ++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 0 deletions(-)

diff -r 10e71f621d70 -r 851b3d841543 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed May 13 16:52:56 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp	Wed May 13 16:52:59 2015 -0700
@@ -1941,7 +1941,9 @@
         p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = x265_addAvg_32x64_avx2;
 
         p.cu[BLOCK_8x8].sa8d = x265_pixel_sa8d_8x8_avx2;
+        p.cu[BLOCK_16x16].sa8d = x265_pixel_sa8d_16x16_avx2;
         p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].sa8d = x265_pixel_sa8d_8x8_avx2;
+        p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].sa8d = x265_pixel_sa8d_16x16_avx2;
 
         p.cu[BLOCK_16x16].add_ps = x265_pixel_add_ps_16x16_avx2;
         p.cu[BLOCK_32x32].add_ps = x265_pixel_add_ps_32x32_avx2;
diff -r 10e71f621d70 -r 851b3d841543 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Wed May 13 16:52:56 2015 -0700
+++ b/source/common/x86/pixel-a.asm	Wed May 13 16:52:59 2015 -0700
@@ -6982,6 +6982,43 @@
     add   eax, 1
     shr   eax, 1
     RET
+
+cglobal pixel_sa8d_16x16, 4,6,8 ; 16x16 SA8D summed from four 8x8 calls; "4,6,8" presumably = args, GPRs, SIMD regs (x86inc) -- confirm
+    SATD_START_AVX2 m6, m7, 1 ; macro defined elsewhere; presumably zeroes the m6 accumulator and sets r4/r5 -- confirm
+
+    call pixel_sa8d_8x8_internal ; pix[0]: first 8x8 block, at the incoming r0/r2
+
+    sub  r0, r1                  ; r0 -= 2*r1 (r1 presumably the row stride of r0) ...
+    sub  r0, r1
+    add  r0, 8*SIZEOF_PIXEL      ; ... then 8 pixels to the right
+    sub  r2, r3                  ; same adjustment for the second pointer r2 (step r3)
+    sub  r2, r3
+    add  r2, 8*SIZEOF_PIXEL
+    call pixel_sa8d_8x8_internal ; pix[8]; NOTE(review): implies the internal call leaves r0/r2 two rows down -- confirm
+
+    add  r0, r4                  ; r4 is not written in this routine; presumably set by SATD_START_AVX2 -- confirm
+    add  r0, r1                  ; r0 += r4 + r1
+    add  r2, r5                  ; r5 likewise comes from outside this routine
+    add  r2, r3                  ; r2 += r5 + r3
+    call pixel_sa8d_8x8_internal ; pix[8*stride+8]: block below the previous one
+
+    sub  r0, r1                  ; r0 -= 2*r1 ...
+    sub  r0, r1
+    sub  r0, 8*SIZEOF_PIXEL      ; ... then 8 pixels back to the left
+    sub  r2, r3                  ; same adjustment for r2
+    sub  r2, r3
+    sub  r2, 8*SIZEOF_PIXEL
+    call pixel_sa8d_8x8_internal ; pix[8*stride]: block to the left of the previous one
+
+    ; TODO: analyze Dynamic Range (the paddusw below saturates each 16-bit word at 0xFFFF)
+    vextracti128 xm0, m6, 1      ; xm0 = upper 128 bits of m6
+    paddusw xm6, xm0             ; fold upper half into lower half, unsigned saturating word add
+    HADDUW xm6, xm0              ; macro defined elsewhere; presumably sums the unsigned words of xm6, xm0 as scratch -- confirm
+    movd  eax, xm6               ; low 32 bits of xm6 -> eax
+    add   eax, 1                 ; eax = (eax + 1) >> 1 ...
+    shr   eax, 1                 ; ... i.e. halve, rounding up
+    RET
+
 %endif ; HIGH_BIT_DEPTH
 
 ; Input 16bpp, Output 8bpp



More information about the x265-devel mailing list