[x265] [PATCH] asm: code for scale1D_128to64 routine

murugan at multicorewareinc.com murugan at multicorewareinc.com
Thu Nov 14 12:27:48 CET 2013


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384428439 -19800
#      Thu Nov 14 16:57:19 2013 +0530
# Node ID 4526a727f0b4975eeaa1094e0ced0a3b3b5c5a7d
# Parent  4c91093ffb3fc596ce5894d7855b1f4e8b7912ce
asm: code for scale1D_128to64 routine

diff -r 4c91093ffb3f -r 4526a727f0b4 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Nov 14 14:58:40 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Nov 14 16:57:19 2013 +0530
@@ -463,6 +463,8 @@
         PIXEL_AVG(ssse3);
         PIXEL_AVG_W4(ssse3);
 
+        p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
+
         p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
         p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
         p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_ssse3;
diff -r 4c91093ffb3f -r 4526a727f0b4 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Thu Nov 14 14:58:40 2013 +0530
+++ b/source/common/x86/pixel-a.asm	Thu Nov 14 16:57:19 2013 +0530
@@ -6784,3 +6784,65 @@
 PIXELSUB_PS_W64_H2 64, 32
 PIXELSUB_PS_W64_H2 64, 48
 PIXELSUB_PS_W64_H2 64, 64
+
+;-----------------------------------------------------------------
+; void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
+;-----------------------------------------------------------------
+INIT_XMM ssse3
+cglobal scale1D_128to64, 2, 2, 8, dest, src1, stride
+    ; r0 = dst, r1 = src. Loads 128 bytes from r1, stores 64 bytes to r0; stride is never read.
+    mova        m7,      [deinterleave_shuf]    ; pshufb mask, defined outside this hunk
+    ; ---- first half: source bytes 0..63 -> destination bytes 0..31 ----
+    movu        m0,      [r1]
+    palignr     m1,      m0,    1               ; m1[i] = m0[i+1] for i < 15; m1[15] = old m1[0] (stale)
+    movu        m2,      [r1 + 16]
+    palignr     m3,      m2,    1               ; same one-byte shift for the next 16 source bytes
+    movu        m4,      [r1 + 32]
+    palignr     m5,      m4,    1
+    movu        m6,      [r1 + 48]              ; its shifted copy is built below, once m1 is free
+
+    pavgb       m0,      m1                     ; m0[i] = (m0[i] + m0[i+1] + 1) >> 1 for i < 15; byte 15 mixes in stale data
+    ; m1 was consumed by the pavgb above, so it is reused as the shifted copy of m6.
+    palignr     m1,      m6,    1
+
+    pavgb       m2,      m3
+    pavgb       m4,      m5
+    pavgb       m6,      m1
+    ; NOTE(review): the m7 mask presumably packs even-indexed bytes into the low qword (and never byte 15) -- confirm.
+    pshufb      m0,      m0,    m7
+    pshufb      m2,      m2,    m7
+    pshufb      m4,      m4,    m7
+    pshufb      m6,      m6,    m7
+    ; Only the low qword of each shuffled register is kept: two 8-byte halves per 16-byte store.
+    punpcklqdq    m0,           m2              ; m0 = low qword of m0 : low qword of m2
+    movu          [r0],         m0
+    punpcklqdq    m4,           m6
+    movu          [r0 + 16],    m4
+    ; ---- second half: same sequence for source bytes 64..127 -> destination bytes 32..63 ----
+    movu        m0,      [r1 + 64]
+    palignr     m1,      m0,    1
+    movu        m2,      [r1 + 80]
+    palignr     m3,      m2,    1
+    movu        m4,      [r1 + 96]
+    palignr     m5,      m4,    1
+    movu        m6,      [r1 + 112]
+
+    pavgb       m0,      m1
+
+    palignr     m1,      m6,    1
+
+    pavgb       m2,      m3
+    pavgb       m4,      m5
+    pavgb       m6,      m1
+
+    pshufb      m0,      m0,    m7
+    pshufb      m2,      m2,    m7
+    pshufb      m4,      m4,    m7
+    pshufb      m6,      m6,    m7
+
+    punpcklqdq    m0,           m2
+    movu          [r0 + 32],    m0
+    punpcklqdq    m4,           m6
+    movu          [r0 + 48],    m4
+
+RET
diff -r 4c91093ffb3f -r 4526a727f0b4 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Thu Nov 14 14:58:40 2013 +0530
+++ b/source/common/x86/pixel.h	Thu Nov 14 16:57:19 2013 +0530
@@ -116,6 +116,7 @@
 int x265_pixel_satd_16x12_sse2(pixel *, intptr_t, pixel *, intptr_t);
 int x265_pixel_satd_16x32_sse2(pixel *, intptr_t, pixel *, intptr_t);
 int x265_pixel_satd_16x64_sse2(pixel *, intptr_t, pixel *, intptr_t);
+void x265_scale1D_128to64_ssse3(pixel *, pixel *, intptr_t);
 
 DECL_PIXELS(uint64_t, var, mmx2, (pixel * pix, intptr_t i_stride))
 DECL_PIXELS(uint64_t, var, sse2, (pixel * pix, intptr_t i_stride))


More information about the x265-devel mailing list