[x265] [PATCH] asm: code for scale1D_128to64 routine

murugan at multicorewareinc.com murugan at multicorewareinc.com
Thu Nov 14 07:07:43 CET 2013


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384409084 -19800
#      Thu Nov 14 11:34:44 2013 +0530
# Node ID 35e58e24642fe0c35fcc8c65ee3a3394de8f1304
# Parent  c9fdf510182348c979a58be67101446c84b36569
asm: code for scale1D_128to64 routine.

diff -r c9fdf5101823 -r 35e58e24642f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Nov 14 11:34:09 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Nov 14 11:34:44 2013 +0530
@@ -371,6 +371,7 @@
         p.satd[LUMA_32x24] = x265_pixel_satd_32x24_sse2;
         p.sa8d[BLOCK_4x4]  = x265_pixel_satd_4x4_mmx2;
         p.frame_init_lowres_core = x265_frame_init_lowres_core_mmx2;
+        p.scale1D_128to64 = x265_scale1D_128to64_sse2;
 
         PIXEL_AVG(sse2);
         PIXEL_AVG_W4(mmx2);
diff -r c9fdf5101823 -r 35e58e24642f source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Thu Nov 14 11:34:09 2013 +0530
+++ b/source/common/x86/pixel-a.asm	Thu Nov 14 11:34:44 2013 +0530
@@ -6784,3 +6784,40 @@
 PIXELSUB_PS_W64_H2 64, 32
 PIXELSUB_PS_W64_H2 64, 48
 PIXELSUB_PS_W64_H2 64, 64
+
+;-----------------------------------------------------------------
+; void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
+;
+; dst[i] = (src[2 * i] + src[2 * i + 1] + 1) >> 1   for i = 0..63
+; In:    r0 = dst (64 bytes written), r1 = src (128 bytes read),
+;        r2 = stride (never used)
+; Clobb: m0-m4
+; Only SSE2 instructions are used, matching the _sse2 suffix.
+;-----------------------------------------------------------------
+
+; Average 16 horizontally adjacent pixel pairs (32 src bytes -> 16 dst bytes).
+; Argument: dst byte offset; the src offset is twice that.
+; Expects m4 = pw_00ff mask; clobbers m0-m3.
+%macro SCALE1D_32TO16 1
+    movu        m0,      [r1 + 2 * %1]
+    movu        m2,      [r1 + 2 * %1 + 16]
+    mova        m1,      m0
+    mova        m3,      m2
+    pand        m0,      m4                  ; even pixels, one per word lane
+    pand        m2,      m4
+    psrlw       m1,      8                   ; odd pixels, one per word lane
+    psrlw       m3,      8
+    pavgw       m0,      m1                  ; (even + odd + 1) >> 1, always <= 255
+    pavgw       m2,      m3
+    packuswb    m0,      m2                  ; 2 x 8 words -> 16 bytes, never saturates
+    movu        [r0 + %1], m0
+%endmacro
+
+INIT_XMM sse2
+cglobal scale1D_128to64, 3, 3, 5, dest, src1, stride
+    mova        m4,      [pw_00ff]           ; presumably 0x00ff in each word (defined elsewhere)
+    SCALE1D_32TO16  0
+    SCALE1D_32TO16 16
+    SCALE1D_32TO16 32
+    SCALE1D_32TO16 48
+    RET
diff -r c9fdf5101823 -r 35e58e24642f source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Thu Nov 14 11:34:09 2013 +0530
+++ b/source/common/x86/pixel.h	Thu Nov 14 11:34:44 2013 +0530
@@ -116,6 +116,7 @@
 int x265_pixel_satd_16x12_sse2(pixel *, intptr_t, pixel *, intptr_t);
 int x265_pixel_satd_16x32_sse2(pixel *, intptr_t, pixel *, intptr_t);
 int x265_pixel_satd_16x64_sse2(pixel *, intptr_t, pixel *, intptr_t);
+void x265_scale1D_128to64_sse2(pixel *, pixel *, intptr_t);
 
 DECL_PIXELS(uint64_t, var, mmx2, (pixel * pix, intptr_t i_stride))
 DECL_PIXELS(uint64_t, var, sse2, (pixel * pix, intptr_t i_stride))


More information about the x265-devel mailing list