[x265] [PATCH Review only] asm: code for scale1D_128to64 routine

murugan at multicorewareinc.com murugan at multicorewareinc.com
Wed Nov 13 16:00:50 CET 2013


# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384354826 -19800
#      Wed Nov 13 20:30:26 2013 +0530
# Node ID 29f7e68d450f0b71153f6bf5794ae4d1c118ac55
# Parent  481cdfc251de0f99ef0a3c4fd53c786b79b5f182
asm: code for scale1D_128to64 routine

diff -r 481cdfc251de -r 29f7e68d450f source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Wed Nov 13 12:46:07 2013 +0530
+++ b/source/common/x86/pixel-a.asm	Wed Nov 13 20:30:26 2013 +0530
@@ -6784,3 +6784,43 @@
 PIXELSUB_PS_W64_H2 64, 32
 PIXELSUB_PS_W64_H2 64, 48
 PIXELSUB_PS_W64_H2 64, 64
+
+;-----------------------------------------------------------------
+; void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
+;
+; Halves a row of 128 8-bit pixels into 64 by averaging each
+; horizontal pair with rounding:
+;     dst[x] = (src[2*x] + src[2*x + 1] + 1) >> 1,  x = 0..63
+;
+; r0 = dst, r1 = src; the stride argument is unused.
+; Only src[0..127] is read: the odd pixel of each pair is taken from
+; the already-loaded register, because a second load at offset +1
+; would touch src[128] on the last vector.
+;-----------------------------------------------------------------
+INIT_XMM sse2
+cglobal scale_1D_128to64, 2, 2, 5, dest, src1, stride
+
+mova        m4,      [pw_00ff]          ; word mask selecting the even pixel of each pair
+
+%assign x 0
+%rep 4                                  ; 32 source pixels -> 16 output pixels per pass
+movu        m0,      [r1 + 2 * x]
+movu        m2,      [r1 + 2 * x + 16]
+
+mova        m1,      m0
+mova        m3,      m2
+psrlw       m1,      8                  ; odd pixels, zero-extended to words
+psrlw       m3,      8
+pand        m0,      m4                 ; even pixels, zero-extended to words
+pand        m2,      m4
+
+pavgw       m0,      m1                 ; (even + odd + 1) >> 1, result fits in a byte
+pavgw       m2,      m3
+
+packuswb    m0,      m2                 ; 8 + 8 words -> 16 output pixels
+movu        [r0 + x],     m0
+
+%assign x x+16
+%endrep
+
+RET


More information about the x265-devel mailing list