[x265] [PATCH] asm: code for scale1D_128to64 routine
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Thu Nov 14 11:59:24 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384426690 -19800
# Thu Nov 14 16:28:10 2013 +0530
# Node ID e698f27ccbfabb67a7a8f4a09d275442dead301b
# Parent 4c91093ffb3fc596ce5894d7855b1f4e8b7912ce
asm: code for scale1D_128to64 routine
diff -r 4c91093ffb3f -r e698f27ccbfa source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Nov 14 14:58:40 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Nov 14 16:28:10 2013 +0530
@@ -463,6 +463,8 @@
PIXEL_AVG(ssse3);
PIXEL_AVG_W4(ssse3);
+ p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
+
p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
p.sad_x4[LUMA_8x8] = x265_pixel_sad_x4_8x8_ssse3;
p.sad_x3[LUMA_8x16] = x265_pixel_sad_x3_8x16_ssse3;
diff -r 4c91093ffb3f -r e698f27ccbfa source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Thu Nov 14 14:58:40 2013 +0530
+++ b/source/common/x86/pixel-a.asm Thu Nov 14 16:28:10 2013 +0530
@@ -6784,3 +6784,65 @@
PIXELSUB_PS_W64_H2 64, 32
PIXELSUB_PS_W64_H2 64, 48
PIXELSUB_PS_W64_H2 64, 64
+
+;-----------------------------------------------------------------
+; void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
+;-----------------------------------------------------------------
+; dst[i] = (src[2*i] + src[2*i + 1] + 1) >> 1 for i = 0..63, using byte ops:
+; reads 128 bytes at src, writes 64 bytes at dst (assumes 8-bit pixel - TODO
+; confirm). r0 = dst, r1 = src; stride is never read, so it is not loaded.
+INIT_XMM ssse3
+cglobal scale1D_128to64, 2, 2, 8, dest, src1, stride
+    mova        m7, [deinterleave_shuf]     ; shuffle mask, defined elsewhere
+
+    ; src[0..63]. "palignr mN, mM, 1" leaves mM shifted down by one byte in mN
+    ; (byte 15 receives the stale byte 0 of mN), so after pavgb byte k <= 14 of
+    ; a register is the rounded average of its bytes k and k + 1; 15 is junk.
+    movu        m0, [r1]
+    palignr     m1, m0, 1
+    movu        m2, [r1 + 16]
+    palignr     m3, m2, 1
+    movu        m4, [r1 + 32]
+    palignr     m5, m4, 1
+    movu        m6, [r1 + 48]
+    pavgb       m0, m1
+    palignr     m1, m6, 1                   ; m1 is free: m0 already used it
+    pavgb       m2, m3
+    pavgb       m4, m5
+    pavgb       m6, m1
+
+    ; mask presumably packs even bytes low; punpcklqdq keeps only low qwords
+    pshufb      m0, m7
+    pshufb      m2, m7
+    pshufb      m4, m7
+    pshufb      m6, m7
+
+    punpcklqdq  m0, m2                      ; 16 results from src[0..31]
+    movu        [r0], m0
+    punpcklqdq  m4, m6                      ; 16 results from src[32..63]
+    movu        [r0 + 16], m4
+
+    ; src[64..127]: same sequence, results go to dst[32..63]
+    movu        m0, [r1 + 64]
+    palignr     m1, m0, 1
+    movu        m2, [r1 + 80]
+    palignr     m3, m2, 1
+    movu        m4, [r1 + 96]
+    palignr     m5, m4, 1
+    movu        m6, [r1 + 112]
+    pavgb       m0, m1
+    palignr     m1, m6, 1
+    pavgb       m2, m3
+    pavgb       m4, m5
+    pavgb       m6, m1
+
+    pshufb      m0, m7
+    pshufb      m2, m7
+    pshufb      m4, m7
+    pshufb      m6, m7
+
+    punpcklqdq  m0, m2
+    movu        [r0 + 32], m0
+    punpcklqdq  m4, m6
+    movu        [r0 + 48], m4
+    RET
diff -r 4c91093ffb3f -r e698f27ccbfa source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Thu Nov 14 14:58:40 2013 +0530
+++ b/source/common/x86/pixel.h Thu Nov 14 16:28:10 2013 +0530
@@ -116,6 +116,7 @@
int x265_pixel_satd_16x12_sse2(pixel *, intptr_t, pixel *, intptr_t);
int x265_pixel_satd_16x32_sse2(pixel *, intptr_t, pixel *, intptr_t);
int x265_pixel_satd_16x64_sse2(pixel *, intptr_t, pixel *, intptr_t);
+void x265_scale1D_128to64_ssse3(pixel *, pixel *, intptr_t); /* (dst, src, stride); the asm never reads stride */
DECL_PIXELS(uint64_t, var, mmx2, (pixel * pix, intptr_t i_stride))
DECL_PIXELS(uint64_t, var, sse2, (pixel * pix, intptr_t i_stride))
More information about the x265-devel
mailing list