[x265] [PATCH] asm: code for scale1D_128to64 routine
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Thu Nov 14 07:07:43 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384409084 -19800
# Thu Nov 14 11:34:44 2013 +0530
# Node ID 35e58e24642fe0c35fcc8c65ee3a3394de8f1304
# Parent c9fdf510182348c979a58be67101446c84b36569
asm: code for scale1D_128to64 routine.
diff -r c9fdf5101823 -r 35e58e24642f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Nov 14 11:34:09 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Nov 14 11:34:44 2013 +0530
@@ -371,6 +371,7 @@
p.satd[LUMA_32x24] = x265_pixel_satd_32x24_sse2;
p.sa8d[BLOCK_4x4] = x265_pixel_satd_4x4_mmx2;
p.frame_init_lowres_core = x265_frame_init_lowres_core_mmx2;
+ p.scale1D_128to64 = x265_scale1D_128to64_sse2;
PIXEL_AVG(sse2);
PIXEL_AVG_W4(mmx2);
diff -r c9fdf5101823 -r 35e58e24642f source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Thu Nov 14 11:34:09 2013 +0530
+++ b/source/common/x86/pixel-a.asm Thu Nov 14 11:34:44 2013 +0530
@@ -6784,3 +6784,38 @@
PIXELSUB_PS_W64_H2 64, 32
PIXELSUB_PS_W64_H2 64, 48
PIXELSUB_PS_W64_H2 64, 64
+
+;-----------------------------------------------------------------
+; void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
+;
+; Halves one 128-byte row by averaging adjacent pairs with rounding:
+;     dst[i] = (src[2*i] + src[2*i + 1] + 1) >> 1,   i = 0..63
+; r0 = dst, r1 = src. The stride argument is accepted but not used.
+;
+; Only SSE2 instructions are used (palignr would require SSSE3), so the
+; body matches the INIT_XMM sse2 declaration and the _sse2 symbol name.
+; NOTE(review): operates on one byte per sample - confirm this entry
+; point is only registered for 8-bit pixel builds.
+;-----------------------------------------------------------------
+INIT_XMM sse2
+cglobal scale1D_128to64, 3, 3, 5, dest, src1, stride
+    mova        m4, [pw_00ff]       ; per-word mask keeping the low (even) byte
+
+; x is the byte offset into dst; the matching src offset is 2*x.
+%assign x 0
+%rep 4
+    movu        m0, [r1 + 2*x]      ; 16 source pixels
+    movu        m2, [r1 + 2*x + 16] ; next 16 source pixels
+    mova        m1, m0
+    mova        m3, m2
+    pand        m0, m4              ; even-indexed pixels, widened to words
+    pand        m2, m4
+    psrlw       m1, 8               ; odd-indexed pixels, widened to words
+    psrlw       m3, 8
+    pavgw       m0, m1              ; (even + odd + 1) >> 1, same rounding as pavgb
+    pavgw       m2, m3
+    packuswb    m0, m2              ; averages are <= 255, so packing never saturates
+    movu        [r0 + x], m0        ; 16 output pixels; dst alignment not assumed
+%assign x x+16
+%endrep
+
+    RET
diff -r c9fdf5101823 -r 35e58e24642f source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Thu Nov 14 11:34:09 2013 +0530
+++ b/source/common/x86/pixel.h Thu Nov 14 11:34:44 2013 +0530
@@ -116,6 +116,7 @@
int x265_pixel_satd_16x12_sse2(pixel *, intptr_t, pixel *, intptr_t);
int x265_pixel_satd_16x32_sse2(pixel *, intptr_t, pixel *, intptr_t);
int x265_pixel_satd_16x64_sse2(pixel *, intptr_t, pixel *, intptr_t);
+void x265_scale1D_128to64_sse2(pixel *, pixel *, intptr_t);
DECL_PIXELS(uint64_t, var, mmx2, (pixel * pix, intptr_t i_stride))
DECL_PIXELS(uint64_t, var, sse2, (pixel * pix, intptr_t i_stride))
More information about the x265-devel
mailing list