[x265] [PATCH 1 of 2] asm: fix sse_pp[16x32] sse2 asm for 12-bit
Ramya Sriraman
ramya at multicorewareinc.com
Thu Sep 3 10:22:52 CEST 2015
# HG changeset patch
# User Ramya Sriraman <ramya at multicorewareinc.com>
# Date 1440588985 -19800
# Wed Aug 26 17:06:25 2015 +0530
# Node ID 83dc8aea6ba7c10e0d78ec7dc34b3d8f7d114563
# Parent d8091487bc9749e702c468786b0cd9e663478a91
asm: fix sse_pp[16x32] sse2 asm for 12-bit
diff -r d8091487bc97 -r 83dc8aea6ba7 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Aug 25 16:39:12 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp Wed Aug 26 17:06:25 2015 +0530
@@ -998,13 +998,13 @@
p.cu[BLOCK_4x4].intra_pred[32] = PFX(intra_pred_ang4_32_sse2);
p.cu[BLOCK_4x4].intra_pred[33] = PFX(intra_pred_ang4_33_sse2);
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_4x8_mmx2);
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_8x16_sse2);
+ p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_16x32_sse2);
+
#if X265_DEPTH <= 10
p.cu[BLOCK_4x4].sse_ss = PFX(pixel_ssd_ss_4x4_mmx2);
ALL_LUMA_CU(sse_ss, pixel_ssd_ss, sse2);
-
- p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_4x8_mmx2);
- p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_8x16_sse2);
- p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_16x32_sse2);
p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_32x64_sse2);
#endif
diff -r d8091487bc97 -r 83dc8aea6ba7 source/common/x86/ssd-a.asm
--- a/source/common/x86/ssd-a.asm Tue Aug 25 16:39:12 2015 -0700
+++ b/source/common/x86/ssd-a.asm Wed Aug 26 17:06:25 2015 +0530
@@ -105,8 +105,20 @@
dec r4d
jg .loop
%endif
+
+%if BIT_DEPTH == 12 && mmsize == 16
+ movu m5, m0
+ pxor m6, m6
+ punpckldq m0, m6
+ punpckhdq m5, m6
+ paddq m0, m5
+ movhlps m5, m0
+ paddq m0, m5
+ movq r6, xm0
+%else
HADDD m0, m5
- movd eax, xm0
+ movd eax,xm0
+%endif
%ifidn movu,movq ; detect MMX
EMMS
%endif
More information about the x265-devel mailing list