[x265] [PATCH] asm: saoCuOrgE2[0] avx2 code: improve 154c->128c
Divya Manivannan
divya at multicorewareinc.com
Thu Apr 23 15:19:10 CEST 2015
# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1429794634 -19800
# Thu Apr 23 18:40:34 2015 +0530
# Node ID 8d9267b2128bb647557def1b893d1c1bbada4e37
# Parent 861ffbedeaefd45eb6431d8ce6d5a3b4789f9a2c
asm: saoCuOrgE2[0] avx2 code: improve 154c->128c
diff -r 861ffbedeaef -r 8d9267b2128b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Apr 23 18:13:13 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Apr 23 18:40:34 2015 +0530
@@ -1707,6 +1707,7 @@
p.saoCuOrgE0 = x265_saoCuOrgE0_avx2;
p.saoCuOrgE1 = x265_saoCuOrgE1_avx2;
p.saoCuOrgE1_2Rows = x265_saoCuOrgE1_2Rows_avx2;
+ p.saoCuOrgE2[0] = x265_saoCuOrgE2_avx2;
p.saoCuOrgE3[0] = x265_saoCuOrgE3_avx2;
p.saoCuOrgE3[1] = x265_saoCuOrgE3_32_avx2;
p.saoCuOrgB0 = x265_saoCuOrgB0_avx2;
diff -r 861ffbedeaef -r 8d9267b2128b source/common/x86/loopfilter.asm
--- a/source/common/x86/loopfilter.asm Thu Apr 23 18:13:13 2015 +0530
+++ b/source/common/x86/loopfilter.asm Thu Apr 23 18:40:34 2015 +0530
@@ -452,6 +452,41 @@
jnz .loop
RET
+INIT_YMM avx2 ; x86inc setup; routine below is registered as x265_saoCuOrgE2_avx2 in the other hunks of this patch
+cglobal saoCuOrgE2, 5, 6, 6, rec, bufft, buff1, offsetEo, lcuWidth ; SAO edge-offset pass on one 16-pixel row: r0=rec r1=bufft r2=buff1 r3=offsetEo r4=lcuWidth (never read here), 6th arg = stride
+ mov r5d, r5m ; r5 = stride (6th argument); NOTE(review): only 32 bits are loaded although the prototype declares intptr_t - confirm stride is never negative
+ pxor xm0, xm0 ; xm0 = 0, used later to negate signDown
+ mova xm5, [pb_128] ; xm5 = bias constant (presumably 0x80 in every byte; defined outside this hunk)
+ inc r1 ; r1 = &bufft[1], so the sign store below lands at bufft[x + 1]
+
+ movu xm1, [r0] ; xm1 = rec[x], 16 pixels
+ movu xm2, [r0 + r5 + 1] ; xm2 = rec[x + stride + 1], the down-right neighbours
+ pxor xm3, xm1, xm5 ; xm3 = rec ^ bias, so the signed pcmpgtb below orders bytes as unsigned (assuming bias = 0x80)
+ pxor xm4, xm2, xm5 ; xm4 = neighbour ^ bias
+ pcmpgtb xm2, xm3, xm4 ; xm2 = 0xFF where rec > neighbour
+ pcmpgtb xm4, xm3 ; xm4 = 0xFF (-1) where neighbour > rec
+ pand xm2, [pb_1] ; xm2 = +1 where rec > neighbour (assuming pb_1 is 1 in every byte)
+ por xm2, xm4 ; xm2 = signDown: +1, 0 or -1 per pixel
+ movu xm3, [r2] ; xm3 = buff1[x]
+
+ paddb xm3, xm2 ; xm3 = buff1[x] + signDown
+ paddb xm3, [pb_2] ; xm3 = edgeType = signDown + buff1[x] + 2 (assuming pb_2 is 2 in every byte)
+
+ movu xm4, [r3] ; xm4 = offsetEo table (16 bytes are read)
+ pshufb xm4, xm3 ; xm4 = offsetEo[edgeType], per-byte table lookup
+
+ psubb xm3, xm0, xm2 ; xm3 = 0 - signDown
+ movu [r1], xm3 ; bufft[x + 1] = -signDown
+
+ pmovzxbw m2, xm1 ; m2 = rec[x] zero-extended to 16-bit words
+ pmovsxbw m3, xm4 ; m3 = offsets sign-extended to 16-bit words
+
+ paddw m2, m3 ; m2 = rec[x] + offset, in 16-bit precision
+ vextracti128 xm3, m2, 1 ; xm3 = upper eight words of the sums
+ packuswb xm2, xm3 ; narrow back to bytes with unsigned saturation (clips to 0..255)
+ movu [r0], xm2 ; rec[x] = clipped result, written back in place
+ RET ; x86inc return macro
+
;=======================================================================================================
;void saoCuOrgE3(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX)
;=======================================================================================================
diff -r 861ffbedeaef -r 8d9267b2128b source/common/x86/loopfilter.h
--- a/source/common/x86/loopfilter.h Thu Apr 23 18:13:13 2015 +0530
+++ b/source/common/x86/loopfilter.h Thu Apr 23 18:40:34 2015 +0530
@@ -32,6 +32,7 @@
void x265_saoCuOrgE1_2Rows_sse4(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width);
void x265_saoCuOrgE1_2Rows_avx2(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width);
void x265_saoCuOrgE2_sse4(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride);
+void x265_saoCuOrgE2_avx2(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride);
void x265_saoCuOrgE3_sse4(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX);
void x265_saoCuOrgE3_avx2(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX);
void x265_saoCuOrgE3_32_avx2(pixel *rec, int8_t *upBuff1, int8_t *m_offsetEo, intptr_t stride, int startX, int endX);
More information about the x265-devel
mailing list