[x265] [PATCH] avx2: integral_init4h -> added 'INTEGRAL_FOUR_HORIZONTAL_4' macro to reduce data movement for '4' element case
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Wed Jun 14 07:20:43 CEST 2017
# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1497417160 -19800
# Wed Jun 14 10:42:40 2017 +0530
# Node ID f6f920ab5be6e00b9c32ac225959fc6b9f68d36b
# Parent 28bbc349d17035a3c1fcdfbdca3b8e21ae6b669b
avx2: integral_init4h -> added 'INTEGRAL_FOUR_HORIZONTAL_4' macro to reduce data movement for '4' element case
diff -r 28bbc349d170 -r f6f920ab5be6 source/common/x86/seaintegral.asm
--- a/source/common/x86/seaintegral.asm Wed Jun 07 17:06:57 2017 +0530
+++ b/source/common/x86/seaintegral.asm Wed Jun 14 10:42:40 2017 +0530
@@ -148,11 +148,6 @@
jnz .loop
RET
-;-----------------------------------------------------------------------------
-;static void integral_init4h_c(uint32_t *sum, pixel *pix, intptr_t stride)
-;-----------------------------------------------------------------------------
-INIT_YMM avx2
-
%macro INTEGRAL_FOUR_HORIZONTAL_16 0
pmovzxbw m0, [r1]
pmovzxbw m1, [r1 + 1]
@@ -163,6 +158,23 @@
paddw m0, m1
%endmacro
+; INTEGRAL_FOUR_HORIZONTAL_4: 4-tap horizontal byte sum for four outputs.
+; In:    r1  = address of the source bytes (bytes r1+0 .. r1+6 are read)
+; Out:   xm0 = words 0..3 hold b[i] + b[i+1] + b[i+2] + b[i+3] for i = 0..3,
+;              where b[k] is the byte at [r1 + k]; words 4..7 are zero
+; Clobb: xm1
+%macro INTEGRAL_FOUR_HORIZONTAL_4 0
+ movd xm0, [r1]
+ movd xm1, [r1 + 1]
+ pmovzxbw xm0, xm0
+ pmovzxbw xm1, xm1
+ paddw xm0, xm1
+ movd xm1, [r1 + 2]
+ pmovzxbw xm1, xm1
+ paddw xm0, xm1
+ movd xm1, [r1 + 3]
+ ; widen the fourth tap to words as well; adding the packed bytes directly
+ ; with paddw would mix neighbouring bytes into each 16-bit lane
+ pmovzxbw xm1, xm1
+ paddw xm0, xm1
+%endmacro
+
+;-----------------------------------------------------------------------------
+;static void integral_init4h(uint32_t *sum, pixel *pix, intptr_t stride)
+;-----------------------------------------------------------------------------
+INIT_YMM avx2
cglobal integral4h, 3, 5, 3
lea r3, [4 * r2]
sub r0, r3
@@ -205,7 +217,7 @@
jmp .end
.loop_4:
- INTEGRAL_FOUR_HORIZONTAL_16
+ INTEGRAL_FOUR_HORIZONTAL_4
pmovzxwd xm0, xm0
movu xm1, [r0]
paddd xm0, xm1
More information about the x265-devel
mailing list