[x265] [PATCH] asm: add macro to sub_ps module to reduce code size
sumalatha at multicorewareinc.com
sumalatha at multicorewareinc.com
Thu Apr 16 07:06:48 CEST 2015
# HG changeset patch
# User Sumalatha Polureddy
# Date 1429160799 -19800
# Thu Apr 16 10:36:39 2015 +0530
# Node ID 7d3cb1832fed137c6362c3d1e8f29f6be7de113d
# Parent f9c0e1f233cc15ccce4eb96adef11583af082f33
asm: add macro to sub_ps module to reduce code size
diff -r f9c0e1f233cc -r 7d3cb1832fed source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Wed Apr 15 16:20:27 2015 +0530
+++ b/source/common/x86/pixel-util8.asm Thu Apr 16 10:36:39 2015 +0530
@@ -4514,12 +4514,13 @@
;-----------------------------------------------------------------------------
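-; void pixel_sub_ps_16x16(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
+; void pixel_sub_ps_16x%2(int16_t *dest, intptr_t deststride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);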
;-----------------------------------------------------------------------------
+%macro PIXELSUB_PS_W16_H8_avx2 2
%if ARCH_X86_64
INIT_YMM avx2
-cglobal pixel_sub_ps_16x16, 6, 10, 4, dest, deststride, src0, src1, srcstride0, srcstride1
+cglobal pixel_sub_ps_16x%2, 6, 10, 4, dest, deststride, src0, src1, srcstride0, srcstride1
add r1, r1
lea r6, [r1 * 3]
- mov r7d, 2
+ mov r7d, %2/8
lea r9, [r4 * 3]
lea r8, [r5 * 3]
@@ -4581,6 +4582,10 @@
jnz .loop
RET
%endif
+%endmacro
+
+PIXELSUB_PS_W16_H8_avx2 16, 16
+PIXELSUB_PS_W16_H8_avx2 16, 32
;-----------------------------------------------------------------------------
; void pixel_sub_ps_32x%2(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
@@ -4719,10 +4724,11 @@
;-----------------------------------------------------------------------------
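-; void pixel_sub_ps_32x32(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
+; void pixel_sub_ps_32x%2(int16_t *dest, intptr_t deststride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);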
;-----------------------------------------------------------------------------
+%macro PIXELSUB_PS_W32_H8_avx2 2
%if ARCH_X86_64
INIT_YMM avx2
-cglobal pixel_sub_ps_32x32, 6, 10, 4, dest, deststride, src0, src1, srcstride0, srcstride1
- mov r6d, 4
+cglobal pixel_sub_ps_32x%2, 6, 10, 4, dest, deststride, src0, src1, srcstride0, srcstride1
+ mov r6d, %2/8
add r1, r1
lea r7, [r4 * 3]
lea r8, [r5 * 3]
@@ -4830,6 +4836,10 @@
jnz .loop
RET
%endif
+%endmacro
+
+PIXELSUB_PS_W32_H8_avx2 32, 32
+PIXELSUB_PS_W32_H8_avx2 32, 64
;-----------------------------------------------------------------------------
; void pixel_sub_ps_64x%2(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
More information about the x265-devel mailing list