[x265] [PATCH 033 of 307] x86: AVX512 fix convert_p2s_64xN,48x64
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:30:31 CEST 2018
# HG changeset patch
# User Vignesh Vijayakumar
# Date 1500536572 -19800
# Thu Jul 20 13:12:52 2017 +0530
# Node ID bf9a9cd255216300408506d10d4ff8bc87a15845
# Parent 97d5ab44b6da2db69584875c2dde97aef5533d9b
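x86: AVX512 fix convert_p2s_64xN,48x64

Broadcast pw_2000 into m4 instead of m8 in the 64x48, 64x32, 64x16 and 48x64
kernels, reduce the declared vector register count of the 64x48, 64x32 and
64x16 kernels from 9 to 5, and move the filterPixelToShort header comments
from the PROCESS_P2S_*_AVX512 macros to the cglobal entry points.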
diff -r 97d5ab44b6da -r bf9a9cd25521 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Wed Jul 19 12:25:43 2017 +0530
+++ b/source/common/x86/ipfilter8.asm Thu Jul 20 13:12:52 2017 +0530
@@ -1953,9 +1953,6 @@
P2S_H_32xN_avx2 64
P2S_H_32xN_avx2 48
-;-----------------------------------------------------------------------------
-; void filterPixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int16_t dstStride)
-;-----------------------------------------------------------------------------
%macro PROCESS_P2S_32x8_AVX512 0
pmovzxbw m0, [r0]
pmovzxbw m1, [r0 + r1]
@@ -1999,6 +1996,9 @@
movu [r2 + r6], m3
%endmacro
+;-----------------------------------------------------------------------------
+; void filterPixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int16_t dstStride)
+;-----------------------------------------------------------------------------
INIT_ZMM avx512
cglobal filterPixelToShort_32x8, 3, 7, 5
mov r3d, r3m
@@ -2446,9 +2446,6 @@
P2S_H_64xN_avx2 32
P2S_H_64xN_avx2 48
-;-----------------------------------------------------------------------------
-; void filterPixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int16_t dstStride)
-;-----------------------------------------------------------------------------
%macro PROCESS_P2S_64x8_AVX512 0
pmovzxbw m0, [r0]
pmovzxbw m1, [r0 + mmsize/2]
@@ -2526,6 +2523,9 @@
movu [r2 + r6 + mmsize], m3
%endmacro
+;-----------------------------------------------------------------------------
+; void filterPixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int16_t dstStride)
+;-----------------------------------------------------------------------------
INIT_ZMM avx512
cglobal filterPixelToShort_64x64, 3, 7, 5
mov r3d, r3m
@@ -2561,14 +2561,14 @@
RET
INIT_ZMM avx512
-cglobal filterPixelToShort_64x48, 3, 7, 9
+cglobal filterPixelToShort_64x48, 3, 7, 5
mov r3d, r3m
add r3d, r3d
lea r5, [r1 * 3]
lea r6, [r3 * 3]
; load constant
- vpbroadcastd m8, [pw_2000]
+ vpbroadcastd m4, [pw_2000]
PROCESS_P2S_64x8_AVX512
lea r0, [r0 + r1 * 4]
@@ -2589,14 +2589,14 @@
RET
INIT_ZMM avx512
-cglobal filterPixelToShort_64x32, 3, 7, 9
+cglobal filterPixelToShort_64x32, 3, 7, 5
mov r3d, r3m
add r3d, r3d
lea r5, [r1 * 3]
lea r6, [r3 * 3]
; load constant
- vpbroadcastd m8, [pw_2000]
+ vpbroadcastd m4, [pw_2000]
PROCESS_P2S_64x8_AVX512
lea r0, [r0 + r1 * 4]
@@ -2611,14 +2611,14 @@
RET
INIT_ZMM avx512
-cglobal filterPixelToShort_64x16, 3, 7, 9
+cglobal filterPixelToShort_64x16, 3, 7, 5
mov r3d, r3m
add r3d, r3d
lea r5, [r1 * 3]
lea r6, [r3 * 3]
; load constant
- vpbroadcastd m8, [pw_2000]
+ vpbroadcastd m4, [pw_2000]
PROCESS_P2S_64x8_AVX512
lea r0, [r0 + r1 * 4]
@@ -3047,9 +3047,6 @@
jnz .loop
RET
-;-----------------------------------------------------------------------------
-; void filterPixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int16_t dstStride)
-;-----------------------------------------------------------------------------
%macro PROCESS_P2S_48x8_AVX512 0
pmovzxbw m0, [r0]
pmovzxbw m1, [r0 + r1]
@@ -3123,6 +3120,9 @@
movu [r2 + r6 + 64], ym3
%endmacro
+;-----------------------------------------------------------------------------
+; void filterPixelToShort(pixel *src, intptr_t srcStride, int16_t *dst, int16_t dstStride)
+;-----------------------------------------------------------------------------
INIT_ZMM avx512
cglobal filterPixelToShort_48x64, 3,7,5
mov r3d, r3m
@@ -3131,7 +3131,7 @@
lea r6, [r3 * 3]
; load constant
- vpbroadcastd m8, [pw_2000]
+ vpbroadcastd m4, [pw_2000]
PROCESS_P2S_48x8_AVX512
lea r0, [r0 + r1 * 4]
More information about the x265-devel mailing list