[x265] [PATCH 4 of 4] asm: use general constant INTERP_OFFSET_PS to replace bit-depth-dependent magic constants
Min Chen
chenm003 at 163.com
Fri Jul 10 03:41:38 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1436489266 25200
# Node ID 7b3e1372bb28830ef0ab44cd652ecbe823573675
# Parent bbf0b4acba968355452158479656f7148d2f7199
asm: use general constant INTERP_OFFSET_PS to replace bit-depth-dependent magic constants
---
source/common/x86/ipfilter16.asm | 133 +++++++++++++++++++------------------
1 files changed, 68 insertions(+), 65 deletions(-)
diff -r bbf0b4acba96 -r 7b3e1372bb28 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm Thu Jul 09 17:39:18 2015 -0700
+++ b/source/common/x86/ipfilter16.asm Thu Jul 09 17:47:46 2015 -0700
@@ -145,6 +145,14 @@
const pb_shuf, db 0, 1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6, 7, 8, 9
db 4, 5, 6, 7, 8, 9, 10, 11, 6, 7, 8, 9, 10, 11, 12, 13
+%if BIT_DEPTH == 10
+ %define INTERP_OFFSET_PS pd_n32768
+%elif BIT_DEPTH == 12
+ %define INTERP_OFFSET_PS pd_n131072
+%else
+%error Unsupport bit depth!
+%endif
+
SECTION .text
cextern pd_32
cextern pw_pixel_max
@@ -279,11 +287,7 @@
mova m1, [pd_32]
pxor m7, m7
%else
- %if BIT_DEPTH == 10
- mova m1, [pd_n32768]
- %elif BIT_DEPTH == 12
- mova m1, [pd_n131072]
- %endif
+ mova m1, [INTERP_OFFSET_PS]
%endif
mov r4d, %2
@@ -495,11 +499,10 @@
mova m7, [pd_32]
%define SHIFT 6
%elifidn %1,ps
+ mova m7, [INTERP_OFFSET_PS]
%if BIT_DEPTH == 10
- mova m7, [pd_n32768]
%define SHIFT 2
%elif BIT_DEPTH == 12
- mova m7, [pd_n131072]
%define SHIFT 4
%endif
%endif
@@ -818,7 +821,7 @@
%endif
%ifidn %3, ps
- mova m1, [pd_n32768]
+ mova m1, [INTERP_OFFSET_PS]
cmp r5m, byte 0
%if %1 <= 6
lea r4, [r1 * 3]
@@ -1185,7 +1188,7 @@
pxor m6, m6
mova m7, [pw_pixel_max]
%else
- mova m1, [pd_n32768]
+ mova m1, [INTERP_OFFSET_PS]
%endif
mov r4d, %2
@@ -1271,7 +1274,7 @@
mova m1, [pd_32]
pxor m7, m7
%else
- mova m1, [pd_n32768]
+ mova m1, [INTERP_OFFSET_PS]
%endif
mov r4d, %2
@@ -1372,7 +1375,7 @@
%ifidn %3, pp
mova m1, [pd_32]
%else
- mova m1, [pd_n32768]
+ mova m1, [INTERP_OFFSET_PS]
%endif
mov r4d, %2
@@ -1495,7 +1498,7 @@
%ifidn %3, pp
mova m1, [pd_32]
%else
- mova m1, [pd_n32768]
+ mova m1, [INTERP_OFFSET_PS]
%endif
mov r4d, %2
@@ -1690,7 +1693,7 @@
%ifidn %3, pp
mova m1, [pd_32]
%else
- mova m1, [pd_n32768]
+ mova m1, [INTERP_OFFSET_PS]
%endif
mov r4d, %2
@@ -2631,7 +2634,7 @@
mova m2, [tab_Tm16]
%ifidn %3, ps
- mova m1, [pd_n32768]
+ mova m1, [INTERP_OFFSET_PS]
cmp r5m, byte 0
je .skip
sub r0, r1
@@ -3233,7 +3236,7 @@
mova m2, [tab_Tm16]
%ifidn %3, ps
- mova m1, [pd_n32768]
+ mova m1, [INTERP_OFFSET_PS]
cmp r5m, byte 0
je .skip
sub r0, r1
@@ -4084,7 +4087,7 @@
mova m6, [tab_c_524800]
%endif
%else
- mova m6, [pd_n32768]
+ mova m6, [INTERP_OFFSET_PS]
%endif
%endif
@@ -4339,7 +4342,7 @@
mova m5, [tab_c_524800]
%endif
%else
- mova m5, [pd_n32768]
+ mova m5, [INTERP_OFFSET_PS]
%endif
%endif
@@ -4434,7 +4437,7 @@
mova m4, [tab_c_524800]
%endif
%else
- mova m4, [pd_n32768]
+ mova m4, [INTERP_OFFSET_PS]
%endif
%endif
@@ -4538,7 +4541,7 @@
mova m6, [tab_c_524800]
%endif
%else
- mova m6, [pd_n32768]
+ mova m6, [INTERP_OFFSET_PS]
%endif
%endif
@@ -4706,7 +4709,7 @@
%elifidn %3, sp
mova m7, [tab_c_524800]
%elifidn %3, ps
- mova m7, [pd_n32768]
+ mova m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -4862,7 +4865,7 @@
%elifidn %2, sp
mova m8, [pd_524800]
%else
- vbroadcasti128 m8, [pd_n32768]
+ vbroadcasti128 m8, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5006,7 +5009,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [pd_n32768]
+ mova m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5178,7 +5181,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [pd_n32768]
+ mova m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5320,7 +5323,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [pd_n32768]
+ mova m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5451,7 +5454,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [pd_n32768]
+ mova m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5604,7 +5607,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [pd_n32768]
+ mova m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5727,7 +5730,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%elifidn %1, ps
- mova m7, [pd_n32768]
+ mova m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -6063,7 +6066,7 @@
%elifidn %1, sp
mova m6, [pd_524800]
%else
- vbroadcasti128 m6, [pd_n32768]
+ vbroadcasti128 m6, [INTERP_OFFSET_PS]
%endif
movq xm0, [r0]
@@ -6173,7 +6176,7 @@
%elifidn %1, sp
mova m11, [pd_524800]
%else
- vbroadcasti128 m11, [pd_n32768]
+ vbroadcasti128 m11, [INTERP_OFFSET_PS]
%endif
movu xm0, [r0] ; m0 = row 0
@@ -6811,7 +6814,7 @@
%elifidn %1, sp
mova m14, [pd_524800]
%else
- vbroadcasti128 m14, [pd_n32768]
+ vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
mov r9d, %2 / 8
@@ -6862,7 +6865,7 @@
%elifidn %3, sp
mova m14, [pd_524800]
%else
- vbroadcasti128 m14, [pd_n32768]
+ vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
@@ -6945,7 +6948,7 @@
%elifidn %1, sp
mova m14, [pd_524800]
%else
- vbroadcasti128 m14, [pd_n32768]
+ vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
lea r7, [r1 * 4]
@@ -7592,7 +7595,7 @@
%elifidn %1, sp
mova m11, [pd_524800]
%else
- vbroadcasti128 m11, [pd_n32768]
+ vbroadcasti128 m11, [INTERP_OFFSET_PS]
%endif
mova m12, [pw_pixel_max]
lea r6, [r3 * 3]
@@ -7639,7 +7642,7 @@
%elifidn %1, sp
mova m14, [pd_524800]
%else
- vbroadcasti128 m14, [pd_n32768]
+ vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
mov r9d, 4
@@ -7811,7 +7814,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%else
- vbroadcasti128 m7, [pd_n32768]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
mov dword [rsp], 2
.loopW:
@@ -7856,7 +7859,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%else
- vbroadcasti128 m7, [pd_n32768]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
PROCESS_LUMA_AVX2_W8_4R %1
@@ -7896,7 +7899,7 @@
%elifidn %1, sp
mova m14, [pd_524800]
%else
- vbroadcasti128 m14, [pd_n32768]
+ vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
mova m13, [pw_pixel_max]
pxor m12, m12
@@ -8243,7 +8246,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%else
- vbroadcasti128 m7, [pd_n32768]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
@@ -8663,7 +8666,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%else
- vbroadcasti128 m7, [pd_n32768]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
PROCESS_LUMA_AVX2_W4_16R %1
@@ -8698,7 +8701,7 @@
%elifidn %1, sp
mova m14, [pd_524800]
%else
- vbroadcasti128 m14, [pd_n32768]
+ vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
PROCESS_LUMA_AVX2_W8_16R %1
@@ -8735,7 +8738,7 @@
lea r6, [tab_LumaCoeffV + r4]
%endif
- mova m7, [pd_n32768]
+ mova m7, [INTERP_OFFSET_PS]
mov dword [rsp], %2/4
.loopH:
@@ -10297,7 +10300,7 @@
vbroadcasti128 m0, [tab_LumaCoeff + r4 * 2]
%endif
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 - interpolate coeff
@@ -10408,7 +10411,7 @@
vpbroadcastq m1, [tab_LumaCoeff + r4 + 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -10476,7 +10479,7 @@
vpbroadcastq m1, [tab_LumaCoeff + r4 + 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -10546,7 +10549,7 @@
vpbroadcastq m1, [tab_LumaCoeff + r4 + 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -10657,7 +10660,7 @@
vpbroadcastq m1, [tab_LumaCoeff + r4 + 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -10750,7 +10753,7 @@
vpbroadcastq m1, [tab_LumaCoeff + r4 + 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -10825,7 +10828,7 @@
vpbroadcastq m0, [tab_ChromaCoeff + r4 * 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -10884,7 +10887,7 @@
vpbroadcastq m0, [tab_ChromaCoeff + r4 * 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -10957,7 +10960,7 @@
vpbroadcastq m0, [tab_ChromaCoeff + r4 * 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -11039,7 +11042,7 @@
vpbroadcastq m0, [tab_ChromaCoeff + r4 * 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -11104,7 +11107,7 @@
vpbroadcastq m0, [tab_ChromaCoeff + r4 * 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -11205,7 +11208,7 @@
vpbroadcastq m0, [tab_ChromaCoeff + r4 * 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -11358,7 +11361,7 @@
vpbroadcastq m0, [tab_ChromaCoeff + r4 * 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -11478,7 +11481,7 @@
vpbroadcastq m0, [tab_ChromaCoeff + r4 * 8]
%endif
mova m3, [pb_shuf]
- vbroadcasti128 m2, [pd_n32768]
+ vbroadcasti128 m2, [INTERP_OFFSET_PS]
; register map
; m0 , m1 interpolate coeff
@@ -11539,7 +11542,7 @@
%elifidn %1, sp
mova m14, [pd_524800]
%else
- vbroadcasti128 m14, [pd_n32768]
+ vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
lea r7, [r1 * 4]
@@ -11956,7 +11959,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%else
- vbroadcasti128 m7, [pd_n32768]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
PROCESS_CHROMA_AVX2_8x2 %1, %2, %3
@@ -11993,7 +11996,7 @@
%elifidn %1, sp
mova m6, [pd_524800]
%else
- vbroadcasti128 m6, [pd_n32768]
+ vbroadcasti128 m6, [INTERP_OFFSET_PS]
%endif
movq xm0, [r0] ; row 0
@@ -12060,7 +12063,7 @@
%elifidn %1, sp
mova m6, [pd_524800]
%else
- vbroadcasti128 m6, [pd_n32768]
+ vbroadcasti128 m6, [INTERP_OFFSET_PS]
%endif
movq xm0, [r0] ; row 0
movq xm1, [r0 + r1] ; row 1
@@ -12140,7 +12143,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%else
- vbroadcasti128 m7, [pd_n32768]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
@@ -12398,7 +12401,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%else
- vbroadcasti128 m7, [pd_n32768]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
.loopH:
@@ -12442,7 +12445,7 @@
%elifidn %1, sp
mova m11, [pd_524800]
%else
- vbroadcasti128 m11, [pd_n32768]
+ vbroadcasti128 m11, [INTERP_OFFSET_PS]
%endif
movu xm0, [r0] ; m0 = row 0
@@ -12597,7 +12600,7 @@
%elifidn %1, sp
mova m11, [pd_524800]
%else
- vbroadcasti128 m11, [pd_n32768]
+ vbroadcasti128 m11, [INTERP_OFFSET_PS]
%endif
movu xm0, [r0] ; m0 = row 0
@@ -12787,7 +12790,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%else
- vbroadcasti128 m7, [pd_n32768]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
PROCESS_CHROMA_AVX2 %1, %2, %3
movu [r2], xm0
@@ -12826,7 +12829,7 @@
%elifidn %1, sp
mova m14, [pd_524800]
%else
- vbroadcasti128 m14, [pd_n32768]
+ vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
lea r6, [r3 * 3]
movu xm0, [r0] ; m0 = row 0
More information about the x265-devel
mailing list