[x265] [PATCH] asm: fix main12 avx2 for chroma_vpp/vps/vsp/vss
Rajesh Paulraj
rajesh at multicorewareinc.com
Mon Oct 5 14:59:00 CEST 2015
# HG changeset patch
# User Rajesh Paulraj<rajesh at multicorewareinc.com>
# Date 1444041513 -19800
# Mon Oct 05 16:08:33 2015 +0530
# Node ID 8dc9dfe33c370e5bc09863ab1062568662d46e37
# Parent 5f73ada8caa0c62cc7540799966bde7536861bf7
asm: fix main12 avx2 for chroma_vpp/vps/vsp/vss
diff -r 5f73ada8caa0 -r 8dc9dfe33c37 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Oct 01 17:53:59 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Oct 05 16:08:33 2015 +0530
@@ -1881,7 +1881,6 @@
p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_hpp = PFX(interp_4tap_horiz_pp_64x64_avx2);
p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_hpp = PFX(interp_4tap_horiz_pp_48x64_avx2);
-#if X265_DEPTH <= 10
p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_vpp = PFX(interp_4tap_vert_pp_4x2_avx2);
p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_vps = PFX(interp_4tap_vert_ps_4x2_avx2);
p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_vsp = PFX(interp_4tap_vert_sp_4x2_avx2);
@@ -2161,7 +2160,6 @@
p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_vsp = PFX(interp_4tap_vert_sp_64x32_avx2);
p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_vsp = PFX(interp_4tap_vert_sp_64x48_avx2);
p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vsp = PFX(interp_4tap_vert_sp_64x64_avx2);
-#endif
p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
diff -r 5f73ada8caa0 -r 8dc9dfe33c37 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm Thu Oct 01 17:53:59 2015 +0530
+++ b/source/common/x86/ipfilter16.asm Mon Oct 05 16:08:33 2015 +0530
@@ -4869,7 +4869,7 @@
%ifidn %2,pp
vbroadcasti128 m8, [INTERP_OFFSET_PP]
%elifidn %2, sp
- mova m8, [INTERP_OFFSET_SP]
+ vbroadcasti128 m8, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m8, [INTERP_OFFSET_PS]
%endif
@@ -5011,11 +5011,11 @@
mov r4d, %1/2
%ifidn %2, pp
- mova m7, [INTERP_OFFSET_PP]
+ vbroadcasti128 m7, [INTERP_OFFSET_PP]
%elifidn %2, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%elifidn %2, ps
- mova m7, [INTERP_OFFSET_PS]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5183,11 +5183,11 @@
mov r4d, %1/2
%ifidn %2, pp
- mova m7, [INTERP_OFFSET_PP]
+ vbroadcasti128 m7, [INTERP_OFFSET_PP]
%elifidn %2, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%elifidn %2, ps
- mova m7, [INTERP_OFFSET_PS]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5325,11 +5325,11 @@
mov r4d, %1/2
%ifidn %2, pp
- mova m7, [INTERP_OFFSET_PP]
+ vbroadcasti128 m7, [INTERP_OFFSET_PP]
%elifidn %2, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%elifidn %2, ps
- mova m7, [INTERP_OFFSET_PS]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5456,11 +5456,11 @@
mov r4d, %1/2
%ifidn %2, pp
- mova m7, [INTERP_OFFSET_PP]
+ vbroadcasti128 m7, [INTERP_OFFSET_PP]
%elifidn %2, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%elifidn %2, ps
- mova m7, [INTERP_OFFSET_PS]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5609,11 +5609,11 @@
mov r4d, %1/2
%ifidn %2, pp
- mova m7, [INTERP_OFFSET_PP]
+ vbroadcasti128 m7, [INTERP_OFFSET_PP]
%elifidn %2, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%elifidn %2, ps
- mova m7, [INTERP_OFFSET_PS]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -5732,11 +5732,11 @@
mov r4d, 32
%ifidn %1, pp
- mova m7, [INTERP_OFFSET_PP]
+ vbroadcasti128 m7, [INTERP_OFFSET_PP]
%elifidn %1, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%elifidn %1, ps
- mova m7, [INTERP_OFFSET_PS]
+ vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
.loopH:
@@ -11537,7 +11537,7 @@
%ifidn %1,pp
vbroadcasti128 m14, [pd_32]
%elifidn %1, sp
- mova m14, [pd_524800]
+ vbroadcasti128 m14, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
@@ -11665,19 +11665,19 @@
psrad m4, 6
psrad m5, 6
%elifidn %1, sp
- psrad m0, 10
- psrad m1, 10
- psrad m2, 10
- psrad m3, 10
- psrad m4, 10
- psrad m5, 10
-%else
- psrad m0, 2
- psrad m1, 2
- psrad m2, 2
- psrad m3, 2
- psrad m4, 2
- psrad m5, 2
+ psrad m0, INTERP_SHIFT_SP
+ psrad m1, INTERP_SHIFT_SP
+ psrad m2, INTERP_SHIFT_SP
+ psrad m3, INTERP_SHIFT_SP
+ psrad m4, INTERP_SHIFT_SP
+ psrad m5, INTERP_SHIFT_SP
+%else
+ psrad m0, INTERP_SHIFT_PS
+ psrad m1, INTERP_SHIFT_PS
+ psrad m2, INTERP_SHIFT_PS
+ psrad m3, INTERP_SHIFT_PS
+ psrad m4, INTERP_SHIFT_PS
+ psrad m5, INTERP_SHIFT_PS
%endif
%endif
@@ -11736,11 +11736,11 @@
psrad m6, 6
psrad m7, 6
%elifidn %1, sp
- psrad m6, 10
- psrad m7, 10
-%else
- psrad m6, 2
- psrad m7, 2
+ psrad m6, INTERP_SHIFT_SP
+ psrad m7, INTERP_SHIFT_SP
+%else
+ psrad m6, INTERP_SHIFT_PS
+ psrad m7, INTERP_SHIFT_PS
%endif
%endif
@@ -11814,23 +11814,23 @@
psrad m0, 6
psrad m1, 6
%elifidn %1, sp
- psrad m8, 10
- psrad m9, 10
- psrad m10, 10
- psrad m11, 10
- psrad m12, 10
- psrad m13, 10
- psrad m0, 10
- psrad m1, 10
-%else
- psrad m8, 2
- psrad m9, 2
- psrad m10, 2
- psrad m11, 2
- psrad m12, 2
- psrad m13, 2
- psrad m0, 2
- psrad m1, 2
+ psrad m8, INTERP_SHIFT_SP
+ psrad m9, INTERP_SHIFT_SP
+ psrad m10, INTERP_SHIFT_SP
+ psrad m11, INTERP_SHIFT_SP
+ psrad m12, INTERP_SHIFT_SP
+ psrad m13, INTERP_SHIFT_SP
+ psrad m0, INTERP_SHIFT_SP
+ psrad m1, INTERP_SHIFT_SP
+%else
+ psrad m8, INTERP_SHIFT_PS
+ psrad m9, INTERP_SHIFT_PS
+ psrad m10, INTERP_SHIFT_PS
+ psrad m11, INTERP_SHIFT_PS
+ psrad m12, INTERP_SHIFT_PS
+ psrad m13, INTERP_SHIFT_PS
+ psrad m0, INTERP_SHIFT_PS
+ psrad m1, INTERP_SHIFT_PS
%endif
%endif
@@ -11954,7 +11954,7 @@
%ifidn %1,pp
vbroadcasti128 m7, [pd_32]
%elifidn %1, sp
- mova m7, [pd_524800]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
@@ -11966,8 +11966,8 @@
%endmacro
FILTER_VER_CHROMA_AVX2_8x2 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_8x2 ps, 0, 2
-FILTER_VER_CHROMA_AVX2_8x2 sp, 1, 10
+FILTER_VER_CHROMA_AVX2_8x2 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_8x2 sp, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_8x2 ss, 0, 6
%macro FILTER_VER_CHROMA_AVX2_4x2 3
@@ -11991,7 +11991,7 @@
%ifidn %1,pp
vbroadcasti128 m6, [pd_32]
%elifidn %1, sp
- mova m6, [pd_524800]
+ vbroadcasti128 m6, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m6, [INTERP_OFFSET_PS]
%endif
@@ -12033,8 +12033,8 @@
%endmacro
FILTER_VER_CHROMA_AVX2_4x2 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_4x2 ps, 0, 2
-FILTER_VER_CHROMA_AVX2_4x2 sp, 1, 10
+FILTER_VER_CHROMA_AVX2_4x2 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_4x2 sp, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_4x2 ss, 0, 6
%macro FILTER_VER_CHROMA_AVX2_4x4 3
@@ -12058,7 +12058,7 @@
%ifidn %1,pp
vbroadcasti128 m6, [pd_32]
%elifidn %1, sp
- mova m6, [pd_524800]
+ vbroadcasti128 m6, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m6, [INTERP_OFFSET_PS]
%endif
@@ -12112,8 +12112,8 @@
%endmacro
FILTER_VER_CHROMA_AVX2_4x4 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_4x4 ps, 0, 2
-FILTER_VER_CHROMA_AVX2_4x4 sp, 1, 10
+FILTER_VER_CHROMA_AVX2_4x4 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_4x4 sp, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_4x4 ss, 0, 6
@@ -12138,7 +12138,7 @@
%ifidn %1,pp
vbroadcasti128 m7, [pd_32]
%elifidn %1, sp
- mova m7, [pd_524800]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
@@ -12225,8 +12225,8 @@
%endmacro
FILTER_VER_CHROMA_AVX2_4x8 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_4x8 ps, 0, 2
-FILTER_VER_CHROMA_AVX2_4x8 sp, 1, 10
+FILTER_VER_CHROMA_AVX2_4x8 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_4x8 sp, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_4x8 ss, 0 , 6
%macro PROCESS_LUMA_AVX2_W4_16R_4TAP 3
@@ -12396,7 +12396,7 @@
%ifidn %1,pp
vbroadcasti128 m7, [pd_32]
%elifidn %1, sp
- mova m7, [pd_524800]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
@@ -12410,12 +12410,12 @@
%endmacro
FILTER_VER_CHROMA_AVX2_4xN pp, 16, 1, 6
-FILTER_VER_CHROMA_AVX2_4xN ps, 16, 0, 2
-FILTER_VER_CHROMA_AVX2_4xN sp, 16, 1, 10
+FILTER_VER_CHROMA_AVX2_4xN ps, 16, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_4xN sp, 16, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_4xN ss, 16, 0, 6
FILTER_VER_CHROMA_AVX2_4xN pp, 32, 1, 6
-FILTER_VER_CHROMA_AVX2_4xN ps, 32, 0, 2
-FILTER_VER_CHROMA_AVX2_4xN sp, 32, 1, 10
+FILTER_VER_CHROMA_AVX2_4xN ps, 32, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_4xN sp, 32, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_4xN ss, 32, 0, 6
%macro FILTER_VER_CHROMA_AVX2_8x8 3
@@ -12429,7 +12429,7 @@
%ifdef PIC
lea r5, [tab_ChromaCoeffVer]
- add r5, r4
+ add r5, r4
%else
lea r5, [tab_ChromaCoeffVer + r4]
%endif
@@ -12440,7 +12440,7 @@
%ifidn %1,pp
vbroadcasti128 m11, [pd_32]
%elifidn %1, sp
- mova m11, [pd_524800]
+ vbroadcasti128 m11, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m11, [INTERP_OFFSET_PS]
%endif
@@ -12569,8 +12569,8 @@
%endmacro
FILTER_VER_CHROMA_AVX2_8x8 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_8x8 ps, 0, 2
-FILTER_VER_CHROMA_AVX2_8x8 sp, 1, 10
+FILTER_VER_CHROMA_AVX2_8x8 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_8x8 sp, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_8x8 ss, 0, 6
%macro FILTER_VER_CHROMA_AVX2_8x6 3
@@ -12595,7 +12595,7 @@
%ifidn %1,pp
vbroadcasti128 m11, [pd_32]
%elifidn %1, sp
- mova m11, [pd_524800]
+ vbroadcasti128 m11, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m11, [INTERP_OFFSET_PS]
%endif
@@ -12700,8 +12700,8 @@
%endmacro
FILTER_VER_CHROMA_AVX2_8x6 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_8x6 ps, 0, 2
-FILTER_VER_CHROMA_AVX2_8x6 sp, 1, 10
+FILTER_VER_CHROMA_AVX2_8x6 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_8x6 sp, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_8x6 ss, 0, 6
%macro PROCESS_CHROMA_AVX2 3
@@ -12785,7 +12785,7 @@
%ifidn %1,pp
vbroadcasti128 m7, [pd_32]
%elifidn %1, sp
- mova m7, [pd_524800]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
@@ -12799,8 +12799,8 @@
%endmacro
FILTER_VER_CHROMA_AVX2_8x4 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_8x4 ps, 0, 2
-FILTER_VER_CHROMA_AVX2_8x4 sp, 1, 10
+FILTER_VER_CHROMA_AVX2_8x4 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_8x4 sp, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_8x4 ss, 0, 6
%macro FILTER_VER_CHROMA_AVX2_8x12 3
@@ -12824,7 +12824,7 @@
%ifidn %1,pp
vbroadcasti128 m14, [pd_32]
%elifidn %1, sp
- mova m14, [pd_524800]
+ vbroadcasti128 m14, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
@@ -13002,6 +13002,6 @@
%endmacro
FILTER_VER_CHROMA_AVX2_8x12 pp, 1, 6
-FILTER_VER_CHROMA_AVX2_8x12 ps, 0, 2
-FILTER_VER_CHROMA_AVX2_8x12 sp, 1, 10
+FILTER_VER_CHROMA_AVX2_8x12 ps, 0, INTERP_SHIFT_PS
+FILTER_VER_CHROMA_AVX2_8x12 sp, 1, INTERP_SHIFT_SP
FILTER_VER_CHROMA_AVX2_8x12 ss, 0, 6
More information about the x265-devel mailing list