[x265] [PATCH] asm: fix main12 avx2 for luma_vsp and luma_hvpp
rajesh at multicorewareinc.com
rajesh at multicorewareinc.com
Thu Oct 8 12:49:52 CEST 2015
# HG changeset patch
# User Rajesh Paulraj<rajesh at multicorewareinc.com>
# Date 1444288440 -19800
# Thu Oct 08 12:44:00 2015 +0530
# Node ID 8a91a65e9290075c2dfcc192c076682e9b3c2514
# Parent 0e3aeb97e206b04521b13666c5c4bf4681748bb7
asm: fix main12 avx2 for luma_vsp and luma_hvpp
diff -r 0e3aeb97e206 -r 8a91a65e9290 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Oct 05 16:08:33 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Oct 08 12:44:00 2015 +0530
@@ -1595,13 +1595,9 @@
ALL_LUMA_PU(luma_vpp, interp_8tap_vert_pp, avx2);
ALL_LUMA_PU(luma_vps, interp_8tap_vert_ps, avx2);
-#if X265_DEPTH <= 10
ALL_LUMA_PU(luma_vsp, interp_8tap_vert_sp, avx2);
-#endif
ALL_LUMA_PU(luma_vss, interp_8tap_vert_ss, avx2);
-#if X265_DEPTH <= 10
p.pu[LUMA_4x4].luma_vsp = PFX(interp_8tap_vert_sp_4x4_avx2); // since ALL_LUMA_PU didn't declare 4x4 size, calling separately luma_vsp function to use
-#endif
p.cu[BLOCK_16x16].add_ps = PFX(pixel_add_ps_16x16_avx2);
p.cu[BLOCK_32x32].add_ps = PFX(pixel_add_ps_32x32_avx2);
@@ -2163,11 +2159,9 @@
p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
-#if X265_DEPTH <= 10
// TODO: depends on hps and vsp
ALL_LUMA_PU_T(luma_hvpp, interp_8tap_hv_pp_cpu); // calling luma_hvpp for all sizes
p.pu[LUMA_4x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x4>; // ALL_LUMA_PU_T has declared all sizes except 4x4, hence calling luma_hvpp[4x4]
-#endif
if (cpuMask & X265_CPU_BMI2)
p.scanPosLast = PFX(scanPosLast_avx2_bmi2);
diff -r 0e3aeb97e206 -r 8a91a65e9290 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm Mon Oct 05 16:08:33 2015 +0530
+++ b/source/common/x86/ipfilter16.asm Thu Oct 08 12:44:00 2015 +0530
@@ -6068,7 +6068,7 @@
%ifidn %1,pp
vbroadcasti128 m6, [pd_32]
%elifidn %1, sp
- mova m6, [pd_524800]
+ vbroadcasti128 m6, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m6, [INTERP_OFFSET_PS]
%endif
@@ -6178,7 +6178,7 @@
%ifidn %1,pp
vbroadcasti128 m11, [pd_32]
%elifidn %1, sp
- mova m11, [pd_524800]
+ vbroadcasti128 m11, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m11, [INTERP_OFFSET_PS]
%endif
@@ -6816,7 +6816,7 @@
%ifidn %1,pp
vbroadcasti128 m14, [pd_32]
%elifidn %1, sp
- mova m14, [INTERP_OFFSET_SP]
+ vbroadcasti128 m14, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
@@ -6867,7 +6867,7 @@
%ifidn %3,pp
vbroadcasti128 m14, [pd_32]
%elifidn %3, sp
- mova m14, [INTERP_OFFSET_SP]
+ vbroadcasti128 m14, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
@@ -6950,7 +6950,7 @@
%ifidn %1,pp
vbroadcasti128 m14, [pd_32]
%elifidn %1, sp
- mova m14, [INTERP_OFFSET_SP]
+ vbroadcasti128 m14, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
@@ -7597,7 +7597,7 @@
%ifidn %1,pp
vbroadcasti128 m11, [pd_32]
%elifidn %1, sp
- mova m11, [INTERP_OFFSET_SP]
+ vbroadcasti128 m11, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m11, [INTERP_OFFSET_PS]
%endif
@@ -7644,7 +7644,7 @@
%ifidn %1,pp
vbroadcasti128 m14, [pd_32]
%elifidn %1, sp
- mova m14, [INTERP_OFFSET_SP]
+ vbroadcasti128 m14, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
@@ -7816,7 +7816,7 @@
%ifidn %1,pp
vbroadcasti128 m7, [pd_32]
%elifidn %1, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
@@ -7861,7 +7861,7 @@
%ifidn %1,pp
vbroadcasti128 m7, [pd_32]
%elifidn %1, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
@@ -7901,7 +7901,7 @@
%ifidn %1,pp
vbroadcasti128 m14, [pd_32]
%elifidn %1, sp
- mova m14, [INTERP_OFFSET_SP]
+ vbroadcasti128 m14, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
@@ -8248,7 +8248,7 @@
%ifidn %1,pp
vbroadcasti128 m7, [pd_32]
%elifidn %1, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
@@ -8668,7 +8668,7 @@
%ifidn %1,pp
vbroadcasti128 m7, [pd_32]
%elifidn %1, sp
- mova m7, [INTERP_OFFSET_SP]
+ vbroadcasti128 m7, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m7, [INTERP_OFFSET_PS]
%endif
@@ -8703,7 +8703,7 @@
%ifidn %1,pp
vbroadcasti128 m14, [pd_32]
%elifidn %1, sp
- mova m14, [INTERP_OFFSET_SP]
+ vbroadcasti128 m14, [INTERP_OFFSET_SP]
%else
vbroadcasti128 m14, [INTERP_OFFSET_PS]
%endif
More information about the x265-devel mailing list