[x265] [PATCH] asm: fix main12 avx2 for luma_vsp and luma_hvpp

rajesh at multicorewareinc.com rajesh at multicorewareinc.com
Thu Oct 8 12:49:52 CEST 2015


# HG changeset patch
# User Rajesh Paulraj<rajesh at multicorewareinc.com>
# Date 1444288440 -19800
#      Thu Oct 08 12:44:00 2015 +0530
# Node ID 8a91a65e9290075c2dfcc192c076682e9b3c2514
# Parent  0e3aeb97e206b04521b13666c5c4bf4681748bb7
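asm: fix main12 avx2 for luma_vsp and luma_hvpp

In ipfilter16.asm, load the sp offset constant with vbroadcasti128 instead
of mova in the AVX2 vertical filter macros, and use INTERP_OFFSET_SP in
place of the hard-coded pd_524800 in the two macros that still referenced it.

In asm-primitives.cpp, remove the "#if X265_DEPTH <= 10" guards around the
AVX2 luma_vsp and luma_hvpp assignments so these primitives are also set up
when X265_DEPTH is above 10 (main12).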

diff -r 0e3aeb97e206 -r 8a91a65e9290 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Oct 05 16:08:33 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Oct 08 12:44:00 2015 +0530
@@ -1595,13 +1595,9 @@
 
         ALL_LUMA_PU(luma_vpp, interp_8tap_vert_pp, avx2);
         ALL_LUMA_PU(luma_vps, interp_8tap_vert_ps, avx2);
-#if X265_DEPTH <= 10
         ALL_LUMA_PU(luma_vsp, interp_8tap_vert_sp, avx2);
-#endif
         ALL_LUMA_PU(luma_vss, interp_8tap_vert_ss, avx2);
-#if X265_DEPTH <= 10
         p.pu[LUMA_4x4].luma_vsp = PFX(interp_8tap_vert_sp_4x4_avx2);               // since ALL_LUMA_PU didn't declare 4x4 size, calling separately luma_vsp function to use 
-#endif
 
         p.cu[BLOCK_16x16].add_ps = PFX(pixel_add_ps_16x16_avx2);
         p.cu[BLOCK_32x32].add_ps = PFX(pixel_add_ps_32x32_avx2);
@@ -2163,11 +2159,9 @@
 
         p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
 
-#if X265_DEPTH <= 10
         // TODO: depends on hps and vsp
         ALL_LUMA_PU_T(luma_hvpp, interp_8tap_hv_pp_cpu);                        // calling luma_hvpp for all sizes
         p.pu[LUMA_4x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x4>;             // ALL_LUMA_PU_T has declared all sizes except 4x4, hence calling luma_hvpp[4x4] 
-#endif
 
         if (cpuMask & X265_CPU_BMI2)
             p.scanPosLast = PFX(scanPosLast_avx2_bmi2);
diff -r 0e3aeb97e206 -r 8a91a65e9290 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm	Mon Oct 05 16:08:33 2015 +0530
+++ b/source/common/x86/ipfilter16.asm	Thu Oct 08 12:44:00 2015 +0530
@@ -6068,7 +6068,7 @@
 %ifidn %1,pp
     vbroadcasti128  m6, [pd_32]
 %elifidn %1, sp
-    mova            m6, [pd_524800]
+    vbroadcasti128  m6, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m6, [INTERP_OFFSET_PS]
 %endif
@@ -6178,7 +6178,7 @@
 %ifidn %1,pp
     vbroadcasti128  m11, [pd_32]
 %elifidn %1, sp
-    mova            m11, [pd_524800]
+    vbroadcasti128  m11, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m11, [INTERP_OFFSET_PS]
 %endif
@@ -6816,7 +6816,7 @@
 %ifidn %1,pp
     vbroadcasti128  m14, [pd_32]
 %elifidn %1, sp
-    mova            m14, [INTERP_OFFSET_SP]
+    vbroadcasti128  m14, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
@@ -6867,7 +6867,7 @@
 %ifidn %3,pp
     vbroadcasti128  m14, [pd_32]
 %elifidn %3, sp
-    mova            m14, [INTERP_OFFSET_SP]
+    vbroadcasti128  m14, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
@@ -6950,7 +6950,7 @@
 %ifidn %1,pp
     vbroadcasti128  m14, [pd_32]
 %elifidn %1, sp
-    mova            m14, [INTERP_OFFSET_SP]
+    vbroadcasti128  m14, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
@@ -7597,7 +7597,7 @@
 %ifidn %1,pp
     vbroadcasti128  m11, [pd_32]
 %elifidn %1, sp
-    mova            m11, [INTERP_OFFSET_SP]
+    vbroadcasti128  m11, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m11, [INTERP_OFFSET_PS]
 %endif
@@ -7644,7 +7644,7 @@
 %ifidn %1,pp
     vbroadcasti128  m14, [pd_32]
 %elifidn %1, sp
-    mova            m14, [INTERP_OFFSET_SP]
+    vbroadcasti128  m14, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
@@ -7816,7 +7816,7 @@
 %ifidn %1,pp
     vbroadcasti128  m7, [pd_32]
 %elifidn %1, sp
-    mova            m7, [INTERP_OFFSET_SP]
+    vbroadcasti128  m7, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
@@ -7861,7 +7861,7 @@
 %ifidn %1,pp
     vbroadcasti128  m7, [pd_32]
 %elifidn %1, sp
-    mova            m7, [INTERP_OFFSET_SP]
+    vbroadcasti128  m7, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
@@ -7901,7 +7901,7 @@
 %ifidn %1,pp
     vbroadcasti128  m14, [pd_32]
 %elifidn %1, sp
-    mova            m14, [INTERP_OFFSET_SP]
+    vbroadcasti128  m14, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
@@ -8248,7 +8248,7 @@
 %ifidn %1,pp
     vbroadcasti128  m7, [pd_32]
 %elifidn %1, sp
-    mova            m7, [INTERP_OFFSET_SP]
+    vbroadcasti128  m7, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
@@ -8668,7 +8668,7 @@
 %ifidn %1,pp
     vbroadcasti128  m7, [pd_32]
 %elifidn %1, sp
-    mova            m7, [INTERP_OFFSET_SP]
+    vbroadcasti128  m7, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
@@ -8703,7 +8703,7 @@
 %ifidn %1,pp
     vbroadcasti128  m14, [pd_32]
 %elifidn %1, sp
-    mova            m14, [INTERP_OFFSET_SP]
+    vbroadcasti128  m14, [INTERP_OFFSET_SP]
 %else
     vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif


More information about the x265-devel mailing list