[x265] [PATCH] asm: fix main12 avx2 for chroma_hps, chroma_hpp

Rajesh Paulraj rajesh at multicorewareinc.com
Thu Oct 1 14:58:03 CEST 2015


# HG changeset patch
# User Rajesh Paulraj<rajesh at multicorewareinc.com>
# Date 1443702239 -19800
#      Thu Oct 01 17:53:59 2015 +0530
# Node ID 5f73ada8caa0c62cc7540799966bde7536861bf7
# Parent  b2889a2a87f8194fa5587496e8f5752ca13b8d9f
asm: fix main12 avx2 for chroma_hps, chroma_hpp

diff -r b2889a2a87f8 -r 5f73ada8caa0 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Oct 01 12:46:19 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Oct 01 17:53:59 2015 +0530
@@ -1763,7 +1763,6 @@
         p.pu[LUMA_24x32].luma_hpp = PFX(interp_8tap_horiz_pp_24x32_avx2);
         p.pu[LUMA_48x64].luma_hpp = PFX(interp_8tap_horiz_pp_48x64_avx2);
 
-#if X265_DEPTH <= 10
         p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_hps = PFX(interp_4tap_horiz_ps_8x8_avx2);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_hps = PFX(interp_4tap_horiz_ps_8x4_avx2);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_hps = PFX(interp_4tap_horiz_ps_8x16_avx2);
@@ -1882,6 +1881,7 @@
         p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_hpp = PFX(interp_4tap_horiz_pp_64x64_avx2);
         p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_hpp = PFX(interp_4tap_horiz_pp_48x64_avx2);
 
+#if X265_DEPTH <= 10
         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_vpp = PFX(interp_4tap_vert_pp_4x2_avx2);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_vps = PFX(interp_4tap_vert_ps_4x2_avx2);
         p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_vsp = PFX(interp_4tap_vert_sp_4x2_avx2);
diff -r b2889a2a87f8 -r 5f73ada8caa0 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm	Thu Oct 01 12:46:19 2015 +0530
+++ b/source/common/x86/ipfilter16.asm	Thu Oct 01 17:53:59 2015 +0530
@@ -10847,7 +10847,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2], xm4
@@ -10906,7 +10906,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2], xm4
@@ -10920,7 +10920,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 16], xm4
@@ -10979,7 +10979,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2], xm4
@@ -10993,7 +10993,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 16], xm4
@@ -11007,7 +11007,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 32], xm4
@@ -11061,7 +11061,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2], xm4
@@ -11072,7 +11072,7 @@
     phaddd              m4, m4
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movq                [r2 + 16], xm4
@@ -11126,7 +11126,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2], xm4
@@ -11140,7 +11140,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 16], xm4
@@ -11154,7 +11154,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 32], xm4
@@ -11168,7 +11168,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 48], xm4
@@ -11227,7 +11227,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2], xm4
@@ -11241,7 +11241,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 16], xm4
@@ -11255,7 +11255,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 32], xm4
@@ -11269,7 +11269,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 48], xm4
@@ -11283,7 +11283,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 64], xm4
@@ -11297,7 +11297,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 80], xm4
@@ -11311,7 +11311,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 96], xm4
@@ -11325,7 +11325,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 112], xm4
@@ -11380,7 +11380,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2], xm4
@@ -11394,7 +11394,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 16], xm4
@@ -11408,7 +11408,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 32], xm4
@@ -11422,7 +11422,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 48], xm4
@@ -11436,7 +11436,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 64], xm4
@@ -11450,7 +11450,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movu                [r2 + 80], xm4
@@ -11500,7 +11500,7 @@
     phaddd              m4, m5
     paddd               m4, m2
     vpermq              m4, m4, q3120
-    psrad               m4, 2
+    psrad               m4, INTERP_SHIFT_PS
     vextracti128        xm5, m4, 1
     packssdw            xm4, xm5
     movq                [r2], xm4


More information about the x265-devel mailing list