[x265] [PATCH 4 of 4] asm: use general constant INTERP_OFFSET_PS to replace bit-depth-specific magic constants

Min Chen chenm003 at 163.com
Fri Jul 10 03:41:38 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1436489266 25200
# Node ID 7b3e1372bb28830ef0ab44cd652ecbe823573675
# Parent  bbf0b4acba968355452158479656f7148d2f7199
asm: use general constant INTERP_OFFSET_PS to replace bit-depth-specific magic constants
---
 source/common/x86/ipfilter16.asm |  133 +++++++++++++++++++------------------
 1 files changed, 68 insertions(+), 65 deletions(-)

diff -r bbf0b4acba96 -r 7b3e1372bb28 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm	Thu Jul 09 17:39:18 2015 -0700
+++ b/source/common/x86/ipfilter16.asm	Thu Jul 09 17:47:46 2015 -0700
@@ -145,6 +145,14 @@
 const pb_shuf,  db 0, 1, 2, 3, 4, 5, 6, 7, 2, 3, 4, 5, 6, 7, 8, 9
                 db 4, 5, 6, 7, 8, 9, 10, 11, 6, 7, 8, 9, 10, 11, 12, 13
 
+%if BIT_DEPTH == 10
+    %define INTERP_OFFSET_PS        pd_n32768
+%elif BIT_DEPTH == 12
+    %define INTERP_OFFSET_PS        pd_n131072
+%else
+%error Unsupport bit depth!
+%endif
+
 SECTION .text
 cextern pd_32
 cextern pw_pixel_max
@@ -279,11 +287,7 @@
     mova        m1,     [pd_32]
     pxor        m7,     m7
 %else
-  %if BIT_DEPTH == 10
-    mova        m1,     [pd_n32768]
-  %elif BIT_DEPTH == 12
-    mova        m1,     [pd_n131072]
-  %endif
+    mova        m1,     [INTERP_OFFSET_PS]
 %endif
 
     mov         r4d,    %2
@@ -495,11 +499,10 @@
     mova      m7, [pd_32]
 %define SHIFT 6
 %elifidn %1,ps
+    mova      m7, [INTERP_OFFSET_PS]
   %if BIT_DEPTH == 10
-    mova      m7, [pd_n32768]
     %define SHIFT 2
   %elif BIT_DEPTH == 12
-    mova      m7, [pd_n131072]
     %define SHIFT 4
   %endif
 %endif
@@ -818,7 +821,7 @@
 %endif
 
 %ifidn %3, ps
-    mova        m1,     [pd_n32768]
+    mova        m1,     [INTERP_OFFSET_PS]
     cmp         r5m,    byte 0
 %if %1 <= 6
     lea         r4,     [r1 * 3]
@@ -1185,7 +1188,7 @@
     pxor        m6, m6
     mova        m7, [pw_pixel_max]
 %else
-    mova        m1, [pd_n32768]
+    mova        m1, [INTERP_OFFSET_PS]
 %endif
 
     mov         r4d, %2
@@ -1271,7 +1274,7 @@
     mova        m1, [pd_32]
     pxor        m7, m7
 %else
-    mova        m1, [pd_n32768]
+    mova        m1, [INTERP_OFFSET_PS]
 %endif
 
     mov         r4d, %2
@@ -1372,7 +1375,7 @@
 %ifidn %3, pp 
     mova        m1, [pd_32]
 %else
-    mova        m1, [pd_n32768]
+    mova        m1, [INTERP_OFFSET_PS]
 %endif
 
     mov         r4d, %2
@@ -1495,7 +1498,7 @@
 %ifidn %3, pp 
     mova        m1, [pd_32]
 %else
-    mova        m1, [pd_n32768]
+    mova        m1, [INTERP_OFFSET_PS]
 %endif
 
     mov         r4d, %2
@@ -1690,7 +1693,7 @@
 %ifidn %3, pp 
     mova        m1, [pd_32]
 %else
-    mova        m1, [pd_n32768]
+    mova        m1, [INTERP_OFFSET_PS]
 %endif
 
     mov         r4d, %2
@@ -2631,7 +2634,7 @@
     mova        m2,       [tab_Tm16]
 
 %ifidn %3, ps
-    mova        m1,       [pd_n32768]
+    mova        m1,       [INTERP_OFFSET_PS]
     cmp         r5m, byte 0
     je          .skip
     sub         r0, r1
@@ -3233,7 +3236,7 @@
     mova        m2,       [tab_Tm16]
 
 %ifidn %3, ps
-    mova        m1,       [pd_n32768]
+    mova        m1,       [INTERP_OFFSET_PS]
     cmp         r5m, byte 0
     je          .skip
     sub         r0, r1
@@ -4084,7 +4087,7 @@
             mova      m6, [tab_c_524800]
         %endif
     %else
-        mova      m6, [pd_n32768]
+        mova      m6, [INTERP_OFFSET_PS]
     %endif
 %endif
 
@@ -4339,7 +4342,7 @@
             mova      m5, [tab_c_524800]
         %endif
     %else
-        mova      m5, [pd_n32768]
+        mova      m5, [INTERP_OFFSET_PS]
     %endif
 %endif
 
@@ -4434,7 +4437,7 @@
             mova      m4, [tab_c_524800]
         %endif
     %else
-        mova      m4, [pd_n32768]
+        mova      m4, [INTERP_OFFSET_PS]
     %endif
 %endif
 
@@ -4538,7 +4541,7 @@
             mova      m6, [tab_c_524800]
         %endif
     %else
-        mova      m6, [pd_n32768]
+        mova      m6, [INTERP_OFFSET_PS]
     %endif
 %endif
 
@@ -4706,7 +4709,7 @@
 %elifidn %3, sp
     mova      m7, [tab_c_524800]
 %elifidn %3, ps
-    mova      m7, [pd_n32768]
+    mova      m7, [INTERP_OFFSET_PS]
 %endif
 
 .loopH:
@@ -4862,7 +4865,7 @@
 %elifidn %2, sp
     mova            m8, [pd_524800]
 %else
-    vbroadcasti128  m8, [pd_n32768]
+    vbroadcasti128  m8, [INTERP_OFFSET_PS]
 %endif
 
 .loopH:
@@ -5006,7 +5009,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [pd_n32768]
+    mova      m7, [INTERP_OFFSET_PS]
 %endif
 
 .loopH:
@@ -5178,7 +5181,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [pd_n32768]
+    mova      m7, [INTERP_OFFSET_PS]
 %endif
 
 .loopH:
@@ -5320,7 +5323,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [pd_n32768]
+    mova      m7, [INTERP_OFFSET_PS]
 %endif
 
 .loopH:
@@ -5451,7 +5454,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [pd_n32768]
+    mova      m7, [INTERP_OFFSET_PS]
 %endif
 
 .loopH:
@@ -5604,7 +5607,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [pd_n32768]
+    mova      m7, [INTERP_OFFSET_PS]
 %endif
 
 .loopH:
@@ -5727,7 +5730,7 @@
 %elifidn %1, sp
     mova      m7, [pd_524800]
 %elifidn %1, ps
-    mova      m7, [pd_n32768]
+    mova      m7, [INTERP_OFFSET_PS]
 %endif
 
 .loopH:
@@ -6063,7 +6066,7 @@
 %elifidn %1, sp
     mova            m6, [pd_524800]
 %else
-    vbroadcasti128  m6, [pd_n32768]
+    vbroadcasti128  m6, [INTERP_OFFSET_PS]
 %endif
 
     movq            xm0, [r0]
@@ -6173,7 +6176,7 @@
 %elifidn %1, sp
     mova            m11, [pd_524800]
 %else
-    vbroadcasti128  m11, [pd_n32768]
+    vbroadcasti128  m11, [INTERP_OFFSET_PS]
 %endif
 
     movu            xm0, [r0]                       ; m0 = row 0
@@ -6811,7 +6814,7 @@
 %elifidn %1, sp
     mova            m14, [pd_524800]
 %else
-    vbroadcasti128  m14, [pd_n32768]
+    vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
     mov             r9d, %2 / 8
@@ -6862,7 +6865,7 @@
 %elifidn %3, sp
     mova            m14, [pd_524800]
 %else
-    vbroadcasti128  m14, [pd_n32768]
+    vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
 
     lea             r6, [r3 * 3]
@@ -6945,7 +6948,7 @@
 %elifidn %1, sp
     mova            m14, [pd_524800]
 %else
-    vbroadcasti128  m14, [pd_n32768]
+    vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
     lea             r7, [r1 * 4]
@@ -7592,7 +7595,7 @@
 %elifidn %1, sp
     mova            m11, [pd_524800]
 %else
-    vbroadcasti128  m11, [pd_n32768]
+    vbroadcasti128  m11, [INTERP_OFFSET_PS]
 %endif
     mova            m12, [pw_pixel_max]
     lea             r6, [r3 * 3]
@@ -7639,7 +7642,7 @@
 %elifidn %1, sp
     mova            m14, [pd_524800]
 %else
-    vbroadcasti128  m14, [pd_n32768]
+    vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
     mov             r9d, 4
@@ -7811,7 +7814,7 @@
 %elifidn %1, sp
     mova            m7, [pd_524800]
 %else
-    vbroadcasti128  m7, [pd_n32768]
+    vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
     mov             dword [rsp], 2
 .loopW:
@@ -7856,7 +7859,7 @@
 %elifidn %1, sp
     mova            m7, [pd_524800]
 %else
-    vbroadcasti128  m7, [pd_n32768]
+    vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
 
     PROCESS_LUMA_AVX2_W8_4R %1
@@ -7896,7 +7899,7 @@
 %elifidn %1, sp
     mova            m14, [pd_524800]
 %else
-    vbroadcasti128  m14, [pd_n32768]
+    vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
     mova            m13, [pw_pixel_max]
     pxor            m12, m12
@@ -8243,7 +8246,7 @@
 %elifidn %1, sp
     mova            m7, [pd_524800]
 %else
-    vbroadcasti128  m7, [pd_n32768]
+    vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
 
@@ -8663,7 +8666,7 @@
 %elifidn %1, sp
     mova            m7, [pd_524800]
 %else
-    vbroadcasti128  m7, [pd_n32768]
+    vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
     PROCESS_LUMA_AVX2_W4_16R %1
@@ -8698,7 +8701,7 @@
 %elifidn %1, sp
     mova            m14, [pd_524800]
 %else
-    vbroadcasti128  m14, [pd_n32768]
+    vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
     PROCESS_LUMA_AVX2_W8_16R %1
@@ -8735,7 +8738,7 @@
     lea       r6, [tab_LumaCoeffV + r4]
 %endif
 
-    mova      m7, [pd_n32768]
+    mova      m7, [INTERP_OFFSET_PS]
 
     mov       dword [rsp], %2/4
 .loopH:
@@ -10297,7 +10300,7 @@
     vbroadcasti128              m0,                [tab_LumaCoeff + r4 * 2]
 %endif
 
-    vbroadcasti128              m2,                [pd_n32768]
+    vbroadcasti128              m2,                [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 - interpolate coeff
@@ -10408,7 +10411,7 @@
     vpbroadcastq        m1, [tab_LumaCoeff + r4 + 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -10476,7 +10479,7 @@
     vpbroadcastq        m1, [tab_LumaCoeff + r4 + 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -10546,7 +10549,7 @@
     vpbroadcastq        m1, [tab_LumaCoeff + r4 + 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -10657,7 +10660,7 @@
     vpbroadcastq        m1, [tab_LumaCoeff + r4 + 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -10750,7 +10753,7 @@
     vpbroadcastq        m1, [tab_LumaCoeff + r4 + 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -10825,7 +10828,7 @@
     vpbroadcastq        m0, [tab_ChromaCoeff + r4 * 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -10884,7 +10887,7 @@
     vpbroadcastq        m0, [tab_ChromaCoeff + r4 * 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -10957,7 +10960,7 @@
     vpbroadcastq        m0, [tab_ChromaCoeff + r4 * 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -11039,7 +11042,7 @@
     vpbroadcastq        m0, [tab_ChromaCoeff + r4 * 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -11104,7 +11107,7 @@
     vpbroadcastq        m0, [tab_ChromaCoeff + r4 * 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -11205,7 +11208,7 @@
     vpbroadcastq        m0, [tab_ChromaCoeff + r4 * 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -11358,7 +11361,7 @@
     vpbroadcastq        m0, [tab_ChromaCoeff + r4 * 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -11478,7 +11481,7 @@
     vpbroadcastq        m0, [tab_ChromaCoeff + r4 * 8]
 %endif
     mova                m3, [pb_shuf]
-    vbroadcasti128      m2, [pd_n32768]
+    vbroadcasti128      m2, [INTERP_OFFSET_PS]
 
     ; register map
     ; m0 , m1 interpolate coeff
@@ -11539,7 +11542,7 @@
 %elifidn %1, sp
     mova            m14, [pd_524800]
 %else
-    vbroadcasti128  m14, [pd_n32768]
+    vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
     lea             r7, [r1 * 4]
@@ -11956,7 +11959,7 @@
 %elifidn %1, sp
     mova            m7, [pd_524800]
 %else
-    vbroadcasti128  m7, [pd_n32768]
+    vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
 
     PROCESS_CHROMA_AVX2_8x2 %1, %2, %3
@@ -11993,7 +11996,7 @@
 %elifidn %1, sp
     mova            m6, [pd_524800]
 %else
-    vbroadcasti128  m6, [pd_n32768]
+    vbroadcasti128  m6, [INTERP_OFFSET_PS]
 %endif
 
     movq            xm0, [r0]                       ; row 0
@@ -12060,7 +12063,7 @@
 %elifidn %1, sp
     mova            m6, [pd_524800]
 %else
-    vbroadcasti128  m6, [pd_n32768]
+    vbroadcasti128  m6, [INTERP_OFFSET_PS]
 %endif
     movq            xm0, [r0]                       ; row 0
     movq            xm1, [r0 + r1]                  ; row 1
@@ -12140,7 +12143,7 @@
 %elifidn %1, sp
     mova            m7, [pd_524800]
 %else
-    vbroadcasti128  m7, [pd_n32768]
+    vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
 
@@ -12398,7 +12401,7 @@
 %elifidn %1, sp
     mova            m7, [pd_524800]
 %else
-    vbroadcasti128  m7, [pd_n32768]
+    vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
 .loopH:
@@ -12442,7 +12445,7 @@
 %elifidn %1, sp
     mova            m11, [pd_524800]
 %else
-    vbroadcasti128  m11, [pd_n32768]
+    vbroadcasti128  m11, [INTERP_OFFSET_PS]
 %endif
 
     movu            xm0, [r0]                       ; m0 = row 0
@@ -12597,7 +12600,7 @@
 %elifidn %1, sp
     mova            m11, [pd_524800]
 %else
-    vbroadcasti128  m11, [pd_n32768]
+    vbroadcasti128  m11, [INTERP_OFFSET_PS]
 %endif
 
     movu            xm0, [r0]                       ; m0 = row 0
@@ -12787,7 +12790,7 @@
 %elifidn %1, sp
     mova            m7, [pd_524800]
 %else
-    vbroadcasti128  m7, [pd_n32768]
+    vbroadcasti128  m7, [INTERP_OFFSET_PS]
 %endif
     PROCESS_CHROMA_AVX2 %1, %2, %3
     movu            [r2], xm0
@@ -12826,7 +12829,7 @@
 %elifidn %1, sp
     mova            m14, [pd_524800]
 %else
-    vbroadcasti128  m14, [pd_n32768]
+    vbroadcasti128  m14, [INTERP_OFFSET_PS]
 %endif
     lea             r6, [r3 * 3]
     movu            xm0, [r0]                       ; m0 = row 0



More information about the x265-devel mailing list