[x265] [PATCH 3 of 4] asm: replace tab_c_n32768 by pd_n32768

Min Chen chenm003 at 163.com
Fri Jul 10 03:41:37 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1436488758 25200
# Node ID bbf0b4acba968355452158479656f7148d2f7199
# Parent  858570d4ebbcca144fd83aaa8436e945ae284ce1
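asm: replace tab_c_n32768 by pd_n32768

Widen pd_n32768 in const-a.asm from 4 to 8 dwords, matching the 8-dword
tab_c_n32768 table it replaces, then remove tab_c_n32768 from
ipfilter16.asm and switch every load of it to pd_n32768.

Also included in this changeset:
 * FILTER_P2S_*_sse2 macros: shift by (14 - BIT_DEPTH) instead of a
   hard-coded 4.
 * ipfilterharness.cpp: call ref() once more on an output mismatch before
   returning false (presumably a debugging aid).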
---
 source/common/x86/const-a.asm    |    2 +-
 source/common/x86/ipfilter16.asm |   53 ++++++++++++++++++-------------------
 source/test/ipfilterharness.cpp  |    7 +++++
 3 files changed, 34 insertions(+), 28 deletions(-)

diff -r 858570d4ebbc -r bbf0b4acba96 source/common/x86/const-a.asm
--- a/source/common/x86/const-a.asm	Thu Jul 09 17:29:40 2015 -0700
+++ b/source/common/x86/const-a.asm	Thu Jul 09 17:39:18 2015 -0700
@@ -125,7 +125,7 @@
 const pd_2048,              times  4 dd 2048
 const pd_ffff,              times  4 dd 0xffff
 const pd_32767,             times  4 dd 32767
-const pd_n32768,            times  4 dd 0xffff8000
+const pd_n32768,            times  8 dd 0xffff8000
 const pd_n131072,           times  4 dd 0xfffe0000
 
 const trans8_shuf,          times  1 dd   0,   4,   1,   5,   2,   6,   3,   7
diff -r 858570d4ebbc -r bbf0b4acba96 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm	Thu Jul 09 17:29:40 2015 -0700
+++ b/source/common/x86/ipfilter16.asm	Thu Jul 09 17:39:18 2015 -0700
@@ -29,7 +29,6 @@
 SECTION_RODATA 32
 
 tab_c_32:         times 8 dd 32
-tab_c_n32768:     times 8 dd -32768
 tab_c_524800:     times 4 dd 524800
 tab_c_n8192:      times 8 dw -8192
 pd_524800:        times 8 dd 524800
@@ -819,7 +818,7 @@
 %endif
 
 %ifidn %3, ps
-    mova        m1,     [tab_c_n32768]
+    mova        m1,     [pd_n32768]
     cmp         r5m,    byte 0
 %if %1 <= 6
     lea         r4,     [r1 * 3]
@@ -1001,8 +1000,8 @@
     movd        m2,     [r0 + r1 * 2 + %1]
     movhps      m0,     [r0 + r1 + %1]
     movhps      m2,     [r0 + r4 + %1]
-    psllw       m0,     4
-    psllw       m2,     4
+    psllw       m0,     (14 - BIT_DEPTH)
+    psllw       m2,     (14 - BIT_DEPTH)
     psubw       m0,     m1
     psubw       m2,     m1
 
@@ -1017,14 +1016,14 @@
 %macro FILTER_P2S_4_4_sse2 1
     movh        m0,     [r0 + %1]
     movhps      m0,     [r0 + r1 + %1]
-    psllw       m0,     4
+    psllw       m0,     (14 - BIT_DEPTH)
     psubw       m0,     m1
     movh        [r2 + r3 * 0 + %1], m0
     movhps      [r2 + r3 * 1 + %1], m0
 
     movh        m2,     [r0 + r1 * 2 + %1]
     movhps      m2,     [r0 + r4 + %1]
-    psllw       m2,     4
+    psllw       m2,     (14 - BIT_DEPTH)
     psubw       m2,     m1
     movh        [r2 + r3 * 2 + %1], m2
     movhps      [r2 + r5 + %1], m2
@@ -1033,7 +1032,7 @@
 %macro FILTER_P2S_4_2_sse2 0
     movh        m0,     [r0]
     movhps      m0,     [r0 + r1 * 2]
-    psllw       m0,     4
+    psllw       m0,     (14 - BIT_DEPTH)
     psubw       m0,     [pw_2000]
     movh        [r2 + r3 * 0], m0
     movhps      [r2 + r3 * 2], m0
@@ -1042,8 +1041,8 @@
 %macro FILTER_P2S_8_4_sse2 1
     movu        m0,     [r0 + %1]
     movu        m2,     [r0 + r1 + %1]
-    psllw       m0,     4
-    psllw       m2,     4
+    psllw       m0,     (14 - BIT_DEPTH)
+    psllw       m2,     (14 - BIT_DEPTH)
     psubw       m0,     m1
     psubw       m2,     m1
     movu        [r2 + r3 * 0 + %1], m0
@@ -1051,8 +1050,8 @@
 
     movu        m3,     [r0 + r1 * 2 + %1]
     movu        m4,     [r0 + r4 + %1]
-    psllw       m3,     4
-    psllw       m4,     4
+    psllw       m3,     (14 - BIT_DEPTH)
+    psllw       m4,     (14 - BIT_DEPTH)
     psubw       m3,     m1
     psubw       m4,     m1
     movu        [r2 + r3 * 2 + %1], m3
@@ -1062,8 +1061,8 @@
 %macro FILTER_P2S_8_2_sse2 1
     movu        m0,     [r0 + %1]
     movu        m2,     [r0 + r1 + %1]
-    psllw       m0,     4
-    psllw       m2,     4
+    psllw       m0,     (14 - BIT_DEPTH)
+    psllw       m2,     (14 - BIT_DEPTH)
     psubw       m0,     m1
     psubw       m2,     m1
     movu        [r2 + r3 * 0 + %1], m0
@@ -2632,7 +2631,7 @@
     mova        m2,       [tab_Tm16]
 
 %ifidn %3, ps
-    mova        m1,       [tab_c_n32768]
+    mova        m1,       [pd_n32768]
     cmp         r5m, byte 0
     je          .skip
     sub         r0, r1
@@ -3234,7 +3233,7 @@
     mova        m2,       [tab_Tm16]
 
 %ifidn %3, ps
-    mova        m1,       [tab_c_n32768]
+    mova        m1,       [pd_n32768]
     cmp         r5m, byte 0
     je          .skip
     sub         r0, r1
@@ -4085,7 +4084,7 @@
             mova      m6, [tab_c_524800]
         %endif
     %else
-        mova      m6, [tab_c_n32768]
+        mova      m6, [pd_n32768]
     %endif
 %endif
 
@@ -4340,7 +4339,7 @@
             mova      m5, [tab_c_524800]
         %endif
     %else
-        mova      m5, [tab_c_n32768]
+        mova      m5, [pd_n32768]
     %endif
 %endif
 
@@ -4435,7 +4434,7 @@
             mova      m4, [tab_c_524800]
         %endif
     %else
-        mova      m4, [tab_c_n32768]
+        mova      m4, [pd_n32768]
     %endif
 %endif
 
@@ -4539,7 +4538,7 @@
             mova      m6, [tab_c_524800]
         %endif
     %else
-        mova      m6, [tab_c_n32768]
+        mova      m6, [pd_n32768]
     %endif
 %endif
 
@@ -4707,7 +4706,7 @@
 %elifidn %3, sp
     mova      m7, [tab_c_524800]
 %elifidn %3, ps
-    mova      m7, [tab_c_n32768]
+    mova      m7, [pd_n32768]
 %endif
 
 .loopH:
@@ -4863,7 +4862,7 @@
 %elifidn %2, sp
     mova            m8, [pd_524800]
 %else
-    vbroadcasti128  m8, [tab_c_n32768]
+    vbroadcasti128  m8, [pd_n32768]
 %endif
 
 .loopH:
@@ -5007,7 +5006,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [tab_c_n32768]
+    mova      m7, [pd_n32768]
 %endif
 
 .loopH:
@@ -5179,7 +5178,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [tab_c_n32768]
+    mova      m7, [pd_n32768]
 %endif
 
 .loopH:
@@ -5321,7 +5320,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [tab_c_n32768]
+    mova      m7, [pd_n32768]
 %endif
 
 .loopH:
@@ -5452,7 +5451,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [tab_c_n32768]
+    mova      m7, [pd_n32768]
 %endif
 
 .loopH:
@@ -5605,7 +5604,7 @@
 %elifidn %2, sp
     mova      m7, [pd_524800]
 %elifidn %2, ps
-    mova      m7, [tab_c_n32768]
+    mova      m7, [pd_n32768]
 %endif
 
 .loopH:
@@ -5728,7 +5727,7 @@
 %elifidn %1, sp
     mova      m7, [pd_524800]
 %elifidn %1, ps
-    mova      m7, [tab_c_n32768]
+    mova      m7, [pd_n32768]
 %endif
 
 .loopH:
diff -r 858570d4ebbc -r bbf0b4acba96 source/test/ipfilterharness.cpp
--- a/source/test/ipfilterharness.cpp	Thu Jul 09 17:29:40 2015 -0700
+++ b/source/test/ipfilterharness.cpp	Thu Jul 09 17:39:18 2015 -0700
@@ -122,7 +122,14 @@
                     coeffIdx);
 
             if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(int16_t)))
+            {
+            ref(pixel_test_buff[index] + 3 * rand_srcStride,
+                rand_srcStride,
+                IPF_C_output_s,
+                rand_dstStride,
+                coeffIdx);
                 return false;
+            }
 
             reportfail();
         }



More information about the x265-devel mailing list