[x265] [PATCH 3 of 4] asm: replace tab_c_n32768 by pd_n32768
Min Chen
chenm003 at 163.com
Fri Jul 10 03:41:37 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1436488758 25200
# Node ID bbf0b4acba968355452158479656f7148d2f7199
# Parent 858570d4ebbcca144fd83aaa8436e945ae284ce1
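asm: replace tab_c_n32768 with pd_n32768

pd_n32768 is widened from 4 to 8 dwords so it matches the size of the
tab_c_n32768 table it replaces. The hard-coded left shift of 4 in the
FILTER_P2S_*_sse2 macros and nearby code becomes (14 - BIT_DEPTH), and the
ipfilter test harness calls ref() again on an output mismatch before
returning false.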
---
source/common/x86/const-a.asm | 2 +-
source/common/x86/ipfilter16.asm | 53 ++++++++++++++++++-------------------
source/test/ipfilterharness.cpp | 7 +++++
3 files changed, 34 insertions(+), 28 deletions(-)
diff -r 858570d4ebbc -r bbf0b4acba96 source/common/x86/const-a.asm
--- a/source/common/x86/const-a.asm Thu Jul 09 17:29:40 2015 -0700
+++ b/source/common/x86/const-a.asm Thu Jul 09 17:39:18 2015 -0700
@@ -125,7 +125,7 @@
const pd_2048, times 4 dd 2048
const pd_ffff, times 4 dd 0xffff
const pd_32767, times 4 dd 32767
-const pd_n32768, times 4 dd 0xffff8000
+const pd_n32768, times 8 dd 0xffff8000
const pd_n131072, times 4 dd 0xfffe0000
const trans8_shuf, times 1 dd 0, 4, 1, 5, 2, 6, 3, 7
diff -r 858570d4ebbc -r bbf0b4acba96 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm Thu Jul 09 17:29:40 2015 -0700
+++ b/source/common/x86/ipfilter16.asm Thu Jul 09 17:39:18 2015 -0700
@@ -29,7 +29,6 @@
SECTION_RODATA 32
tab_c_32: times 8 dd 32
-tab_c_n32768: times 8 dd -32768
tab_c_524800: times 4 dd 524800
tab_c_n8192: times 8 dw -8192
pd_524800: times 8 dd 524800
@@ -819,7 +818,7 @@
%endif
%ifidn %3, ps
- mova m1, [tab_c_n32768]
+ mova m1, [pd_n32768]
cmp r5m, byte 0
%if %1 <= 6
lea r4, [r1 * 3]
@@ -1001,8 +1000,8 @@
movd m2, [r0 + r1 * 2 + %1]
movhps m0, [r0 + r1 + %1]
movhps m2, [r0 + r4 + %1]
- psllw m0, 4
- psllw m2, 4
+ psllw m0, (14 - BIT_DEPTH)
+ psllw m2, (14 - BIT_DEPTH)
psubw m0, m1
psubw m2, m1
@@ -1017,14 +1016,14 @@
%macro FILTER_P2S_4_4_sse2 1
movh m0, [r0 + %1]
movhps m0, [r0 + r1 + %1]
- psllw m0, 4
+ psllw m0, (14 - BIT_DEPTH)
psubw m0, m1
movh [r2 + r3 * 0 + %1], m0
movhps [r2 + r3 * 1 + %1], m0
movh m2, [r0 + r1 * 2 + %1]
movhps m2, [r0 + r4 + %1]
- psllw m2, 4
+ psllw m2, (14 - BIT_DEPTH)
psubw m2, m1
movh [r2 + r3 * 2 + %1], m2
movhps [r2 + r5 + %1], m2
@@ -1033,7 +1032,7 @@
%macro FILTER_P2S_4_2_sse2 0
movh m0, [r0]
movhps m0, [r0 + r1 * 2]
- psllw m0, 4
+ psllw m0, (14 - BIT_DEPTH)
psubw m0, [pw_2000]
movh [r2 + r3 * 0], m0
movhps [r2 + r3 * 2], m0
@@ -1042,8 +1041,8 @@
%macro FILTER_P2S_8_4_sse2 1
movu m0, [r0 + %1]
movu m2, [r0 + r1 + %1]
- psllw m0, 4
- psllw m2, 4
+ psllw m0, (14 - BIT_DEPTH)
+ psllw m2, (14 - BIT_DEPTH)
psubw m0, m1
psubw m2, m1
movu [r2 + r3 * 0 + %1], m0
@@ -1051,8 +1050,8 @@
movu m3, [r0 + r1 * 2 + %1]
movu m4, [r0 + r4 + %1]
- psllw m3, 4
- psllw m4, 4
+ psllw m3, (14 - BIT_DEPTH)
+ psllw m4, (14 - BIT_DEPTH)
psubw m3, m1
psubw m4, m1
movu [r2 + r3 * 2 + %1], m3
@@ -1062,8 +1061,8 @@
%macro FILTER_P2S_8_2_sse2 1
movu m0, [r0 + %1]
movu m2, [r0 + r1 + %1]
- psllw m0, 4
- psllw m2, 4
+ psllw m0, (14 - BIT_DEPTH)
+ psllw m2, (14 - BIT_DEPTH)
psubw m0, m1
psubw m2, m1
movu [r2 + r3 * 0 + %1], m0
@@ -2632,7 +2631,7 @@
mova m2, [tab_Tm16]
%ifidn %3, ps
- mova m1, [tab_c_n32768]
+ mova m1, [pd_n32768]
cmp r5m, byte 0
je .skip
sub r0, r1
@@ -3234,7 +3233,7 @@
mova m2, [tab_Tm16]
%ifidn %3, ps
- mova m1, [tab_c_n32768]
+ mova m1, [pd_n32768]
cmp r5m, byte 0
je .skip
sub r0, r1
@@ -4085,7 +4084,7 @@
mova m6, [tab_c_524800]
%endif
%else
- mova m6, [tab_c_n32768]
+ mova m6, [pd_n32768]
%endif
%endif
@@ -4340,7 +4339,7 @@
mova m5, [tab_c_524800]
%endif
%else
- mova m5, [tab_c_n32768]
+ mova m5, [pd_n32768]
%endif
%endif
@@ -4435,7 +4434,7 @@
mova m4, [tab_c_524800]
%endif
%else
- mova m4, [tab_c_n32768]
+ mova m4, [pd_n32768]
%endif
%endif
@@ -4539,7 +4538,7 @@
mova m6, [tab_c_524800]
%endif
%else
- mova m6, [tab_c_n32768]
+ mova m6, [pd_n32768]
%endif
%endif
@@ -4707,7 +4706,7 @@
%elifidn %3, sp
mova m7, [tab_c_524800]
%elifidn %3, ps
- mova m7, [tab_c_n32768]
+ mova m7, [pd_n32768]
%endif
.loopH:
@@ -4863,7 +4862,7 @@
%elifidn %2, sp
mova m8, [pd_524800]
%else
- vbroadcasti128 m8, [tab_c_n32768]
+ vbroadcasti128 m8, [pd_n32768]
%endif
.loopH:
@@ -5007,7 +5006,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [tab_c_n32768]
+ mova m7, [pd_n32768]
%endif
.loopH:
@@ -5179,7 +5178,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [tab_c_n32768]
+ mova m7, [pd_n32768]
%endif
.loopH:
@@ -5321,7 +5320,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [tab_c_n32768]
+ mova m7, [pd_n32768]
%endif
.loopH:
@@ -5452,7 +5451,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [tab_c_n32768]
+ mova m7, [pd_n32768]
%endif
.loopH:
@@ -5605,7 +5604,7 @@
%elifidn %2, sp
mova m7, [pd_524800]
%elifidn %2, ps
- mova m7, [tab_c_n32768]
+ mova m7, [pd_n32768]
%endif
.loopH:
@@ -5728,7 +5727,7 @@
%elifidn %1, sp
mova m7, [pd_524800]
%elifidn %1, ps
- mova m7, [tab_c_n32768]
+ mova m7, [pd_n32768]
%endif
.loopH:
diff -r 858570d4ebbc -r bbf0b4acba96 source/test/ipfilterharness.cpp
--- a/source/test/ipfilterharness.cpp Thu Jul 09 17:29:40 2015 -0700
+++ b/source/test/ipfilterharness.cpp Thu Jul 09 17:39:18 2015 -0700
@@ -122,7 +122,14 @@
coeffIdx);
if (memcmp(IPF_vec_output_s, IPF_C_output_s, TEST_BUF_SIZE * sizeof(int16_t)))
+ {
+ ref(pixel_test_buff[index] + 3 * rand_srcStride,
+ rand_srcStride,
+ IPF_C_output_s,
+ rand_dstStride,
+ coeffIdx);
return false;
+ }
reportfail();
}
More information about the x265-devel mailing list