[x265] [PATCH 3 of 3] asm: fix Main12 fault on AVX2 weight_pp
Min Chen
chenm003 at 163.com
Tue Jul 28 04:04:17 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1438027388 25200
# Node ID 9fa519421505caebad5f2fd79011426c0e9d7afc
# Parent 9fbbf8c772a46830c21287addb82039963e68a5d
asm: fix Main12 fault on AVX2 weight_pp
---
source/common/x86/asm-primitives.cpp | 3 +--
source/common/x86/pixel-util8.asm | 11 +++++++----
2 files changed, 8 insertions(+), 6 deletions(-)
diff -r 9fbbf8c772a4 -r 9fa519421505 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Jul 27 13:03:06 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp Mon Jul 27 13:03:08 2015 -0700
@@ -1549,9 +1549,8 @@
p.scale1D_128to64 = PFX(scale1D_128to64_avx2);
p.scale2D_64to32 = PFX(scale2D_64to32_avx2);
-#if X265_DEPTH <= 10
+
p.weight_pp = PFX(weight_pp_avx2);
-#endif
p.weight_sp = PFX(weight_sp_avx2);
p.sign = PFX(calSign_avx2);
p.planecopy_cp = PFX(upShift_8_avx2);
diff -r 9fbbf8c772a4 -r 9fa519421505 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Mon Jul 27 13:03:06 2015 -0700
+++ b/source/common/x86/pixel-util8.asm Mon Jul 27 13:03:08 2015 -0700
@@ -1407,14 +1407,16 @@
%if HIGH_BIT_DEPTH
INIT_YMM avx2
cglobal weight_pp, 6, 7, 7
- shl r5d, 4 ; m0 = [w0<<4]
+%define correction (14 - BIT_DEPTH)
mov r6d, r6m
- shl r6d, 16
- or r6d, r5d ; assuming both (w0<<4) and round are using maximum of 16 bits each.
+ shl r6d, 16 - correction
+ or r6d, r5d ; assuming both w0 and round are using maximum of 16 bits each.
vpbroadcastd m0, r6d
- movd xm1, r7m
+ mov r5d, r7m
+ sub r5d, correction
+ movd xm1, r5d
vpbroadcastd m2, r8m
mova m5, [pw_1]
mova m6, [pw_pixel_max]
@@ -1453,6 +1455,7 @@
dec r4d
jnz .loopH
+%undef correction
RET
%else
INIT_YMM avx2
More information about the x265-devel mailing list