[x265] [PATCH 3 of 3] asm: fix Main12 fault on AVX2 weight_pp

Min Chen chenm003 at 163.com
Tue Jul 28 04:04:17 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1438027388 25200
# Node ID 9fa519421505caebad5f2fd79011426c0e9d7afc
# Parent  9fbbf8c772a46830c21287addb82039963e68a5d
asm: fix Main12 fault on AVX2 weight_pp
---
 source/common/x86/asm-primitives.cpp |    3 +--
 source/common/x86/pixel-util8.asm    |   11 +++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff -r 9fbbf8c772a4 -r 9fa519421505 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Jul 27 13:03:06 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp	Mon Jul 27 13:03:08 2015 -0700
@@ -1549,9 +1549,8 @@
 
         p.scale1D_128to64 = PFX(scale1D_128to64_avx2);
         p.scale2D_64to32 = PFX(scale2D_64to32_avx2);
-#if X265_DEPTH <= 10
+
         p.weight_pp = PFX(weight_pp_avx2);
-#endif
         p.weight_sp = PFX(weight_sp_avx2);
         p.sign = PFX(calSign_avx2);
         p.planecopy_cp = PFX(upShift_8_avx2);
diff -r 9fbbf8c772a4 -r 9fa519421505 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Mon Jul 27 13:03:06 2015 -0700
+++ b/source/common/x86/pixel-util8.asm	Mon Jul 27 13:03:08 2015 -0700
@@ -1407,14 +1407,16 @@
 %if HIGH_BIT_DEPTH
 INIT_YMM avx2
 cglobal weight_pp, 6, 7, 7
-    shl          r5d, 4            ; m0 = [w0<<4]
+%define correction      (14 - BIT_DEPTH)
     mov          r6d, r6m
-    shl          r6d, 16
-    or           r6d, r5d          ; assuming both (w0<<4) and round are using maximum of 16 bits each.
+    shl          r6d, 16 - correction
+    or           r6d, r5d          ; assumes w0 and round each use at most 16 bits.
 
     vpbroadcastd m0, r6d
 
-    movd         xm1, r7m
+    mov          r5d, r7m
+    sub          r5d, correction
+    movd         xm1, r5d
     vpbroadcastd m2, r8m
     mova         m5, [pw_1]
     mova         m6, [pw_pixel_max]
@@ -1453,6 +1455,7 @@
 
     dec         r4d
     jnz         .loopH
+%undef correction
     RET
 %else
 INIT_YMM avx2



More information about the x265-devel mailing list