[x265] [PATCH 3 of 4] asm: fix Main12 fault on intra_dc_avx2

Min Chen chenm003 at 163.com
Wed Jul 22 01:20:04 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1437514216 25200
# Node ID 486e77fdd864304b5016411daf507e3fdde5b618
# Parent  668adf85074fd29025f0a6ff3784a83bf04e4968
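asm: fix Main12 fault on intra_dc_avx2

With 12-bit samples the 16-bit horizontal sum (paddw/phaddw) could exceed the
signed 16-bit range before pmaddwd widened it. Keep the word sums at 15 bits,
widen with pmaddwd, and finish the reduction on dwords (phaddd/paddd).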
---
 source/common/x86/asm-primitives.cpp |    2 +-
 source/common/x86/intrapred16.asm    |   32 ++++++++++++++++----------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff -r 668adf85074f -r 486e77fdd864 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Jul 21 14:30:14 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp	Tue Jul 21 14:30:16 2015 -0700
@@ -1485,10 +1485,10 @@
         p.cu[BLOCK_64x64].psy_cost_pp = PFX(psyCost_pp_64x64_avx2);
         p.cu[BLOCK_16x16].intra_pred[PLANAR_IDX] = PFX(intra_pred_planar16_avx2);
         p.cu[BLOCK_32x32].intra_pred[PLANAR_IDX] = PFX(intra_pred_planar32_avx2);
+#endif
 
         p.cu[BLOCK_16x16].intra_pred[DC_IDX] = PFX(intra_pred_dc16_avx2);
         p.cu[BLOCK_32x32].intra_pred[DC_IDX] = PFX(intra_pred_dc32_avx2);
-#endif
 
         p.pu[LUMA_48x64].satd = PFX(pixel_satd_48x64_avx2);
 
diff -r 668adf85074f -r 486e77fdd864 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Tue Jul 21 14:30:14 2015 -0700
+++ b/source/common/x86/intrapred16.asm	Tue Jul 21 14:30:16 2015 -0700
@@ -473,14 +473,14 @@
     add             r1d,                 r1d
     movu            m0,                  [r2 + 66]
     movu            m2,                  [r2 +  2]
-    paddw           m0,                  m2
+    paddw           m0,                  m2                 ; dynamic range 13 bits
 
     vextracti128    xm1,                 m0, 1
-    paddw           xm0,                 xm1
+    paddw           xm0,                 xm1                ; dynamic range 14 bits
     movhlps         xm1,                 xm0
-    paddw           xm0,                 xm1
-    phaddw          xm0,                 xm0
+    paddw           xm0,                 xm1                ; dynamic range 15 bits
     pmaddwd         xm0,                 [pw_1]
+    phaddd          xm0,                 xm0
     paddd           xm0,                 [pd_16]
     psrld           xm0,                 5
     movd            r5d,                 xm0
@@ -580,25 +580,25 @@
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
 ;---------------------------------------------------------------------------------------------
 INIT_YMM avx2
-cglobal intra_pred_dc32, 3, 3, 2
+cglobal intra_pred_dc32, 3,3,3
     add              r2, 2
     add             r1d, r1d
     movu             m0, [r2]
     movu             m1, [r2 + 32]
-    add              r2, mmsize*4        ; r2 += 128
-    paddw            m0, m1
+    add              r2, mmsize*4                       ; r2 += 128
+    paddw            m0, m1                             ; dynamic range 13 bits
     movu             m1, [r2]
-    paddw            m0, m1
-    movu             m1, [r2 + 32]
-    paddw            m0, m1
+    movu             m2, [r2 + 32]
+    paddw            m1, m2                             ; dynamic range 13 bits
+    paddw            m0, m1                             ; dynamic range 14 bits
     vextracti128    xm1, m0, 1
-    paddw           xm0, xm1
+    paddw           xm0, xm1                            ; dynamic range 15 bits
+    pmaddwd         xm0, [pw_1]
     movhlps         xm1, xm0
-    paddw           xm0, xm1
-    phaddw          xm0, xm0
-    pmaddwd         xm0, [pw_1]
-    paddd           xm0, [pd_32]         ; sum = sum + 32
-    psrld           xm0, 6               ; sum = sum / 64
+    paddd           xm0, xm1
+    phaddd          xm0, xm0
+    paddd           xm0, [pd_32]                        ; sum = sum + 32
+    psrld           xm0, 6                              ; sum = sum / 64
     vpbroadcastw     m0, xm0
 
     lea              r2, [r1 * 3]



More information about the x265-devel mailing list