[x265] [PATCH 5 of 6] use 32-bit multiply in mbtree_propagate_cost to avoid intraCost overflow

Min Chen chenm003 at 163.com
Wed Nov 18 18:14:08 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1447865933 21600
# Node ID 9ff06f65a7559f405bf12594f027210b73dd8861
# Parent  bf3171a0d20a175268ed987c6f93feb07229562e
use 32-bit multiply in mbtree_propagate_cost to avoid intraCost overflow
---
 source/common/x86/mc-a2.asm |   22 +---------------------
 1 files changed, 1 insertions(+), 21 deletions(-)

diff -r bf3171a0d20a -r 9ff06f65a755 source/common/x86/mc-a2.asm
--- a/source/common/x86/mc-a2.asm	Wed Nov 18 10:58:51 2015 -0600
+++ b/source/common/x86/mc-a2.asm	Wed Nov 18 10:58:53 2015 -0600
@@ -1019,15 +1019,11 @@
     por         m3, m1
 
     movd        m1, [r1+r5*2]       ; prop
-%if (BIT_DEPTH <= 10)
-    pmaddwd     m0, m2
-%else
     punpckldq   m2, m2
     punpckldq   m0, m0
     pmuludq     m0, m2
     pshufd      m2, m2, q3120
     pshufd      m0, m0, q3120
-%endif
 
     punpcklwd   m1, m4
     cvtdq2pd    m0, m0
@@ -1072,15 +1068,11 @@
     por         m3, m1
 
     movd        m1, [r1+r5*2]       ; prop
-%if (BIT_DEPTH <= 10)
-    pmaddwd     m0, m2
-%else
-    punpckldq   m2, m2              ; DWORD [- 1 - 0]
+    punpckldq   m2, m2              ; DWORD [_ 1 _ 0]
     punpckldq   m0, m0
     pmuludq     m0, m2              ; QWORD [m1 m0]
     pshufd      m2, m2, q3120
     pshufd      m0, m0, q3120
-%endif
     punpcklwd   m1, m4
     cvtdq2pd    m0, m0
     mulpd       m0, m6              ; intra*invq*fps_factor>>8
@@ -1120,11 +1112,7 @@
     pminsd          xm3, xm2
 
     pmovzxwd        xm1, [r1+r5*2]      ; prop
-%if (BIT_DEPTH <= 10)
-    pmaddwd         xm0, xm2
-%else
     pmulld          xm0, xm2
-%endif
     cvtdq2pd        m0, xm0
     cvtdq2pd        m1, xm1             ; prop
 %if cpuflag(avx2)
@@ -1166,11 +1154,7 @@
 
     movd            xm1, [r1+r5*2]      ; prop
     pmovzxwd        xm1, xm1
-%if (BIT_DEPTH <= 10)
-    pmaddwd         xm0, xm2
-%else
     pmulld          xm0, xm2
-%endif
     cvtdq2pd        m0, xm0
     cvtdq2pd        m1, xm1             ; prop
 %if cpuflag(avx2)
@@ -1204,11 +1188,7 @@
 
     movzx           r6d, word [r1+r5*2] ; prop
     movd            xm1, r6d
-%if (BIT_DEPTH <= 10)
-    pmaddwd         xm0, xm2
-%else
     pmulld          xm0, xm2
-%endif
     cvtdq2pd        m0, xm0
     cvtdq2pd        m1, xm1             ; prop
 %if cpuflag(avx2)



More information about the x265-devel mailing list