[x265] [PATCH] use 32-bit multiply in mbtree_propagate_cost to avoid intraCost overflow

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Wed Nov 25 06:13:33 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1447865933 21600
#      Wed Nov 18 10:58:53 2015 -0600
# Node ID d4e8af415c2ea939f1c82cf2dc1561fee20847de
# Parent  ad15f3756ad888b99a4ba868b857e09909dae226
use 32-bits multiply in mbtree_propagate_cost to avoid intraCost overflow

diff -r ad15f3756ad8 -r d4e8af415c2e source/common/x86/mc-a2.asm
--- a/source/common/x86/mc-a2.asm	Fri Nov 06 12:33:51 2015 +0530
+++ b/source/common/x86/mc-a2.asm	Wed Nov 18 10:58:53 2015 -0600
@@ -1019,15 +1019,11 @@
     por         m3, m1
 
     movd        m1, [r1+r5*2]       ; prop
-%if (BIT_DEPTH <= 8)
-    pmaddwd     m0, m2
-%else
     punpckldq   m2, m2
     punpckldq   m0, m0
     pmuludq     m0, m2
     pshufd      m2, m2, q3120
     pshufd      m0, m0, q3120
-%endif
 
     punpcklwd   m1, m4
     cvtdq2pd    m0, m0
@@ -1072,15 +1068,11 @@
     por         m3, m1
 
     movd        m1, [r1+r5*2]       ; prop
-%if (BIT_DEPTH <= 8)
-    pmaddwd     m0, m2
-%else
-    punpckldq   m2, m2              ; DWORD [- 1 - 0]
+    punpckldq   m2, m2              ; DWORD [_ 1 _ 0]
     punpckldq   m0, m0
     pmuludq     m0, m2              ; QWORD [m1 m0]
     pshufd      m2, m2, q3120
     pshufd      m0, m0, q3120
-%endif
     punpcklwd   m1, m4
     cvtdq2pd    m0, m0
     mulpd       m0, m6              ; intra*invq*fps_factor>>8
@@ -1120,11 +1112,7 @@
     pminsd          xm3, xm2
 
     pmovzxwd        xm1, [r1+r5*2]      ; prop
-%if (BIT_DEPTH <= 8)
-    pmaddwd         xm0, xm2
-%else
     pmulld          xm0, xm2
-%endif
     cvtdq2pd        m0, xm0
     cvtdq2pd        m1, xm1             ; prop
 %if cpuflag(avx2)
@@ -1166,11 +1154,7 @@
 
     movd            xm1, [r1+r5*2]      ; prop
     pmovzxwd        xm1, xm1
-%if (BIT_DEPTH <= 8)
-    pmaddwd         xm0, xm2
-%else
     pmulld          xm0, xm2
-%endif
     cvtdq2pd        m0, xm0
     cvtdq2pd        m1, xm1             ; prop
 %if cpuflag(avx2)
@@ -1204,11 +1188,7 @@
 
     movzx           r6d, word [r1+r5*2] ; prop
     movd            xm1, r6d
-%if (BIT_DEPTH <= 8)
-    pmaddwd         xm0, xm2
-%else
     pmulld          xm0, xm2
-%endif
     cvtdq2pd        m0, xm0
     cvtdq2pd        m1, xm1             ; prop
 %if cpuflag(avx2)


More information about the x265-devel mailing list