[x265] [PATCH] use 32-bit multiply in mbtree_propagate_cost to avoid intraCost overflow
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Wed Nov 25 06:13:33 CET 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1447865933 21600
# Wed Nov 18 10:58:53 2015 -0600
# Node ID d4e8af415c2ea939f1c82cf2dc1561fee20847de
# Parent ad15f3756ad888b99a4ba868b857e09909dae226
use 32-bit multiply in mbtree_propagate_cost to avoid intraCost overflow
diff -r ad15f3756ad8 -r d4e8af415c2e source/common/x86/mc-a2.asm
--- a/source/common/x86/mc-a2.asm Fri Nov 06 12:33:51 2015 +0530
+++ b/source/common/x86/mc-a2.asm Wed Nov 18 10:58:53 2015 -0600
@@ -1019,15 +1019,11 @@
por m3, m1
movd m1, [r1+r5*2] ; prop
-%if (BIT_DEPTH <= 8)
- pmaddwd m0, m2
-%else
punpckldq m2, m2
punpckldq m0, m0
pmuludq m0, m2
pshufd m2, m2, q3120
pshufd m0, m0, q3120
-%endif
punpcklwd m1, m4
cvtdq2pd m0, m0
@@ -1072,15 +1068,11 @@
por m3, m1
movd m1, [r1+r5*2] ; prop
-%if (BIT_DEPTH <= 8)
- pmaddwd m0, m2
-%else
- punpckldq m2, m2 ; DWORD [- 1 - 0]
+ punpckldq m2, m2 ; DWORD [_ 1 _ 0]
punpckldq m0, m0
pmuludq m0, m2 ; QWORD [m1 m0]
pshufd m2, m2, q3120
pshufd m0, m0, q3120
-%endif
punpcklwd m1, m4
cvtdq2pd m0, m0
mulpd m0, m6 ; intra*invq*fps_factor>>8
@@ -1120,11 +1112,7 @@
pminsd xm3, xm2
pmovzxwd xm1, [r1+r5*2] ; prop
-%if (BIT_DEPTH <= 8)
- pmaddwd xm0, xm2
-%else
pmulld xm0, xm2
-%endif
cvtdq2pd m0, xm0
cvtdq2pd m1, xm1 ; prop
%if cpuflag(avx2)
@@ -1166,11 +1154,7 @@
movd xm1, [r1+r5*2] ; prop
pmovzxwd xm1, xm1
-%if (BIT_DEPTH <= 8)
- pmaddwd xm0, xm2
-%else
pmulld xm0, xm2
-%endif
cvtdq2pd m0, xm0
cvtdq2pd m1, xm1 ; prop
%if cpuflag(avx2)
@@ -1204,11 +1188,7 @@
movzx r6d, word [r1+r5*2] ; prop
movd xm1, r6d
-%if (BIT_DEPTH <= 8)
- pmaddwd xm0, xm2
-%else
pmulld xm0, xm2
-%endif
cvtdq2pd m0, xm0
cvtdq2pd m1, xm1 ; prop
%if cpuflag(avx2)
More information about the x265-devel mailing list