[x265] [PATCH 5 of 6] use 32-bit multiply in mbtree_propagate_cost to avoid intraCost overflow
Min Chen
chenm003 at 163.com
Wed Nov 18 18:14:08 CET 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1447865933 21600
# Node ID 9ff06f65a7559f405bf12594f027210b73dd8861
# Parent bf3171a0d20a175268ed987c6f93feb07229562e
use 32-bit multiply in mbtree_propagate_cost to avoid intraCost overflow
---
source/common/x86/mc-a2.asm | 22 +---------------------
1 files changed, 1 insertions(+), 21 deletions(-)
diff -r bf3171a0d20a -r 9ff06f65a755 source/common/x86/mc-a2.asm
--- a/source/common/x86/mc-a2.asm Wed Nov 18 10:58:51 2015 -0600
+++ b/source/common/x86/mc-a2.asm Wed Nov 18 10:58:53 2015 -0600
@@ -1019,15 +1019,11 @@
por m3, m1
movd m1, [r1+r5*2] ; prop
-%if (BIT_DEPTH <= 10)
- pmaddwd m0, m2
-%else
punpckldq m2, m2
punpckldq m0, m0
pmuludq m0, m2
pshufd m2, m2, q3120
pshufd m0, m0, q3120
-%endif
punpcklwd m1, m4
cvtdq2pd m0, m0
@@ -1072,15 +1068,11 @@
por m3, m1
movd m1, [r1+r5*2] ; prop
-%if (BIT_DEPTH <= 10)
- pmaddwd m0, m2
-%else
- punpckldq m2, m2 ; DWORD [- 1 - 0]
+ punpckldq m2, m2 ; DWORD [_ 1 _ 0]
punpckldq m0, m0
pmuludq m0, m2 ; QWORD [m1 m0]
pshufd m2, m2, q3120
pshufd m0, m0, q3120
-%endif
punpcklwd m1, m4
cvtdq2pd m0, m0
mulpd m0, m6 ; intra*invq*fps_factor>>8
@@ -1120,11 +1112,7 @@
pminsd xm3, xm2
pmovzxwd xm1, [r1+r5*2] ; prop
-%if (BIT_DEPTH <= 10)
- pmaddwd xm0, xm2
-%else
pmulld xm0, xm2
-%endif
cvtdq2pd m0, xm0
cvtdq2pd m1, xm1 ; prop
%if cpuflag(avx2)
@@ -1166,11 +1154,7 @@
movd xm1, [r1+r5*2] ; prop
pmovzxwd xm1, xm1
-%if (BIT_DEPTH <= 10)
- pmaddwd xm0, xm2
-%else
pmulld xm0, xm2
-%endif
cvtdq2pd m0, xm0
cvtdq2pd m1, xm1 ; prop
%if cpuflag(avx2)
@@ -1204,11 +1188,7 @@
movzx r6d, word [r1+r5*2] ; prop
movd xm1, r6d
-%if (BIT_DEPTH <= 10)
- pmaddwd xm0, xm2
-%else
pmulld xm0, xm2
-%endif
cvtdq2pd m0, xm0
cvtdq2pd m1, xm1 ; prop
%if cpuflag(avx2)
More information about the x265-devel mailing list