[x265] [PATCH] asm: fix for 32-bit build satd overflow issue

chen chenm003 at 163.com
Wed Jan 29 14:20:24 CET 2014


Right.

At 2014-01-29 21:16:56,yuvaraj at multicorewareinc.com wrote:
># HG changeset patch
># User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
># Date 1391001289 -19800
>#      Wed Jan 29 18:44:49 2014 +0530
># Branch stable
># Node ID 86743912a5b0459645e5aeccd1c35313e3f0af58
># Parent  d6091cb46ae1afeeec40d247d5d5247f26e3372c
>asm: fix for 32-bit build satd overflow issue.
>
>diff -r d6091cb46ae1 -r 86743912a5b0 source/common/x86/pixel-a.asm
>--- a/source/common/x86/pixel-a.asm Wed Jan 29 12:05:06 2014 +0530
>+++ b/source/common/x86/pixel-a.asm Wed Jan 29 18:44:49 2014 +0530
>@@ -626,23 +626,17 @@
>     movd eax, m7
>     RET
> 
>-cglobal pixel_satd_8x8_internal
>-    LOAD_SUMSUB_8x4P 0, 1, 2, 3, 4, 5, 7, r0, r2, 1, 0
>-    SATD_8x4_SSE vertical, 0, 1, 2, 3, 4, 5, 6
>-%%pixel_satd_8x4_internal:
>-    LOAD_SUMSUB_8x4P 0, 1, 2, 3, 4, 5, 7, r0, r2, 1, 0
>-    SATD_8x4_SSE vertical, 0, 1, 2, 3, 4, 5, 6
>-    ret
>-
> cglobal pixel_satd_8x8_internal2
> %if WIN64
>     LOAD_SUMSUB_8x4P 0, 1, 2, 3, 4, 5, 7, r0, r2, 1, 0
>     SATD_8x4_1_SSE vertical, 0, 1, 2, 3, 4, 5, 6, 12, 13
>+%%pixel_satd_8x4_internal2:
>     LOAD_SUMSUB_8x4P 0, 1, 2, 3, 4, 5, 7, r0, r2, 1, 0
>     SATD_8x4_1_SSE vertical, 0, 1, 2, 3, 4, 5, 6, 12, 13
> %else
>     LOAD_SUMSUB_8x4P 0, 1, 2, 3, 4, 5, 7, r0, r2, 1, 0
>     SATD_8x4_1_SSE vertical, 0, 1, 2, 3, 4, 5, 6, 4, 5
>+%%pixel_satd_8x4_internal2:
>     LOAD_SUMSUB_8x4P 0, 1, 2, 3, 4, 5, 7, r0, r2, 1, 0
>     SATD_8x4_1_SSE vertical, 0, 1, 2, 3, 4, 5, 6, 4, 5
> %endif
>@@ -652,15 +646,6 @@
> ; These aren't any faster on AVX systems with fast movddup (Bulldozer, Sandy Bridge)
> %if HIGH_BIT_DEPTH == 0 && (WIN64 || UNIX64) && notcpuflag(avx)
> 
>-cglobal pixel_satd_16x4_internal
>-    LOAD_SUMSUB_16x4P 0, 1, 2, 3, 4, 8, 5, 9, 6, 7, r0, r2, 11
>-    lea  r2, [r2+4*r3]
>-    lea  r0, [r0+4*r1]
>-    ; always use horizontal mode here
>-    SATD_8x4_SSE 0, 0, 1, 2, 3, 6, 11, 10
>-    SATD_8x4_SSE 0, 4, 8, 5, 9, 6, 3, 10
>-    ret
>-
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140129/141ce1c1/attachment.html>


More information about the x265-devel mailing list