[x265] [PATCH] asm: 16bpp support for satd_32xN

chen chenm003 at 163.com
Mon Dec 2 15:06:25 CET 2013


This patch breaks the 8bpp build.

At 2013-12-02 20:20:29,yuvaraj at multicorewareinc.com wrote:
># HG changeset patch
># User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
># Date 1385986812 -19800
>#      Mon Dec 02 17:50:12 2013 +0530
># Node ID fa3a3eced7228599400f9403dba159d433d05222
># Parent  d0bed5d188d772231f946d0e54bb96c2eff5d38a
>asm: 16bpp support for satd_32xN
>
>diff -r d0bed5d188d7 -r fa3a3eced722 source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp Mon Dec 02 15:29:22 2013 +0530
>+++ b/source/common/x86/asm-primitives.cpp Mon Dec 02 17:50:12 2013 +0530
>@@ -498,6 +498,12 @@
>         p.satd[LUMA_12x16] = x265_pixel_satd_12x16_sse2;
>         p.satd[LUMA_24x32] = x265_pixel_satd_24x32_sse2;
>         p.satd[LUMA_48x64] = x265_pixel_satd_48x64_sse2;
>+        p.satd[LUMA_32x8] = x265_pixel_satd_32x8_sse2;
>+        p.satd[LUMA_32x16] = x265_pixel_satd_32x16_sse2;
>+        p.satd[LUMA_32x24] = x265_pixel_satd_32x24_sse2;
>+        p.satd[LUMA_32x32] = x265_pixel_satd_32x32_sse2;
>+        p.satd[LUMA_32x64] = x265_pixel_satd_32x64_sse2;
>+
> 
>         p.sa8d_inter[LUMA_8x8] = x265_pixel_sa8d_8x8_sse2;
>         p.sa8d_inter[LUMA_16x16] = x265_pixel_sa8d_16x16_sse2;
>diff -r d0bed5d188d7 -r fa3a3eced722 source/common/x86/pixel-a.asm
>--- a/source/common/x86/pixel-a.asm Mon Dec 02 15:29:22 2013 +0530
>+++ b/source/common/x86/pixel-a.asm Mon Dec 02 17:50:12 2013 +0530
>@@ -3788,14 +3788,14 @@
>     mov r6, r0
>     mov r7, r2
>     call pixel_satd_8x8_internal
>-    lea r0, [r6 + 8]
>-    lea r2, [r7 + 8]
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 16]
>-    lea r2, [r7 + 16]
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 24]
>-    lea r2, [r7 + 24]
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>+    lea r2, [r7 + 8*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>+    lea r2, [r7 + 16*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>+    lea r2, [r7 + 24*SIZEOF_PIXEL]
>     call pixel_satd_8x8_internal
>     SATD_END_SSE2 m6
> %else
>@@ -3804,17 +3804,17 @@
>     mov r6, r0
>     mov [rsp], r2
>     call pixel_satd_8x8_internal
>-    lea r0, [r6 + 8]
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 8
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 16]
>+    add r2, 8*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 16
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 24]
>+    add r2, 16*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 24
>+    add r2, 24*SIZEOF_PIXEL
>     call pixel_satd_8x8_internal
>     SATD_END_SSE2 m6
> %endif
>@@ -3826,16 +3826,16 @@
>     mov r7, r2
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>-    lea r0, [r6 + 8]
>-    lea r2, [r7 + 8]
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 16]
>-    lea r2, [r7 + 16]
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 24]
>-    lea r2, [r7 + 24]
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>+    lea r2, [r7 + 8*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>+    lea r2, [r7 + 16*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>+    lea r2, [r7 + 24*SIZEOF_PIXEL]
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>     SATD_END_SSE2 m6
>@@ -3846,19 +3846,19 @@
>     mov [rsp], r2
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>-    lea r0, [r6 + 8]
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 8
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 16]
>+    add r2, 8*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 16
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 24]
>+    add r2, 16*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 24
>+    add r2, 24*SIZEOF_PIXEL
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>     SATD_END_SSE2 m6
>@@ -3872,22 +3872,25 @@
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>-    lea r0, [r6 + 8]
>-    lea r2, [r7 + 8]
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 16]
>-    lea r2, [r7 + 16]
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 24]
>-    lea r2, [r7 + 24]
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    SATD_END_SSE2 m6
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>+    lea r2, [r7 + 8*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>+    lea r2, [r7 + 16*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>+    lea r2, [r7 + 24*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_END_SSE2 m6, m7
> %else
> cglobal pixel_satd_32x24, 4,7,8,0-4    ;if !WIN64
>     SATD_START_SSE2 m6, m7
>@@ -3896,25 +3899,29 @@
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>-    lea r0, [r6 + 8]
>+    pxor       m7, m7
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 8
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 16]
>+    add r2, 8*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 16
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 24]
>+    add r2, 16*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 24
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    SATD_END_SSE2 m6
>+    add r2, 24*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_END_SSE2 m6, m7
> %endif
> 
> %if WIN64
>@@ -3926,28 +3933,29 @@
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>-    lea r0, [r6 + 8]
>-    lea r2, [r7 + 8]
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 16]
>-    lea r2, [r7 + 16]
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 24]
>-    lea r2, [r7 + 24]
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    SATD_END_SSE2 m6
>-
>-
>-%else   
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>+    lea r2, [r7 + 8*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>+    lea r2, [r7 + 16*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>+    lea r2, [r7 + 24*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_END_SSE2 m6, m7
>+%else
> cglobal pixel_satd_32x32, 4,7,8,0-4    ;if !WIN64
> 
>     SATD_START_SSE2 m6, m7
>@@ -3957,28 +3965,32 @@
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>     call pixel_satd_8x8_internal
>-    lea r0, [r6 + 8]
>+    pxor       m7, m7
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 8
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 16]
>+    add r2, 8*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 16
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    lea r0, [r6 + 24]
>+    add r2, 16*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_ACCUM m6, m0, m7
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 24
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    call pixel_satd_8x8_internal
>-    SATD_END_SSE2 m6
>+    add r2, 24*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    call pixel_satd_8x8_internal
>+    SATD_END_SSE2 m6, m7
> 
> %endif
> 
>@@ -3995,28 +4007,28 @@
>     call pixel_satd_8x8_internal2
>     call pixel_satd_8x8_internal2
>     call pixel_satd_8x8_internal2
>-    lea r0, [r6 + 8]
>-    lea r2, [r7 + 8]
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    lea r0, [r6 + 16]
>-    lea r2, [r7 + 16]
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    lea r0, [r6 + 24]
>-    lea r2, [r7 + 24]
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>+    lea r2, [r7 + 8*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>+    lea r2, [r7 + 16*SIZEOF_PIXEL]
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>+    lea r2, [r7 + 24*SIZEOF_PIXEL]
>     call pixel_satd_8x8_internal2
>     call pixel_satd_8x8_internal2
>     call pixel_satd_8x8_internal2
>@@ -4045,31 +4057,31 @@
>     call pixel_satd_8x8_internal2
>     call pixel_satd_8x8_internal2
>     call pixel_satd_8x8_internal2
>-    lea r0, [r6 + 8]
>+    lea r0, [r6 + 8*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 8
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    lea r0, [r6 + 16]
>+    add r2, 8*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    lea r0, [r6 + 16*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 16
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    call pixel_satd_8x8_internal2
>-    lea r0, [r6 + 24]
>+    add r2, 16*SIZEOF_PIXEL
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    call pixel_satd_8x8_internal2
>+    lea r0, [r6 + 24*SIZEOF_PIXEL]
>     mov r2, [rsp]
>-    add r2, 24
>+    add r2, 24*SIZEOF_PIXEL
>     call pixel_satd_8x8_internal2
>     call pixel_satd_8x8_internal2
>     call pixel_satd_8x8_internal2
>@@ -4087,6 +4099,7 @@
>     RET
> %endif
> 
>+
> %if WIN64
> cglobal pixel_satd_48x64, 4,8,8    ;if WIN64 && cpuflag(avx)
>     SATD_START_SSE2 m6, m7
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131202/234a2174/attachment-0001.html>


More information about the x265-devel mailing list