[x265] [PATCH 1 of 2] x86: Modify asm codes for NASM compatibility
Pradeep Ramachandran
pradeep at multicorewareinc.com
Thu Nov 30 08:39:40 CET 2017
On Tue, Nov 21, 2017 at 10:37 AM, <vignesh at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
> # Date 1509595798 -19800
> # Thu Nov 02 09:39:58 2017 +0530
> # Node ID 182bfd0d5af929a801a08b35ee863d79eadb2833
> # Parent dae558b40d9901d5498bb989c96ae8acc5b63cdf
> x86: Modify asm codes for NASM compatibility
>
Pushed series to default branch.
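
For the archives, a quick note on what this series changes, since the same two
constructs trip up other YASM-to-NASM ports. First, label definitions gain a
trailing colon (".loop" becomes ".loop:"): YASM tolerated a bare label on a
line of its own, while NASM flags it with its orphan-label warning, which
breaks builds that treat warnings as errors. Second, movd destinations switch
from the full GPR to its 32-bit name ("movd r2, m0" becomes "movd r2d, m0");
as far as I recall, NASM objects to movd with a 64-bit register operand (it
wants movq for that), where YASM quietly assembled it. Below is a minimal
standalone sketch of both constructs; the file and symbol names are made up
for illustration, and it uses raw registers rather than x265's x264asm macros
(r0-r9, m0-m7):

    ; nasm_compat.asm -- hypothetical sketch, not x265 code
    ; build: nasm -f elf64 nasm_compat.asm
    BITS 64
    SECTION .text
    global check_nasm_compat
    check_nasm_compat:
        mov     ecx, 4          ; loop counter
    .loop:                      ; trailing colon keeps NASM quiet; YASM also took ".loop"
        movd    eax, xmm0       ; 32-bit destination; "movd rax, xmm0" is the form NASM dislikes
        dec     ecx
        jnz     .loop
        ret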
>
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/blockcopy8.asm
> --- a/source/common/x86/blockcopy8.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/blockcopy8.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -3850,7 +3850,7 @@
> mov r4d, %2/4
> add r1, r1
> add r3, r3
> -.loop
> +.loop:
> movu m0, [r2]
> movu m1, [r2 + 16]
> movu m2, [r2 + 32]
> @@ -3905,7 +3905,7 @@
> lea r5, [3 * r3]
> lea r6, [3 * r1]
>
> -.loop
> +.loop:
> movu m0, [r2]
> movu xm1, [r2 + 32]
> movu [r0], m0
> @@ -5085,7 +5085,7 @@
> pxor m4, m4
> pxor m5, m5
>
> -.loop
> +.loop:
> ; row 0
> movu m0, [r1]
> movu m1, [r1 + 16]
> @@ -5196,7 +5196,7 @@
> pxor m4, m4
> pxor m5, m5
>
> -.loop
> +.loop:
> ; row 0
> movu m0, [r1]
> movu m1, [r1 + 16]
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/intrapred8.asm
> --- a/source/common/x86/intrapred8.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/intrapred8.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -2148,7 +2148,7 @@
> paddw m0, m1
> packuswb m0, m0
>
> - movd r2, m0
> + movd r2d, m0
> mov [r0], r2b
> shr r2, 8
> mov [r0 + r1], r2b
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/ipfilter16.asm
> --- a/source/common/x86/ipfilter16.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/ipfilter16.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -9103,7 +9103,7 @@
> ; load constant
> mova m2, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> psllw m0, (14 - BIT_DEPTH)
> @@ -9156,7 +9156,7 @@
> ; load constant
> mova m1, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> psllw m0, (14 - BIT_DEPTH)
> psubw m0, m1
> @@ -9277,7 +9277,7 @@
> ; load constant
> mova m2, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> psllw m0, (14 - BIT_DEPTH)
> @@ -9351,7 +9351,7 @@
> ; load constant
> mova m2, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> psllw m0, (14 - BIT_DEPTH)
> @@ -9405,7 +9405,7 @@
> ; load constant
> mova m4, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> movu m2, [r0 + r1 * 2]
> @@ -9510,7 +9510,7 @@
> ; load constant
> mova m2, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> psllw m0, (14 - BIT_DEPTH)
> @@ -9583,7 +9583,7 @@
> ; load constant
> mova m4, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> movu m2, [r0 + r1 * 2]
> @@ -9758,7 +9758,7 @@
> ; load constant
> mova m2, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> psllw m0, (14 - BIT_DEPTH)
> @@ -9869,7 +9869,7 @@
> ; load constant
> mova m4, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> movu m2, [r0 + r1 * 2]
> @@ -9952,7 +9952,7 @@
> ; load constant
> mova m2, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + 32]
> psllw m0, (14 - BIT_DEPTH)
> @@ -10017,7 +10017,7 @@
> ; load constant
> mova m2, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> psllw m0, (14 - BIT_DEPTH)
> @@ -10081,7 +10081,7 @@
> ; load constant
> mova m4, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r1]
> movu m2, [r0 + r1 * 2]
> @@ -10214,7 +10214,7 @@
> ; load constant
> mova m3, [pw_2000]
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + 32]
> movu m2, [r0 + 64]
> @@ -10314,7 +10314,7 @@
>
> .preloop:
> lea r6, [r3 * 3]
> -.loop
> +.loop:
> ; Row 0
> movu xm3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> movu xm4, [r0 + 2] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> @@ -10381,7 +10381,7 @@
> packssdw xm4, xm4
>
> movq [r2], xm3 ;row 0
> -.end
> +.end:
> RET
> %endif
> %endmacro
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/ipfilter8.asm
> --- a/source/common/x86/ipfilter8.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/ipfilter8.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -324,7 +324,7 @@
> paddw m0, m5
> psraw m0, 6
> packuswb m0, m0
> - movd r4, m0
> + movd r4d, m0
> mov [dstq], r4w
> shr r4, 16
> mov [dstq + dststrideq], r4w
> @@ -3471,7 +3471,7 @@
> phaddw %2, %2
> pmulhrsw %2, %3
> packuswb %2, %2
> - movd r4, %2
> + movd r4d, %2
> mov [dstq], r4w
> shr r4, 16
> mov [dstq + dststrideq], r4w
> @@ -5336,7 +5336,7 @@
> sub r0 , r1
> add r6d , 3
>
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m3, m1
> @@ -5441,7 +5441,7 @@
>
> .preloop:
> lea r6, [r3 * 3]
> -.loop
> +.loop:
> ; Row 0-1
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m3, m1 ; shuffled based on the col order tab_Lm
> @@ -5502,7 +5502,7 @@
> movq [r2], xm3
> movhps [r2 + r3], xm3
> movq [r2 + r3 * 2], xm4
> -.end
> +.end:
> RET
> %endif
> %endmacro
> @@ -5592,7 +5592,7 @@
> paddw xm1, xm2
> psubw xm1, xm0
> movu [r2], xm1 ;row 0
> -.end
> +.end:
> RET
> %endif
> %endmacro ; IPFILTER_LUMA_PS_8xN_AVX2
> @@ -5634,7 +5634,7 @@
> sub r0, r8 ; r0(src)-r8
> add r9, 7 ; blkheight += N - 1 (7 - 1 = 6 ; since the last one row not in loop)
>
> -.label
> +.label:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -12374,7 +12374,7 @@
> mova m4, [pb_128]
> mova m5, [tab_c_64_n64]
>
> -.loop
> +.loop:
> movh m0, [r0]
> punpcklbw m0, m4
> pmaddubsw m0, m5
> @@ -25491,7 +25491,7 @@
> sub r0, r1
> add r4d, 3
>
> -.loop
> +.loop:
> ; Row 0
> movu m2, [r0]
> movu m3, [r0 + 1]
> @@ -25553,7 +25553,7 @@
> sub r0 , r1
> add r6d , 3
>
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m3, m1
> @@ -25607,7 +25607,7 @@
> sub r0 , r1
> add r6d , 3
>
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0]
> pshufb m3, m1
> @@ -25670,7 +25670,7 @@
> sub r0 , r1
> add r6d , 3
>
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0]
> pshufb m3, m1
> @@ -25743,7 +25743,7 @@
> je .label
> sub r0 , r1
>
> -.label
> +.label:
> ; Row 0-1
> movu xm3, [r0]
> vinserti128 m3, m3, [r0 + r1], 1
> @@ -25795,7 +25795,7 @@
> movq [r2+r3], xm4
> lea r2, [r2 + r3 * 2]
> movhps [r2], xm3
> -.end
> +.end:
> RET
>
> cglobal interp_4tap_horiz_ps_4x2, 4,7,5
> @@ -25823,7 +25823,7 @@
> je .label
> sub r0 , r1
>
> -.label
> +.label:
> ; Row 0-1
> movu xm3, [r0]
> vinserti128 m3, m3, [r0 + r1], 1
> @@ -25864,7 +25864,7 @@
> movq [r2+r3], xm4
> lea r2, [r2 + r3 * 2]
> movhps [r2], xm3
> -.end
> +.end:
> RET
>
> ;-----------------------------------------------------------------------------------------------------------------------------
> @@ -25899,7 +25899,7 @@
> sub r0 , r1
>
>
> -.loop
> +.loop:
> sub r4d, 4
> ; Row 0-1
> movu xm3, [r0]
> @@ -25955,7 +25955,7 @@
> movq [r2+r3], xm4
> lea r2, [r2 + r3 * 2]
> movhps [r2], xm3
> -.end
> +.end:
> RET
> %endmacro
>
> @@ -25993,7 +25993,7 @@
> sub r0 , r1
> add r6d , 1
>
> -.loop
> +.loop:
> dec r6d
> ; Row 0
> vbroadcasti128 m3, [r0]
> @@ -26032,7 +26032,7 @@
> psubw m3, m5
> vpermq m3, m3, 11011000b
> movu [r2], xm3
> -.end
> +.end:
> RET
>
> INIT_YMM avx2
> @@ -26237,7 +26237,7 @@
>
> dec r0
>
> -.loop
> +.loop:
> sub r4d, 4
> ; Row 0-1
> movu xm3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> @@ -26306,9 +26306,9 @@
> sub r0, r6
> add r4d, 7
>
> -.label
> +.label:
> lea r6, [pw_2000]
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -26405,9 +26405,9 @@
> sub r0, r6 ; r0(src)-r6
> add r4d, 7 ; blkheight += N - 1 (7 - 1 = 6 ; since the last one row not in loop)
>
> -.label
> +.label:
> lea r6, [interp8_hps_shuf]
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -26736,9 +26736,9 @@
> sub r0, r6 ; r0(src)-r6
> add r4d, 7 ; blkheight += N - 1
>
> -.label
> +.label:
> lea r6, [pw_2000]
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -26880,7 +26880,7 @@
> sub r0 , r1
> inc r6d
>
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0]
> pshufb m3, m1
> @@ -26915,7 +26915,7 @@
> psubw m3, m5
> vpermq m3, m3, 11011000b
> movu [r2], xm3
> -.end
> +.end:
> RET
> %endmacro
>
> @@ -26945,7 +26945,7 @@
> jz .label
> sub r0, r1
>
> -.label
> +.label:
> lea r6, [r1 * 3]
> movq xm1, [r0]
> movhps xm1, [r0 + r1]
> @@ -26985,7 +26985,7 @@
> movd [r2], xm1
> pextrd [r2 + r3], xm1, 1
> pextrd [r2 + r3 * 2], xm1, 2
> -.end
> +.end:
> RET
>
> INIT_YMM avx2
> @@ -27005,7 +27005,7 @@
> jz .label
> sub r0, r1
>
> -.label
> +.label:
> mova m4, [interp4_hpp_shuf]
> mova m5, [pw_1]
> dec r0
> @@ -27062,7 +27062,7 @@
> movd [r2], xm1
> pextrd [r2 + r3], xm1, 1
> movd [r2 + r3 * 2], xm2
> -.end
> +.end:
> RET
>
> INIT_YMM avx2
> @@ -27217,7 +27217,7 @@
> sub r0 , r1
> inc r6d
>
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0]
> pshufb m3, m1
> @@ -27254,7 +27254,7 @@
> vextracti128 xm4, m3, 1
> movq [r2], xm3
> movd [r2+8], xm4
> -.end
> +.end:
> RET
>
> INIT_YMM avx2
> @@ -27285,7 +27285,7 @@
> lea r6, [r1 * 3] ; r6 = (N / 2 - 1) * srcStride
> sub r0, r6 ; r0(src)-r6
> add r4d, 7
> -.loop
> +.loop:
>
> ; Row 0
>
> @@ -27350,9 +27350,9 @@
> sub r0, r6 ; r0(src)-r6
> add r4d, 7 ; blkheight += N - 1 (7 - 1 = 6 ; since the last one row not in loop)
>
> -.label
> +.label:
> lea r6, [interp8_hps_shuf]
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -27430,7 +27430,7 @@
> sub r0 , r1
> add r6d , 3
>
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m3, m1
> @@ -27988,7 +27988,7 @@
> sub r0 , r1
> add r6d , 3
>
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m3, m1
> @@ -28067,7 +28067,7 @@
> sub r0 , r1
> add r6d , 3
>
> -.loop
> +.loop:
> ; Row 0
> vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> pshufb m3, m1
> @@ -28114,7 +28114,7 @@
> jz .label
> sub r0, r1
>
> -.label
> +.label:
> mova m4, [interp4_hps_shuf]
> mova m5, [pw_1]
> dec r0
> @@ -28209,7 +28209,7 @@
> movd [r2], xm1
> pextrd [r2 + r3], xm1, 1
> movd [r2 + r3 * 2], xm2
> -.end
> +.end:
> RET
>
> INIT_YMM avx2
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/loopfilter.asm
> --- a/source/common/x86/loopfilter.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/loopfilter.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -374,7 +374,7 @@
> pxor m0, m0 ; m0 = 0
> mova m6, [pb_2] ; m6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
> shr r4d, 4
> -.loop
> +.loop:
> movu m7, [r0]
> movu m5, [r0 + 16]
> movu m3, [r0 + r3]
> @@ -430,7 +430,7 @@
> mova m6, [pb_2] ; m6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
> mova m7, [pb_128]
> shr r4d, 4
> -.loop
> +.loop:
> movu m1, [r0] ; m1 = pRec[x]
> movu m2, [r0 + r3] ; m2 = pRec[x + iStride]
>
> @@ -478,7 +478,7 @@
> mova m4, [pb_2]
> shr r4d, 4
> mova m0, [pw_pixel_max]
> -.loop
> +.loop:
> movu m5, [r0]
> movu m3, [r0 + r3]
>
> @@ -523,7 +523,7 @@
> mova xm6, [pb_2] ; xm6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
> mova xm7, [pb_128]
> shr r4d, 4
> -.loop
> +.loop:
> movu xm1, [r0] ; xm1 = pRec[x]
> movu xm2, [r0 + r3] ; xm2 = pRec[x + iStride]
>
> @@ -572,7 +572,7 @@
> mov r5d, r4d
> shr r4d, 4
> mov r6, r0
> -.loop
> +.loop:
> movu m7, [r0]
> movu m5, [r0 + 16]
> movu m3, [r0 + r3]
> @@ -674,7 +674,7 @@
> pxor m0, m0 ; m0 = 0
> mova m7, [pb_128]
> shr r4d, 4
> -.loop
> +.loop:
> movu m1, [r0] ; m1 = pRec[x]
> movu m2, [r0 + r3] ; m2 = pRec[x + iStride]
>
> @@ -748,7 +748,7 @@
> mova m4, [pw_pixel_max]
> vbroadcasti128 m6, [r2] ; m6 = m_iOffsetEo
> shr r4d, 4
> -.loop
> +.loop:
> movu m7, [r0]
> movu m5, [r0 + r3]
> movu m1, [r0 + r3 * 2]
> @@ -804,7 +804,7 @@
> vbroadcasti128 m5, [pb_128]
> vbroadcasti128 m6, [r2] ; m6 = m_iOffsetEo
> shr r4d, 4
> -.loop
> +.loop:
> movu xm1, [r0] ; m1 = pRec[x]
> movu xm2, [r0 + r3] ; m2 = pRec[x + iStride]
> vinserti128 m1, m1, xm2, 1
> @@ -859,7 +859,7 @@
> movh m6, [r0 + r4 * 2]
> movhps m6, [r1 + r4]
>
> -.loop
> +.loop:
> movu m7, [r0]
> movu m5, [r0 + 16]
> movu m3, [r0 + r5 + 2]
> @@ -918,7 +918,7 @@
> movh m5, [r0 + r4]
> movhps m5, [r1 + r4]
>
> -.loop
> +.loop:
> movu m1, [r0] ; m1 = rec[x]
> movu m2, [r0 + r5 + 1] ; m2 = rec[x + stride + 1]
> pxor m3, m1, m7
> @@ -970,7 +970,7 @@
> movhps xm4, [r1 + r4]
> vbroadcasti128 m5, [r3]
> mova m6, [pw_pixel_max]
> -.loop
> +.loop:
> movu m1, [r0]
> movu m3, [r0 + r5 + 2]
>
> @@ -1061,7 +1061,7 @@
> movhps xm4, [r1 + r4]
> vbroadcasti128 m5, [r3]
>
> -.loop
> +.loop:
> movu m1, [r0]
> movu m7, [r0 + 32]
> movu m3, [r0 + r5 + 2]
> @@ -1567,11 +1567,11 @@
> movu m4, [r1 + 16] ; offset[16-31]
> pxor m7, m7
>
> -.loopH
> +.loopH:
> mov r5d, r2d
> xor r6, r6
>
> -.loopW
> +.loopW:
> movu m2, [r0 + r6]
> movu m5, [r0 + r6 + 16]
> psrlw m0, m2, (BIT_DEPTH - 5)
> @@ -1617,11 +1617,11 @@
> movu m3, [r1 + 0] ; offset[0-15]
> movu m4, [r1 + 16] ; offset[16-31]
> pxor m7, m7 ; m7 =[0]
> -.loopH
> +.loopH:
> mov r5d, r2d
> xor r6, r6
>
> -.loopW
> +.loopW:
> movu m2, [r0 + r6] ; m0 = [rec]
> psrlw m1, m2, 3
> pand m1, [pb_31] ; m1 = [index]
> @@ -1670,9 +1670,9 @@
> mov r6d, r3d
> shr r3d, 1
>
> -.loopH
> +.loopH:
> mov r5d, r2d
> -.loopW
> +.loopW:
> movu m2, [r0]
> movu m5, [r0 + r4]
> psrlw m0, m2, (BIT_DEPTH - 5)
> @@ -1751,9 +1751,9 @@
> shr r2d, 4
> mov r1d, r3d
> shr r3d, 1
> -.loopH
> +.loopH:
> mov r5d, r2d
> -.loopW
> +.loopW:
> movu xm2, [r0] ; m2 = [rec]
> vinserti128 m2, m2, [r0 + r4], 1
> psrlw m1, m2, 3
> @@ -1789,7 +1789,7 @@
> test r1b, 1
> jz .end
> mov r5d, r2d
> -.loopW1
> +.loopW1:
> movu xm2, [r0] ; m2 = [rec]
> psrlw xm1, xm2, 3
> pand xm1, xm7 ; m1 = [index]
> @@ -1811,7 +1811,7 @@
> add r0, 16
> dec r5d
> jnz .loopW1
> -.end
> +.end:
> RET
> %endif
>
> @@ -1827,7 +1827,7 @@
> add r3d, 1
> mov r5, r0
> movu m4, [r0 + r4]
> -.loop
> +.loop:
> movu m1, [r1] ; m2 = pRec[x]
> movu m2, [r2] ; m3 = pTmpU[x]
>
> @@ -1921,7 +1921,7 @@
> mov r5, r0
> movu m4, [r0 + r4]
>
> -.loop
> +.loop:
> movu m1, [r1] ; m2 = pRec[x]
> movu m2, [r2] ; m3 = pTmpU[x]
>
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/mc-a.asm
> --- a/source/common/x86/mc-a.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/mc-a.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -4115,7 +4115,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 4
> -.loop
> +.loop:
> pixel_avg_W8
> dec r9d
> jnz .loop
> @@ -4129,7 +4129,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 8
> -.loop
> +.loop:
> pixel_avg_W8
> dec r9d
> jnz .loop
> @@ -4697,7 +4697,7 @@
> lea r8, [r1 * 3]
> mov r9d, 4
>
> -.loop
> +.loop:
> movu m0, [r2]
> movu m1, [r4]
> pavgw m0, m1
> @@ -4834,7 +4834,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 4
> -.loop
> +.loop:
> pixel_avg_H16
> dec r9d
> jnz .loop
> @@ -4848,7 +4848,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 4
> -.loop
> +.loop:
> pixel_avg_H16
> pixel_avg_H16
> dec r9d
> @@ -4863,7 +4863,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 4
> -.loop
> +.loop:
> pixel_avg_H16
> pixel_avg_H16
> pixel_avg_H16
> @@ -4887,7 +4887,7 @@
> lea r8, [r1 * 3]
> mov r9d, 8
>
> -.loop
> +.loop:
> movu m0, [r2]
> movu m1, [r4]
> pavgw m0, m1
> @@ -4987,7 +4987,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 2
> -.loop
> +.loop:
> pixel_avg_W32
> dec r9d
> jnz .loop
> @@ -5001,7 +5001,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 4
> -.loop
> +.loop:
> pixel_avg_W32
> dec r9d
> jnz .loop
> @@ -5015,7 +5015,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 6
> -.loop
> +.loop:
> pixel_avg_W32
> dec r9d
> jnz .loop
> @@ -5029,7 +5029,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 8
> -.loop
> +.loop:
> pixel_avg_W32
> dec r9d
> jnz .loop
> @@ -5043,7 +5043,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 16
> -.loop
> +.loop:
> pixel_avg_W32
> dec r9d
> jnz .loop
> @@ -5141,7 +5141,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 4
> -.loop
> +.loop:
> pixel_avg_W64
> dec r9d
> jnz .loop
> @@ -5155,7 +5155,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 8
> -.loop
> +.loop:
> pixel_avg_W64
> dec r9d
> jnz .loop
> @@ -5169,7 +5169,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 12
> -.loop
> +.loop:
> pixel_avg_W64
> dec r9d
> jnz .loop
> @@ -5183,7 +5183,7 @@
> lea r7, [r5 * 3]
> lea r8, [r1 * 3]
> mov r9d, 16
> -.loop
> +.loop:
> pixel_avg_W64
> dec r9d
> jnz .loop
> @@ -5204,7 +5204,7 @@
> lea r8, [r1 * 3]
> mov r9d, 16
>
> -.loop
> +.loop:
> movu m0, [r2]
> movu m1, [r4]
> pavgw m0, m1
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/pixel-util8.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -1785,7 +1785,7 @@
> movu [r1], xm7
> je .nextH
>
> -.width6
> +.width6:
> cmp r6d, 6
> jl .width4
> movq [r1], xm7
> @@ -4937,7 +4937,7 @@
> lea r9, [r4 * 3]
> lea r8, [r5 * 3]
>
> -.loop
> +.loop:
> pmovzxbw m0, [r2]
> pmovzxbw m1, [r3]
> pmovzxbw m2, [r2 + r4]
> @@ -5150,7 +5150,7 @@
> lea r7, [r4 * 3]
> lea r8, [r5 * 3]
>
> -.loop
> +.loop:
> movu m0, [r2]
> movu m1, [r2 + 32]
> movu m2, [r3]
> @@ -5557,7 +5557,7 @@
> lea r7, [r4 * 3]
> lea r8, [r5 * 3]
>
> -.loop
> +.loop:
> movu m0, [r2]
> movu m1, [r2 + 32]
> movu m2, [r2 + 64]
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/sad-a.asm
> --- a/source/common/x86/sad-a.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/sad-a.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -5631,7 +5631,7 @@
> xorps m5, m5
> mov r4d, 4
>
> -.loop
> +.loop:
> movu m1, [r0] ; row 0 of pix0
> movu m2, [r2] ; row 0 of pix1
> movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5676,7 +5676,7 @@
> mov r4d, 6
> lea r5, [r1 * 3]
> lea r6, [r3 * 3]
> -.loop
> +.loop:
> movu m1, [r0] ; row 0 of pix0
> movu m2, [r2] ; row 0 of pix1
> movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5718,7 +5718,7 @@
> lea r5, [r1 * 3]
> lea r6, [r3 * 3]
>
> -.loop
> +.loop:
> movu m1, [r0] ; row 0 of pix0
> movu m2, [r2] ; row 0 of pix1
> movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5759,7 +5759,7 @@
> lea r5, [r1 * 3]
> lea r6, [r3 * 3]
>
> -.loop
> +.loop:
> movu m1, [r0] ; row 0 of pix0
> movu m2, [r2] ; row 0 of pix1
> movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5822,7 +5822,7 @@
> mov r4d, 64/4
> lea r5, [r1 * 3]
> lea r6, [r3 * 3]
> -.loop
> +.loop:
> movu m1, [r0] ; row 0 of pix0
> movu m2, [r2] ; row 0 of pix1
> movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5873,7 +5873,7 @@
> xorps m0, m0
> xorps m5, m5
> mov r4d, 4
> -.loop
> +.loop:
> movu m1, [r0] ; first 32 of row 0 of pix0
> movu m2, [r2] ; first 32 of row 0 of pix1
> movu m3, [r0 + 32] ; second 32 of row 0 of pix0
> @@ -5936,7 +5936,7 @@
> xorps m0, m0
> xorps m5, m5
> mov r4d, 16
> -.loop
> +.loop:
> movu m1, [r0] ; first 32 of row 0 of pix0
> movu m2, [r2] ; first 32 of row 0 of pix1
> movu m3, [r0 + 32] ; second 32 of row 0 of pix0
> @@ -5978,7 +5978,7 @@
> mov r4d, 12
> lea r5, [r1 * 3]
> lea r6, [r3 * 3]
> -.loop
> +.loop:
> movu m1, [r0] ; first 32 of row 0 of pix0
> movu m2, [r2] ; first 32 of row 0 of pix1
> movu m3, [r0 + 32] ; second 32 of row 0 of pix0
> @@ -6040,7 +6040,7 @@
> mov r4d, 8
> lea r5, [r1 * 3]
> lea r6, [r3 * 3]
> -.loop
> +.loop:
> movu m1, [r0] ; first 32 of row 0 of pix0
> movu m2, [r2] ; first 32 of row 0 of pix1
> movu m3, [r0 + 32] ; second 32 of row 0 of pix0
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/seaintegral.asm
> --- a/source/common/x86/seaintegral.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/seaintegral.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -36,7 +36,7 @@
> mov r2, r1
> shl r2, 4
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r2]
> psubd m1, m0
> @@ -54,7 +54,7 @@
> mov r2, r1
> shl r2, 5
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r2]
> psubd m1, m0
> @@ -75,7 +75,7 @@
> shl r3, 4
> add r2, r3
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r2]
> psubd m1, m0
> @@ -93,7 +93,7 @@
> mov r2, r1
> shl r2, 6
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r2]
> psubd m1, m0
> @@ -114,7 +114,7 @@
> shl r3, 5
> add r2, r3
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r2]
> psubd m1, m0
> @@ -132,7 +132,7 @@
> mov r2, r1
> shl r2, 7
>
> -.loop
> +.loop:
> movu m0, [r0]
> movu m1, [r0 + r2]
> psubd m1, m0
> @@ -264,7 +264,7 @@
> movu [r0 + r3], xm0
> jmp .end
>
> -.end
> +.end:
> RET
> %endif
>
> @@ -379,7 +379,7 @@
> movu [r0 + r3], m0
> jmp .end
>
> -.end
> +.end:
> RET
> %endif
>
> @@ -577,7 +577,7 @@
> movu [r0 + r3], xm0
> jmp .end
>
> -.end
> +.end:
> RET
> %endif
>
> @@ -740,7 +740,7 @@
> movu [r0 + r3], m0
> jmp .end
>
> -.end
> +.end:
> RET
> %endif
>
> @@ -883,7 +883,7 @@
> movu [r0 + r3], m0
> jmp .end
>
> -.end
> +.end:
> RET
>
> %macro INTEGRAL_THIRTYTWO_HORIZONTAL_16 0
> @@ -1058,5 +1058,5 @@
> movu [r0 + r3], m0
> jmp .end
>
> -.end
> +.end:
> RET
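
One follow-up on the movd changes for anyone reviewing them: narrowing the
destination from r2 to r2d does not disturb the byte extraction that follows
("mov [r0], r2b" then "shr r2, 8"), because in 64-bit mode a write to a 32-bit
register zero-extends into the full 64-bit register. A sketch with concrete
registers standing in for the x264asm macro names (rdi and rsi here are
illustrative, not the actual parameter registers):

    movd    eax, xmm0           ; writes eax; rax is implicitly zero-extended
    mov     [rdi], al           ; store byte 0
    shr     eax, 8
    mov     [rdi + rsi], al     ; store byte 1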