[x265] [PATCH 1 of 2] x86: Modify asm code for NASM compatibility
vignesh at multicorewareinc.com
Tue Nov 21 06:07:01 CET 2017
# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1509595798 -19800
# Thu Nov 02 09:39:58 2017 +0530
# Node ID 182bfd0d5af929a801a08b35ee863d79eadb2833
# Parent dae558b40d9901d5498bb989c96ae8acc5b63cdf
x86: Modify asm code for NASM compatibility
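
The patch applies two recurring fixes across the assembly files. First, local branch targets such as .loop, .label, and .end gain a trailing colon: yasm silently accepts a bare label on its own line, while NASM's orphan-labels check (enabled by default) warns on it. Second, movd into a full-width register (movd r2, m0) becomes movd into its 32-bit alias (movd r2d, m0): movd is a 32-bit move, and pairing it with a 64-bit destination is a yasm laxity that NASM does not accept; writing the 32-bit alias zero-extends into the full register, so the following shr/mov on r2 still behave the same. A minimal illustrative sketch of both forms follows; it is not taken from the x265 tree, and the label/register names are made up for the example:

        SECTION .text
        global  example
example:
        mov     ecx, 4
.loop:                          ; colon required: NASM warns on a bare
                                ; ".loop" line (orphan-labels); yasm did not
        dec     ecx
        jnz     .loop

        movd    eax, xmm0       ; movd is a 32-bit move; use the 32-bit alias
        ; movd  rax, xmm0       ; yasm-only spelling; NASM expects movq
                                ; for a 64-bit destination
        ret
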
diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Tue Nov 21 09:40:16 2017 +0530
+++ b/source/common/x86/blockcopy8.asm Thu Nov 02 09:39:58 2017 +0530
@@ -3850,7 +3850,7 @@
mov r4d, %2/4
add r1, r1
add r3, r3
-.loop
+.loop:
movu m0, [r2]
movu m1, [r2 + 16]
movu m2, [r2 + 32]
@@ -3905,7 +3905,7 @@
lea r5, [3 * r3]
lea r6, [3 * r1]
-.loop
+.loop:
movu m0, [r2]
movu xm1, [r2 + 32]
movu [r0], m0
@@ -5085,7 +5085,7 @@
pxor m4, m4
pxor m5, m5
-.loop
+.loop:
; row 0
movu m0, [r1]
movu m1, [r1 + 16]
@@ -5196,7 +5196,7 @@
pxor m4, m4
pxor m5, m5
-.loop
+.loop:
; row 0
movu m0, [r1]
movu m1, [r1 + 16]
diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Tue Nov 21 09:40:16 2017 +0530
+++ b/source/common/x86/intrapred8.asm Thu Nov 02 09:39:58 2017 +0530
@@ -2148,7 +2148,7 @@
paddw m0, m1
packuswb m0, m0
- movd r2, m0
+ movd r2d, m0
mov [r0], r2b
shr r2, 8
mov [r0 + r1], r2b
diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/ipfilter16.asm
--- a/source/common/x86/ipfilter16.asm Tue Nov 21 09:40:16 2017 +0530
+++ b/source/common/x86/ipfilter16.asm Thu Nov 02 09:39:58 2017 +0530
@@ -9103,7 +9103,7 @@
; load constant
mova m2, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
psllw m0, (14 - BIT_DEPTH)
@@ -9156,7 +9156,7 @@
; load constant
mova m1, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
psllw m0, (14 - BIT_DEPTH)
psubw m0, m1
@@ -9277,7 +9277,7 @@
; load constant
mova m2, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
psllw m0, (14 - BIT_DEPTH)
@@ -9351,7 +9351,7 @@
; load constant
mova m2, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
psllw m0, (14 - BIT_DEPTH)
@@ -9405,7 +9405,7 @@
; load constant
mova m4, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
movu m2, [r0 + r1 * 2]
@@ -9510,7 +9510,7 @@
; load constant
mova m2, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
psllw m0, (14 - BIT_DEPTH)
@@ -9583,7 +9583,7 @@
; load constant
mova m4, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
movu m2, [r0 + r1 * 2]
@@ -9758,7 +9758,7 @@
; load constant
mova m2, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
psllw m0, (14 - BIT_DEPTH)
@@ -9869,7 +9869,7 @@
; load constant
mova m4, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
movu m2, [r0 + r1 * 2]
@@ -9952,7 +9952,7 @@
; load constant
mova m2, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + 32]
psllw m0, (14 - BIT_DEPTH)
@@ -10017,7 +10017,7 @@
; load constant
mova m2, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
psllw m0, (14 - BIT_DEPTH)
@@ -10081,7 +10081,7 @@
; load constant
mova m4, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r1]
movu m2, [r0 + r1 * 2]
@@ -10214,7 +10214,7 @@
; load constant
mova m3, [pw_2000]
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + 32]
movu m2, [r0 + 64]
@@ -10314,7 +10314,7 @@
.preloop:
lea r6, [r3 * 3]
-.loop
+.loop:
; Row 0
movu xm3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
movu xm4, [r0 + 2] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
@@ -10381,7 +10381,7 @@
packssdw xm4, xm4
movq [r2], xm3 ;row 0
-.end
+.end:
RET
%endif
%endmacro
diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Tue Nov 21 09:40:16 2017 +0530
+++ b/source/common/x86/ipfilter8.asm Thu Nov 02 09:39:58 2017 +0530
@@ -324,7 +324,7 @@
paddw m0, m5
psraw m0, 6
packuswb m0, m0
- movd r4, m0
+ movd r4d, m0
mov [dstq], r4w
shr r4, 16
mov [dstq + dststrideq], r4w
@@ -3471,7 +3471,7 @@
phaddw %2, %2
pmulhrsw %2, %3
packuswb %2, %2
- movd r4, %2
+ movd r4d, %2
mov [dstq], r4w
shr r4, 16
mov [dstq + dststrideq], r4w
@@ -5336,7 +5336,7 @@
sub r0 , r1
add r6d , 3
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m3, m1
@@ -5441,7 +5441,7 @@
.preloop:
lea r6, [r3 * 3]
-.loop
+.loop:
; Row 0-1
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m3, m1 ; shuffled based on the col order tab_Lm
@@ -5502,7 +5502,7 @@
movq [r2], xm3
movhps [r2 + r3], xm3
movq [r2 + r3 * 2], xm4
-.end
+.end:
RET
%endif
%endmacro
@@ -5592,7 +5592,7 @@
paddw xm1, xm2
psubw xm1, xm0
movu [r2], xm1 ;row 0
-.end
+.end:
RET
%endif
%endmacro ; IPFILTER_LUMA_PS_8xN_AVX2
@@ -5634,7 +5634,7 @@
sub r0, r8 ; r0(src)-r8
add r9, 7 ; blkheight += N - 1 (7 - 1 = 6 ; since the last one row not in loop)
-.label
+.label:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m4, m3, m6 ; row 0 (col 4 to 7)
@@ -12374,7 +12374,7 @@
mova m4, [pb_128]
mova m5, [tab_c_64_n64]
-.loop
+.loop:
movh m0, [r0]
punpcklbw m0, m4
pmaddubsw m0, m5
@@ -25491,7 +25491,7 @@
sub r0, r1
add r4d, 3
-.loop
+.loop:
; Row 0
movu m2, [r0]
movu m3, [r0 + 1]
@@ -25553,7 +25553,7 @@
sub r0 , r1
add r6d , 3
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m3, m1
@@ -25607,7 +25607,7 @@
sub r0 , r1
add r6d , 3
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0]
pshufb m3, m1
@@ -25670,7 +25670,7 @@
sub r0 , r1
add r6d , 3
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0]
pshufb m3, m1
@@ -25743,7 +25743,7 @@
je .label
sub r0 , r1
-.label
+.label:
; Row 0-1
movu xm3, [r0]
vinserti128 m3, m3, [r0 + r1], 1
@@ -25795,7 +25795,7 @@
movq [r2+r3], xm4
lea r2, [r2 + r3 * 2]
movhps [r2], xm3
-.end
+.end:
RET
cglobal interp_4tap_horiz_ps_4x2, 4,7,5
@@ -25823,7 +25823,7 @@
je .label
sub r0 , r1
-.label
+.label:
; Row 0-1
movu xm3, [r0]
vinserti128 m3, m3, [r0 + r1], 1
@@ -25864,7 +25864,7 @@
movq [r2+r3], xm4
lea r2, [r2 + r3 * 2]
movhps [r2], xm3
-.end
+.end:
RET
;-----------------------------------------------------------------------------------------------------------------------------
@@ -25899,7 +25899,7 @@
sub r0 , r1
-.loop
+.loop:
sub r4d, 4
; Row 0-1
movu xm3, [r0]
@@ -25955,7 +25955,7 @@
movq [r2+r3], xm4
lea r2, [r2 + r3 * 2]
movhps [r2], xm3
-.end
+.end:
RET
%endmacro
@@ -25993,7 +25993,7 @@
sub r0 , r1
add r6d , 1
-.loop
+.loop:
dec r6d
; Row 0
vbroadcasti128 m3, [r0]
@@ -26032,7 +26032,7 @@
psubw m3, m5
vpermq m3, m3, 11011000b
movu [r2], xm3
-.end
+.end:
RET
INIT_YMM avx2
@@ -26237,7 +26237,7 @@
dec r0
-.loop
+.loop:
sub r4d, 4
; Row 0-1
movu xm3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
@@ -26306,9 +26306,9 @@
sub r0, r6
add r4d, 7
-.label
+.label:
lea r6, [pw_2000]
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m4, m3, m6 ; row 0 (col 4 to 7)
@@ -26405,9 +26405,9 @@
sub r0, r6 ; r0(src)-r6
add r4d, 7 ; blkheight += N - 1 (7 - 1 = 6 ; since the last one row not in loop)
-.label
+.label:
lea r6, [interp8_hps_shuf]
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m4, m3, m6 ; row 0 (col 4 to 7)
@@ -26736,9 +26736,9 @@
sub r0, r6 ; r0(src)-r6
add r4d, 7 ; blkheight += N - 1
-.label
+.label:
lea r6, [pw_2000]
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m4, m3, m6 ; row 0 (col 4 to 7)
@@ -26880,7 +26880,7 @@
sub r0 , r1
inc r6d
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0]
pshufb m3, m1
@@ -26915,7 +26915,7 @@
psubw m3, m5
vpermq m3, m3, 11011000b
movu [r2], xm3
-.end
+.end:
RET
%endmacro
@@ -26945,7 +26945,7 @@
jz .label
sub r0, r1
-.label
+.label:
lea r6, [r1 * 3]
movq xm1, [r0]
movhps xm1, [r0 + r1]
@@ -26985,7 +26985,7 @@
movd [r2], xm1
pextrd [r2 + r3], xm1, 1
pextrd [r2 + r3 * 2], xm1, 2
-.end
+.end:
RET
INIT_YMM avx2
@@ -27005,7 +27005,7 @@
jz .label
sub r0, r1
-.label
+.label:
mova m4, [interp4_hpp_shuf]
mova m5, [pw_1]
dec r0
@@ -27062,7 +27062,7 @@
movd [r2], xm1
pextrd [r2 + r3], xm1, 1
movd [r2 + r3 * 2], xm2
-.end
+.end:
RET
INIT_YMM avx2
@@ -27217,7 +27217,7 @@
sub r0 , r1
inc r6d
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0]
pshufb m3, m1
@@ -27254,7 +27254,7 @@
vextracti128 xm4, m3, 1
movq [r2], xm3
movd [r2+8], xm4
-.end
+.end:
RET
INIT_YMM avx2
@@ -27285,7 +27285,7 @@
lea r6, [r1 * 3] ; r6 = (N / 2 - 1) * srcStride
sub r0, r6 ; r0(src)-r6
add r4d, 7
-.loop
+.loop:
; Row 0
@@ -27350,9 +27350,9 @@
sub r0, r6 ; r0(src)-r6
add r4d, 7 ; blkheight += N - 1 (7 - 1 = 6 ; since the last one row not in loop)
-.label
+.label:
lea r6, [interp8_hps_shuf]
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m4, m3, m6 ; row 0 (col 4 to 7)
@@ -27430,7 +27430,7 @@
sub r0 , r1
add r6d , 3
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m3, m1
@@ -27988,7 +27988,7 @@
sub r0 , r1
add r6d , 3
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m3, m1
@@ -28067,7 +28067,7 @@
sub r0 , r1
add r6d , 3
-.loop
+.loop:
; Row 0
vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
pshufb m3, m1
@@ -28114,7 +28114,7 @@
jz .label
sub r0, r1
-.label
+.label:
mova m4, [interp4_hps_shuf]
mova m5, [pw_1]
dec r0
@@ -28209,7 +28209,7 @@
movd [r2], xm1
pextrd [r2 + r3], xm1, 1
movd [r2 + r3 * 2], xm2
-.end
+.end:
RET
INIT_YMM avx2
diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/loopfilter.asm
--- a/source/common/x86/loopfilter.asm Tue Nov 21 09:40:16 2017 +0530
+++ b/source/common/x86/loopfilter.asm Thu Nov 02 09:39:58 2017 +0530
@@ -374,7 +374,7 @@
pxor m0, m0 ; m0 = 0
mova m6, [pb_2] ; m6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
shr r4d, 4
-.loop
+.loop:
movu m7, [r0]
movu m5, [r0 + 16]
movu m3, [r0 + r3]
@@ -430,7 +430,7 @@
mova m6, [pb_2] ; m6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
mova m7, [pb_128]
shr r4d, 4
-.loop
+.loop:
movu m1, [r0] ; m1 = pRec[x]
movu m2, [r0 + r3] ; m2 = pRec[x + iStride]
@@ -478,7 +478,7 @@
mova m4, [pb_2]
shr r4d, 4
mova m0, [pw_pixel_max]
-.loop
+.loop:
movu m5, [r0]
movu m3, [r0 + r3]
@@ -523,7 +523,7 @@
mova xm6, [pb_2] ; xm6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
mova xm7, [pb_128]
shr r4d, 4
-.loop
+.loop:
movu xm1, [r0] ; xm1 = pRec[x]
movu xm2, [r0 + r3] ; xm2 = pRec[x + iStride]
@@ -572,7 +572,7 @@
mov r5d, r4d
shr r4d, 4
mov r6, r0
-.loop
+.loop:
movu m7, [r0]
movu m5, [r0 + 16]
movu m3, [r0 + r3]
@@ -674,7 +674,7 @@
pxor m0, m0 ; m0 = 0
mova m7, [pb_128]
shr r4d, 4
-.loop
+.loop:
movu m1, [r0] ; m1 = pRec[x]
movu m2, [r0 + r3] ; m2 = pRec[x + iStride]
@@ -748,7 +748,7 @@
mova m4, [pw_pixel_max]
vbroadcasti128 m6, [r2] ; m6 = m_iOffsetEo
shr r4d, 4
-.loop
+.loop:
movu m7, [r0]
movu m5, [r0 + r3]
movu m1, [r0 + r3 * 2]
@@ -804,7 +804,7 @@
vbroadcasti128 m5, [pb_128]
vbroadcasti128 m6, [r2] ; m6 = m_iOffsetEo
shr r4d, 4
-.loop
+.loop:
movu xm1, [r0] ; m1 = pRec[x]
movu xm2, [r0 + r3] ; m2 = pRec[x + iStride]
vinserti128 m1, m1, xm2, 1
@@ -859,7 +859,7 @@
movh m6, [r0 + r4 * 2]
movhps m6, [r1 + r4]
-.loop
+.loop:
movu m7, [r0]
movu m5, [r0 + 16]
movu m3, [r0 + r5 + 2]
@@ -918,7 +918,7 @@
movh m5, [r0 + r4]
movhps m5, [r1 + r4]
-.loop
+.loop:
movu m1, [r0] ; m1 = rec[x]
movu m2, [r0 + r5 + 1] ; m2 = rec[x + stride + 1]
pxor m3, m1, m7
@@ -970,7 +970,7 @@
movhps xm4, [r1 + r4]
vbroadcasti128 m5, [r3]
mova m6, [pw_pixel_max]
-.loop
+.loop:
movu m1, [r0]
movu m3, [r0 + r5 + 2]
@@ -1061,7 +1061,7 @@
movhps xm4, [r1 + r4]
vbroadcasti128 m5, [r3]
-.loop
+.loop:
movu m1, [r0]
movu m7, [r0 + 32]
movu m3, [r0 + r5 + 2]
@@ -1567,11 +1567,11 @@
movu m4, [r1 + 16] ; offset[16-31]
pxor m7, m7
-.loopH
+.loopH:
mov r5d, r2d
xor r6, r6
-.loopW
+.loopW:
movu m2, [r0 + r6]
movu m5, [r0 + r6 + 16]
psrlw m0, m2, (BIT_DEPTH - 5)
@@ -1617,11 +1617,11 @@
movu m3, [r1 + 0] ; offset[0-15]
movu m4, [r1 + 16] ; offset[16-31]
pxor m7, m7 ; m7 =[0]
-.loopH
+.loopH:
mov r5d, r2d
xor r6, r6
-.loopW
+.loopW:
movu m2, [r0 + r6] ; m0 = [rec]
psrlw m1, m2, 3
pand m1, [pb_31] ; m1 = [index]
@@ -1670,9 +1670,9 @@
mov r6d, r3d
shr r3d, 1
-.loopH
+.loopH:
mov r5d, r2d
-.loopW
+.loopW:
movu m2, [r0]
movu m5, [r0 + r4]
psrlw m0, m2, (BIT_DEPTH - 5)
@@ -1751,9 +1751,9 @@
shr r2d, 4
mov r1d, r3d
shr r3d, 1
-.loopH
+.loopH:
mov r5d, r2d
-.loopW
+.loopW:
movu xm2, [r0] ; m2 = [rec]
vinserti128 m2, m2, [r0 + r4], 1
psrlw m1, m2, 3
@@ -1789,7 +1789,7 @@
test r1b, 1
jz .end
mov r5d, r2d
-.loopW1
+.loopW1:
movu xm2, [r0] ; m2 = [rec]
psrlw xm1, xm2, 3
pand xm1, xm7 ; m1 = [index]
@@ -1811,7 +1811,7 @@
add r0, 16
dec r5d
jnz .loopW1
-.end
+.end:
RET
%endif
@@ -1827,7 +1827,7 @@
add r3d, 1
mov r5, r0
movu m4, [r0 + r4]
-.loop
+.loop:
movu m1, [r1] ; m2 = pRec[x]
movu m2, [r2] ; m3 = pTmpU[x]
@@ -1921,7 +1921,7 @@
mov r5, r0
movu m4, [r0 + r4]
-.loop
+.loop:
movu m1, [r1] ; m2 = pRec[x]
movu m2, [r2] ; m3 = pTmpU[x]
diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm Tue Nov 21 09:40:16 2017 +0530
+++ b/source/common/x86/mc-a.asm Thu Nov 02 09:39:58 2017 +0530
@@ -4115,7 +4115,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 4
-.loop
+.loop:
pixel_avg_W8
dec r9d
jnz .loop
@@ -4129,7 +4129,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 8
-.loop
+.loop:
pixel_avg_W8
dec r9d
jnz .loop
@@ -4697,7 +4697,7 @@
lea r8, [r1 * 3]
mov r9d, 4
-.loop
+.loop:
movu m0, [r2]
movu m1, [r4]
pavgw m0, m1
@@ -4834,7 +4834,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 4
-.loop
+.loop:
pixel_avg_H16
dec r9d
jnz .loop
@@ -4848,7 +4848,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 4
-.loop
+.loop:
pixel_avg_H16
pixel_avg_H16
dec r9d
@@ -4863,7 +4863,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 4
-.loop
+.loop:
pixel_avg_H16
pixel_avg_H16
pixel_avg_H16
@@ -4887,7 +4887,7 @@
lea r8, [r1 * 3]
mov r9d, 8
-.loop
+.loop:
movu m0, [r2]
movu m1, [r4]
pavgw m0, m1
@@ -4987,7 +4987,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 2
-.loop
+.loop:
pixel_avg_W32
dec r9d
jnz .loop
@@ -5001,7 +5001,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 4
-.loop
+.loop:
pixel_avg_W32
dec r9d
jnz .loop
@@ -5015,7 +5015,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 6
-.loop
+.loop:
pixel_avg_W32
dec r9d
jnz .loop
@@ -5029,7 +5029,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 8
-.loop
+.loop:
pixel_avg_W32
dec r9d
jnz .loop
@@ -5043,7 +5043,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 16
-.loop
+.loop:
pixel_avg_W32
dec r9d
jnz .loop
@@ -5141,7 +5141,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 4
-.loop
+.loop:
pixel_avg_W64
dec r9d
jnz .loop
@@ -5155,7 +5155,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 8
-.loop
+.loop:
pixel_avg_W64
dec r9d
jnz .loop
@@ -5169,7 +5169,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 12
-.loop
+.loop:
pixel_avg_W64
dec r9d
jnz .loop
@@ -5183,7 +5183,7 @@
lea r7, [r5 * 3]
lea r8, [r1 * 3]
mov r9d, 16
-.loop
+.loop:
pixel_avg_W64
dec r9d
jnz .loop
@@ -5204,7 +5204,7 @@
lea r8, [r1 * 3]
mov r9d, 16
-.loop
+.loop:
movu m0, [r2]
movu m1, [r4]
pavgw m0, m1
diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Tue Nov 21 09:40:16 2017 +0530
+++ b/source/common/x86/pixel-util8.asm Thu Nov 02 09:39:58 2017 +0530
@@ -1785,7 +1785,7 @@
movu [r1], xm7
je .nextH
-.width6
+.width6:
cmp r6d, 6
jl .width4
movq [r1], xm7
@@ -4937,7 +4937,7 @@
lea r9, [r4 * 3]
lea r8, [r5 * 3]
-.loop
+.loop:
pmovzxbw m0, [r2]
pmovzxbw m1, [r3]
pmovzxbw m2, [r2 + r4]
@@ -5150,7 +5150,7 @@
lea r7, [r4 * 3]
lea r8, [r5 * 3]
-.loop
+.loop:
movu m0, [r2]
movu m1, [r2 + 32]
movu m2, [r3]
@@ -5557,7 +5557,7 @@
lea r7, [r4 * 3]
lea r8, [r5 * 3]
-.loop
+.loop:
movu m0, [r2]
movu m1, [r2 + 32]
movu m2, [r2 + 64]
diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/sad-a.asm
--- a/source/common/x86/sad-a.asm Tue Nov 21 09:40:16 2017 +0530
+++ b/source/common/x86/sad-a.asm Thu Nov 02 09:39:58 2017 +0530
@@ -5631,7 +5631,7 @@
xorps m5, m5
mov r4d, 4
-.loop
+.loop:
movu m1, [r0] ; row 0 of pix0
movu m2, [r2] ; row 0 of pix1
movu m3, [r0 + r1] ; row 1 of pix0
@@ -5676,7 +5676,7 @@
mov r4d, 6
lea r5, [r1 * 3]
lea r6, [r3 * 3]
-.loop
+.loop:
movu m1, [r0] ; row 0 of pix0
movu m2, [r2] ; row 0 of pix1
movu m3, [r0 + r1] ; row 1 of pix0
@@ -5718,7 +5718,7 @@
lea r5, [r1 * 3]
lea r6, [r3 * 3]
-.loop
+.loop:
movu m1, [r0] ; row 0 of pix0
movu m2, [r2] ; row 0 of pix1
movu m3, [r0 + r1] ; row 1 of pix0
@@ -5759,7 +5759,7 @@
lea r5, [r1 * 3]
lea r6, [r3 * 3]
-.loop
+.loop:
movu m1, [r0] ; row 0 of pix0
movu m2, [r2] ; row 0 of pix1
movu m3, [r0 + r1] ; row 1 of pix0
@@ -5822,7 +5822,7 @@
mov r4d, 64/4
lea r5, [r1 * 3]
lea r6, [r3 * 3]
-.loop
+.loop:
movu m1, [r0] ; row 0 of pix0
movu m2, [r2] ; row 0 of pix1
movu m3, [r0 + r1] ; row 1 of pix0
@@ -5873,7 +5873,7 @@
xorps m0, m0
xorps m5, m5
mov r4d, 4
-.loop
+.loop:
movu m1, [r0] ; first 32 of row 0 of pix0
movu m2, [r2] ; first 32 of row 0 of pix1
movu m3, [r0 + 32] ; second 32 of row 0 of pix0
@@ -5936,7 +5936,7 @@
xorps m0, m0
xorps m5, m5
mov r4d, 16
-.loop
+.loop:
movu m1, [r0] ; first 32 of row 0 of pix0
movu m2, [r2] ; first 32 of row 0 of pix1
movu m3, [r0 + 32] ; second 32 of row 0 of pix0
@@ -5978,7 +5978,7 @@
mov r4d, 12
lea r5, [r1 * 3]
lea r6, [r3 * 3]
-.loop
+.loop:
movu m1, [r0] ; first 32 of row 0 of pix0
movu m2, [r2] ; first 32 of row 0 of pix1
movu m3, [r0 + 32] ; second 32 of row 0 of pix0
@@ -6040,7 +6040,7 @@
mov r4d, 8
lea r5, [r1 * 3]
lea r6, [r3 * 3]
-.loop
+.loop:
movu m1, [r0] ; first 32 of row 0 of pix0
movu m2, [r2] ; first 32 of row 0 of pix1
movu m3, [r0 + 32] ; second 32 of row 0 of pix0
diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/seaintegral.asm
--- a/source/common/x86/seaintegral.asm Tue Nov 21 09:40:16 2017 +0530
+++ b/source/common/x86/seaintegral.asm Thu Nov 02 09:39:58 2017 +0530
@@ -36,7 +36,7 @@
mov r2, r1
shl r2, 4
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r2]
psubd m1, m0
@@ -54,7 +54,7 @@
mov r2, r1
shl r2, 5
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r2]
psubd m1, m0
@@ -75,7 +75,7 @@
shl r3, 4
add r2, r3
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r2]
psubd m1, m0
@@ -93,7 +93,7 @@
mov r2, r1
shl r2, 6
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r2]
psubd m1, m0
@@ -114,7 +114,7 @@
shl r3, 5
add r2, r3
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r2]
psubd m1, m0
@@ -132,7 +132,7 @@
mov r2, r1
shl r2, 7
-.loop
+.loop:
movu m0, [r0]
movu m1, [r0 + r2]
psubd m1, m0
@@ -264,7 +264,7 @@
movu [r0 + r3], xm0
jmp .end
-.end
+.end:
RET
%endif
@@ -379,7 +379,7 @@
movu [r0 + r3], m0
jmp .end
-.end
+.end:
RET
%endif
@@ -577,7 +577,7 @@
movu [r0 + r3], xm0
jmp .end
-.end
+.end:
RET
%endif
@@ -740,7 +740,7 @@
movu [r0 + r3], m0
jmp .end
-.end
+.end:
RET
%endif
@@ -883,7 +883,7 @@
movu [r0 + r3], m0
jmp .end
-.end
+.end:
RET
%macro INTEGRAL_THIRTYTWO_HORIZONTAL_16 0
@@ -1058,5 +1058,5 @@
movu [r0 + r3], m0
jmp .end
-.end
+.end:
RET