[x265] [PATCH 02/12] Arm: Use local labels for assembly routine loops
Hari Limaye
hari.limaye at arm.com
Thu May 2 21:19:37 UTC 2024
Amend loop labels in the Arm and AArch64 assembly files to start with
`.Loop` instead of `.loop`, as the GNU assembler treats labels beginning
with `.L` as local labels. This improves the output of tools like perf
and gdb, as code under loop labels is now correctly attributed to its
containing routine.
---
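Note for reviewers: a minimal standalone sketch (not taken from the x265 sources;
the routine names copy16/copy16_local are hypothetical) illustrating why the `.L`
prefix matters. A label such as `.loop_copy` is emitted into the object's symbol
table, so perf and gdb may show it as if it were a separate symbol, while
`.Loop_copy_local` is recognised by the GNU assembler as a local label and is
omitted from the symbol table, keeping samples attributed to the enclosing routine.

    // illustrative_loop.S -- hypothetical example, assumes AArch64 GNU as
        .text
        .global copy16
    copy16:                        // copies w2 * 16 bytes from x1 to x0
    .loop_copy:                    // no ".L" prefix: lands in the symbol table,
                                   // so profilers can mis-attribute samples to it
        ld1     {v0.16b}, [x1], #16
        st1     {v0.16b}, [x0], #16
        subs    w2, w2, #1
        b.ne    .loop_copy
        ret

        .global copy16_local
    copy16_local:
    .Loop_copy_local:              // ".L" prefix: assembler-local label, kept out
                                   // of the symbol table; samples stay attributed
                                   // to copy16_local
        ld1     {v0.16b}, [x1], #16
        st1     {v0.16b}, [x0], #16
        subs    w2, w2, #1
        b.ne    .Loop_copy_local
        ret
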
source/common/aarch64/blockcopy8-sve.S | 60 +++++------
source/common/aarch64/blockcopy8.S | 116 ++++++++++-----------
source/common/aarch64/ipfilter-common.S | 104 +++++++++----------
source/common/aarch64/ipfilter-sve2.S | 56 +++++-----
source/common/aarch64/ipfilter.S | 12 +--
source/common/aarch64/mc-a-sve2.S | 88 ++++++++--------
source/common/aarch64/mc-a.S | 32 +++---
source/common/aarch64/p2s-sve.S | 12 +--
source/common/aarch64/p2s.S | 12 +--
source/common/aarch64/pixel-util-sve.S | 4 +-
source/common/aarch64/pixel-util-sve2.S | 56 +++++-----
source/common/aarch64/pixel-util.S | 132 ++++++++++++------------
source/common/aarch64/sad-a-sve2.S | 16 +--
source/common/aarch64/sad-a.S | 8 +-
source/common/aarch64/ssd-a-sve2.S | 16 +--
source/common/aarch64/ssd-a.S | 32 +++---
source/common/arm/blockcopy8.S | 4 +-
source/common/arm/dct-a.S | 8 +-
source/common/arm/ipfilter8.S | 108 +++++++++----------
source/common/arm/mc-a.S | 8 +-
source/common/arm/pixel-util.S | 28 ++---
source/common/arm/sad-a.S | 36 +++----
source/common/arm/ssd-a.S | 28 ++---
23 files changed, 488 insertions(+), 488 deletions(-)
diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S
index 846927909..d5664af58 100644
--- a/source/common/aarch64/blockcopy8-sve.S
+++ b/source/common/aarch64/blockcopy8-sve.S
@@ -112,7 +112,7 @@ function PFX(blockcopy_sp_32x32_sve)
lsl x3, x3, #1
movrel x11, xtn_xtn2_table
ld1 {v31.16b}, [x11]
-.loop_csp32_sve:
+.Loop_csp32_sve:
sub w12, w12, #1
.rept 4
ld1 {v0.8h-v3.8h}, [x2], x3
@@ -124,7 +124,7 @@ function PFX(blockcopy_sp_32x32_sve)
st1 {v0.16b-v1.16b}, [x0], x1
st1 {v2.16b-v3.16b}, [x0], x1
.endr
- cbnz w12, .loop_csp32_sve
+ cbnz w12, .Loop_csp32_sve
ret
.vl_gt_16_blockcopy_sp_32_32:
cmp x9, #48
@@ -199,7 +199,7 @@ function PFX(blockcopy_ps_32x32_sve)
bgt .vl_gt_16_blockcopy_ps_32_32
lsl x1, x1, #1
mov w12, #4
-.loop_cps32_sve:
+.Loop_cps32_sve:
sub w12, w12, #1
.rept 4
ld1 {v16.16b-v17.16b}, [x2], x3
@@ -215,7 +215,7 @@ function PFX(blockcopy_ps_32x32_sve)
st1 {v0.8h-v3.8h}, [x0], x1
st1 {v4.8h-v7.8h}, [x0], x1
.endr
- cbnz w12, .loop_cps32_sve
+ cbnz w12, .Loop_cps32_sve
ret
.vl_gt_16_blockcopy_ps_32_32:
cmp x9, #48
@@ -248,7 +248,7 @@ function PFX(blockcopy_ps_64x64_sve)
lsl x1, x1, #1
sub x1, x1, #64
mov w12, #16
-.loop_cps64_sve:
+.Loop_cps64_sve:
sub w12, w12, #1
.rept 4
ld1 {v16.16b-v19.16b}, [x2], x3
@@ -263,7 +263,7 @@ function PFX(blockcopy_ps_64x64_sve)
st1 {v0.8h-v3.8h}, [x0], #64
st1 {v4.8h-v7.8h}, [x0], x1
.endr
- cbnz w12, .loop_cps64_sve
+ cbnz w12, .Loop_cps64_sve
ret
.vl_gt_16_blockcopy_ps_64_64:
cmp x9, #48
@@ -338,13 +338,13 @@ function PFX(blockcopy_ss_32x32_sve)
lsl x1, x1, #1
lsl x3, x3, #1
mov w12, #4
-.loop_css32_sve:
+.Loop_css32_sve:
sub w12, w12, #1
.rept 8
ld1 {v0.8h-v3.8h}, [x2], x3
st1 {v0.8h-v3.8h}, [x0], x1
.endr
- cbnz w12, .loop_css32_sve
+ cbnz w12, .Loop_css32_sve
ret
.vl_gt_16_blockcopy_ss_32_32:
cmp x9, #48
@@ -379,7 +379,7 @@ function PFX(blockcopy_ss_64x64_sve)
lsl x3, x3, #1
sub x3, x3, #64
mov w12, #8
-.loop_css64_sve:
+.Loop_css64_sve:
sub w12, w12, #1
.rept 8
ld1 {v0.8h-v3.8h}, [x2], #64
@@ -387,7 +387,7 @@ function PFX(blockcopy_ss_64x64_sve)
st1 {v0.8h-v3.8h}, [x0], #64
st1 {v4.8h-v7.8h}, [x0], x1
.endr
- cbnz w12, .loop_css64_sve
+ cbnz w12, .Loop_css64_sve
ret
.vl_gt_16_blockcopy_ss_64_64:
cmp x9, #48
@@ -474,13 +474,13 @@ function PFX(blockcopy_ss_32x64_sve)
lsl x1, x1, #1
lsl x3, x3, #1
mov w12, #8
-.loop_css32x64_sve:
+.Loop_css32x64_sve:
sub w12, w12, #1
.rept 8
ld1 {v0.8h-v3.8h}, [x2], x3
st1 {v0.8h-v3.8h}, [x0], x1
.endr
- cbnz w12, .loop_css32x64_sve
+ cbnz w12, .Loop_css32x64_sve
ret
.vl_gt_16_blockcopy_ss_32_64:
cmp x9, #48
@@ -570,7 +570,7 @@ function PFX(blockcopy_ps_32x64_sve)
bgt .vl_gt_16_blockcopy_ps_32_64
lsl x1, x1, #1
mov w12, #8
-.loop_cps32x64_sve:
+.Loop_cps32x64_sve:
sub w12, w12, #1
.rept 4
ld1 {v16.16b-v17.16b}, [x2], x3
@@ -586,7 +586,7 @@ function PFX(blockcopy_ps_32x64_sve)
st1 {v0.8h-v3.8h}, [x0], x1
st1 {v4.8h-v7.8h}, [x0], x1
.endr
- cbnz w12, .loop_cps32x64_sve
+ cbnz w12, .Loop_cps32x64_sve
ret
.vl_gt_16_blockcopy_ps_32_64:
cmp x9, #48
@@ -730,13 +730,13 @@ function PFX(blockcopy_pp_32x\h\()_sve)
rdvl x9, #1
cmp x9, #16
bgt .vl_gt_16_blockcopy_pp_32xN_\h
-.loop_sve_32x\h\():
+.Loop_sve_32x\h\():
sub w12, w12, #1
.rept 8
ld1 {v0.16b-v1.16b}, [x2], x3
st1 {v0.16b-v1.16b}, [x0], x1
.endr
- cbnz w12, .loop_sve_32x\h
+ cbnz w12, .Loop_sve_32x\h
ret
.vl_gt_16_blockcopy_pp_32xN_\h:
ptrue p0.b, vl32
@@ -765,13 +765,13 @@ function PFX(blockcopy_pp_64x\h\()_sve)
rdvl x9, #1
cmp x9, #16
bgt .vl_gt_16_blockcopy_pp_64xN_\h
-.loop_sve_64x\h\():
+.Loop_sve_64x\h\():
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v3.16b}, [x2], x3
st1 {v0.16b-v3.16b}, [x0], x1
.endr
- cbnz w12, .loop_sve_64x\h
+ cbnz w12, .Loop_sve_64x\h
ret
.vl_gt_16_blockcopy_pp_64xN_\h:
cmp x9, #48
@@ -856,7 +856,7 @@ function PFX(cpy2Dto1D_shl_16x16_sve)
bgt .vl_gt_16_cpy2Dto1D_shl_16x16
cpy2Dto1D_shl_start_sve
mov w12, #4
-.loop_cpy2Dto1D_shl_16_sve:
+.Loop_cpy2Dto1D_shl_16_sve:
sub w12, w12, #1
.rept 4
ld1 {v2.16b-v3.16b}, [x1], x2
@@ -864,7 +864,7 @@ function PFX(cpy2Dto1D_shl_16x16_sve)
sshl v3.8h, v3.8h, v0.8h
st1 {v2.16b-v3.16b}, [x0], #32
.endr
- cbnz w12, .loop_cpy2Dto1D_shl_16_sve
+ cbnz w12, .Loop_cpy2Dto1D_shl_16_sve
ret
.vl_gt_16_cpy2Dto1D_shl_16x16:
ptrue p0.h, vl16
@@ -885,7 +885,7 @@ function PFX(cpy2Dto1D_shl_32x32_sve)
bgt .vl_gt_16_cpy2Dto1D_shl_32x32
cpy2Dto1D_shl_start_sve
mov w12, #16
-.loop_cpy2Dto1D_shl_32_sve:
+.Loop_cpy2Dto1D_shl_32_sve:
sub w12, w12, #1
.rept 2
ld1 {v2.16b-v5.16b}, [x1], x2
@@ -895,7 +895,7 @@ function PFX(cpy2Dto1D_shl_32x32_sve)
sshl v5.8h, v5.8h, v0.8h
st1 {v2.16b-v5.16b}, [x0], #64
.endr
- cbnz w12, .loop_cpy2Dto1D_shl_32_sve
+ cbnz w12, .Loop_cpy2Dto1D_shl_32_sve
ret
.vl_gt_16_cpy2Dto1D_shl_32x32:
cmp x9, #48
@@ -931,7 +931,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve)
cpy2Dto1D_shl_start_sve
mov w12, #32
sub x2, x2, #64
-.loop_cpy2Dto1D_shl_64_sve:
+.Loop_cpy2Dto1D_shl_64_sve:
sub w12, w12, #1
.rept 2
ld1 {v2.16b-v5.16b}, [x1], #64
@@ -947,7 +947,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve)
st1 {v2.16b-v5.16b}, [x0], #64
st1 {v16.16b-v19.16b}, [x0], #64
.endr
- cbnz w12, .loop_cpy2Dto1D_shl_64_sve
+ cbnz w12, .Loop_cpy2Dto1D_shl_64_sve
ret
.vl_gt_16_cpy2Dto1D_shl_64x64:
dup z0.h, w3
@@ -1055,7 +1055,7 @@ function PFX(cpy2Dto1D_shr_32x32_sve)
bgt .vl_gt_16_cpy2Dto1D_shr_32x32
cpy2Dto1D_shr_start
mov w12, #16
-.loop_cpy2Dto1D_shr_32_sve:
+.Loop_cpy2Dto1D_shr_32_sve:
sub w12, w12, #1
.rept 2
ld1 {v2.8h-v5.8h}, [x1], x2
@@ -1069,7 +1069,7 @@ function PFX(cpy2Dto1D_shr_32x32_sve)
sshl v5.8h, v5.8h, v0.8h
st1 {v2.8h-v5.8h}, [x0], #64
.endr
- cbnz w12, .loop_cpy2Dto1D_shr_32_sve
+ cbnz w12, .Loop_cpy2Dto1D_shr_32_sve
ret
.vl_gt_16_cpy2Dto1D_shr_32x32:
dup z0.h, w3
@@ -1218,7 +1218,7 @@ function PFX(cpy1Dto2D_shr_16x16_sve)
bgt .vl_gt_16_cpy1Dto2D_shr_16x16
cpy1Dto2D_shr_start
mov w12, #4
-.loop_cpy1Dto2D_shr_16:
+.Loop_cpy1Dto2D_shr_16:
sub w12, w12, #1
.rept 4
ld1 {v2.8h-v3.8h}, [x1], #32
@@ -1228,7 +1228,7 @@ function PFX(cpy1Dto2D_shr_16x16_sve)
sshl v3.8h, v3.8h, v0.8h
st1 {v2.8h-v3.8h}, [x0], x2
.endr
- cbnz w12, .loop_cpy1Dto2D_shr_16
+ cbnz w12, .Loop_cpy1Dto2D_shr_16
ret
.vl_gt_16_cpy1Dto2D_shr_16x16:
dup z0.h, w3
@@ -1254,7 +1254,7 @@ function PFX(cpy1Dto2D_shr_32x32_sve)
bgt .vl_gt_16_cpy1Dto2D_shr_32x32
cpy1Dto2D_shr_start
mov w12, #16
-.loop_cpy1Dto2D_shr_32_sve:
+.Loop_cpy1Dto2D_shr_32_sve:
sub w12, w12, #1
.rept 2
ld1 {v2.16b-v5.16b}, [x1], #64
@@ -1268,7 +1268,7 @@ function PFX(cpy1Dto2D_shr_32x32_sve)
sshl v5.8h, v5.8h, v0.8h
st1 {v2.16b-v5.16b}, [x0], x2
.endr
- cbnz w12, .loop_cpy1Dto2D_shr_32_sve
+ cbnz w12, .Loop_cpy1Dto2D_shr_32_sve
ret
.vl_gt_16_cpy1Dto2D_shr_32x32:
dup z0.h, w3
diff --git a/source/common/aarch64/blockcopy8.S b/source/common/aarch64/blockcopy8.S
index 495ee7ea2..1ad371c57 100644
--- a/source/common/aarch64/blockcopy8.S
+++ b/source/common/aarch64/blockcopy8.S
@@ -86,7 +86,7 @@ function PFX(blockcopy_sp_32x32_neon)
lsl x3, x3, #1
movrel x11, xtn_xtn2_table
ld1 {v31.16b}, [x11]
-.loop_csp32:
+.Loop_csp32:
sub w12, w12, #1
.rept 4
ld1 {v0.8h-v3.8h}, [x2], x3
@@ -98,7 +98,7 @@ function PFX(blockcopy_sp_32x32_neon)
st1 {v0.16b-v1.16b}, [x0], x1
st1 {v2.16b-v3.16b}, [x0], x1
.endr
- cbnz w12, .loop_csp32
+ cbnz w12, .Loop_csp32
ret
endfunc
@@ -108,7 +108,7 @@ function PFX(blockcopy_sp_64x64_neon)
sub x3, x3, #64
movrel x11, xtn_xtn2_table
ld1 {v31.16b}, [x11]
-.loop_csp64:
+.Loop_csp64:
sub w12, w12, #1
.rept 4
ld1 {v0.8h-v3.8h}, [x2], #64
@@ -119,7 +119,7 @@ function PFX(blockcopy_sp_64x64_neon)
tbl v3.16b, {v6.16b,v7.16b}, v31.16b
st1 {v0.16b-v3.16b}, [x0], x1
.endr
- cbnz w12, .loop_csp64
+ cbnz w12, .Loop_csp64
ret
endfunc
@@ -168,7 +168,7 @@ endfunc
function PFX(blockcopy_ps_32x32_neon)
lsl x1, x1, #1
mov w12, #4
-.loop_cps32:
+.Loop_cps32:
sub w12, w12, #1
.rept 4
ld1 {v16.16b-v17.16b}, [x2], x3
@@ -184,7 +184,7 @@ function PFX(blockcopy_ps_32x32_neon)
st1 {v0.8h-v3.8h}, [x0], x1
st1 {v4.8h-v7.8h}, [x0], x1
.endr
- cbnz w12, .loop_cps32
+ cbnz w12, .Loop_cps32
ret
endfunc
@@ -192,7 +192,7 @@ function PFX(blockcopy_ps_64x64_neon)
lsl x1, x1, #1
sub x1, x1, #64
mov w12, #16
-.loop_cps64:
+.Loop_cps64:
sub w12, w12, #1
.rept 4
ld1 {v16.16b-v19.16b}, [x2], x3
@@ -207,7 +207,7 @@ function PFX(blockcopy_ps_64x64_neon)
st1 {v0.8h-v3.8h}, [x0], #64
st1 {v4.8h-v7.8h}, [x0], x1
.endr
- cbnz w12, .loop_cps64
+ cbnz w12, .Loop_cps64
ret
endfunc
@@ -252,13 +252,13 @@ function PFX(blockcopy_ss_32x32_neon)
lsl x1, x1, #1
lsl x3, x3, #1
mov w12, #4
-.loop_css32:
+.Loop_css32:
sub w12, w12, #1
.rept 8
ld1 {v0.8h-v3.8h}, [x2], x3
st1 {v0.8h-v3.8h}, [x0], x1
.endr
- cbnz w12, .loop_css32
+ cbnz w12, .Loop_css32
ret
endfunc
@@ -268,7 +268,7 @@ function PFX(blockcopy_ss_64x64_neon)
lsl x3, x3, #1
sub x3, x3, #64
mov w12, #8
-.loop_css64:
+.Loop_css64:
sub w12, w12, #1
.rept 8
ld1 {v0.8h-v3.8h}, [x2], #64
@@ -276,7 +276,7 @@ function PFX(blockcopy_ss_64x64_neon)
st1 {v0.8h-v3.8h}, [x0], #64
st1 {v4.8h-v7.8h}, [x0], x1
.endr
- cbnz w12, .loop_css64
+ cbnz w12, .Loop_css64
ret
endfunc
@@ -321,13 +321,13 @@ function PFX(blockcopy_ss_32x64_neon)
lsl x1, x1, #1
lsl x3, x3, #1
mov w12, #8
-.loop_css32x64:
+.Loop_css32x64:
sub w12, w12, #1
.rept 8
ld1 {v0.8h-v3.8h}, [x2], x3
st1 {v0.8h-v3.8h}, [x0], x1
.endr
- cbnz w12, .loop_css32x64
+ cbnz w12, .Loop_css32x64
ret
endfunc
@@ -376,7 +376,7 @@ endfunc
function PFX(blockcopy_ps_32x64_neon)
lsl x1, x1, #1
mov w12, #8
-.loop_cps32x64:
+.Loop_cps32x64:
sub w12, w12, #1
.rept 4
ld1 {v16.16b-v17.16b}, [x2], x3
@@ -392,7 +392,7 @@ function PFX(blockcopy_ps_32x64_neon)
st1 {v0.8h-v3.8h}, [x0], x1
st1 {v4.8h-v7.8h}, [x0], x1
.endr
- cbnz w12, .loop_cps32x64
+ cbnz w12, .Loop_cps32x64
ret
endfunc
@@ -443,7 +443,7 @@ function PFX(blockcopy_sp_32x64_neon)
lsl x3, x3, #1
movrel x11, xtn_xtn2_table
ld1 {v31.16b}, [x11]
-.loop_csp32x64:
+.Loop_csp32x64:
sub w12, w12, #1
.rept 4
ld1 {v0.8h-v3.8h}, [x2], x3
@@ -455,7 +455,7 @@ function PFX(blockcopy_sp_32x64_neon)
st1 {v0.16b-v1.16b}, [x0], x1
st1 {v2.16b-v3.16b}, [x0], x1
.endr
- cbnz w12, .loop_csp32x64
+ cbnz w12, .Loop_csp32x64
ret
endfunc
@@ -595,13 +595,13 @@ blockcopy_pp_8xN_neon 32
function PFX(blockcopy_pp_8x64_neon)
mov w12, #4
-.loop_pp_8x64:
+.Loop_pp_8x64:
sub w12, w12, #1
.rept 16
ld1 {v0.4h}, [x2], x3
st1 {v0.4h}, [x0], x1
.endr
- cbnz w12, .loop_pp_8x64
+ cbnz w12, .Loop_pp_8x64
ret
endfunc
@@ -623,13 +623,13 @@ blockcopy_pp_16xN_neon 16
.macro blockcopy_pp_16xN1_neon h
function PFX(blockcopy_pp_16x\h\()_neon)
mov w12, #\h / 8
-.loop_16x\h\():
+.Loop_16x\h\():
.rept 8
ld1 {v0.8h}, [x2], x3
st1 {v0.8h}, [x0], x1
.endr
sub w12, w12, #1
- cbnz w12, .loop_16x\h
+ cbnz w12, .Loop_16x\h
ret
endfunc
.endm
@@ -651,38 +651,38 @@ endfunc
function PFX(blockcopy_pp_12x32_neon)
sub x1, x1, #8
mov w12, #4
-.loop_pp_12x32:
+.Loop_pp_12x32:
sub w12, w12, #1
.rept 8
ld1 {v0.16b}, [x2], x3
str d0, [x0], #8
st1 {v0.s}[2], [x0], x1
.endr
- cbnz w12, .loop_pp_12x32
+ cbnz w12, .Loop_pp_12x32
ret
endfunc
function PFX(blockcopy_pp_24x32_neon)
mov w12, #4
-.loop_24x32:
+.Loop_24x32:
sub w12, w12, #1
.rept 8
ld1 {v0.8b-v2.8b}, [x2], x3
st1 {v0.8b-v2.8b}, [x0], x1
.endr
- cbnz w12, .loop_24x32
+ cbnz w12, .Loop_24x32
ret
endfunc
function PFX(blockcopy_pp_24x64_neon)
mov w12, #4
-.loop_24x64:
+.Loop_24x64:
sub w12, w12, #1
.rept 16
ld1 {v0.8b-v2.8b}, [x2], x3
st1 {v0.8b-v2.8b}, [x0], x1
.endr
- cbnz w12, .loop_24x64
+ cbnz w12, .Loop_24x64
ret
endfunc
@@ -697,13 +697,13 @@ endfunc
.macro blockcopy_pp_32xN_neon h
function PFX(blockcopy_pp_32x\h\()_neon)
mov w12, #\h / 8
-.loop_32x\h\():
+.Loop_32x\h\():
sub w12, w12, #1
.rept 8
ld1 {v0.16b-v1.16b}, [x2], x3
st1 {v0.16b-v1.16b}, [x0], x1
.endr
- cbnz w12, .loop_32x\h
+ cbnz w12, .Loop_32x\h
ret
endfunc
.endm
@@ -716,26 +716,26 @@ blockcopy_pp_32xN_neon 48
function PFX(blockcopy_pp_48x64_neon)
mov w12, #8
-.loop_48x64:
+.Loop_48x64:
sub w12, w12, #1
.rept 8
ld1 {v0.16b-v2.16b}, [x2], x3
st1 {v0.16b-v2.16b}, [x0], x1
.endr
- cbnz w12, .loop_48x64
+ cbnz w12, .Loop_48x64
ret
endfunc
.macro blockcopy_pp_64xN_neon h
function PFX(blockcopy_pp_64x\h\()_neon)
mov w12, #\h / 4
-.loop_64x\h\():
+.Loop_64x\h\():
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v3.16b}, [x2], x3
st1 {v0.16b-v3.16b}, [x0], x1
.endr
- cbnz w12, .loop_64x\h
+ cbnz w12, .Loop_64x\h
ret
endfunc
.endm
@@ -950,11 +950,11 @@ function PFX(count_nonzero_32_neon)
trn1 v16.16b, v16.16b, v17.16b
movi v18.16b, #0
mov w12, #16
-.loop_count_nonzero_32:
+.Loop_count_nonzero_32:
sub w12, w12, #1
COUNT_NONZERO_8
add v18.16b, v18.16b, v0.16b
- cbnz w12, .loop_count_nonzero_32
+ cbnz w12, .Loop_count_nonzero_32
uaddlv s0, v18.8h
fmov w0, s0
@@ -994,7 +994,7 @@ endfunc
function PFX(cpy2Dto1D_shl_16x16_neon)
cpy2Dto1D_shl_start
mov w12, #4
-.loop_cpy2Dto1D_shl_16:
+.Loop_cpy2Dto1D_shl_16:
sub w12, w12, #1
.rept 4
ld1 {v2.16b-v3.16b}, [x1], x2
@@ -1002,14 +1002,14 @@ function PFX(cpy2Dto1D_shl_16x16_neon)
sshl v3.8h, v3.8h, v0.8h
st1 {v2.16b-v3.16b}, [x0], #32
.endr
- cbnz w12, .loop_cpy2Dto1D_shl_16
+ cbnz w12, .Loop_cpy2Dto1D_shl_16
ret
endfunc
function PFX(cpy2Dto1D_shl_32x32_neon)
cpy2Dto1D_shl_start
mov w12, #16
-.loop_cpy2Dto1D_shl_32:
+.Loop_cpy2Dto1D_shl_32:
sub w12, w12, #1
.rept 2
ld1 {v2.16b-v5.16b}, [x1], x2
@@ -1019,7 +1019,7 @@ function PFX(cpy2Dto1D_shl_32x32_neon)
sshl v5.8h, v5.8h, v0.8h
st1 {v2.16b-v5.16b}, [x0], #64
.endr
- cbnz w12, .loop_cpy2Dto1D_shl_32
+ cbnz w12, .Loop_cpy2Dto1D_shl_32
ret
endfunc
@@ -1027,7 +1027,7 @@ function PFX(cpy2Dto1D_shl_64x64_neon)
cpy2Dto1D_shl_start
mov w12, #32
sub x2, x2, #64
-.loop_cpy2Dto1D_shl_64:
+.Loop_cpy2Dto1D_shl_64:
sub w12, w12, #1
.rept 2
ld1 {v2.16b-v5.16b}, [x1], #64
@@ -1043,7 +1043,7 @@ function PFX(cpy2Dto1D_shl_64x64_neon)
st1 {v2.16b-v5.16b}, [x0], #64
st1 {v16.16b-v19.16b}, [x0], #64
.endr
- cbnz w12, .loop_cpy2Dto1D_shl_64
+ cbnz w12, .Loop_cpy2Dto1D_shl_64
ret
endfunc
@@ -1079,7 +1079,7 @@ endfunc
function PFX(cpy2Dto1D_shr_16x16_neon)
cpy2Dto1D_shr_start
mov w12, #4
-.loop_cpy2Dto1D_shr_16:
+.Loop_cpy2Dto1D_shr_16:
sub w12, w12, #1
.rept 4
ld1 {v2.8h-v3.8h}, [x1], x2
@@ -1089,14 +1089,14 @@ function PFX(cpy2Dto1D_shr_16x16_neon)
sshl v3.8h, v3.8h, v0.8h
st1 {v2.8h-v3.8h}, [x0], #32
.endr
- cbnz w12, .loop_cpy2Dto1D_shr_16
+ cbnz w12, .Loop_cpy2Dto1D_shr_16
ret
endfunc
function PFX(cpy2Dto1D_shr_32x32_neon)
cpy2Dto1D_shr_start
mov w12, #16
-.loop_cpy2Dto1D_shr_32:
+.Loop_cpy2Dto1D_shr_32:
sub w12, w12, #1
.rept 2
ld1 {v2.8h-v5.8h}, [x1], x2
@@ -1110,7 +1110,7 @@ function PFX(cpy2Dto1D_shr_32x32_neon)
sshl v5.8h, v5.8h, v0.8h
st1 {v2.8h-v5.8h}, [x0], #64
.endr
- cbnz w12, .loop_cpy2Dto1D_shr_32
+ cbnz w12, .Loop_cpy2Dto1D_shr_32
ret
endfunc
@@ -1147,7 +1147,7 @@ endfunc
function PFX(cpy1Dto2D_shl_16x16_neon)
cpy1Dto2D_shl_start
mov w12, #4
-.loop_cpy1Dto2D_shl_16:
+.Loop_cpy1Dto2D_shl_16:
sub w12, w12, #1
.rept 4
ld1 {v2.16b-v3.16b}, [x1], #32
@@ -1155,14 +1155,14 @@ function PFX(cpy1Dto2D_shl_16x16_neon)
sshl v3.8h, v3.8h, v0.8h
st1 {v2.16b-v3.16b}, [x0], x2
.endr
- cbnz w12, .loop_cpy1Dto2D_shl_16
+ cbnz w12, .Loop_cpy1Dto2D_shl_16
ret
endfunc
function PFX(cpy1Dto2D_shl_32x32_neon)
cpy1Dto2D_shl_start
mov w12, #16
-.loop_cpy1Dto2D_shl_32:
+.Loop_cpy1Dto2D_shl_32:
sub w12, w12, #1
.rept 2
ld1 {v2.16b-v5.16b}, [x1], #64
@@ -1172,7 +1172,7 @@ function PFX(cpy1Dto2D_shl_32x32_neon)
sshl v5.8h, v5.8h, v0.8h
st1 {v2.16b-v5.16b}, [x0], x2
.endr
- cbnz w12, .loop_cpy1Dto2D_shl_32
+ cbnz w12, .Loop_cpy1Dto2D_shl_32
ret
endfunc
@@ -1180,7 +1180,7 @@ function PFX(cpy1Dto2D_shl_64x64_neon)
cpy1Dto2D_shl_start
mov w12, #32
sub x2, x2, #64
-.loop_cpy1Dto2D_shl_64:
+.Loop_cpy1Dto2D_shl_64:
sub w12, w12, #1
.rept 2
ld1 {v2.16b-v5.16b}, [x1], #64
@@ -1196,7 +1196,7 @@ function PFX(cpy1Dto2D_shl_64x64_neon)
st1 {v2.16b-v5.16b}, [x0], #64
st1 {v16.16b-v19.16b}, [x0], x2
.endr
- cbnz w12, .loop_cpy1Dto2D_shl_64
+ cbnz w12, .Loop_cpy1Dto2D_shl_64
ret
endfunc
@@ -1231,7 +1231,7 @@ endfunc
function PFX(cpy1Dto2D_shr_16x16_neon)
cpy1Dto2D_shr_start
mov w12, #4
-.loop_cpy1Dto2D_shr_16:
+.Loop_cpy1Dto2D_shr_16:
sub w12, w12, #1
.rept 4
ld1 {v2.8h-v3.8h}, [x1], #32
@@ -1241,14 +1241,14 @@ function PFX(cpy1Dto2D_shr_16x16_neon)
sshl v3.8h, v3.8h, v0.8h
st1 {v2.8h-v3.8h}, [x0], x2
.endr
- cbnz w12, .loop_cpy1Dto2D_shr_16
+ cbnz w12, .Loop_cpy1Dto2D_shr_16
ret
endfunc
function PFX(cpy1Dto2D_shr_32x32_neon)
cpy1Dto2D_shr_start
mov w12, #16
-.loop_cpy1Dto2D_shr_32:
+.Loop_cpy1Dto2D_shr_32:
sub w12, w12, #1
.rept 2
ld1 {v2.16b-v5.16b}, [x1], #64
@@ -1262,7 +1262,7 @@ function PFX(cpy1Dto2D_shr_32x32_neon)
sshl v5.8h, v5.8h, v0.8h
st1 {v2.16b-v5.16b}, [x0], x2
.endr
- cbnz w12, .loop_cpy1Dto2D_shr_32
+ cbnz w12, .Loop_cpy1Dto2D_shr_32
ret
endfunc
@@ -1270,7 +1270,7 @@ function PFX(cpy1Dto2D_shr_64x64_neon)
cpy1Dto2D_shr_start
mov w12, #32
sub x2, x2, #64
-.loop_cpy1Dto2D_shr_64:
+.Loop_cpy1Dto2D_shr_64:
sub w12, w12, #1
.rept 2
ld1 {v2.16b-v5.16b}, [x1], #64
@@ -1294,6 +1294,6 @@ function PFX(cpy1Dto2D_shr_64x64_neon)
st1 {v2.16b-v5.16b}, [x0], #64
st1 {v16.16b-v19.16b}, [x0], x2
.endr
- cbnz w12, .loop_cpy1Dto2D_shr_64
+ cbnz w12, .Loop_cpy1Dto2D_shr_64
ret
endfunc
diff --git a/source/common/aarch64/ipfilter-common.S b/source/common/aarch64/ipfilter-common.S
index b7c61ee64..a08c3c165 100644
--- a/source/common/aarch64/ipfilter-common.S
+++ b/source/common/aarch64/ipfilter-common.S
@@ -800,10 +800,10 @@
mov w12, #32
dup v31.8h, w12
qpel_start_\v
-.loop_luma_vpp_\v\()_\w\()x\h:
+.Loop_luma_vpp_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_luma_vpp_w8_\v\()_\w\()x\h:
+.Loop_luma_vpp_w8_\v\()_\w\()x\h:
add x6, x0, x9
.if \w == 8 || \w == 24
qpel_load_32b \v
@@ -833,11 +833,11 @@
add x9, x9, #16
.endif
cmp x9, #\w
- blt .loop_luma_vpp_w8_\v\()_\w\()x\h
+ blt .Loop_luma_vpp_w8_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_luma_vpp_\v\()_\w\()x\h
+ cbnz x5, .Loop_luma_vpp_\v\()_\w\()x\h
ret
.endm
@@ -854,10 +854,10 @@
mov w12, #8192
dup v31.8h, w12
qpel_start_\v
-.loop_ps_\v\()_\w\()x\h:
+.Loop_ps_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_ps_w8_\v\()_\w\()x\h:
+.Loop_ps_w8_\v\()_\w\()x\h:
add x6, x0, x9
.if \w == 8 || \w == 24
qpel_load_32b \v
@@ -885,11 +885,11 @@
add x9, x9, #16
.endif
cmp x9, #\w
- blt .loop_ps_w8_\v\()_\w\()x\h
+ blt .Loop_ps_w8_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_ps_\v\()_\w\()x\h
+ cbnz x5, .Loop_ps_\v\()_\w\()x\h
ret
.endm
@@ -914,10 +914,10 @@
mov x12, #\w
lsl x12, x12, #1
qpel_start_\v\()_1
-.loop_luma_vsp_\v\()_\w\()x\h:
+.Loop_luma_vsp_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_luma_vsp_w8_\v\()_\w\()x\h:
+.Loop_luma_vsp_w8_\v\()_\w\()x\h:
add x6, x0, x9
qpel_load_64b \v
qpel_filter_\v\()_32b_1
@@ -933,11 +933,11 @@
add x9, x9, #8
.endif
cmp x9, x12
- blt .loop_luma_vsp_w8_\v\()_\w\()x\h
+ blt .Loop_luma_vsp_w8_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_luma_vsp_\v\()_\w\()x\h
+ cbnz x5, .Loop_luma_vsp_\v\()_\w\()x\h
ret
.endm
@@ -957,10 +957,10 @@
mov x12, #\w
lsl x12, x12, #1
qpel_start_\v\()_1
-.loop_luma_vss_\v\()_\w\()x\h:
+.Loop_luma_vss_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_luma_vss_w8_\v\()_\w\()x\h:
+.Loop_luma_vss_w8_\v\()_\w\()x\h:
add x6, x0, x9
qpel_load_64b \v
qpel_filter_\v\()_32b_1
@@ -981,11 +981,11 @@
.endif
.endif
cmp x9, x12
- blt .loop_luma_vss_w8_\v\()_\w\()x\h
+ blt .Loop_luma_vss_w8_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_luma_vss_\v\()_\w\()x\h
+ cbnz x5, .Loop_luma_vss_\v\()_\w\()x\h
ret
.endm
@@ -1013,11 +1013,11 @@
.endr
ret
.else
-.loop1_hpp_\v\()_\w\()x\h:
+.Loop1_hpp_\v\()_\w\()x\h:
mov x7, #\w
mov x11, x0
sub x11, x11, #4
-.loop2_hpp_\v\()_\w\()x\h:
+.Loop2_hpp_\v\()_\w\()x\h:
vextin8 \v
qpel_filter_\v\()_32b
hpp_end
@@ -1031,11 +1031,11 @@
str s17, [x2], #4
sub x7, x7, #4
.endif
- cbnz x7, .loop2_hpp_\v\()_\w\()x\h
+ cbnz x7, .Loop2_hpp_\v\()_\w\()x\h
sub x6, x6, #1
add x0, x0, x1
add x2, x2, x3
- cbnz x6, .loop1_hpp_\v\()_\w\()x\h
+ cbnz x6, .Loop1_hpp_\v\()_\w\()x\h
ret
.endif
.endm
@@ -1051,7 +1051,7 @@
dup v31.8h, w12
qpel_start_\v
.if \w == 4
-.loop_hps_\v\()_\w\()x\h\():
+.Loop_hps_\v\()_\w\()x\h\():
mov x11, x0
sub x11, x11, #4
vextin8 \v
@@ -1061,14 +1061,14 @@
sub w6, w6, #1
add x0, x0, x1
add x2, x2, x3
- cbnz w6, .loop_hps_\v\()_\w\()x\h
+ cbnz w6, .Loop_hps_\v\()_\w\()x\h
ret
.else
-.loop1_hps_\v\()_\w\()x\h\():
+.Loop1_hps_\v\()_\w\()x\h\():
mov w7, #\w
mov x11, x0
sub x11, x11, #4
-.loop2_hps_\v\()_\w\()x\h\():
+.Loop2_hps_\v\()_\w\()x\h\():
.if \w == 8 || \w == 12 || \w == 24
vextin8 \v
qpel_filter_\v\()_32b
@@ -1092,11 +1092,11 @@
sub w7, w7, #16
sub x11, x11, #16
.endif
- cbnz w7, .loop2_hps_\v\()_\w\()x\h
+ cbnz w7, .Loop2_hps_\v\()_\w\()x\h
sub w6, w6, #1
add x0, x0, x1
add x2, x2, x3
- cbnz w6, .loop1_hps_\v\()_\w\()x\h
+ cbnz w6, .Loop1_hps_\v\()_\w\()x\h
ret
.endif
.endm
@@ -1107,10 +1107,10 @@
dup v31.8h, w12
sub x0, x0, x1
mov x5, #\h
-.loop_chroma_vpp_\v\()_\w\()x\h:
+.Loop_chroma_vpp_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_chroma_vpp_w8_\v\()_\w\()x\h:
+.Loop_chroma_vpp_w8_\v\()_\w\()x\h:
add x6, x0, x9
qpel_chroma_load_32b \v
qpel_filter_chroma_\v\()_32b
@@ -1137,11 +1137,11 @@
str d17, [x7], #8
.endif
cmp x9, #\w
- blt .loop_chroma_vpp_w8_\v\()_\w\()x\h
+ blt .Loop_chroma_vpp_w8_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_chroma_vpp_\v\()_\w\()x\h
+ cbnz x5, .Loop_chroma_vpp_\v\()_\w\()x\h
ret
.endm
@@ -1152,10 +1152,10 @@
lsl x3, x3, #1
sub x0, x0, x1
mov x5, #\h
-.loop_vps_\v\()_\w\()x\h:
+.Loop_vps_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_vps_w8_\v\()_\w\()x\h:
+.Loop_vps_w8_\v\()_\w\()x\h:
add x6, x0, x9
qpel_chroma_load_32b \v
qpel_filter_chroma_\v\()_32b
@@ -1180,12 +1180,12 @@
str q17, [x7], #16
.endif
cmp x9, #\w
- blt .loop_vps_w8_\v\()_\w\()x\h
+ blt .Loop_vps_w8_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_vps_\v\()_\w\()x\h
+ cbnz x5, .Loop_vps_\v\()_\w\()x\h
ret
.endm
@@ -1200,10 +1200,10 @@
mov x12, #\w
lsl x12, x12, #1
qpel_start_chroma_\v\()_1
-.loop_vsp_\v\()_\w\()x\h:
+.Loop_vsp_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_vsp_w8_\v\()_\w\()x\h:
+.Loop_vsp_w8_\v\()_\w\()x\h:
add x6, x0, x9
qpel_chroma_load_64b \v
qpel_filter_chroma_\v\()_32b_1
@@ -1223,11 +1223,11 @@
str d17, [x7], #8
.endif
cmp x9, x12
- blt .loop_vsp_w8_\v\()_\w\()x\h
+ blt .Loop_vsp_w8_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_vsp_\v\()_\w\()x\h
+ cbnz x5, .Loop_vsp_\v\()_\w\()x\h
ret
.endm
@@ -1239,7 +1239,7 @@
mov x12, #\w
lsl x12, x12, #1
qpel_start_chroma_\v\()_1
-.loop_vss_\v\()_\w\()x\h:
+.Loop_vss_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
.if \w == 4
@@ -1252,7 +1252,7 @@
add x9, x9, #4
.endr
.else
-.loop_vss_w8_\v\()_\w\()x\h:
+.Loop_vss_w8_\v\()_\w\()x\h:
add x6, x0, x9
qpel_chroma_load_64b \v
qpel_filter_chroma_\v\()_32b_1
@@ -1268,12 +1268,12 @@
add x9, x9, #8
.endif
cmp x9, x12
- blt .loop_vss_w8_\v\()_\w\()x\h
+ blt .Loop_vss_w8_\v\()_\w\()x\h
.endif
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_vss_\v\()_\w\()x\h
+ cbnz x5, .Loop_vss_\v\()_\w\()x\h
ret
.endm
@@ -1284,7 +1284,7 @@
mov w6, #\h
sub x3, x3, #\w
.if \w == 2 || \w == 4 || \w == 6 || \w == 12
-.loop4_chroma_hpp_\v\()_\w\()x\h:
+.Loop4_chroma_hpp_\v\()_\w\()x\h:
mov x11, x0
sub x11, x11, #2
vextin8_chroma \v
@@ -1310,15 +1310,15 @@
sub w6, w6, #1
add x0, x0, x1
add x2, x2, x3
- cbnz w6, .loop4_chroma_hpp_\v\()_\w\()x\h
+ cbnz w6, .Loop4_chroma_hpp_\v\()_\w\()x\h
ret
.else
-.loop2_chroma_hpp_\v\()_\w\()x\h:
+.Loop2_chroma_hpp_\v\()_\w\()x\h:
mov x7, #\w
lsr x7, x7, #3
mov x11, x0
sub x11, x11, #2
-.loop3_chroma_hpp_\v\()_\w\()x\h:
+.Loop3_chroma_hpp_\v\()_\w\()x\h:
.if \w == 8 || \w == 24
vextin8_chroma \v
qpel_filter_chroma_\v\()_32b
@@ -1336,11 +1336,11 @@
sub x7, x7, #2
sub x11, x11, #16
.endif
- cbnz x7, .loop3_chroma_hpp_\v\()_\w\()x\h
+ cbnz x7, .Loop3_chroma_hpp_\v\()_\w\()x\h
sub w6, w6, #1
add x0, x0, x1
add x2, x2, x3
- cbnz w6, .loop2_chroma_hpp_\v\()_\w\()x\h
+ cbnz w6, .Loop2_chroma_hpp_\v\()_\w\()x\h
ret
.endif
.endm
@@ -1397,12 +1397,12 @@
add w10, w10, #3
9:
mov w6, w10
-.loop1_chroma_hps_\v\()_\w\()x\h\():
+.Loop1_chroma_hps_\v\()_\w\()x\h\():
mov x7, #\w
lsr x7, x7, #3
mov x11, x0
sub x11, x11, #2
-.loop2_chroma_hps_\v\()_\w\()x\h\():
+.Loop2_chroma_hps_\v\()_\w\()x\h\():
.if \w == 8 || \w == 24
vextin8_chroma \v
qpel_filter_chroma_\v\()_32b
@@ -1419,11 +1419,11 @@
sub x7, x7, #2
sub x11, x11, #16
.endif
- cbnz x7, .loop2_chroma_hps_\v\()_\w\()x\h\()
+ cbnz x7, .Loop2_chroma_hps_\v\()_\w\()x\h\()
sub w6, w6, #1
add x0, x0, x1
add x2, x2, x3
- cbnz w6, .loop1_chroma_hps_\v\()_\w\()x\h\()
+ cbnz w6, .Loop1_chroma_hps_\v\()_\w\()x\h\()
ret
.endif
.endm
diff --git a/source/common/aarch64/ipfilter-sve2.S b/source/common/aarch64/ipfilter-sve2.S
index 95657db55..525ed1172 100644
--- a/source/common/aarch64/ipfilter-sve2.S
+++ b/source/common/aarch64/ipfilter-sve2.S
@@ -370,10 +370,10 @@
cmp x9, #16
bgt .vl_gt_16_FILTER_LUMA_VPP_\v\()_\w\()x\h
qpel_start_\v
-.loop_luma_vpp_sve2_\v\()_\w\()x\h:
+.Loop_luma_vpp_sve2_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_luma_vpp_w8_sve2_\v\()_\w\()x\h:
+.Loop_luma_vpp_w8_sve2_\v\()_\w\()x\h:
add x6, x0, x9
.if \w == 8 || \w == 24
qpel_load_32b \v
@@ -403,11 +403,11 @@
add x9, x9, #16
.endif
cmp x9, #\w
- blt .loop_luma_vpp_w8_sve2_\v\()_\w\()x\h
+ blt .Loop_luma_vpp_w8_sve2_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_luma_vpp_sve2_\v\()_\w\()x\h
+ cbnz x5, .Loop_luma_vpp_sve2_\v\()_\w\()x\h
ret
.vl_gt_16_FILTER_LUMA_VPP_\v\()_\w\()x\h:
ptrue p0.h, vl8
@@ -522,7 +522,7 @@ function x265_interp_8tap_vert_ps_4x\h\()_sve2
ld1rd {z22.d}, p0/z, [x12, #48]
ld1rd {z23.d}, p0/z, [x12, #56]
-.loop_vps_sve2_4x\h:
+.Loop_vps_sve2_4x\h:
mov x6, x0
ld1b {z0.s}, p0/z, [x6]
@@ -557,7 +557,7 @@ function x265_interp_8tap_vert_ps_4x\h\()_sve2
add x0, x0, x1
sub x4, x4, #1
- cbnz x4, .loop_vps_sve2_4x\h
+ cbnz x4, .Loop_vps_sve2_4x\h
ret
endfunc
.endm
@@ -593,7 +593,7 @@ function x265_interp_8tap_vert_sp_4x\h\()_sve2
ld1rd {z22.d}, p0/z, [x12, #48]
ld1rd {z23.d}, p0/z, [x12, #56]
-.loop_vsp_sve2_4x\h:
+.Loop_vsp_sve2_4x\h:
mov x6, x0
ld1 {v0.8b}, [x6], x1
@@ -630,7 +630,7 @@ function x265_interp_8tap_vert_sp_4x\h\()_sve2
add x0, x0, x1
sub x4, x4, #1
- cbnz x4, .loop_vsp_sve2_4x\h
+ cbnz x4, .Loop_vsp_sve2_4x\h
ret
endfunc
.endm
@@ -654,10 +654,10 @@ LUMA_VSP_4xN_SVE2 16
cmp x14, #16
bgt .vl_gt_16_FILTER_VPS_\v\()_\w\()x\h
qpel_start_\v
-.loop_ps_sve2_\v\()_\w\()x\h:
+.Loop_ps_sve2_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_ps_w8_sve2_\v\()_\w\()x\h:
+.Loop_ps_w8_sve2_\v\()_\w\()x\h:
add x6, x0, x9
.if \w == 8 || \w == 24
qpel_load_32b \v
@@ -685,11 +685,11 @@ LUMA_VSP_4xN_SVE2 16
add x9, x9, #16
.endif
cmp x9, #\w
- blt .loop_ps_w8_sve2_\v\()_\w\()x\h
+ blt .Loop_ps_w8_sve2_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_ps_sve2_\v\()_\w\()x\h
+ cbnz x5, .Loop_ps_sve2_\v\()_\w\()x\h
ret
.vl_gt_16_FILTER_VPS_\v\()_\w\()x\h:
ptrue p0.h, vl8
@@ -796,10 +796,10 @@ LUMA_VPS_SVE2 64, 48
mov x12, #\w
lsl x12, x12, #1
qpel_start_\v\()_1
-.loop_luma_vss_sve2_\v\()_\w\()x\h:
+.Loop_luma_vss_sve2_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_luma_vss_w8_sve2_\v\()_\w\()x\h:
+.Loop_luma_vss_w8_sve2_\v\()_\w\()x\h:
add x6, x0, x9
qpel_load_64b \v
qpel_filter_\v\()_32b_1
@@ -820,11 +820,11 @@ LUMA_VPS_SVE2 64, 48
.endif
.endif
cmp x9, x12
- blt .loop_luma_vss_w8_sve2_\v\()_\w\()x\h
+ blt .Loop_luma_vss_w8_sve2_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_luma_vss_sve2_\v\()_\w\()x\h
+ cbnz x5, .Loop_luma_vss_sve2_\v\()_\w\()x\h
ret
.endm
@@ -884,10 +884,10 @@ LUMA_VSS_SVE2 48, 64
mov z31.h, #32
sub x0, x0, x1
mov x5, #\h
-.loop_chroma_vpp_sve2_\v\()_\w\()x\h:
+.Loop_chroma_vpp_sve2_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_chroma_vpp_w8_sve2_\v\()_\w\()x\h:
+.Loop_chroma_vpp_w8_sve2_\v\()_\w\()x\h:
add x6, x0, x9
qpel_chroma_load_32b_sve2 \v
qpel_filter_chroma_sve2_\v\()_32b
@@ -914,11 +914,11 @@ LUMA_VSS_SVE2 48, 64
str d17, [x7], #8
.endif
cmp x9, #\w
- blt .loop_chroma_vpp_w8_sve2_\v\()_\w\()x\h
+ blt .Loop_chroma_vpp_w8_sve2_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_chroma_vpp_sve2_\v\()_\w\()x\h
+ cbnz x5, .Loop_chroma_vpp_sve2_\v\()_\w\()x\h
ret
.endm
@@ -1008,10 +1008,10 @@ CHROMA_VPP_SVE2 48, 64
lsl x3, x3, #1
sub x0, x0, x1
mov x5, #\h
-.loop_vps_sve2_\v\()_\w\()x\h:
+.Loop_vps_sve2_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
-.loop_vps_w8_sve2_\v\()_\w\()x\h:
+.Loop_vps_w8_sve2_\v\()_\w\()x\h:
add x6, x0, x9
qpel_chroma_load_32b_sve2 \v
qpel_filter_chroma_sve2_\v\()_32b
@@ -1036,12 +1036,12 @@ CHROMA_VPP_SVE2 48, 64
str q17, [x7], #16
.endif
cmp x9, #\w
- blt .loop_vps_w8_sve2_\v\()_\w\()x\h
+ blt .Loop_vps_w8_sve2_\v\()_\w\()x\h
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_vps_sve2_\v\()_\w\()x\h
+ cbnz x5, .Loop_vps_sve2_\v\()_\w\()x\h
ret
.endm
@@ -1170,7 +1170,7 @@ CHROMA_VPS_SVE2 48, 64
mov x12, #\w
lsl x12, x12, #1
qpel_start_chroma_sve2_\v\()_1
-.loop_vss_sve2_\v\()_\w\()x\h:
+.Loop_vss_sve2_\v\()_\w\()x\h:
mov x7, x2
mov x9, #0
.if \w == 4
@@ -1183,7 +1183,7 @@ CHROMA_VPS_SVE2 48, 64
add x9, x9, #4
.endr
.else
-.loop_vss_w8_sve2_\v\()_\w\()x\h:
+.Loop_vss_w8_sve2_\v\()_\w\()x\h:
add x6, x0, x9
qpel_chroma_load_64b \v
qpel_filter_chroma_\v\()_32b_1
@@ -1199,12 +1199,12 @@ CHROMA_VPS_SVE2 48, 64
add x9, x9, #8
.endif
cmp x9, x12
- blt .loop_vss_w8_sve2_\v\()_\w\()x\h
+ blt .Loop_vss_w8_sve2_\v\()_\w\()x\h
.endif
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_vss_sve2_\v\()_\w\()x\h
+ cbnz x5, .Loop_vss_sve2_\v\()_\w\()x\h
ret
.endm
diff --git a/source/common/aarch64/ipfilter.S b/source/common/aarch64/ipfilter.S
index 80624862d..228ffae29 100644
--- a/source/common/aarch64/ipfilter.S
+++ b/source/common/aarch64/ipfilter.S
@@ -85,7 +85,7 @@ function x265_interp_8tap_vert_pp_4x\h\()_neon
ushll v3.8h, v3.8b, #0
mov x9, #\h
-.loop_4x\h:
+.Loop_4x\h:
ld1 {v4.s}[0], [x0], x1
ld1 {v4.s}[1], [x0], x1
ushll v4.8h, v4.8b, #0
@@ -124,7 +124,7 @@ function x265_interp_8tap_vert_pp_4x\h\()_neon
st1 {v16.s}[1], [x2], x3
sub x9, x9, #2
- cbnz x9, .loop_4x\h
+ cbnz x9, .Loop_4x\h
ret
endfunc
.endm
@@ -202,7 +202,7 @@ function x265_interp_8tap_vert_ps_4x\h\()_neon
ld1r {v22.2d}, [x12], #8
ld1r {v23.2d}, [x12], #8
-.loop_vps_4x\h:
+.Loop_vps_4x\h:
mov x6, x0
ld1 {v0.s}[0], [x6], x1
@@ -252,7 +252,7 @@ function x265_interp_8tap_vert_ps_4x\h\()_neon
add x0, x0, x1
sub x4, x4, #1
- cbnz x4, .loop_vps_4x\h
+ cbnz x4, .Loop_vps_4x\h
ret
endfunc
.endm
@@ -331,7 +331,7 @@ function x265_interp_8tap_vert_sp_4x\h\()_neon
ld1r {v21.2d}, [x12], #8
ld1r {v22.2d}, [x12], #8
ld1r {v23.2d}, [x12], #8
-.loop_vsp_4x\h:
+.Loop_vsp_4x\h:
mov x6, x0
ld1 {v0.8b}, [x6], x1
@@ -368,7 +368,7 @@ function x265_interp_8tap_vert_sp_4x\h\()_neon
add x0, x0, x1
sub x4, x4, #1
- cbnz x4, .loop_vsp_4x\h
+ cbnz x4, .Loop_vsp_4x\h
ret
endfunc
.endm
diff --git a/source/common/aarch64/mc-a-sve2.S b/source/common/aarch64/mc-a-sve2.S
index 704bdaed0..e4540ce9b 100644
--- a/source/common/aarch64/mc-a-sve2.S
+++ b/source/common/aarch64/mc-a-sve2.S
@@ -219,7 +219,7 @@ function PFX(pixel_avg_pp_48x64_sve2)
mov x11, #0
whilelt p0.b, x11, x10
mov w12, #8
-.loop_gt_32_pixel_avg_pp_48x64:
+.Loop_gt_32_pixel_avg_pp_48x64:
sub w12, w12, #1
.rept 8
ld1b {z0.b}, p0/z, [x2]
@@ -230,7 +230,7 @@ function PFX(pixel_avg_pp_48x64_sve2)
st1b {z0.b}, p0, [x0]
add x0, x0, x1
.endr
- cbnz w12, .loop_gt_32_pixel_avg_pp_48x64
+ cbnz w12, .Loop_gt_32_pixel_avg_pp_48x64
ret
endfunc
@@ -339,7 +339,7 @@ function PFX(addAvg_6x\h\()_sve2)
mov w12, #\h / 2
ptrue p0.b, vl16
ptrue p2.h, vl6
-.loop_sve2_addavg_6x\h\():
+.Loop_sve2_addavg_6x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x1]
@@ -359,7 +359,7 @@ function PFX(addAvg_6x\h\()_sve2)
add x2, x2, x5
st1b {z2.h}, p2, [x2]
add x2, x2, x5
- cbnz w12, .loop_sve2_addavg_6x\h
+ cbnz w12, .Loop_sve2_addavg_6x\h
ret
endfunc
.endm
@@ -398,7 +398,7 @@ endfunc
function PFX(addAvg_8x\h\()_sve2)
mov w12, #\h / 2
ptrue p0.b, vl16
-.loop_sve2_addavg_8x\h\():
+.Loop_sve2_addavg_8x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x1]
@@ -418,7 +418,7 @@ function PFX(addAvg_8x\h\()_sve2)
add x2, x2, x5
st1b {z2.h}, p0, [x2]
add x2, x2, x5
- cbnz w12, .loop_sve2_addavg_8x\h
+ cbnz w12, .Loop_sve2_addavg_8x\h
ret
endfunc
.endm
@@ -440,7 +440,7 @@ function PFX(addAvg_12x\h\()_sve2)
bgt .vl_gt_16_addAvg_12x\h
ptrue p0.b, vl16
ptrue p1.b, vl8
-.loop_sve2_addavg_12x\h\():
+.Loop_sve2_addavg_12x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x1]
@@ -457,13 +457,13 @@ function PFX(addAvg_12x\h\()_sve2)
st1b {z0.h}, p0, [x2]
st1b {z2.h}, p1, [x2, #1, mul vl]
add x2, x2, x5
- cbnz w12, .loop_sve2_addavg_12x\h
+ cbnz w12, .Loop_sve2_addavg_12x\h
ret
.vl_gt_16_addAvg_12x\h\():
mov x10, #24
mov x11, #0
whilelt p0.b, x11, x10
-.loop_sve2_gt_16_addavg_12x\h\():
+.Loop_sve2_gt_16_addavg_12x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x1]
@@ -476,7 +476,7 @@ function PFX(addAvg_12x\h\()_sve2)
add z2.b, z2.b, #0x80
st1b {z0.h}, p0, [x2]
add x2, x2, x5
- cbnz w12, .loop_sve2_gt_16_addavg_12x\h
+ cbnz w12, .Loop_sve2_gt_16_addavg_12x\h
ret
endfunc
.endm
@@ -491,7 +491,7 @@ function PFX(addAvg_16x\h\()_sve2)
cmp x9, #16
bgt .vl_gt_16_addAvg_16x\h
ptrue p0.b, vl16
-.loop_eq_16_sve2_addavg_16x\h\():
+.Loop_eq_16_sve2_addavg_16x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x1]
@@ -508,13 +508,13 @@ function PFX(addAvg_16x\h\()_sve2)
st1b {z0.h}, p0, [x2]
st1b {z2.h}, p0, [x2, #1, mul vl]
add x2, x2, x5
- cbnz w12, .loop_eq_16_sve2_addavg_16x\h
+ cbnz w12, .Loop_eq_16_sve2_addavg_16x\h
ret
.vl_gt_16_addAvg_16x\h\():
cmp x9, #32
bgt .vl_gt_32_addAvg_16x\h
ptrue p0.b, vl32
-.loop_gt_16_sve2_addavg_16x\h\():
+.Loop_gt_16_sve2_addavg_16x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x1]
@@ -525,13 +525,13 @@ function PFX(addAvg_16x\h\()_sve2)
add z0.b, z0.b, #0x80
st1b {z0.h}, p1, [x2]
add x2, x2, x5
- cbnz w12, .loop_gt_16_sve2_addavg_16x\h
+ cbnz w12, .Loop_gt_16_sve2_addavg_16x\h
ret
.vl_gt_32_addAvg_16x\h\():
mov x10, #48
mov x11, #0
whilelt p0.b, x11, x10
-.loop_gt_32_sve2_addavg_16x\h\():
+.Loop_gt_32_sve2_addavg_16x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
add x0, x0, x3, lsl #1
@@ -541,7 +541,7 @@ function PFX(addAvg_16x\h\()_sve2)
add z0.b, z0.b, #0x80
st1b {z0.h}, p0, [x2]
add x2, x2, x5
- cbnz w12, .loop_gt_32_sve2_addavg_16x\h
+ cbnz w12, .Loop_gt_32_sve2_addavg_16x\h
ret
endfunc
.endm
@@ -561,7 +561,7 @@ function PFX(addAvg_24x\h\()_sve2)
cmp x9, #16
bgt .vl_gt_16_addAvg_24x\h
addAvg_start
-.loop_eq_16_sve2_addavg_24x\h\():
+.Loop_eq_16_sve2_addavg_24x\h\():
sub w12, w12, #1
ld1 {v0.16b-v2.16b}, [x0], x3
ld1 {v3.16b-v5.16b}, [x1], x4
@@ -572,14 +572,14 @@ function PFX(addAvg_24x\h\()_sve2)
sqxtun v1.8b, v1.8h
sqxtun v2.8b, v2.8h
st1 {v0.8b-v2.8b}, [x2], x5
- cbnz w12, .loop_eq_16_sve2_addavg_24x\h
+ cbnz w12, .Loop_eq_16_sve2_addavg_24x\h
ret
.vl_gt_16_addAvg_24x\h\():
cmp x9, #48
bgt .vl_gt_48_addAvg_24x\h
ptrue p0.b, vl32
ptrue p1.b, vl16
-.loop_gt_16_sve2_addavg_24x\h\():
+.Loop_gt_16_sve2_addavg_24x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p1/z, [x0, #1, mul vl]
@@ -596,13 +596,13 @@ function PFX(addAvg_24x\h\()_sve2)
st1b {z0.h}, p0, [x2]
st1b {z1.h}, p1, [x2, #1, mul vl]
add x2, x2, x5
- cbnz w12, .loop_gt_16_sve2_addavg_24x\h
+ cbnz w12, .Loop_gt_16_sve2_addavg_24x\h
ret
.vl_gt_48_addAvg_24x\h\():
mov x10, #48
mov x11, #0
whilelt p0.b, x11, x10
-.loop_gt_48_sve2_addavg_24x\h\():
+.Loop_gt_48_sve2_addavg_24x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z2.b}, p0/z, [x1]
@@ -613,7 +613,7 @@ function PFX(addAvg_24x\h\()_sve2)
add z0.b, z0.b, #0x80
st1b {z0.h}, p0, [x2]
add x2, x2, x5
- cbnz w12, .loop_gt_48_sve2_addavg_24x\h
+ cbnz w12, .Loop_gt_48_sve2_addavg_24x\h
ret
endfunc
.endm
@@ -628,7 +628,7 @@ function PFX(addAvg_32x\h\()_sve2)
cmp x9, #16
bgt .vl_gt_16_addAvg_32x\h
ptrue p0.b, vl16
-.loop_eq_16_sve2_addavg_32x\h\():
+.Loop_eq_16_sve2_addavg_32x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x0, #1, mul vl]
@@ -657,13 +657,13 @@ function PFX(addAvg_32x\h\()_sve2)
st1b {z2.h}, p0, [x2, #2, mul vl]
st1b {z3.h}, p0, [x2, #3, mul vl]
add x2, x2, x5
- cbnz w12, .loop_eq_16_sve2_addavg_32x\h
+ cbnz w12, .Loop_eq_16_sve2_addavg_32x\h
ret
.vl_gt_16_addAvg_32x\h\():
cmp x9, #48
bgt .vl_gt_48_addAvg_32x\h
ptrue p0.b, vl32
-.loop_gt_eq_32_sve2_addavg_32x\h\():
+.Loop_gt_eq_32_sve2_addavg_32x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x0, #1, mul vl]
@@ -680,11 +680,11 @@ function PFX(addAvg_32x\h\()_sve2)
st1b {z0.h}, p0, [x2]
st1b {z1.h}, p0, [x2, #1, mul vl]
add x2, x2, x5
- cbnz w12, .loop_gt_eq_32_sve2_addavg_32x\h
+ cbnz w12, .Loop_gt_eq_32_sve2_addavg_32x\h
ret
.vl_gt_48_addAvg_32x\h\():
ptrue p0.b, vl64
-.loop_eq_64_sve2_addavg_32x\h\():
+.Loop_eq_64_sve2_addavg_32x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x1]
@@ -695,7 +695,7 @@ function PFX(addAvg_32x\h\()_sve2)
add z0.b, z0.b, #0x80
st1b {z0.h}, p0, [x2]
add x2, x2, x5
- cbnz w12, .loop_eq_64_sve2_addavg_32x\h
+ cbnz w12, .Loop_eq_64_sve2_addavg_32x\h
ret
endfunc
.endm
@@ -715,7 +715,7 @@ function PFX(addAvg_48x64_sve2)
addAvg_start
sub x3, x3, #64
sub x4, x4, #64
-.loop_eq_16_sve2_addavg_48x64:
+.Loop_eq_16_sve2_addavg_48x64:
sub w12, w12, #1
ld1 {v0.8h-v3.8h}, [x0], #64
ld1 {v4.8h-v7.8h}, [x1], #64
@@ -734,13 +734,13 @@ function PFX(addAvg_48x64_sve2)
sqxtun v2.8b, v20.8h
sqxtun2 v2.16b, v21.8h
st1 {v0.16b-v2.16b}, [x2], x5
- cbnz w12, .loop_eq_16_sve2_addavg_48x64
+ cbnz w12, .Loop_eq_16_sve2_addavg_48x64
ret
.vl_gt_16_addAvg_48x64:
cmp x9, #48
bgt .vl_gt_48_addAvg_48x64
ptrue p0.b, vl32
-.loop_gt_eq_32_sve2_addavg_48x64:
+.Loop_gt_eq_32_sve2_addavg_48x64:
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x0, #1, mul vl]
@@ -763,14 +763,14 @@ function PFX(addAvg_48x64_sve2)
st1b {z1.h}, p0, [x2, #1, mul vl]
st1b {z2.h}, p0, [x2, #2, mul vl]
add x2, x2, x5
- cbnz w12, .loop_gt_eq_32_sve2_addavg_48x64
+ cbnz w12, .Loop_gt_eq_32_sve2_addavg_48x64
ret
.vl_gt_48_addAvg_48x64:
cmp x9, #112
bgt .vl_gt_112_addAvg_48x64
ptrue p0.b, vl64
ptrue p1.b, vl32
-.loop_gt_48_sve2_addavg_48x64:
+.Loop_gt_48_sve2_addavg_48x64:
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p1/z, [x0, #1, mul vl]
@@ -787,13 +787,13 @@ function PFX(addAvg_48x64_sve2)
st1b {z0.h}, p0, [x2]
st1b {z1.h}, p1, [x2, #1, mul vl]
add x2, x2, x5
- cbnz w12, .loop_gt_48_sve2_addavg_48x64
+ cbnz w12, .Loop_gt_48_sve2_addavg_48x64
ret
.vl_gt_112_addAvg_48x64:
mov x10, #96
mov x11, #0
whilelt p0.b, x11, x10
-.loop_gt_112_sve2_addavg_48x64:
+.Loop_gt_112_sve2_addavg_48x64:
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z4.b}, p0/z, [x1]
@@ -804,7 +804,7 @@ function PFX(addAvg_48x64_sve2)
add z0.b, z0.b, #0x80
st1b {z0.h}, p0, [x2]
add x2, x2, x5
- cbnz w12, .loop_gt_112_sve2_addavg_48x64
+ cbnz w12, .Loop_gt_112_sve2_addavg_48x64
ret
endfunc
@@ -817,7 +817,7 @@ function PFX(addAvg_64x\h\()_sve2)
addAvg_start
sub x3, x3, #64
sub x4, x4, #64
-.loop_eq_16_sve2_addavg_64x\h\():
+.Loop_eq_16_sve2_addavg_64x\h\():
sub w12, w12, #1
ld1 {v0.8h-v3.8h}, [x0], #64
ld1 {v4.8h-v7.8h}, [x1], #64
@@ -840,13 +840,13 @@ function PFX(addAvg_64x\h\()_sve2)
sqxtun v3.8b, v22.8h
sqxtun2 v3.16b, v23.8h
st1 {v0.16b-v3.16b}, [x2], x5
- cbnz w12, .loop_eq_16_sve2_addavg_64x\h
+ cbnz w12, .Loop_eq_16_sve2_addavg_64x\h
ret
.vl_gt_16_addAvg_64x\h\():
cmp x9, #48
bgt .vl_gt_48_addAvg_64x\h
ptrue p0.b, vl32
-.loop_gt_eq_32_sve2_addavg_64x\h\():
+.Loop_gt_eq_32_sve2_addavg_64x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x0, #1, mul vl]
@@ -875,13 +875,13 @@ function PFX(addAvg_64x\h\()_sve2)
st1b {z2.h}, p0, [x2, #2, mul vl]
st1b {z3.h}, p0, [x2, #3, mul vl]
add x2, x2, x5
- cbnz w12, .loop_gt_eq_32_sve2_addavg_64x\h
+ cbnz w12, .Loop_gt_eq_32_sve2_addavg_64x\h
ret
.vl_gt_48_addAvg_64x\h\():
cmp x9, #112
bgt .vl_gt_112_addAvg_64x\h
ptrue p0.b, vl64
-.loop_gt_eq_48_sve2_addavg_64x\h\():
+.Loop_gt_eq_48_sve2_addavg_64x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z1.b}, p0/z, [x0, #1, mul vl]
@@ -898,11 +898,11 @@ function PFX(addAvg_64x\h\()_sve2)
st1b {z0.h}, p0, [x2]
st1b {z1.h}, p0, [x2, #1, mul vl]
add x2, x2, x5
- cbnz w12, .loop_gt_eq_48_sve2_addavg_64x\h
+ cbnz w12, .Loop_gt_eq_48_sve2_addavg_64x\h
ret
.vl_gt_112_addAvg_64x\h\():
ptrue p0.b, vl128
-.loop_gt_eq_128_sve2_addavg_64x\h\():
+.Loop_gt_eq_128_sve2_addavg_64x\h\():
sub w12, w12, #1
ld1b {z0.b}, p0/z, [x0]
ld1b {z4.b}, p0/z, [x1]
@@ -913,7 +913,7 @@ function PFX(addAvg_64x\h\()_sve2)
add z0.b, z0.b, #0x80
st1b {z0.h}, p0, [x2]
add x2, x2, x5
- cbnz w12, .loop_gt_eq_128_sve2_addavg_64x\h
+ cbnz w12, .Loop_gt_eq_128_sve2_addavg_64x\h
ret
endfunc
.endm
diff --git a/source/common/aarch64/mc-a.S b/source/common/aarch64/mc-a.S
index d122b8bb3..8c2878b3e 100644
--- a/source/common/aarch64/mc-a.S
+++ b/source/common/aarch64/mc-a.S
@@ -283,7 +283,7 @@ function PFX(addAvg_6x\h\()_neon)
addAvg_start
mov w12, #\h / 2
sub x5, x5, #4
-.loop_addavg_6x\h:
+.Loop_addavg_6x\h:
sub w12, w12, #1
ld1 {v0.16b}, [x0], x3
ld1 {v1.16b}, [x1], x4
@@ -305,7 +305,7 @@ function PFX(addAvg_6x\h\()_neon)
st1 {v0.h}[2], [x2], x5
str s1, [x2], #4
st1 {v1.h}[2], [x2], x5
- cbnz w12, .loop_addavg_6x\h
+ cbnz w12, .Loop_addavg_6x\h
ret
endfunc
.endm
@@ -344,7 +344,7 @@ endfunc
function PFX(addAvg_8x\h\()_neon)
addAvg_start
mov w12, #\h / 2
-.loop_addavg_8x\h:
+.Loop_addavg_8x\h:
sub w12, w12, #1
ld1 {v0.16b}, [x0], x3
ld1 {v1.16b}, [x1], x4
@@ -364,7 +364,7 @@ function PFX(addAvg_8x\h\()_neon)
sqxtun v1.8b, v1.8h
st1 {v0.8b}, [x2], x5
st1 {v1.8b}, [x2], x5
- cbnz w12, .loop_addavg_8x\h
+ cbnz w12, .Loop_addavg_8x\h
ret
endfunc
.endm
@@ -385,7 +385,7 @@ function PFX(addAvg_12x\h\()_neon)
sub x4, x4, #16
sub x5, x5, #8
mov w12, #\h
-.loop_addAvg_12X\h\():
+.Loop_addAvg_12X\h\():
sub w12, w12, #1
ld1 {v0.16b}, [x0], #16
ld1 {v1.16b}, [x1], #16
@@ -403,7 +403,7 @@ function PFX(addAvg_12x\h\()_neon)
sqxtun v1.8b, v1.8h
st1 {v0.8b}, [x2], #8
st1 {v1.s}[0], [x2], x5
- cbnz w12, .loop_addAvg_12X\h
+ cbnz w12, .Loop_addAvg_12X\h
ret
endfunc
.endm
@@ -415,7 +415,7 @@ addAvg_12xN 32
function PFX(addAvg_16x\h\()_neon)
addAvg_start
mov w12, #\h
-.loop_addavg_16x\h:
+.Loop_addavg_16x\h:
sub w12, w12, #1
ld1 {v0.8h-v1.8h}, [x0], x3
ld1 {v2.8h-v3.8h}, [x1], x4
@@ -424,7 +424,7 @@ function PFX(addAvg_16x\h\()_neon)
sqxtun v0.8b, v0.8h
sqxtun2 v0.16b, v1.8h
st1 {v0.16b}, [x2], x5
- cbnz w12, .loop_addavg_16x\h
+ cbnz w12, .Loop_addavg_16x\h
ret
endfunc
.endm
@@ -441,7 +441,7 @@ addAvg_16xN 64
function PFX(addAvg_24x\h\()_neon)
addAvg_start
mov w12, #\h
-.loop_addavg_24x\h\():
+.Loop_addavg_24x\h\():
sub w12, w12, #1
ld1 {v0.16b-v2.16b}, [x0], x3
ld1 {v3.16b-v5.16b}, [x1], x4
@@ -452,7 +452,7 @@ function PFX(addAvg_24x\h\()_neon)
sqxtun v1.8b, v1.8h
sqxtun v2.8b, v2.8h
st1 {v0.8b-v2.8b}, [x2], x5
- cbnz w12, .loop_addavg_24x\h
+ cbnz w12, .Loop_addavg_24x\h
ret
endfunc
.endm
@@ -464,7 +464,7 @@ addAvg_24xN 64
function PFX(addAvg_32x\h\()_neon)
addAvg_start
mov w12, #\h
-.loop_addavg_32x\h\():
+.Loop_addavg_32x\h\():
sub w12, w12, #1
ld1 {v0.8h-v3.8h}, [x0], x3
ld1 {v4.8h-v7.8h}, [x1], x4
@@ -477,7 +477,7 @@ function PFX(addAvg_32x\h\()_neon)
sqxtun v2.8b, v2.8h
sqxtun v3.8b, v3.8h
st1 {v0.8b-v3.8b}, [x2], x5
- cbnz w12, .loop_addavg_32x\h
+ cbnz w12, .Loop_addavg_32x\h
ret
endfunc
.endm
@@ -494,7 +494,7 @@ function PFX(addAvg_48x64_neon)
sub x3, x3, #64
sub x4, x4, #64
mov w12, #64
-.loop_addavg_48x64:
+.Loop_addavg_48x64:
sub w12, w12, #1
ld1 {v0.8h-v3.8h}, [x0], #64
ld1 {v4.8h-v7.8h}, [x1], #64
@@ -513,7 +513,7 @@ function PFX(addAvg_48x64_neon)
sqxtun v2.8b, v20.8h
sqxtun2 v2.16b, v21.8h
st1 {v0.16b-v2.16b}, [x2], x5
- cbnz w12, .loop_addavg_48x64
+ cbnz w12, .Loop_addavg_48x64
ret
endfunc
@@ -523,7 +523,7 @@ function PFX(addAvg_64x\h\()_neon)
mov w12, #\h
sub x3, x3, #64
sub x4, x4, #64
-.loop_addavg_64x\h\():
+.Loop_addavg_64x\h\():
sub w12, w12, #1
ld1 {v0.8h-v3.8h}, [x0], #64
ld1 {v4.8h-v7.8h}, [x1], #64
@@ -546,7 +546,7 @@ function PFX(addAvg_64x\h\()_neon)
sqxtun v3.8b, v22.8h
sqxtun2 v3.16b, v23.8h
st1 {v0.16b-v3.16b}, [x2], x5
- cbnz w12, .loop_addavg_64x\h
+ cbnz w12, .Loop_addavg_64x\h
ret
endfunc
.endm
diff --git a/source/common/aarch64/p2s-sve.S b/source/common/aarch64/p2s-sve.S
index dc32df2e6..85bb14b3d 100644
--- a/source/common/aarch64/p2s-sve.S
+++ b/source/common/aarch64/p2s-sve.S
@@ -204,7 +204,7 @@ function PFX(filterPixelToShort_32x\h\()_sve)
#else
p2s_start
mov x9, #\h
-.loop_filter_sve_P2S_32x\h:
+.Loop_filter_sve_P2S_32x\h:
sub x9, x9, #1
ld1 {v0.16b-v1.16b}, [x0], x1
ushll v22.8h, v0.8b, #P2S_SHIFT
@@ -216,7 +216,7 @@ function PFX(filterPixelToShort_32x\h\()_sve)
add v24.8h, v24.8h, v31.8h
add v25.8h, v25.8h, v31.8h
st1 {v22.16b-v25.16b}, [x2], x3
- cbnz x9, .loop_filter_sve_P2S_32x\h
+ cbnz x9, .Loop_filter_sve_P2S_32x\h
ret
#endif
endfunc
@@ -331,7 +331,7 @@ function PFX(filterPixelToShort_64x\h\()_sve)
p2s_start
sub x3, x3, #64
mov x9, #\h
-.loop_filter_sve_P2S_64x\h:
+.Loop_filter_sve_P2S_64x\h:
sub x9, x9, #1
ld1 {v0.16b-v3.16b}, [x0], x1
ushll v16.8h, v0.8b, #P2S_SHIFT
@@ -352,7 +352,7 @@ function PFX(filterPixelToShort_64x\h\()_sve)
add v23.8h, v23.8h, v31.8h
st1 {v16.16b-v19.16b}, [x2], #64
st1 {v20.16b-v23.16b}, [x2], x3
- cbnz x9, .loop_filter_sve_P2S_64x\h
+ cbnz x9, .Loop_filter_sve_P2S_64x\h
ret
#endif
endfunc
@@ -422,7 +422,7 @@ function PFX(filterPixelToShort_48x64_sve)
p2s_start
sub x3, x3, #64
mov x9, #64
-.loop_filterP2S_sve_48x64:
+.Loop_filterP2S_sve_48x64:
sub x9, x9, #1
ld1 {v0.16b-v2.16b}, [x0], x1
ushll v16.8h, v0.8b, #P2S_SHIFT
@@ -439,7 +439,7 @@ function PFX(filterPixelToShort_48x64_sve)
add v21.8h, v21.8h, v31.8h
st1 {v16.16b-v19.16b}, [x2], #64
st1 {v20.16b-v21.16b}, [x2], x3
- cbnz x9, .loop_filterP2S_sve_48x64
+ cbnz x9, .Loop_filterP2S_sve_48x64
ret
#endif
endfunc
diff --git a/source/common/aarch64/p2s.S b/source/common/aarch64/p2s.S
index 58301c9bf..b15835b34 100644
--- a/source/common/aarch64/p2s.S
+++ b/source/common/aarch64/p2s.S
@@ -262,7 +262,7 @@ p2s_24xN 64
function PFX(filterPixelToShort_32x\h\()_neon)
p2s_start
mov x9, #\h
-.loop_filterP2S_32x\h:
+.Loop_filterP2S_32x\h:
sub x9, x9, #1
#if HIGH_BIT_DEPTH
ld1 {v0.16b-v3.16b}, [x0], x1
@@ -282,7 +282,7 @@ function PFX(filterPixelToShort_32x\h\()_neon)
add v24.8h, v24.8h, v31.8h
add v25.8h, v25.8h, v31.8h
st1 {v22.16b-v25.16b}, [x2], x3
- cbnz x9, .loop_filterP2S_32x\h
+ cbnz x9, .Loop_filterP2S_32x\h
ret
endfunc
.endm
@@ -302,7 +302,7 @@ function PFX(filterPixelToShort_64x\h\()_neon)
#endif
sub x3, x3, #64
mov x9, #\h
-.loop_filterP2S_64x\h:
+.Loop_filterP2S_64x\h:
sub x9, x9, #1
#if HIGH_BIT_DEPTH
ld1 {v0.16b-v3.16b}, [x0], #64
@@ -336,7 +336,7 @@ function PFX(filterPixelToShort_64x\h\()_neon)
add v23.8h, v23.8h, v31.8h
st1 {v16.16b-v19.16b}, [x2], #64
st1 {v20.16b-v23.16b}, [x2], x3
- cbnz x9, .loop_filterP2S_64x\h
+ cbnz x9, .Loop_filterP2S_64x\h
ret
endfunc
.endm
@@ -353,7 +353,7 @@ function PFX(filterPixelToShort_48x64_neon)
#endif
sub x3, x3, #64
mov x9, #64
-.loop_filterP2S_48x64:
+.Loop_filterP2S_48x64:
sub x9, x9, #1
#if HIGH_BIT_DEPTH
ld1 {v0.16b-v3.16b}, [x0], #64
@@ -381,6 +381,6 @@ function PFX(filterPixelToShort_48x64_neon)
add v21.8h, v21.8h, v31.8h
st1 {v16.16b-v19.16b}, [x2], #64
st1 {v20.16b-v21.16b}, [x2], x3
- cbnz x9, .loop_filterP2S_48x64
+ cbnz x9, .Loop_filterP2S_48x64
ret
endfunc
diff --git a/source/common/aarch64/pixel-util-sve.S b/source/common/aarch64/pixel-util-sve.S
index 715fcc1cb..c1d6b4129 100644
--- a/source/common/aarch64/pixel-util-sve.S
+++ b/source/common/aarch64/pixel-util-sve.S
@@ -333,7 +333,7 @@ function PFX(quant_sve)
eor w10, w10, w10
eor z17.d, z17.d, z17.d
-.loop_quant_sve:
+.Loop_quant_sve:
ld1 {v18.4h}, [x0], #8
ld1 {v7.4s}, [x1], #16
sxtl v6.4s, v18.4h
@@ -364,7 +364,7 @@ function PFX(quant_sve)
st1 {v5.4h}, [x3], #8
subs w6, w6, #1
- b.ne .loop_quant_sve
+ b.ne .Loop_quant_sve
addv s4, v4.4s
mov w9, v4.s[0]
diff --git a/source/common/aarch64/pixel-util-sve2.S b/source/common/aarch64/pixel-util-sve2.S
index dbd138f62..2af5d63c1 100644
--- a/source/common/aarch64/pixel-util-sve2.S
+++ b/source/common/aarch64/pixel-util-sve2.S
@@ -64,11 +64,11 @@ function PFX(pixel_var_16x16_sve2)
bgt .vl_gt_16_pixel_var_16x16
pixel_var_start
mov w12, #16
-.loop_var_16_sve2:
+.Loop_var_16_sve2:
sub w12, w12, #1
ld1 {v4.16b}, [x0], x1
pixel_var_1 v4
- cbnz w12, .loop_var_16_sve2
+ cbnz w12, .Loop_var_16_sve2
pixel_var_end
ret
.vl_gt_16_pixel_var_16x16:
@@ -95,12 +95,12 @@ function PFX(pixel_var_32x32_sve2)
bgt .vl_gt_16_pixel_var_32x32
pixel_var_start
mov w12, #32
-.loop_var_32_sve2:
+.Loop_var_32_sve2:
sub w12, w12, #1
ld1 {v4.16b-v5.16b}, [x0], x1
pixel_var_1 v4
pixel_var_1 v5
- cbnz w12, .loop_var_32_sve2
+ cbnz w12, .Loop_var_32_sve2
pixel_var_end
ret
.vl_gt_16_pixel_var_32x32:
@@ -150,14 +150,14 @@ function PFX(pixel_var_64x64_sve2)
bgt .vl_gt_16_pixel_var_64x64
pixel_var_start
mov w12, #64
-.loop_var_64_sve2:
+.Loop_var_64_sve2:
sub w12, w12, #1
ld1 {v4.16b-v7.16b}, [x0], x1
pixel_var_1 v4
pixel_var_1 v5
pixel_var_1 v6
pixel_var_1 v7
- cbnz w12, .loop_var_64_sve2
+ cbnz w12, .Loop_var_64_sve2
pixel_var_end
ret
.vl_gt_16_pixel_var_64x64:
@@ -268,7 +268,7 @@ function PFX(getResidual32_sve2)
bgt .vl_gt_16_getResidual32
lsl x4, x3, #1
mov w12, #4
-.loop_residual_32:
+.Loop_residual_32:
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v1.16b}, [x0], x3
@@ -286,7 +286,7 @@ function PFX(getResidual32_sve2)
st1 {v16.8h-v19.8h}, [x2], x4
st1 {v20.8h-v23.8h}, [x2], x4
.endr
- cbnz w12, .loop_residual_32
+ cbnz w12, .Loop_residual_32
ret
.vl_gt_16_getResidual32:
cmp x9, #48
@@ -323,7 +323,7 @@ function PFX(pixel_sub_ps_32x32_sve2)
bgt .vl_gt_16_pixel_sub_ps_32x32
lsl x1, x1, #1
mov w12, #4
-.loop_sub_ps_32_sve2:
+.Loop_sub_ps_32_sve2:
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v1.16b}, [x2], x4
@@ -341,7 +341,7 @@ function PFX(pixel_sub_ps_32x32_sve2)
st1 {v16.8h-v19.8h}, [x0], x1
st1 {v20.8h-v23.8h}, [x0], x1
.endr
- cbnz w12, .loop_sub_ps_32_sve2
+ cbnz w12, .Loop_sub_ps_32_sve2
ret
.vl_gt_16_pixel_sub_ps_32x32:
cmp x9, #48
@@ -387,7 +387,7 @@ function PFX(pixel_sub_ps_64x64_sve2)
lsl x1, x1, #1
sub x1, x1, #64
mov w12, #16
-.loop_sub_ps_64_sve2:
+.Loop_sub_ps_64_sve2:
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v3.16b}, [x2], x4
@@ -403,7 +403,7 @@ function PFX(pixel_sub_ps_64x64_sve2)
st1 {v16.8h-v19.8h}, [x0], #64
st1 {v20.8h-v23.8h}, [x0], x1
.endr
- cbnz w12, .loop_sub_ps_64_sve2
+ cbnz w12, .Loop_sub_ps_64_sve2
ret
.vl_gt_16_pixel_sub_ps_64x64:
rdvl x9, #1
@@ -473,7 +473,7 @@ function PFX(pixel_sub_ps_32x64_sve2)
bgt .vl_gt_16_pixel_sub_ps_32x64
lsl x1, x1, #1
mov w12, #8
-.loop_sub_ps_32x64_sve2:
+.Loop_sub_ps_32x64_sve2:
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v1.16b}, [x2], x4
@@ -491,7 +491,7 @@ function PFX(pixel_sub_ps_32x64_sve2)
st1 {v16.8h-v19.8h}, [x0], x1
st1 {v20.8h-v23.8h}, [x0], x1
.endr
- cbnz w12, .loop_sub_ps_32x64_sve2
+ cbnz w12, .Loop_sub_ps_32x64_sve2
ret
.vl_gt_16_pixel_sub_ps_32x64:
cmp x9, #48
@@ -609,7 +609,7 @@ pixel_add_ps_16xN_sve2 32
bgt .vl_gt_16_pixel_add_ps_32x\h
lsl x5, x5, #1
mov w12, #\h / 4
-.loop_add_ps__sve2_32x\h\():
+.Loop_add_ps__sve2_32x\h\():
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v1.16b}, [x2], x4
@@ -628,7 +628,7 @@ pixel_add_ps_16xN_sve2 32
sqxtun2 v5.16b, v27.8h
st1 {v4.16b-v5.16b}, [x0], x1
.endr
- cbnz w12, .loop_add_ps__sve2_32x\h
+ cbnz w12, .Loop_add_ps__sve2_32x\h
ret
.vl_gt_16_pixel_add_ps_32x\h\():
cmp x9, #48
@@ -1157,7 +1157,7 @@ function PFX(ssimDist16_sve2)
bgt .vl_gt_16_ssimDist16
ssimDist_start
ptrue p0.s, vl4
-.loop_ssimDist16_sve2:
+.Loop_ssimDist16_sve2:
sub w12, w12, #1
ld1b {z4.s}, p0/z, [x0]
ld1b {z5.s}, p0/z, [x0, #1, mul vl]
@@ -1171,7 +1171,7 @@ function PFX(ssimDist16_sve2)
add x2, x2, x3
ssimDist_1_sve2 z4, z5, z8, z9
ssimDist_1_sve2 z6, z7, z10, z11
- cbnz w12, .loop_ssimDist16_sve2
+ cbnz w12, .Loop_ssimDist16_sve2
ssimDist_end
ret
.vl_gt_16_ssimDist16:
@@ -1217,7 +1217,7 @@ function PFX(ssimDist32_sve2)
bgt .vl_gt_16_ssimDist32
ssimDist_start
ptrue p0.s, vl4
-.loop_ssimDist32_sve2:
+.Loop_ssimDist32_sve2:
sub w12, w12, #1
ld1b {z2.s}, p0/z, [x0]
ld1b {z3.s}, p0/z, [x0, #1, mul vl]
@@ -1241,7 +1241,7 @@ function PFX(ssimDist32_sve2)
ssimDist_1_sve2 z4, z5, z12, z13
ssimDist_1_sve2 z6, z7, z14, z15
ssimDist_1_sve2 z8, z9, z30, z31
- cbnz w12, .loop_ssimDist32_sve2
+ cbnz w12, .Loop_ssimDist32_sve2
ssimDist_end
ret
.vl_gt_16_ssimDist32:
@@ -1309,7 +1309,7 @@ function PFX(ssimDist64_sve2)
bgt .vl_gt_16_ssimDist64
ssimDist_start
ptrue p0.s, vl4
-.loop_ssimDist64_sve2:
+.Loop_ssimDist64_sve2:
sub w12, w12, #1
ld1b {z2.s}, p0/z, [x0]
ld1b {z3.s}, p0/z, [x0, #1, mul vl]
@@ -1357,7 +1357,7 @@ function PFX(ssimDist64_sve2)
ssimDist_1_sve2 z8, z9, z29, z30
add x0, x0, x1
add x2, x2, x3
- cbnz w12, .loop_ssimDist64_sve2
+ cbnz w12, .Loop_ssimDist64_sve2
ssimDist_end
ret
.vl_gt_16_ssimDist64:
@@ -1482,7 +1482,7 @@ function PFX(normFact16_sve2)
bgt .vl_gt_16_normFact16
normFact_start
ptrue p0.s, vl4
-.loop_normFact16_sve2:
+.Loop_normFact16_sve2:
sub w12, w12, #1
ld1b {z4.s}, p0/z, [x0]
ld1b {z5.s}, p0/z, [x0, #1, mul vl]
@@ -1491,7 +1491,7 @@ function PFX(normFact16_sve2)
add x0, x0, x1
normFact_1_sve2 z4, z5
normFact_1_sve2 z6, z7
- cbnz w12, .loop_normFact16_sve2
+ cbnz w12, .Loop_normFact16_sve2
normFact_end
ret
.vl_gt_16_normFact16:
@@ -1529,7 +1529,7 @@ function PFX(normFact32_sve2)
bgt .vl_gt_16_normFact32
normFact_start
ptrue p0.s, vl4
-.loop_normFact32_sve2:
+.Loop_normFact32_sve2:
sub w12, w12, #1
ld1b {z4.s}, p0/z, [x0]
ld1b {z5.s}, p0/z, [x0, #1, mul vl]
@@ -1544,7 +1544,7 @@ function PFX(normFact32_sve2)
normFact_1_sve2 z6, z7
normFact_1_sve2 z8, z9
normFact_1_sve2 z10, z11
- cbnz w12, .loop_normFact32_sve2
+ cbnz w12, .Loop_normFact32_sve2
normFact_end
ret
.vl_gt_16_normFact32:
@@ -1599,7 +1599,7 @@ function PFX(normFact64_sve2)
bgt .vl_gt_16_normFact64
normFact_start
ptrue p0.s, vl4
-.loop_normFact64_sve2:
+.Loop_normFact64_sve2:
sub w12, w12, #1
ld1b {z4.s}, p0/z, [x0]
ld1b {z5.s}, p0/z, [x0, #1, mul vl]
@@ -1628,7 +1628,7 @@ function PFX(normFact64_sve2)
normFact_1_sve2 z8, z9
normFact_1_sve2 z10, z11
add x0, x0, x1
- cbnz w12, .loop_normFact64_sve2
+ cbnz w12, .Loop_normFact64_sve2
normFact_end
ret
.vl_gt_16_normFact64:
diff --git a/source/common/aarch64/pixel-util.S b/source/common/aarch64/pixel-util.S
index 378c6891c..1df49ba6e 100644
--- a/source/common/aarch64/pixel-util.S
+++ b/source/common/aarch64/pixel-util.S
@@ -60,11 +60,11 @@ endfunc
function PFX(pixel_var_16x16_neon)
pixel_var_start
mov w12, #16
-.loop_var_16:
+.Loop_var_16:
sub w12, w12, #1
ld1 {v4.16b}, [x0], x1
pixel_var_1 v4
- cbnz w12, .loop_var_16
+ cbnz w12, .Loop_var_16
pixel_var_end
ret
endfunc
@@ -72,12 +72,12 @@ endfunc
function PFX(pixel_var_32x32_neon)
pixel_var_start
mov w12, #32
-.loop_var_32:
+.Loop_var_32:
sub w12, w12, #1
ld1 {v4.16b-v5.16b}, [x0], x1
pixel_var_1 v4
pixel_var_1 v5
- cbnz w12, .loop_var_32
+ cbnz w12, .Loop_var_32
pixel_var_end
ret
endfunc
@@ -85,14 +85,14 @@ endfunc
function PFX(pixel_var_64x64_neon)
pixel_var_start
mov w12, #64
-.loop_var_64:
+.Loop_var_64:
sub w12, w12, #1
ld1 {v4.16b-v7.16b}, [x0], x1
pixel_var_1 v4
pixel_var_1 v5
pixel_var_1 v6
pixel_var_1 v7
- cbnz w12, .loop_var_64
+ cbnz w12, .Loop_var_64
pixel_var_end
ret
endfunc
@@ -148,7 +148,7 @@ endfunc
function PFX(getResidual32_neon)
lsl x4, x3, #1
mov w12, #4
-.loop_residual_32:
+.Loop_residual_32:
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v1.16b}, [x0], x3
@@ -166,7 +166,7 @@ function PFX(getResidual32_neon)
st1 {v16.8h-v19.8h}, [x2], x4
st1 {v20.8h-v23.8h}, [x2], x4
.endr
- cbnz w12, .loop_residual_32
+ cbnz w12, .Loop_residual_32
ret
endfunc
@@ -221,7 +221,7 @@ endfunc
function PFX(pixel_sub_ps_32x32_neon)
lsl x1, x1, #1
mov w12, #4
-.loop_sub_ps_32:
+.Loop_sub_ps_32:
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v1.16b}, [x2], x4
@@ -239,7 +239,7 @@ function PFX(pixel_sub_ps_32x32_neon)
st1 {v16.8h-v19.8h}, [x0], x1
st1 {v20.8h-v23.8h}, [x0], x1
.endr
- cbnz w12, .loop_sub_ps_32
+ cbnz w12, .Loop_sub_ps_32
ret
endfunc
@@ -247,7 +247,7 @@ function PFX(pixel_sub_ps_64x64_neon)
lsl x1, x1, #1
sub x1, x1, #64
mov w12, #16
-.loop_sub_ps_64:
+.Loop_sub_ps_64:
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v3.16b}, [x2], x4
@@ -263,7 +263,7 @@ function PFX(pixel_sub_ps_64x64_neon)
st1 {v16.8h-v19.8h}, [x0], #64
st1 {v20.8h-v23.8h}, [x0], x1
.endr
- cbnz w12, .loop_sub_ps_64
+ cbnz w12, .Loop_sub_ps_64
ret
endfunc
@@ -318,7 +318,7 @@ endfunc
function PFX(pixel_sub_ps_32x64_neon)
lsl x1, x1, #1
mov w12, #8
-.loop_sub_ps_32x64:
+.Loop_sub_ps_32x64:
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v1.16b}, [x2], x4
@@ -336,7 +336,7 @@ function PFX(pixel_sub_ps_32x64_neon)
st1 {v16.8h-v19.8h}, [x0], x1
st1 {v20.8h-v23.8h}, [x0], x1
.endr
- cbnz w12, .loop_sub_ps_32x64
+ cbnz w12, .Loop_sub_ps_32x64
ret
endfunc
@@ -383,7 +383,7 @@ endfunc
function PFX(pixel_add_ps_16x\h\()_neon)
lsl x5, x5, #1
mov w12, #\h / 8
-.loop_add_ps_16x\h\():
+.Loop_add_ps_16x\h\():
sub w12, w12, #1
.rept 4
ld1 {v0.16b}, [x2], x4
@@ -405,7 +405,7 @@ function PFX(pixel_add_ps_16x\h\()_neon)
st1 {v4.16b}, [x0], x1
st1 {v5.16b}, [x0], x1
.endr
- cbnz w12, .loop_add_ps_16x\h
+ cbnz w12, .Loop_add_ps_16x\h
ret
endfunc
.endm
@@ -417,7 +417,7 @@ pixel_add_ps_16xN_neon 32
function PFX(pixel_add_ps_32x\h\()_neon)
lsl x5, x5, #1
mov w12, #\h / 4
-.loop_add_ps_32x\h\():
+.Loop_add_ps_32x\h\():
sub w12, w12, #1
.rept 4
ld1 {v0.16b-v1.16b}, [x2], x4
@@ -436,7 +436,7 @@ pixel_add_ps_16xN_neon 32
sqxtun2 v5.16b, v27.8h
st1 {v4.16b-v5.16b}, [x0], x1
.endr
- cbnz w12, .loop_add_ps_32x\h
+ cbnz w12, .Loop_add_ps_32x\h
ret
endfunc
.endm
@@ -448,7 +448,7 @@ function PFX(pixel_add_ps_64x64_neon)
lsl x5, x5, #1
sub x5, x5, #64
mov w12, #32
-.loop_add_ps_64x64:
+.Loop_add_ps_64x64:
sub w12, w12, #1
.rept 2
ld1 {v0.16b-v3.16b}, [x2], x4
@@ -480,7 +480,7 @@ function PFX(pixel_add_ps_64x64_neon)
sqxtun2 v3.16b, v7.8h
st1 {v0.16b-v3.16b}, [x0], x1
.endr
- cbnz w12, .loop_add_ps_64x64
+ cbnz w12, .Loop_add_ps_64x64
ret
endfunc
@@ -548,7 +548,7 @@ endfunc
// void scale2D_64to32(pixel* dst, const pixel* src, intptr_t stride)
function PFX(scale2D_64to32_neon)
mov w12, #32
-.loop_scale2D:
+.Loop_scale2D:
ld1 {v0.16b-v3.16b}, [x1], x2
sub w12, w12, #1
ld1 {v4.16b-v7.16b}, [x1], x2
@@ -561,7 +561,7 @@ function PFX(scale2D_64to32_neon)
uqrshrn v1.8b, v2.8h, #2
uqrshrn2 v1.16b, v3.8h, #2
st1 {v0.16b-v1.16b}, [x0], #32
- cbnz w12, .loop_scale2D
+ cbnz w12, .Loop_scale2D
ret
endfunc
@@ -569,33 +569,33 @@ endfunc
function PFX(pixel_planecopy_cp_neon)
dup v2.16b, w6
sub x5, x5, #1
-.loop_h:
+.Loop_h:
mov x6, x0
mov x12, x2
mov x7, #0
-.loop_w:
+.Loop_w:
ldr q0, [x6], #16
ushl v0.16b, v0.16b, v2.16b
str q0, [x12], #16
add x7, x7, #16
cmp x7, x4
- blt .loop_w
+ blt .Loop_w
add x0, x0, x1
add x2, x2, x3
sub x5, x5, #1
- cbnz x5, .loop_h
+ cbnz x5, .Loop_h
// handle last row
mov x5, x4
lsr x5, x5, #3
-.loopW8:
+.LoopW8:
ldr d0, [x0], #8
ushl v0.8b, v0.8b, v2.8b
str d0, [x2], #8
sub x4, x4, #8
sub x5, x5, #1
- cbnz x5, .loopW8
+ cbnz x5, .LoopW8
mov x5, #8
sub x5, x5, x4
@@ -1508,7 +1508,7 @@ function PFX(pixel_sa8d_32x64_neon)
mov x10, x30
mov w11, #4
mov w9, #0
-.loop_sa8d_32:
+.Loop_sa8d_32:
sub w11, w11, #1
sa8d_16x16 w4
sub x0, x0, x1, lsl #4
@@ -1520,7 +1520,7 @@ function PFX(pixel_sa8d_32x64_neon)
add w9, w9, w4
sub x0, x0, #24
sub x2, x2, #24
- cbnz w11, .loop_sa8d_32
+ cbnz w11, .Loop_sa8d_32
mov w0, w9
ret x10
endfunc
@@ -1529,7 +1529,7 @@ function PFX(pixel_sa8d_64x64_neon)
mov x10, x30
mov w11, #4
mov w9, #0
-.loop_sa8d_64:
+.Loop_sa8d_64:
sub w11, w11, #1
sa8d_16x16 w4
sub x0, x0, x1, lsl #4
@@ -1554,7 +1554,7 @@ function PFX(pixel_sa8d_64x64_neon)
sub x0, x0, #56
sub x2, x2, #56
- cbnz w11, .loop_sa8d_64
+ cbnz w11, .Loop_sa8d_64
mov w0, w9
ret x10
endfunc
@@ -1807,7 +1807,7 @@ function PFX(quant_neon)
eor w10, w10, w10
eor v17.16b, v17.16b, v17.16b
-.loop_quant:
+.Loop_quant:
ld1 {v18.4h}, [x0], #8
ld1 {v7.4s}, [x1], #16
@@ -1839,7 +1839,7 @@ function PFX(quant_neon)
st1 {v5.4h}, [x3], #8
subs w6, w6, #1
- b.ne .loop_quant
+ b.ne .Loop_quant
addv s4, v4.4s
mov w9, v4.s[0]
@@ -1858,7 +1858,7 @@ function PFX(nquant_neon)
mov x4, #0
movi v22.4s, #0
-.loop_nquant:
+.Loop_nquant:
ld1 {v16.4h}, [x0], #8
sub w5, w5, #1
sxtl v19.4s, v16.4h // v19 = coef[blockpos]
@@ -1883,7 +1883,7 @@ function PFX(nquant_neon)
abs v17.4h, v16.4h
st1 {v17.4h}, [x2], #8
- cbnz w5, .loop_nquant
+ cbnz w5, .Loop_nquant
uaddlv d4, v4.4s
fmov x12, d4
@@ -1937,7 +1937,7 @@ endfunc
function PFX(ssimDist16_neon)
mov w12, #16
ssimDist_start
-.loop_ssimDist16:
+.Loop_ssimDist16:
sub w12, w12, #1
ld1 {v4.16b}, [x0], x1
ld1 {v5.16b}, [x2], x3
@@ -1947,7 +1947,7 @@ function PFX(ssimDist16_neon)
uxtl2 v5.8h, v5.16b
ssimDist_1 v6, v7
ssimDist_1 v4, v5
- cbnz w12, .loop_ssimDist16
+ cbnz w12, .Loop_ssimDist16
ssimDist_end
ret
endfunc
@@ -1955,7 +1955,7 @@ endfunc
function PFX(ssimDist32_neon)
mov w12, #32
ssimDist_start
-.loop_ssimDist32:
+.Loop_ssimDist32:
sub w12, w12, #1
ld1 {v4.16b-v5.16b}, [x0], x1
ld1 {v6.16b-v7.16b}, [x2], x3
@@ -1971,7 +1971,7 @@ function PFX(ssimDist32_neon)
ssimDist_1 v23, v24
ssimDist_1 v25, v26
ssimDist_1 v27, v28
- cbnz w12, .loop_ssimDist32
+ cbnz w12, .Loop_ssimDist32
ssimDist_end
ret
endfunc
@@ -1979,7 +1979,7 @@ endfunc
function PFX(ssimDist64_neon)
mov w12, #64
ssimDist_start
-.loop_ssimDist64:
+.Loop_ssimDist64:
sub w12, w12, #1
ld1 {v4.16b-v7.16b}, [x0], x1
ld1 {v16.16b-v19.16b}, [x2], x3
@@ -2007,7 +2007,7 @@ function PFX(ssimDist64_neon)
ssimDist_1 v23, v24
ssimDist_1 v25, v26
ssimDist_1 v27, v28
- cbnz w12, .loop_ssimDist64
+ cbnz w12, .Loop_ssimDist64
ssimDist_end
ret
endfunc
@@ -2035,14 +2035,14 @@ endfunc
function PFX(normFact16_neon)
mov w12, #16
normFact_start
-.loop_normFact16:
+.Loop_normFact16:
sub w12, w12, #1
ld1 {v4.16b}, [x0], x1
uxtl v5.8h, v4.8b
uxtl2 v4.8h, v4.16b
normFact_1 v5
normFact_1 v4
- cbnz w12, .loop_normFact16
+ cbnz w12, .Loop_normFact16
normFact_end
ret
endfunc
@@ -2050,7 +2050,7 @@ endfunc
function PFX(normFact32_neon)
mov w12, #32
normFact_start
-.loop_normFact32:
+.Loop_normFact32:
sub w12, w12, #1
ld1 {v4.16b-v5.16b}, [x0], x1
uxtl v6.8h, v4.8b
@@ -2061,7 +2061,7 @@ function PFX(normFact32_neon)
normFact_1 v5
normFact_1 v6
normFact_1 v7
- cbnz w12, .loop_normFact32
+ cbnz w12, .Loop_normFact32
normFact_end
ret
endfunc
@@ -2069,7 +2069,7 @@ endfunc
function PFX(normFact64_neon)
mov w12, #64
normFact_start
-.loop_normFact64:
+.Loop_normFact64:
sub w12, w12, #1
ld1 {v4.16b-v7.16b}, [x0], x1
uxtl v26.8h, v4.8b
@@ -2088,7 +2088,7 @@ function PFX(normFact64_neon)
normFact_1 v25
normFact_1 v26
normFact_1 v27
- cbnz w12, .loop_normFact64
+ cbnz w12, .Loop_normFact64
normFact_end
ret
endfunc
@@ -2120,9 +2120,9 @@ function PFX(weight_pp_neon)
cbnz w11, .widenTo32Bit
// 16-bit arithmetic is enough.
-.loopHpp:
+.LoopHpp:
mov x12, x3
-.loopWpp:
+.LoopWpp:
ldr q0, [x0], #16
sub x12, x12, #16
umull v1.8h, v0.8b, v25.8b // val *= w0 << correction >> shift
@@ -2132,18 +2132,18 @@ function PFX(weight_pp_neon)
sqxtun v0.8b, v1.8h // val = x265_clip(val)
sqxtun2 v0.16b, v2.8h
str q0, [x1], #16
- cbnz x12, .loopWpp
+ cbnz x12, .LoopWpp
add x1, x1, x2
add x0, x0, x2
sub x4, x4, #1
- cbnz x4, .loopHpp
+ cbnz x4, .LoopHpp
ret
// 32-bit arithmetic is needed.
.widenTo32Bit:
-.loopHpp32:
+.LoopHpp32:
mov x12, x3
-.loopWpp32:
+.LoopWpp32:
ldr d0, [x0], #8
sub x12, x12, #8
uxtl v0.8h, v0.8b
@@ -2155,11 +2155,11 @@ function PFX(weight_pp_neon)
sqxtn2 v0.8h, v2.4s
sqxtun v0.8b, v0.8h
str d0, [x1], #8
- cbnz x12, .loopWpp32
+ cbnz x12, .LoopWpp32
add x1, x1, x2
add x0, x0, x2
sub x4, x4, #1
- cbnz x4, .loopHpp32
+ cbnz x4, .LoopHpp32
ret
// The shift right cannot be moved out of the loop.
@@ -2169,9 +2169,9 @@ function PFX(weight_pp_neon)
neg w7, w7 // -shift
dup v27.4s, w7
dup v29.4s, w9 // offset
-.loopHppUS:
+.LoopHppUS:
mov x12, x3
-.loopWppUS:
+.LoopWppUS:
ldr d0, [x0], #8
sub x12, x12, #8
uxtl v0.8h, v0.8b
@@ -2187,11 +2187,11 @@ function PFX(weight_pp_neon)
sqxtn2 v0.8h, v2.4s
sqxtun v0.8b, v0.8h
str d0, [x1], #8
- cbnz x12, .loopWppUS
+ cbnz x12, .LoopWppUS
add x1, x1, x2
add x0, x0, x2
sub x4, x4, #1
- cbnz x4, .loopHppUS
+ cbnz x4, .LoopHppUS
ret
endfunc
@@ -2220,7 +2220,7 @@ function PFX(scanPosLast_neon)
add x11, x10, x7 // 3*x7
add x9, x4, #1 // CG count
-.loop_spl:
+.Loop_spl:
// position of current CG
ldrh w6, [x0], #32
add x6, x1, x6, lsl #1
@@ -2267,14 +2267,14 @@ function PFX(scanPosLast_neon)
// accelerate by preparing w13 = w13 & w15
and w13, w13, w15
mov x14, xzr
-.loop_spl_1:
+.Loop_spl_1:
cbz w15, .pext_end
clz w6, w15
lsl w13, w13, w6
lsl w15, w15, w6
extr w14, w14, w13, #31
bfm w15, wzr, #1, #0
- b .loop_spl_1
+ b .Loop_spl_1
.pext_end:
strh w14, [x2], #2
@@ -2285,7 +2285,7 @@ function PFX(scanPosLast_neon)
sub x5, x5, x6
strb w6, [x4], #1
- cbnz x5, .loop_spl
+ cbnz x5, .Loop_spl
// count trailing zeros
rbit w13, w12
@@ -2364,7 +2364,7 @@ function PFX(costCoeffNxN_neon)
mov x11, #0
movi v31.16b, #0
cbz x2, .idx_zero
-.loop_ccnn:
+.Loop_ccnn:
// {
// const uint32_t cnt = tabSigCtx[blkPos] + offset + posOffset;
// ctxSig = cnt & posZeroMask;
@@ -2403,7 +2403,7 @@ function PFX(costCoeffNxN_neon)
cmp w9, #1
csel w10, w11, w10, eq
strb w10, [x6, x14]
- cbnz x2, .loop_ccnn
+ cbnz x2, .Loop_ccnn
.idx_zero:
add x13, x3, x4, lsl #1
diff --git a/source/common/aarch64/sad-a-sve2.S b/source/common/aarch64/sad-a-sve2.S
index 9c86d84b6..599a3719a 100644
--- a/source/common/aarch64/sad-a-sve2.S
+++ b/source/common/aarch64/sad-a-sve2.S
@@ -217,12 +217,12 @@ function PFX(pixel_sad_\w\()x\h\()_sve2)
SAD_START_\w
mov w9, #\h/8
-.loop_sve2_\w\()x\h:
+.Loop_sve2_\w\()x\h:
sub w9, w9, #1
.rept 4
SAD_\w
.endr
- cbnz w9, .loop_sve2_\w\()x\h
+ cbnz w9, .Loop_sve2_\w\()x\h
SAD_END_\w
@@ -231,12 +231,12 @@ function PFX(pixel_sad_\w\()x\h\()_sve2)
SAD_START_\w
mov w9, #\h/8
-.loop_sve2_loop_\w\()x\h:
+.Loop_sve2_loop_\w\()x\h:
sub w9, w9, #1
.rept 4
SAD_\w
.endr
- cbnz w9, .loop_sve2_loop_\w\()x\h
+ cbnz w9, .Loop_sve2_loop_\w\()x\h
SAD_END_\w
.else
@@ -402,7 +402,7 @@ function PFX(sad_x\x\()_\w\()x\h\()_sve2)
bgt .vl_gt_16_sad_x_loop_\x\()_\w\()x\h
SAD_X_START_\w \x
mov w12, #\h/4
-.loop_sad_sve2_x\x\()_\w\()x\h:
+.Loop_sad_sve2_x\x\()_\w\()x\h:
sub w12, w12, #1
.rept 4
.if \w == 24
@@ -422,7 +422,7 @@ function PFX(sad_x\x\()_\w\()x\h\()_sve2)
SAD_X_\w x4, v19, v23
.endif
.endr
- cbnz w12, .loop_sad_sve2_x\x\()_\w\()x\h
+ cbnz w12, .Loop_sad_sve2_x\x\()_\w\()x\h
SAD_X_END_\w \x
.vl_gt_16_sad_x_loop_\x\()_\w\()x\h\():
.if \w == 24 || \w == 32
@@ -431,7 +431,7 @@ function PFX(sad_x\x\()_\w\()x\h\()_sve2)
.else
SAD_X_START_\w \x
mov w12, #\h/4
-.loop_sad_sve2_gt_16_x\x\()_\w\()x\h:
+.Loop_sad_sve2_gt_16_x\x\()_\w\()x\h:
sub w12, w12, #1
.rept 4
.if \w == 24
@@ -451,7 +451,7 @@ function PFX(sad_x\x\()_\w\()x\h\()_sve2)
SAD_X_\w x4, v19, v23
.endif
.endr
- cbnz w12, .loop_sad_sve2_gt_16_x\x\()_\w\()x\h
+ cbnz w12, .Loop_sad_sve2_gt_16_x\x\()_\w\()x\h
SAD_X_END_\w \x
.endif
endfunc
diff --git a/source/common/aarch64/sad-a.S b/source/common/aarch64/sad-a.S
index 20d7cac7c..7460825f1 100644
--- a/source/common/aarch64/sad-a.S
+++ b/source/common/aarch64/sad-a.S
@@ -55,12 +55,12 @@ function PFX(pixel_sad_\w\()x\h\()_neon)
SAD_START_\w
mov w9, #\h/8
-.loop_\w\()x\h:
+.Loop_\w\()x\h:
sub w9, w9, #1
.rept 4
SAD_\w
.endr
- cbnz w9, .loop_\w\()x\h
+ cbnz w9, .Loop_\w\()x\h
SAD_END_\w
endfunc
@@ -129,7 +129,7 @@ function PFX(sad_x\x\()_\w\()x\h\()_neon)
.endif
SAD_X_START_\w \x
mov w12, #\h/4
-.loop_sad_x\x\()_\w\()x\h:
+.Loop_sad_x\x\()_\w\()x\h:
sub w12, w12, #1
.rept 4
.if \w == 24
@@ -149,7 +149,7 @@ function PFX(sad_x\x\()_\w\()x\h\()_neon)
SAD_X_\w x4, v19, v23
.endif
.endr
- cbnz w12, .loop_sad_x\x\()_\w\()x\h
+ cbnz w12, .Loop_sad_x\x\()_\w\()x\h
SAD_X_END_\w \x
endfunc
.endm
diff --git a/source/common/aarch64/ssd-a-sve2.S b/source/common/aarch64/ssd-a-sve2.S
index de2603850..8077bd93c 100644
--- a/source/common/aarch64/ssd-a-sve2.S
+++ b/source/common/aarch64/ssd-a-sve2.S
@@ -43,7 +43,7 @@ function PFX(pixel_sse_pp_32x32_sve2)
mov w12, #8
movi v0.16b, #0
movi v1.16b, #0
-.loop_sse_pp_32_sve2:
+.Loop_sse_pp_32_sve2:
sub w12, w12, #1
.rept 4
ld1 {v16.16b,v17.16b}, [x0], x1
@@ -61,7 +61,7 @@ function PFX(pixel_sse_pp_32x32_sve2)
smlal v0.4s, v5.4h, v5.4h
smlal2 v1.4s, v5.8h, v5.8h
.endr
- cbnz w12, .loop_sse_pp_32_sve2
+ cbnz w12, .Loop_sse_pp_32_sve2
add v0.4s, v0.4s, v1.4s
ret_v0_w0
.vl_gt_16_pixel_sse_pp_32x32:
@@ -182,7 +182,7 @@ function PFX(pixel_sse_pp_64x64_sve2)
movi v0.16b, #0
movi v1.16b, #0
-.loop_sse_pp_64_sve2:
+.Loop_sse_pp_64_sve2:
sub w12, w12, #1
.rept 4
ld1 {v16.16b-v19.16b}, [x0], x1
@@ -214,7 +214,7 @@ function PFX(pixel_sse_pp_64x64_sve2)
smlal v0.4s, v5.4h, v5.4h
smlal2 v1.4s, v5.8h, v5.8h
.endr
- cbnz w12, .loop_sse_pp_64_sve2
+ cbnz w12, .Loop_sse_pp_64_sve2
add v0.4s, v0.4s, v1.4s
ret_v0_w0
.vl_gt_16_pixel_sse_pp_64x64:
@@ -788,7 +788,7 @@ function PFX(pixel_ssd_s_16x16_sve2)
mov w12, #4
movi v0.16b, #0
movi v1.16b, #0
-.loop_ssd_s_16_sve2:
+.Loop_ssd_s_16_sve2:
sub w12, w12, #1
.rept 2
ld1 {v4.16b,v5.16b}, [x0], x1
@@ -802,7 +802,7 @@ function PFX(pixel_ssd_s_16x16_sve2)
smlal v0.4s, v7.4h, v7.4h
smlal2 v1.4s, v7.8h, v7.8h
.endr
- cbnz w12, .loop_ssd_s_16_sve2
+ cbnz w12, .Loop_ssd_s_16_sve2
add v0.4s, v0.4s, v1.4s
ret_v0_w0
.vl_gt_16_pixel_ssd_s_16x16:
@@ -830,7 +830,7 @@ function PFX(pixel_ssd_s_32x32_sve2)
mov w12, #8
movi v0.16b, #0
movi v1.16b, #0
-.loop_ssd_s_32:
+.Loop_ssd_s_32:
sub w12, w12, #1
.rept 4
ld1 {v4.16b-v7.16b}, [x0], x1
@@ -843,7 +843,7 @@ function PFX(pixel_ssd_s_32x32_sve2)
smlal v0.4s, v7.4h, v7.4h
smlal2 v1.4s, v7.8h, v7.8h
.endr
- cbnz w12, .loop_ssd_s_32
+ cbnz w12, .Loop_ssd_s_32
add v0.4s, v0.4s, v1.4s
ret_v0_w0
.vl_gt_16_pixel_ssd_s_32x32:
diff --git a/source/common/aarch64/ssd-a.S b/source/common/aarch64/ssd-a.S
index 7c778b4fe..f4b79304a 100644
--- a/source/common/aarch64/ssd-a.S
+++ b/source/common/aarch64/ssd-a.S
@@ -157,7 +157,7 @@ function PFX(pixel_sse_pp_32x32_neon)
mov w12, #8
movi v0.16b, #0
movi v1.16b, #0
-.loop_sse_pp_32:
+.Loop_sse_pp_32:
sub w12, w12, #1
.rept 4
ld1 {v16.16b,v17.16b}, [x0], x1
@@ -175,7 +175,7 @@ function PFX(pixel_sse_pp_32x32_neon)
smlal v0.4s, v5.4h, v5.4h
smlal2 v1.4s, v5.8h, v5.8h
.endr
- cbnz w12, .loop_sse_pp_32
+ cbnz w12, .Loop_sse_pp_32
add v0.4s, v0.4s, v1.4s
ret_v0_w0
endfunc
@@ -184,7 +184,7 @@ function PFX(pixel_sse_pp_32x64_neon)
mov w12, #16
movi v0.16b, #0
movi v1.16b, #0
-.loop_sse_pp_32x64:
+.Loop_sse_pp_32x64:
sub w12, w12, #1
.rept 4
ld1 {v16.16b,v17.16b}, [x0], x1
@@ -202,7 +202,7 @@ function PFX(pixel_sse_pp_32x64_neon)
smlal v0.4s, v5.4h, v5.4h
smlal2 v1.4s, v5.8h, v5.8h
.endr
- cbnz w12, .loop_sse_pp_32x64
+ cbnz w12, .Loop_sse_pp_32x64
add v0.4s, v0.4s, v1.4s
ret_v0_w0
endfunc
@@ -212,7 +212,7 @@ function PFX(pixel_sse_pp_64x64_neon)
movi v0.16b, #0
movi v1.16b, #0
-.loop_sse_pp_64:
+.Loop_sse_pp_64:
sub w12, w12, #1
.rept 4
ld1 {v16.16b-v19.16b}, [x0], x1
@@ -244,7 +244,7 @@ function PFX(pixel_sse_pp_64x64_neon)
smlal v0.4s, v5.4h, v5.4h
smlal2 v1.4s, v5.8h, v5.8h
.endr
- cbnz w12, .loop_sse_pp_64
+ cbnz w12, .Loop_sse_pp_64
add v0.4s, v0.4s, v1.4s
ret_v0_w0
endfunc
@@ -301,7 +301,7 @@ function PFX(pixel_sse_ss_16x16_neon)
mov w12, #4
movi v0.16b, #0
movi v1.16b, #0
-.loop_sse_ss_16:
+.Loop_sse_ss_16:
sub w12, w12, #1
.rept 4
ld1 {v16.16b, v17.16b}, [x0], x1
@@ -313,7 +313,7 @@ function PFX(pixel_sse_ss_16x16_neon)
smlal v0.4s, v3.4h, v3.4h
smlal2 v1.4s, v3.8h, v3.8h
.endr
- cbnz w12, .loop_sse_ss_16
+ cbnz w12, .Loop_sse_ss_16
add v0.4s, v0.4s, v1.4s
ret_v0_w0
endfunc
@@ -325,7 +325,7 @@ function PFX(pixel_sse_ss_32x32_neon)
mov w12, #8
movi v0.16b, #0
movi v1.16b, #0
-.loop_sse_ss_32:
+.Loop_sse_ss_32:
sub w12, w12, #1
.rept 4
ld1 {v16.16b-v19.16b}, [x0], x1
@@ -343,7 +343,7 @@ function PFX(pixel_sse_ss_32x32_neon)
smlal v0.4s, v5.4h, v5.4h
smlal2 v1.4s, v5.8h, v5.8h
.endr
- cbnz w12, .loop_sse_ss_32
+ cbnz w12, .Loop_sse_ss_32
add v0.4s, v0.4s, v1.4s
ret_v0_w0
endfunc
@@ -357,7 +357,7 @@ function PFX(pixel_sse_ss_64x64_neon)
mov w12, #32
movi v0.16b, #0
movi v1.16b, #0
-.loop_sse_ss_64:
+.Loop_sse_ss_64:
sub w12, w12, #1
.rept 2
ld1 {v16.16b-v19.16b}, [x0], #64
@@ -389,7 +389,7 @@ function PFX(pixel_sse_ss_64x64_neon)
smlal v0.4s, v5.4h, v5.4h
smlal2 v1.4s, v5.8h, v5.8h
.endr
- cbnz w12, .loop_sse_ss_64
+ cbnz w12, .Loop_sse_ss_64
add v0.4s, v0.4s, v1.4s
ret_v0_w0
endfunc
@@ -433,7 +433,7 @@ function PFX(pixel_ssd_s_16x16_neon)
mov w12, #4
movi v0.16b, #0
movi v1.16b, #0
-.loop_ssd_s_16:
+.Loop_ssd_s_16:
sub w12, w12, #1
.rept 2
ld1 {v4.16b,v5.16b}, [x0], x1
@@ -447,7 +447,7 @@ function PFX(pixel_ssd_s_16x16_neon)
smlal v0.4s, v7.4h, v7.4h
smlal2 v1.4s, v7.8h, v7.8h
.endr
- cbnz w12, .loop_ssd_s_16
+ cbnz w12, .Loop_ssd_s_16
add v0.4s, v0.4s, v1.4s
ret_v0_w0
endfunc
@@ -457,7 +457,7 @@ function PFX(pixel_ssd_s_32x32_neon)
mov w12, #8
movi v0.16b, #0
movi v1.16b, #0
-.loop_ssd_s_32:
+.Loop_ssd_s_32:
sub w12, w12, #1
.rept 4
ld1 {v4.16b-v7.16b}, [x0], x1
@@ -470,7 +470,7 @@ function PFX(pixel_ssd_s_32x32_neon)
smlal v0.4s, v7.4h, v7.4h
smlal2 v1.4s, v7.8h, v7.8h
.endr
- cbnz w12, .loop_ssd_s_32
+ cbnz w12, .Loop_ssd_s_32
add v0.4s, v0.4s, v1.4s
ret_v0_w0
endfunc
diff --git a/source/common/arm/blockcopy8.S b/source/common/arm/blockcopy8.S
index 1c868f464..8170160aa 100644
--- a/source/common/arm/blockcopy8.S
+++ b/source/common/arm/blockcopy8.S
@@ -795,7 +795,7 @@ function x265_count_nonzero_32_neon
vmov q2, q12
vmov q3, q14
-.loop:
+.Loop:
vldm r0!, {q8-q15}
subs r1, #1
@@ -817,7 +817,7 @@ function x265_count_nonzero_32_neon
vadd.s8 q1, q10
vadd.s8 q2, q12
vadd.s8 q3, q14
- bgt .loop
+ bgt .Loop
// sum
vadd.s8 q0, q1
diff --git a/source/common/arm/dct-a.S b/source/common/arm/dct-a.S
index 42b193bf8..5be8847e9 100644
--- a/source/common/arm/dct-a.S
+++ b/source/common/arm/dct-a.S
@@ -422,7 +422,7 @@ function x265_dct_16x16_neon
mov lr, #4*16*2
// DCT-1D
-.loop1:
+.Loop1:
// Row[0-3]
vld1.16 {q8-q9}, [r0, :64], r2 // q8 = [07 06 05 04 03 02 01 00], q9 = [0F 0E 0D 0C 0B 0A 09 08]
vld1.16 {q10-q11}, [r0, :64], r2 // q10 = [17 16 15 14 13 12 11 10], q11 = [1F 1E 1D 1C 1B 1A 19 18]
@@ -628,7 +628,7 @@ function x265_dct_16x16_neon
// loop into next process group
sub r3, #3*4*16*2
subs r12, #1
- bgt .loop1
+ bgt .Loop1
// DCT-2D
@@ -637,7 +637,7 @@ function x265_dct_16x16_neon
mov r3, #16*2*2
mov r12, #16/4 // Process 4 rows every loop
-.loop2:
+.Loop2:
vldm r2, {q8-q15}
// d16 = [30 20 10 00]
@@ -887,7 +887,7 @@ function x265_dct_16x16_neon
sub r1, #(17*16-4)*2
subs r12, #1
- bgt .loop2
+ bgt .Loop2
add sp, #16*16*2
vpop {q4-q7}
diff --git a/source/common/arm/ipfilter8.S b/source/common/arm/ipfilter8.S
index 8b7f5b3ca..b1ec6cc8b 100644
--- a/source/common/arm/ipfilter8.S
+++ b/source/common/arm/ipfilter8.S
@@ -372,7 +372,7 @@ function x265_filterPixelToShort_32x16_neon
vmov.u16 q1, #8192
vneg.s16 q1, q1
mov r12, #8
-.loop_filterP2S_32x16:
+.Loop_filterP2S_32x16:
subs r12, #1
.rept 2
vld1.u8 {q9-q10}, [r0], r1
@@ -391,7 +391,7 @@ function x265_filterPixelToShort_32x16_neon
vmla.s16 q3, q10, q0
vst1.16 {q2-q3}, [r2], r3
.endr
- bgt .loop_filterP2S_32x16
+ bgt .Loop_filterP2S_32x16
bx lr
endfunc
@@ -402,7 +402,7 @@ function x265_filterPixelToShort_32x24_neon
vmov.u16 q1, #8192
vneg.s16 q1, q1
mov r12, #12
-.loop_filterP2S_32x24:
+.Loop_filterP2S_32x24:
subs r12, #1
.rept 2
vld1.u8 {q9-q10}, [r0], r1
@@ -421,7 +421,7 @@ function x265_filterPixelToShort_32x24_neon
vmla.s16 q3, q10, q0
vst1.16 {q2-q3}, [r2], r3
.endr
- bgt .loop_filterP2S_32x24
+ bgt .Loop_filterP2S_32x24
bx lr
endfunc
@@ -432,7 +432,7 @@ function x265_filterPixelToShort_32x32_neon
vmov.u16 q1, #8192
vneg.s16 q1, q1
mov r12, #16
-.loop_filterP2S_32x32:
+.Loop_filterP2S_32x32:
subs r12, #1
.rept 2
vld1.u8 {q9-q10}, [r0], r1
@@ -451,7 +451,7 @@ function x265_filterPixelToShort_32x32_neon
vmla.s16 q3, q10, q0
vst1.16 {q2-q3}, [r2], r3
.endr
- bgt .loop_filterP2S_32x32
+ bgt .Loop_filterP2S_32x32
bx lr
endfunc
@@ -462,7 +462,7 @@ function x265_filterPixelToShort_32x64_neon
vmov.u16 q1, #8192
vneg.s16 q1, q1
mov r12, #32
-.loop_filterP2S_32x64:
+.Loop_filterP2S_32x64:
subs r12, #1
.rept 2
vld1.u8 {q9-q10}, [r0], r1
@@ -481,7 +481,7 @@ function x265_filterPixelToShort_32x64_neon
vmla.s16 q3, q10, q0
vst1.16 {q2-q3}, [r2], r3
.endr
- bgt .loop_filterP2S_32x64
+ bgt .Loop_filterP2S_32x64
bx lr
endfunc
@@ -493,7 +493,7 @@ function x265_filterPixelToShort_64x16_neon
vmov.u16 q1, #8192
vneg.s16 q1, q1
mov r12, #8
-.loop_filterP2S_64x16:
+.Loop_filterP2S_64x16:
subs r12, #1
.rept 2
vld1.u8 {q9-q10}, [r0]!
@@ -528,7 +528,7 @@ function x265_filterPixelToShort_64x16_neon
vmla.s16 q3, q10, q0
vst1.16 {q2-q3}, [r2], r3
.endr
- bgt .loop_filterP2S_64x16
+ bgt .Loop_filterP2S_64x16
bx lr
endfunc
@@ -540,7 +540,7 @@ function x265_filterPixelToShort_64x32_neon
vmov.u16 q1, #8192
vneg.s16 q1, q1
mov r12, #16
-.loop_filterP2S_64x32:
+.Loop_filterP2S_64x32:
subs r12, #1
.rept 2
vld1.u8 {q9-q10}, [r0]!
@@ -575,7 +575,7 @@ function x265_filterPixelToShort_64x32_neon
vmla.s16 q3, q10, q0
vst1.16 {q2-q3}, [r2], r3
.endr
- bgt .loop_filterP2S_64x32
+ bgt .Loop_filterP2S_64x32
bx lr
endfunc
@@ -587,7 +587,7 @@ function x265_filterPixelToShort_64x48_neon
vmov.u16 q1, #8192
vneg.s16 q1, q1
mov r12, #24
-.loop_filterP2S_64x48:
+.Loop_filterP2S_64x48:
subs r12, #1
.rept 2
vld1.u8 {q9-q10}, [r0]!
@@ -622,7 +622,7 @@ function x265_filterPixelToShort_64x48_neon
vmla.s16 q3, q10, q0
vst1.16 {q2-q3}, [r2], r3
.endr
- bgt .loop_filterP2S_64x48
+ bgt .Loop_filterP2S_64x48
bx lr
endfunc
@@ -634,7 +634,7 @@ function x265_filterPixelToShort_64x64_neon
vmov.u16 q1, #8192
vneg.s16 q1, q1
mov r12, #32
-.loop_filterP2S_64x64:
+.Loop_filterP2S_64x64:
subs r12, #1
.rept 2
vld1.u8 {q9-q10}, [r0]!
@@ -669,7 +669,7 @@ function x265_filterPixelToShort_64x64_neon
vmla.s16 q3, q10, q0
vst1.16 {q2-q3}, [r2], r3
.endr
- bgt .loop_filterP2S_64x64
+ bgt .Loop_filterP2S_64x64
bx lr
endfunc
@@ -681,7 +681,7 @@ function x265_filterPixelToShort_48x64_neon
vmov.u16 q1, #8192
vneg.s16 q1, q1
mov r12, #32
-.loop_filterP2S_48x64:
+.Loop_filterP2S_48x64:
subs r12, #1
.rept 2
vld1.u8 {q9-q10}, [r0]!
@@ -709,7 +709,7 @@ function x265_filterPixelToShort_48x64_neon
vmla.s16 q3, q9, q0
vst1.16 {q2-q3}, [r2], r3
.endr
- bgt .loop_filterP2S_48x64
+ bgt .Loop_filterP2S_48x64
bx lr
endfunc
@@ -756,7 +756,7 @@ function x265_interp_8tap_vert_pp_4x\h\()_neon
vmovl.u8 q2, d4
vmovl.u8 q3, d6
-.loop_4x\h:
+.Loop_4x\h:
// TODO: read extra 1 row for speed optimize, may made crash on OS X platform!
vld1.u32 {d16[0]}, [r0], r1
vld1.u32 {d16[1]}, [r0], r1
@@ -795,7 +795,7 @@ function x265_interp_8tap_vert_pp_4x\h\()_neon
vst1.u32 {d18[1]}, [r2], r3
subs r12, #2
- bne .loop_4x4
+ bne .Loop_4x4
pop {pc}
.ltorg
@@ -945,13 +945,13 @@ LUMA_VPP_4xN 16
.macro FILTER_VPP a b filterv
-.loop_\filterv\()_\a\()x\b:
+.Loop_\filterv\()_\a\()x\b:
mov r7, r2
mov r6, r0
eor r8, r8
-.loop_w8_\filterv\()_\a\()x\b:
+.Loop_w8_\filterv\()_\a\()x\b:
add r6, r0, r8
@@ -988,12 +988,12 @@ LUMA_VPP_4xN 16
add r8, #8
cmp r8, #\a
- blt .loop_w8_\filterv\()_\a\()x\b
+ blt .Loop_w8_\filterv\()_\a\()x\b
add r0, r1
add r2, r3
subs r4, #1
- bne .loop_\filterv\()_\a\()x\b
+ bne .Loop_\filterv\()_\a\()x\b
.endm
@@ -1063,7 +1063,7 @@ function x265_interp_8tap_vert_pp_12x16_neon
sub r0, r4
mov r4, #16
-.loop_vpp_12x16:
+.Loop_vpp_12x16:
mov r6, r0
mov r7, r2
@@ -1173,7 +1173,7 @@ function x265_interp_8tap_vert_pp_12x16_neon
add r0, r1
add r2, r3
subs r4, #1
- bne .loop_vpp_12x16
+ bne .Loop_vpp_12x16
pop {r4, r5, r6, r7}
bx lr
@@ -1194,7 +1194,7 @@ function x265_interp_8tap_vert_sp_4x\h\()_neon
add r12, #2048
vdup.32 q8, r12
mov r4, #\h
-.loop_vsp_4x\h:
+.Loop_vsp_4x\h:
movrel r12, g_lumaFilter
add r12, r5
mov r6, r0
@@ -1266,7 +1266,7 @@ function x265_interp_8tap_vert_sp_4x\h\()_neon
add r0, r1
subs r4, #1
- bne .loop_vsp_4x\h
+ bne .Loop_vsp_4x\h
pop {r4, r5, r6}
bx lr
.ltorg
@@ -1369,13 +1369,13 @@ LUMA_VSP_4xN 16
.macro FILTER_VSP a b filterv
vpush { q4 - q7}
-.loop_\filterv\()_\a\()x\b:
+.Loop_\filterv\()_\a\()x\b:
mov r7, r2
mov r6, r0
eor r8, r8
-.loop_w8_\filterv\()_\a\()x\b:
+.Loop_w8_\filterv\()_\a\()x\b:
add r6, r0, r8
@@ -1417,12 +1417,12 @@ LUMA_VSP_4xN 16
mov r12, #\a
lsl r12, #1
cmp r8, r12
- blt .loop_w8_\filterv\()_\a\()x\b
+ blt .Loop_w8_\filterv\()_\a\()x\b
add r0, r1
add r2, r3
subs r4, #1
- bne .loop_\filterv\()_\a\()x\b
+ bne .Loop_\filterv\()_\a\()x\b
vpop { q4 - q7}
@@ -1498,7 +1498,7 @@ function x265_interp_8tap_vert_sp_12x16_neon
mov r4, #16
vpush { q4 - q7}
-.loop1_12x16:
+.Loop1_12x16:
mov r6, r0
mov r7, r2
@@ -1612,7 +1612,7 @@ function x265_interp_8tap_vert_sp_12x16_neon
add r0, r1
add r2, r3
subs r4, #1
- bne .loop1_12x16
+ bne .Loop1_12x16
vpop { q4 - q7}
pop {r4, r5, r6, r7}
bx lr
@@ -1632,7 +1632,7 @@ function x265_interp_8tap_vert_ps_4x\h\()_neon
vdup.32 q8, r4
mov r4, #\h
-.loop_vps_4x\h:
+.Loop_vps_4x\h:
movrel r12, g_lumaFilter
add r12, r5
mov r6, r0
@@ -1702,7 +1702,7 @@ function x265_interp_8tap_vert_ps_4x\h\()_neon
add r0, r1
subs r4, #1
- bne .loop_vps_4x\h
+ bne .Loop_vps_4x\h
pop {r4, r5, r6}
bx lr
@@ -1717,13 +1717,13 @@ LUMA_VPS_4xN 16
.macro FILTER_VPS a b filterv
-.loop_ps_\filterv\()_\a\()x\b:
+.Loop_ps_\filterv\()_\a\()x\b:
mov r7, r2
mov r6, r0
eor r8, r8
-.loop_ps_w8_\filterv\()_\a\()x\b:
+.Loop_ps_w8_\filterv\()_\a\()x\b:
add r6, r0, r8
@@ -1759,12 +1759,12 @@ LUMA_VPS_4xN 16
add r8, #8
cmp r8, #\a
- blt .loop_ps_w8_\filterv\()_\a\()x\b
+ blt .Loop_ps_w8_\filterv\()_\a\()x\b
add r0, r1
add r2, r3
subs r4, #1
- bne .loop_ps_\filterv\()_\a\()x\b
+ bne .Loop_ps_\filterv\()_\a\()x\b
.endm
@@ -1836,7 +1836,7 @@ function x265_interp_8tap_vert_ps_12x16_neon
sub r0, r4
mov r4, #16
-.loop_vps_12x16:
+.Loop_vps_12x16:
mov r6, r0
mov r7, r2
@@ -1942,7 +1942,7 @@ function x265_interp_8tap_vert_ps_12x16_neon
add r0, r1
add r2, r3
subs r4, #1
- bne .loop_vps_12x16
+ bne .Loop_vps_12x16
pop {r4, r5, r6, r7}
bx lr
@@ -2081,13 +2081,13 @@ endfunc
vpush {q4-q7}
-.loop_\filterv\()_\a\()x\b:
+.Loop_\filterv\()_\a\()x\b:
mov r7, r2
mov r6, r0
eor r8, r8
-.loop_w8_\filterv\()_\a\()x\b:
+.Loop_w8_\filterv\()_\a\()x\b:
add r6, r0, r8
@@ -2121,12 +2121,12 @@ endfunc
add r8, #8
cmp r8, #\a
- blt .loop_w8_\filterv\()_\a\()x\b
+ blt .Loop_w8_\filterv\()_\a\()x\b
add r0, r1
add r2, r3
subs r4, #1
- bne .loop_\filterv\()_\a\()x\b
+ bne .Loop_\filterv\()_\a\()x\b
vpop {q4-q7}
.endm
@@ -2217,13 +2217,13 @@ CHROMA_VPP 48 64
vpush {q4-q7}
-.loop_vps_\filterv\()_\a\()x\b:
+.Loop_vps_\filterv\()_\a\()x\b:
mov r7, r2
mov r6, r0
eor r8, r8
-.loop_vps_w8_\filterv\()_\a\()x\b:
+.Loop_vps_w8_\filterv\()_\a\()x\b:
add r6, r0, r8
@@ -2256,12 +2256,12 @@ CHROMA_VPP 48 64
add r8, #8
cmp r8, #\a
- blt .loop_vps_w8_\filterv\()_\a\()x\b
+ blt .Loop_vps_w8_\filterv\()_\a\()x\b
add r0, r1
add r2, r3
subs r4, #1
- bne .loop_vps_\filterv\()_\a\()x\b
+ bne .Loop_vps_\filterv\()_\a\()x\b
vpop {q4-q7}
.endm
@@ -2353,13 +2353,13 @@ CHROMA_VPS 48 64
vpush {q4-q7}
-.loop_vsp_\filterv\()_\a\()x\b:
+.Loop_vsp_\filterv\()_\a\()x\b:
mov r7, r2
mov r6, r0
eor r8, r8
-.loop_vsp_w8_\filterv\()_\a\()x\b:
+.Loop_vsp_w8_\filterv\()_\a\()x\b:
add r6, r0, r8
@@ -2392,12 +2392,12 @@ CHROMA_VPS 48 64
mov r12, #\a
lsl r12, #1
cmp r8, r12
- blt .loop_vsp_w8_\filterv\()_\a\()x\b
+ blt .Loop_vsp_w8_\filterv\()_\a\()x\b
add r0, r1
add r2, r3
subs r4, #1
- bne .loop_vsp_\filterv\()_\a\()x\b
+ bne .Loop_vsp_\filterv\()_\a\()x\b
vpop {q4-q7}
.endm
diff --git a/source/common/arm/mc-a.S b/source/common/arm/mc-a.S
index b10e9e816..839d192cd 100644
--- a/source/common/arm/mc-a.S
+++ b/source/common/arm/mc-a.S
@@ -554,7 +554,7 @@ function x265_cpy2Dto1D_shr_16x16_neon
vsri.s16 q1, #1
vneg.s16 q0, q0
mov r3, #4
-.loop_cpy2Dto1D_shr_16:
+.Loop_cpy2Dto1D_shr_16:
subs r3, #1
.rept 4
vld1.s16 {q2-q3}, [r1], r2
@@ -564,7 +564,7 @@ function x265_cpy2Dto1D_shr_16x16_neon
vshl.s16 q3, q0
vst1.16 {q2-q3}, [r0]!
.endr
- bgt .loop_cpy2Dto1D_shr_16
+ bgt .Loop_cpy2Dto1D_shr_16
bx lr
endfunc
@@ -577,7 +577,7 @@ function x265_cpy2Dto1D_shr_32x32_neon
vsri.s16 q1, #1
vneg.s16 q0, q0
mov r3, 16
-.loop_cpy2Dto1D_shr_32:
+.Loop_cpy2Dto1D_shr_32:
subs r3, #1
.rept 2
vld1.s16 {q2-q3}, [r1]!
@@ -593,7 +593,7 @@ function x265_cpy2Dto1D_shr_32x32_neon
vst1.16 {q2-q3}, [r0]!
vst1.16 {q8-q9}, [r0]!
.endr
- bgt .loop_cpy2Dto1D_shr_32
+ bgt .Loop_cpy2Dto1D_shr_32
bx lr
endfunc
diff --git a/source/common/arm/pixel-util.S b/source/common/arm/pixel-util.S
index c26b17acc..67719c8e5 100644
--- a/source/common/arm/pixel-util.S
+++ b/source/common/arm/pixel-util.S
@@ -848,36 +848,36 @@ function x265_pixel_planecopy_cp_neon
vdup.8 q2, r12
sub r5, #1
-.loop_h:
+.Loop_h:
mov r6, r0
mov r12, r2
eor r7, r7
-.loop_w:
+.Loop_w:
vld1.u8 {q0}, [r6]!
vshl.u8 q0, q0, q2
vst1.u8 {q0}, [r12]!
add r7, #16
cmp r7, r4
- blt .loop_w
+ blt .Loop_w
add r0, r1
add r2, r3
subs r5, #1
- bgt .loop_h
+ bgt .Loop_h
// handle last row
mov r5, r4
lsr r5, #3
-.loopW8:
+.LoopW8:
vld1.u8 d0, [r0]!
vshl.u8 d0, d0, d4
vst1.u8 d0, [r2]!
subs r4, r4, #8
subs r5, #1
- bgt .loopW8
+ bgt .LoopW8
mov r5,#8
sub r5, r4
@@ -1970,7 +1970,7 @@ function x265_quant_neon
eor r5, r5
veor.s32 q12, q12
-.loop_quant:
+.Loop_quant:
vld1.s16 d16, [r0]!
vmovl.s16 q9, d16 // q9= coef[blockpos]
@@ -1999,7 +1999,7 @@ function x265_quant_neon
vst1.s16 d16, [r3]!
subs r4, #1
- bne .loop_quant
+ bne .Loop_quant
vadd.u32 d8, d9
vpadd.u32 d8, d8
@@ -2023,7 +2023,7 @@ function x265_nquant_neon
eor r4, r4
veor.s32 q12, q12
-.loop_nquant:
+.Loop_nquant:
vld1.s16 d16, [r0]!
vmovl.s16 q9, d16 // q9= coef[blockpos]
@@ -2049,7 +2049,7 @@ function x265_nquant_neon
vst1.s16 d17, [r2]!
subs r3, #1
- bne .loop_nquant
+ bne .Loop_nquant
vadd.u32 d8, d9
vpadd.u32 d8, d8
@@ -2148,7 +2148,7 @@ function x265_pixel_sa8d_32x64_neon
mov r10, #4
eor r9, r9
-.loop_32:
+.Loop_32:
sa8d_16x16 r4
@@ -2166,7 +2166,7 @@ function x265_pixel_sa8d_32x64_neon
sub r2, r2, #24
subs r10, #1
- bgt .loop_32
+ bgt .Loop_32
mov r0, r9
vpop {d8-d11}
@@ -2183,7 +2183,7 @@ function x265_pixel_sa8d_64x64_neon
mov r10, #4
eor r9, r9
-.loop_1:
+.Loop_1:
sa8d_16x16 r4
@@ -2217,7 +2217,7 @@ function x265_pixel_sa8d_64x64_neon
sub r2, r2, #56
subs r10, #1
- bgt .loop_1
+ bgt .Loop_1
mov r0, r9
vpop {d8-d11}
diff --git a/source/common/arm/sad-a.S b/source/common/arm/sad-a.S
index 6faf35957..b5cbded89 100644
--- a/source/common/arm/sad-a.S
+++ b/source/common/arm/sad-a.S
@@ -103,7 +103,7 @@ function x265_pixel_sad_16x\h\()_neon
vabal.u8 q9, d5, d7
mov r12, #(\h-2)/2
-.loop_16x\h:
+.Loop_16x\h:
subs r12, #1
vld1.8 {q0}, [r0], r1
@@ -115,7 +115,7 @@ function x265_pixel_sad_16x\h\()_neon
vabal.u8 q9, d1, d3
vabal.u8 q8, d4, d6
vabal.u8 q9, d5, d7
- bne .loop_16x\h
+ bne .Loop_16x\h
vadd.u16 q8, q8, q9
.if \h == 64
@@ -147,7 +147,7 @@ function x265_pixel_sad_32x\h\()_neon
veor.u8 q11, q11
mov r12, #\h/8
-.loop_32x\h:
+.Loop_32x\h:
subs r12, #1
.rept 4
@@ -166,7 +166,7 @@ function x265_pixel_sad_32x\h\()_neon
vabal.u8 q10, d26, d30
vabal.u8 q11, d27, d31
.endr
- bne .loop_32x\h
+ bne .Loop_32x\h
vadd.u16 q8, q8, q9
vadd.u16 q10, q10, q11
@@ -213,7 +213,7 @@ function x265_pixel_sad_64x\h\()_neon
sub r3, r12
mov r12, #\h/8
-.loop_64x\h:
+.Loop_64x\h:
subs r12, #1
.rept 4
@@ -246,7 +246,7 @@ function x265_pixel_sad_64x\h\()_neon
vabal.u8 q10, d26, d30
vabal.u8 q11, d27, d31
.endr
- bne .loop_64x\h
+ bne .Loop_64x\h
vadd.u16 q8, q8, q9
vadd.u16 q10, q10, q11
@@ -283,7 +283,7 @@ function x265_pixel_sad_24x32_neon
sub r3, #16
mov r12, #8
-.loop_24x32:
+.Loop_24x32:
subs r12, #1
.rept 4
@@ -296,7 +296,7 @@ function x265_pixel_sad_24x32_neon
vld1.8 {d1}, [r2], r3
vabal.u8 q10, d0, d1
.endr
- bne .loop_24x32
+ bne .Loop_24x32
vadd.u16 q8, q8, q9
vadd.u16 d16, d16, d17
@@ -322,7 +322,7 @@ function x265_pixel_sad_48x64_neon
sub r3, #32
mov r12, #16
-.loop_48x64:
+.Loop_48x64:
subs r12, #1
.rept 4
@@ -337,7 +337,7 @@ function x265_pixel_sad_48x64_neon
vabal.u8 q14, d4, d20
vabal.u8 q15, d5, d21
.endr
- bne .loop_48x64
+ bne .Loop_48x64
vadd.u16 q3, q3, q11
vadd.u16 d6, d6, d7
@@ -635,12 +635,12 @@ function x265_sad_x\x\()_16x\h\()_neon
veor.u8 q15, q15
.endif
-.loop_sad_x\x\()_16x\h:
+.Loop_sad_x\x\()_16x\h:
.rept 8
SAD_X_16 \x
.endr
subs r6, #1
- bne .loop_sad_x\x\()_16x\h
+ bne .Loop_sad_x\x\()_16x\h
vadd.u16 q8, q8, q9
vadd.u16 q10, q10, q11
@@ -929,12 +929,12 @@ function x265_sad_x\x\()_64x\h\()_neon
veor.u8 q14, q14
veor.u8 q15, q15
.endif
-.loop_sad_x\x\()_64x\h:
+.Loop_sad_x\x\()_64x\h:
.rept 8
SAD_X_64 \x
.endr
subs r6, #1
- bne .loop_sad_x\x\()_64x\h
+ bne .Loop_sad_x\x\()_64x\h
.if \h <= 16
vadd.u16 q8, q8, q9
@@ -1071,12 +1071,12 @@ function x265_sad_x\x\()_48x64_neon
veor.u8 q15, q15
.endif
-.loop_sad_x\x\()_48x64:
+.Loop_sad_x\x\()_48x64:
.rept 8
SAD_X_48 \x
.endr
subs r6, #1
- bne .loop_sad_x\x\()_48x64
+ bne .Loop_sad_x\x\()_48x64
vpaddl.u16 q8, q8
vpaddl.u16 q9, q9
@@ -1179,12 +1179,12 @@ function x265_sad_x\x\()_24x32_neon
veor.u8 q15, q15
.endif
-.loop_sad_x\x\()_24x32:
+.Loop_sad_x\x\()_24x32:
.rept 8
SAD_X_24 \x
.endr
subs r6, #1
- bne .loop_sad_x\x\()_24x32
+ bne .Loop_sad_x\x\()_24x32
vadd.u16 q8, q8, q9
vadd.u16 q10, q10, q11
diff --git a/source/common/arm/ssd-a.S b/source/common/arm/ssd-a.S
index bb91a0bcb..c00ab0023 100644
--- a/source/common/arm/ssd-a.S
+++ b/source/common/arm/ssd-a.S
@@ -121,7 +121,7 @@ function x265_pixel_sse_pp_32x32_neon
veor.u8 q0, q0
veor.u8 q1, q1
-.loop_sse_pp_32:
+.Loop_sse_pp_32:
subs r12, #1
.rept 4
vld1.64 {q8-q9}, [r0], r1
@@ -139,7 +139,7 @@ function x265_pixel_sse_pp_32x32_neon
vmlal.s16 q0, d26, d26
vmlal.s16 q1, d27, d27
.endr
- bne .loop_sse_pp_32
+ bne .Loop_sse_pp_32
vadd.s32 q0, q1
vadd.s32 d0, d0, d1
vpadd.s32 d0, d0, d0
@@ -154,7 +154,7 @@ function x265_pixel_sse_pp_64x64_neon
veor.u8 q0, q0
veor.u8 q1, q1
-.loop_sse_pp_64:
+.Loop_sse_pp_64:
subs r12, #1
.rept 4
vld1.64 {q8-q9}, [r0]!
@@ -187,7 +187,7 @@ function x265_pixel_sse_pp_64x64_neon
vmlal.s16 q0, d26, d26
vmlal.s16 q1, d27, d27
.endr
- bne .loop_sse_pp_64
+ bne .Loop_sse_pp_64
vadd.s32 q0, q1
vadd.s32 d0, d0, d1
vpadd.s32 d0, d0, d0
@@ -257,7 +257,7 @@ function x265_pixel_sse_ss_16x16_neon
veor.u8 q0, q0
veor.u8 q1, q1
-.loop_sse_ss_16:
+.Loop_sse_ss_16:
subs r12, #1
.rept 4
vld1.s16 {q8-q9}, [r0], r1
@@ -269,7 +269,7 @@ function x265_pixel_sse_ss_16x16_neon
vmlal.s16 q0, d18, d18
vmlal.s16 q1, d19, d19
.endr
- bne .loop_sse_ss_16
+ bne .Loop_sse_ss_16
vadd.s32 q0, q1
vadd.s32 d0, d0, d1
vpadd.s32 d0, d0, d0
@@ -286,7 +286,7 @@ function x265_pixel_sse_ss_32x32_neon
veor.u8 q0, q0
veor.u8 q1, q1
-.loop_sse_ss_32:
+.Loop_sse_ss_32:
subs r12, #1
.rept 4
vld1.s16 {q8-q9}, [r0]!
@@ -307,7 +307,7 @@ function x265_pixel_sse_ss_32x32_neon
vmlal.s16 q0, d18, d18
vmlal.s16 q1, d19, d19
.endr
- bne .loop_sse_ss_32
+ bne .Loop_sse_ss_32
vadd.s32 q0, q1
vadd.s32 d0, d0, d1
vpadd.s32 d0, d0, d0
@@ -324,7 +324,7 @@ function x265_pixel_sse_ss_64x64_neon
veor.u8 q0, q0
veor.u8 q1, q1
-.loop_sse_ss_64:
+.Loop_sse_ss_64:
subs r12, #1
.rept 2
vld1.s16 {q8-q9}, [r0]!
@@ -363,7 +363,7 @@ function x265_pixel_sse_ss_64x64_neon
vmlal.s16 q0, d18, d18
vmlal.s16 q1, d19, d19
.endr
- bne .loop_sse_ss_64
+ bne .Loop_sse_ss_64
vadd.s32 q0, q1
vadd.s32 d0, d0, d1
vpadd.s32 d0, d0, d0
@@ -417,7 +417,7 @@ function x265_pixel_ssd_s_16x16_neon
veor.u8 q0, q0
veor.u8 q1, q1
-.loop_ssd_s_16:
+.Loop_ssd_s_16:
subs r12, #1
.rept 2
vld1.s16 {q8-q9}, [r0], r1
@@ -431,7 +431,7 @@ function x265_pixel_ssd_s_16x16_neon
vmlal.s16 q0, d22, d22
vmlal.s16 q1, d23, d23
.endr
- bne .loop_ssd_s_16
+ bne .Loop_ssd_s_16
vadd.s32 q0, q1
vadd.s32 d0, d0, d1
vpadd.s32 d0, d0, d0
@@ -446,7 +446,7 @@ function x265_pixel_ssd_s_32x32_neon
veor.u8 q0, q0
veor.u8 q1, q1
-.loop_ssd_s_32:
+.Loop_ssd_s_32:
subs r12, #1
.rept 4
vld1.s16 {q8-q9}, [r0]!
@@ -460,7 +460,7 @@ function x265_pixel_ssd_s_32x32_neon
vmlal.s16 q0, d22, d22
vmlal.s16 q1, d23, d23
.endr
- bne .loop_ssd_s_32
+ bne .Loop_ssd_s_32
vadd.s32 q0, q1
vadd.s32 d0, d0, d1
vpadd.s32 d0, d0, d0
--
2.42.1
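
A minimal, hypothetical sketch (not part of the patch) of what the renamed labels do when fed through GNU as: a label spelled with the ".L" prefix is treated as assembler-local and emits no symbol table entry, whereas the old spelling produces a symbol of its own.

    // label_demo.s (hypothetical file, AArch64 GNU as syntax)
        .text
        .global demo
    demo:
        mov     w9, #4              // iteration count
    .Loop_demo:                     // ".L" prefix: local, no symbol emitted
        sub     w9, w9, #1
        cbnz    w9, .Loop_demo
    .loop_old:                      // no ".L" prefix: emitted as a symbol
        ret

Assembling this and inspecting the object with nm shows entries for demo and .loop_old, but none for .Loop_demo.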