[x265] [PATCH] arm: Implement pixelavg_pp_NxN_neon
radhakrishnan at multicorewareinc.com
radhakrishnan at multicorewareinc.com
Wed Feb 17 14:17:31 CET 2016
# HG changeset patch
# User Radhakrishnan VR <radhakrishnan at multicorewareinc.com>
# Date 1455714914 -19800
# Wed Feb 17 18:45:14 2016 +0530
# Node ID 4bc30a538a189e7e855f8619b8270bd5f730f8e8
# Parent f44b6adbffd32cff9ee565f6f263584b031559b4
arm: Implement pixelavg_pp_NxN_neon
diff -r f44b6adbffd3 -r 4bc30a538a18 source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp Thu Feb 11 15:49:14 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp Wed Feb 17 18:45:14 2016 +0530
@@ -43,21 +43,21 @@
{
// blockcopy
p.pu[LUMA_16x16].copy_pp = PFX(blockcopy_pp_16x16_neon);
- p.pu[LUMA_8x4].copy_pp = PFX(blockcopy_pp_8x4_neon);
- p.pu[LUMA_8x8].copy_pp = PFX(blockcopy_pp_8x8_neon);
- p.pu[LUMA_8x16].copy_pp = PFX(blockcopy_pp_8x16_neon);
- p.pu[LUMA_8x32].copy_pp = PFX(blockcopy_pp_8x32_neon);
- p.pu[LUMA_12x16].copy_pp = PFX(blockcopy_pp_12x16_neon);
- p.pu[LUMA_4x4].copy_pp = PFX(blockcopy_pp_4x4_neon);
- p.pu[LUMA_4x8].copy_pp = PFX(blockcopy_pp_4x8_neon);
- p.pu[LUMA_4x16].copy_pp = PFX(blockcopy_pp_4x16_neon);
- p.pu[LUMA_16x4].copy_pp = PFX(blockcopy_pp_16x4_neon);
- p.pu[LUMA_16x8].copy_pp = PFX(blockcopy_pp_16x8_neon);
+ p.pu[LUMA_8x4].copy_pp = PFX(blockcopy_pp_8x4_neon);
+ p.pu[LUMA_8x8].copy_pp = PFX(blockcopy_pp_8x8_neon);
+ p.pu[LUMA_8x16].copy_pp = PFX(blockcopy_pp_8x16_neon);
+ p.pu[LUMA_8x32].copy_pp = PFX(blockcopy_pp_8x32_neon);
+ p.pu[LUMA_12x16].copy_pp = PFX(blockcopy_pp_12x16_neon);
+ p.pu[LUMA_4x4].copy_pp = PFX(blockcopy_pp_4x4_neon);
+ p.pu[LUMA_4x8].copy_pp = PFX(blockcopy_pp_4x8_neon);
+ p.pu[LUMA_4x16].copy_pp = PFX(blockcopy_pp_4x16_neon);
+ p.pu[LUMA_16x4].copy_pp = PFX(blockcopy_pp_16x4_neon);
+ p.pu[LUMA_16x8].copy_pp = PFX(blockcopy_pp_16x8_neon);
p.pu[LUMA_16x12].copy_pp = PFX(blockcopy_pp_16x12_neon);
- p.pu[LUMA_16x32].copy_pp = PFX(blockcopy_pp_16x32_neon);
+ p.pu[LUMA_16x32].copy_pp = PFX(blockcopy_pp_16x32_neon);
p.pu[LUMA_16x64].copy_pp = PFX(blockcopy_pp_16x64_neon);
p.pu[LUMA_24x32].copy_pp = PFX(blockcopy_pp_24x32_neon);
- p.pu[LUMA_32x8].copy_pp = PFX(blockcopy_pp_32x8_neon);
+ p.pu[LUMA_32x8].copy_pp = PFX(blockcopy_pp_32x8_neon);
p.pu[LUMA_32x16].copy_pp = PFX(blockcopy_pp_32x16_neon);
p.pu[LUMA_32x24].copy_pp = PFX(blockcopy_pp_32x24_neon);
p.pu[LUMA_32x32].copy_pp = PFX(blockcopy_pp_32x32_neon);
@@ -121,6 +121,33 @@
p.pu[LUMA_64x32].sad_x4 = PFX(sad_x4_64x32_neon);
p.pu[LUMA_64x48].sad_x4 = PFX(sad_x4_64x48_neon);
p.pu[LUMA_64x64].sad_x4 = PFX(sad_x4_64x64_neon);
+
+ // pixel_avg_pp
+ p.pu[LUMA_4x4].pixelavg_pp = PFX(pixel_avg_pp_4x4_neon);
+ p.pu[LUMA_4x8].pixelavg_pp = PFX(pixel_avg_pp_4x8_neon);
+ p.pu[LUMA_4x16].pixelavg_pp = PFX(pixel_avg_pp_4x16_neon);
+ p.pu[LUMA_8x4].pixelavg_pp = PFX(pixel_avg_pp_8x4_neon);
+ p.pu[LUMA_8x8].pixelavg_pp = PFX(pixel_avg_pp_8x8_neon);
+ p.pu[LUMA_8x16].pixelavg_pp = PFX(pixel_avg_pp_8x16_neon);
+ p.pu[LUMA_8x32].pixelavg_pp = PFX(pixel_avg_pp_8x32_neon);
+ p.pu[LUMA_12x16].pixelavg_pp = PFX(pixel_avg_pp_12x16_neon);
+ p.pu[LUMA_16x4].pixelavg_pp = PFX(pixel_avg_pp_16x4_neon);
+ p.pu[LUMA_16x8].pixelavg_pp = PFX(pixel_avg_pp_16x8_neon);
+ p.pu[LUMA_16x12].pixelavg_pp = PFX(pixel_avg_pp_16x12_neon);
+ p.pu[LUMA_16x16].pixelavg_pp = PFX(pixel_avg_pp_16x16_neon);
+ p.pu[LUMA_16x32].pixelavg_pp = PFX(pixel_avg_pp_16x32_neon);
+ p.pu[LUMA_16x64].pixelavg_pp = PFX(pixel_avg_pp_16x64_neon);
+ p.pu[LUMA_24x32].pixelavg_pp = PFX(pixel_avg_pp_24x32_neon);
+ p.pu[LUMA_32x8].pixelavg_pp = PFX(pixel_avg_pp_32x8_neon);
+ p.pu[LUMA_32x16].pixelavg_pp = PFX(pixel_avg_pp_32x16_neon);
+ p.pu[LUMA_32x24].pixelavg_pp = PFX(pixel_avg_pp_32x24_neon);
+ p.pu[LUMA_32x32].pixelavg_pp = PFX(pixel_avg_pp_32x32_neon);
+ p.pu[LUMA_32x64].pixelavg_pp = PFX(pixel_avg_pp_32x64_neon);
+ p.pu[LUMA_48x64].pixelavg_pp = PFX(pixel_avg_pp_48x64_neon);
+ p.pu[LUMA_64x16].pixelavg_pp = PFX(pixel_avg_pp_64x16_neon);
+ p.pu[LUMA_64x32].pixelavg_pp = PFX(pixel_avg_pp_64x32_neon);
+ p.pu[LUMA_64x48].pixelavg_pp = PFX(pixel_avg_pp_64x48_neon);
+ p.pu[LUMA_64x64].pixelavg_pp = PFX(pixel_avg_pp_64x64_neon);
}
if (cpuMask & X265_CPU_ARMV6)
{
diff -r f44b6adbffd3 -r 4bc30a538a18 source/common/arm/mc-a.S
--- a/source/common/arm/mc-a.S Thu Feb 11 15:49:14 2016 +0530
+++ b/source/common/arm/mc-a.S Wed Feb 17 18:45:14 2016 +0530
@@ -2,6 +2,7 @@
* Copyright (C) 2016 x265 project
*
* Authors: Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
+ * Radhakrishnan <radhakrishnan at multicorewareinc.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -36,68 +37,12 @@
* r2 - src
* r3 - srcStride */
function x265_blockcopy_pp_16x16_neon
+.rept 16
vld1.8 {q0}, [r2]
vst1.8 {q0}, [r0]
add r2, r2, r3
add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
- add r2, r2, r3
- add r0, r0, r1
- vld1.8 {q0}, [r2]
- vst1.8 {q0}, [r0]
+.endr
bx lr
endfunc
@@ -108,7 +53,7 @@
ldr r4, [r2], r3
str r4, [r0], r1
.endr
- pop {r4}
+ pop {r4}
bx lr
endfunc
.endm
@@ -141,7 +86,7 @@
vst1.8 {q0}, [r0], r1
.endr
subs r4, r4, #1
- bne loop_16x\h
+ bne loop_16x\h
pop {r4}
bx lr
endfunc
@@ -171,12 +116,12 @@
sub r3, r5
sub r1, r5
.rept 16
- vld1.8 {d0}, [r2]!
+ vld1.8 {d0}, [r2]!
ldr r4, [r2], r3
vst1.8 {d0}, [r0]!
str r4, [r0], r1
.endr
- pop {r4, r5}
+ pop {r4, r5}
bx lr
endfunc
@@ -189,7 +134,7 @@
vst1.8 {d0, d1, d2}, [r0], r1
.endr
subs r4, r4, #1
- bne loop_24x32
+ bne loop_24x32
pop {r4}
bx lr
endfunc
@@ -212,7 +157,7 @@
vst1.8 {q0, q1}, [r0], r1
.endr
subs r4, r4, #1
- bne loop_32x\h
+ bne loop_32x\h
pop {r4}
bx lr
endfunc
@@ -237,7 +182,7 @@
vst1.8 {q2}, [r0], r1
.endr
subs r4, r4, #1
- bne loop_48x64
+ bne loop_48x64
pop {r4, r5}
bx lr
endfunc
@@ -257,7 +202,7 @@
vst1.8 {q2, q3}, [r0], r1
.endr
subs r4, r4, #1
- bne loop_64x\h
+ bne loop_64x\h
pop {r4, r5}
bx lr
endfunc
@@ -267,3 +212,234 @@
blockcopy_pp_64xN_neon 32 8
blockcopy_pp_64xN_neon 48 12
blockcopy_pp_64xN_neon 64 16
+
+// void pixelavg_pp(pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int)
+.macro pixel_avg_pp_4xN_neon h // emits x265_pixel_avg_pp_4x<h>_neon: rounding average of two 4-byte-wide sources, h rows
+function x265_pixel_avg_pp_4x\h\()_neon
+ push {r4} // r4 is overwritten below, so its incoming value is saved
+ ldr r4, [sp, #4] // first stack argument (src1 in the pixelavg_pp prototype), 4 bytes above the saved r4
+ ldr r12, [sp, #8] // second stack argument (sstride1 in the pixelavg_pp prototype)
+.rept \h // fully unrolled: one row per repetition
+ vld1.32 {d0[]}, [r2], r3 // load 4 bytes from [r2] into every 32-bit lane of d0, then r2 += r3
+ vld1.32 {d1[]}, [r4], r12 // load 4 bytes from [r4] into every 32-bit lane of d1, then r4 += r12
+ vrhadd.u8 d2, d0, d1 // per-byte rounding halving add: (a + b + 1) >> 1
+ vst1.32 {d2[0]}, [r0], r1 // store only lane 0 (4 bytes) to [r0], then r0 += r1
+.endr
+ pop {r4} // restore the saved r4
+ bx lr
+endfunc
+.endm
+
+pixel_avg_pp_4xN_neon 4 // 4x4
+pixel_avg_pp_4xN_neon 8 // 4x8
+pixel_avg_pp_4xN_neon 16 // 4x16
+
+.macro pixel_avg_pp_8xN_neon h // emits x265_pixel_avg_pp_8x<h>_neon: rounding average of two 8-byte-wide sources, h rows
+function x265_pixel_avg_pp_8x\h\()_neon
+ push {r4} // r4 is overwritten below, so its incoming value is saved
+ ldr r4, [sp, #4] // first stack argument (src1 in the pixelavg_pp prototype), 4 bytes above the saved r4
+ ldr r12, [sp, #8] // second stack argument (sstride1 in the pixelavg_pp prototype)
+.rept \h // fully unrolled: one row per repetition
+ vld1.8 {d0}, [r2], r3 // load 8 bytes from [r2], then r2 += r3
+ vld1.8 {d1}, [r4], r12 // load 8 bytes from [r4], then r4 += r12
+ vrhadd.u8 d2, d0, d1 // per-byte rounding halving add: (a + b + 1) >> 1
+ vst1.8 {d2}, [r0], r1 // store 8 bytes to [r0], then r0 += r1
+.endr
+ pop {r4} // restore the saved r4
+ bx lr
+endfunc
+.endm
+
+pixel_avg_pp_8xN_neon 4 // 8x4
+pixel_avg_pp_8xN_neon 8 // 8x8
+pixel_avg_pp_8xN_neon 16 // 8x16
+pixel_avg_pp_8xN_neon 32 // 8x32
+
+function x265_pixel_avg_pp_12x16_neon // rounding average of two 12-byte-wide sources over 16 rows; each row is split into 8 + 4 bytes
+ push {r4, r6} // both registers are overwritten below
+ mov r6, #8 // number of bytes consumed by the first, post-incrementing access of each row
+ ldr r4, [sp, #8] // first stack argument (src1 in the pixelavg_pp prototype), above the two saved registers
+ ldr r12, [sp, #12] // second stack argument (sstride1 in the pixelavg_pp prototype)
+ sub r1, r6 // each stride is reduced by 8 because the pointer has already advanced 8 bytes
+ sub r3, r6 // when the stride is applied by the second access of the row
+ sub r12, r6
+.rept 16 // fully unrolled: one row per repetition
+ vld1.32 {d0}, [r2]! // bytes 0-7 of the row at r2, r2 += 8
+ vld1.32 {d1[0]}, [r2], r3 // bytes 8-11 into lane 0 of d1, then r2 += r3 (stride - 8)
+ vld1.32 {d2}, [r4]! // bytes 0-7 of the row at r4, r4 += 8
+ vld1.32 {d3[0]}, [r4], r12 // bytes 8-11 into lane 0 of d3, then r4 += r12 (stride - 8)
+ vrhadd.u8 d0, d0, d2 // per-byte rounding halving add: (a + b + 1) >> 1
+ vrhadd.u8 d1, d1, d3 // lane 1 of d1 and d3 was never loaded; only lane 0 of the result is stored
+ vst1.8 {d0}, [r0]! // store bytes 0-7, r0 += 8
+ vst1.32 {d1[0]}, [r0], r1 // store bytes 8-11, then r0 += r1 (stride - 8)
+.endr
+ pop {r4, r6} // restore the saved registers
+ bx lr
+endfunc
+
+.macro pixel_avg_pp_16xN_neon h // emits x265_pixel_avg_pp_16x<h>_neon: rounding average of two 16-byte-wide sources, h rows
+function x265_pixel_avg_pp_16x\h\()_neon
+ push {r4} // r4 is overwritten below, so its incoming value is saved
+ ldr r4, [sp, #4] // first stack argument (src1 in the pixelavg_pp prototype), 4 bytes above the saved r4
+ ldr r12, [sp, #8] // second stack argument (sstride1 in the pixelavg_pp prototype)
+.rept \h // fully unrolled: one row per repetition
+ vld1.8 {q0}, [r2], r3 // load 16 bytes from [r2], then r2 += r3
+ vld1.8 {q1}, [r4], r12 // load 16 bytes from [r4], then r4 += r12
+ vrhadd.u8 q2, q0, q1 // per-byte rounding halving add: (a + b + 1) >> 1
+ vst1.8 {q2}, [r0], r1 // store 16 bytes to [r0], then r0 += r1
+.endr
+ pop {r4} // restore the saved r4
+ bx lr
+endfunc
+.endm
+
+pixel_avg_pp_16xN_neon 4 // 16x4
+pixel_avg_pp_16xN_neon 8 // 16x8
+pixel_avg_pp_16xN_neon 12 // 16x12
+pixel_avg_pp_16xN_neon 16 // 16x16
+pixel_avg_pp_16xN_neon 32 // 16x32
+
+function x265_pixel_avg_pp_16x64_neon // rounding average of two 16-byte-wide sources over 64 rows, as a loop of 8 passes x 8 unrolled rows
+ push {r4, r6} // both registers are overwritten below
+ ldr r4, [sp, #8] // first stack argument (src1 in the pixelavg_pp prototype), above the two saved registers
+ ldr r12, [sp, #12] // second stack argument (sstride1 in the pixelavg_pp prototype)
+ mov r6, #8 // loop counter: 8 passes
+lpavg_16x64:
+.rept 8 // 8 rows per pass
+ vld1.8 {q0}, [r2], r3 // load 16 bytes from [r2], then r2 += r3
+ vld1.8 {q1}, [r4], r12 // load 16 bytes from [r4], then r4 += r12
+ vrhadd.u8 q2, q0, q1 // per-byte rounding halving add: (a + b + 1) >> 1
+ vst1.8 {q2}, [r0], r1 // store 16 bytes to [r0], then r0 += r1
+.endr
+ subs r6, r6, #1 // decrement the counter and set flags for the branch
+ bne lpavg_16x64 // repeat until the counter reaches zero
+ pop {r4 , r6} // restore the saved registers
+ bx lr
+endfunc
+
+function x265_pixel_avg_pp_24x32_neon // rounding average of two 24-byte-wide sources over 32 rows, as a loop of 4 passes x 8 unrolled rows
+ push {r4, r6} // both registers are overwritten below
+ ldr r4, [sp, #8] // first stack argument (src1 in the pixelavg_pp prototype), above the two saved registers
+ ldr r12, [sp, #12] // second stack argument (sstride1 in the pixelavg_pp prototype)
+ mov r6, #4 // loop counter: 4 passes
+lpavg_24x32:
+.rept 8 // 8 rows per pass
+ vld1.8 {d0, d1, d2}, [r2], r3 // load 24 bytes from [r2], then r2 += r3
+ vld1.8 {d3, d4, d5}, [r4], r12 // load 24 bytes from [r4], then r4 += r12
+ vrhadd.u8 d0, d0, d3 // per-byte rounding halving add, bytes 0-7
+ vrhadd.u8 d1, d1, d4 // bytes 8-15
+ vrhadd.u8 d2, d2, d5 // bytes 16-23
+ vst1.8 {d0, d1, d2}, [r0], r1 // store 24 bytes to [r0], then r0 += r1
+.endr
+ subs r6, r6, #1 // decrement the counter and set flags for the branch
+ bne lpavg_24x32 // repeat until the counter reaches zero
+ pop {r4, r6} // restore the saved registers
+ bx lr
+endfunc
+
+.macro pixel_avg_pp_32xN_neon h // emits x265_pixel_avg_pp_32x<h>_neon: rounding average of two 32-byte-wide sources, h rows, fully unrolled
+function x265_pixel_avg_pp_32x\h\()_neon
+ push {r4} // r4 is overwritten below, so its incoming value is saved
+ ldr r4, [sp, #4] // first stack argument (src1 in the pixelavg_pp prototype), 4 bytes above the saved r4
+ ldr r12, [sp, #8] // second stack argument (sstride1 in the pixelavg_pp prototype)
+.rept \h // one row per repetition
+ vld1.8 {q0, q1}, [r2], r3 // load 32 bytes from [r2], then r2 += r3
+ vld1.8 {q2, q3}, [r4], r12 // load 32 bytes from [r4], then r4 += r12
+ vrhadd.u8 q0, q0, q2 // per-byte rounding halving add, bytes 0-15
+ vrhadd.u8 q1, q1, q3 // bytes 16-31
+ vst1.8 {q0, q1}, [r0], r1 // store 32 bytes to [r0], then r0 += r1
+.endr
+ pop {r4} // restore the saved r4
+ bx lr
+endfunc
+.endm
+
+pixel_avg_pp_32xN_neon 8 // 32x8
+pixel_avg_pp_32xN_neon 16 // 32x16
+pixel_avg_pp_32xN_neon 24 // 32x24
+
+.macro pixel_avg_pp_32xN1_neon h i // looped variant for taller 32-wide blocks: i passes of 8 unrolled rows, so i * 8 must equal h
+function x265_pixel_avg_pp_32x\h\()_neon
+ push {r4, r6} // both registers are overwritten below
+ ldr r4, [sp, #8] // first stack argument (src1 in the pixelavg_pp prototype), above the two saved registers
+ ldr r12, [sp, #12] // second stack argument (sstride1 in the pixelavg_pp prototype)
+ mov r6, #\i // loop counter: number of 8-row passes
+lpavg_32x\h\():
+.rept 8 // 8 rows per pass
+ vld1.8 {q0, q1}, [r2], r3 // load 32 bytes from [r2], then r2 += r3
+ vld1.8 {q2, q3}, [r4], r12 // load 32 bytes from [r4], then r4 += r12
+ vrhadd.u8 q0, q0, q2 // per-byte rounding halving add, bytes 0-15
+ vrhadd.u8 q1, q1, q3 // bytes 16-31
+ vst1.8 {q0, q1}, [r0], r1 // store 32 bytes to [r0], then r0 += r1
+.endr
+ subs r6, r6, #1 // decrement the counter and set flags for the branch
+ bne lpavg_32x\h // repeat until the counter reaches zero
+ pop {r4, r6} // restore the saved registers
+ bx lr
+endfunc
+.endm
+
+pixel_avg_pp_32xN1_neon 32 4 // 32x32: 4 passes x 8 rows
+pixel_avg_pp_32xN1_neon 64 8 // 32x64: 8 passes x 8 rows
+
+function x265_pixel_avg_pp_48x64_neon // rounding average of two 48-byte-wide sources over 64 rows; each row is split into 32 + 16 bytes
+ push {r4, r6, r7} // all three registers are overwritten below
+ ldr r4, [sp, #12] // first stack argument (src1 in the pixelavg_pp prototype), above the three saved registers
+ ldr r12, [sp, #16] // second stack argument (sstride1 in the pixelavg_pp prototype)
+ mov r6, #8 // loop counter: 8 passes of 8 rows
+ mov r7, #32 // number of bytes consumed by the first, post-incrementing access of each row
+ sub r1, r7 // each stride is reduced by 32 because the pointer has already advanced 32 bytes
+ sub r3, r7 // when the stride is applied by the second access of the row
+ sub r12, r7
+lpavg_48x64:
+.rept 8 // 8 rows per pass
+ vld1.8 {q0, q1}, [r2]! // bytes 0-31 of the row at r2, r2 += 32
+ vld1.8 {q2}, [r2], r3 // bytes 32-47, then r2 += r3 (stride - 32)
+ vld1.8 {q8, q9}, [r4]! // bytes 0-31 of the row at r4, r4 += 32
+ vld1.8 {q10}, [r4], r12 // bytes 32-47, then r4 += r12 (stride - 32)
+ vrhadd.u8 q0, q0, q8 // per-byte rounding halving add, bytes 0-15
+ vrhadd.u8 q1, q1, q9 // bytes 16-31
+ vrhadd.u8 q2, q2, q10 // bytes 32-47
+ vst1.8 {q0, q1}, [r0]! // store bytes 0-31, r0 += 32
+ vst1.8 {q2}, [r0], r1 // store bytes 32-47, then r0 += r1 (stride - 32)
+.endr
+ subs r6, r6, #1 // decrement the counter and set flags for the branch
+ bne lpavg_48x64 // repeat until the counter reaches zero
+ pop {r4, r6, r7} // restore the saved registers
+ bx lr
+endfunc
+
+.macro pixel_avg_pp_64xN_neon h i // emits x265_pixel_avg_pp_64x<h>_neon: i passes of 4 unrolled rows, so i * 4 must equal h
+function x265_pixel_avg_pp_64x\h\()_neon
+ push {r4, r6, r7} // all three registers are overwritten below
+ ldr r4, [sp, #12] // first stack argument (src1 in the pixelavg_pp prototype), above the three saved registers
+ ldr r12, [sp, #16] // second stack argument (sstride1 in the pixelavg_pp prototype)
+ mov r7, #32 // number of bytes consumed by the first, post-incrementing access of each row
+ mov r6, #\i // loop counter: number of 4-row passes
+ sub r3, r7 // each stride is reduced by 32 because the pointer has already advanced 32 bytes
+ sub r12, r7 // when the stride is applied by the second access of the row
+ sub r1, r7
+lpavg_64x\h\():
+.rept 4 // 4 rows per pass
+ vld1.8 {q0, q1}, [r2]! // bytes 0-31 of the row at r2, r2 += 32
+ vld1.8 {q2, q3}, [r2], r3 // bytes 32-63, then r2 += r3 (stride - 32)
+ vld1.8 {q8, q9}, [r4]! // bytes 0-31 of the row at r4, r4 += 32
+ vld1.8 {q10, q11}, [r4], r12 // bytes 32-63, then r4 += r12 (stride - 32)
+ vrhadd.u8 q0, q0, q8 // per-byte rounding halving add, bytes 0-15
+ vrhadd.u8 q1, q1, q9 // bytes 16-31
+ vrhadd.u8 q2, q2, q10 // bytes 32-47
+ vrhadd.u8 q3, q3, q11 // bytes 48-63
+ vst1.8 {q0, q1}, [r0]! // store bytes 0-31, r0 += 32
+ vst1.8 {q2, q3}, [r0], r1 // store bytes 32-63, then r0 += r1 (stride - 32)
+.endr
+ subs r6, r6, #1 // decrement the counter and set flags for the branch
+ bne lpavg_64x\h // repeat until the counter reaches zero
+ pop {r4, r6, r7} // restore the saved registers
+ bx lr
+endfunc
+.endm
+
+pixel_avg_pp_64xN_neon 16 4 // 64x16: 4 passes x 4 rows
+pixel_avg_pp_64xN_neon 32 8 // 64x32: 8 passes x 4 rows
+pixel_avg_pp_64xN_neon 48 12 // 64x48: 12 passes x 4 rows
+pixel_avg_pp_64xN_neon 64 16 // 64x64: 16 passes x 4 rows
diff -r f44b6adbffd3 -r 4bc30a538a18 source/common/arm/pixel.h
--- a/source/common/arm/pixel.h Thu Feb 11 15:49:14 2016 +0530
+++ b/source/common/arm/pixel.h Wed Feb 17 18:45:14 2016 +0530
@@ -33,6 +33,32 @@
int x265_pixel_sad_4x4_armv6(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
int x265_pixel_sad_4x8_armv6(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
+void x265_pixel_avg_pp_4x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_4x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_4x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_8x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_8x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_8x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_8x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_12x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x12_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_16x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_24x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x24_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_32x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_48x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_64x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_64x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_64x48_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+void x265_pixel_avg_pp_64x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
+
void x265_sad_x3_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
void x265_sad_x3_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
void x265_sad_x3_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
More information about the x265-devel mailing list