[x264-devel] aarch64: x264_denoise_dct_neon
Janne Grunau
git at videolan.org
Sat Dec 20 21:10:47 CET 2014
x264 | branch: master | Janne Grunau <janne-x264 at jannau.net> | Tue Oct 21 15:18:49 2014 +0200| [4d400a6ec67f17ae3b17876b0318b956b6d5c856] | committer: Anton Mitrofanov
aarch64: x264_denoise_dct_neon
3.5 times faster.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=4d400a6ec67f17ae3b17876b0318b956b6d5c856
---
common/aarch64/quant-a.S | 25 +++++++++++++++++++++++++
common/aarch64/quant.h | 3 +++
common/quant.c | 1 +
3 files changed, 29 insertions(+)
diff --git a/common/aarch64/quant-a.S b/common/aarch64/quant-a.S
index d3b2933..f4be81b 100644
--- a/common/aarch64/quant-a.S
+++ b/common/aarch64/quant-a.S
@@ -574,3 +574,28 @@ endfunc
X264_COEFF_LEVEL_RUN 8
X264_COEFF_LEVEL_RUN 15
X264_COEFF_LEVEL_RUN 16
+
+function x264_denoise_dct_neon, export=1  // x0: 16-bit coefs (in/out), x1: 32-bit sums (in/out), x2: 16-bit offsets (in), w3: element count
+1: subs w3, w3, #16  // count -= 16; the flags set here are consumed by the b.gt at the bottom of the loop
+ ld1 {v0.8h,v1.8h}, [x0]  // load 16 coefficients (x0 is advanced later, by the store)
+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x1]  // load the 16 matching 32-bit sums (x1 is advanced by the store)
+ abs v16.8h, v0.8h  // |coef| for lanes 0-7
+ abs v17.8h, v1.8h  // |coef| for lanes 8-15
+ ld1 {v2.8h,v3.8h}, [x2], #32  // load 16 offsets and advance x2 by 32 bytes
+ cmlt v18.8h, v0.8h, #0  // sign mask: all ones in lanes where coef < 0 (lanes 0-7)
+ cmlt v19.8h, v1.8h, #0  // sign mask for lanes 8-15
+ uaddw v4.4s, v4.4s, v16.4h  // sum[0..3] += |coef|, widened unsigned to 32 bits
+ uaddw2 v5.4s, v5.4s, v16.8h  // sum[4..7] += |coef| (upper half of v16)
+ uqsub v20.8h, v16.8h, v2.8h  // |coef| - offset with unsigned saturation, so it clamps at 0 (lanes 0-7)
+ uqsub v21.8h, v17.8h, v3.8h  // same for lanes 8-15
+ uaddw v6.4s, v6.4s, v17.4h  // sum[8..11] += |coef|
+ uaddw2 v7.4s, v7.4s, v17.8h  // sum[12..15] += |coef| (upper half of v17)
+ neg v22.8h, v20.8h  // negated result, used where the input was negative (lanes 0-7)
+ neg v23.8h, v21.8h  // negated result for lanes 8-15
+ bsl v18.16b, v22.16b, v20.16b  // restore sign: mask bits set -> negated value, clear -> positive value
+ bsl v19.16b, v23.16b, v21.16b  // same for lanes 8-15; results land in the mask registers v18/v19
+ st1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x1], #64  // write back the updated sums, advance x1 by 64 bytes
+ st1 {v18.8h,v19.8h}, [x0], #32  // write back the processed coefficients, advance x0 by 32 bytes
+ b.gt 1b  // repeat while the remaining count is > 0; the body always runs at least once
+ ret  // no return value is set up; the prototype declares void
+endfunc
diff --git a/common/aarch64/quant.h b/common/aarch64/quant.h
index 360af26..a06e78e 100644
--- a/common/aarch64/quant.h
+++ b/common/aarch64/quant.h
@@ -53,4 +53,7 @@ int x264_coeff_level_run4_aarch64( int16_t *, x264_run_level_t * );
int x264_coeff_level_run8_neon( int16_t *, x264_run_level_t * );
int x264_coeff_level_run15_neon( int16_t *, x264_run_level_t * );
int x264_coeff_level_run16_neon( int16_t *, x264_run_level_t * );
+
+void x264_denoise_dct_neon( dctcoef *, uint32_t *, udctcoef *, int ); /* args: coefficients (in/out), 32-bit sums (in/out), offsets (read only), element count (consumed 16 at a time) */
+
#endif
diff --git a/common/quant.c b/common/quant.c
index 514e658..c3392bc 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -764,6 +764,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->decimate_score15 = x264_decimate_score15_neon;
pf->decimate_score16 = x264_decimate_score16_neon;
pf->decimate_score64 = x264_decimate_score64_neon;
+ pf->denoise_dct = x264_denoise_dct_neon;
}
#endif
#endif // HIGH_BIT_DEPTH
More information about the x264-devel
mailing list