[x264-devel] arm: Implement x264_deblock_h_chroma_422_neon

Martin Storsjö git at videolan.org
Sun Oct 11 19:01:05 CEST 2015


x264 | branch: master | Martin Storsjö <martin at martin.st> | Tue Aug 25 14:38:15 2015 +0300| [3c66591e859045ef79a7131b991a5f20c80ffbb4] | committer: Henrik Gramner

arm: Implement x264_deblock_h_chroma_422_neon

checkasm timing       Cortex-A7      A8     A9
deblock_h_chroma_422_c       6953    6269   5145
deblock_h_chroma_422_neon    3905    2569   2551

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=3c66591e859045ef79a7131b991a5f20c80ffbb4
---

 common/arm/deblock-a.S |   18 ++++++++++++++++++
 common/deblock.c       |    4 ++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/common/arm/deblock-a.S b/common/arm/deblock-a.S
index 446e678..a300220 100644
--- a/common/arm/deblock-a.S
+++ b/common/arm/deblock-a.S
@@ -4,6 +4,7 @@
  * Copyright (C) 2009-2015 x264 project
  *
  * Authors: Mans Rullgard <mans at mansr.com>
+ *          Martin Storsjo <martin at martin.st>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -261,6 +262,7 @@ function x264_deblock_h_chroma_neon
     h264_loop_filter_start
 
     sub             r0,  r0,  #4
+deblock_h_chroma:
     vld1.8          {d18}, [r0], r1
     vld1.8          {d16}, [r0], r1
     vld1.8          {d0},  [r0], r1
@@ -290,6 +292,22 @@ function x264_deblock_h_chroma_neon
     bx              lr
 endfunc
 
+function x264_deblock_h_chroma_422_neon // 4:2:2 horizontal chroma deblock: runs the shared deblock_h_chroma body twice
+    h264_loop_filter_start // macro defined outside this hunk; NOTE(review): presumably loads tc0 into d24 and may return early - confirm
+    push            {lr} // the bl below overwrites lr, so keep the return address of the caller on the stack
+    sub             r0,  r0,  #4 // same pointer adjustment x264_deblock_h_chroma_neon applies before the deblock_h_chroma label
+    add             r1,  r1,  r1 // r1 = 2 * r1 (second argument, stride in the C prototype): each pass steps over every other row
+    bl              deblock_h_chroma // first pass: call into the body shared with x264_deblock_h_chroma_neon
+    ldr             ip,  [sp, #4] // first stack argument (tc0 pointer per the C prototype); offset is 4 because lr was pushed
+    ldr             ip,  [ip] // read one 32-bit word from that pointer
+    vdup.32         d24, ip // broadcast the word to both lanes of d24; NOTE(review): presumably d24 was modified by the first pass - confirm
+    sub             r0,  r0,  r1, lsl #3 // r0 -= 8 * r1: step back eight doubled-stride rows
+    add             r0,  r0,  r1, lsr #1 // r0 += r1 / 2: move down by one original stride, onto the rows skipped by the first pass
+    sub             r0,  r0,  #2 // NOTE(review): presumably undoes the net horizontal offset left in r0 by the first pass - confirm against the deblock_h_chroma body
+    pop             {lr} // restore the original return address before the tail call
+    b               deblock_h_chroma // second pass as a tail call: the shared body returns straight to the original caller
+endfunc
+
 function x264_deblock_strength_neon
     ldr             ip,  [sp]
     vmov.i8         q8,  #0
diff --git a/common/deblock.c b/common/deblock.c
index 374e293..83bda62 100644
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -739,8 +739,8 @@ void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int b
 void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                  int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                  int mvy_limit, int bframe );
-#if ARCH_AARCH64
 void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#if ARCH_AARCH64
 void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
 void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
 void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
@@ -873,11 +873,11 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
         pf->deblock_luma[0] = x264_deblock_h_luma_neon;
         pf->deblock_chroma[1] = x264_deblock_v_chroma_neon;
         pf->deblock_h_chroma_420 = x264_deblock_h_chroma_neon;
+        pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon;
 #if ARCH_AARCH64
         pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_neon;
         pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_neon;
         pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_neon;
-        pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon;
         pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_neon;
         pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_neon;
         pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_neon;



More information about the x264-devel mailing list