[x265] [PATCH] arm : Implement interp_4tap_horiz_pp,ps ARM NEON

radhakrishnan at multicorewareinc.com radhakrishnan at multicorewareinc.com
Tue Mar 29 09:15:42 CEST 2016


# HG changeset patch
# User Radhakrishnan VR <radhakrishnan at multicorewareinc.com>
# Date 1459232792 -19800
#      Tue Mar 29 11:56:32 2016 +0530
# Node ID f3e5e1fac999100f3acc42d698e6b1fc78dbb86a
# Parent  14ffbe7738e5bfbe9a0f19328f00f1d8821922f8
arm : Implement interp_4tap_horiz_pp,ps ARM NEON

diff -r 14ffbe7738e5 -r f3e5e1fac999 source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp	Tue Mar 29 11:43:17 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp	Tue Mar 29 11:56:32 2016 +0530
@@ -43,6 +43,148 @@
 {
     if (cpuMask & X265_CPU_NEON)
     {
+        // chroma_hpp
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_hpp   = PFX(interp_4tap_horiz_pp_4x2_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hpp   = PFX(interp_4tap_horiz_pp_4x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].filter_hpp   = PFX(interp_4tap_horiz_pp_4x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].filter_hpp  = PFX(interp_4tap_horiz_pp_4x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].filter_hpp   = PFX(interp_4tap_horiz_pp_8x2_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_hpp   = PFX(interp_4tap_horiz_pp_8x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].filter_hpp   = PFX(interp_4tap_horiz_pp_8x6_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_hpp   = PFX(interp_4tap_horiz_pp_8x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_hpp  = PFX(interp_4tap_horiz_pp_8x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].filter_hpp  = PFX(interp_4tap_horiz_pp_8x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].filter_hpp = PFX(interp_4tap_horiz_pp_12x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].filter_hpp  = PFX(interp_4tap_horiz_pp_16x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_hpp  = PFX(interp_4tap_horiz_pp_16x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].filter_hpp = PFX(interp_4tap_horiz_pp_16x12_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_hpp = PFX(interp_4tap_horiz_pp_16x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].filter_hpp = PFX(interp_4tap_horiz_pp_16x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].filter_hpp = PFX(interp_4tap_horiz_pp_24x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].filter_hpp  = PFX(interp_4tap_horiz_pp_32x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].filter_hpp = PFX(interp_4tap_horiz_pp_32x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].filter_hpp = PFX(interp_4tap_horiz_pp_32x24_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hpp = PFX(interp_4tap_horiz_pp_32x32_neon);
+
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_hpp   = PFX(interp_4tap_horiz_pp_4x4_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_hpp   = PFX(interp_4tap_horiz_pp_4x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_hpp  = PFX(interp_4tap_horiz_pp_4x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_hpp  = PFX(interp_4tap_horiz_pp_4x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_hpp   = PFX(interp_4tap_horiz_pp_8x4_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].filter_hpp   = PFX(interp_4tap_horiz_pp_8x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_hpp  = PFX(interp_4tap_horiz_pp_8x12_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].filter_hpp  = PFX(interp_4tap_horiz_pp_8x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].filter_hpp  = PFX(interp_4tap_horiz_pp_8x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_hpp  = PFX(interp_4tap_horiz_pp_8x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_hpp = PFX(interp_4tap_horiz_pp_12x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_hpp  = PFX(interp_4tap_horiz_pp_16x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_hpp = PFX(interp_4tap_horiz_pp_16x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_hpp = PFX(interp_4tap_horiz_pp_16x24_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_hpp = PFX(interp_4tap_horiz_pp_16x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_hpp = PFX(interp_4tap_horiz_pp_16x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_hpp = PFX(interp_4tap_horiz_pp_24x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_hpp = PFX(interp_4tap_horiz_pp_32x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_hpp = PFX(interp_4tap_horiz_pp_32x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_hpp = PFX(interp_4tap_horiz_pp_32x48_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_hpp = PFX(interp_4tap_horiz_pp_32x64_neon);
+
+        p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_hpp   = PFX(interp_4tap_horiz_pp_4x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_hpp   = PFX(interp_4tap_horiz_pp_4x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_hpp  = PFX(interp_4tap_horiz_pp_4x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x4].filter_hpp   = PFX(interp_4tap_horiz_pp_8x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x8].filter_hpp   = PFX(interp_4tap_horiz_pp_8x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x16].filter_hpp  = PFX(interp_4tap_horiz_pp_8x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x32].filter_hpp  = PFX(interp_4tap_horiz_pp_8x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_hpp = PFX(interp_4tap_horiz_pp_12x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x4].filter_hpp  = PFX(interp_4tap_horiz_pp_16x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x8].filter_hpp  = PFX(interp_4tap_horiz_pp_16x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x12].filter_hpp = PFX(interp_4tap_horiz_pp_16x12_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x16].filter_hpp = PFX(interp_4tap_horiz_pp_16x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x32].filter_hpp = PFX(interp_4tap_horiz_pp_16x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_hpp = PFX(interp_4tap_horiz_pp_16x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_24x32].filter_hpp = PFX(interp_4tap_horiz_pp_24x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_hpp  = PFX(interp_4tap_horiz_pp_32x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_hpp = PFX(interp_4tap_horiz_pp_32x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x24].filter_hpp = PFX(interp_4tap_horiz_pp_32x24_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x32].filter_hpp = PFX(interp_4tap_horiz_pp_32x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_hpp = PFX(interp_4tap_horiz_pp_32x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_hpp = PFX(interp_4tap_horiz_pp_48x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_hpp = PFX(interp_4tap_horiz_pp_64x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_hpp = PFX(interp_4tap_horiz_pp_64x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_hpp = PFX(interp_4tap_horiz_pp_64x48_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_hpp = PFX(interp_4tap_horiz_pp_64x64_neon);
+
+        // chroma_hps
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].filter_hps   = PFX(interp_4tap_horiz_ps_4x2_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].filter_hps   = PFX(interp_4tap_horiz_ps_4x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].filter_hps   = PFX(interp_4tap_horiz_ps_4x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].filter_hps  = PFX(interp_4tap_horiz_ps_4x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].filter_hps   = PFX(interp_4tap_horiz_ps_8x2_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_hps   = PFX(interp_4tap_horiz_ps_8x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].filter_hps   = PFX(interp_4tap_horiz_ps_8x6_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_hps   = PFX(interp_4tap_horiz_ps_8x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_hps  = PFX(interp_4tap_horiz_ps_8x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].filter_hps  = PFX(interp_4tap_horiz_ps_8x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].filter_hps = PFX(interp_4tap_horiz_ps_12x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].filter_hps  = PFX(interp_4tap_horiz_ps_16x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_hps  = PFX(interp_4tap_horiz_ps_16x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].filter_hps = PFX(interp_4tap_horiz_ps_16x12_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_hps = PFX(interp_4tap_horiz_ps_16x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].filter_hps = PFX(interp_4tap_horiz_ps_16x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].filter_hps = PFX(interp_4tap_horiz_ps_24x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].filter_hps  = PFX(interp_4tap_horiz_ps_32x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].filter_hps = PFX(interp_4tap_horiz_ps_32x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].filter_hps = PFX(interp_4tap_horiz_ps_32x24_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_hps = PFX(interp_4tap_horiz_ps_32x32_neon);
+
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].filter_hps   = PFX(interp_4tap_horiz_ps_4x4_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].filter_hps   = PFX(interp_4tap_horiz_ps_4x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].filter_hps  = PFX(interp_4tap_horiz_ps_4x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].filter_hps  = PFX(interp_4tap_horiz_ps_4x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_hps   = PFX(interp_4tap_horiz_ps_8x4_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].filter_hps   = PFX(interp_4tap_horiz_ps_8x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_hps  = PFX(interp_4tap_horiz_ps_8x12_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].filter_hps  = PFX(interp_4tap_horiz_ps_8x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].filter_hps  = PFX(interp_4tap_horiz_ps_8x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_hps  = PFX(interp_4tap_horiz_ps_8x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].filter_hps = PFX(interp_4tap_horiz_ps_12x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_hps  = PFX(interp_4tap_horiz_ps_16x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_hps = PFX(interp_4tap_horiz_ps_16x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_hps = PFX(interp_4tap_horiz_ps_16x24_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_hps = PFX(interp_4tap_horiz_ps_16x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_hps = PFX(interp_4tap_horiz_ps_16x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_hps = PFX(interp_4tap_horiz_ps_24x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_hps = PFX(interp_4tap_horiz_ps_32x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_hps = PFX(interp_4tap_horiz_ps_32x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_hps = PFX(interp_4tap_horiz_ps_32x48_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_hps = PFX(interp_4tap_horiz_ps_32x64_neon);
+
+        p.chroma[X265_CSP_I444].pu[LUMA_4x4].filter_hps   = PFX(interp_4tap_horiz_ps_4x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_4x8].filter_hps   = PFX(interp_4tap_horiz_ps_4x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_4x16].filter_hps  = PFX(interp_4tap_horiz_ps_4x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x4].filter_hps   = PFX(interp_4tap_horiz_ps_8x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x8].filter_hps   = PFX(interp_4tap_horiz_ps_8x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x16].filter_hps  = PFX(interp_4tap_horiz_ps_8x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x32].filter_hps  = PFX(interp_4tap_horiz_ps_8x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_12x16].filter_hps = PFX(interp_4tap_horiz_ps_12x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x4].filter_hps  = PFX(interp_4tap_horiz_ps_16x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x8].filter_hps  = PFX(interp_4tap_horiz_ps_16x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x12].filter_hps = PFX(interp_4tap_horiz_ps_16x12_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x16].filter_hps = PFX(interp_4tap_horiz_ps_16x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x32].filter_hps = PFX(interp_4tap_horiz_ps_16x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_hps = PFX(interp_4tap_horiz_ps_16x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_24x32].filter_hps = PFX(interp_4tap_horiz_ps_24x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_hps  = PFX(interp_4tap_horiz_ps_32x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_hps = PFX(interp_4tap_horiz_ps_32x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x24].filter_hps = PFX(interp_4tap_horiz_ps_32x24_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x32].filter_hps = PFX(interp_4tap_horiz_ps_32x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_hps = PFX(interp_4tap_horiz_ps_32x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_hps = PFX(interp_4tap_horiz_ps_48x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_hps = PFX(interp_4tap_horiz_ps_64x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_hps = PFX(interp_4tap_horiz_ps_64x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_hps = PFX(interp_4tap_horiz_ps_64x48_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_hps = PFX(interp_4tap_horiz_ps_64x64_neon);
+
         // luma_hpp
         p.pu[LUMA_4x4].luma_hpp   = PFX(interp_horiz_pp_4x4_neon);
         p.pu[LUMA_4x8].luma_hpp   = PFX(interp_horiz_pp_4x8_neon);
diff -r 14ffbe7738e5 -r f3e5e1fac999 source/common/arm/ipfilter8.S
--- a/source/common/arm/ipfilter8.S	Tue Mar 29 11:43:17 2016 +0530
+++ b/source/common/arm/ipfilter8.S	Tue Mar 29 11:56:32 2016 +0530
@@ -2868,3 +2868,465 @@
 LUMA_HPS    64 32
 LUMA_HPS    64 48
 LUMA_HPS    64 64
+
+// ******* Chroma_hpp *******
+.macro vextin8_chroma // load 16 bytes at r5 and widen four byte-shifted 8-byte windows into q2-q5
+    pld             [r5]
+    vld1.8          {q3}, [r5]! // q3 (d6:d7) = 16 bytes from r5; r5 is post-incremented by 16
+    vext.8          d0, d6, d7, #1 // d0 = loaded bytes 1..8
+    vext.8          d1, d6, d7, #2 // d1 = loaded bytes 2..9
+    vext.8          d2, d6, d7, #3 // d2 = loaded bytes 3..10
+    vext.8          d3, d6, d7, #4 // d3 = loaded bytes 4..11
+
+    vmovl.u8        q2, d0 // zero-extend each window to eight 16-bit lanes
+    vmovl.u8        q3, d1 // overwrites the raw load; all four windows are already in d0-d3
+    vmovl.u8        q4, d2 // q4 and q5 are written here, so users of this macro clobber them
+    vmovl.u8        q5, d3
+.endm
+
+.macro FILTER_CHROMA_HPP a b filterhpp // a = block width, b = block height, filterhpp = name of the filter macro to run
+    vpush           {q4-q7}
+    mov             r12,#32 // constant added to each 32-bit sum before the shift right by 6
+    mov             r6, #\b // r6 = rows left to process
+    sub             r3, #\a // r3 = dst stride minus the width, because the stores post-increment r2
+    mov             r8, #\a
+    cmp             r8, #4 // widths 4 and 12 use their own row loops; every other width takes the generic loop
+    beq             11f
+    cmp             r8, #12
+    beq             12f
+    b               13f
+11:
+    FILTER_CHROMA_HPP_4 \a \b \filterhpp
+    b               14f
+12:
+    FILTER_CHROMA_HPP_12 \a \b \filterhpp
+    b               14f
+13:
+    veor            q6, q6 // zero q6 and q7 once before the generic loops
+    veor            q7, q7
+
+loop2_hpp_\filterhpp\()_\a\()x\b: // one iteration per row
+    mov             r7, #\a
+    lsr             r7, #3 // r7 = width / 8 = number of 8-pixel groups in the row
+    mov             r5, r0
+    sub             r5, #2 // r5 = r0 - 2, the address the 16-byte load starts from
+loop3_hpp_\filterhpp\()_\a\()x\b: // one iteration per 8 output pixels
+    vextin8_chroma
+    \filterhpp // filter macro is defined outside this view; q6 and q7 are read below as its 32-bit results
+    vdup.32         q8, r12
+    vadd.s32        q6, q8
+    vqshrun.s32     d0, q6, #6 // shift right by 6 and saturate to unsigned 16 bit
+    vadd.s32        q7, q8
+    vqshrun.s32     d1, q7, #6
+    vqmovn.u16      d0, q0 // saturate to 8 bit: d0 = 8 output pixels
+    vst1.u8         {d0}, [r2]! // register list written in braces, as in the 12-wide row loop
+    subs            r7, #1 // the sub below leaves the flags alone, so bne tests this result
+    sub             r5, #8 // vextin8_chroma advanced r5 by 16; net step is 8 source bytes
+    bne             loop3_hpp_\filterhpp\()_\a\()x\b
+    subs            r6, #1 // the two adds below leave the flags alone, so bne tests this result
+    add             r0, r1 // next source row
+    add             r2, r3 // next destination row
+    bne             loop2_hpp_\filterhpp\()_\a\()x\b
+14:
+    vpop            {q4-q7}
+.endm
+
+.macro FILTER_CHROMA_HPP_4 w h filterhpp // row loop for 4-wide blocks; expects r6 = rows, r12 = 32, r3 = dst stride - 4 (set by FILTER_CHROMA_HPP)
+loop4_hpp_\filterhpp\()_\w\()x\h: // one iteration per row
+    mov             r5, r0
+    sub             r5, #2 // r5 = r0 - 2, the address the 16-byte load starts from
+    vextin8_chroma
+    \filterhpp // filter macro is defined outside this view; q6 and q7 are read below as its 32-bit results
+    vdup.32         q8, r12
+    vadd.s32        q6, q8
+    vqshrun.s32     d0, q6, #6 // shift right by 6 and saturate to unsigned 16 bit
+    vadd.s32        q7, q8
+    vqshrun.s32     d1, q7, #6
+    vqmovn.u16      d0, q0 // saturate to 8 bit
+    vst1.u32        {d0[0]}, [r2]! // store only the low 32 bits = 4 pixels; r2 advances by 4
+    sub             r5, #8 // has no effect: r5 is reloaded from r0 at the top of the loop
+    subs            r6, #1 // the two adds below leave the flags alone, so bne tests this result
+    add             r0, r1 // next source row
+    add             r2, r3 // next destination row
+    bne             loop4_hpp_\filterhpp\()_\w\()x\h
+.endm
+
+.macro FILTER_CHROMA_HPP_12 w h filterhpp // row loop for 12-wide blocks: 8 pixels, then 4 more; expects r6, r12, r3 set by FILTER_CHROMA_HPP
+loop12_hpp_\filterhpp\()_\w\()x\h: // one iteration per row
+    mov             r5, r0
+    sub             r5, #2 // r5 = r0 - 2, the address the 16-byte load starts from
+    vextin8_chroma
+    \filterhpp // filter macro is defined outside this view; q6 and q7 are read below as its 32-bit results
+    vdup.32         q8, r12
+    vadd.s32        q6, q8
+    vqshrun.s32     d0, q6, #6 // shift right by 6 and saturate to unsigned 16 bit
+    vadd.s32        q7, q8
+    vqshrun.s32     d1, q7, #6
+    vqmovn.u16      d0, q0 // saturate to 8 bit
+    vst1.u8         {d0}, [r2]! // first 8 pixels of the row
+    sub             r5, #8 // vextin8_chroma advanced r5 by 16; step back so the next load starts 8 bytes on
+
+    vextin8_chroma
+    \filterhpp
+    vdup.32         q8, r12
+    vadd.s32        q6, q8
+    vqshrun.s32     d0, q6, #6
+    vadd.s32        q7, q8
+    vqshrun.s32     d1, q7, #6
+    vqmovn.u16      d0, q0
+    vst1.u32        {d0[0]}, [r2]! // 8 pixels were computed but only the low 4 are stored
+    add             r2, r3 // next destination row
+    subs            r6, #1 // the add below leaves the flags alone, so bne tests this result
+    add             r0, r1 // next source row
+    bne             loop12_hpp_\filterhpp\()_\w\()x\h
+.endm
+
+.macro CHROMA_HPP  w h // emits x265_interp_4tap_horiz_pp_<w>x<h>_neon
+function x265_interp_4tap_horiz_pp_\w\()x\h\()_neon
+
+    push            {r4, r5, r6, r7, r8}
+    ldr             r4, [sp, #4 * 5] // fifth argument (coeffIdx in the ipfilter8.h prototype), above the 5 pushed words
+
+    cmp             r4, #0 // compare chain: branch to the numeric label equal to r4
+    beq              0f
+    cmp             r4, #1
+    beq              1f
+    cmp             r4, #2
+    beq              2f
+    cmp             r4, #3
+    beq              3f
+    cmp             r4, #4
+    beq              4f
+    cmp             r4, #5
+    beq              5f
+    cmp             r4, #6
+    beq              6f
+    cmp             r4, #7
+    beq              7f
+0: // also reached by fall-through when r4 is not in 0..7
+    FILTER_CHROMA_HPP  \w \h qpel_filter_chroma_0_32b
+    b            8f
+1:
+    FILTER_CHROMA_HPP  \w \h qpel_filter_chroma_1_32b
+    b            8f
+2:
+    FILTER_CHROMA_HPP  \w \h qpel_filter_chroma_2_32b
+    b            8f
+3:
+    FILTER_CHROMA_HPP  \w \h qpel_filter_chroma_3_32b
+    b            8f
+4:
+    FILTER_CHROMA_HPP  \w \h qpel_filter_chroma_4_32b
+    b            8f
+5:
+    FILTER_CHROMA_HPP  \w \h qpel_filter_chroma_5_32b
+    b            8f
+6:
+    FILTER_CHROMA_HPP  \w \h qpel_filter_chroma_6_32b
+    b            8f
+7:
+    FILTER_CHROMA_HPP  \w \h qpel_filter_chroma_7_32b
+
+8: // common exit: restore the pushed registers and return
+    pop             {r4, r5, r6, r7, r8}
+    bx              lr
+endfunc
+.endm
+
+CHROMA_HPP 4 2 // arguments are width then height; each line emits one interp_4tap_horiz_pp function
+CHROMA_HPP 4 4
+CHROMA_HPP 4 8
+CHROMA_HPP 4 16
+CHROMA_HPP 4 32
+CHROMA_HPP 8 2
+CHROMA_HPP 8 4
+CHROMA_HPP 8 6
+CHROMA_HPP 8 8
+CHROMA_HPP 8 12
+CHROMA_HPP 8 16
+CHROMA_HPP 8 32
+CHROMA_HPP 8 64
+CHROMA_HPP 12 16
+CHROMA_HPP 12 32
+CHROMA_HPP 16 4
+CHROMA_HPP 16 8
+CHROMA_HPP 16 12
+CHROMA_HPP 16 16
+CHROMA_HPP 16 24
+CHROMA_HPP 16 32
+CHROMA_HPP 16 64
+CHROMA_HPP 24 32
+CHROMA_HPP 24 64
+CHROMA_HPP 32 8
+CHROMA_HPP 32 16
+CHROMA_HPP 32 24
+CHROMA_HPP 32 32
+CHROMA_HPP 32 48
+CHROMA_HPP 32 64
+CHROMA_HPP 48 64
+CHROMA_HPP 64 16
+CHROMA_HPP 64 32
+CHROMA_HPP 64 48
+CHROMA_HPP 64 64
+// ***** Chroma_hps *****
+.macro FILTER_CHROMA_HPS a b filterhps // a = block width, b = block height, filterhps = name of the filter macro to run
+    vpush           {q4-q7}
+    mov             r12, #8192 // constant subtracted from each 32-bit sum before narrowing
+    mov             r6, r10 // r6 = rows to process; r10 is prepared by CHROMA_HPS
+    sub             r3, #\a
+    lsl             r3, #1 // r3 = (dst stride - width) * 2 bytes: the stores write 16-bit elements
+
+    mov             r8, #\a
+    cmp             r8, #4 // widths 4 and 12 use their own row loops; every other width takes the generic loop
+    beq             14f
+    cmp             r8, #12
+    beq             15f
+    b               16f
+14:
+    FILTER_CHROMA_HPS_4 \a \b \filterhps
+    b               10f
+15:
+    FILTER_CHROMA_HPS_12 \a \b \filterhps
+    b               10f
+16:
+    cmp             r9, #0 // r9 is loaded by CHROMA_HPS (isRowExt in the ipfilter8.h prototype)
+    beq             17f
+    cmp             r9, #1
+    beq             18f
+17: // also reached by fall-through when r9 is neither 0 nor 1
+loop1_hps_\filterhps\()_\a\()x\b\()_rowext0: // one iteration per row
+    mov             r7, #\a
+    lsr             r7, #3 // r7 = width / 8 = number of 8-pixel groups in the row
+    mov             r5, r0
+    sub             r5, #2 // r5 = r0 - 2, the address the 16-byte load starts from
+loop2_hps_\filterhps\()_\a\()x\b\()_rowext0: // one iteration per 8 outputs
+    vextin8_chroma
+    \filterhps // filter macro is defined outside this view; q6 and q7 are read below as its 32-bit results
+    vdup.32         q8, r12
+    vsub.s32        q6, q8
+    vsub.s32        q7, q8
+    vmovn.u32       d0, q6 // keep the low 16 bits of each 32-bit lane (no saturation)
+    vmovn.u32       d1, q7
+    vst1.s16        {q0}, [r2]! // 8 outputs of 16 bits each; r2 advances by 16 bytes
+    subs            r7, #1 // the sub below leaves the flags alone, so bne tests this result
+    sub             r5, #8 // vextin8_chroma advanced r5 by 16; net step is 8 source bytes
+    bne             loop2_hps_\filterhps\()_\a\()x\b\()_rowext0
+    subs            r6, #1 // the two adds below leave the flags alone, so bne tests this result
+    add             r0, r1 // next source row
+    add             r2, r3 // next destination row
+    bne             loop1_hps_\filterhps\()_\a\()x\b\()_rowext0
+    b               10f
+18: // same instruction sequence as the rowext0 loops; only the labels differ
+loop3_hps_\filterhps\()_\a\()x\b\()_rowext1:
+    mov             r7, #\a
+    lsr             r7, #3
+    mov             r5, r0
+    sub             r5, #2
+loop4_hps_\filterhps\()_\a\()x\b\()_rowext1:
+    vextin8_chroma
+    \filterhps
+    vdup.32         q8, r12
+    vsub.s32        q6, q8
+    vsub.s32        q7, q8
+    vmovn.u32       d0, q6
+    vmovn.u32       d1, q7
+    vst1.s16        {q0}, [r2]!
+    subs            r7, #1
+    sub             r5, #8
+    bne             loop4_hps_\filterhps\()_\a\()x\b\()_rowext1
+    subs            r6, #1
+    add             r0, r1
+    add             r2, r3
+    bne             loop3_hps_\filterhps\()_\a\()x\b\()_rowext1
+10:
+    vpop            {q4-q7}
+.endm
+
+.macro FILTER_CHROMA_HPS_4 w h filterhps // row loops for 4-wide blocks; expects r6, r12, r3 set by FILTER_CHROMA_HPS and r9 by CHROMA_HPS
+    cmp             r9, #0 // pick the rowext0 or rowext1 loop; both contain the same instructions
+    beq             19f
+    cmp             r9, #1
+    beq             20f
+19: // also reached by fall-through when r9 is neither 0 nor 1
+loop4_hps_\filterhps\()_\w\()x\h\()_rowext0: // one iteration per row
+    mov             r5, r0
+    sub             r5, #2 // r5 = r0 - 2, the address the 16-byte load starts from
+    vextin8_chroma
+    \filterhps // filter macro is defined outside this view; q6 is read below as its 32-bit result
+    vdup.32         q8, r12
+    vsub.s32        q6, q8
+    vmovn.u32       d0, q6 // keep the low 16 bits of each 32-bit lane (no saturation)
+    vst1.s16        {d0}, [r2]! // 4 outputs of 16 bits each; r2 advances by 8 bytes
+    sub             r5, #8 // has no effect: r5 is reloaded from r0 at the top of the loop
+    subs            r6, #1 // the two adds below leave the flags alone, so bne tests this result
+    add             r0, r1 // next source row
+    add             r2, r3 // next destination row
+    bne             loop4_hps_\filterhps\()_\w\()x\h\()_rowext0
+    b               21f
+20:
+loop5_hps_\filterhps\()_\w\()x\h\()_rowext1:
+    mov             r5, r0
+    sub             r5, #2
+    vextin8_chroma
+    \filterhps
+    vdup.32         q8, r12
+    vsub.s32        q6, q8
+    vmovn.u32       d0, q6
+    vst1.s16        {d0}, [r2]!
+    sub             r5, #8
+    subs            r6, #1
+    add             r0, r1
+    add             r2, r3
+    bne             loop5_hps_\filterhps\()_\w\()x\h\()_rowext1
+21:
+.endm
+
+.macro FILTER_CHROMA_HPS_12 w h filterhpp // row loops for 12-wide blocks; the parameter is named filterhpp but receives the hps filter macro
+    cmp             r9, #0 // pick the rowext0 or rowext1 loop; both contain the same instructions
+    beq             22f
+    cmp             r9, #1
+    beq             23f
+22: // also reached by fall-through when r9 is neither 0 nor 1
+loop12_hps_\filterhpp\()_\w\()x\h\()_rowext0: // one iteration per row
+    mov             r5, r0
+    sub             r5, #2 // r5 = r0 - 2, the address the 16-byte load starts from
+    vextin8_chroma
+    \filterhpp // filter macro is defined outside this view; q6 and q7 are read below as its 32-bit results
+    vdup.32         q8, r12
+    vsub.s32        q6, q8
+    vsub.s32        q7, q8
+    vmovn.u32       d0, q6 // keep the low 16 bits of each 32-bit lane (no saturation)
+    vmovn.u32       d1, q7
+    vst1.s16        {q0}, [r2]! // first 8 outputs of the row (16 bytes)
+    sub             r5, #8 // vextin8_chroma advanced r5 by 16; step back so the next load starts 8 bytes on
+
+    vextin8_chroma
+    \filterhpp
+    vdup.32         q8, r12
+    vsub.s32        q6, q8
+    vmovn.u32       d0, q6
+    vst1.s16        {d0}, [r2]! // remaining 4 outputs of the row (8 bytes)
+    add             r2, r3 // next destination row
+    subs            r6, #1 // the add below leaves the flags alone, so bne tests this result
+    add             r0, r1 // next source row
+    bne             loop12_hps_\filterhpp\()_\w\()x\h\()_rowext0
+    b               24f
+23:
+loop12_hps_\filterhpp\()_\w\()x\h\()_rowext1:
+    mov             r5, r0
+    sub             r5, #2
+    vextin8_chroma
+    \filterhpp
+    vdup.32         q8, r12
+    vsub.s32        q6, q8
+    vsub.s32        q7, q8
+    vmovn.u32       d0, q6
+    vmovn.u32       d1, q7
+    vst1.s16        {q0}, [r2]!
+    sub             r5, #8
+
+    vextin8_chroma
+    \filterhpp
+    vdup.32         q8, r12
+    vsub.s32        q6, q8
+    vmovn.u32       d0, q6
+    vst1.s16        {d0}, [r2]!
+    add             r2, r3
+    subs            r6, #1
+    add             r0, r1
+    bne             loop12_hps_\filterhpp\()_\w\()x\h\()_rowext1
+24:
+.endm
+
+.macro CHROMA_HPS w h // emits x265_interp_4tap_horiz_ps_<w>x<h>_neon
+function x265_interp_4tap_horiz_ps_\w\()x\h\()_neon
+    push            {r4, r5, r6, r7, r8, r9, r10}
+    ldr             r4, [sp, #28] // fifth argument (coeffIdx in the ipfilter8.h prototype), above the 7 pushed words
+    ldr             r9, [sp, #32] // sixth argument (isRowExt in the ipfilter8.h prototype)
+    mov             r10, #\h // r10 = number of rows to filter
+    cmp             r9, #0
+    beq             9f
+    sub             r0, r1 // r9 != 0: start one source row earlier
+    add             r10, #3 // and filter three extra rows
+9:
+    cmp             r4, #0 // compare chain: branch to the numeric label equal to r4
+    beq              0f
+    cmp             r4, #1
+    beq              1f
+    cmp             r4, #2
+    beq              2f
+    cmp             r4, #3
+    beq              3f
+    cmp             r4, #4
+    beq              4f
+    cmp             r4, #5
+    beq              5f
+    cmp             r4, #6
+    beq              6f
+    cmp             r4, #7
+    beq              7f
+0: // also reached by fall-through when r4 is not in 0..7
+    FILTER_CHROMA_HPS  \w \h qpel_filter_chroma_0_32b
+    b            8f
+1:
+    FILTER_CHROMA_HPS  \w \h qpel_filter_chroma_1_32b
+    b            8f
+2:
+    FILTER_CHROMA_HPS  \w \h qpel_filter_chroma_2_32b
+    b            8f
+3:
+    FILTER_CHROMA_HPS  \w \h qpel_filter_chroma_3_32b
+    b            8f
+4:
+    FILTER_CHROMA_HPS  \w \h qpel_filter_chroma_4_32b
+    b            8f
+5:
+    FILTER_CHROMA_HPS  \w \h qpel_filter_chroma_5_32b
+    b            8f
+6:
+    FILTER_CHROMA_HPS  \w \h qpel_filter_chroma_6_32b
+    b            8f
+7:
+    FILTER_CHROMA_HPS  \w \h qpel_filter_chroma_7_32b
+
+8: // common exit: restore the pushed registers and return
+    pop             {r4, r5, r6, r7, r8, r9, r10}
+    bx              lr
+endfunc
+.endm
+
+CHROMA_HPS 4 2 // arguments are width then height; each line emits one interp_4tap_horiz_ps function
+CHROMA_HPS 4 4
+CHROMA_HPS 4 8
+CHROMA_HPS 4 16
+CHROMA_HPS 4 32
+CHROMA_HPS 8 2
+CHROMA_HPS 8 4
+CHROMA_HPS 8 6
+CHROMA_HPS 8 8
+CHROMA_HPS 8 12
+CHROMA_HPS 8 16
+CHROMA_HPS 8 32
+CHROMA_HPS 8 64
+CHROMA_HPS 12 16
+CHROMA_HPS 12 32
+CHROMA_HPS 16 4
+CHROMA_HPS 16 8
+CHROMA_HPS 16 12
+CHROMA_HPS 16 16
+CHROMA_HPS 16 24
+CHROMA_HPS 16 32
+CHROMA_HPS 16 64
+CHROMA_HPS 24 32
+CHROMA_HPS 24 64
+CHROMA_HPS 32 8
+CHROMA_HPS 32 16
+CHROMA_HPS 32 24
+CHROMA_HPS 32 32
+CHROMA_HPS 32 48
+CHROMA_HPS 32 64
+CHROMA_HPS 48 64
+CHROMA_HPS 64 16
+CHROMA_HPS 64 32
+CHROMA_HPS 64 48
+CHROMA_HPS 64 64
diff -r 14ffbe7738e5 -r f3e5e1fac999 source/common/arm/ipfilter8.h
--- a/source/common/arm/ipfilter8.h	Tue Mar 29 11:43:17 2016 +0530
+++ b/source/common/arm/ipfilter8.h	Tue Mar 29 11:56:32 2016 +0530
@@ -267,4 +267,76 @@
 void x265_interp_horiz_ps_64x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
 void x265_interp_horiz_ps_64x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
 void x265_interp_horiz_ps_64x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+// NEON 4-tap horizontal interpolation, pixel in -> pixel out ("pp"): one entry point per WxH block size; coeffIdx picks the filter coefficients.
+void x265_interp_4tap_horiz_pp_4x2_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_4x4_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_4x8_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_4x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_4x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x2_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x4_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x6_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x8_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x12_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_8x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_12x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_12x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x4_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x8_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x12_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x24_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_16x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_24x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_24x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x8_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x24_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x48_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_32x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_48x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x48_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_horiz_pp_64x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+// NEON 4-tap horizontal interpolation, pixel in -> int16_t out ("ps"): same block sizes as the pp set, plus an isRowExt flag (NOTE(review): its meaning is set by the assembly -- confirm there).
+void x265_interp_4tap_horiz_ps_4x2_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_4x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_4x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_4x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_4x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x2_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x6_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_8x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_12x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_12x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_16x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_24x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_24x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_32x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_48x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_64x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_64x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_64x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
+void x265_interp_4tap_horiz_ps_64x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
 #endif // ifndef X265_IPFILTER8_ARM_H



More information about the x265-devel mailing list