[x265] [PATCH] arm: Implement interp_4tap_vert_pp, ps & sp for NxN NEON

ramya at multicorewareinc.com ramya at multicorewareinc.com
Thu Mar 24 11:31:32 CET 2016


# HG changeset patch
# User Ramya Sriraman<ramya at multicorewareinc.com>
# Date 1458813337 -19800
#      Thu Mar 24 15:25:37 2016 +0530
# Node ID d99ba191af64ac8455f94abe9c35f641400aa670
# Parent  cd8244e3c3beb0946ebc4131da3eeb6992cd5c57
arm: Implement interp_4tap_vert_pp,ps &sp for NxN NEON

diff -r cd8244e3c3be -r d99ba191af64 source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp	Tue Mar 22 18:41:56 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp	Thu Mar 24 15:25:37 2016 +0530
@@ -380,6 +380,173 @@
         p.pu[LUMA_24x32].luma_vps   = PFX(interp_8tap_vert_ps_24x32_neon);
         p.pu[LUMA_48x64].luma_vps   = PFX(interp_8tap_vert_ps_48x64_neon);
         p.pu[LUMA_12x16].luma_vps   = PFX(interp_8tap_vert_ps_12x16_neon);
+
+        // vertical chroma (4-tap) filters: NEON pp, ps and sp variants for I420, I422 and I444
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].filter_vpp = PFX(interp_4tap_vert_pp_8x2_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vpp = PFX(interp_4tap_vert_pp_8x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].filter_vpp = PFX(interp_4tap_vert_pp_8x6_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_vpp = PFX(interp_4tap_vert_pp_8x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_vpp = PFX(interp_4tap_vert_pp_8x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].filter_vpp = PFX(interp_4tap_vert_pp_8x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].filter_vpp = PFX(interp_4tap_vert_pp_16x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_vpp = PFX(interp_4tap_vert_pp_16x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].filter_vpp = PFX(interp_4tap_vert_pp_16x12_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_vpp = PFX(interp_4tap_vert_pp_16x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].filter_vpp = PFX(interp_4tap_vert_pp_16x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].filter_vpp = PFX(interp_4tap_vert_pp_32x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].filter_vpp = PFX(interp_4tap_vert_pp_32x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].filter_vpp = PFX(interp_4tap_vert_pp_32x24_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_vpp = PFX(interp_4tap_vert_pp_32x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].filter_vpp = PFX(interp_4tap_vert_pp_24x32_neon);
+
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_vpp = PFX(interp_4tap_vert_pp_8x4_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].filter_vpp = PFX(interp_4tap_vert_pp_8x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].filter_vpp = PFX(interp_4tap_vert_pp_8x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].filter_vpp = PFX(interp_4tap_vert_pp_8x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_vpp = PFX(interp_4tap_vert_pp_8x12_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_vpp = PFX(interp_4tap_vert_pp_8x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vpp = PFX(interp_4tap_vert_pp_16x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_vpp = PFX(interp_4tap_vert_pp_16x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_vpp = PFX(interp_4tap_vert_pp_16x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vpp = PFX(interp_4tap_vert_pp_16x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vpp = PFX(interp_4tap_vert_pp_16x24_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vpp = PFX(interp_4tap_vert_pp_32x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vpp = PFX(interp_4tap_vert_pp_32x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vpp = PFX(interp_4tap_vert_pp_32x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vpp = PFX(interp_4tap_vert_pp_32x48_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vpp = PFX(interp_4tap_vert_pp_24x64_neon);
+
+        p.chroma[X265_CSP_I444].pu[LUMA_8x4].filter_vpp = PFX(interp_4tap_vert_pp_8x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x8].filter_vpp = PFX(interp_4tap_vert_pp_8x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x16].filter_vpp = PFX(interp_4tap_vert_pp_8x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x32].filter_vpp = PFX(interp_4tap_vert_pp_8x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x4].filter_vpp = PFX(interp_4tap_vert_pp_16x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x8].filter_vpp = PFX(interp_4tap_vert_pp_16x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x12].filter_vpp = PFX(interp_4tap_vert_pp_16x12_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x16].filter_vpp = PFX(interp_4tap_vert_pp_16x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x32].filter_vpp = PFX(interp_4tap_vert_pp_16x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_vpp = PFX(interp_4tap_vert_pp_16x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_vpp = PFX(interp_4tap_vert_pp_32x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_vpp = PFX(interp_4tap_vert_pp_32x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x32].filter_vpp = PFX(interp_4tap_vert_pp_32x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_vpp = PFX(interp_4tap_vert_pp_32x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vpp = PFX(interp_4tap_vert_pp_64x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_vpp = PFX(interp_4tap_vert_pp_64x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_vpp = PFX(interp_4tap_vert_pp_64x48_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vpp = PFX(interp_4tap_vert_pp_64x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_24x32].filter_vpp = PFX(interp_4tap_vert_pp_24x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_vpp = PFX(interp_4tap_vert_pp_48x64_neon);
+
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].filter_vps = PFX(interp_4tap_vert_ps_8x2_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vps = PFX(interp_4tap_vert_ps_8x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].filter_vps = PFX(interp_4tap_vert_ps_8x6_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_vps = PFX(interp_4tap_vert_ps_8x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_vps = PFX(interp_4tap_vert_ps_8x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].filter_vps = PFX(interp_4tap_vert_ps_8x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].filter_vps = PFX(interp_4tap_vert_ps_16x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_vps = PFX(interp_4tap_vert_ps_16x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].filter_vps = PFX(interp_4tap_vert_ps_16x12_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_vps = PFX(interp_4tap_vert_ps_16x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].filter_vps = PFX(interp_4tap_vert_ps_16x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].filter_vps = PFX(interp_4tap_vert_ps_32x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].filter_vps = PFX(interp_4tap_vert_ps_32x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].filter_vps = PFX(interp_4tap_vert_ps_32x24_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_vps = PFX(interp_4tap_vert_ps_32x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].filter_vps = PFX(interp_4tap_vert_ps_24x32_neon);
+
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_vps = PFX(interp_4tap_vert_ps_8x4_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].filter_vps = PFX(interp_4tap_vert_ps_8x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].filter_vps = PFX(interp_4tap_vert_ps_8x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].filter_vps = PFX(interp_4tap_vert_ps_8x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_vps = PFX(interp_4tap_vert_ps_8x12_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_vps = PFX(interp_4tap_vert_ps_8x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vps = PFX(interp_4tap_vert_ps_16x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_vps = PFX(interp_4tap_vert_ps_16x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_vps = PFX(interp_4tap_vert_ps_16x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vps = PFX(interp_4tap_vert_ps_16x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vps = PFX(interp_4tap_vert_ps_16x24_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vps = PFX(interp_4tap_vert_ps_32x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vps = PFX(interp_4tap_vert_ps_32x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vps = PFX(interp_4tap_vert_ps_32x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vps = PFX(interp_4tap_vert_ps_32x48_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vps = PFX(interp_4tap_vert_ps_24x64_neon);
+
+        p.chroma[X265_CSP_I444].pu[LUMA_8x4].filter_vps = PFX(interp_4tap_vert_ps_8x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x8].filter_vps = PFX(interp_4tap_vert_ps_8x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x16].filter_vps = PFX(interp_4tap_vert_ps_8x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x32].filter_vps = PFX(interp_4tap_vert_ps_8x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x4].filter_vps = PFX(interp_4tap_vert_ps_16x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x8].filter_vps = PFX(interp_4tap_vert_ps_16x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x12].filter_vps = PFX(interp_4tap_vert_ps_16x12_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x16].filter_vps = PFX(interp_4tap_vert_ps_16x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x32].filter_vps = PFX(interp_4tap_vert_ps_16x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_vps = PFX(interp_4tap_vert_ps_16x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_vps = PFX(interp_4tap_vert_ps_32x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_vps = PFX(interp_4tap_vert_ps_32x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x32].filter_vps = PFX(interp_4tap_vert_ps_32x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_vps = PFX(interp_4tap_vert_ps_32x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vps = PFX(interp_4tap_vert_ps_64x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_vps = PFX(interp_4tap_vert_ps_64x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_vps = PFX(interp_4tap_vert_ps_64x48_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vps = PFX(interp_4tap_vert_ps_64x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_24x32].filter_vps = PFX(interp_4tap_vert_ps_24x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_vps = PFX(interp_4tap_vert_ps_48x64_neon);
+
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].filter_vsp = PFX(interp_4tap_vert_sp_8x2_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].filter_vsp = PFX(interp_4tap_vert_sp_8x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].filter_vsp = PFX(interp_4tap_vert_sp_8x6_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].filter_vsp = PFX(interp_4tap_vert_sp_8x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].filter_vsp = PFX(interp_4tap_vert_sp_8x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].filter_vsp = PFX(interp_4tap_vert_sp_8x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].filter_vsp = PFX(interp_4tap_vert_sp_16x4_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].filter_vsp = PFX(interp_4tap_vert_sp_16x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].filter_vsp = PFX(interp_4tap_vert_sp_16x12_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].filter_vsp = PFX(interp_4tap_vert_sp_16x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].filter_vsp = PFX(interp_4tap_vert_sp_16x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].filter_vsp = PFX(interp_4tap_vert_sp_32x8_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].filter_vsp = PFX(interp_4tap_vert_sp_32x16_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].filter_vsp = PFX(interp_4tap_vert_sp_32x24_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].filter_vsp = PFX(interp_4tap_vert_sp_32x32_neon);
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].filter_vsp = PFX(interp_4tap_vert_sp_24x32_neon);
+
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].filter_vsp = PFX(interp_4tap_vert_sp_8x4_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].filter_vsp = PFX(interp_4tap_vert_sp_8x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].filter_vsp = PFX(interp_4tap_vert_sp_8x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].filter_vsp = PFX(interp_4tap_vert_sp_8x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].filter_vsp = PFX(interp_4tap_vert_sp_8x12_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].filter_vsp = PFX(interp_4tap_vert_sp_8x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].filter_vsp = PFX(interp_4tap_vert_sp_16x8_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].filter_vsp = PFX(interp_4tap_vert_sp_16x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].filter_vsp = PFX(interp_4tap_vert_sp_16x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].filter_vsp = PFX(interp_4tap_vert_sp_16x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].filter_vsp = PFX(interp_4tap_vert_sp_16x24_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].filter_vsp = PFX(interp_4tap_vert_sp_32x16_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].filter_vsp = PFX(interp_4tap_vert_sp_32x32_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].filter_vsp = PFX(interp_4tap_vert_sp_32x64_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].filter_vsp = PFX(interp_4tap_vert_sp_32x48_neon);
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].filter_vsp = PFX(interp_4tap_vert_sp_24x64_neon);
+
+        p.chroma[X265_CSP_I444].pu[LUMA_8x4].filter_vsp = PFX(interp_4tap_vert_sp_8x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x8].filter_vsp = PFX(interp_4tap_vert_sp_8x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x16].filter_vsp = PFX(interp_4tap_vert_sp_8x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_8x32].filter_vsp = PFX(interp_4tap_vert_sp_8x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x4].filter_vsp = PFX(interp_4tap_vert_sp_16x4_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x8].filter_vsp = PFX(interp_4tap_vert_sp_16x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x12].filter_vsp = PFX(interp_4tap_vert_sp_16x12_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x16].filter_vsp = PFX(interp_4tap_vert_sp_16x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x32].filter_vsp = PFX(interp_4tap_vert_sp_16x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_16x64].filter_vsp = PFX(interp_4tap_vert_sp_16x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x8].filter_vsp = PFX(interp_4tap_vert_sp_32x8_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x16].filter_vsp = PFX(interp_4tap_vert_sp_32x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x32].filter_vsp = PFX(interp_4tap_vert_sp_32x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_32x64].filter_vsp = PFX(interp_4tap_vert_sp_32x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vsp = PFX(interp_4tap_vert_sp_64x16_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_vsp = PFX(interp_4tap_vert_sp_64x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_vsp = PFX(interp_4tap_vert_sp_64x48_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vsp = PFX(interp_4tap_vert_sp_64x64_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_24x32].filter_vsp = PFX(interp_4tap_vert_sp_24x32_neon);
+        p.chroma[X265_CSP_I444].pu[LUMA_48x64].filter_vsp = PFX(interp_4tap_vert_sp_48x64_neon);
+
     }
     if (cpuMask & X265_CPU_ARMV6)
     {
diff -r cd8244e3c3be -r d99ba191af64 source/common/arm/ipfilter8.S
--- a/source/common/arm/ipfilter8.S	Tue Mar 22 18:41:56 2016 +0530
+++ b/source/common/arm/ipfilter8.S	Thu Mar 24 15:25:37 2016 +0530
@@ -31,7 +31,15 @@
 .word -1,-1,4,4,-10,-10,58,58,17,17,-5,-5,1,1,0,0
 .word -1,-1,4,4,-11,-11,40,40,40,40,-11,-11,4,4,-1,-1
 .word 0,0,1,1,-5,-5,17,17,58,58,-10,-10,4,4,-1,-1 
-
+g_chromaFilter:                         // eight rows of four taps, each tap stored twice; NOTE(review): no reference to this label is visible in this patch - confirm it is used
+.word 0, 0, 64, 64, 0, 0, 0, 0          // row 0: same taps as qpel_filter_chroma_0_32b
+.word -2, -2, 58, 58, 10, 10, -2, -2    // row 1: same taps as qpel_filter_chroma_1_32b
+.word -4, -4, 54, 54, 16, 16, -2, -2    // row 2: same taps as qpel_filter_chroma_2_32b
+.word -6, -6, 46, 46, 28, 28, -4, -4    // row 3: same taps as qpel_filter_chroma_3_32b
+.word -4, -4, 36, 36, 36, 36, -4 ,-4    // row 4: same taps as qpel_filter_chroma_4_32b
+.word -4, -4, 28, 28, 46, 46, -6, -6    // row 5: same taps as qpel_filter_chroma_5_32b
+.word -2, -2, 16, 16, 54, 54, -4 ,-4    // row 6: same taps as qpel_filter_chroma_6_32b
+.word -2, -2, 10, 10, 58, 58, -2, -2    // row 7: same taps as qpel_filter_chroma_7_32b
 
 .text
 
@@ -1936,3 +1944,541 @@
     pop             {r4, r5, r6, r7}
     bx              lr
 endfunc
+
+//************chroma_vpp************
+// Taps (0, 64, 0, 0), expanded when coeffIdx == 0: q6/q7 = 64*b as s32 (b = q3, 8 x s16; q6 = lanes 0-3, q7 = lanes 4-7). Clobbers d16.
+.macro qpel_filter_chroma_0_32b
+    vmov.i16        d16, #64
+    vmull.s16       q6, d6, d16    // 64*b0
+    vmull.s16       q7, d7, d16   // 64*b1
+.endm
+// Taps (-2, 58, 10, -2), coeffIdx == 1: q6/q7 = 58*b + 10*c - 2*(a+d) as s32. In: q2=a q3=b q4=c q5=d (8 x s16). Overwrites q2 with a+d; clobbers q8-q14.
+.macro qpel_filter_chroma_1_32b
+    vmov.i16        d16, #58
+    vmov.i16        d17, #10
+    vmull.s16       q9, d6, d16     // 58*b0
+    vmull.s16       q10, d7, d16     // 58*b1
+    vmull.s16       q11, d8, d17    // 10*c0
+    vmull.s16       q12, d9, d17    // 10*c1
+    vadd.s16        q2, q5          //a +d
+    vshll.s16       q13, d4, #1     // 2 * (a0+d0)
+    vshll.s16       q14, d5, #1     // 2 * (a1+d1)
+    vsub.s32        q9, q13         // 58*b0 - 2 * (a0+d0)
+    vsub.s32        q10, q14         // 58*b1 - 2 * (a1+d1)
+    vadd.s32        q6, q9, q11         // 58*b0 - 2 * (a0+d0) +10*c0
+    vadd.s32        q7, q10, q12         // 58*b1 - 2 * (a1+d1) +10*c1
+.endm
+// Taps (-4, 54, 16, -2), coeffIdx == 2: q6/q7 = 54*b + 16*c - (4*a + 2*d) as s32. In: q2=a q3=b q4=c q5=d (8 x s16). Clobbers q8-q15.
+.macro qpel_filter_chroma_2_32b
+    vmov.i16        d16, #54
+    vmull.s16       q9, d6, d16     // 54*b0
+    vmull.s16       q10, d7, d16     // 54*b1
+    vshll.s16       q11, d4, #2     // 4 * a0
+    vshll.s16       q12, d5, #2     // 4 * a1
+    vshll.s16       q13, d8, #4     // 16 * c0
+    vshll.s16       q14, d9, #4     // 16 * c1
+    vshll.s16       q15, d10, #1     // 2 * d0
+    vshll.s16       q8, d11, #1     // 2 * d1 (reuses q8; the 54 constant in d16 is no longer needed)
+
+    vadd.s32        q9, q13         // 54*b0 + 16 * c0
+    vadd.s32        q10, q14        // 54*b1 + 16 * c1
+    vadd.s32        q11, q15         // 4 * a0 +2 * d0
+    vadd.s32        q12, q8         // 4 * a1 +2 * d1
+    vsub.s32        q6, q9, q11     // 54*b0 + 16 * c0 - ( 4 * a0 +2 * d0)
+    vsub.s32        q7, q10, q12     // 54*b1 + 16 * c1 - ( 4 * a1 +2 * d1)
+.endm
+// Taps (-6, 46, 28, -4), coeffIdx == 3: q6/q7 = 46*b + 28*c - (6*a + 4*d) as s32. In: q2=a q3=b q4=c q5=d (8 x s16). Clobbers q8-q15.
+.macro qpel_filter_chroma_3_32b
+    vmov.i16        d16, #46
+    vmov.i16        d17, #28
+    vmull.s16       q9, d6, d16     // 46*b0
+    vmull.s16       q10, d7, d16     // 46*b1
+    vmull.s16       q11, d8, d17    // 28*c0
+    vmull.s16       q12, d9, d17    // 28*c1
+    vmov.i16        d17, #6
+    vshll.s16       q13, d10, #2     // 4 * d0
+    vshll.s16       q14, d11, #2     // 4 * d1
+    vmull.s16       q15, d4, d17    // 6*a0
+    vmull.s16       q8, d5, d17    // 6*a1 (destination q8 overlaps the d17 constant, which is not used afterwards)
+    vadd.s32        q9, q11         // 46*b0 + 28*c0
+    vadd.s32        q10, q12        //  46*b1 + 28*c1
+    vadd.s32        q13, q15         // 4 * d0 + 6*a0
+    vadd.s32        q14, q8        // 4 * d1 + 6*a1
+    vsub.s32        q6, q9, q13         // 46*b0 + 28*c0 -(4 * d0 + 6*a0)
+    vsub.s32        q7, q10, q14         //  46*b1 + 28*c1 -(4 * d1 + 6*a1)
+.endm
+// Taps (-4, 36, 36, -4), coeffIdx == 4: q6/q7 = 36*(b+c) - 4*(a+d) as s32. In: q2=a q3=b q4=c q5=d (8 x s16). Overwrites q2 with a+d and q3 with b+c; clobbers d16, q9-q12.
+.macro qpel_filter_chroma_4_32b
+    vmov.i16        d16, #36
+    vadd.s16        q2, q5          // a +d
+    vadd.s16        q3, q4          // b+c
+    vmull.s16       q9, d6, d16     // 36*(b0 + c0)
+    vmull.s16       q10, d7, d16     // 36*(b1 + c1)
+    vshll.s16       q11, d4, #2     // 4 * (a0+d0)
+    vshll.s16       q12, d5, #2     // 4 * (a1+d1)
+    vsub.s32        q6, q9, q11         // 36*(b0 + c0) - ( 4 * (a0+d0))
+    vsub.s32        q7, q10, q12         // 36*(b1 + c1) - ( 4 * (a1+d1))
+.endm
+// Taps (-4, 28, 46, -6), coeffIdx == 5: q6/q7 = 28*b + 46*c - (4*a + 6*d) as s32. In: q2=a q3=b q4=c q5=d (8 x s16). Clobbers q8-q15.
+.macro qpel_filter_chroma_5_32b
+    vmov.i16        d16, #46
+    vmov.i16        d17, #28
+    vmull.s16       q9, d6, d17     // 28*b0
+    vmull.s16       q10, d7, d17     // 28*b1
+    vmull.s16       q11, d8, d16    // 46*c0
+    vmull.s16       q12, d9, d16    // 46*c1
+    vmov.i16        d17, #6
+    vshll.s16       q13, d4, #2     // 4 * a0
+    vshll.s16       q14, d5, #2     // 4 * a1
+    vmull.s16       q15, d10, d17    // 6*d0
+    vmull.s16       q8, d11, d17    // 6*d1 (destination q8 overlaps the d17 constant, which is not used afterwards)
+    vadd.s32        q9, q11         // 28*b0 + 46*c0
+    vadd.s32        q10, q12        //  28*b1 + 46*c1
+    vadd.s32        q13, q15         // 4 * a0 + 6*d0
+    vadd.s32        q14, q8        //  4 * a1 + 6*d1
+    vsub.s32        q6, q9, q13         // 28*b0 + 46*c0- (4 * a0 + 6*d0)
+    vsub.s32        q7, q10, q14         //   28*b1 + 46*c1- (4 * a1 + 6*d1)
+.endm
+// Taps (-2, 16, 54, -4), coeffIdx == 6: q6/q7 = 54*c + 16*b - (2*a + 4*d) as s32. In: q2=a q3=b q4=c q5=d (8 x s16). Clobbers q8-q15.
+.macro qpel_filter_chroma_6_32b
+    vmov.i16        d16, #54
+    vmull.s16       q9, d8, d16     // 54*c0
+    vmull.s16       q10, d9, d16     // 54*c1
+    vshll.s16       q11, d4, #1     // 2 * a0
+    vshll.s16       q12, d5, #1     // 2 * a1
+    vshll.s16       q13, d6, #4     // 16 * b0
+    vshll.s16       q14, d7, #4     // 16 * b1
+    vshll.s16       q15, d10, #2     // 4 * d0
+    vshll.s16       q8, d11, #2     // 4 * d1 (reuses q8; the 54 constant in d16 is no longer needed)
+    vadd.s32        q9, q13         // 54*c0 + 16 * b0
+    vadd.s32        q10, q14        // 54*c1 + 16 * b1
+    vadd.s32        q11, q15         // 2 * a0 + 4 * d0
+    vadd.s32        q12, q8         // 2 * a1 + 4 * d1
+    vsub.s32        q6, q9, q11     // 54*c0 + 16 * b0 - ( 2 * a0 + 4 * d0)
+    vsub.s32        q7, q10, q12     //  54*c1 + 16 * b1 - ( 2 * a1 + 4 * d1)
+.endm
+// Taps (-2, 10, 58, -2), coeffIdx == 7: q6/q7 = 10*b + 58*c - 2*(a+d) as s32. In: q2=a q3=b q4=c q5=d (8 x s16). Overwrites q2 with a+d; clobbers q8-q14.
+.macro qpel_filter_chroma_7_32b
+    vmov.i16        d16, #10
+    vmov.i16        d17, #58
+    vmull.s16       q9, d6, d16     // 10*b0
+    vmull.s16       q10, d7, d16     // 10*b1
+    vmull.s16       q11, d8, d17    // 58*c0
+    vmull.s16       q12, d9, d17    // 58*c1
+    vadd.s16        q2, q5          //a +d
+    vshll.s16       q13, d4, #1     // 2 * (a0+d0)
+    vshll.s16       q14, d5, #1     // 2 * (a1+d1)
+    vsub.s32        q9, q13         // 10*b0 - 2 * (a0+d0)
+    vsub.s32        q10, q14         // 10*b1 - 2 * (a1+d1)
+    vadd.s32        q6, q9, q11         // 10*b0 - 2 * (a0+d0) +58*c0
+    vadd.s32        q7, q10, q12         // 10*b1 - 2 * (a1+d1) +58*c1
+.endm
+// FILTER_CHROMA_VPP a b filterv: vertical 4-tap filter over r4 rows of an a-wide block, 8 columns per pass, 8-bit in and 8-bit out; filterv names the tap macro to expand.
+.macro FILTER_CHROMA_VPP a b filterv
+    // In: r0 = first of the four source rows, r1 = source stride, r2 = destination row, r3 = destination stride, r4 = row count. Scratch: r6, r7, r8, r12.
+    vpush           {q4-q7}
+    // Row loop. The b argument is only used to keep the labels unique per block size.
+.loop_\filterv\()_\a\()x\b:
+    // r7 = write pointer for this output row, r8 = column offset in bytes.
+    mov             r7, r2
+    mov             r6, r0          // NOTE(review): overwritten by the add below before any use
+    eor             r8, r8
+
+.loop_w8_\filterv\()_\a\()x\b:
+    // Load 8 pixels from each of four consecutive rows starting at r0 + r8.
+    add             r6, r0, r8
+
+    pld [r6]
+    vld1.u8         d0, [r6], r1
+    pld [r6]
+    vld1.u8         d1, [r6], r1
+    pld [r6]
+    vld1.u8         d2, [r6], r1
+    pld [r6]
+    vld1.u8         d3, [r6], r1
+    // Zero-extend the rows to 16 bits: q2 = a, q3 = b, q4 = c, q5 = d.
+    vmovl.u8        q2, d0
+    vmovl.u8        q3, d1
+    vmovl.u8        q4, d2
+    vmovl.u8        q5, d3
+    // NOTE(review): all eight qpel_filter_chroma_N_32b macros overwrite q6/q7, so this clearing looks redundant - confirm before removing.
+    veor.u8         q6, q6
+    veor.u8         q7, q7
+    // Expand the tap macro: leaves the eight 32-bit sums in q6 (lanes 0-3) and q7 (lanes 4-7).
+   \filterv
+    // Add 32, saturating shift right by 6 to u16, saturating narrow to u8, then store 8 output pixels.
+    mov             r12,#32
+    vdup.32         q8, r12
+    vadd.s32        q6, q8
+    vqshrun.s32     d0, q6, #6
+    vadd.s32        q7, q8
+    vqshrun.s32     d1, q7, #6
+    vqmovn.u16      d0, q0
+    vst1.u8         d0, [r7]!
+    // Advance 8 columns until the block width a is covered.
+    add             r8, #8
+    cmp             r8, #\a
+    blt             .loop_w8_\filterv\()_\a\()x\b
+    // Step source and destination down one row; r4 counts the remaining rows.
+    add             r0, r1
+    add             r2, r3
+    subs            r4, #1
+    bne             .loop_\filterv\()_\a\()x\b 
+    vpop            {q4-q7}
+.endm 
+// CHROMA_VPP w h: defines x265_interp_4tap_vert_pp_WxH_neon, holding one FILTER_CHROMA_VPP loop per coefficient set and dispatching on the fifth argument.
+.macro CHROMA_VPP  w h
+function x265_interp_4tap_vert_pp_\w\()x\h\()_neon
+    // r5 = fifth argument (coeffIdx in the ipfilter8.h prototype), read from the stack just above the five registers pushed here.
+    push            {r4, r5, r6, r7, r8}
+    ldr             r5, [sp, #4 * 5]
+    sub             r0, r1          // back up one source row: the four taps start one row above src
+    mov             r4, #\h         // row counter consumed by FILTER_CHROMA_VPP
+    // Branch to the loop specialised for coeffIdx 0..7; any other value falls through into block 0.
+    cmp             r5, #0
+    beq              0f
+    cmp             r5, #1
+    beq              1f
+    cmp             r5, #2
+    beq              2f
+    cmp             r5, #3
+    beq              3f
+    cmp             r5, #4
+    beq              4f
+    cmp             r5, #5
+    beq              5f
+    cmp             r5, #6
+    beq              6f
+    cmp             r5, #7
+    beq              7f
+0:
+    FILTER_CHROMA_VPP  \w \h qpel_filter_chroma_0_32b
+    b            8f
+1:
+    FILTER_CHROMA_VPP  \w \h qpel_filter_chroma_1_32b
+    b            8f
+2:
+    FILTER_CHROMA_VPP  \w \h qpel_filter_chroma_2_32b
+    b            8f
+3:
+    FILTER_CHROMA_VPP  \w \h qpel_filter_chroma_3_32b
+    b            8f
+4:
+    FILTER_CHROMA_VPP  \w \h qpel_filter_chroma_4_32b
+    b            8f
+5:
+    FILTER_CHROMA_VPP  \w \h qpel_filter_chroma_5_32b
+    b            8f
+6:
+    FILTER_CHROMA_VPP  \w \h qpel_filter_chroma_6_32b
+    b            8f
+7:
+    FILTER_CHROMA_VPP  \w \h qpel_filter_chroma_7_32b
+    b            8f
+8:
+    pop             {r4, r5, r6, r7, r8}
+    bx              lr
+endfunc
+.endm
+// Emit x265_interp_4tap_vert_pp_WxH_neon for each block size below; every width is a multiple of the 8-column step used by FILTER_CHROMA_VPP.
+CHROMA_VPP 8 2
+CHROMA_VPP 8 4
+CHROMA_VPP 8 6
+CHROMA_VPP 8 8
+CHROMA_VPP 8 16
+CHROMA_VPP 8 32
+CHROMA_VPP 8 12
+CHROMA_VPP 8 64
+CHROMA_VPP 16 4
+CHROMA_VPP 16 8
+CHROMA_VPP 16 12
+CHROMA_VPP 16 16
+CHROMA_VPP 16 32
+CHROMA_VPP 16 64
+CHROMA_VPP 16 24
+CHROMA_VPP 32 8
+CHROMA_VPP 32 16
+CHROMA_VPP 32 24
+CHROMA_VPP 32 32
+CHROMA_VPP 32 64
+CHROMA_VPP 32 48
+CHROMA_VPP 24 32
+CHROMA_VPP 24 64
+CHROMA_VPP 64 16
+CHROMA_VPP 64 32
+CHROMA_VPP 64 48
+CHROMA_VPP 64 64
+CHROMA_VPP 48 64
+// FILTER_CHROMA_VPS a b filterv: vertical 4-tap filter over r4 rows of an a-wide block, 8 columns per pass, 8-bit in and 16-bit out; filterv names the tap macro to expand.
+.macro FILTER_CHROMA_VPS a b filterv
+    // In: r0 = first of the four source rows, r1 = source stride, r2 = destination row, r3 = destination stride in bytes, r4 = row count. Scratch: r6, r7, r8, r12.
+    vpush           {q4-q7}
+    // Row loop. The b argument is only used to keep the labels unique per block size.
+.loop_vps_\filterv\()_\a\()x\b:
+    // r7 = write pointer for this output row, r8 = source column offset in bytes.
+    mov             r7, r2
+    mov             r6, r0          // NOTE(review): overwritten by the add below before any use
+    eor             r8, r8
+
+.loop_vps_w8_\filterv\()_\a\()x\b:
+    // Load 8 pixels from each of four consecutive rows starting at r0 + r8.
+    add             r6, r0, r8
+
+    pld [r6]
+    vld1.u8         d0, [r6], r1
+    pld [r6]
+    vld1.u8         d1, [r6], r1
+    pld [r6]
+    vld1.u8         d2, [r6], r1
+    pld [r6]
+    vld1.u8         d3, [r6], r1
+    // Zero-extend the rows to 16 bits: q2 = a, q3 = b, q4 = c, q5 = d.
+    vmovl.u8        q2, d0
+    vmovl.u8        q3, d1
+    vmovl.u8        q4, d2
+    vmovl.u8        q5, d3
+    // NOTE(review): all eight qpel_filter_chroma_N_32b macros overwrite q6/q7, so this clearing looks redundant - confirm before removing.
+    veor.u8         q6, q6
+    veor.u8         q7, q7
+    // Expand the tap macro: leaves the eight 32-bit sums in q6 (lanes 0-3) and q7 (lanes 4-7).
+   \filterv
+    // Subtract 8192 from each sum, saturating narrow to s16, then store 8 x 16-bit results (no shift is applied).
+    mov             r12,#8192
+    vdup.32         q8, r12
+    vsub.s32        q6, q8
+    vqmovn.s32      d0, q6
+    vsub.s32        q7, q8
+    vqmovn.s32      d1, q7
+    vst1.u16         {q0}, [r7]!
+    // Advance 8 source columns until the block width a is covered.
+    add             r8, #8
+    cmp             r8, #\a
+    blt             .loop_vps_w8_\filterv\()_\a\()x\b
+    // Step source and destination down one row; r4 counts the remaining rows.
+    add             r0, r1
+    add             r2, r3
+    subs            r4, #1
+    bne             .loop_vps_\filterv\()_\a\()x\b 
+    vpop            {q4-q7}
+.endm 
+// CHROMA_VPS w h: defines x265_interp_4tap_vert_ps_WxH_neon, holding one FILTER_CHROMA_VPS loop per coefficient set and dispatching on the fifth argument.
+.macro CHROMA_VPS  w h
+function x265_interp_4tap_vert_ps_\w\()x\h\()_neon
+    // r5 = fifth argument, read from the stack just above the five registers pushed here; it selects the coefficient set below.
+    push            {r4, r5, r6, r7, r8}
+    ldr             r5, [sp, #4 * 5]
+    lsl             r3, #1          // destination stride doubled: FILTER_CHROMA_VPS stores 16-bit elements
+    sub             r0, r1          // back up one source row: the four taps start one row above src
+    mov             r4, #\h         // row counter consumed by FILTER_CHROMA_VPS
+    // Branch to the loop specialised for values 0..7; any other value falls through into block 0.
+    cmp             r5, #0
+    beq              0f
+    cmp             r5, #1
+    beq              1f
+    cmp             r5, #2
+    beq              2f
+    cmp             r5, #3
+    beq              3f
+    cmp             r5, #4
+    beq              4f
+    cmp             r5, #5
+    beq              5f
+    cmp             r5, #6
+    beq              6f
+    cmp             r5, #7
+    beq              7f
+0:
+    FILTER_CHROMA_VPS  \w \h qpel_filter_chroma_0_32b
+    b            8f
+1:
+    FILTER_CHROMA_VPS  \w \h qpel_filter_chroma_1_32b
+    b            8f
+2:
+    FILTER_CHROMA_VPS  \w \h qpel_filter_chroma_2_32b
+    b            8f
+3:
+    FILTER_CHROMA_VPS  \w \h qpel_filter_chroma_3_32b
+    b            8f
+4:
+    FILTER_CHROMA_VPS  \w \h qpel_filter_chroma_4_32b
+    b            8f
+5:
+    FILTER_CHROMA_VPS  \w \h qpel_filter_chroma_5_32b
+    b            8f
+6:
+    FILTER_CHROMA_VPS  \w \h qpel_filter_chroma_6_32b
+    b            8f
+7:
+    FILTER_CHROMA_VPS  \w \h qpel_filter_chroma_7_32b
+    b            8f
+8:
+    pop             {r4, r5, r6, r7, r8}
+    bx              lr
+endfunc
+.endm
+// Emit x265_interp_4tap_vert_ps_WxH_neon for each block size below; every width is a multiple of the 8-column step used by FILTER_CHROMA_VPS.
+CHROMA_VPS 8 2
+CHROMA_VPS 8 4
+CHROMA_VPS 8 6
+CHROMA_VPS 8 8
+CHROMA_VPS 8 16
+CHROMA_VPS 8 32
+CHROMA_VPS 8 12
+CHROMA_VPS 8 64
+CHROMA_VPS 16 4
+CHROMA_VPS 16 8
+CHROMA_VPS 16 12
+CHROMA_VPS 16 16
+CHROMA_VPS 16 32
+CHROMA_VPS 16 64
+CHROMA_VPS 16 24
+CHROMA_VPS 32 8
+CHROMA_VPS 32 16
+CHROMA_VPS 32 24
+CHROMA_VPS 32 32
+CHROMA_VPS 32 64
+CHROMA_VPS 32 48
+CHROMA_VPS 24 32
+CHROMA_VPS 24 64
+CHROMA_VPS 64 16
+CHROMA_VPS 64 32
+CHROMA_VPS 64 48
+CHROMA_VPS 64 64
+CHROMA_VPS 48 64
+// FILTER_CHROMA_VSP a b filterv: vertical 4-tap filter over r4 rows of an a-wide block, 8 columns per pass, 16-bit in and 8-bit out; filterv names the tap macro to expand.
+.macro FILTER_CHROMA_VSP a b filterv
+    // In: r0 = first of the four source rows, r1 = source stride in bytes, r2 = destination row, r3 = destination stride, r4 = row count. Scratch: r6, r7, r8, r12.
+    vpush           {q4-q7}
+    // Row loop. The b argument is only used to keep the labels unique per block size.
+.loop_vsp_\filterv\()_\a\()x\b:
+    // r7 = write pointer for this output row, r8 = source column offset in bytes.
+    mov             r7, r2
+    mov             r6, r0          // NOTE(review): overwritten by the add below before any use
+    eor             r8, r8
+
+.loop_vsp_w8_\filterv\()_\a\()x\b:
+    // Load 8 x 16-bit samples from each of four consecutive rows: q2 = a, q3 = b, q4 = c, q5 = d.
+    add             r6, r0, r8
+
+    pld [r6]
+    vld1.u16         {q2}, [r6], r1
+    pld [r6]
+    vld1.u16         {q3}, [r6], r1
+    pld [r6]
+    vld1.u16         {q4}, [r6], r1
+    pld [r6]
+    vld1.u16         {q5}, [r6], r1
+    // NOTE(review): all eight qpel_filter_chroma_N_32b macros overwrite q6/q7, so this clearing looks redundant - confirm before removing.
+    veor.u8         q6, q6
+    veor.u8         q7, q7
+    // Expand the tap macro: leaves the eight 32-bit sums in q6 (lanes 0-3) and q7 (lanes 4-7).
+   \filterv
+    // Add (1 << 19) + 2048, saturating shift right by 12 to u16, saturating narrow to u8, store 8 pixels. Presumably 2048 rounds the shift and 1 << 19 = 8192 * 64 undoes the offset FILTER_CHROMA_VPS subtracts - TODO confirm.
+    mov             r12,#1
+    lsl             r12, #19
+    add             r12, #2048
+    vdup.32         q8, r12
+    vadd.s32        q6, q8
+    vqshrun.s32     d0, q6, #12
+    vadd.s32        q7, q8
+    vqshrun.s32     d1, q7, #12
+    vqmovn.u16      d0, q0
+    vst1.u8         d0, [r7]!
+    // Advance 16 bytes (8 samples) until the byte offset reaches 2 * a.
+    add             r8, #16
+    mov             r12, #\a
+    lsl             r12, #1
+    cmp             r8, r12
+    blt             .loop_vsp_w8_\filterv\()_\a\()x\b
+    // Step source and destination down one row; r4 counts the remaining rows.
+    add             r0, r1
+    add             r2, r3
+    subs            r4, #1
+    bne             .loop_vsp_\filterv\()_\a\()x\b 
+    vpop            {q4-q7}
+.endm 
+// CHROMA_VSP w h: defines x265_interp_4tap_vert_sp_WxH_neon, holding one FILTER_CHROMA_VSP loop per coefficient set and dispatching on the fifth argument.
+.macro CHROMA_VSP  w h
+function x265_interp_4tap_vert_sp_\w\()x\h\()_neon
+    // r5 = fifth argument, read from the stack just above the five registers pushed here; it selects the coefficient set below.
+    push            {r4, r5, r6, r7, r8}
+    ldr             r5, [sp, #4 * 5]
+    lsl             r1, #1          // source stride doubled: FILTER_CHROMA_VSP loads 16-bit elements
+    sub             r0, r1          // back up one source row: the four taps start one row above src
+    mov             r4, #\h         // row counter consumed by FILTER_CHROMA_VSP
+    // Branch to the loop specialised for values 0..7; any other value falls through into block 0.
+    cmp             r5, #0
+    beq              0f
+    cmp             r5, #1
+    beq              1f
+    cmp             r5, #2
+    beq              2f
+    cmp             r5, #3
+    beq              3f
+    cmp             r5, #4
+    beq              4f
+    cmp             r5, #5
+    beq              5f
+    cmp             r5, #6
+    beq              6f
+    cmp             r5, #7
+    beq              7f
+0:
+    FILTER_CHROMA_VSP  \w \h qpel_filter_chroma_0_32b
+    b            8f
+1:
+    FILTER_CHROMA_VSP  \w \h qpel_filter_chroma_1_32b
+    b            8f
+2:
+    FILTER_CHROMA_VSP  \w \h qpel_filter_chroma_2_32b
+    b            8f
+3:
+    FILTER_CHROMA_VSP  \w \h qpel_filter_chroma_3_32b
+    b            8f
+4:
+    FILTER_CHROMA_VSP  \w \h qpel_filter_chroma_4_32b
+    b            8f
+5:
+    FILTER_CHROMA_VSP  \w \h qpel_filter_chroma_5_32b
+    b            8f
+6:
+    FILTER_CHROMA_VSP  \w \h qpel_filter_chroma_6_32b
+    b            8f
+7:
+    FILTER_CHROMA_VSP  \w \h qpel_filter_chroma_7_32b
+    b            8f
+8:
+    pop             {r4, r5, r6, r7, r8}
+    bx              lr
+endfunc
+.endm
+// Emit x265_interp_4tap_vert_sp_WxH_neon for each block size below; every width is a multiple of the 8-sample step used by FILTER_CHROMA_VSP.
+CHROMA_VSP 8 2
+CHROMA_VSP 8 4
+CHROMA_VSP 8 6
+CHROMA_VSP 8 8
+CHROMA_VSP 8 16
+CHROMA_VSP 8 32
+CHROMA_VSP 8 12
+CHROMA_VSP 8 64
+CHROMA_VSP 16 4
+CHROMA_VSP 16 8
+CHROMA_VSP 16 12
+CHROMA_VSP 16 16
+CHROMA_VSP 16 32
+CHROMA_VSP 16 64
+CHROMA_VSP 16 24
+CHROMA_VSP 32 8
+CHROMA_VSP 32 16
+CHROMA_VSP 32 24
+CHROMA_VSP 32 32
+CHROMA_VSP 32 64
+CHROMA_VSP 32 48
+CHROMA_VSP 24 32
+CHROMA_VSP 24 64
+CHROMA_VSP 64 16
+CHROMA_VSP 64 32
+CHROMA_VSP 64 48
+CHROMA_VSP 64 64
+CHROMA_VSP 48 64
+
diff -r cd8244e3c3be -r d99ba191af64 source/common/arm/ipfilter8.h
--- a/source/common/arm/ipfilter8.h	Tue Mar 22 18:41:56 2016 +0530
+++ b/source/common/arm/ipfilter8.h	Thu Mar 24 15:25:37 2016 +0530
@@ -128,4 +128,92 @@
 void x265_interp_8tap_vert_ps_24x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
 void x265_interp_8tap_vert_ps_48x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
 void x265_interp_8tap_vert_ps_12x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+// 4-tap vertical interpolation, NEON: pixel input -> pixel output ("pp"), one function per WxH block size.
+void x265_interp_4tap_vert_pp_8x2_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_8x4_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_8x6_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_8x8_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_8x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_8x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_8x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_8x12_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_16x4_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_16x8_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_16x12_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_16x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_16x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_16x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_16x24_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_32x8_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_32x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_32x24_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_32x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_32x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_32x48_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_24x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_24x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_48x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_64x16_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_64x32_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_64x64_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_pp_64x48_neon(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+// 4-tap vertical interpolation, NEON: pixel input -> int16_t output ("ps"), one function per WxH block size.
+void x265_interp_4tap_vert_ps_8x2_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_8x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_8x6_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_8x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_8x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_8x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_8x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_8x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_16x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_16x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_16x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_16x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_16x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_16x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_16x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_32x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_32x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_32x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_32x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_32x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_32x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_24x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_24x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_48x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_64x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_64x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_64x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_ps_64x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx);
+// 4-tap vertical interpolation, NEON: int16_t input -> pixel output ("sp"), one function per WxH block size.
+void x265_interp_4tap_vert_sp_8x2_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_8x4_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_8x6_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_8x8_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_8x16_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_8x32_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_8x64_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_8x12_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_16x4_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_16x8_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_16x12_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_16x16_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_16x32_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_16x64_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_16x24_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_32x8_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_32x16_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_32x24_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_32x32_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_32x64_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_32x48_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_24x32_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_24x64_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_48x64_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_64x16_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_64x32_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_64x64_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+void x265_interp_4tap_vert_sp_64x48_neon(const int16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int coeffIdx);
+
 #endif // ifndef X265_IPFILTER8_ARM_H



More information about the x265-devel mailing list