[x265] [PATCH] arm: Port sad_4x4 and sad_4x8 from x264

ramya at multicorewareinc.com ramya at multicorewareinc.com
Fri Feb 5 06:54:11 CET 2016


# HG changeset patch
# User Your Name <ramya at multicorewareinc.com>
# Date 1454581110 -19800
#      Thu Feb 04 15:48:30 2016 +0530
# Node ID b0a1c1165f196a10d01982a44085ba4ebb3cbd91
# Parent  87799c713bb05208a36545d68577f7cd58b6b7e3
arm: Port sad_4x4 and sad_4x8 from x264

diff -r 87799c713bb0 -r b0a1c1165f19 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt	Tue Feb 02 16:29:04 2016 +0530
+++ b/source/common/CMakeLists.txt	Thu Feb 04 15:48:30 2016 +0530
@@ -89,7 +89,7 @@
     set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
 
     # add ARM assembly/intrinsic files here
-    set(A_SRCS asm.S cpu-a.S mc-a.S)
+    set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S)
     set(VEC_PRIMITIVES)
 
     set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
diff -r 87799c713bb0 -r b0a1c1165f19 source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp	Tue Feb 02 16:29:04 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp	Thu Feb 04 15:48:30 2016 +0530
@@ -31,6 +31,7 @@
 
 extern "C" {
 #include "blockcopy8.h"
+#include "pixel.h"
 }
 
 namespace X265_NS {
@@ -42,5 +43,10 @@
     {
         p.pu[LUMA_16x16].copy_pp = PFX(blockcopy_pp_16x16_neon);
     }
+    if (cpuMask & X265_CPU_ARMV6) // register the ARMv6 4-wide SAD kernels
+    {
+        p.pu[LUMA_4x4].sad = PFX(pixel_sad_4x4_armv6);
+        p.pu[LUMA_4x8].sad = PFX(pixel_sad_4x8_armv6);
+    }
 }
 } // namespace X265_NS
diff -r 87799c713bb0 -r b0a1c1165f19 source/common/arm/pixel.h
--- a/source/common/arm/pixel.h	Tue Feb 02 16:29:04 2016 +0530
+++ b/source/common/arm/pixel.h	Thu Feb 04 15:48:30 2016 +0530
@@ -29,5 +29,6 @@
 
 #ifndef X265_I386_PIXEL_ARM_H
 #define X265_I386_PIXEL_ARM_H
-
+int x265_pixel_sad_4x4_armv6(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
+int x265_pixel_sad_4x8_armv6(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
 #endif // ifndef X265_I386_PIXEL_ARM_H
diff -r 87799c713bb0 -r b0a1c1165f19 source/common/arm/sad-a.S
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/arm/sad-a.S	Thu Feb 04 15:48:30 2016 +0530
@@ -0,0 +1,63 @@
+/*****************************************************************************
+ * Copyright (C) 2016 x265 project
+ *
+ * Authors: David Conrad <lessen42 at gmail.com>
+ *          Janne Grunau <janne-x264 at jannau.net>
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+.section .rodata
+
+.align 4
+
+.text
+
+/* int sad4xH(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
+ * H is the macro argument h; each row is one 32-bit load that usad8/usada8 treat as four unsigned bytes.
+ * r0   - dst
+ * r1   - dstStride
+ * r2   - src
+ * r3   - srcStride */
+
+.macro SAD4_ARMV6 h
+function x265_pixel_sad_4x\h\()_armv6
+    push        {r4-r6,lr}              /* save the registers used as row buffers, plus lr */
+    ldr         r4, [r2], r3            /* r4 = src row 0, then r2 += srcStride */
+    ldr         r5, [r0], r1            /* r5 = dst row 0, then r0 += dstStride */
+    ldr         r6, [r2], r3            /* r6 = src row 1 */
+    ldr         lr, [r0], r1            /* lr = dst row 1 */
+    usad8       ip, r4, r5              /* ip = SAD(row 0); starts the accumulator */
+.rept (\h - 2)/2
+    ldr         r4, [r2], r3            /* r4 = next even src row */
+    ldr         r5, [r0], r1            /* r5 = next even dst row */
+    usada8      ip, r6, lr, ip          /* ip += SAD(previous odd row held in r6/lr) */
+    ldr         r6, [r2], r3            /* r6 = next odd src row */
+    ldr         lr, [r0], r1            /* lr = next odd dst row */
+    usada8      ip, r4, r5, ip          /* ip += SAD(even row just loaded into r4/r5) */
+.endr
+    usada8      r0, r6, lr, ip          /* r0 = ip + SAD(last row); r0 is the return value */
+    pop         {r4-r6,pc}              /* restore registers and return by popping into pc */
+endfunc
+.endm
+
+SAD4_ARMV6 4
+SAD4_ARMV6 8
+


More information about the x265-devel mailing list