[x265] [PATCH] arm: Port sad_4x4 and sad_4x8 from x264
ramya at multicorewareinc.com
ramya at multicorewareinc.com
Fri Feb 5 06:54:11 CET 2016
# HG changeset patch
# User Your Name <ramya at multicorewareinc.com>
# Date 1454581110 -19800
# Thu Feb 04 15:48:30 2016 +0530
# Node ID b0a1c1165f196a10d01982a44085ba4ebb3cbd91
# Parent 87799c713bb05208a36545d68577f7cd58b6b7e3
arm: Port sad_4x4 and sad_4x8 from x264
diff -r 87799c713bb0 -r b0a1c1165f19 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Tue Feb 02 16:29:04 2016 +0530
+++ b/source/common/CMakeLists.txt Thu Feb 04 15:48:30 2016 +0530
@@ -89,7 +89,7 @@
set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
# add ARM assembly/intrinsic files here
- set(A_SRCS asm.S cpu-a.S mc-a.S)
+ set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S)
set(VEC_PRIMITIVES)
set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
diff -r 87799c713bb0 -r b0a1c1165f19 source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp Tue Feb 02 16:29:04 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp Thu Feb 04 15:48:30 2016 +0530
@@ -31,6 +31,7 @@
extern "C" {
#include "blockcopy8.h"
+#include "pixel.h"
}
namespace X265_NS {
@@ -42,5 +43,10 @@
{
p.pu[LUMA_16x16].copy_pp = PFX(blockcopy_pp_16x16_neon);
}
+ if (cpuMask & X265_CPU_ARMV6)
+ {
+ p.pu[LUMA_4x4].sad=PFX(pixel_sad_4x4_armv6);
+ p.pu[LUMA_4x8].sad=PFX(pixel_sad_4x8_armv6);
+ }
}
} // namespace X265_NS
diff -r 87799c713bb0 -r b0a1c1165f19 source/common/arm/pixel.h
--- a/source/common/arm/pixel.h Tue Feb 02 16:29:04 2016 +0530
+++ b/source/common/arm/pixel.h Thu Feb 04 15:48:30 2016 +0530
@@ -29,5 +29,6 @@
#ifndef X265_I386_PIXEL_ARM_H
#define X265_I386_PIXEL_ARM_H
-
+int x265_pixel_sad_4x4_armv6(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
+int x265_pixel_sad_4x8_armv6(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
#endif // ifndef X265_I386_PIXEL_ARM_H
diff -r 87799c713bb0 -r b0a1c1165f19 source/common/arm/sad-a.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/arm/sad-a.S Thu Feb 04 15:48:30 2016 +0530
@@ -0,0 +1,63 @@
+/*****************************************************************************
+ * Copyright (C) 2016 x265 project
+ *
+ * Authors: David Conrad <lessen42 at gmail.com>
+ * Janne Grunau <janne-x264 at jannau.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+.section .rodata
+
+.align 4
+
+.text
+
+/* sad4xh(const pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride), h = 4 or 8
+ *
+ * r0 - dst
+ * r1 - dstStride
+ * r2 - src
+ * r3 - srcStride */
+
+.macro SAD4_ARMV6 h /* emits the 4-wide SAD routine for height h; rows are consumed in pairs, so every row is covered only when h is even */
+function x265_pixel_sad_4x\h\()_armv6
+ push {r4-r6,lr} /* save the registers used as row buffers, plus the return address */
+ ldr r4, [r2], r3 /* row 0: load 4 bytes from r2, then r2 += r3 */
+ ldr r5, [r0], r1 /* row 0: load 4 bytes from r0, then r0 += r1 */
+ ldr r6, [r2], r3 /* row 1 from r2 (second buffer pair, so loads overlap the arithmetic) */
+ ldr lr, [r0], r1 /* row 1 from r0 */
+ usad8 ip, r4, r5 /* ip = sum of the four absolute byte differences of row 0 */
+.rept (\h - 2)/2 /* each repetition loads two further rows and accumulates two */
+ ldr r4, [r2], r3 /* next even row into r4/r5 */
+ ldr r5, [r0], r1
+ usada8 ip, r6, lr, ip /* ip += SAD of the row currently held in r6/lr */
+ ldr r6, [r2], r3 /* next odd row into r6/lr */
+ ldr lr, [r0], r1
+ usada8 ip, r4, r5, ip /* ip += SAD of the row just loaded into r4/r5 */
+.endr
+ usada8 r0, r6, lr, ip /* add the last row and leave the total in r0 */
+ pop {r4-r6,pc} /* restore saved registers; loading pc returns to the caller */
+endfunc
+.endm
+
+SAD4_ARMV6 4
+SAD4_ARMV6 8
+
More information about the x265-devel
mailing list