<div dir="ltr"><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Feb 2, 2016 at 11:30 AM, <span dir="ltr"><<a href="mailto:dnyaneshwar@multicorewareinc.com" target="_blank">dnyaneshwar@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Dnyaneshwar G <<a href="mailto:dnyaneshwar@multicorewareinc.com" target="_blank">dnyaneshwar@multicorewareinc.com</a>><br>
# Date 1454327470 -19800<br>
# Mon Feb 01 17:21:10 2016 +0530<br>
# Node ID 894e0fce5d14844d3c85cdb2a287f302fc8cffca<br>
# Parent dc62b47dd0d98f732165345883edac55320baec1<br>
arm: Implement blockcopy_pp_16x16_neon. Modified include guards with ARM suffix.<br>
<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/CMakeLists.txt<br>
--- a/source/CMakeLists.txt Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/CMakeLists.txt Mon Feb 01 17:21:10 2016 +0530<br>
@@ -182,9 +182,11 @@<br>
add_definitions(-march=i686)<br>
endif()<br>
if(ARM AND CROSS_COMPILE_ARM)<br>
- add_definitions(-march=armv6 -mfloat-abi=soft -mfpu=vfp)<br>
+ set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp)<br>
+ add_definitions(${ARM_ARGS})<br>
elseif(ARM)<br>
- add_definitions(-march=armv6 -mfloat-abi=hard -mfpu=vfp)<br>
+ set(ARM_ARGS -march=armv6 -mfloat-abi=hard -mfpu=vfp)<br>
+ add_definitions(${ARM_ARGS})<br>
endif()<br>
if(FPROFILE_GENERATE)<br>
if(INTEL_CXX)<br>
@@ -418,7 +420,7 @@<br>
add_subdirectory(encoder)<br>
add_subdirectory(common)<br>
<br>
-if((MSVC_IDE OR XCODE) AND ENABLE_ASSEMBLY)<br>
+if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)<br>
# this is required because of this cmake bug<br>
# <a href="http://www.cmake.org/Bug/print_bug_page.php?bug_id=8170" rel="noreferrer" target="_blank">http://www.cmake.org/Bug/print_bug_page.php?bug_id=8170</a><br>
if(WIN32)<br>
@@ -429,7 +431,17 @@<br>
<br>
if(ARM OR CROSS_COMPILE_ARM)<br>
# compile ARM arch asm files here<br>
-<br>
+ enable_language(ASM)<br>
+ foreach(ASM ${ARM_ASMS})<br>
+ set(YASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})<br>
+ list(APPEND YASM_SRCS ${YASM_SRC})<br>
+ list(APPEND YASM_OBJS ${ASM}.${SUFFIX})<br>
+ add_custom_command(<br>
+ OUTPUT ${ASM}.${SUFFIX}<br>
+ COMMAND ${CMAKE_CXX_COMPILER}<br>
+ ARGS ${ARM_ARGS} -c ${YASM_SRC} -o ${ASM}.${SUFFIX}<br>
+ DEPENDS ${YASM_SRC})<br>
+ endforeach()<br></blockquote><div><br></div><div>Can you please rename all YASM_SRCS/YASM_OBJS as just ASM_SRCS/ASM_OBJS? YASM is the assembler for just Intel architectures and since we're also supporting ARM now, it is prudent to rename.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
elseif(X86)<br>
# compile X86 arch asm files here<br>
foreach(ASM ${MSVC_ASMS})<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/CMakeLists.txt<br>
--- a/source/common/CMakeLists.txt Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/CMakeLists.txt Mon Feb 01 17:21:10 2016 +0530<br>
@@ -89,9 +89,10 @@<br>
set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)<br>
<br>
# add ARM assembly/intrinsic files here<br>
- set(A_SRCS)<br>
+ set(ARM_SRCS asm.S cpu-a.S mc-a.S)<br></blockquote><div><br></div><div>Continue to call this A_SRCS to denote "assembly sources", so that we are consistent in the naming convention with x86 assembly.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
set(VEC_PRIMITIVES)<br>
<br>
+ set(ARM_ASMS "${ARM_SRCS}" CACHE INTERNAL "ARM Assembly Sources")<br>
foreach(SRC ${C_SRCS})<br>
set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC})<br>
endforeach()<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/asm-primitives.cpp<br>
--- a/source/common/arm/asm-primitives.cpp Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/asm-primitives.cpp Mon Feb 01 17:21:10 2016 +0530<br>
@@ -29,12 +29,18 @@<br>
#include "x265.h"<br>
#include "cpu.h"<br>
<br>
+extern "C" {<br>
+#include "blockcopy8.h"<br>
+}<br>
<br>
namespace X265_NS {<br>
// private x265 namespace<br>
<br>
void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)<br>
{<br>
-<br>
+ if (cpuMask & X265_CPU_NEON)<br>
+ {<br>
+ p.pu[LUMA_16x16].copy_pp = PFX(blockcopy_pp_16x16_neon);<br>
+ }<br>
}<br>
} // namespace X265_NS<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/asm.S<br>
--- a/source/common/arm/asm.S Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/asm.S Mon Feb 01 17:21:10 2016 +0530<br>
@@ -25,8 +25,6 @@<br>
* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
*****************************************************************************/<br>
<br>
-#include "x265_config.h"<br>
-<br>
.syntax unified<br>
<br>
#if HAVE_NEON<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/blockcopy8.h<br>
--- a/source/common/arm/blockcopy8.h Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/blockcopy8.h Mon Feb 01 17:21:10 2016 +0530<br>
@@ -23,7 +23,9 @@<br>
* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
*****************************************************************************/<br>
<br>
-#ifndef X265_BLOCKCOPY8_H<br>
-#define X265_BLOCKCOPY8_H<br>
+#ifndef X265_BLOCKCOPY8_ARM_H<br>
+#define X265_BLOCKCOPY8_ARM_H<br>
<br>
-#endif // ifndef X265_I386_PIXEL_H<br>
+void x265_blockcopy_pp_16x16_neon(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);<br>
+<br>
+#endif // ifndef X265_BLOCKCOPY8_ARM_H<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/dct8.h<br>
--- a/source/common/arm/dct8.h Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/dct8.h Mon Feb 01 17:21:10 2016 +0530<br>
@@ -22,7 +22,7 @@<br>
* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
*****************************************************************************/<br>
<br>
-#ifndef X265_DCT8_H<br>
-#define X265_DCT8_H<br>
+#ifndef X265_DCT8_ARM_H<br>
+#define X265_DCT8_ARM_H<br>
<br>
-#endif // ifndef X265_DCT8_H<br>
+#endif // ifndef X265_DCT8_ARM_H<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/intrapred.h<br>
--- a/source/common/arm/intrapred.h Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/intrapred.h Mon Feb 01 17:21:10 2016 +0530<br>
@@ -25,7 +25,7 @@<br>
* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
*****************************************************************************/<br>
<br>
-#ifndef X265_INTRAPRED_H<br>
-#define X265_INTRAPRED_H<br>
+#ifndef X265_INTRAPRED_ARM_H<br>
+#define X265_INTRAPRED_ARM_H<br>
<br>
-#endif // ifndef X265_INTRAPRED_H<br>
+#endif // ifndef X265_INTRAPRED_ARM_H<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/ipfilter8.h<br>
--- a/source/common/arm/ipfilter8.h Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/ipfilter8.h Mon Feb 01 17:21:10 2016 +0530<br>
@@ -22,7 +22,7 @@<br>
* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
*****************************************************************************/<br>
<br>
-#ifndef X265_IPFILTER8_H<br>
-#define X265_IPFILTER8_H<br>
+#ifndef X265_IPFILTER8_ARM_H<br>
+#define X265_IPFILTER8_ARM_H<br>
<br>
-#endif // ifndef X265_IPFILTER8_H<br>
+#endif // ifndef X265_IPFILTER8_ARM_H<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/loopfilter.h<br>
--- a/source/common/arm/loopfilter.h Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/loopfilter.h Mon Feb 01 17:21:10 2016 +0530<br>
@@ -23,7 +23,7 @@<br>
* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
*****************************************************************************/<br>
<br>
-#ifndef X265_LOOPFILTER_H<br>
-#define X265_LOOPFILTER_H<br>
+#ifndef X265_LOOPFILTER_ARM_H<br>
+#define X265_LOOPFILTER_ARM_H<br>
<br>
-#endif // ifndef X265_LOOPFILTER_H<br>
+#endif // ifndef X265_LOOPFILTER_ARM_H<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/mc-a.S<br>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000<br>
+++ b/source/common/arm/mc-a.S Mon Feb 01 17:21:10 2016 +0530<br>
@@ -0,0 +1,102 @@<br>
+/*****************************************************************************<br>
+ * Copyright (C) 2016 x265 project<br>
+ *<br>
+ * Authors: Dnyaneshwar Gorade <<a href="mailto:dnyaneshwar@multicorewareinc.com" target="_blank">dnyaneshwar@multicorewareinc.com</a>><br>
+ *<br>
+ * This program is free software; you can redistribute it and/or modify<br>
+ * it under the terms of the GNU General Public License as published by<br>
+ * the Free Software Foundation; either version 2 of the License, or<br>
+ * (at your option) any later version.<br>
+ *<br>
+ * This program is distributed in the hope that it will be useful,<br>
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the<br>
+ * GNU General Public License for more details.<br>
+ *<br>
+ * You should have received a copy of the GNU General Public License<br>
+ * along with this program; if not, write to the Free Software<br>
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.<br>
+ *<br>
+ * This program is also available under a commercial proprietary license.<br>
+ * For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
+ *****************************************************************************/<br>
+<br>
+#include "asm.S"<br>
+<br>
+.section .rodata<br>
+<br>
+.align 4<br>
+<br>
+.text<br>
+<br>
+/* blockcopy_pp_16x16(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)<br>
+ *<br>
+ * r0 - dst<br>
+ * r1 - dstStride<br>
+ * r2 - src<br>
+ * r3 - srcStride */<br>
+function x265_blockcopy_pp_16x16_neon<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ add r2, r2, r3<br>
+ add r0, r0, r1<br>
+ vld1.8 {q0}, [r2]<br>
+ vst1.8 {q0}, [r0]<br>
+ bx lr<br>
+endfunc<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/mc.h<br>
--- a/source/common/arm/mc.h Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/mc.h Mon Feb 01 17:21:10 2016 +0530<br>
@@ -21,7 +21,7 @@<br>
* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
*****************************************************************************/<br>
<br>
-#ifndef X265_MC_H<br>
-#define X265_MC_H<br>
+#ifndef X265_MC_ARM_H<br>
+#define X265_MC_ARM_H<br>
<br>
-#endif // ifndef X265_MC_H<br>
+#endif // ifndef X265_MC_ARM_H<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/pixel-util.h<br>
--- a/source/common/arm/pixel-util.h Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/pixel-util.h Mon Feb 01 17:21:10 2016 +0530<br>
@@ -22,7 +22,7 @@<br>
* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
*****************************************************************************/<br>
<br>
-#ifndef X265_PIXEL_UTIL_H<br>
-#define X265_PIXEL_UTIL_H<br>
+#ifndef X265_PIXEL_UTIL_ARM_H<br>
+#define X265_PIXEL_UTIL_ARM_H<br>
<br>
-#endif // ifndef X265_PIXEL_UTIL_H<br>
+#endif // ifndef X265_PIXEL_UTIL_ARM_H<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/pixel.h<br>
--- a/source/common/arm/pixel.h Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/arm/pixel.h Mon Feb 01 17:21:10 2016 +0530<br>
@@ -27,7 +27,7 @@<br>
* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
*****************************************************************************/<br>
<br>
-#ifndef X265_I386_PIXEL_H<br>
-#define X265_I386_PIXEL_H<br>
+#ifndef X265_I386_PIXEL_ARM_H<br>
+#define X265_I386_PIXEL_ARM_H<br>
<br>
-#endif // ifndef X265_I386_PIXEL_H<br>
+#endif // ifndef X265_I386_PIXEL_ARM_H<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/primitives.cpp<br>
--- a/source/common/primitives.cpp Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/common/primitives.cpp Mon Feb 01 17:21:10 2016 +0530<br>
@@ -260,7 +260,10 @@<br>
void PFX(cpu_emms)(void) {}<br>
void PFX(cpu_cpuid)(uint32_t, uint32_t *eax, uint32_t *, uint32_t *, uint32_t *) { *eax = 0; }<br>
void PFX(cpu_xgetbv)(uint32_t, uint32_t *, uint32_t *) {}<br>
+<br>
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM == 0<br>
void PFX(cpu_neon_test)(void) {}<br>
int PFX(cpu_fast_neon_mrc_test)(void) { return 0; }<br>
+#endif<br>
}<br>
#endif<br>
diff -r dc62b47dd0d9 -r 894e0fce5d14 source/test/testharness.h<br>
--- a/source/test/testharness.h Mon Jan 25 14:59:50 2016 +0530<br>
+++ b/source/test/testharness.h Mon Feb 01 17:21:10 2016 +0530<br>
@@ -80,6 +80,9 @@<br>
#elif X265_ARCH_ARM<br>
// TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch<br>
// asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));<br>
+<br>
+ // TO-DO: replace clock() function with appropriate ARM cpu instructions<br>
+ a = clock();<br>
#endif<br>
return a;<br>
}<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div></div>