[x265] [PATCH] arm: Implement blockcopy_pp_16x16_neon. Modified include guards with ARM suffix
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Tue Feb 2 12:00:05 CET 2016
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1454410744 -19800
# Tue Feb 02 16:29:04 2016 +0530
# Node ID 5463e2b9f37e4952bb16e94673c6fd2991243145
# Parent dc62b47dd0d98f732165345883edac55320baec1
arm: Implement blockcopy_pp_16x16_neon. Modified include guards with ARM suffix.
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/CMakeLists.txt
--- a/source/CMakeLists.txt Mon Jan 25 14:59:50 2016 +0530
+++ b/source/CMakeLists.txt Tue Feb 02 16:29:04 2016 +0530
@@ -182,9 +182,11 @@
add_definitions(-march=i686)
endif()
if(ARM AND CROSS_COMPILE_ARM)
- add_definitions(-march=armv6 -mfloat-abi=soft -mfpu=vfp)
+ set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp)
+ add_definitions(${ARM_ARGS})
elseif(ARM)
- add_definitions(-march=armv6 -mfloat-abi=hard -mfpu=vfp)
+ set(ARM_ARGS -march=armv6 -mfloat-abi=hard -mfpu=vfp)
+ add_definitions(${ARM_ARGS})
endif()
if(FPROFILE_GENERATE)
if(INTEL_CXX)
@@ -418,7 +420,7 @@
add_subdirectory(encoder)
add_subdirectory(common)
-if((MSVC_IDE OR XCODE) AND ENABLE_ASSEMBLY)
+if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
# this is required because of this cmake bug
# http://www.cmake.org/Bug/print_bug_page.php?bug_id=8170
if(WIN32)
@@ -429,23 +431,33 @@
if(ARM OR CROSS_COMPILE_ARM)
# compile ARM arch asm files here
-
+ enable_language(ASM)
+ foreach(ASM ${ARM_ASMS})
+ set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})
+ list(APPEND ASM_SRCS ${ASM_SRC})
+ list(APPEND ASM_OBJS ${ASM}.${SUFFIX})
+ add_custom_command(
+ OUTPUT ${ASM}.${SUFFIX}
+ COMMAND ${CMAKE_CXX_COMPILER}
+ ARGS ${ARM_ARGS} -c ${ASM_SRC} -o ${ASM}.${SUFFIX}
+ DEPENDS ${ASM_SRC})
+ endforeach()
elseif(X86)
# compile X86 arch asm files here
foreach(ASM ${MSVC_ASMS})
- set(YASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/x86/${ASM})
- list(APPEND YASM_SRCS ${YASM_SRC})
- list(APPEND YASM_OBJS ${ASM}.${SUFFIX})
+ set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/x86/${ASM})
+ list(APPEND ASM_SRCS ${ASM_SRC})
+ list(APPEND ASM_OBJS ${ASM}.${SUFFIX})
add_custom_command(
OUTPUT ${ASM}.${SUFFIX}
- COMMAND ${YASM_EXECUTABLE} ARGS ${YASM_FLAGS} ${YASM_SRC} -o ${ASM}.${SUFFIX}
- DEPENDS ${YASM_SRC})
+ COMMAND ${YASM_EXECUTABLE} ARGS ${YASM_FLAGS} ${ASM_SRC} -o ${ASM}.${SUFFIX}
+ DEPENDS ${ASM_SRC})
endforeach()
endif()
endif()
-source_group(ASM FILES ${YASM_SRCS})
-add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${YASM_OBJS} ${YASM_SRCS})
+source_group(ASM FILES ${ASM_SRCS})
+add_library(x265-static STATIC $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS} ${ASM_SRCS})
if(NOT MSVC)
set_target_properties(x265-static PROPERTIES OUTPUT_NAME x265)
endif()
@@ -479,7 +491,7 @@
option(ENABLE_SHARED "Build shared library" ON)
if(ENABLE_SHARED)
- add_library(x265-shared SHARED "${PROJECT_BINARY_DIR}/x265.def" ${YASM_OBJS}
+ add_library(x265-shared SHARED "${PROJECT_BINARY_DIR}/x265.def" ${ASM_OBJS}
${X265_RC_FILE} $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common>)
if(EXTRA_LIB)
target_link_libraries(x265-shared ${EXTRA_LIB})
@@ -575,7 +587,7 @@
# Xcode seems unable to link the CLI with libs, so link as one targget
add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp
- $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${YASM_OBJS} ${YASM_SRCS})
+ $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS} ${ASM_SRCS})
else()
add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE}
${ExportDefs} x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp)
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/CMakeLists.txt Tue Feb 02 16:29:04 2016 +0530
@@ -89,9 +89,10 @@
set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
# add ARM assembly/intrinsic files here
- set(A_SRCS)
+ set(A_SRCS asm.S cpu-a.S mc-a.S)
set(VEC_PRIMITIVES)
+ set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
foreach(SRC ${C_SRCS})
set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC})
endforeach()
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/asm-primitives.cpp
--- a/source/common/arm/asm-primitives.cpp Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/asm-primitives.cpp Tue Feb 02 16:29:04 2016 +0530
@@ -29,12 +29,18 @@
#include "x265.h"
#include "cpu.h"
+extern "C" {
+#include "blockcopy8.h"
+}
namespace X265_NS {
// private x265 namespace
void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
{
-
+ if (cpuMask & X265_CPU_NEON)
+ {
+ p.pu[LUMA_16x16].copy_pp = PFX(blockcopy_pp_16x16_neon);
+ }
}
} // namespace X265_NS
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/asm.S
--- a/source/common/arm/asm.S Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/asm.S Tue Feb 02 16:29:04 2016 +0530
@@ -25,8 +25,6 @@
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-#include "x265_config.h"
-
.syntax unified
#if HAVE_NEON
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/blockcopy8.h
--- a/source/common/arm/blockcopy8.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/blockcopy8.h Tue Feb 02 16:29:04 2016 +0530
@@ -23,7 +23,9 @@
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-#ifndef X265_BLOCKCOPY8_H
-#define X265_BLOCKCOPY8_H
+#ifndef X265_BLOCKCOPY8_ARM_H
+#define X265_BLOCKCOPY8_ARM_H
-#endif // ifndef X265_I386_PIXEL_H
+void x265_blockcopy_pp_16x16_neon(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride);
+
+#endif // ifndef X265_BLOCKCOPY8_ARM_H
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/dct8.h
--- a/source/common/arm/dct8.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/dct8.h Tue Feb 02 16:29:04 2016 +0530
@@ -22,7 +22,7 @@
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-#ifndef X265_DCT8_H
-#define X265_DCT8_H
+#ifndef X265_DCT8_ARM_H
+#define X265_DCT8_ARM_H
-#endif // ifndef X265_DCT8_H
+#endif // ifndef X265_DCT8_ARM_H
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/intrapred.h
--- a/source/common/arm/intrapred.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/intrapred.h Tue Feb 02 16:29:04 2016 +0530
@@ -25,7 +25,7 @@
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-#ifndef X265_INTRAPRED_H
-#define X265_INTRAPRED_H
+#ifndef X265_INTRAPRED_ARM_H
+#define X265_INTRAPRED_ARM_H
-#endif // ifndef X265_INTRAPRED_H
+#endif // ifndef X265_INTRAPRED_ARM_H
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/ipfilter8.h
--- a/source/common/arm/ipfilter8.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/ipfilter8.h Tue Feb 02 16:29:04 2016 +0530
@@ -22,7 +22,7 @@
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-#ifndef X265_IPFILTER8_H
-#define X265_IPFILTER8_H
+#ifndef X265_IPFILTER8_ARM_H
+#define X265_IPFILTER8_ARM_H
-#endif // ifndef X265_IPFILTER8_H
+#endif // ifndef X265_IPFILTER8_ARM_H
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/loopfilter.h
--- a/source/common/arm/loopfilter.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/loopfilter.h Tue Feb 02 16:29:04 2016 +0530
@@ -23,7 +23,7 @@
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-#ifndef X265_LOOPFILTER_H
-#define X265_LOOPFILTER_H
+#ifndef X265_LOOPFILTER_ARM_H
+#define X265_LOOPFILTER_ARM_H
-#endif // ifndef X265_LOOPFILTER_H
+#endif // ifndef X265_LOOPFILTER_ARM_H
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/mc-a.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/arm/mc-a.S Tue Feb 02 16:29:04 2016 +0530
@@ -0,0 +1,102 @@
+/*****************************************************************************
+ * Copyright (C) 2016 x265 project
+ *
+ * Authors: Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com.
+ *****************************************************************************/
+
+#include "asm.S"
+
+.section .rodata
+
+.align 4
+
+.text
+
+/* blockcopy_pp_16x16(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
+ * Copies 16 rows of 16 bytes each from src to dst, one q register per row (fully unrolled).
+ * r0 - dst
+ * r1 - dstStride
+ * r2 - src
+ * r3 - srcStride */
+function x265_blockcopy_pp_16x16_neon
+ vld1.8 {q0}, [r2] /* q0 = 16 bytes of the current source row */
+ vst1.8 {q0}, [r0] /* write those 16 bytes to the current destination row */
+ add r2, r2, r3 /* src += srcStride (next source row) */
+ add r0, r0, r1 /* dst += dstStride (next destination row) */
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2]
+ vst1.8 {q0}, [r0]
+ add r2, r2, r3
+ add r0, r0, r1
+ vld1.8 {q0}, [r2] /* 16th and last row */
+ vst1.8 {q0}, [r0] /* no pointer advance follows the final store */
+ bx lr /* branch to the address in lr (return) */
+endfunc
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/mc.h
--- a/source/common/arm/mc.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/mc.h Tue Feb 02 16:29:04 2016 +0530
@@ -21,7 +21,7 @@
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-#ifndef X265_MC_H
-#define X265_MC_H
+#ifndef X265_MC_ARM_H
+#define X265_MC_ARM_H
-#endif // ifndef X265_MC_H
+#endif // ifndef X265_MC_ARM_H
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/pixel-util.h
--- a/source/common/arm/pixel-util.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/pixel-util.h Tue Feb 02 16:29:04 2016 +0530
@@ -22,7 +22,7 @@
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-#ifndef X265_PIXEL_UTIL_H
-#define X265_PIXEL_UTIL_H
+#ifndef X265_PIXEL_UTIL_ARM_H
+#define X265_PIXEL_UTIL_ARM_H
-#endif // ifndef X265_PIXEL_UTIL_H
+#endif // ifndef X265_PIXEL_UTIL_ARM_H
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/arm/pixel.h
--- a/source/common/arm/pixel.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/arm/pixel.h Tue Feb 02 16:29:04 2016 +0530
@@ -27,7 +27,7 @@
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-#ifndef X265_I386_PIXEL_H
-#define X265_I386_PIXEL_H
+#ifndef X265_I386_PIXEL_ARM_H
+#define X265_I386_PIXEL_ARM_H
-#endif // ifndef X265_I386_PIXEL_H
+#endif // ifndef X265_I386_PIXEL_ARM_H
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/common/primitives.cpp
--- a/source/common/primitives.cpp Mon Jan 25 14:59:50 2016 +0530
+++ b/source/common/primitives.cpp Tue Feb 02 16:29:04 2016 +0530
@@ -260,7 +260,10 @@
void PFX(cpu_emms)(void) {}
void PFX(cpu_cpuid)(uint32_t, uint32_t *eax, uint32_t *, uint32_t *, uint32_t *) { *eax = 0; }
void PFX(cpu_xgetbv)(uint32_t, uint32_t *, uint32_t *) {}
+
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM == 0
void PFX(cpu_neon_test)(void) {}
int PFX(cpu_fast_neon_mrc_test)(void) { return 0; }
+#endif
}
#endif
diff -r dc62b47dd0d9 -r 5463e2b9f37e source/test/testharness.h
--- a/source/test/testharness.h Mon Jan 25 14:59:50 2016 +0530
+++ b/source/test/testharness.h Tue Feb 02 16:29:04 2016 +0530
@@ -80,6 +80,9 @@
#elif X265_ARCH_ARM
// TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch
// asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));
+
+ // TODO: replace clock() function with appropriate ARM cpu instructions
+ a = clock();
#endif
return a;
}
More information about the x265-devel
mailing list