[x265] [PATCH] arm: Implement blockcopy_pp_16x16_neon. Modified include guards with ARM suffix

Pradeep Ramachandran pradeep at multicorewareinc.com
Tue Feb 2 10:49:24 CET 2016


On Tue, Feb 2, 2016 at 11:30 AM, <dnyaneshwar at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
> # Date 1454327470 -19800
> #      Mon Feb 01 17:21:10 2016 +0530
> # Node ID 894e0fce5d14844d3c85cdb2a287f302fc8cffca
> # Parent  dc62b47dd0d98f732165345883edac55320baec1
> arm: Implement blockcopy_pp_16x16_neon. Modified include guards with ARM
> suffix.
>
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/CMakeLists.txt
> --- a/source/CMakeLists.txt     Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/CMakeLists.txt     Mon Feb 01 17:21:10 2016 +0530
> @@ -182,9 +182,11 @@
>          add_definitions(-march=i686)
>      endif()
>      if(ARM AND CROSS_COMPILE_ARM)
> -        add_definitions(-march=armv6 -mfloat-abi=soft -mfpu=vfp)
> +        set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp)
> +        add_definitions(${ARM_ARGS})
>      elseif(ARM)
> -        add_definitions(-march=armv6 -mfloat-abi=hard -mfpu=vfp)
> +        set(ARM_ARGS -march=armv6 -mfloat-abi=hard -mfpu=vfp)
> +        add_definitions(${ARM_ARGS})
>      endif()
>      if(FPROFILE_GENERATE)
>          if(INTEL_CXX)
> @@ -418,7 +420,7 @@
>  add_subdirectory(encoder)
>  add_subdirectory(common)
>
> -if((MSVC_IDE OR XCODE) AND ENABLE_ASSEMBLY)
> +if((MSVC_IDE OR XCODE OR GCC) AND ENABLE_ASSEMBLY)
>      # this is required because of this cmake bug
>      # http://www.cmake.org/Bug/print_bug_page.php?bug_id=8170
>      if(WIN32)
> @@ -429,7 +431,17 @@
>
>      if(ARM OR CROSS_COMPILE_ARM)
>      # compile ARM arch asm files here
> -
> +        enable_language(ASM)
> +        foreach(ASM ${ARM_ASMS})
> +            set(YASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})
> +            list(APPEND YASM_SRCS ${YASM_SRC})
> +            list(APPEND YASM_OBJS ${ASM}.${SUFFIX})
> +            add_custom_command(
> +                OUTPUT ${ASM}.${SUFFIX}
> +                COMMAND ${CMAKE_CXX_COMPILER}
> +                ARGS ${ARM_ARGS} -c ${YASM_SRC} -o ${ASM}.${SUFFIX}
> +                DEPENDS ${YASM_SRC})
> +        endforeach()
>

Can you please rename all YASM_SRCS/YASM_OBJS to just ASM_SRCS/ASM_OBJS?
YASM is an assembler for x86 (Intel) architectures only, and since we now
also support ARM, it is prudent to use the generic name.

>      elseif(X86)
>      # compile X86 arch asm files here
>          foreach(ASM ${MSVC_ASMS})
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/CMakeLists.txt
> --- a/source/common/CMakeLists.txt      Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/CMakeLists.txt      Mon Feb 01 17:21:10 2016 +0530
> @@ -89,9 +89,10 @@
>      set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h
> dct8.h loopfilter.h)
>
>      # add ARM assembly/intrinsic files here
> -    set(A_SRCS)
> +    set(ARM_SRCS asm.S cpu-a.S mc-a.S)
>

Please continue to call this A_SRCS (denoting "assembly sources") so that the
naming convention stays consistent with the x86 assembly.


>      set(VEC_PRIMITIVES)
>
> +    set(ARM_ASMS "${ARM_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
>      foreach(SRC ${C_SRCS})
>          set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC})
>      endforeach()
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/asm-primitives.cpp
> --- a/source/common/arm/asm-primitives.cpp      Mon Jan 25 14:59:50 2016
> +0530
> +++ b/source/common/arm/asm-primitives.cpp      Mon Feb 01 17:21:10 2016
> +0530
> @@ -29,12 +29,18 @@
>  #include "x265.h"
>  #include "cpu.h"
>
> +extern "C" {
> +#include "blockcopy8.h"
> +}
>
>  namespace X265_NS {
>  // private x265 namespace
>
>  void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
>  {
> -
> +    if (cpuMask & X265_CPU_NEON)
> +    {
> +        p.pu[LUMA_16x16].copy_pp = PFX(blockcopy_pp_16x16_neon);
> +    }
>  }
>  } // namespace X265_NS
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/asm.S
> --- a/source/common/arm/asm.S   Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/arm/asm.S   Mon Feb 01 17:21:10 2016 +0530
> @@ -25,8 +25,6 @@
>   * For more information, contact us at license @ x265.com.
>
> *****************************************************************************/
>
> -#include "x265_config.h"
> -
>  .syntax unified
>
>  #if   HAVE_NEON
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/blockcopy8.h
> --- a/source/common/arm/blockcopy8.h    Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/arm/blockcopy8.h    Mon Feb 01 17:21:10 2016 +0530
> @@ -23,7 +23,9 @@
>   * For more information, contact us at license @ x265.com.
>
> *****************************************************************************/
>
> -#ifndef X265_BLOCKCOPY8_H
> -#define X265_BLOCKCOPY8_H
> +#ifndef X265_BLOCKCOPY8_ARM_H
> +#define X265_BLOCKCOPY8_ARM_H
>
> -#endif // ifndef X265_I386_PIXEL_H
> +void x265_blockcopy_pp_16x16_neon(pixel* dst, intptr_t dstStride, const
> pixel* src, intptr_t srcStride);
> +
> +#endif // ifndef X265_BLOCKCOPY8_ARM_H
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/dct8.h
> --- a/source/common/arm/dct8.h  Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/arm/dct8.h  Mon Feb 01 17:21:10 2016 +0530
> @@ -22,7 +22,7 @@
>   * For more information, contact us at license @ x265.com.
>
> *****************************************************************************/
>
> -#ifndef X265_DCT8_H
> -#define X265_DCT8_H
> +#ifndef X265_DCT8_ARM_H
> +#define X265_DCT8_ARM_H
>
> -#endif // ifndef X265_DCT8_H
> +#endif // ifndef X265_DCT8_ARM_H
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/intrapred.h
> --- a/source/common/arm/intrapred.h     Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/arm/intrapred.h     Mon Feb 01 17:21:10 2016 +0530
> @@ -25,7 +25,7 @@
>   * For more information, contact us at license @ x265.com.
>
> *****************************************************************************/
>
> -#ifndef X265_INTRAPRED_H
> -#define X265_INTRAPRED_H
> +#ifndef X265_INTRAPRED_ARM_H
> +#define X265_INTRAPRED_ARM_H
>
> -#endif // ifndef X265_INTRAPRED_H
> +#endif // ifndef X265_INTRAPRED_ARM_H
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/ipfilter8.h
> --- a/source/common/arm/ipfilter8.h     Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/arm/ipfilter8.h     Mon Feb 01 17:21:10 2016 +0530
> @@ -22,7 +22,7 @@
>   * For more information, contact us at license @ x265.com.
>
> *****************************************************************************/
>
> -#ifndef X265_IPFILTER8_H
> -#define X265_IPFILTER8_H
> +#ifndef X265_IPFILTER8_ARM_H
> +#define X265_IPFILTER8_ARM_H
>
> -#endif // ifndef X265_IPFILTER8_H
> +#endif // ifndef X265_IPFILTER8_ARM_H
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/loopfilter.h
> --- a/source/common/arm/loopfilter.h    Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/arm/loopfilter.h    Mon Feb 01 17:21:10 2016 +0530
> @@ -23,7 +23,7 @@
>   * For more information, contact us at license @ x265.com.
>
> *****************************************************************************/
>
> -#ifndef X265_LOOPFILTER_H
> -#define X265_LOOPFILTER_H
> +#ifndef X265_LOOPFILTER_ARM_H
> +#define X265_LOOPFILTER_ARM_H
>
> -#endif // ifndef X265_LOOPFILTER_H
> +#endif // ifndef X265_LOOPFILTER_ARM_H
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/mc-a.S
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/arm/mc-a.S  Mon Feb 01 17:21:10 2016 +0530
> @@ -0,0 +1,102 @@
>
> +/*****************************************************************************
> + * Copyright (C) 2016 x265 project
> + *
> + * Authors: Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at license @ x265.com.
> +
> *****************************************************************************/
> +
> +#include "asm.S"
> +
> +.section .rodata
> +
> +.align 4
> +
> +.text
> +
> +/* blockcopy_pp_16x16(pixel* dst, intptr_t dstStride, const pixel* src,
> intptr_t srcStride)
> + *
> + * r0   - dst
> + * r1   - dstStride
> + * r2   - src
> + * r3   - srcStride */
> +function x265_blockcopy_pp_16x16_neon
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    add             r2, r2, r3
> +    add             r0, r0, r1
> +    vld1.8          {q0}, [r2]
> +    vst1.8          {q0}, [r0]
> +    bx              lr
> +endfunc
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/mc.h
> --- a/source/common/arm/mc.h    Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/arm/mc.h    Mon Feb 01 17:21:10 2016 +0530
> @@ -21,7 +21,7 @@
>   * For more information, contact us at license @ x265.com.
>
> *****************************************************************************/
>
> -#ifndef X265_MC_H
> -#define X265_MC_H
> +#ifndef X265_MC_ARM_H
> +#define X265_MC_ARM_H
>
> -#endif // ifndef X265_MC_H
> +#endif // ifndef X265_MC_ARM_H
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/pixel-util.h
> --- a/source/common/arm/pixel-util.h    Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/arm/pixel-util.h    Mon Feb 01 17:21:10 2016 +0530
> @@ -22,7 +22,7 @@
>   * For more information, contact us at license @ x265.com.
>
> *****************************************************************************/
>
> -#ifndef X265_PIXEL_UTIL_H
> -#define X265_PIXEL_UTIL_H
> +#ifndef X265_PIXEL_UTIL_ARM_H
> +#define X265_PIXEL_UTIL_ARM_H
>
> -#endif // ifndef X265_PIXEL_UTIL_H
> +#endif // ifndef X265_PIXEL_UTIL_ARM_H
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/arm/pixel.h
> --- a/source/common/arm/pixel.h Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/arm/pixel.h Mon Feb 01 17:21:10 2016 +0530
> @@ -27,7 +27,7 @@
>   * For more information, contact us at license @ x265.com.
>
> *****************************************************************************/
>
> -#ifndef X265_I386_PIXEL_H
> -#define X265_I386_PIXEL_H
> +#ifndef X265_I386_PIXEL_ARM_H
> +#define X265_I386_PIXEL_ARM_H
>
> -#endif // ifndef X265_I386_PIXEL_H
> +#endif // ifndef X265_I386_PIXEL_ARM_H
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/common/primitives.cpp
> --- a/source/common/primitives.cpp      Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/common/primitives.cpp      Mon Feb 01 17:21:10 2016 +0530
> @@ -260,7 +260,10 @@
>  void PFX(cpu_emms)(void) {}
>  void PFX(cpu_cpuid)(uint32_t, uint32_t *eax, uint32_t *, uint32_t *,
> uint32_t *) { *eax = 0; }
>  void PFX(cpu_xgetbv)(uint32_t, uint32_t *, uint32_t *) {}
> +
> +#if ENABLE_ASSEMBLY && X265_ARCH_ARM == 0
>  void PFX(cpu_neon_test)(void) {}
>  int PFX(cpu_fast_neon_mrc_test)(void) { return 0; }
> +#endif
>  }
>  #endif
> diff -r dc62b47dd0d9 -r 894e0fce5d14 source/test/testharness.h
> --- a/source/test/testharness.h Mon Jan 25 14:59:50 2016 +0530
> +++ b/source/test/testharness.h Mon Feb 01 17:21:10 2016 +0530
> @@ -80,6 +80,9 @@
>  #elif X265_ARCH_ARM
>      // TOD-DO: verify following inline asm to get cpu Timestamp Counter
> for ARM arch
>      // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));
> +
> +    // TO-DO: replace clock() function with appropriate ARM cpu
> instructions
> +    a = clock();
>  #endif
>      return a;
>  }
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160202/dd5e0c33/attachment-0001.html>


More information about the x265-devel mailing list