[x265] [PATCH] add OpenCL environment codes
Steve Borho
steve at borho.org
Wed Sep 18 04:31:10 CEST 2013
On Tue, Sep 17, 2013 at 1:42 PM, Wenju He <wenju at multicorewareinc.com>wrote:
> # HG changeset patch
> # User Wenju He <wenju at multicorewareinc.com>
> # Date 1379443287 18000
> # Tue Sep 17 13:41:27 2013 -0500
> # Node ID 97e1bed019a62849c5cc514ee74efbf64647986c
> # Parent 0d33ff236f68bc2238138a7213301b2efc0e6426
> add OpenCL environment codes
>
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/CMakeLists.txt
> --- a/source/CMakeLists.txt Mon Sep 16 21:06:49 2013 +0800
> +++ b/source/CMakeLists.txt Tue Sep 17 13:41:27 2013 -0500
> @@ -1,190 +1,198 @@
> -if(NOT CMAKE_BUILD_TYPE)
>
you've changed the file EOLN inadvertently, causing every line to be
changed.
> - # default to Release build for GCC builds
> - set(CMAKE_BUILD_TYPE Release CACHE STRING
> - "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or
> CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
> - FORCE)
> -endif()
> -
> -project (x265)
> -cmake_minimum_required (VERSION 2.6)
> -
> -SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake"
> "${CMAKE_MODULE_PATH}")
> -
> -if ("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
> - set(X64 1)
> - add_definitions(-DX86_64=1)
> -endif()
> -
> -if (CMAKE_GENERATOR STREQUAL "Xcode")
> - set(XCODE 1)
> -endif()
> -if (APPLE)
> - add_definitions(-DMACOS)
> -endif()
> -
> -# Enforce coding standards. Full warnings and warnings as errors
> -if("$ENV{CXX}" STREQUAL "icl")
> - set(MSVC 1)
> -endif()
> -if(MSVC)
> - option(STATIC_LINK_CRT "Statically link C runtime for release builds"
> OFF)
> - if (STATIC_LINK_CRT)
> - set(CompilerFlags CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE)
> - foreach(CompilerFlag ${CompilerFlags})
> - string(REPLACE "/MD" "/MT" ${CompilerFlag}
> "${${CompilerFlag}}")
> - endforeach()
> - endif (STATIC_LINK_CRT)
> - add_definitions(/W4 /WX /D_CRT_SECURE_NO_WARNINGS)
> - add_definitions(/Ob2) # always inline
> - add_definitions(/Oi) # enable intrinsics
> - add_definitions(/MP) # multithreaded build
> - include_directories(compat/msvc)
> -endif(MSVC)
> -
> -if("$ENV{CXX}" STREQUAL "icpc")
> - set(GCC 1)
> - add_definitions(-Wall -Wextra -Wshadow -no-vec)
> -elseif(CMAKE_COMPILER_IS_GNUCXX)
> - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
> OUTPUT_VARIABLE GCC_VERSION)
> - set(GCC 1)
> - add_definitions(-Wall -Wextra -Wshadow -mstackrealign)
> - if(NOT GCC_VERSION VERSION_LESS 4.7)
> - # this is necessary to avoid name conflicts in vector class
> - # library. if vector classes are removed/replaced this can
> - # likely be removed as well.
> - add_definitions(-fabi-version=6)
> - endif()
> -elseif(APPLE)
> - exec_program(uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
> - string(REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
> - message(STATUS "DARWIN_VERSION=${DARWIN_VERSION}")
> - if (DARWIN_VERSION GREATER 12)
> - # Detect Mac OS X 10.8 llvm/gcc frankenstein's monster
> - if (${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/c++")
> - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
> OUTPUT_VARIABLE GCC_VERSION)
> - set(GCC 1)
> - add_definitions(-Wall -Wextra -Wshadow)
> - endif()
> - endif()
> -endif()
> -if (GCC)
> - option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
> - if(WARNINGS_AS_ERRORS)
> - add_definitions(-Werror)
> - endif(WARNINGS_AS_ERRORS)
> - if(X64 AND NOT WIN32)
> - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
> - endif(X64 AND NOT WIN32)
> -endif(GCC)
> -
> -option(HIGH_BIT_DEPTH "Use 16bit pixels internally" OFF)
> -if(HIGH_BIT_DEPTH)
> - add_definitions(-DHIGH_BIT_DEPTH=1)
> -else(HIGH_BIT_DEPTH)
> - add_definitions(-DHIGH_BIT_DEPTH=0)
> -endif(HIGH_BIT_DEPTH)
> -
> -option(ENABLE_PRIMITIVES_VEC "Enable use of SIMD vector class primitives"
> ON)
> -find_package(Yasm)
> -if(YASM_FOUND)
> - if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
> - message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old.
> 1.2.0 or later required")
> - else()
> - message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build
> assembly primitives")
> - option(ENABLE_PRIMITIVES_ASM "Enable use of assembly coded
> primitives" ON)
> - endif()
> -endif(YASM_FOUND)
> -
> -if(UNIX)
> - SET(PLATFORM_LIBS pthread m)
> - if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
> - SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
> - endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
> -endif(UNIX)
> -
> -option(ENABLE_STLPORT "Enable use of STLPort STL replacement library" OFF)
> -if(ENABLE_STLPORT)
> - set(STLPORT_LOCATION "${STLPORT_INCLUDE_DIR}" CACHE PATH "Location of
> STLPort sources")
> - # STLPort headers must be included before system headers
> - include_directories(BEFORE ${STLPORT_LOCATION}/stlport)
> - link_directories(${STLPORT_LOCATION}/lib)
> - if("$ENV{CXX}" STREQUAL "icl")
> - add_definitions(/Qwd1879)
> - endif()
> -endif(ENABLE_STLPORT)
> -
> -option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
> -if(ENABLE_PPA)
> - add_definitions(-DENABLE_PPA)
> - add_subdirectory(PPA)
> - SET(PLATFORM_LIBS ${PLATFORM_LIBS} PPA)
> - if(UNIX)
> - SET(PLATFORM_LIBS ${PLATFORM_LIBS} dl)
> - endif(UNIX)
> -endif(ENABLE_PPA)
> -
> -if (WIN32)
> - # Visual leak detector
> - find_package(VLD)
> - if(VLD_FOUND)
> - add_definitions(-DHAVE_VLD)
> - include_directories(${VLD_INCLUDE_DIRS})
> - set(PLATFORM_LIBS ${PLATFORM_LIBS} ${VLD_LIBRARIES})
> - link_directories(${VLD_LIBRARY_DIRS})
> - endif()
> - option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
> - if(WINXP_SUPPORT)
> - # force workarounds for atomic intrinsics introduced after XP
> - add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
> - endif(WINXP_SUPPORT)
> -endif()
> -
> -include_directories(. Lib common encoder)
> -add_subdirectory(common)
> -add_subdirectory(encoder)
> -
> -set(LIBS common encoder)
> -if(ENABLE_PRIMITIVES_VEC)
> - set(LIBS ${LIBS} PrimitivesVec)
> -endif(ENABLE_PRIMITIVES_VEC)
> -
> -if(ENABLE_PRIMITIVES_ASM)
> - set(LIBS ${LIBS} PrimitivesASM)
> -endif(ENABLE_PRIMITIVES_ASM)
> -
> -if(NOT XCODE)
> - include(mergestaticlibs)
> - merge_static_libs(x265 ${LIBS})
> -endif()
> -
> -# Test applications
> -option(ENABLE_TESTS "Enable Unit Tests" OFF)
> -if(ENABLE_TESTS)
> - add_subdirectory(test)
> -endif(ENABLE_TESTS)
> -
> -# Main CLI application
> -option(ENABLE_CLI "Build standalone CLI application" ON)
> -if(ENABLE_CLI)
> - file(GLOB InputFiles input/*.cpp input/*.h)
> - file(GLOB OutputFiles output/*.cpp output/*.h)
> - source_group(input FILES ${InputFiles})
> - source_group(output FILES ${OutputFiles})
> - if (MSVC)
> - set_source_files_properties(compat/msvc/getopt.c PROPERTIES
> COMPILE_FLAGS "/wd4100")
> - elseif(GCC)
> - set_source_files_properties(compat/msvc/getopt.c PROPERTIES
> COMPILE_FLAGS "-Wno-unused-parameter")
> - endif()
> -
> - include(version) # determine X265_VERSION
> - set_source_files_properties(x265.cpp PROPERTIES COMPILE_FLAGS
> -DX265_VERSION=${X265_VERSION})
> - add_executable(cli ${EXTRAS} ../COPYING ${InputFiles} ${OutputFiles}
> - x265.cpp x265opts.h x265.h
> - compat/msvc/getopt.c compat/msvc/getopt.h)
> -if(XCODE)
> - target_link_libraries(cli ${LIBS} ${PLATFORM_LIBS})
> -else()
> - target_link_libraries(cli x265 ${PLATFORM_LIBS})
> -endif()
> - SET_TARGET_PROPERTIES(cli PROPERTIES OUTPUT_NAME x265)
> -endif(ENABLE_CLI)
> +if(NOT CMAKE_BUILD_TYPE)
> + # default to Release build for GCC builds
> + set(CMAKE_BUILD_TYPE Release CACHE STRING
> + "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or
> CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
> + FORCE)
> +endif()
> +
> +project (x265)
> +cmake_minimum_required (VERSION 2.6)
> +
> +SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake"
> "${CMAKE_MODULE_PATH}")
> +
> +if ("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
> + set(X64 1)
> + add_definitions(-DX86_64=1)
> +endif()
> +
> +if (CMAKE_GENERATOR STREQUAL "Xcode")
> + set(XCODE 1)
> +endif()
> +if (APPLE)
> + add_definitions(-DMACOS)
> +endif()
> +
> +# Enforce coding standards. Full warnings and warnings as errors
> +if("$ENV{CXX}" STREQUAL "icl")
> + set(MSVC 1)
> +endif()
> +if(MSVC)
> + option(STATIC_LINK_CRT "Statically link C runtime for release builds"
> OFF)
> + if (STATIC_LINK_CRT)
> + set(CompilerFlags CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE)
> + foreach(CompilerFlag ${CompilerFlags})
> + string(REPLACE "/MD" "/MT" ${CompilerFlag}
> "${${CompilerFlag}}")
> + endforeach()
> + endif (STATIC_LINK_CRT)
> + add_definitions(/W4 /WX /D_CRT_SECURE_NO_WARNINGS)
> + add_definitions(/Ob2) # always inline
> + add_definitions(/Oi) # enable intrinsics
> + add_definitions(/MP) # multithreaded build
> + include_directories(compat/msvc)
> +endif(MSVC)
> +
> +if("$ENV{CXX}" STREQUAL "icpc")
> + set(GCC 1)
> + add_definitions(-Wall -Wextra -Wshadow -no-vec)
> +elseif(CMAKE_COMPILER_IS_GNUCXX)
> + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
> OUTPUT_VARIABLE GCC_VERSION)
> + set(GCC 1)
> + add_definitions(-Wall -Wextra -Wshadow -mstackrealign)
> + if(NOT GCC_VERSION VERSION_LESS 4.7)
> + # this is necessary to avoid name conflicts in vector class
> + # library. if vector classes are removed/replaced this can
> + # likely be removed as well.
> + add_definitions(-fabi-version=6)
> + endif()
> +elseif(APPLE)
> + exec_program(uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
> + string(REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
> + message(STATUS "DARWIN_VERSION=${DARWIN_VERSION}")
> + if (DARWIN_VERSION GREATER 12)
> + # Detect Mac OS X 10.8 llvm/gcc frankenstein's monster
> + if (${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/c++")
> + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
> OUTPUT_VARIABLE GCC_VERSION)
> + set(GCC 1)
> + add_definitions(-Wall -Wextra -Wshadow)
> + endif()
> + endif()
> +endif()
> +if (GCC)
> + option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
> + if(WARNINGS_AS_ERRORS)
> + add_definitions(-Werror)
> + endif(WARNINGS_AS_ERRORS)
> + if(X64 AND NOT WIN32)
> + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
> + endif(X64 AND NOT WIN32)
> +endif(GCC)
> +
> +option(HIGH_BIT_DEPTH "Use 16bit pixels internally" OFF)
> +if(HIGH_BIT_DEPTH)
> + add_definitions(-DHIGH_BIT_DEPTH=1)
> +else(HIGH_BIT_DEPTH)
> + add_definitions(-DHIGH_BIT_DEPTH=0)
> +endif(HIGH_BIT_DEPTH)
> +
> +option(ENABLE_PRIMITIVES_VEC "Enable use of SIMD vector class primitives"
> ON)
> +find_package(Yasm)
> +if(YASM_FOUND)
> + if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
> + message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old.
> 1.2.0 or later required")
> + else()
> + message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build
> assembly primitives")
> + option(ENABLE_PRIMITIVES_ASM "Enable use of assembly coded
> primitives" ON)
> + endif()
> +endif(YASM_FOUND)
> +
> +if(UNIX)
> + SET(PLATFORM_LIBS pthread m)
> + if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
> + SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
> + endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
> +endif(UNIX)
> +
> +option(ENABLE_STLPORT "Enable use of STLPort STL replacement library" OFF)
> +if(ENABLE_STLPORT)
> + set(STLPORT_LOCATION "${STLPORT_INCLUDE_DIR}" CACHE PATH "Location of
> STLPort sources")
> + # STLPort headers must be included before system headers
> + include_directories(BEFORE ${STLPORT_LOCATION}/stlport)
> + link_directories(${STLPORT_LOCATION}/lib)
> + if("$ENV{CXX}" STREQUAL "icl")
> + add_definitions(/Qwd1879)
> + endif()
> +endif(ENABLE_STLPORT)
> +
> +option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
> +if(ENABLE_PPA)
> + add_definitions(-DENABLE_PPA)
> + add_subdirectory(PPA)
> + SET(PLATFORM_LIBS ${PLATFORM_LIBS} PPA)
> + if(UNIX)
> + SET(PLATFORM_LIBS ${PLATFORM_LIBS} dl)
> + endif(UNIX)
> +endif(ENABLE_PPA)
> +
> +option(ENABLE_OPENCL "Use OpenCL for motion estimation" OFF)
> +if(ENABLE_OPENCL)
> + find_package(OpenCL)
> + if(OPENCL_FOUND)
> + add_definitions(-DHAVE_OPENCL)
> + endif()
> +endif(ENABLE_OPENCL)
> +
> +if (WIN32)
> + # Visual leak detector
> + find_package(VLD)
> + if(VLD_FOUND)
> + add_definitions(-DHAVE_VLD)
> + include_directories(${VLD_INCLUDE_DIRS})
> + set(PLATFORM_LIBS ${PLATFORM_LIBS} ${VLD_LIBRARIES})
> + link_directories(${VLD_LIBRARY_DIRS})
> + endif()
> + option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
> + if(WINXP_SUPPORT)
> + # force workarounds for atomic intrinsics introduced after XP
> + add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
> + endif(WINXP_SUPPORT)
> +endif()
> +
> +include_directories(. Lib common encoder)
> +add_subdirectory(common)
> +add_subdirectory(encoder)
> +
> +set(LIBS common encoder)
> +if(ENABLE_PRIMITIVES_VEC)
> + set(LIBS ${LIBS} PrimitivesVec)
> +endif(ENABLE_PRIMITIVES_VEC)
> +
> +if(ENABLE_PRIMITIVES_ASM)
> + set(LIBS ${LIBS} PrimitivesASM)
> +endif(ENABLE_PRIMITIVES_ASM)
> +
> +if(NOT XCODE)
> + include(mergestaticlibs)
> + merge_static_libs(x265 ${LIBS})
> +endif()
> +
> +# Test applications
> +option(ENABLE_TESTS "Enable Unit Tests" OFF)
> +if(ENABLE_TESTS)
> + add_subdirectory(test)
> +endif(ENABLE_TESTS)
> +
> +# Main CLI application
> +option(ENABLE_CLI "Build standalone CLI application" ON)
> +if(ENABLE_CLI)
> + file(GLOB InputFiles input/*.cpp input/*.h)
> + file(GLOB OutputFiles output/*.cpp output/*.h)
> + source_group(input FILES ${InputFiles})
> + source_group(output FILES ${OutputFiles})
> + if (MSVC)
> + set_source_files_properties(compat/msvc/getopt.c PROPERTIES
> COMPILE_FLAGS "/wd4100")
> + elseif(GCC)
> + set_source_files_properties(compat/msvc/getopt.c PROPERTIES
> COMPILE_FLAGS "-Wno-unused-parameter")
> + endif()
> +
> + include(version) # determine X265_VERSION
> + set_source_files_properties(x265.cpp PROPERTIES COMPILE_FLAGS
> -DX265_VERSION=${X265_VERSION})
> + add_executable(cli ${EXTRAS} ../COPYING ${InputFiles} ${OutputFiles}
> + x265.cpp x265opts.h x265.h
> + compat/msvc/getopt.c compat/msvc/getopt.h)
> +if(XCODE)
> + target_link_libraries(cli ${LIBS} ${PLATFORM_LIBS}
> ${OPENCL_LIBRARIES})
> +else()
> + target_link_libraries(cli x265 ${PLATFORM_LIBS} ${OPENCL_LIBRARIES})
> +endif()
> + SET_TARGET_PROPERTIES(cli PROPERTIES OUTPUT_NAME x265)
> +endif(ENABLE_CLI)
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/cmake/FindOpenCL.cmake
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/FindOpenCL.cmake Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,58 @@
> +# modified from OpenCV
>
If this file was copied and adapted from OpenCV, we need to keep their
license header and attribution here, even if MCW contributed the patch.
+
> +if(APPLE)
> + set(OPENCL_FOUND YES)
> + set(OPENCL_LIBRARY "-framework OpenCL" CACHE STRING "OpenCL library")
> + set(OPENCL_INCLUDE_DIR "" CACHE STRING "OpenCL include directory")
> + mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
> +else(APPLE)
> + #find_package(OpenCL QUIET)
>
let's not add commented lines in new files
> +
> + if (NOT OPENCL_FOUND)
> + find_path(OPENCL_ROOT_DIR
> + NAMES OpenCL/cl.h CL/cl.h include/CL/cl.h
> include/nvidia-current/CL/cl.h
> + PATHS ENV OCLROOT ENV AMDAPPSDKROOT ENV CUDA_PATH ENV
> INTELOCLSDKROOT
> + DOC "OpenCL root directory"
> + NO_DEFAULT_PATH)
> +
> + find_path(OPENCL_INCLUDE_DIR
> + NAMES OpenCL/cl.h CL/cl.h
> + HINTS ${OPENCL_ROOT_DIR}
> + PATH_SUFFIXES include include/nvidia-current
> + DOC "OpenCL include directory"
> + NO_DEFAULT_PATH)
> +
> + if(WIN32)
>
our cmake script style uses 4-spaces for intendation
> + if(X64)
> + set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64)
> + else()
> + set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86)
> + endif()
> + elseif(UNIX)
> + if(X64)
> + set(OPENCL_POSSIBLE_LIB_SUFFIXES lib64 lib)
> + else()
> + set(OPENCL_POSSIBLE_LIB_SUFFIXES lib32 lib)
> + endif()
> + else()
> + set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
> + endif()
> +
> + find_library(OPENCL_LIBRARY
> + NAMES OpenCL
> + HINTS ${OPENCL_ROOT_DIR}
> + PATH_SUFFIXES ${OPENCL_POSSIBLE_LIB_SUFFIXES}
> + DOC "OpenCL library"
> + NO_DEFAULT_PATH)
> +
> + mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
> + include(FindPackageHandleStandardArgs)
> + FIND_PACKAGE_HANDLE_STANDARD_ARGS(OPENCL DEFAULT_MSG OPENCL_LIBRARY
> OPENCL_INCLUDE_DIR )
> + endif()
> +endif(APPLE)
> +
> +if(OPENCL_FOUND)
> + set(HAVE_OPENCL 1)
> + set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
> + set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
> +endif()
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/cmake/cl2cpp.cmake
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/cl2cpp.cmake Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,35 @@
> +# modified from OpenCV
>
ditto attribution here
> +
> +file(GLOB cl_list "${CL_DIR}/*.h" "${CL_DIR}/*.cl" )
> +
> +file(WRITE ${OUTPUT} "// This file is auto-generated. Do not edit!
> +
> +namespace x265
> +{
> +
> +")
> +
> +file(APPEND ${OUTPUT} "const char* x265_opencl_source=\n")
> +
> +foreach(cl ${cl_list})
> + file(READ "${cl}" lines)
> +
> + string(REPLACE "\r" "" lines "${lines}\n")
> + string(REPLACE "\t" " " lines "${lines}")
> +
> + string(REGEX REPLACE "/\\*([^*]/|\\*[^/]|[^*/])*\\*/" "" lines
> "${lines}") # multiline comments
> + string(REGEX REPLACE "/\\*([^\n])*\\*/" "" lines
> "${lines}") # single-line comments
> + string(REGEX REPLACE "[ ]*//[^\n]*\n" "\n" lines
> "${lines}") # single-line comments
> + string(REGEX REPLACE "\n[ ]*(\n[ ]*)*" "\n" lines
> "${lines}") # empty lines & leading whitespace
> + string(REGEX REPLACE "^\n" "" lines
> "${lines}") # leading new line
> +
> + string(REPLACE "\\" "\\\\" lines "${lines}")
> + string(REPLACE "\"" "\\\"" lines "${lines}")
> + string(REPLACE "\n" "\\n\"\n\"" lines "${lines}")
> +
> + string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof
> +
> + file(APPEND ${OUTPUT} "\"${lines}")
> +endforeach()
> +
> +file(APPEND ${OUTPUT} ";\n\n}\n")
>
we should also cleanup end of file eoln
> \ No newline at end of file
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/CMakeLists.txt
> --- a/source/common/CMakeLists.txt Mon Sep 16 21:06:49 2013 +0800
> +++ b/source/common/CMakeLists.txt Tue Sep 17 13:41:27 2013 -0500
> @@ -1,59 +1,73 @@
> -if(GCC)
>
again, you've changed the EOLN of this file on accident
> - if (NOT X64)
> - # force gcc to generate code for sync primitives
> - set_source_files_properties(threadpool.cpp reference.cpp
> wavefront.cpp PROPERTIES COMPILE_FLAGS -march=i686)
> - endif()
> -endif(GCC)
> -
> -if(ENABLE_PRIMITIVES_VEC)
> - add_definitions(-DENABLE_VECTOR_PRIMITIVES=1)
> -endif(ENABLE_PRIMITIVES_VEC)
> -if(ENABLE_PRIMITIVES_ASM)
> - add_definitions(-DENABLE_ASM_PRIMITIVES=1)
> -endif(ENABLE_PRIMITIVES_ASM)
> -
> -include_directories(../VectorClass)
> -
> -file(GLOB LIBCOMMON_HDR ../Lib/TLibCommon/*.h)
> -file(GLOB LIBCOMMON_SRC ../Lib/TLibCommon/*.cpp)
> -source_group(TLibCommon FILES ${LIBCOMMON_SRC})
> -source_group(TLibCommonH FILES ${LIBCOMMON_HDR})
> -if(GCC)
> - set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
> - "-Wno-sign-compare")
> -endif(GCC)
> -if(MSVC)
> - # ignore these warnings from HM source
> - # /wd4244 type conversion, possible loss of data
> - # /wd4512 assignment operator could not be generated
> - # /wd4127 conditional expression is constant
> - # /wd4389 signed/unsigned mismatch
> - # /wd4018 '<' signed/unsigned mismatch
> - # /wd4800 performance warning: bool coersion
> - set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
> - "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
> - if ("$ENV{CXX}" STREQUAL "icl")
> - add_definitions(/Qwd2557) # signed/unsigned mismatch
> - endif()
> -endif(MSVC)
> -
> -add_library(common STATIC ../../COPYING
> - ${LIBCOMMON_SRC} ${LIBCOMMON_HDR}
> - primitives.cpp primitives.h
> - pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp
> - ../VectorClass/instrset_detect.cpp
> - threading.cpp threading.h
> - threadpool.cpp threadpool.h
> - wavefront.h wavefront.cpp
> - md5.cpp md5.h
> - TShortYUV.cpp TShortYUV.h mv.h
> - reference.cpp reference.h
> - common.cpp common.h
> - lowres.cpp lowres.h)
> -
> -if(ENABLE_PRIMITIVES_VEC)
> - add_subdirectory(vec)
> -endif(ENABLE_PRIMITIVES_VEC)
> -if(ENABLE_PRIMITIVES_ASM)
> - add_subdirectory(x86)
> -endif(ENABLE_PRIMITIVES_ASM)
> +if(GCC)
> + if (NOT X64)
> + # force gcc to generate code for sync primitives
> + set_source_files_properties(threadpool.cpp reference.cpp
> wavefront.cpp PROPERTIES COMPILE_FLAGS -march=i686)
> + endif()
> +endif(GCC)
> +
> +if(ENABLE_PRIMITIVES_VEC)
> + add_definitions(-DENABLE_VECTOR_PRIMITIVES=1)
> +endif(ENABLE_PRIMITIVES_VEC)
> +if(ENABLE_PRIMITIVES_ASM)
> + add_definitions(-DENABLE_ASM_PRIMITIVES=1)
> +endif(ENABLE_PRIMITIVES_ASM)
> +
> +include_directories(../VectorClass)
> +
> +file(GLOB LIBCOMMON_HDR ../Lib/TLibCommon/*.h)
> +file(GLOB LIBCOMMON_SRC ../Lib/TLibCommon/*.cpp)
> +source_group(TLibCommon FILES ${LIBCOMMON_SRC})
> +source_group(TLibCommonH FILES ${LIBCOMMON_HDR})
> +if(GCC)
> + set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
> + "-Wno-sign-compare")
> +endif(GCC)
> +if(MSVC)
> + # ignore these warnings from HM source
> + # /wd4244 type conversion, possible loss of data
> + # /wd4512 assignment operator could not be generated
> + # /wd4127 conditional expression is constant
> + # /wd4389 signed/unsigned mismatch
> + # /wd4018 '<' signed/unsigned mismatch
> + # /wd4800 performance warning: bool coersion
> + set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
> + "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
> + if ("$ENV{CXX}" STREQUAL "icl")
> + add_definitions(/Qwd2557) # signed/unsigned mismatch
> + endif()
> +endif(MSVC)
> +
> +if(HAVE_OPENCL)
> + include_directories(${OPENCL_INCLUDE_DIRS})
> + set(OPENCL_KERNEL_HDR ${CMAKE_CURRENT_SOURCE_DIR}/oclkernels.h)
> + add_custom_command(
> + OUTPUT ${OPENCL_KERNEL_HDR}
> + COMMAND ${CMAKE_COMMAND}
> -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/opencl"
> + -DOUTPUT=${OPENCL_KERNEL_HDR}
> + -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/cl2cpp.cmake"
> + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/cl2cpp.cmake")
> + set(OPENCL_SRC opencl.cpp)
> + set(OPENCL_HDR opencl.h ${OPENCL_KERNEL_HDR})
> +endif(HAVE_OPENCL)
> +
> +add_library(common STATIC ../../COPYING
> + ${LIBCOMMON_SRC} ${LIBCOMMON_HDR}
> + primitives.cpp primitives.h
> + pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp
> + ../VectorClass/instrset_detect.cpp
> + threading.cpp threading.h
> + threadpool.cpp threadpool.h
> + wavefront.h wavefront.cpp
> + md5.cpp md5.h
> + TShortYUV.cpp TShortYUV.h mv.h
> + reference.cpp reference.h
> + common.cpp common.h
> + lowres.cpp lowres.h
> + ${OPENCL_SRC} ${OPENCL_HDR})
> +
> +if(ENABLE_PRIMITIVES_VEC)
> + add_subdirectory(vec)
> +endif(ENABLE_PRIMITIVES_VEC)
> +if(ENABLE_PRIMITIVES_ASM)
> + add_subdirectory(x86)
> +endif(ENABLE_PRIMITIVES_ASM)
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl.cpp
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/opencl.cpp Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,631 @@
>
> +/*****************************************************************************
> + * opencl.c: OpenCL initialization and kernel compilation
> +
> *****************************************************************************
> + * Copyright (C) 2013 x265 project
> + *
> + * Authors: Steve Borho <steve at borho.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at licensing at multicorewareinc.com.
> +
> *****************************************************************************/
> +
> +#include "TLibCommon/CommonDef.h"
> +#include "common.h"
> +
> +#if defined WIN32 || defined _WIN32
> +#include <Windows.h>
>
windows.h needs to be lower case, this actually matters for mingw cross
compiles
> +#else
> +#include <dlfcn.h> //dlopen, dlsym, dlclose
> +#endif
> +#include "opencl.h"
> +#include "oclkernels.h"
> +#include "stdio.h"
> +
> +/* define from recent cl_ext.h, copied here in case headers are old */
> +#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
> +
> +using namespace x265;
> +
> +// global variable
> +static ocl g_ocl;
> +
> +static void CALLBACK oclErrorNotify( const char *errinfo, const void
> *private_info, size_t cb, void *user_data )
> +{
> + /* Any error notification can be assumed to be fatal to the OpenCL
> context.
> + * We need to stop using it immediately to prevent further damage. */
> + x265_param_t *param = (x265_param_t*)user_data;
> + param->useOpenCL = 0;
> + //h->opencl.b_fatal_error = 1;
> + x265_log(param, X265_LOG_ERROR, "OpenCL: %s\n", errinfo );
> + x265_log(param, X265_LOG_ERROR, "OpenCL: fatal error, aborting
> encode\n" );
> + (void)cb;
> + (void)private_info;
> +}
> +
> +ocl::ocl()
> + : m_context(NULL)
> + , m_device(NULL)
> + , m_queue(NULL)
> + , m_program(NULL)
> + , m_initialized(false)
> + , m_deviceAMDSI(false)
> + , m_fatalError(false)
> + , m_openclThreadPri(0)
> + , m_kernelMeStar(NULL)
> +{}
> +
> +ocl::~ocl()
> +{}
> +
> +
> +bool ocl::init(x265_param_t *param)
> +{
> + cl_int status;
> + cl_uint numPlatforms;
> + bool ret = false;
> +
> + if (m_initialized)
> + {
> + return true;
> + }
> +
> + status = clGetPlatformIDs(0, NULL, &numPlatforms);
> + if( status != CL_SUCCESS || numPlatforms == 0 )
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to query
> installed platforms\n");
> + return false;
> + }
> +
> + cl_platform_id *platforms =
> (cl_platform_id*)X265_MALLOC(cl_platform_id, numPlatforms);
> + status = clGetPlatformIDs( numPlatforms, platforms, NULL );
> + if( status != CL_SUCCESS )
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to query
> installed platforms\n");
> + X265_FREE( platforms );
> + return false;
> + }
> +
> + /* Select the first OpenCL platform with a GPU device that supports
> our
> + * required image (texture) formats */
> + for( cl_uint i = 0; i < numPlatforms; ++i )
> + {
> + cl_uint gpu_count = 0;
> + status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0,
> NULL, &gpu_count );
> + if( status != CL_SUCCESS || !gpu_count )
> + continue;
> +
> + cl_device_id *devices = (cl_device_id*) X265_MALLOC(
> cl_device_id, gpu_count );
> + if( !devices )
> + continue;
> +
> + status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU,
> gpu_count, devices, NULL );
> + if( status != CL_SUCCESS )
> + {
> + X265_FREE( devices );
> + continue;
> + }
> +
> + /* Find a GPU device that supports our image formats */
> + for( cl_uint gpu = 0; gpu < gpu_count; gpu++ )
> + {
> + cl_device_id device = devices[gpu];
> +
> + /* if the user has specified an exact device ID, skip all
> other
> + * GPUs. If this device matches, allow it to continue
> through the
> + * checks for supported images, etc. */
> + if( param->OpenCLDeviceId && devices[gpu] != (cl_device_id)
> param->OpenCLDeviceId )
> + continue;
> +
> + cl_bool image_support;
> + clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT,
> sizeof(cl_bool), &image_support, NULL );
> + if( !image_support )
> + continue;
> +
> + cl_context context = clCreateContext( NULL, 1, &device,
> oclErrorNotify, (void*)param, &status );
> + if( status != CL_SUCCESS )
> + continue;
> +
> + cl_uint imagecount = 0;
> + clGetSupportedImageFormats( context, CL_MEM_READ_WRITE,
> CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
> + if( !imagecount )
> + {
> + clReleaseContext( context );
> + continue;
> + }
> +
> + cl_image_format *imageType =
> (cl_image_format*)X265_MALLOC(cl_image_format, imagecount);
> + if( !imageType )
> + {
> + clReleaseContext( context );
> + continue;
> + }
> +
> + clGetSupportedImageFormats( context, CL_MEM_READ_WRITE,
> CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
> +
> + int b_has_r = 0;
> + int b_has_rgba = 0;
> + for( cl_uint j = 0; j < imagecount; j++ )
> + {
> + if( imageType[j].image_channel_order == CL_R &&
> + imageType[j].image_channel_data_type ==
> CL_UNSIGNED_INT32 )
> + b_has_r = 1;
> + else if( imageType[j].image_channel_order == CL_RGBA &&
> + imageType[j].image_channel_data_type ==
> CL_UNSIGNED_INT8 )
> + b_has_rgba = 1;
> + }
> + X265_FREE( imageType );
> + if( !b_has_r || !b_has_rgba )
> + {
> + char devname[64];
> + status = clGetDeviceInfo(device, CL_DEVICE_NAME,
> sizeof(devname), devname, NULL);
> + if( status == CL_SUCCESS )
> + {
> + /* emit warning if we are discarding the user's
> explicit choice */
> + int level = param->OpenCLDeviceId ? X265_LOG_WARNING
> : X265_LOG_DEBUG;
> + x265_log(param, level, "OpenCL: %s does not support
> required image formats\n", devname);
> + }
> + clReleaseContext( context );
> + continue;
> + }
> +
> + /* user selection of GPU device, skip N first matches */
> + if( param->iOpenCLDevice )
> + {
> + param->iOpenCLDevice--;
> + clReleaseContext( context );
> + continue;
> + }
> +
> + cl_command_queue queue = clCreateCommandQueue( context,
> device, 0, &status );
> + if( status != CL_SUCCESS )
>
watch the white-space coding style
> + {
> + clReleaseContext( context );
> + continue;
> + }
> +
> + m_context = context;
> + m_device = device;
> + m_queue = queue;
> +
> + ret = true;
> + break;
> + }
> +
> + X265_FREE( devices );
> +
> + if (ret)
> + {
> + break;
> + }
> + }
> +
> + X265_FREE( platforms );
> +
> + if (!param->pszCLbinFile)
> + {
> + param->pszCLbinFile = "x265_kernels.clbin";
> + }
> +
> + if (!ret)
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to find a
> compatible device\n");
> + }
> + else
> + {
> + ret = createKernels(param);
> + }
> +
> + m_initialized = ret;
> +
> + return ret;
> +}
> +
> +bool ocl::createKernels(x265_param_t *param)
> +{
> + static const char *kernelnames[] = {
> + "motion_estimate"
> + };
> + cl_kernel *kernels[] = {
> + &m_kernelMeStar
> + };
> + cl_int status;
> +
> + compile(param);
> + if( !m_program )
> + {
> + return false;
> + }
> +
> + for( int i = 0; i < sizeof(kernelnames)/sizeof(kernelnames[0]); i++ )
> + {
> + *kernels[i] = clCreateKernel(m_program, kernelnames[i], &status);
> + if( status != CL_SUCCESS )
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to compile
> kernel '%s' (%d)\n", kernelnames[i], status );
> + return false;
> + }
> + }
> +
> + return true;
> +}
> +
> +void ocl::destroy()
> +{
> + if (m_queue)
> + {
> + clFinish(m_queue);
> + }
> +
> + if (m_kernelMeStar)
> + {
> + clReleaseKernel(m_kernelMeStar);
> + m_kernelMeStar = NULL;
> + }
> +
> + if (m_program)
> + {
> + clReleaseProgram(m_program);
> + m_program = NULL;
> + }
> +
> + if (m_queue)
> + {
> + clReleaseCommandQueue( m_queue );
> + m_queue = NULL;
> + }
> +
> + if (m_context)
> + {
> + clReleaseContext( m_context );
> + m_context = NULL;
> + }
> +
> + m_initialized = false;
> +}
> +
> +/* Try to load the cached compiled program binary, verify the device
> context is
> + * still valid before reuse */
> +void ocl::cache_load(x265_param_t *param, char *devname, char *devvendor,
> char *driverversion)
> +{
> + cl_int status;
> +
> + /* try to load cached program binary */
> + FILE *fp = fopen( param->pszCLbinFile, "rb" );
> + if( !fp )
> + return;
> +
> + fseek( fp, 0L, SEEK_END );
> + size_t size = ftell( fp );
> + rewind( fp );
> + uint8_t *binary = (uint8_t*)X265_MALLOC( uint8_t, size );
> + if (!binary)
> + return;
> +
> + fread( binary, 1, size, fp );
> + const uint8_t *ptr = (const uint8_t*)binary;
> +
> +#define CHECK_STRING( STR )\
> + {\
> + size_t len = strlen( STR );\
> + if( size <= len || strncmp( (char*)ptr, STR, len ) )\
> + goto fail;\
> + else {\
> + size -= (len+1); ptr += (len+1);\
> + }\
> + }
> +
> + CHECK_STRING( devname );
> + CHECK_STRING( devvendor );
> + CHECK_STRING( driverversion );
> + //CHECK_STRING( x265_opencl_source_hash );
> +#undef CHECK_STRING
> +
> + m_program = clCreateProgramWithBinary( m_context, 1, &m_device,
> &size, &ptr, NULL, &status );
> + if (status != CL_SUCCESS)
> + {
> + m_program = NULL;
> + }
> +
> +fail:
> + fclose( fp );
> + X265_FREE( binary );
> +}
> +
> +/* Save the compiled program binary to a file for later reuse. Device
> context
> + * is also saved in the cache file so we do not reuse stale binaries */
> +void ocl::cache_save(x265_param_t *param, char *devname, char *devvendor,
> char *driverversion)
> +{
> + FILE *fp = fopen(param->pszCLbinFile, "wb");
> + if( !fp )
> + {
> + x265_log(param, X265_LOG_INFO, "OpenCL: unable to open clbin file
> for write");
> + return;
> + }
> +
> + size_t size;
> + cl_int status = clGetProgramInfo( m_program, CL_PROGRAM_BINARY_SIZES,
> sizeof(size_t), &size, NULL );
> + if( status == CL_SUCCESS )
> + {
> + uint8_t *binary = (uint8_t*)X265_MALLOC(uint8_t, size);
> + status = clGetProgramInfo( m_program, CL_PROGRAM_BINARIES,
> sizeof(uint8_t *), &binary, NULL );
> + if( status == CL_SUCCESS )
> + {
> + fputs( devname, fp );
> + fputc( '\n', fp );
> + fputs( devvendor, fp );
> + fputc( '\n', fp );
> + fputs( driverversion, fp );
> + fputc( '\n', fp );
> + //fputs( x265_opencl_source_hash, fp );
> + //fputc( '\n', fp );
> + fwrite( binary, 1, size, fp );
> + }
> + else
> + x265_log(param, X265_LOG_INFO, "OpenCL: Unable to query
> program binary, no cache file generated");
> + X265_FREE( binary );
> + }
> + else
> + x265_log(param, X265_LOG_INFO, "OpenCL: Unable to query program
> binary size, no cache file generated");
> + fclose( fp );
> +
> + return;
> +}
> +
> +
> +oclME::oclME(x265_param_t *param)
> + : m_pageLockedBuffer(NULL)
> + , m_pageLockedPtr(NULL)
> + , m_plOccupancy(0)
> + , m_numCopies(0)
> + , m_mvCostsBuffer(NULL)
> + , m_mvpBuffer(NULL)
> +{
> + for (int i = 0; i < 2; i++)
> + {
> + m_mvBuffer[i];
> + m_costsBuffer[i];
> + m_frameStatsBuffer[i];
> + }
> +
> + m_ocl = &g_ocl;
> + init(param);
> +}
> +
> +/* The OpenCL source under common/opencl will be merged into
> common/oclkernels.h
> + * by CMake. It defines a x265_opencl_source string which we will pass
> + * to clCreateProgramWithSource(). We also attempt to use a cache file
> for the
> + * compiled binary, stored in the current working folder. */
> +bool ocl::compile(x265_param_t *param)
> +{
> + cl_int status;
> + char devname[64];
> + char devvendor[64];
> + char driverversion[64];
> +
> + status = clGetDeviceInfo( m_device, CL_DEVICE_NAME,
> sizeof(devname), devname, NULL );
> + status |= clGetDeviceInfo( m_device, CL_DEVICE_VENDOR,
> sizeof(devvendor), devvendor, NULL );
> + status |= clGetDeviceInfo( m_device, CL_DRIVER_VERSION,
> sizeof(driverversion), driverversion, NULL );
> + if (status != CL_SUCCESS)
> + {
> + return false;
> + }
> +
> + // Most AMD GPUs have vector registers
> + int vectorize = !strcmp( devvendor, "Advanced Micro Devices, Inc." );
> + m_deviceAMDSI = false;
> +
> + if (vectorize)
> + {
> + /* Disable OpenCL on Intel/AMD switchable graphics devices */
> + if (detectSwitchableGraphics())
> + {
> + x265_log(param, X265_LOG_INFO, "OpenCL acceleration disabled,
> switchable graphics detected\n" );
> + return false;
> + }
> +
> + /* Detect AMD SouthernIsland or newer device (single-width
> registers) */
> + cl_uint simdwidth = 4;
> + status = clGetDeviceInfo( m_device,
> CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
> + if( status == CL_SUCCESS && simdwidth == 1 )
> + {
> + vectorize = 0;
> + m_deviceAMDSI = true;
> + }
> + }
> +
> + x265_log(param, X265_LOG_INFO, "OpenCL acceleration enabled with %s
> %s %s\n", devvendor, devname, m_deviceAMDSI ? "(SI)" : "" );
> +
> + cache_load(param, devname, devvendor, driverversion);
> + if( !m_program )
> + {
> + /* clCreateProgramWithSource() requires a pointer variable, you
> cannot just use &x265_opencl_source */
> + x265_log(param, X265_LOG_INFO, "Compiling OpenCL kernels...\n" );
> + size_t size = strlen(x265_opencl_source);
> + m_program = clCreateProgramWithSource( m_context, 1,
> &x265_opencl_source, &size, &status );
> + if( status != CL_SUCCESS || !m_program )
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: unable to create
> program\n" );
> + return false;
> + }
> + }
> +
> + /* Build the program binary for the OpenCL device */
> + const char *buildopts = vectorize ? "-DVECTORIZE=1" : "";
> + status = clBuildProgram( m_program, 1, &m_device, buildopts, NULL,
> NULL );
> + if( status == CL_SUCCESS )
> + {
> + cache_save(param, devname, devvendor, driverversion);
> + return true;
> + }
> +
> + /* Compile failure, should not happen with production code. */
> +
> + size_t build_log_len = 0;
> +
> + status = clGetProgramBuildInfo( m_program, m_device,
> CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len );
> + if( status != CL_SUCCESS )
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed,
> unable to query build log\n" );
> + return false;
> + }
> +
> + char *build_log = (char*)X265_MALLOC(char, build_log_len);
> + if( !build_log )
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed,
> unable to alloc build log\n" );
> + return false;
> + }
> +
> + status = clGetProgramBuildInfo( m_program, m_device,
> CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
> + if( status != CL_SUCCESS )
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed,
> unable to get build log\n" );
> + X265_FREE( build_log );
> + return false;
> + }
> +
> + FILE *lg = fopen( "x265_kernel_build_log.txt", "w" );
> + if( lg )
> + {
> + fwrite( build_log, 1, build_log_len, lg );
> + fclose( lg );
> + x265_log(param, X265_LOG_WARNING, "OpenCL: kernel build errors
> written to x265_kernel_build_log.txt\n" );
> + }
> +
> + X265_FREE( build_log );
> +
> + clReleaseProgram(m_program);
> + m_program = NULL;
> +
> + return false;
> +}
> +
> +oclME::~oclME()
> +{
> +}
> +
> +void oclME::destroy()
> +{
> +}
> +
> +bool oclME::init(x265_param_t *param)
> +{
> + cl_int status;
> +
> + if (!m_ocl)
> + {
> + return false;
> + }
> + m_ocl->init(param);
> +
> + m_pageLockedBuffer = clCreateBuffer(m_ocl->m_context,
> CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL,
> &status);
> + if( status != CL_SUCCESS )
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to allocate
> page-locked buffer, error '%d'\n", status );
> + return false;
> + }
> + m_pageLockedPtr = (char*) clEnqueueMapBuffer(m_ocl->m_queue,
> m_pageLockedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
> + 0, PAGE_LOCKED_BUF_SIZE,
> 0, NULL, NULL, &status);
> + if( status != CL_SUCCESS )
> + {
> + x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to map
> page-locked buffer, error '%d'\n", status );
> + return false;
> + }
> +
> + return true;
> +}
>
let's not pull in all this ADL stuff until we have to
> +/* OpenCL misbehaves on hybrid laptops with Intel iGPU and AMD dGPU, so
> + * we consult AMD's ADL interface to detect this situation and disable
> + * OpenCL on these machines (Linux and Windows) */
> +#ifndef _WIN32
> +#define __stdcall
> +#define HINSTANCE void *
> +#endif
> +typedef void* ( __stdcall *ADL_MAIN_MALLOC_CALLBACK )( int );
> +typedef int ( *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int );
> +typedef int ( *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* );
> +typedef int ( *ADL_POWERXPRESS_SCHEME_GET ) ( int, int *, int *, int * );
> +typedef int ( *ADL_MAIN_CONTROL_DESTROY )();
> +#define ADL_OK 0
> +#define ADL_PX_SCHEME_DYNAMIC 2
> +
> +void* __stdcall adl_malloc_wrapper( int iSize ) { return
> X265_MALLOC(char, iSize); }
> +
> +int ocl::detectSwitchableGraphics()
> +{
> + ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create;
> + ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
> + ADL_POWERXPRESS_SCHEME_GET ADL_PowerXpress_Scheme_Get;
> + ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy;
> + HINSTANCE hDLL;
> + int ret = 0;
> +
> +#if _WIN32
> + hDLL = LoadLibrary( "atiadlxx.dll" );
> + if( !hDLL )
> + hDLL = LoadLibrary( "atiadlxy.dll" );
> +#else
> + hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
> +#define GetProcAddress dlsym
> +#endif
> + if( !hDLL )
> + return ret;
> +
> + ADL_Main_Control_Create = (ADL_MAIN_CONTROL_CREATE)
> GetProcAddress(hDLL, "ADL_Main_Control_Create");
> + ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY)
> GetProcAddress(hDLL, "ADL_Main_Control_Destroy");
> + ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)
> GetProcAddress(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
> + ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET)
> GetProcAddress(hDLL, "ADL_PowerXpress_Scheme_Get");
> + if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy ||
> !ADL_Adapter_NumberOfAdapters_Get ||
> + !ADL_PowerXpress_Scheme_Get )
> + goto bail;
> +
> + if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1) )
> + goto bail;
> +
> + int numAdapters = 0;
> + if( ADL_OK != ADL_Adapter_NumberOfAdapters_Get( &numAdapters ) )
> + {
> + ADL_Main_Control_Destroy();
> + goto bail;
> + }
> +
> + for( int i = 0; i < numAdapters; i++ )
> + {
> + int PXSchemeRange, PXSchemeCurrentState, PXSchemeDefaultState;
> + if( ADL_OK != ADL_PowerXpress_Scheme_Get( i, &PXSchemeRange,
> &PXSchemeCurrentState, &PXSchemeDefaultState) )
> + break;
> +
> + if( PXSchemeRange >= ADL_PX_SCHEME_DYNAMIC )
> + {
> + ret = 1;
> + break;
> + }
> + }
> +
> + ADL_Main_Control_Destroy();
> +
> +bail:
> +#if _WIN32
> + FreeLibrary( hDLL );
> +#else
> + dlclose( hDLL );
> +#endif
> +
> + return ret;
> +}
> +
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl.h
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/opencl.h Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,128 @@
>
> +/*****************************************************************************
> + * opencl.h: OpenCL structures and defines
> +
> *****************************************************************************
> + * Copyright (C) 2013 x265 project
> + *
> + * Authors: Steve Borho <steve at borho.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at licensing at multicorewareinc.com.
> +
> *****************************************************************************/
> +
> +#ifndef X265_OPENCL_H
> +#define X265_OPENCL_H
> +
> +#if defined __APPLE__
> +#include <OpenCL/opencl.h>
> +#else
> +#include <CL/opencl.h>
> +#endif
> +
> +#include "x265.h"
> +
> +namespace x265
> +{
> +
> +/* Number of PCIe copies that can be queued before requiring a flush */
> +#define MAX_FINISH_COPIES 1024
> +
> +/* Size (in bytes) of the page-locked buffer used for PCIe xfers */
> +#define PAGE_LOCKED_BUF_SIZE 32 * 1024 * 1024
> +
> +/* We define CL_QUEUE_THREAD_HANDLE_AMD here because it is not defined
> + * in the OpenCL headers shipped with NVIDIA drivers. We need to be
> + * able to compile on an NVIDIA machine and run optimally on an AMD GPU.
> */
> +#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
> +
> +#define OCLCHECK( method, ... )\
> + status = method( __VA_ARGS__ );\
> + if( status != CL_SUCCESS ) {\
> + param->useOpenCL = 0;\
> + m_ocl->m_fatalError = 1;\
> + x264_log( param, X264_LOG_ERROR, # method " error '%d'\n", status
> );\
> + return status;\
> + }
> +
> +class ocl
> +{
> +public:
> +
> + ocl();
> + ~ocl();
> +
> + cl_context m_context;
> + cl_device_id m_device;
> + cl_command_queue m_queue;
> + cl_program m_program;
> +
> + bool m_initialized;
> + bool m_deviceAMDSI;
> + bool m_fatalError;
> + int m_openclThreadPri;
> +
> + cl_kernel m_kernelMeStar;
> +
> + bool init(x265_param_t *param);
> + void destroy();
> + int detectSwitchableGraphics();
> +
> +private:
> +
> + bool createKernels(x265_param_t *param);
> + bool compile(x265_param_t *param);
> + void cache_load(x265_param_t *param, char *devname, char
> *devvendor, char *driverversion);
> + void cache_save(x265_param_t *param, char *devname, char
> *devvendor, char *driverversion);
> +};
> +
> +
> +/* motion estimation */
> +class oclME
> +{
> +public:
> +
> + oclME(x265_param_t *param);
> + ~oclME();
> +
> + ocl* m_ocl;
> +
> + cl_mem m_pageLockedBuffer;
> + char *m_pageLockedPtr;
> + int m_plOccupancy;
> +
> + struct
> + {
> + void *src;
> + void *dest;
> + int bytes;
> + } m_copies[MAX_FINISH_COPIES];
> + int m_numCopies;
> +
> + cl_mem m_mvBuffer[2];
> + cl_mem m_mvCostsBuffer;
> + cl_mem m_mvpBuffer;
> +
> + cl_mem m_costsBuffer[2];
> + cl_mem m_frameStatsBuffer[2]; /* cost_est, cost_est_aq,
> intra_mbs */
> +
> + bool init(x265_param_t *param);
> + void destroy();
> +
> +};
> +
> +} // end namespace x265
> +
> +#endif
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl/me.cl
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/opencl/me.cl Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,15 @@
> +/* motion estimate */
>
Oh interesting
It's pretty unlikely for a number of reasons that we'll be able to use the
image (texture) data packing hack that we used in x264.
> +__kernel void motion_estimate( read_only image2d_t fenc,
> + read_only image2d_t fref,
> + const __global short2 *in_mvs,
> + __global short2 *out_mvs,
> + __global short *out_mv_costs,
> + __global short2 *mvp_buffer,
> + __local short *cost_local,
> + __local short2 *mvc_local,
> + int mb_width,
> + int lambda,
> + int me_range)
> +{
> +}
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/encoder/CMakeLists.txt
> --- a/source/encoder/CMakeLists.txt Mon Sep 16 21:06:49 2013 +0800
> +++ b/source/encoder/CMakeLists.txt Tue Sep 17 13:41:27 2013 -0500
> @@ -1,43 +1,47 @@
>
another EOLN switch
> -if(GCC)
> - # encoder.cpp must include HM headers which are not careful about
> named parameters
> - set_source_files_properties(encoder.cpp PROPERTIES COMPILE_FLAGS
> -Wno-unused-parameter)
> -endif(GCC)
> -
> -if(GCC)
> - if (NOT X64)
> - # force gcc to generate code for sync primitives
> - set_source_files_properties(framefilter.cpp PROPERTIES
> COMPILE_FLAGS -march=i686)
> - endif()
> -endif(GCC)
> -
> -file(GLOB LIBENCODER_HDR ../Lib/TLibEncoder/*.h)
> -file(GLOB LIBENCODER_SRC ../Lib/TLibEncoder/*.cpp)
> -source_group(TLibEncoder FILES ${LIBENCODER_SRC})
> -source_group(TLibEncoderH FILES ${LIBENCODER_HDR})
> -if(GCC)
> - set_source_files_properties(compress.cpp ${LIBENCODER_SRC} PROPERTIES
> COMPILE_FLAGS
> - "-Wno-sign-compare")
> -endif(GCC)
> -if(MSVC)
> - # ignore these warnings from HM source
> - # /wd4244 type conversion, possible loss of data
> - # /wd4512 assignment operator could not be generated
> - # /wd4127 conditional expression is constant
> - # /wd4389 signed/unsigned mismatch
> - # /wd4018 '<' signed/unsigned mismatch
> - # /wd4800 performance warning: bool coersion
> - set_source_files_properties(${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS
> - "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
> -endif(MSVC)
> -
> -add_library(encoder STATIC ../../COPYING ../x265.h
> - ${LIBENCODER_SRC} ${LIBENCODER_HDR}
> - bitcost.cpp bitcost.h
> - motion.cpp motion.h
> - slicetype.cpp slicetype.h
> - frameencoder.cpp frameencoder.h
> - framefilter.cpp framefilter.h
> - cturow.cpp cturow.h
> - dpb.cpp dpb.h
> - ratecontrol.cpp ratecontrol.h
> - compress.cpp encoder.cpp)
> +if(GCC)
> + # encoder.cpp must include HM headers which are not careful about
> named parameters
> + set_source_files_properties(encoder.cpp PROPERTIES COMPILE_FLAGS
> -Wno-unused-parameter)
> +endif(GCC)
> +
> +if(GCC)
> + if (NOT X64)
> + # force gcc to generate code for sync primitives
> + set_source_files_properties(framefilter.cpp PROPERTIES
> COMPILE_FLAGS -march=i686)
> + endif()
> +endif(GCC)
> +
> +file(GLOB LIBENCODER_HDR ../Lib/TLibEncoder/*.h)
> +file(GLOB LIBENCODER_SRC ../Lib/TLibEncoder/*.cpp)
> +source_group(TLibEncoder FILES ${LIBENCODER_SRC})
> +source_group(TLibEncoderH FILES ${LIBENCODER_HDR})
> +if(GCC)
> + set_source_files_properties(compress.cpp ${LIBENCODER_SRC} PROPERTIES
> COMPILE_FLAGS
> + "-Wno-sign-compare")
> +endif(GCC)
> +if(MSVC)
> + # ignore these warnings from HM source
> + # /wd4244 type conversion, possible loss of data
> + # /wd4512 assignment operator could not be generated
> + # /wd4127 conditional expression is constant
> + # /wd4389 signed/unsigned mismatch
> + # /wd4018 '<' signed/unsigned mismatch
> + # /wd4800 performance warning: bool coersion
> + set_source_files_properties(${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS
> + "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
> +endif(MSVC)
> +
> +if(HAVE_OPENCL)
> + include_directories(${OPENCL_INCLUDE_DIRS})
> +endif(HAVE_OPENCL)
> +
> +add_library(encoder STATIC ../../COPYING ../x265.h
> + ${LIBENCODER_SRC} ${LIBENCODER_HDR}
> + bitcost.cpp bitcost.h
> + motion.cpp motion.h
> + slicetype.cpp slicetype.h
> + frameencoder.cpp frameencoder.h
> + framefilter.cpp framefilter.h
> + cturow.cpp cturow.h
> + dpb.cpp dpb.h
> + ratecontrol.cpp ratecontrol.h
> + compress.cpp encoder.cpp)
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/x265.h
> --- a/source/x265.h Mon Sep 16 21:06:49 2013 +0800
> +++ b/source/x265.h Tue Sep 17 13:41:27 2013 -0500
> @@ -277,6 +277,12 @@
> int qp; ///< Constant QP base value
> int rateFactor; ///< Constant rate factor
> (CRF)
> } rc;
> +
> + // opencl
> + int useOpenCL; ///< use OpenCL when
> available
> + int iOpenCLDevice; ///< specify count of GPU
> devices to skip, for CLI users
> + void *OpenCLDeviceId; ///< pass explicit
> cl_device_id as void*, for API users
> + char *pszCLbinFile; ///< compiled OpenCL
> kernel cache file
> }
> x265_param_t;
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130917/072d9a5c/attachment-0001.html>
More information about the x265-devel
mailing list