[x265] [PATCH] add OpenCL environment codes
Wenju He
wenju at multicorewareinc.com
Tue Sep 17 20:42:24 CEST 2013
# HG changeset patch
# User Wenju He <wenju at multicorewareinc.com>
# Date 1379443287 18000
# Tue Sep 17 13:41:27 2013 -0500
# Node ID 97e1bed019a62849c5cc514ee74efbf64647986c
# Parent 0d33ff236f68bc2238138a7213301b2efc0e6426
add OpenCL environment codes
diff -r 0d33ff236f68 -r 97e1bed019a6 source/CMakeLists.txt
--- a/source/CMakeLists.txt Mon Sep 16 21:06:49 2013 +0800
+++ b/source/CMakeLists.txt Tue Sep 17 13:41:27 2013 -0500
@@ -1,190 +1,198 @@
-if(NOT CMAKE_BUILD_TYPE)
- # default to Release build for GCC builds
- set(CMAKE_BUILD_TYPE Release CACHE STRING
- "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
- FORCE)
-endif()
-
-project (x265)
-cmake_minimum_required (VERSION 2.6)
-
-SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")
-
-if ("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
- set(X64 1)
- add_definitions(-DX86_64=1)
-endif()
-
-if (CMAKE_GENERATOR STREQUAL "Xcode")
- set(XCODE 1)
-endif()
-if (APPLE)
- add_definitions(-DMACOS)
-endif()
-
-# Enforce coding standards. Full warnings and warnings as errors
-if("$ENV{CXX}" STREQUAL "icl")
- set(MSVC 1)
-endif()
-if(MSVC)
- option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
- if (STATIC_LINK_CRT)
- set(CompilerFlags CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE)
- foreach(CompilerFlag ${CompilerFlags})
- string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
- endforeach()
- endif (STATIC_LINK_CRT)
- add_definitions(/W4 /WX /D_CRT_SECURE_NO_WARNINGS)
- add_definitions(/Ob2) # always inline
- add_definitions(/Oi) # enable intrinsics
- add_definitions(/MP) # multithreaded build
- include_directories(compat/msvc)
-endif(MSVC)
-
-if("$ENV{CXX}" STREQUAL "icpc")
- set(GCC 1)
- add_definitions(-Wall -Wextra -Wshadow -no-vec)
-elseif(CMAKE_COMPILER_IS_GNUCXX)
- execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
- set(GCC 1)
- add_definitions(-Wall -Wextra -Wshadow -mstackrealign)
- if(NOT GCC_VERSION VERSION_LESS 4.7)
- # this is necessary to avoid name conflicts in vector class
- # library. if vector classes are removed/replaced this can
- # likely be removed as well.
- add_definitions(-fabi-version=6)
- endif()
-elseif(APPLE)
- exec_program(uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
- string(REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
- message(STATUS "DARWIN_VERSION=${DARWIN_VERSION}")
- if (DARWIN_VERSION GREATER 12)
- # Detect Mac OS X 10.8 llvm/gcc frankenstein's monster
- if (${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/c++")
- execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
- set(GCC 1)
- add_definitions(-Wall -Wextra -Wshadow)
- endif()
- endif()
-endif()
-if (GCC)
- option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
- if(WARNINGS_AS_ERRORS)
- add_definitions(-Werror)
- endif(WARNINGS_AS_ERRORS)
- if(X64 AND NOT WIN32)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
- endif(X64 AND NOT WIN32)
-endif(GCC)
-
-option(HIGH_BIT_DEPTH "Use 16bit pixels internally" OFF)
-if(HIGH_BIT_DEPTH)
- add_definitions(-DHIGH_BIT_DEPTH=1)
-else(HIGH_BIT_DEPTH)
- add_definitions(-DHIGH_BIT_DEPTH=0)
-endif(HIGH_BIT_DEPTH)
-
-option(ENABLE_PRIMITIVES_VEC "Enable use of SIMD vector class primitives" ON)
-find_package(Yasm)
-if(YASM_FOUND)
- if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
- message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old. 1.2.0 or later required")
- else()
- message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
- option(ENABLE_PRIMITIVES_ASM "Enable use of assembly coded primitives" ON)
- endif()
-endif(YASM_FOUND)
-
-if(UNIX)
- SET(PLATFORM_LIBS pthread m)
- if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
- SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
- endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-endif(UNIX)
-
-option(ENABLE_STLPORT "Enable use of STLPort STL replacement library" OFF)
-if(ENABLE_STLPORT)
- set(STLPORT_LOCATION "${STLPORT_INCLUDE_DIR}" CACHE PATH "Location of STLPort sources")
- # STLPort headers must be included before system headers
- include_directories(BEFORE ${STLPORT_LOCATION}/stlport)
- link_directories(${STLPORT_LOCATION}/lib)
- if("$ENV{CXX}" STREQUAL "icl")
- add_definitions(/Qwd1879)
- endif()
-endif(ENABLE_STLPORT)
-
-option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
-if(ENABLE_PPA)
- add_definitions(-DENABLE_PPA)
- add_subdirectory(PPA)
- SET(PLATFORM_LIBS ${PLATFORM_LIBS} PPA)
- if(UNIX)
- SET(PLATFORM_LIBS ${PLATFORM_LIBS} dl)
- endif(UNIX)
-endif(ENABLE_PPA)
-
-if (WIN32)
- # Visual leak detector
- find_package(VLD)
- if(VLD_FOUND)
- add_definitions(-DHAVE_VLD)
- include_directories(${VLD_INCLUDE_DIRS})
- set(PLATFORM_LIBS ${PLATFORM_LIBS} ${VLD_LIBRARIES})
- link_directories(${VLD_LIBRARY_DIRS})
- endif()
- option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
- if(WINXP_SUPPORT)
- # force workarounds for atomic intrinsics introduced after XP
- add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
- endif(WINXP_SUPPORT)
-endif()
-
-include_directories(. Lib common encoder)
-add_subdirectory(common)
-add_subdirectory(encoder)
-
-set(LIBS common encoder)
-if(ENABLE_PRIMITIVES_VEC)
- set(LIBS ${LIBS} PrimitivesVec)
-endif(ENABLE_PRIMITIVES_VEC)
-
-if(ENABLE_PRIMITIVES_ASM)
- set(LIBS ${LIBS} PrimitivesASM)
-endif(ENABLE_PRIMITIVES_ASM)
-
-if(NOT XCODE)
- include(mergestaticlibs)
- merge_static_libs(x265 ${LIBS})
-endif()
-
-# Test applications
-option(ENABLE_TESTS "Enable Unit Tests" OFF)
-if(ENABLE_TESTS)
- add_subdirectory(test)
-endif(ENABLE_TESTS)
-
-# Main CLI application
-option(ENABLE_CLI "Build standalone CLI application" ON)
-if(ENABLE_CLI)
- file(GLOB InputFiles input/*.cpp input/*.h)
- file(GLOB OutputFiles output/*.cpp output/*.h)
- source_group(input FILES ${InputFiles})
- source_group(output FILES ${OutputFiles})
- if (MSVC)
- set_source_files_properties(compat/msvc/getopt.c PROPERTIES COMPILE_FLAGS "/wd4100")
- elseif(GCC)
- set_source_files_properties(compat/msvc/getopt.c PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter")
- endif()
-
- include(version) # determine X265_VERSION
- set_source_files_properties(x265.cpp PROPERTIES COMPILE_FLAGS -DX265_VERSION=${X265_VERSION})
- add_executable(cli ${EXTRAS} ../COPYING ${InputFiles} ${OutputFiles}
- x265.cpp x265opts.h x265.h
- compat/msvc/getopt.c compat/msvc/getopt.h)
-if(XCODE)
- target_link_libraries(cli ${LIBS} ${PLATFORM_LIBS})
-else()
- target_link_libraries(cli x265 ${PLATFORM_LIBS})
-endif()
- SET_TARGET_PROPERTIES(cli PROPERTIES OUTPUT_NAME x265)
-endif(ENABLE_CLI)
+if(NOT CMAKE_BUILD_TYPE)
+ # default to Release build for GCC builds
+ set(CMAKE_BUILD_TYPE Release CACHE STRING
+ "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
+ FORCE)
+endif()
+
+project (x265)
+cmake_minimum_required (VERSION 2.6)
+
+SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")
+
+if ("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
+ set(X64 1)
+ add_definitions(-DX86_64=1)
+endif()
+
+if (CMAKE_GENERATOR STREQUAL "Xcode")
+ set(XCODE 1)
+endif()
+if (APPLE)
+ add_definitions(-DMACOS)
+endif()
+
+# Enforce coding standards. Full warnings and warnings as errors
+if("$ENV{CXX}" STREQUAL "icl")
+ set(MSVC 1)
+endif()
+if(MSVC)
+ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
+ if (STATIC_LINK_CRT)
+ set(CompilerFlags CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE)
+ foreach(CompilerFlag ${CompilerFlags})
+ string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
+ endforeach()
+ endif (STATIC_LINK_CRT)
+ add_definitions(/W4 /WX /D_CRT_SECURE_NO_WARNINGS)
+ add_definitions(/Ob2) # always inline
+ add_definitions(/Oi) # enable intrinsics
+ add_definitions(/MP) # multithreaded build
+ include_directories(compat/msvc)
+endif(MSVC)
+
+if("$ENV{CXX}" STREQUAL "icpc")
+ set(GCC 1)
+ add_definitions(-Wall -Wextra -Wshadow -no-vec)
+elseif(CMAKE_COMPILER_IS_GNUCXX)
+ execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+ set(GCC 1)
+ add_definitions(-Wall -Wextra -Wshadow -mstackrealign)
+ if(NOT GCC_VERSION VERSION_LESS 4.7)
+ # this is necessary to avoid name conflicts in vector class
+ # library. if vector classes are removed/replaced this can
+ # likely be removed as well.
+ add_definitions(-fabi-version=6)
+ endif()
+elseif(APPLE)
+ exec_program(uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
+ string(REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
+ message(STATUS "DARWIN_VERSION=${DARWIN_VERSION}")
+ if (DARWIN_VERSION GREATER 12)
+ # Detect Mac OS X 10.8 llvm/gcc frankenstein's monster
+ if (${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/c++")
+ execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+ set(GCC 1)
+ add_definitions(-Wall -Wextra -Wshadow)
+ endif()
+ endif()
+endif()
+if (GCC)
+ option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
+ if(WARNINGS_AS_ERRORS)
+ add_definitions(-Werror)
+ endif(WARNINGS_AS_ERRORS)
+ if(X64 AND NOT WIN32)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+ endif(X64 AND NOT WIN32)
+endif(GCC)
+
+option(HIGH_BIT_DEPTH "Use 16bit pixels internally" OFF)
+if(HIGH_BIT_DEPTH)
+ add_definitions(-DHIGH_BIT_DEPTH=1)
+else(HIGH_BIT_DEPTH)
+ add_definitions(-DHIGH_BIT_DEPTH=0)
+endif(HIGH_BIT_DEPTH)
+
+option(ENABLE_PRIMITIVES_VEC "Enable use of SIMD vector class primitives" ON)
+find_package(Yasm)
+if(YASM_FOUND)
+ if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
+ message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old. 1.2.0 or later required")
+ else()
+ message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
+ option(ENABLE_PRIMITIVES_ASM "Enable use of assembly coded primitives" ON)
+ endif()
+endif(YASM_FOUND)
+
+if(UNIX)
+ SET(PLATFORM_LIBS pthread m)
+ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+ SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
+ endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+endif(UNIX)
+
+option(ENABLE_STLPORT "Enable use of STLPort STL replacement library" OFF)
+if(ENABLE_STLPORT)
+ set(STLPORT_LOCATION "${STLPORT_INCLUDE_DIR}" CACHE PATH "Location of STLPort sources")
+ # STLPort headers must be included before system headers
+ include_directories(BEFORE ${STLPORT_LOCATION}/stlport)
+ link_directories(${STLPORT_LOCATION}/lib)
+ if("$ENV{CXX}" STREQUAL "icl")
+ add_definitions(/Qwd1879)
+ endif()
+endif(ENABLE_STLPORT)
+
+option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
+if(ENABLE_PPA)
+ add_definitions(-DENABLE_PPA)
+ add_subdirectory(PPA)
+ SET(PLATFORM_LIBS ${PLATFORM_LIBS} PPA)
+ if(UNIX)
+ SET(PLATFORM_LIBS ${PLATFORM_LIBS} dl)
+ endif(UNIX)
+endif(ENABLE_PPA)
+
+option(ENABLE_OPENCL "Use OpenCL for motion estimation" OFF)
+if(ENABLE_OPENCL)
+ find_package(OpenCL)
+ if(OPENCL_FOUND)
+ add_definitions(-DHAVE_OPENCL)
+ endif()
+endif(ENABLE_OPENCL)
+
+if (WIN32)
+ # Visual leak detector
+ find_package(VLD)
+ if(VLD_FOUND)
+ add_definitions(-DHAVE_VLD)
+ include_directories(${VLD_INCLUDE_DIRS})
+ set(PLATFORM_LIBS ${PLATFORM_LIBS} ${VLD_LIBRARIES})
+ link_directories(${VLD_LIBRARY_DIRS})
+ endif()
+ option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
+ if(WINXP_SUPPORT)
+ # force workarounds for atomic intrinsics introduced after XP
+ add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
+ endif(WINXP_SUPPORT)
+endif()
+
+include_directories(. Lib common encoder)
+add_subdirectory(common)
+add_subdirectory(encoder)
+
+set(LIBS common encoder)
+if(ENABLE_PRIMITIVES_VEC)
+ set(LIBS ${LIBS} PrimitivesVec)
+endif(ENABLE_PRIMITIVES_VEC)
+
+if(ENABLE_PRIMITIVES_ASM)
+ set(LIBS ${LIBS} PrimitivesASM)
+endif(ENABLE_PRIMITIVES_ASM)
+
+if(NOT XCODE)
+ include(mergestaticlibs)
+ merge_static_libs(x265 ${LIBS})
+endif()
+
+# Test applications
+option(ENABLE_TESTS "Enable Unit Tests" OFF)
+if(ENABLE_TESTS)
+ add_subdirectory(test)
+endif(ENABLE_TESTS)
+
+# Main CLI application
+option(ENABLE_CLI "Build standalone CLI application" ON)
+if(ENABLE_CLI)
+ file(GLOB InputFiles input/*.cpp input/*.h)
+ file(GLOB OutputFiles output/*.cpp output/*.h)
+ source_group(input FILES ${InputFiles})
+ source_group(output FILES ${OutputFiles})
+ if (MSVC)
+ set_source_files_properties(compat/msvc/getopt.c PROPERTIES COMPILE_FLAGS "/wd4100")
+ elseif(GCC)
+ set_source_files_properties(compat/msvc/getopt.c PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter")
+ endif()
+
+ include(version) # determine X265_VERSION
+ set_source_files_properties(x265.cpp PROPERTIES COMPILE_FLAGS -DX265_VERSION=${X265_VERSION})
+ add_executable(cli ${EXTRAS} ../COPYING ${InputFiles} ${OutputFiles}
+ x265.cpp x265opts.h x265.h
+ compat/msvc/getopt.c compat/msvc/getopt.h)
+if(XCODE)
+ target_link_libraries(cli ${LIBS} ${PLATFORM_LIBS} ${OPENCL_LIBRARIES})
+else()
+ target_link_libraries(cli x265 ${PLATFORM_LIBS} ${OPENCL_LIBRARIES})
+endif()
+ SET_TARGET_PROPERTIES(cli PROPERTIES OUTPUT_NAME x265)
+endif(ENABLE_CLI)
diff -r 0d33ff236f68 -r 97e1bed019a6 source/cmake/FindOpenCL.cmake
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/cmake/FindOpenCL.cmake Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,58 @@
+# modified from OpenCV
+
+if(APPLE)
+ set(OPENCL_FOUND YES)
+ set(OPENCL_LIBRARY "-framework OpenCL" CACHE STRING "OpenCL library")
+ set(OPENCL_INCLUDE_DIR "" CACHE STRING "OpenCL include directory")
+ mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
+else(APPLE)
+ #find_package(OpenCL QUIET)
+
+ if (NOT OPENCL_FOUND)
+ find_path(OPENCL_ROOT_DIR
+ NAMES OpenCL/cl.h CL/cl.h include/CL/cl.h include/nvidia-current/CL/cl.h
+ PATHS ENV OCLROOT ENV AMDAPPSDKROOT ENV CUDA_PATH ENV INTELOCLSDKROOT
+ DOC "OpenCL root directory"
+ NO_DEFAULT_PATH)
+
+ find_path(OPENCL_INCLUDE_DIR
+ NAMES OpenCL/cl.h CL/cl.h
+ HINTS ${OPENCL_ROOT_DIR}
+ PATH_SUFFIXES include include/nvidia-current
+ DOC "OpenCL include directory"
+ NO_DEFAULT_PATH)
+
+ if(WIN32)
+ if(X64)
+ set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64)
+ else()
+ set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86)
+ endif()
+ elseif(UNIX)
+ if(X64)
+ set(OPENCL_POSSIBLE_LIB_SUFFIXES lib64 lib)
+ else()
+ set(OPENCL_POSSIBLE_LIB_SUFFIXES lib32 lib)
+ endif()
+ else()
+ set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
+ endif()
+
+ find_library(OPENCL_LIBRARY
+ NAMES OpenCL
+ HINTS ${OPENCL_ROOT_DIR}
+ PATH_SUFFIXES ${OPENCL_POSSIBLE_LIB_SUFFIXES}
+ DOC "OpenCL library"
+ NO_DEFAULT_PATH)
+
+ mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
+ include(FindPackageHandleStandardArgs)
+ FIND_PACKAGE_HANDLE_STANDARD_ARGS(OPENCL DEFAULT_MSG OPENCL_LIBRARY OPENCL_INCLUDE_DIR )
+ endif()
+endif(APPLE)
+
+if(OPENCL_FOUND)
+ set(HAVE_OPENCL 1)
+ set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
+ set(OPENCL_LIBRARIES ${OPENCL_LIBRARY})
+endif()
diff -r 0d33ff236f68 -r 97e1bed019a6 source/cmake/cl2cpp.cmake
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/cmake/cl2cpp.cmake Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,35 @@
+# modified from OpenCV
+
+file(GLOB cl_list "${CL_DIR}/*.h" "${CL_DIR}/*.cl" )
+
+file(WRITE ${OUTPUT} "// This file is auto-generated. Do not edit!
+
+namespace x265
+{
+
+")
+
+file(APPEND ${OUTPUT} "const char* x265_opencl_source=\n")
+
+foreach(cl ${cl_list})
+ file(READ "${cl}" lines)
+
+ string(REPLACE "\r" "" lines "${lines}\n")
+ string(REPLACE "\t" " " lines "${lines}")
+
+ string(REGEX REPLACE "/\\*([^*]/|\\*[^/]|[^*/])*\\*/" "" lines "${lines}") # multiline comments
+ string(REGEX REPLACE "/\\*([^\n])*\\*/" "" lines "${lines}") # single-line comments
+ string(REGEX REPLACE "[ ]*//[^\n]*\n" "\n" lines "${lines}") # single-line comments
+ string(REGEX REPLACE "\n[ ]*(\n[ ]*)*" "\n" lines "${lines}") # empty lines & leading whitespace
+ string(REGEX REPLACE "^\n" "" lines "${lines}") # leading new line
+
+ string(REPLACE "\\" "\\\\" lines "${lines}")
+ string(REPLACE "\"" "\\\"" lines "${lines}")
+ string(REPLACE "\n" "\\n\"\n\"" lines "${lines}")
+
+ string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof
+
+ file(APPEND ${OUTPUT} "\"${lines}")
+endforeach()
+
+file(APPEND ${OUTPUT} ";\n\n}\n")
\ No newline at end of file
diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Mon Sep 16 21:06:49 2013 +0800
+++ b/source/common/CMakeLists.txt Tue Sep 17 13:41:27 2013 -0500
@@ -1,59 +1,73 @@
-if(GCC)
- if (NOT X64)
- # force gcc to generate code for sync primitives
- set_source_files_properties(threadpool.cpp reference.cpp wavefront.cpp PROPERTIES COMPILE_FLAGS -march=i686)
- endif()
-endif(GCC)
-
-if(ENABLE_PRIMITIVES_VEC)
- add_definitions(-DENABLE_VECTOR_PRIMITIVES=1)
-endif(ENABLE_PRIMITIVES_VEC)
-if(ENABLE_PRIMITIVES_ASM)
- add_definitions(-DENABLE_ASM_PRIMITIVES=1)
-endif(ENABLE_PRIMITIVES_ASM)
-
-include_directories(../VectorClass)
-
-file(GLOB LIBCOMMON_HDR ../Lib/TLibCommon/*.h)
-file(GLOB LIBCOMMON_SRC ../Lib/TLibCommon/*.cpp)
-source_group(TLibCommon FILES ${LIBCOMMON_SRC})
-source_group(TLibCommonH FILES ${LIBCOMMON_HDR})
-if(GCC)
- set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
- "-Wno-sign-compare")
-endif(GCC)
-if(MSVC)
- # ignore these warnings from HM source
- # /wd4244 type conversion, possible loss of data
- # /wd4512 assignment operator could not be generated
- # /wd4127 conditional expression is constant
- # /wd4389 signed/unsigned mismatch
- # /wd4018 '<' signed/unsigned mismatch
- # /wd4800 performance warning: bool coersion
- set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
- "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
- if ("$ENV{CXX}" STREQUAL "icl")
- add_definitions(/Qwd2557) # signed/unsigned mismatch
- endif()
-endif(MSVC)
-
-add_library(common STATIC ../../COPYING
- ${LIBCOMMON_SRC} ${LIBCOMMON_HDR}
- primitives.cpp primitives.h
- pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp
- ../VectorClass/instrset_detect.cpp
- threading.cpp threading.h
- threadpool.cpp threadpool.h
- wavefront.h wavefront.cpp
- md5.cpp md5.h
- TShortYUV.cpp TShortYUV.h mv.h
- reference.cpp reference.h
- common.cpp common.h
- lowres.cpp lowres.h)
-
-if(ENABLE_PRIMITIVES_VEC)
- add_subdirectory(vec)
-endif(ENABLE_PRIMITIVES_VEC)
-if(ENABLE_PRIMITIVES_ASM)
- add_subdirectory(x86)
-endif(ENABLE_PRIMITIVES_ASM)
+if(GCC)
+ if (NOT X64)
+ # force gcc to generate code for sync primitives
+ set_source_files_properties(threadpool.cpp reference.cpp wavefront.cpp PROPERTIES COMPILE_FLAGS -march=i686)
+ endif()
+endif(GCC)
+
+if(ENABLE_PRIMITIVES_VEC)
+ add_definitions(-DENABLE_VECTOR_PRIMITIVES=1)
+endif(ENABLE_PRIMITIVES_VEC)
+if(ENABLE_PRIMITIVES_ASM)
+ add_definitions(-DENABLE_ASM_PRIMITIVES=1)
+endif(ENABLE_PRIMITIVES_ASM)
+
+include_directories(../VectorClass)
+
+file(GLOB LIBCOMMON_HDR ../Lib/TLibCommon/*.h)
+file(GLOB LIBCOMMON_SRC ../Lib/TLibCommon/*.cpp)
+source_group(TLibCommon FILES ${LIBCOMMON_SRC})
+source_group(TLibCommonH FILES ${LIBCOMMON_HDR})
+if(GCC)
+ set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
+ "-Wno-sign-compare")
+endif(GCC)
+if(MSVC)
+ # ignore these warnings from HM source
+ # /wd4244 type conversion, possible loss of data
+ # /wd4512 assignment operator could not be generated
+ # /wd4127 conditional expression is constant
+ # /wd4389 signed/unsigned mismatch
+ # /wd4018 '<' signed/unsigned mismatch
+ # /wd4800 performance warning: bool coersion
+ set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
+ "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
+ if ("$ENV{CXX}" STREQUAL "icl")
+ add_definitions(/Qwd2557) # signed/unsigned mismatch
+ endif()
+endif(MSVC)
+
+if(HAVE_OPENCL)
+ include_directories(${OPENCL_INCLUDE_DIRS})
+ set(OPENCL_KERNEL_HDR ${CMAKE_CURRENT_SOURCE_DIR}/oclkernels.h)
+ add_custom_command(
+ OUTPUT ${OPENCL_KERNEL_HDR}
+ COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/opencl"
+ -DOUTPUT=${OPENCL_KERNEL_HDR}
+ -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/cl2cpp.cmake"
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/cl2cpp.cmake")
+ set(OPENCL_SRC opencl.cpp)
+ set(OPENCL_HDR opencl.h ${OPENCL_KERNEL_HDR})
+endif(HAVE_OPENCL)
+
+add_library(common STATIC ../../COPYING
+ ${LIBCOMMON_SRC} ${LIBCOMMON_HDR}
+ primitives.cpp primitives.h
+ pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp
+ ../VectorClass/instrset_detect.cpp
+ threading.cpp threading.h
+ threadpool.cpp threadpool.h
+ wavefront.h wavefront.cpp
+ md5.cpp md5.h
+ TShortYUV.cpp TShortYUV.h mv.h
+ reference.cpp reference.h
+ common.cpp common.h
+ lowres.cpp lowres.h
+ ${OPENCL_SRC} ${OPENCL_HDR})
+
+if(ENABLE_PRIMITIVES_VEC)
+ add_subdirectory(vec)
+endif(ENABLE_PRIMITIVES_VEC)
+if(ENABLE_PRIMITIVES_ASM)
+ add_subdirectory(x86)
+endif(ENABLE_PRIMITIVES_ASM)
diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/opencl.cpp Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,631 @@
+/*****************************************************************************
+ * opencl.c: OpenCL initialization and kernel compilation
+ *****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve at borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at multicorewareinc.com.
+ *****************************************************************************/
+
+#include "TLibCommon/CommonDef.h"
+#include "common.h"
+
+#if defined WIN32 || defined _WIN32
+#include <Windows.h>
+#else
+#include <dlfcn.h> //dlopen, dlsym, dlclose
+#endif
+#include "opencl.h"
+#include "oclkernels.h"
+#include "stdio.h"
+
+/* define from recent cl_ext.h, copied here in case headers are old */
+#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
+
+using namespace x265;
+
+// global variable
+static ocl g_ocl;
+
+static void CALLBACK oclErrorNotify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
+{
+ /* Any error notification can be assumed to be fatal to the OpenCL context.
+ * We need to stop using it immediately to prevent further damage. */
+ x265_param_t *param = (x265_param_t*)user_data;
+ param->useOpenCL = 0;
+ //h->opencl.b_fatal_error = 1;
+ x265_log(param, X265_LOG_ERROR, "OpenCL: %s\n", errinfo );
+ x265_log(param, X265_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" );
+ (void)cb;
+ (void)private_info;
+}
+
+ocl::ocl()
+ : m_context(NULL)
+ , m_device(NULL)
+ , m_queue(NULL)
+ , m_program(NULL)
+ , m_initialized(false)
+ , m_deviceAMDSI(false)
+ , m_fatalError(false)
+ , m_openclThreadPri(0)
+ , m_kernelMeStar(NULL)
+{}
+
+ocl::~ocl()
+{}
+
+
+bool ocl::init(x265_param_t *param)
+{
+ cl_int status;
+ cl_uint numPlatforms;
+ bool ret = false;
+
+ if (m_initialized)
+ {
+ return true;
+ }
+
+ status = clGetPlatformIDs(0, NULL, &numPlatforms);
+ if( status != CL_SUCCESS || numPlatforms == 0 )
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
+ return false;
+ }
+
+ cl_platform_id *platforms = (cl_platform_id*)X265_MALLOC(cl_platform_id, numPlatforms);
+ status = clGetPlatformIDs( numPlatforms, platforms, NULL );
+ if( status != CL_SUCCESS )
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
+ X265_FREE( platforms );
+ return false;
+ }
+
+ /* Select the first OpenCL platform with a GPU device that supports our
+ * required image (texture) formats */
+ for( cl_uint i = 0; i < numPlatforms; ++i )
+ {
+ cl_uint gpu_count = 0;
+ status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
+ if( status != CL_SUCCESS || !gpu_count )
+ continue;
+
+ cl_device_id *devices = (cl_device_id*) X265_MALLOC( cl_device_id, gpu_count );
+ if( !devices )
+ continue;
+
+ status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
+ if( status != CL_SUCCESS )
+ {
+ X265_FREE( devices );
+ continue;
+ }
+
+ /* Find a GPU device that supports our image formats */
+ for( cl_uint gpu = 0; gpu < gpu_count; gpu++ )
+ {
+ cl_device_id device = devices[gpu];
+
+ /* if the user has specified an exact device ID, skip all other
+ * GPUs. If this device matches, allow it to continue through the
+ * checks for supported images, etc. */
+ if( param->OpenCLDeviceId && devices[gpu] != (cl_device_id) param->OpenCLDeviceId )
+ continue;
+
+ cl_bool image_support;
+ clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
+ if( !image_support )
+ continue;
+
+ cl_context context = clCreateContext( NULL, 1, &device, oclErrorNotify, (void*)param, &status );
+ if( status != CL_SUCCESS )
+ continue;
+
+ cl_uint imagecount = 0;
+ clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
+ if( !imagecount )
+ {
+ clReleaseContext( context );
+ continue;
+ }
+
+ cl_image_format *imageType = (cl_image_format*)X265_MALLOC(cl_image_format, imagecount);
+ if( !imageType )
+ {
+ clReleaseContext( context );
+ continue;
+ }
+
+ clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
+
+ int b_has_r = 0;
+ int b_has_rgba = 0;
+ for( cl_uint j = 0; j < imagecount; j++ )
+ {
+ if( imageType[j].image_channel_order == CL_R &&
+ imageType[j].image_channel_data_type == CL_UNSIGNED_INT32 )
+ b_has_r = 1;
+ else if( imageType[j].image_channel_order == CL_RGBA &&
+ imageType[j].image_channel_data_type == CL_UNSIGNED_INT8 )
+ b_has_rgba = 1;
+ }
+ X265_FREE( imageType );
+ if( !b_has_r || !b_has_rgba )
+ {
+ char devname[64];
+ status = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(devname), devname, NULL);
+ if( status == CL_SUCCESS )
+ {
+ /* emit warning if we are discarding the user's explicit choice */
+ int level = param->OpenCLDeviceId ? X265_LOG_WARNING : X265_LOG_DEBUG;
+ x265_log(param, level, "OpenCL: %s does not support required image formats\n", devname);
+ }
+ clReleaseContext( context );
+ continue;
+ }
+
+ /* user selection of GPU device, skip N first matches */
+ if( param->iOpenCLDevice )
+ {
+ param->iOpenCLDevice--;
+ clReleaseContext( context );
+ continue;
+ }
+
+ cl_command_queue queue = clCreateCommandQueue( context, device, 0, &status );
+ if( status != CL_SUCCESS )
+ {
+ clReleaseContext( context );
+ continue;
+ }
+
+ m_context = context;
+ m_device = device;
+ m_queue = queue;
+
+ ret = true;
+ break;
+ }
+
+ X265_FREE( devices );
+
+ if (ret)
+ {
+ break;
+ }
+ }
+
+ X265_FREE( platforms );
+
+ if (!param->pszCLbinFile)
+ {
+ param->pszCLbinFile = "x265_kernels.clbin";
+ }
+
+ if (!ret)
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to find a compatible device\n");
+ }
+ else
+ {
+ ret = createKernels(param);
+ }
+
+ m_initialized = ret;
+
+ return ret;
+}
+
+bool ocl::createKernels(x265_param_t *param)
+{
+ static const char *kernelnames[] = {
+ "motion_estimate"
+ };
+ cl_kernel *kernels[] = {
+ &m_kernelMeStar
+ };
+ cl_int status;
+
+ compile(param);
+ if( !m_program )
+ {
+ return false;
+ }
+
+ for( int i = 0; i < sizeof(kernelnames)/sizeof(kernelnames[0]); i++ )
+ {
+ *kernels[i] = clCreateKernel(m_program, kernelnames[i], &status);
+ if( status != CL_SUCCESS )
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernelnames[i], status );
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void ocl::destroy()
+{
+ if (m_queue)
+ {
+ clFinish(m_queue);
+ }
+
+ if (m_kernelMeStar)
+ {
+ clReleaseKernel(m_kernelMeStar);
+ m_kernelMeStar = NULL;
+ }
+
+ if (m_program)
+ {
+ clReleaseProgram(m_program);
+ m_program = NULL;
+ }
+
+ if (m_queue)
+ {
+ clReleaseCommandQueue( m_queue );
+ m_queue = NULL;
+ }
+
+ if (m_context)
+ {
+ clReleaseContext( m_context );
+ m_context = NULL;
+ }
+
+ m_initialized = false;
+}
+
+/* Try to load the cached compiled program binary, verify the device context is
+ * still valid before reuse */
+void ocl::cache_load(x265_param_t *param, char *devname, char *devvendor, char *driverversion)
+{
+ cl_int status;
+
+ /* try to load cached program binary */
+ FILE *fp = fopen( param->pszCLbinFile, "rb" );
+ if( !fp )
+ return;
+
+ fseek( fp, 0L, SEEK_END );
+ size_t size = ftell( fp );
+ rewind( fp );
+ uint8_t *binary = (uint8_t*)X265_MALLOC( uint8_t, size );
+ if (!binary)
+ return;
+
+ fread( binary, 1, size, fp );
+ const uint8_t *ptr = (const uint8_t*)binary;
+
+#define CHECK_STRING( STR )\
+ {\
+ size_t len = strlen( STR );\
+ if( size <= len || strncmp( (char*)ptr, STR, len ) )\
+ goto fail;\
+ else {\
+ size -= (len+1); ptr += (len+1);\
+ }\
+ }
+
+ CHECK_STRING( devname );
+ CHECK_STRING( devvendor );
+ CHECK_STRING( driverversion );
+ //CHECK_STRING( x265_opencl_source_hash );
+#undef CHECK_STRING
+
+ m_program = clCreateProgramWithBinary( m_context, 1, &m_device, &size, &ptr, NULL, &status );
+ if (status != CL_SUCCESS)
+ {
+ m_program = NULL;
+ }
+
+fail:
+ fclose( fp );
+ X265_FREE( binary );
+}
+
+/* Save the compiled program binary to a file for later reuse. Device context
+ * is also saved in the cache file so we do not reuse stale binaries */
+void ocl::cache_save(x265_param_t *param, char *devname, char *devvendor, char *driverversion)
+{
+ FILE *fp = fopen(param->pszCLbinFile, "wb");
+ if( !fp )
+ {
+ x265_log(param, X265_LOG_INFO, "OpenCL: unable to open clbin file for write");
+ return;
+ }
+
+ size_t size;
+ cl_int status = clGetProgramInfo( m_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
+ if( status == CL_SUCCESS )
+ {
+ uint8_t *binary = (uint8_t*)X265_MALLOC(uint8_t, size);
+ status = clGetProgramInfo( m_program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
+ if( status == CL_SUCCESS )
+ {
+ fputs( devname, fp );
+ fputc( '\n', fp );
+ fputs( devvendor, fp );
+ fputc( '\n', fp );
+ fputs( driverversion, fp );
+ fputc( '\n', fp );
+ //fputs( x265_opencl_source_hash, fp );
+ //fputc( '\n', fp );
+ fwrite( binary, 1, size, fp );
+ }
+ else
+ x265_log(param, X265_LOG_INFO, "OpenCL: Unable to query program binary, no cache file generated");
+ X265_FREE( binary );
+ }
+ else
+ x265_log(param, X265_LOG_INFO, "OpenCL: Unable to query program binary size, no cache file generated");
+ fclose( fp );
+
+ return;
+}
+
+
+oclME::oclME(x265_param_t *param)
+ : m_pageLockedBuffer(NULL)
+ , m_pageLockedPtr(NULL)
+ , m_plOccupancy(0)
+ , m_numCopies(0)
+ , m_mvCostsBuffer(NULL)
+ , m_mvpBuffer(NULL)
+{
+ for (int i = 0; i < 2; i++)
+ {
+ m_mvBuffer[i];
+ m_costsBuffer[i];
+ m_frameStatsBuffer[i];
+ }
+
+ m_ocl = &g_ocl;
+ init(param);
+}
+
+/* The OpenCL source under common/opencl will be merged into common/oclkernels.h
+ * by CMake. It defines a x265_opencl_source string which we will pass
+ * to clCreateProgramWithSource(). We also attempt to use a cache file for the
+ * compiled binary, stored in the current working folder. */
+bool ocl::compile(x265_param_t *param)
+{
+ cl_int status;
+ char devname[64];
+ char devvendor[64];
+ char driverversion[64];
+
+ status = clGetDeviceInfo( m_device, CL_DEVICE_NAME, sizeof(devname), devname, NULL );
+ status |= clGetDeviceInfo( m_device, CL_DEVICE_VENDOR, sizeof(devvendor), devvendor, NULL );
+ status |= clGetDeviceInfo( m_device, CL_DRIVER_VERSION, sizeof(driverversion), driverversion, NULL );
+ if (status != CL_SUCCESS)
+ {
+ return false;
+ }
+
+ // Most AMD GPUs have vector registers
+ int vectorize = !strcmp( devvendor, "Advanced Micro Devices, Inc." );
+ m_deviceAMDSI = false;
+
+ if (vectorize)
+ {
+ /* Disable OpenCL on Intel/AMD switchable graphics devices */
+ if (detectSwitchableGraphics())
+ {
+ x265_log(param, X265_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" );
+ return false;
+ }
+
+ /* Detect AMD SouthernIsland or newer device (single-width registers) */
+ cl_uint simdwidth = 4;
+ status = clGetDeviceInfo( m_device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
+ if( status == CL_SUCCESS && simdwidth == 1 )
+ {
+ vectorize = 0;
+ m_deviceAMDSI = true;
+ }
+ }
+
+ x265_log(param, X265_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", devvendor, devname, m_deviceAMDSI ? "(SI)" : "" );
+
+ cache_load(param, devname, devvendor, driverversion);
+ if( !m_program )
+ {
+ /* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x265_opencl_source */
+ x265_log(param, X265_LOG_INFO, "Compiling OpenCL kernels...\n" );
+ size_t size = strlen(x265_opencl_source);
+ m_program = clCreateProgramWithSource( m_context, 1, &x265_opencl_source, &size, &status );
+ if( status != CL_SUCCESS || !m_program )
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: unable to create program\n" );
+ return false;
+ }
+ }
+
+ /* Build the program binary for the OpenCL device */
+ const char *buildopts = vectorize ? "-DVECTORIZE=1" : "";
+ status = clBuildProgram( m_program, 1, &m_device, buildopts, NULL, NULL );
+ if( status == CL_SUCCESS )
+ {
+ cache_save(param, devname, devvendor, driverversion);
+ return true;
+ }
+
+ /* Compile failure, should not happen with production code. */
+
+ size_t build_log_len = 0;
+
+ status = clGetProgramBuildInfo( m_program, m_device, CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len );
+ if( status != CL_SUCCESS )
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
+ return false;
+ }
+
+ char *build_log = (char*)X265_MALLOC(char, build_log_len);
+ if( !build_log )
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed, unable to alloc build log\n" );
+ return false;
+ }
+
+ status = clGetProgramBuildInfo( m_program, m_device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
+ if( status != CL_SUCCESS )
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
+ X265_FREE( build_log );
+ return false;
+ }
+
+ FILE *lg = fopen( "x265_kernel_build_log.txt", "w" );
+ if( lg )
+ {
+ fwrite( build_log, 1, build_log_len, lg );
+ fclose( lg );
+ x265_log(param, X265_LOG_WARNING, "OpenCL: kernel build errors written to x265_kernel_build_log.txt\n" );
+ }
+
+ X265_FREE( build_log );
+
+ clReleaseProgram(m_program);
+ m_program = NULL;
+
+ return false;
+}
+
+oclME::~oclME()
+{
+}
+
+void oclME::destroy()
+{
+}
+
+bool oclME::init(x265_param_t *param)
+{
+ cl_int status;
+
+ if (!m_ocl)
+ {
+ return false;
+ }
+ m_ocl->init(param);
+
+ m_pageLockedBuffer = clCreateBuffer(m_ocl->m_context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status);
+ if( status != CL_SUCCESS )
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status );
+ return false;
+ }
+ m_pageLockedPtr = (char*) clEnqueueMapBuffer(m_ocl->m_queue, m_pageLockedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
+ 0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status);
+ if( status != CL_SUCCESS )
+ {
+ x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status );
+ return false;
+ }
+
+ return true;
+}
+
+/* OpenCL misbehaves on hybrid laptops with Intel iGPU and AMD dGPU, so
+ * we consult AMD's ADL interface to detect this situation and disable
+ * OpenCL on these machines (Linux and Windows) */
+#ifndef _WIN32
+#define __stdcall
+#define HINSTANCE void *
+#endif
+typedef void* ( __stdcall *ADL_MAIN_MALLOC_CALLBACK )( int );
+typedef int ( *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int );
+typedef int ( *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* );
+typedef int ( *ADL_POWERXPRESS_SCHEME_GET ) ( int, int *, int *, int * );
+typedef int ( *ADL_MAIN_CONTROL_DESTROY )();
+#define ADL_OK 0
+#define ADL_PX_SCHEME_DYNAMIC 2
+
+void* __stdcall adl_malloc_wrapper( int iSize ) { return X265_MALLOC(char, iSize); }
+
+int ocl::detectSwitchableGraphics()
+{
+ ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create;
+ ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
+ ADL_POWERXPRESS_SCHEME_GET ADL_PowerXpress_Scheme_Get;
+ ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy;
+ HINSTANCE hDLL;
+ int ret = 0;
+
+#if _WIN32
+ hDLL = LoadLibrary( "atiadlxx.dll" );
+ if( !hDLL )
+ hDLL = LoadLibrary( "atiadlxy.dll" );
+#else
+ hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
+#define GetProcAddress dlsym
+#endif
+ if( !hDLL )
+ return ret;
+
+ ADL_Main_Control_Create = (ADL_MAIN_CONTROL_CREATE) GetProcAddress(hDLL, "ADL_Main_Control_Create");
+ ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY) GetProcAddress(hDLL, "ADL_Main_Control_Destroy");
+ ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET) GetProcAddress(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
+ ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET) GetProcAddress(hDLL, "ADL_PowerXpress_Scheme_Get");
+ if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
+ !ADL_PowerXpress_Scheme_Get )
+ goto bail;
+
+ if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1) )
+ goto bail;
+
+ int numAdapters = 0;
+ if( ADL_OK != ADL_Adapter_NumberOfAdapters_Get( &numAdapters ) )
+ {
+ ADL_Main_Control_Destroy();
+ goto bail;
+ }
+
+ for( int i = 0; i < numAdapters; i++ )
+ {
+ int PXSchemeRange, PXSchemeCurrentState, PXSchemeDefaultState;
+ if( ADL_OK != ADL_PowerXpress_Scheme_Get( i, &PXSchemeRange, &PXSchemeCurrentState, &PXSchemeDefaultState) )
+ break;
+
+ if( PXSchemeRange >= ADL_PX_SCHEME_DYNAMIC )
+ {
+ ret = 1;
+ break;
+ }
+ }
+
+ ADL_Main_Control_Destroy();
+
+bail:
+#if _WIN32
+ FreeLibrary( hDLL );
+#else
+ dlclose( hDLL );
+#endif
+
+ return ret;
+}
+
diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/opencl.h Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,128 @@
+/*****************************************************************************
+ * opencl.h: OpenCL structures and defines
+ *****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve at borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at multicorewareinc.com.
+ *****************************************************************************/
+
+#ifndef X265_OPENCL_H
+#define X265_OPENCL_H
+
+#if defined __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif
+
+#include "x265.h"
+
+namespace x265
+{
+
+/* Number of PCIe copies that can be queued before requiring a flush */
+#define MAX_FINISH_COPIES 1024
+
+/* Size (in bytes) of the page-locked buffer used for PCIe xfers */
+#define PAGE_LOCKED_BUF_SIZE 32 * 1024 * 1024
+
+/* We define CL_QUEUE_THREAD_HANDLE_AMD here because it is not defined
+ * in the OpenCL headers shipped with NVIDIA drivers. We need to be
+ * able to compile on an NVIDIA machine and run optimally on an AMD GPU. */
+#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
+
+#define OCLCHECK( method, ... )\
+ status = method( __VA_ARGS__ );\
+ if( status != CL_SUCCESS ) {\
+ param->useOpenCL = 0;\
+ m_ocl->m_fatalError = 1;\
+ x264_log( param, X264_LOG_ERROR, # method " error '%d'\n", status );\
+ return status;\
+ }
+
+class ocl
+{
+public:
+
+ ocl();
+ ~ocl();
+
+ cl_context m_context;
+ cl_device_id m_device;
+ cl_command_queue m_queue;
+ cl_program m_program;
+
+ bool m_initialized;
+ bool m_deviceAMDSI;
+ bool m_fatalError;
+ int m_openclThreadPri;
+
+ cl_kernel m_kernelMeStar;
+
+ bool init(x265_param_t *param);
+ void destroy();
+ int detectSwitchableGraphics();
+
+private:
+
+ bool createKernels(x265_param_t *param);
+ bool compile(x265_param_t *param);
+ void cache_load(x265_param_t *param, char *devname, char *devvendor, char *driverversion);
+ void cache_save(x265_param_t *param, char *devname, char *devvendor, char *driverversion);
+};
+
+
+/* motion estimation */
+class oclME
+{
+public:
+
+ oclME(x265_param_t *param);
+ ~oclME();
+
+ ocl* m_ocl;
+
+ cl_mem m_pageLockedBuffer;
+ char *m_pageLockedPtr;
+ int m_plOccupancy;
+
+ struct
+ {
+ void *src;
+ void *dest;
+ int bytes;
+ } m_copies[MAX_FINISH_COPIES];
+ int m_numCopies;
+
+ cl_mem m_mvBuffer[2];
+ cl_mem m_mvCostsBuffer;
+ cl_mem m_mvpBuffer;
+
+ cl_mem m_costsBuffer[2];
+ cl_mem m_frameStatsBuffer[2]; /* cost_est, cost_est_aq, intra_mbs */
+
+ bool init(x265_param_t *param);
+ void destroy();
+
+};
+
+} // end namespace x265
+
+#endif
diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl/me.cl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/opencl/me.cl Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,15 @@
+/* motion estimate */
+
+__kernel void motion_estimate( read_only image2d_t fenc,
+ read_only image2d_t fref,
+ const __global short2 *in_mvs,
+ __global short2 *out_mvs,
+ __global short *out_mv_costs,
+ __global short2 *mvp_buffer,
+ __local short *cost_local,
+ __local short2 *mvc_local,
+ int mb_width,
+ int lambda,
+ int me_range)
+{
+}
diff -r 0d33ff236f68 -r 97e1bed019a6 source/encoder/CMakeLists.txt
--- a/source/encoder/CMakeLists.txt Mon Sep 16 21:06:49 2013 +0800
+++ b/source/encoder/CMakeLists.txt Tue Sep 17 13:41:27 2013 -0500
@@ -1,43 +1,47 @@
-if(GCC)
- # encoder.cpp must include HM headers which are not careful about named parameters
- set_source_files_properties(encoder.cpp PROPERTIES COMPILE_FLAGS -Wno-unused-parameter)
-endif(GCC)
-
-if(GCC)
- if (NOT X64)
- # force gcc to generate code for sync primitives
- set_source_files_properties(framefilter.cpp PROPERTIES COMPILE_FLAGS -march=i686)
- endif()
-endif(GCC)
-
-file(GLOB LIBENCODER_HDR ../Lib/TLibEncoder/*.h)
-file(GLOB LIBENCODER_SRC ../Lib/TLibEncoder/*.cpp)
-source_group(TLibEncoder FILES ${LIBENCODER_SRC})
-source_group(TLibEncoderH FILES ${LIBENCODER_HDR})
-if(GCC)
- set_source_files_properties(compress.cpp ${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS
- "-Wno-sign-compare")
-endif(GCC)
-if(MSVC)
- # ignore these warnings from HM source
- # /wd4244 type conversion, possible loss of data
- # /wd4512 assignment operator could not be generated
- # /wd4127 conditional expression is constant
- # /wd4389 signed/unsigned mismatch
- # /wd4018 '<' signed/unsigned mismatch
- # /wd4800 performance warning: bool coersion
- set_source_files_properties(${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS
- "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
-endif(MSVC)
-
-add_library(encoder STATIC ../../COPYING ../x265.h
- ${LIBENCODER_SRC} ${LIBENCODER_HDR}
- bitcost.cpp bitcost.h
- motion.cpp motion.h
- slicetype.cpp slicetype.h
- frameencoder.cpp frameencoder.h
- framefilter.cpp framefilter.h
- cturow.cpp cturow.h
- dpb.cpp dpb.h
- ratecontrol.cpp ratecontrol.h
- compress.cpp encoder.cpp)
+if(GCC)
+ # encoder.cpp must include HM headers which are not careful about named parameters
+ set_source_files_properties(encoder.cpp PROPERTIES COMPILE_FLAGS -Wno-unused-parameter)
+endif(GCC)
+
+if(GCC)
+ if (NOT X64)
+ # force gcc to generate code for sync primitives
+ set_source_files_properties(framefilter.cpp PROPERTIES COMPILE_FLAGS -march=i686)
+ endif()
+endif(GCC)
+
+file(GLOB LIBENCODER_HDR ../Lib/TLibEncoder/*.h)
+file(GLOB LIBENCODER_SRC ../Lib/TLibEncoder/*.cpp)
+source_group(TLibEncoder FILES ${LIBENCODER_SRC})
+source_group(TLibEncoderH FILES ${LIBENCODER_HDR})
+if(GCC)
+ set_source_files_properties(compress.cpp ${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS
+ "-Wno-sign-compare")
+endif(GCC)
+if(MSVC)
+ # ignore these warnings from HM source
+ # /wd4244 type conversion, possible loss of data
+ # /wd4512 assignment operator could not be generated
+ # /wd4127 conditional expression is constant
+ # /wd4389 signed/unsigned mismatch
+ # /wd4018 '<' signed/unsigned mismatch
+ # /wd4800 performance warning: bool coersion
+ set_source_files_properties(${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS
+ "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
+endif(MSVC)
+
+if(HAVE_OPENCL)
+ include_directories(${OPENCL_INCLUDE_DIRS})
+endif(HAVE_OPENCL)
+
+add_library(encoder STATIC ../../COPYING ../x265.h
+ ${LIBENCODER_SRC} ${LIBENCODER_HDR}
+ bitcost.cpp bitcost.h
+ motion.cpp motion.h
+ slicetype.cpp slicetype.h
+ frameencoder.cpp frameencoder.h
+ framefilter.cpp framefilter.h
+ cturow.cpp cturow.h
+ dpb.cpp dpb.h
+ ratecontrol.cpp ratecontrol.h
+ compress.cpp encoder.cpp)
diff -r 0d33ff236f68 -r 97e1bed019a6 source/x265.h
--- a/source/x265.h Mon Sep 16 21:06:49 2013 +0800
+++ b/source/x265.h Tue Sep 17 13:41:27 2013 -0500
@@ -277,6 +277,12 @@
int qp; ///< Constant QP base value
int rateFactor; ///< Constant rate factor (CRF)
} rc;
+
+ // opencl
+ int useOpenCL; ///< use OpenCL when available
+ int iOpenCLDevice; ///< specify count of GPU devices to skip, for CLI users
+ void *OpenCLDeviceId; ///< pass explicit cl_device_id as void*, for API users
+ char *pszCLbinFile; ///< compiled OpenCL kernel cache file
}
x265_param_t;
More information about the x265-devel
mailing list