[x265] [PATCH] add OpenCL environment codes

Steve Borho steve at borho.org
Wed Sep 18 04:31:10 CEST 2013


On Tue, Sep 17, 2013 at 1:42 PM, Wenju He <wenju at multicorewareinc.com>wrote:

> # HG changeset patch
> # User Wenju He <wenju at multicorewareinc.com>
> # Date 1379443287 18000
> #      Tue Sep 17 13:41:27 2013 -0500
> # Node ID 97e1bed019a62849c5cc514ee74efbf64647986c
> # Parent  0d33ff236f68bc2238138a7213301b2efc0e6426
> add OpenCL environment codes
>
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/CMakeLists.txt
> --- a/source/CMakeLists.txt     Mon Sep 16 21:06:49 2013 +0800
> +++ b/source/CMakeLists.txt     Tue Sep 17 13:41:27 2013 -0500
> @@ -1,190 +1,198 @@
> -if(NOT CMAKE_BUILD_TYPE)
>

you've changed the file EOLN inadvertently, causing every line to be
changed.


> -    # default to Release build for GCC builds
> -    set(CMAKE_BUILD_TYPE Release CACHE STRING
> -        "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or
> CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
> -        FORCE)
> -endif()
> -
> -project (x265)
> -cmake_minimum_required (VERSION 2.6)
> -
> -SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake"
> "${CMAKE_MODULE_PATH}")
> -
> -if ("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
> -    set(X64 1)
> -    add_definitions(-DX86_64=1)
> -endif()
> -
> -if (CMAKE_GENERATOR STREQUAL "Xcode")
> -  set(XCODE 1)
> -endif()
> -if (APPLE)
> -  add_definitions(-DMACOS)
> -endif()
> -
> -# Enforce coding standards.  Full warnings and warnings as errors
> -if("$ENV{CXX}" STREQUAL "icl")
> -    set(MSVC 1)
> -endif()
> -if(MSVC)
> -    option(STATIC_LINK_CRT "Statically link C runtime for release builds"
> OFF)
> -    if (STATIC_LINK_CRT)
> -        set(CompilerFlags CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE)
> -        foreach(CompilerFlag ${CompilerFlags})
> -            string(REPLACE "/MD" "/MT" ${CompilerFlag}
> "${${CompilerFlag}}")
> -        endforeach()
> -    endif (STATIC_LINK_CRT)
> -    add_definitions(/W4 /WX /D_CRT_SECURE_NO_WARNINGS)
> -    add_definitions(/Ob2) # always inline
> -    add_definitions(/Oi)  # enable intrinsics
> -    add_definitions(/MP)  # multithreaded build
> -    include_directories(compat/msvc)
> -endif(MSVC)
> -
> -if("$ENV{CXX}" STREQUAL "icpc")
> -    set(GCC 1)
> -    add_definitions(-Wall -Wextra -Wshadow -no-vec)
> -elseif(CMAKE_COMPILER_IS_GNUCXX)
> -    execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
> OUTPUT_VARIABLE GCC_VERSION)
> -    set(GCC 1)
> -    add_definitions(-Wall -Wextra -Wshadow -mstackrealign)
> -    if(NOT GCC_VERSION VERSION_LESS 4.7)
> -        # this is necessary to avoid name conflicts in vector class
> -        # library.  if vector classes are removed/replaced this can
> -        # likely be removed as well.
> -        add_definitions(-fabi-version=6)
> -    endif()
> -elseif(APPLE)
> -    exec_program(uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
> -    string(REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
> -    message(STATUS "DARWIN_VERSION=${DARWIN_VERSION}")
> -    if (DARWIN_VERSION GREATER 12)
> -        # Detect Mac OS X 10.8 llvm/gcc frankenstein's monster
> -        if (${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/c++")
> -            execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
> OUTPUT_VARIABLE GCC_VERSION)
> -            set(GCC 1)
> -            add_definitions(-Wall -Wextra -Wshadow)
> -        endif()
> -    endif()
> -endif()
> -if (GCC)
> -    option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
> -    if(WARNINGS_AS_ERRORS)
> -        add_definitions(-Werror)
> -    endif(WARNINGS_AS_ERRORS)
> -    if(X64 AND NOT WIN32)
> -        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
> -    endif(X64 AND NOT WIN32)
> -endif(GCC)
> -
> -option(HIGH_BIT_DEPTH "Use 16bit pixels internally" OFF)
> -if(HIGH_BIT_DEPTH)
> -    add_definitions(-DHIGH_BIT_DEPTH=1)
> -else(HIGH_BIT_DEPTH)
> -    add_definitions(-DHIGH_BIT_DEPTH=0)
> -endif(HIGH_BIT_DEPTH)
> -
> -option(ENABLE_PRIMITIVES_VEC "Enable use of SIMD vector class primitives"
> ON)
> -find_package(Yasm)
> -if(YASM_FOUND)
> -    if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
> -        message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old.
> 1.2.0 or later required")
> -    else()
> -        message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build
> assembly primitives")
> -        option(ENABLE_PRIMITIVES_ASM "Enable use of assembly coded
> primitives" ON)
> -    endif()
> -endif(YASM_FOUND)
> -
> -if(UNIX)
> -    SET(PLATFORM_LIBS pthread m)
> -    if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
> -        SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
> -    endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
> -endif(UNIX)
> -
> -option(ENABLE_STLPORT "Enable use of STLPort STL replacement library" OFF)
> -if(ENABLE_STLPORT)
> -    set(STLPORT_LOCATION "${STLPORT_INCLUDE_DIR}" CACHE PATH "Location of
> STLPort sources")
> -    # STLPort headers must be included before system headers
> -    include_directories(BEFORE ${STLPORT_LOCATION}/stlport)
> -    link_directories(${STLPORT_LOCATION}/lib)
> -    if("$ENV{CXX}" STREQUAL "icl")
> -        add_definitions(/Qwd1879)
> -    endif()
> -endif(ENABLE_STLPORT)
> -
> -option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
> -if(ENABLE_PPA)
> -    add_definitions(-DENABLE_PPA)
> -    add_subdirectory(PPA)
> -    SET(PLATFORM_LIBS ${PLATFORM_LIBS} PPA)
> -    if(UNIX)
> -        SET(PLATFORM_LIBS ${PLATFORM_LIBS} dl)
> -    endif(UNIX)
> -endif(ENABLE_PPA)
> -
> -if (WIN32)
> -    # Visual leak detector
> -    find_package(VLD)
> -    if(VLD_FOUND)
> -        add_definitions(-DHAVE_VLD)
> -        include_directories(${VLD_INCLUDE_DIRS})
> -        set(PLATFORM_LIBS ${PLATFORM_LIBS} ${VLD_LIBRARIES})
> -        link_directories(${VLD_LIBRARY_DIRS})
> -    endif()
> -    option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
> -    if(WINXP_SUPPORT)
> -        # force workarounds for atomic intrinsics introduced after XP
> -        add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
> -    endif(WINXP_SUPPORT)
> -endif()
> -
> -include_directories(. Lib common encoder)
> -add_subdirectory(common)
> -add_subdirectory(encoder)
> -
> -set(LIBS common encoder)
> -if(ENABLE_PRIMITIVES_VEC)
> -    set(LIBS ${LIBS} PrimitivesVec)
> -endif(ENABLE_PRIMITIVES_VEC)
> -
> -if(ENABLE_PRIMITIVES_ASM)
> -    set(LIBS ${LIBS} PrimitivesASM)
> -endif(ENABLE_PRIMITIVES_ASM)
> -
> -if(NOT XCODE)
> -    include(mergestaticlibs)
> -    merge_static_libs(x265 ${LIBS})
> -endif()
> -
> -# Test applications
> -option(ENABLE_TESTS "Enable Unit Tests" OFF)
> -if(ENABLE_TESTS)
> -    add_subdirectory(test)
> -endif(ENABLE_TESTS)
> -
> -# Main CLI application
> -option(ENABLE_CLI "Build standalone CLI application" ON)
> -if(ENABLE_CLI)
> -    file(GLOB InputFiles input/*.cpp input/*.h)
> -    file(GLOB OutputFiles output/*.cpp output/*.h)
> -    source_group(input FILES ${InputFiles})
> -    source_group(output FILES ${OutputFiles})
> -    if (MSVC)
> -        set_source_files_properties(compat/msvc/getopt.c PROPERTIES
> COMPILE_FLAGS "/wd4100")
> -    elseif(GCC)
> -        set_source_files_properties(compat/msvc/getopt.c PROPERTIES
> COMPILE_FLAGS "-Wno-unused-parameter")
> -    endif()
> -
> -    include(version) # determine X265_VERSION
> -    set_source_files_properties(x265.cpp PROPERTIES COMPILE_FLAGS
> -DX265_VERSION=${X265_VERSION})
> -    add_executable(cli ${EXTRAS} ../COPYING ${InputFiles} ${OutputFiles}
> -        x265.cpp x265opts.h x265.h
> -        compat/msvc/getopt.c compat/msvc/getopt.h)
> -if(XCODE)
> -    target_link_libraries(cli ${LIBS} ${PLATFORM_LIBS})
> -else()
> -    target_link_libraries(cli x265 ${PLATFORM_LIBS})
> -endif()
> -    SET_TARGET_PROPERTIES(cli PROPERTIES OUTPUT_NAME x265)
> -endif(ENABLE_CLI)
> +if(NOT CMAKE_BUILD_TYPE)
> +    # default to Release build for GCC builds
> +    set(CMAKE_BUILD_TYPE Release CACHE STRING
> +        "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or
> CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
> +        FORCE)
> +endif()
> +
> +project (x265)
> +cmake_minimum_required (VERSION 2.6)
> +
> +SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake"
> "${CMAKE_MODULE_PATH}")
> +
> +if ("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
> +    set(X64 1)
> +    add_definitions(-DX86_64=1)
> +endif()
> +
> +if (CMAKE_GENERATOR STREQUAL "Xcode")
> +  set(XCODE 1)
> +endif()
> +if (APPLE)
> +  add_definitions(-DMACOS)
> +endif()
> +
> +# Enforce coding standards.  Full warnings and warnings as errors
> +if("$ENV{CXX}" STREQUAL "icl")
> +    set(MSVC 1)
> +endif()
> +if(MSVC)
> +    option(STATIC_LINK_CRT "Statically link C runtime for release builds"
> OFF)
> +    if (STATIC_LINK_CRT)
> +        set(CompilerFlags CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE)
> +        foreach(CompilerFlag ${CompilerFlags})
> +            string(REPLACE "/MD" "/MT" ${CompilerFlag}
> "${${CompilerFlag}}")
> +        endforeach()
> +    endif (STATIC_LINK_CRT)
> +    add_definitions(/W4 /WX /D_CRT_SECURE_NO_WARNINGS)
> +    add_definitions(/Ob2) # always inline
> +    add_definitions(/Oi)  # enable intrinsics
> +    add_definitions(/MP)  # multithreaded build
> +    include_directories(compat/msvc)
> +endif(MSVC)
> +
> +if("$ENV{CXX}" STREQUAL "icpc")
> +    set(GCC 1)
> +    add_definitions(-Wall -Wextra -Wshadow -no-vec)
> +elseif(CMAKE_COMPILER_IS_GNUCXX)
> +    execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
> OUTPUT_VARIABLE GCC_VERSION)
> +    set(GCC 1)
> +    add_definitions(-Wall -Wextra -Wshadow -mstackrealign)
> +    if(NOT GCC_VERSION VERSION_LESS 4.7)
> +        # this is necessary to avoid name conflicts in vector class
> +        # library.  if vector classes are removed/replaced this can
> +        # likely be removed as well.
> +        add_definitions(-fabi-version=6)
> +    endif()
> +elseif(APPLE)
> +    exec_program(uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
> +    string(REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
> +    message(STATUS "DARWIN_VERSION=${DARWIN_VERSION}")
> +    if (DARWIN_VERSION GREATER 12)
> +        # Detect Mac OS X 10.8 llvm/gcc frankenstein's monster
> +        if (${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/c++")
> +            execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
> OUTPUT_VARIABLE GCC_VERSION)
> +            set(GCC 1)
> +            add_definitions(-Wall -Wextra -Wshadow)
> +        endif()
> +    endif()
> +endif()
> +if (GCC)
> +    option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
> +    if(WARNINGS_AS_ERRORS)
> +        add_definitions(-Werror)
> +    endif(WARNINGS_AS_ERRORS)
> +    if(X64 AND NOT WIN32)
> +        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
> +    endif(X64 AND NOT WIN32)
> +endif(GCC)
> +
> +option(HIGH_BIT_DEPTH "Use 16bit pixels internally" OFF)
> +if(HIGH_BIT_DEPTH)
> +    add_definitions(-DHIGH_BIT_DEPTH=1)
> +else(HIGH_BIT_DEPTH)
> +    add_definitions(-DHIGH_BIT_DEPTH=0)
> +endif(HIGH_BIT_DEPTH)
> +
> +option(ENABLE_PRIMITIVES_VEC "Enable use of SIMD vector class primitives"
> ON)
> +find_package(Yasm)
> +if(YASM_FOUND)
> +    if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
> +        message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old.
> 1.2.0 or later required")
> +    else()
> +        message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build
> assembly primitives")
> +        option(ENABLE_PRIMITIVES_ASM "Enable use of assembly coded
> primitives" ON)
> +    endif()
> +endif(YASM_FOUND)
> +
> +if(UNIX)
> +    SET(PLATFORM_LIBS pthread m)
> +    if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
> +        SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
> +    endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
> +endif(UNIX)
> +
> +option(ENABLE_STLPORT "Enable use of STLPort STL replacement library" OFF)
> +if(ENABLE_STLPORT)
> +    set(STLPORT_LOCATION "${STLPORT_INCLUDE_DIR}" CACHE PATH "Location of
> STLPort sources")
> +    # STLPort headers must be included before system headers
> +    include_directories(BEFORE ${STLPORT_LOCATION}/stlport)
> +    link_directories(${STLPORT_LOCATION}/lib)
> +    if("$ENV{CXX}" STREQUAL "icl")
> +        add_definitions(/Qwd1879)
> +    endif()
> +endif(ENABLE_STLPORT)
> +
> +option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
> +if(ENABLE_PPA)
> +    add_definitions(-DENABLE_PPA)
> +    add_subdirectory(PPA)
> +    SET(PLATFORM_LIBS ${PLATFORM_LIBS} PPA)
> +    if(UNIX)
> +        SET(PLATFORM_LIBS ${PLATFORM_LIBS} dl)
> +    endif(UNIX)
> +endif(ENABLE_PPA)
> +
> +option(ENABLE_OPENCL "Use OpenCL for motion estimation" OFF)
> +if(ENABLE_OPENCL)
> +    find_package(OpenCL)
> +    if(OPENCL_FOUND)
> +        add_definitions(-DHAVE_OPENCL)
> +    endif()
> +endif(ENABLE_OPENCL)
> +
> +if (WIN32)
> +    # Visual leak detector
> +    find_package(VLD)
> +    if(VLD_FOUND)
> +        add_definitions(-DHAVE_VLD)
> +        include_directories(${VLD_INCLUDE_DIRS})
> +        set(PLATFORM_LIBS ${PLATFORM_LIBS} ${VLD_LIBRARIES})
> +        link_directories(${VLD_LIBRARY_DIRS})
> +    endif()
> +    option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
> +    if(WINXP_SUPPORT)
> +        # force workarounds for atomic intrinsics introduced after XP
> +        add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
> +    endif(WINXP_SUPPORT)
> +endif()
> +
> +include_directories(. Lib common encoder)
> +add_subdirectory(common)
> +add_subdirectory(encoder)
> +
> +set(LIBS common encoder)
> +if(ENABLE_PRIMITIVES_VEC)
> +    set(LIBS ${LIBS} PrimitivesVec)
> +endif(ENABLE_PRIMITIVES_VEC)
> +
> +if(ENABLE_PRIMITIVES_ASM)
> +    set(LIBS ${LIBS} PrimitivesASM)
> +endif(ENABLE_PRIMITIVES_ASM)
> +
> +if(NOT XCODE)
> +    include(mergestaticlibs)
> +    merge_static_libs(x265 ${LIBS})
> +endif()
> +
> +# Test applications
> +option(ENABLE_TESTS "Enable Unit Tests" OFF)
> +if(ENABLE_TESTS)
> +    add_subdirectory(test)
> +endif(ENABLE_TESTS)
> +
> +# Main CLI application
> +option(ENABLE_CLI "Build standalone CLI application" ON)
> +if(ENABLE_CLI)
> +    file(GLOB InputFiles input/*.cpp input/*.h)
> +    file(GLOB OutputFiles output/*.cpp output/*.h)
> +    source_group(input FILES ${InputFiles})
> +    source_group(output FILES ${OutputFiles})
> +    if (MSVC)
> +        set_source_files_properties(compat/msvc/getopt.c PROPERTIES
> COMPILE_FLAGS "/wd4100")
> +    elseif(GCC)
> +        set_source_files_properties(compat/msvc/getopt.c PROPERTIES
> COMPILE_FLAGS "-Wno-unused-parameter")
> +    endif()
> +
> +    include(version) # determine X265_VERSION
> +    set_source_files_properties(x265.cpp PROPERTIES COMPILE_FLAGS
> -DX265_VERSION=${X265_VERSION})
> +    add_executable(cli ${EXTRAS} ../COPYING ${InputFiles} ${OutputFiles}
> +        x265.cpp x265opts.h x265.h
> +        compat/msvc/getopt.c compat/msvc/getopt.h)
> +if(XCODE)
> +    target_link_libraries(cli ${LIBS} ${PLATFORM_LIBS}
> ${OPENCL_LIBRARIES})
> +else()
> +    target_link_libraries(cli x265 ${PLATFORM_LIBS} ${OPENCL_LIBRARIES})
> +endif()
> +    SET_TARGET_PROPERTIES(cli PROPERTIES OUTPUT_NAME x265)
> +endif(ENABLE_CLI)
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/cmake/FindOpenCL.cmake
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/FindOpenCL.cmake     Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,58 @@
> +# modified from OpenCV
>

If this file was copied and adapted from OpenCV, we need to keep their
license header and attribution here, even if MCW contributed the patch.

+
> +if(APPLE)
> +  set(OPENCL_FOUND YES)
> +  set(OPENCL_LIBRARY "-framework OpenCL" CACHE STRING "OpenCL library")
> +  set(OPENCL_INCLUDE_DIR "" CACHE STRING "OpenCL include directory")
> +  mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
> +else(APPLE)
> +  #find_package(OpenCL QUIET)
>

let's not add commented lines in new files


> +
> +  if (NOT OPENCL_FOUND)
> +    find_path(OPENCL_ROOT_DIR
> +              NAMES OpenCL/cl.h CL/cl.h include/CL/cl.h
> include/nvidia-current/CL/cl.h
> +              PATHS ENV OCLROOT ENV AMDAPPSDKROOT ENV CUDA_PATH ENV
> INTELOCLSDKROOT
> +              DOC "OpenCL root directory"
> +              NO_DEFAULT_PATH)
> +
> +    find_path(OPENCL_INCLUDE_DIR
> +              NAMES OpenCL/cl.h CL/cl.h
> +              HINTS ${OPENCL_ROOT_DIR}
> +              PATH_SUFFIXES include include/nvidia-current
> +              DOC "OpenCL include directory"
> +              NO_DEFAULT_PATH)
> +
> +    if(WIN32)
>

our cmake script style uses 4-spaces for intendation


> +      if(X64)
> +        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64)
> +      else()
> +        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86)
> +      endif()
> +    elseif(UNIX)
> +      if(X64)
> +        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib64 lib)
> +      else()
> +        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib32 lib)
> +      endif()
> +    else()
> +      set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
> +    endif()
> +
> +    find_library(OPENCL_LIBRARY
> +              NAMES OpenCL
> +              HINTS ${OPENCL_ROOT_DIR}
> +              PATH_SUFFIXES ${OPENCL_POSSIBLE_LIB_SUFFIXES}
> +              DOC "OpenCL library"
> +              NO_DEFAULT_PATH)
> +
> +    mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
> +    include(FindPackageHandleStandardArgs)
> +    FIND_PACKAGE_HANDLE_STANDARD_ARGS(OPENCL DEFAULT_MSG OPENCL_LIBRARY
> OPENCL_INCLUDE_DIR )
> +  endif()
> +endif(APPLE)
> +
> +if(OPENCL_FOUND)
> +  set(HAVE_OPENCL 1)
> +  set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
> +  set(OPENCL_LIBRARIES    ${OPENCL_LIBRARY})
> +endif()
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/cmake/cl2cpp.cmake
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/cmake/cl2cpp.cmake Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,35 @@
> +# modified from OpenCV
>

ditto attribution here


> +
> +file(GLOB cl_list "${CL_DIR}/*.h" "${CL_DIR}/*.cl" )
> +
> +file(WRITE ${OUTPUT} "// This file is auto-generated. Do not edit!
> +
> +namespace x265
> +{
> +
> +")
> +
> +file(APPEND ${OUTPUT} "const char* x265_opencl_source=\n")
> +
> +foreach(cl ${cl_list})
> +  file(READ "${cl}" lines)
> +
> +  string(REPLACE "\r" "" lines "${lines}\n")
> +  string(REPLACE "\t" "  " lines "${lines}")
> +
> +  string(REGEX REPLACE "/\\*([^*]/|\\*[^/]|[^*/])*\\*/" ""   lines
> "${lines}") # multiline comments
> +  string(REGEX REPLACE "/\\*([^\n])*\\*/"               ""   lines
> "${lines}") # single-line comments
> +  string(REGEX REPLACE "[ ]*//[^\n]*\n"                 "\n" lines
> "${lines}") # single-line comments
> +  string(REGEX REPLACE "\n[ ]*(\n[ ]*)*"                "\n" lines
> "${lines}") # empty lines & leading whitespace
> +  string(REGEX REPLACE "^\n"                            ""   lines
> "${lines}") # leading new line
> +
> +  string(REPLACE "\\" "\\\\" lines "${lines}")
> +  string(REPLACE "\"" "\\\"" lines "${lines}")
> +  string(REPLACE "\n" "\\n\"\n\"" lines "${lines}")
> +
> +  string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof
> +
> +  file(APPEND ${OUTPUT} "\"${lines}")
> +endforeach()
> +
> +file(APPEND ${OUTPUT} ";\n\n}\n")
>

we should also cleanup end of file eoln


> \ No newline at end of file
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/CMakeLists.txt
> --- a/source/common/CMakeLists.txt      Mon Sep 16 21:06:49 2013 +0800
> +++ b/source/common/CMakeLists.txt      Tue Sep 17 13:41:27 2013 -0500
> @@ -1,59 +1,73 @@
> -if(GCC)
>

again, you've changed the EOLN of this file on accident


> -    if (NOT X64)
> -        # force gcc to generate code for sync primitives
> -        set_source_files_properties(threadpool.cpp reference.cpp
> wavefront.cpp PROPERTIES COMPILE_FLAGS -march=i686)
> -    endif()
> -endif(GCC)
> -
> -if(ENABLE_PRIMITIVES_VEC)
> -    add_definitions(-DENABLE_VECTOR_PRIMITIVES=1)
> -endif(ENABLE_PRIMITIVES_VEC)
> -if(ENABLE_PRIMITIVES_ASM)
> -    add_definitions(-DENABLE_ASM_PRIMITIVES=1)
> -endif(ENABLE_PRIMITIVES_ASM)
> -
> -include_directories(../VectorClass)
> -
> -file(GLOB LIBCOMMON_HDR ../Lib/TLibCommon/*.h)
> -file(GLOB LIBCOMMON_SRC ../Lib/TLibCommon/*.cpp)
> -source_group(TLibCommon FILES ${LIBCOMMON_SRC})
> -source_group(TLibCommonH FILES ${LIBCOMMON_HDR})
> -if(GCC)
> -    set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
> -        "-Wno-sign-compare")
> -endif(GCC)
> -if(MSVC)
> -    # ignore these warnings from HM source
> -    # /wd4244 type conversion, possible loss of data
> -    # /wd4512 assignment operator could not be generated
> -    # /wd4127 conditional expression is constant
> -    # /wd4389 signed/unsigned mismatch
> -    # /wd4018 '<' signed/unsigned mismatch
> -    # /wd4800 performance warning: bool coersion
> -    set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
> -        "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
> -    if ("$ENV{CXX}" STREQUAL "icl")
> -        add_definitions(/Qwd2557) # signed/unsigned mismatch
> -    endif()
> -endif(MSVC)
> -
> -add_library(common STATIC ../../COPYING
> -    ${LIBCOMMON_SRC} ${LIBCOMMON_HDR}
> -    primitives.cpp primitives.h
> -    pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp
> -    ../VectorClass/instrset_detect.cpp
> -    threading.cpp threading.h
> -    threadpool.cpp threadpool.h
> -    wavefront.h wavefront.cpp
> -    md5.cpp md5.h
> -    TShortYUV.cpp TShortYUV.h mv.h
> -    reference.cpp reference.h
> -    common.cpp common.h
> -    lowres.cpp lowres.h)
> -
> -if(ENABLE_PRIMITIVES_VEC)
> -    add_subdirectory(vec)
> -endif(ENABLE_PRIMITIVES_VEC)
> -if(ENABLE_PRIMITIVES_ASM)
> -    add_subdirectory(x86)
> -endif(ENABLE_PRIMITIVES_ASM)
> +if(GCC)
> +    if (NOT X64)
> +        # force gcc to generate code for sync primitives
> +        set_source_files_properties(threadpool.cpp reference.cpp
> wavefront.cpp PROPERTIES COMPILE_FLAGS -march=i686)
> +    endif()
> +endif(GCC)
> +
> +if(ENABLE_PRIMITIVES_VEC)
> +    add_definitions(-DENABLE_VECTOR_PRIMITIVES=1)
> +endif(ENABLE_PRIMITIVES_VEC)
> +if(ENABLE_PRIMITIVES_ASM)
> +    add_definitions(-DENABLE_ASM_PRIMITIVES=1)
> +endif(ENABLE_PRIMITIVES_ASM)
> +
> +include_directories(../VectorClass)
> +
> +file(GLOB LIBCOMMON_HDR ../Lib/TLibCommon/*.h)
> +file(GLOB LIBCOMMON_SRC ../Lib/TLibCommon/*.cpp)
> +source_group(TLibCommon FILES ${LIBCOMMON_SRC})
> +source_group(TLibCommonH FILES ${LIBCOMMON_HDR})
> +if(GCC)
> +    set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
> +        "-Wno-sign-compare")
> +endif(GCC)
> +if(MSVC)
> +    # ignore these warnings from HM source
> +    # /wd4244 type conversion, possible loss of data
> +    # /wd4512 assignment operator could not be generated
> +    # /wd4127 conditional expression is constant
> +    # /wd4389 signed/unsigned mismatch
> +    # /wd4018 '<' signed/unsigned mismatch
> +    # /wd4800 performance warning: bool coersion
> +    set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
> +        "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
> +    if ("$ENV{CXX}" STREQUAL "icl")
> +        add_definitions(/Qwd2557) # signed/unsigned mismatch
> +    endif()
> +endif(MSVC)
> +
> +if(HAVE_OPENCL)
> +    include_directories(${OPENCL_INCLUDE_DIRS})
> +    set(OPENCL_KERNEL_HDR ${CMAKE_CURRENT_SOURCE_DIR}/oclkernels.h)
> +    add_custom_command(
> +      OUTPUT ${OPENCL_KERNEL_HDR}
> +      COMMAND ${CMAKE_COMMAND}
> -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/opencl"
> +              -DOUTPUT=${OPENCL_KERNEL_HDR}
> +              -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/cl2cpp.cmake"
> +      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/cl2cpp.cmake")
> +    set(OPENCL_SRC opencl.cpp)
> +    set(OPENCL_HDR opencl.h ${OPENCL_KERNEL_HDR})
> +endif(HAVE_OPENCL)
> +
> +add_library(common STATIC ../../COPYING
> +    ${LIBCOMMON_SRC} ${LIBCOMMON_HDR}
> +    primitives.cpp primitives.h
> +    pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp
> +    ../VectorClass/instrset_detect.cpp
> +    threading.cpp threading.h
> +    threadpool.cpp threadpool.h
> +    wavefront.h wavefront.cpp
> +    md5.cpp md5.h
> +    TShortYUV.cpp TShortYUV.h mv.h
> +    reference.cpp reference.h
> +    common.cpp common.h
> +    lowres.cpp lowres.h
> +    ${OPENCL_SRC} ${OPENCL_HDR})
> +
> +if(ENABLE_PRIMITIVES_VEC)
> +    add_subdirectory(vec)
> +endif(ENABLE_PRIMITIVES_VEC)
> +if(ENABLE_PRIMITIVES_ASM)
> +    add_subdirectory(x86)
> +endif(ENABLE_PRIMITIVES_ASM)
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl.cpp
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/opencl.cpp  Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,631 @@
>
> +/*****************************************************************************
> + * opencl.c: OpenCL initialization and kernel compilation
> +
> *****************************************************************************
> + * Copyright (C) 2013 x265 project
> + *
> + * Authors: Steve Borho <steve at borho.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at licensing at multicorewareinc.com.
> +
> *****************************************************************************/
> +
> +#include "TLibCommon/CommonDef.h"
> +#include "common.h"
> +
> +#if defined WIN32 || defined _WIN32
> +#include <Windows.h>
>

windows.h needs to be lower case, this actually matters for mingw cross
compiles


> +#else
> +#include <dlfcn.h> //dlopen, dlsym, dlclose
> +#endif
> +#include "opencl.h"
> +#include "oclkernels.h"
> +#include "stdio.h"
> +
> +/* define from recent cl_ext.h, copied here in case headers are old */
> +#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD        0x4042
> +
> +using namespace x265;
> +
> +// global variable
> +static ocl g_ocl;
> +
> +static void CALLBACK oclErrorNotify( const char *errinfo, const void
> *private_info, size_t cb, void *user_data )
> +{
> +    /* Any error notification can be assumed to be fatal to the OpenCL
> context.
> +     * We need to stop using it immediately to prevent further damage. */
> +    x265_param_t *param = (x265_param_t*)user_data;
> +    param->useOpenCL = 0;
> +    //h->opencl.b_fatal_error = 1;
> +    x265_log(param, X265_LOG_ERROR, "OpenCL: %s\n", errinfo );
> +    x265_log(param, X265_LOG_ERROR, "OpenCL: fatal error, aborting
> encode\n" );
> +    (void)cb;
> +    (void)private_info;
> +}
> +
> +ocl::ocl()
> +    : m_context(NULL)
> +    , m_device(NULL)
> +    , m_queue(NULL)
> +    , m_program(NULL)
> +    , m_initialized(false)
> +    , m_deviceAMDSI(false)
> +    , m_fatalError(false)
> +    , m_openclThreadPri(0)
> +    , m_kernelMeStar(NULL)
> +{}
> +
> +ocl::~ocl()
> +{}
> +
> +
> +bool ocl::init(x265_param_t *param)
> +{
> +    cl_int status;
> +    cl_uint numPlatforms;
> +    bool ret = false;
> +
> +    if (m_initialized)
> +    {
> +        return true;
> +    }
> +
> +    status = clGetPlatformIDs(0, NULL, &numPlatforms);
> +    if( status != CL_SUCCESS || numPlatforms == 0 )
> +    {
> +        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to query
> installed platforms\n");
> +        return false;
> +    }
> +
> +    cl_platform_id *platforms =
> (cl_platform_id*)X265_MALLOC(cl_platform_id, numPlatforms);
> +    status = clGetPlatformIDs( numPlatforms, platforms, NULL );
> +    if( status != CL_SUCCESS )
> +    {
> +        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to query
> installed platforms\n");
> +        X265_FREE( platforms );
> +        return false;
> +    }
> +
> +    /* Select the first OpenCL platform with a GPU device that supports
> our
> +     * required image (texture) formats */
> +    for( cl_uint i = 0; i < numPlatforms; ++i )
> +    {
> +        cl_uint gpu_count = 0;
> +        status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0,
> NULL, &gpu_count );
> +        if( status != CL_SUCCESS || !gpu_count )
> +            continue;
> +
> +        cl_device_id *devices = (cl_device_id*) X265_MALLOC(
> cl_device_id, gpu_count );
> +        if( !devices )
> +            continue;
> +
> +        status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU,
> gpu_count, devices, NULL );
> +        if( status != CL_SUCCESS )
> +        {
> +            X265_FREE( devices );
> +            continue;
> +        }
> +
> +        /* Find a GPU device that supports our image formats */
> +        for( cl_uint gpu = 0; gpu < gpu_count; gpu++ )
> +        {
> +            cl_device_id device = devices[gpu];
> +
> +            /* if the user has specified an exact device ID, skip all
> other
> +             * GPUs.  If this device matches, allow it to continue
> through the
> +             * checks for supported images, etc.  */
> +            if( param->OpenCLDeviceId && devices[gpu] != (cl_device_id)
> param->OpenCLDeviceId )
> +                continue;
> +
> +            cl_bool image_support;
> +            clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT,
> sizeof(cl_bool), &image_support, NULL );
> +            if( !image_support )
> +                continue;
> +
> +            cl_context context = clCreateContext( NULL, 1, &device,
> oclErrorNotify, (void*)param, &status );
> +            if( status != CL_SUCCESS )
> +                continue;
> +
> +            cl_uint imagecount = 0;
> +            clGetSupportedImageFormats( context, CL_MEM_READ_WRITE,
> CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
> +            if( !imagecount )
> +            {
> +                clReleaseContext( context );
> +                continue;
> +            }
> +
> +            cl_image_format *imageType =
> (cl_image_format*)X265_MALLOC(cl_image_format, imagecount);
> +            if( !imageType )
> +            {
> +                clReleaseContext( context );
> +                continue;
> +            }
> +
> +            clGetSupportedImageFormats( context, CL_MEM_READ_WRITE,
> CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
> +
> +            int b_has_r = 0;
> +            int b_has_rgba = 0;
> +            for( cl_uint j = 0; j < imagecount; j++ )
> +            {
> +                if( imageType[j].image_channel_order == CL_R &&
> +                    imageType[j].image_channel_data_type ==
> CL_UNSIGNED_INT32 )
> +                    b_has_r = 1;
> +                else if( imageType[j].image_channel_order == CL_RGBA &&
> +                         imageType[j].image_channel_data_type ==
> CL_UNSIGNED_INT8 )
> +                    b_has_rgba = 1;
> +            }
> +            X265_FREE( imageType );
> +            if( !b_has_r || !b_has_rgba )
> +            {
> +                char devname[64];
> +                status = clGetDeviceInfo(device, CL_DEVICE_NAME,
> sizeof(devname), devname, NULL);
> +                if( status == CL_SUCCESS )
> +                {
> +                    /* emit warning if we are discarding the user's
> explicit choice */
> +                    int level = param->OpenCLDeviceId ? X265_LOG_WARNING
> : X265_LOG_DEBUG;
> +                    x265_log(param, level, "OpenCL: %s does not support
> required image formats\n", devname);
> +                }
> +                clReleaseContext( context );
> +                continue;
> +            }
> +
> +            /* user selection of GPU device, skip N first matches */
> +            if( param->iOpenCLDevice )
> +            {
> +                param->iOpenCLDevice--;
> +                clReleaseContext( context );
> +                continue;
> +            }
> +
> +            cl_command_queue queue = clCreateCommandQueue( context,
> device, 0, &status );
> +            if( status != CL_SUCCESS )
>

watch the white-space coding style


> +            {
> +                clReleaseContext( context );
> +                continue;
> +            }
> +
> +            m_context = context;
> +            m_device = device;
> +            m_queue = queue;
> +
> +            ret = true;
> +            break;
> +        }
> +
> +        X265_FREE( devices );
> +
> +        if (ret)
> +        {
> +            break;
> +        }
> +    }
> +
> +    X265_FREE( platforms );
> +
> +    if (!param->pszCLbinFile)
> +    {
> +        param->pszCLbinFile = "x265_kernels.clbin";
> +    }
> +
> +    if (!ret)
> +    {
> +        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to find a
> compatible device\n");
> +    }
> +    else
> +    {
> +        ret = createKernels(param);
> +    }
> +
> +    m_initialized = ret;
> +
> +    return ret;
> +}
> +
> +bool ocl::createKernels(x265_param_t *param)
> +{
> +    static const char *kernelnames[] = {
> +        "motion_estimate"
> +    };
> +    cl_kernel *kernels[] = {
> +        &m_kernelMeStar
> +    };
> +    cl_int status;
> +
> +    compile(param);
> +    if( !m_program )
> +    {
> +        return false;
> +    }
> +
> +    for( int i = 0; i < sizeof(kernelnames)/sizeof(kernelnames[0]); i++ )
> +    {
> +        *kernels[i] = clCreateKernel(m_program, kernelnames[i], &status);
> +        if( status != CL_SUCCESS )
> +        {
> +            x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to compile
> kernel '%s' (%d)\n", kernelnames[i], status );
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
> +void ocl::destroy()
> +{
> +    if (m_queue)
> +    {
> +        clFinish(m_queue);
> +    }
> +
> +    if (m_kernelMeStar)
> +    {
> +        clReleaseKernel(m_kernelMeStar);
> +        m_kernelMeStar = NULL;
> +    }
> +
> +    if (m_program)
> +    {
> +        clReleaseProgram(m_program);
> +        m_program = NULL;
> +    }
> +
> +    if (m_queue)
> +    {
> +        clReleaseCommandQueue( m_queue );
> +        m_queue = NULL;
> +    }
> +
> +    if (m_context)
> +    {
> +        clReleaseContext( m_context );
> +        m_context = NULL;
> +    }
> +
> +    m_initialized = false;
> +}
> +
> +/* Try to load the cached compiled program binary, verify the device
> context is
> + * still valid before reuse */
> +void ocl::cache_load(x265_param_t *param, char *devname, char *devvendor,
> char *driverversion)
> +{
> +    cl_int status;
> +
> +    /* try to load cached program binary */
> +    FILE *fp = fopen( param->pszCLbinFile, "rb" );
> +    if( !fp )
> +        return;
> +
> +    fseek( fp, 0L, SEEK_END );
> +    size_t size = ftell( fp );
> +    rewind( fp );
> +    uint8_t *binary = (uint8_t*)X265_MALLOC( uint8_t, size );
> +    if (!binary)
> +        return;
> +
> +    fread( binary, 1, size, fp );
> +    const uint8_t *ptr = (const uint8_t*)binary;
> +
> +#define CHECK_STRING( STR )\
> +    {\
> +        size_t len = strlen( STR );\
> +        if( size <= len || strncmp( (char*)ptr, STR, len ) )\
> +            goto fail;\
> +        else {\
> +            size -= (len+1); ptr += (len+1);\
> +        }\
> +    }
> +
> +    CHECK_STRING( devname );
> +    CHECK_STRING( devvendor );
> +    CHECK_STRING( driverversion );
> +    //CHECK_STRING( x265_opencl_source_hash );
> +#undef CHECK_STRING
> +
> +    m_program = clCreateProgramWithBinary( m_context, 1, &m_device,
> &size, &ptr, NULL, &status );
> +    if (status != CL_SUCCESS)
> +    {
> +        m_program = NULL;
> +    }
> +
> +fail:
> +    fclose( fp );
> +    X265_FREE( binary );
> +}
> +
> +/* Save the compiled program binary to a file for later reuse.  Device
> context
> + * is also saved in the cache file so we do not reuse stale binaries */
> +void ocl::cache_save(x265_param_t *param, char *devname, char *devvendor,
> char *driverversion)
> +{
> +    FILE *fp = fopen(param->pszCLbinFile, "wb");
> +    if( !fp )
> +    {
> +        x265_log(param, X265_LOG_INFO, "OpenCL: unable to open clbin file
> for write");
> +        return;
> +    }
> +
> +    size_t size;
> +    cl_int status = clGetProgramInfo( m_program, CL_PROGRAM_BINARY_SIZES,
> sizeof(size_t), &size, NULL );
> +    if( status == CL_SUCCESS )
> +    {
> +        uint8_t *binary = (uint8_t*)X265_MALLOC(uint8_t, size);
> +        status = clGetProgramInfo( m_program, CL_PROGRAM_BINARIES,
> sizeof(uint8_t *), &binary, NULL );
> +        if( status == CL_SUCCESS )
> +        {
> +            fputs( devname, fp );
> +            fputc( '\n', fp );
> +            fputs( devvendor, fp );
> +            fputc( '\n', fp );
> +            fputs( driverversion, fp );
> +            fputc( '\n', fp );
> +            //fputs( x265_opencl_source_hash, fp );
> +            //fputc( '\n', fp );
> +            fwrite( binary, 1, size, fp );
> +        }
> +        else
> +            x265_log(param, X265_LOG_INFO, "OpenCL: Unable to query
> program binary, no cache file generated");
> +        X265_FREE( binary );
> +    }
> +    else
> +        x265_log(param, X265_LOG_INFO, "OpenCL: Unable to query program
> binary size, no cache file generated");
> +    fclose( fp );
> +
> +    return;
> +}
> +
> +
> +oclME::oclME(x265_param_t *param)
> +    : m_pageLockedBuffer(NULL)
> +    , m_pageLockedPtr(NULL)
> +    , m_plOccupancy(0)
> +    , m_numCopies(0)
> +    , m_mvCostsBuffer(NULL)
> +    , m_mvpBuffer(NULL)
> +{
> +    for (int i = 0; i < 2; i++)
> +    {
> +        m_mvBuffer[i];
> +        m_costsBuffer[i];
> +        m_frameStatsBuffer[i];
> +    }
> +
> +    m_ocl = &g_ocl;
> +    init(param);
> +}
> +
> +/* The OpenCL source under common/opencl will be merged into
> common/oclkernels.h
> + * by CMake. It defines a x265_opencl_source string which we will pass
> + * to clCreateProgramWithSource().  We also attempt to use a cache file
> for the
> + * compiled binary, stored in the current working folder. */
> +bool ocl::compile(x265_param_t *param)
> +{
> +    cl_int status;
> +    char devname[64];
> +    char devvendor[64];
> +    char driverversion[64];
> +
> +    status  = clGetDeviceInfo( m_device, CL_DEVICE_NAME,
>  sizeof(devname), devname, NULL );
> +    status |= clGetDeviceInfo( m_device, CL_DEVICE_VENDOR,
>  sizeof(devvendor), devvendor, NULL );
> +    status |= clGetDeviceInfo( m_device, CL_DRIVER_VERSION,
> sizeof(driverversion), driverversion, NULL );
> +    if (status != CL_SUCCESS)
> +    {
> +        return false;
> +    }
> +
> +    // Most AMD GPUs have vector registers
> +    int vectorize = !strcmp( devvendor, "Advanced Micro Devices, Inc." );
> +    m_deviceAMDSI = false;
> +
> +    if (vectorize)
> +    {
> +        /* Disable OpenCL on Intel/AMD switchable graphics devices */
> +        if (detectSwitchableGraphics())
> +        {
> +            x265_log(param, X265_LOG_INFO, "OpenCL acceleration disabled,
> switchable graphics detected\n" );
> +            return false;
> +        }
> +
> +        /* Detect AMD SouthernIsland or newer device (single-width
> registers) */
> +        cl_uint simdwidth = 4;
> +        status = clGetDeviceInfo( m_device,
> CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
> +        if( status == CL_SUCCESS && simdwidth == 1 )
> +        {
> +            vectorize = 0;
> +            m_deviceAMDSI = true;
> +        }
> +    }
> +
> +    x265_log(param, X265_LOG_INFO, "OpenCL acceleration enabled with %s
> %s %s\n", devvendor, devname, m_deviceAMDSI ? "(SI)" : "" );
> +
> +    cache_load(param, devname, devvendor, driverversion);
> +    if( !m_program )
> +    {
> +        /* clCreateProgramWithSource() requires a pointer variable, you
> cannot just use &x265_opencl_source */
> +        x265_log(param, X265_LOG_INFO, "Compiling OpenCL kernels...\n" );
> +        size_t size = strlen(x265_opencl_source);
> +        m_program = clCreateProgramWithSource( m_context, 1,
> &x265_opencl_source, &size, &status );
> +        if( status != CL_SUCCESS || !m_program )
> +        {
> +            x265_log(param, X265_LOG_WARNING, "OpenCL: unable to create
> program\n" );
> +            return false;
> +        }
> +    }
> +
> +    /* Build the program binary for the OpenCL device */
> +    const char *buildopts = vectorize ? "-DVECTORIZE=1" : "";
> +    status = clBuildProgram( m_program, 1, &m_device, buildopts, NULL,
> NULL );
> +    if( status == CL_SUCCESS )
> +    {
> +        cache_save(param, devname, devvendor, driverversion);
> +        return true;
> +    }
> +
> +    /* Compile failure, should not happen with production code. */
> +
> +    size_t build_log_len = 0;
> +
> +    status = clGetProgramBuildInfo( m_program, m_device,
> CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len );
> +    if( status != CL_SUCCESS )
> +    {
> +        x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed,
> unable to query build log\n" );
> +        return false;
> +    }
> +
> +    char *build_log = (char*)X265_MALLOC(char, build_log_len);
> +    if( !build_log )
> +    {
> +        x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed,
> unable to alloc build log\n" );
> +        return false;
> +    }
> +
> +    status = clGetProgramBuildInfo( m_program, m_device,
> CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
> +    if( status != CL_SUCCESS )
> +    {
> +        x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed,
> unable to get build log\n" );
> +        X265_FREE( build_log );
> +        return false;
> +    }
> +
> +    FILE *lg = fopen( "x265_kernel_build_log.txt", "w" );
> +    if( lg )
> +    {
> +        fwrite( build_log, 1, build_log_len, lg );
> +        fclose( lg );
> +        x265_log(param, X265_LOG_WARNING, "OpenCL: kernel build errors
> written to x265_kernel_build_log.txt\n" );
> +    }
> +
> +    X265_FREE( build_log );
> +
> +    clReleaseProgram(m_program);
> +    m_program = NULL;
> +
> +    return false;
> +}
> +
> +oclME::~oclME()
> +{
> +}
> +
> +void oclME::destroy()
> +{
> +}
> +
> +bool oclME::init(x265_param_t *param)
> +{
> +    cl_int status;
> +
> +    if (!m_ocl)
> +    {
> +        return false;
> +    }
> +    m_ocl->init(param);
> +
> +    m_pageLockedBuffer = clCreateBuffer(m_ocl->m_context,
> CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL,
> &status);
> +    if( status != CL_SUCCESS )
> +    {
> +        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to allocate
> page-locked buffer, error '%d'\n", status );
> +        return false;
> +    }
> +    m_pageLockedPtr = (char*) clEnqueueMapBuffer(m_ocl->m_queue,
> m_pageLockedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
> +                                                 0, PAGE_LOCKED_BUF_SIZE,
> 0, NULL, NULL, &status);
> +    if( status != CL_SUCCESS )
> +    {
> +        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to map
> page-locked buffer, error '%d'\n", status );
> +        return false;
> +    }
> +
> +    return true;
> +}
>

let's not pull in all this ADL stuff until we have to


> +/* OpenCL misbehaves on hybrid laptops with Intel iGPU and AMD dGPU, so
> + * we consult AMD's ADL interface to detect this situation and disable
> + * OpenCL on these machines (Linux and Windows) */
> +#ifndef _WIN32
> +#define __stdcall
> +#define HINSTANCE void *
> +#endif
> +typedef void* ( __stdcall *ADL_MAIN_MALLOC_CALLBACK )( int );
> +typedef int ( *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int );
> +typedef int ( *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* );
> +typedef int ( *ADL_POWERXPRESS_SCHEME_GET ) ( int, int *, int *, int * );
> +typedef int ( *ADL_MAIN_CONTROL_DESTROY )();
> +#define ADL_OK 0
> +#define ADL_PX_SCHEME_DYNAMIC 2
> +
> +void* __stdcall adl_malloc_wrapper( int iSize ) { return
> X265_MALLOC(char, iSize); }
> +
> +int ocl::detectSwitchableGraphics()
> +{
> +    ADL_MAIN_CONTROL_CREATE          ADL_Main_Control_Create;
> +    ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
> +    ADL_POWERXPRESS_SCHEME_GET       ADL_PowerXpress_Scheme_Get;
> +    ADL_MAIN_CONTROL_DESTROY         ADL_Main_Control_Destroy;
> +    HINSTANCE hDLL;
> +    int ret = 0;
> +
> +#if _WIN32
> +    hDLL = LoadLibrary( "atiadlxx.dll" );
> +    if( !hDLL )
> +        hDLL = LoadLibrary( "atiadlxy.dll" );
> +#else
> +    hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
> +#define GetProcAddress dlsym
> +#endif
> +    if( !hDLL )
> +        return ret;
> +
> +    ADL_Main_Control_Create = (ADL_MAIN_CONTROL_CREATE)
> GetProcAddress(hDLL, "ADL_Main_Control_Create");
> +    ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY)
> GetProcAddress(hDLL, "ADL_Main_Control_Destroy");
> +    ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET)
> GetProcAddress(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
> +    ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET)
> GetProcAddress(hDLL, "ADL_PowerXpress_Scheme_Get");
> +    if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy ||
> !ADL_Adapter_NumberOfAdapters_Get ||
> +        !ADL_PowerXpress_Scheme_Get )
> +        goto bail;
> +
> +    if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1) )
> +        goto bail;
> +
> +    int numAdapters = 0;
> +    if( ADL_OK != ADL_Adapter_NumberOfAdapters_Get( &numAdapters ) )
> +    {
> +        ADL_Main_Control_Destroy();
> +        goto bail;
> +    }
> +
> +    for( int i = 0; i < numAdapters; i++ )
> +    {
> +        int PXSchemeRange, PXSchemeCurrentState, PXSchemeDefaultState;
> +        if( ADL_OK != ADL_PowerXpress_Scheme_Get( i, &PXSchemeRange,
> &PXSchemeCurrentState, &PXSchemeDefaultState) )
> +            break;
> +
> +        if( PXSchemeRange >= ADL_PX_SCHEME_DYNAMIC )
> +        {
> +            ret = 1;
> +            break;
> +        }
> +    }
> +
> +    ADL_Main_Control_Destroy();
> +
> +bail:
> +#if _WIN32
> +    FreeLibrary( hDLL );
> +#else
> +    dlclose( hDLL );
> +#endif
> +
> +    return ret;
> +}
> +
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl.h
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/opencl.h    Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,128 @@
>
> +/*****************************************************************************
> + * opencl.h: OpenCL structures and defines
> +
> *****************************************************************************
> + * Copyright (C) 2013 x265 project
> + *
> + * Authors: Steve Borho <steve at borho.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at licensing at multicorewareinc.com.
> +
> *****************************************************************************/
> +
> +#ifndef X265_OPENCL_H
> +#define X265_OPENCL_H
> +
> +#if defined __APPLE__
> +#include <OpenCL/opencl.h>
> +#else
> +#include <CL/opencl.h>
> +#endif
> +
> +#include "x265.h"
> +
> +namespace x265
> +{
> +
> +/* Number of PCIe copies that can be queued before requiring a flush */
> +#define MAX_FINISH_COPIES 1024
> +
> +/* Size (in bytes) of the page-locked buffer used for PCIe xfers */
> +#define PAGE_LOCKED_BUF_SIZE 32 * 1024 * 1024
> +
> +/* We define CL_QUEUE_THREAD_HANDLE_AMD here because it is not defined
> + * in the OpenCL headers shipped with NVIDIA drivers.  We need to be
> + * able to compile on an NVIDIA machine and run optimally on an AMD GPU.
> */
> +#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
> +
> +#define OCLCHECK( method, ... )\
> +    status = method( __VA_ARGS__ );\
> +    if( status != CL_SUCCESS ) {\
> +        param->useOpenCL = 0;\
> +        m_ocl->m_fatalError = 1;\
> +        x264_log( param, X264_LOG_ERROR, # method " error '%d'\n", status
> );\
> +        return status;\
> +    }
> +
> +class ocl
> +{
> +public:
> +
> +    ocl();
> +    ~ocl();
> +
> +    cl_context       m_context;
> +    cl_device_id     m_device;
> +    cl_command_queue m_queue;
> +    cl_program       m_program;
> +
> +    bool             m_initialized;
> +    bool             m_deviceAMDSI;
> +    bool             m_fatalError;
> +    int              m_openclThreadPri;
> +
> +    cl_kernel        m_kernelMeStar;
> +
> +    bool       init(x265_param_t *param);
> +    void       destroy();
> +    int        detectSwitchableGraphics();
> +
> +private:
> +
> +    bool       createKernels(x265_param_t *param);
> +    bool       compile(x265_param_t *param);
> +    void       cache_load(x265_param_t *param, char *devname, char
> *devvendor, char *driverversion);
> +    void       cache_save(x265_param_t *param, char *devname, char
> *devvendor, char *driverversion);
> +};
> +
> +
> +/* motion estimation */
> +class oclME
> +{
> +public:
> +
> +    oclME(x265_param_t *param);
> +    ~oclME();
> +
> +    ocl*             m_ocl;
> +
> +    cl_mem           m_pageLockedBuffer;
> +    char            *m_pageLockedPtr;
> +    int              m_plOccupancy;
> +
> +    struct
> +    {
> +        void *src;
> +        void *dest;
> +        int   bytes;
> +    } m_copies[MAX_FINISH_COPIES];
> +    int              m_numCopies;
> +
> +    cl_mem           m_mvBuffer[2];
> +    cl_mem           m_mvCostsBuffer;
> +    cl_mem           m_mvpBuffer;
> +
> +    cl_mem           m_costsBuffer[2];
> +    cl_mem           m_frameStatsBuffer[2]; /* cost_est, cost_est_aq,
> intra_mbs */
> +
> +    bool       init(x265_param_t *param);
> +    void       destroy();
> +
> +};
> +
> +} // end namespace x265
> +
> +#endif
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl/me.cl
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/opencl/me.cl        Tue Sep 17 13:41:27 2013 -0500
> @@ -0,0 +1,15 @@
> +/* motion estimate */
>

Oh interesting

It's pretty unlikely for a number of reasons that we'll be able to use the
image (texture) data packing hack that we used in x264.


> +__kernel void motion_estimate( read_only image2d_t    fenc,
> +                               read_only image2d_t    fref,
> +                               const __global short2 *in_mvs,
> +                               __global short2       *out_mvs,
> +                               __global short        *out_mv_costs,
> +                               __global short2       *mvp_buffer,
> +                               __local short         *cost_local,
> +                               __local short2        *mvc_local,
> +                               int                    mb_width,
> +                               int                    lambda,
> +                               int                    me_range)
> +{
> +}
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/encoder/CMakeLists.txt
> --- a/source/encoder/CMakeLists.txt     Mon Sep 16 21:06:49 2013 +0800
> +++ b/source/encoder/CMakeLists.txt     Tue Sep 17 13:41:27 2013 -0500
> @@ -1,43 +1,47 @@
>

another EOLN switch


> -if(GCC)
> -    # encoder.cpp must include HM headers which are not careful about
> named parameters
> -    set_source_files_properties(encoder.cpp PROPERTIES COMPILE_FLAGS
> -Wno-unused-parameter)
> -endif(GCC)
> -
> -if(GCC)
> -    if (NOT X64)
> -        # force gcc to generate code for sync primitives
> -        set_source_files_properties(framefilter.cpp PROPERTIES
> COMPILE_FLAGS -march=i686)
> -    endif()
> -endif(GCC)
> -
> -file(GLOB LIBENCODER_HDR  ../Lib/TLibEncoder/*.h)
> -file(GLOB LIBENCODER_SRC  ../Lib/TLibEncoder/*.cpp)
> -source_group(TLibEncoder FILES ${LIBENCODER_SRC})
> -source_group(TLibEncoderH FILES ${LIBENCODER_HDR})
> -if(GCC)
> -    set_source_files_properties(compress.cpp ${LIBENCODER_SRC} PROPERTIES
> COMPILE_FLAGS
> -        "-Wno-sign-compare")
> -endif(GCC)
> -if(MSVC)
> -    # ignore these warnings from HM source
> -    # /wd4244 type conversion, possible loss of data
> -    # /wd4512 assignment operator could not be generated
> -    # /wd4127 conditional expression is constant
> -    # /wd4389 signed/unsigned mismatch
> -    # /wd4018 '<' signed/unsigned mismatch
> -    # /wd4800 performance warning: bool coersion
> -    set_source_files_properties(${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS
> -        "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
> -endif(MSVC)
> -
> -add_library(encoder STATIC ../../COPYING ../x265.h
> -    ${LIBENCODER_SRC} ${LIBENCODER_HDR}
> -    bitcost.cpp bitcost.h
> -    motion.cpp motion.h
> -    slicetype.cpp slicetype.h
> -    frameencoder.cpp frameencoder.h
> -    framefilter.cpp framefilter.h
> -    cturow.cpp cturow.h
> -    dpb.cpp dpb.h
> -    ratecontrol.cpp ratecontrol.h
> -    compress.cpp encoder.cpp)
> +if(GCC)
> +    # encoder.cpp must include HM headers which are not careful about
> named parameters
> +    set_source_files_properties(encoder.cpp PROPERTIES COMPILE_FLAGS
> -Wno-unused-parameter)
> +endif(GCC)
> +
> +if(GCC)
> +    if (NOT X64)
> +        # force gcc to generate code for sync primitives
> +        set_source_files_properties(framefilter.cpp PROPERTIES
> COMPILE_FLAGS -march=i686)
> +    endif()
> +endif(GCC)
> +
> +file(GLOB LIBENCODER_HDR  ../Lib/TLibEncoder/*.h)
> +file(GLOB LIBENCODER_SRC  ../Lib/TLibEncoder/*.cpp)
> +source_group(TLibEncoder FILES ${LIBENCODER_SRC})
> +source_group(TLibEncoderH FILES ${LIBENCODER_HDR})
> +if(GCC)
> +    set_source_files_properties(compress.cpp ${LIBENCODER_SRC} PROPERTIES
> COMPILE_FLAGS
> +        "-Wno-sign-compare")
> +endif(GCC)
> +if(MSVC)
> +    # ignore these warnings from HM source
> +    # /wd4244 type conversion, possible loss of data
> +    # /wd4512 assignment operator could not be generated
> +    # /wd4127 conditional expression is constant
> +    # /wd4389 signed/unsigned mismatch
> +    # /wd4018 '<' signed/unsigned mismatch
> +    # /wd4800 performance warning: bool coersion
> +    set_source_files_properties(${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS
> +        "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
> +endif(MSVC)
> +
> +if(HAVE_OPENCL)
> +    include_directories(${OPENCL_INCLUDE_DIRS})
> +endif(HAVE_OPENCL)
> +
> +add_library(encoder STATIC ../../COPYING ../x265.h
> +    ${LIBENCODER_SRC} ${LIBENCODER_HDR}
> +    bitcost.cpp bitcost.h
> +    motion.cpp motion.h
> +    slicetype.cpp slicetype.h
> +    frameencoder.cpp frameencoder.h
> +    framefilter.cpp framefilter.h
> +    cturow.cpp cturow.h
> +    dpb.cpp dpb.h
> +    ratecontrol.cpp ratecontrol.h
> +    compress.cpp encoder.cpp)
> diff -r 0d33ff236f68 -r 97e1bed019a6 source/x265.h
> --- a/source/x265.h     Mon Sep 16 21:06:49 2013 +0800
> +++ b/source/x265.h     Tue Sep 17 13:41:27 2013 -0500
> @@ -277,6 +277,12 @@
>          int       qp;                          ///< Constant QP base value
>          int       rateFactor;                  ///< Constant rate factor
> (CRF)
>      } rc;
> +
> +    // opencl
> +    int       useOpenCL;                       ///<  use OpenCL when
> available
> +    int       iOpenCLDevice;                   ///<  specify count of GPU
> devices to skip, for CLI users
> +    void     *OpenCLDeviceId;                  ///<  pass explicit
> cl_device_id as void*, for API users
> +    char     *pszCLbinFile;                    ///<  compiled OpenCL
> kernel cache file
>  }
>  x265_param_t;
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130917/072d9a5c/attachment-0001.html>


More information about the x265-devel mailing list