[x265] [PATCH] add OpenCL environment codes

Wenju He wenju at multicorewareinc.com
Tue Sep 17 20:42:24 CEST 2013


# HG changeset patch
# User Wenju He <wenju at multicorewareinc.com>
# Date 1379443287 18000
#      Tue Sep 17 13:41:27 2013 -0500
# Node ID 97e1bed019a62849c5cc514ee74efbf64647986c
# Parent  0d33ff236f68bc2238138a7213301b2efc0e6426
add OpenCL environment codes

diff -r 0d33ff236f68 -r 97e1bed019a6 source/CMakeLists.txt
--- a/source/CMakeLists.txt	Mon Sep 16 21:06:49 2013 +0800
+++ b/source/CMakeLists.txt	Tue Sep 17 13:41:27 2013 -0500
@@ -1,190 +1,198 @@
-if(NOT CMAKE_BUILD_TYPE)
-    # default to Release build for GCC builds
-    set(CMAKE_BUILD_TYPE Release CACHE STRING
-        "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
-        FORCE)
-endif()
-
-project (x265)
-cmake_minimum_required (VERSION 2.6)
-
-SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")
-
-if ("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
-    set(X64 1)
-    add_definitions(-DX86_64=1)
-endif()
-
-if (CMAKE_GENERATOR STREQUAL "Xcode")
-  set(XCODE 1)
-endif()
-if (APPLE)
-  add_definitions(-DMACOS)
-endif()
-
-# Enforce coding standards.  Full warnings and warnings as errors
-if("$ENV{CXX}" STREQUAL "icl")
-    set(MSVC 1)
-endif()
-if(MSVC)
-    option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
-    if (STATIC_LINK_CRT)
-        set(CompilerFlags CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE)
-        foreach(CompilerFlag ${CompilerFlags})
-            string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
-        endforeach()
-    endif (STATIC_LINK_CRT)
-    add_definitions(/W4 /WX /D_CRT_SECURE_NO_WARNINGS)
-    add_definitions(/Ob2) # always inline
-    add_definitions(/Oi)  # enable intrinsics
-    add_definitions(/MP)  # multithreaded build
-    include_directories(compat/msvc)
-endif(MSVC)
-
-if("$ENV{CXX}" STREQUAL "icpc")
-    set(GCC 1)
-    add_definitions(-Wall -Wextra -Wshadow -no-vec)
-elseif(CMAKE_COMPILER_IS_GNUCXX)
-    execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
-    set(GCC 1)
-    add_definitions(-Wall -Wextra -Wshadow -mstackrealign)
-    if(NOT GCC_VERSION VERSION_LESS 4.7)
-        # this is necessary to avoid name conflicts in vector class
-        # library.  if vector classes are removed/replaced this can
-        # likely be removed as well.
-        add_definitions(-fabi-version=6)
-    endif()
-elseif(APPLE)
-    exec_program(uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
-    string(REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
-    message(STATUS "DARWIN_VERSION=${DARWIN_VERSION}")
-    if (DARWIN_VERSION GREATER 12)
-        # Detect Mac OS X 10.8 llvm/gcc frankenstein's monster
-        if (${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/c++")
-            execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
-            set(GCC 1)
-            add_definitions(-Wall -Wextra -Wshadow)
-        endif()
-    endif()
-endif()
-if (GCC)
-    option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
-    if(WARNINGS_AS_ERRORS)
-        add_definitions(-Werror)
-    endif(WARNINGS_AS_ERRORS)
-    if(X64 AND NOT WIN32)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
-    endif(X64 AND NOT WIN32)
-endif(GCC)
-
-option(HIGH_BIT_DEPTH "Use 16bit pixels internally" OFF)
-if(HIGH_BIT_DEPTH)
-    add_definitions(-DHIGH_BIT_DEPTH=1)
-else(HIGH_BIT_DEPTH)
-    add_definitions(-DHIGH_BIT_DEPTH=0)
-endif(HIGH_BIT_DEPTH)
-
-option(ENABLE_PRIMITIVES_VEC "Enable use of SIMD vector class primitives" ON)
-find_package(Yasm)
-if(YASM_FOUND)
-    if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
-        message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old. 1.2.0 or later required")
-    else()
-        message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
-        option(ENABLE_PRIMITIVES_ASM "Enable use of assembly coded primitives" ON)
-    endif()
-endif(YASM_FOUND)
-
-if(UNIX)
-    SET(PLATFORM_LIBS pthread m)
-    if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-        SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
-    endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-endif(UNIX)
-
-option(ENABLE_STLPORT "Enable use of STLPort STL replacement library" OFF)
-if(ENABLE_STLPORT)
-    set(STLPORT_LOCATION "${STLPORT_INCLUDE_DIR}" CACHE PATH "Location of STLPort sources")
-    # STLPort headers must be included before system headers
-    include_directories(BEFORE ${STLPORT_LOCATION}/stlport)
-    link_directories(${STLPORT_LOCATION}/lib)
-    if("$ENV{CXX}" STREQUAL "icl")
-        add_definitions(/Qwd1879)
-    endif()
-endif(ENABLE_STLPORT)
-
-option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
-if(ENABLE_PPA)
-    add_definitions(-DENABLE_PPA)
-    add_subdirectory(PPA)
-    SET(PLATFORM_LIBS ${PLATFORM_LIBS} PPA)
-    if(UNIX)
-        SET(PLATFORM_LIBS ${PLATFORM_LIBS} dl)
-    endif(UNIX)
-endif(ENABLE_PPA)
-
-if (WIN32)
-    # Visual leak detector
-    find_package(VLD)
-    if(VLD_FOUND)
-        add_definitions(-DHAVE_VLD)
-        include_directories(${VLD_INCLUDE_DIRS})
-        set(PLATFORM_LIBS ${PLATFORM_LIBS} ${VLD_LIBRARIES})
-        link_directories(${VLD_LIBRARY_DIRS})
-    endif()
-    option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
-    if(WINXP_SUPPORT)
-        # force workarounds for atomic intrinsics introduced after XP
-        add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
-    endif(WINXP_SUPPORT)
-endif()
-
-include_directories(. Lib common encoder)
-add_subdirectory(common)
-add_subdirectory(encoder)
-
-set(LIBS common encoder)
-if(ENABLE_PRIMITIVES_VEC)
-    set(LIBS ${LIBS} PrimitivesVec)
-endif(ENABLE_PRIMITIVES_VEC)
-
-if(ENABLE_PRIMITIVES_ASM)
-    set(LIBS ${LIBS} PrimitivesASM)
-endif(ENABLE_PRIMITIVES_ASM)
-
-if(NOT XCODE)
-    include(mergestaticlibs)
-    merge_static_libs(x265 ${LIBS})
-endif()
-
-# Test applications
-option(ENABLE_TESTS "Enable Unit Tests" OFF)
-if(ENABLE_TESTS)
-    add_subdirectory(test)
-endif(ENABLE_TESTS)
-
-# Main CLI application
-option(ENABLE_CLI "Build standalone CLI application" ON)
-if(ENABLE_CLI)
-    file(GLOB InputFiles input/*.cpp input/*.h)
-    file(GLOB OutputFiles output/*.cpp output/*.h)
-    source_group(input FILES ${InputFiles})
-    source_group(output FILES ${OutputFiles})
-    if (MSVC)
-        set_source_files_properties(compat/msvc/getopt.c PROPERTIES COMPILE_FLAGS "/wd4100")
-    elseif(GCC)
-        set_source_files_properties(compat/msvc/getopt.c PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter")
-    endif()
-
-    include(version) # determine X265_VERSION
-    set_source_files_properties(x265.cpp PROPERTIES COMPILE_FLAGS -DX265_VERSION=${X265_VERSION})
-    add_executable(cli ${EXTRAS} ../COPYING ${InputFiles} ${OutputFiles}
-        x265.cpp x265opts.h x265.h
-        compat/msvc/getopt.c compat/msvc/getopt.h)
-if(XCODE)
-    target_link_libraries(cli ${LIBS} ${PLATFORM_LIBS})
-else()
-    target_link_libraries(cli x265 ${PLATFORM_LIBS})
-endif()
-    SET_TARGET_PROPERTIES(cli PROPERTIES OUTPUT_NAME x265)
-endif(ENABLE_CLI)
+if(NOT CMAKE_BUILD_TYPE)
+    # default to Release build for GCC builds
+    set(CMAKE_BUILD_TYPE Release CACHE STRING
+        "Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
+        FORCE)
+endif()
+
+project (x265)
+cmake_minimum_required (VERSION 2.6)
+
+SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")
+
+if ("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
+    set(X64 1)
+    add_definitions(-DX86_64=1)
+endif()
+
+if (CMAKE_GENERATOR STREQUAL "Xcode")
+  set(XCODE 1)
+endif()
+if (APPLE)
+  add_definitions(-DMACOS)
+endif()
+
+# Enforce coding standards.  Full warnings and warnings as errors
+if("$ENV{CXX}" STREQUAL "icl")
+    set(MSVC 1)
+endif()
+if(MSVC)
+    option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
+    if (STATIC_LINK_CRT)
+        set(CompilerFlags CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS_RELEASE)
+        foreach(CompilerFlag ${CompilerFlags})
+            string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
+        endforeach()
+    endif (STATIC_LINK_CRT)
+    add_definitions(/W4 /WX /D_CRT_SECURE_NO_WARNINGS)
+    add_definitions(/Ob2) # always inline
+    add_definitions(/Oi)  # enable intrinsics
+    add_definitions(/MP)  # multithreaded build
+    include_directories(compat/msvc)
+endif(MSVC)
+
+if("$ENV{CXX}" STREQUAL "icpc")
+    set(GCC 1)
+    add_definitions(-Wall -Wextra -Wshadow -no-vec)
+elseif(CMAKE_COMPILER_IS_GNUCXX)
+    execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+    set(GCC 1)
+    add_definitions(-Wall -Wextra -Wshadow -mstackrealign)
+    if(NOT GCC_VERSION VERSION_LESS 4.7)
+        # this is necessary to avoid name conflicts in vector class
+        # library.  if vector classes are removed/replaced this can
+        # likely be removed as well.
+        add_definitions(-fabi-version=6)
+    endif()
+elseif(APPLE)
+    exec_program(uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION)
+    string(REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
+    message(STATUS "DARWIN_VERSION=${DARWIN_VERSION}")
+    if (DARWIN_VERSION GREATER 12)
+        # Detect Mac OS X 10.8 llvm/gcc frankenstein's monster
+        if (${CMAKE_CXX_COMPILER} STREQUAL "/usr/bin/c++")
+            execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
+            set(GCC 1)
+            add_definitions(-Wall -Wextra -Wshadow)
+        endif()
+    endif()
+endif()
+if (GCC)
+    option(WARNINGS_AS_ERRORS "Stop compiles on first warning" OFF)
+    if(WARNINGS_AS_ERRORS)
+        add_definitions(-Werror)
+    endif(WARNINGS_AS_ERRORS)
+    if(X64 AND NOT WIN32)
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+    endif(X64 AND NOT WIN32)
+endif(GCC)
+
+option(HIGH_BIT_DEPTH "Use 16bit pixels internally" OFF)
+if(HIGH_BIT_DEPTH)
+    add_definitions(-DHIGH_BIT_DEPTH=1)
+else(HIGH_BIT_DEPTH)
+    add_definitions(-DHIGH_BIT_DEPTH=0)
+endif(HIGH_BIT_DEPTH)
+
+option(ENABLE_PRIMITIVES_VEC "Enable use of SIMD vector class primitives" ON)
+find_package(Yasm)
+if(YASM_FOUND)
+    if (YASM_VERSION_STRING VERSION_LESS "1.2.0")
+        message(STATUS "Yasm version ${YASM_VERSION_STRING} is too old. 1.2.0 or later required")
+    else()
+        message(STATUS "Found Yasm ${YASM_VERSION_STRING} to build assembly primitives")
+        option(ENABLE_PRIMITIVES_ASM "Enable use of assembly coded primitives" ON)
+    endif()
+endif(YASM_FOUND)
+
+if(UNIX)
+    SET(PLATFORM_LIBS pthread m)
+    if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+        SET(PLATFORM_LIBS ${PLATFORM_LIBS} rt)
+    endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+endif(UNIX)
+
+option(ENABLE_STLPORT "Enable use of STLPort STL replacement library" OFF)
+if(ENABLE_STLPORT)
+    set(STLPORT_LOCATION "${STLPORT_INCLUDE_DIR}" CACHE PATH "Location of STLPort sources")
+    # STLPort headers must be included before system headers
+    include_directories(BEFORE ${STLPORT_LOCATION}/stlport)
+    link_directories(${STLPORT_LOCATION}/lib)
+    if("$ENV{CXX}" STREQUAL "icl")
+        add_definitions(/Qwd1879)
+    endif()
+endif(ENABLE_STLPORT)
+
+option(ENABLE_PPA "Enable PPA profiling instrumentation" OFF)
+if(ENABLE_PPA)
+    add_definitions(-DENABLE_PPA)
+    add_subdirectory(PPA)
+    SET(PLATFORM_LIBS ${PLATFORM_LIBS} PPA)
+    if(UNIX)
+        SET(PLATFORM_LIBS ${PLATFORM_LIBS} dl)
+    endif(UNIX)
+endif(ENABLE_PPA)
+
+option(ENABLE_OPENCL "Use OpenCL for motion estimation" OFF)
+if(ENABLE_OPENCL)
+    find_package(OpenCL)
+    if(OPENCL_FOUND)
+        add_definitions(-DHAVE_OPENCL)
+    endif()
+endif(ENABLE_OPENCL)
+
+if (WIN32)
+    # Visual leak detector
+    find_package(VLD)
+    if(VLD_FOUND)
+        add_definitions(-DHAVE_VLD)
+        include_directories(${VLD_INCLUDE_DIRS})
+        set(PLATFORM_LIBS ${PLATFORM_LIBS} ${VLD_LIBRARIES})
+        link_directories(${VLD_LIBRARY_DIRS})
+    endif()
+    option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
+    if(WINXP_SUPPORT)
+        # force workarounds for atomic intrinsics introduced after XP
+        add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
+    endif(WINXP_SUPPORT)
+endif()
+
+include_directories(. Lib common encoder)
+add_subdirectory(common)
+add_subdirectory(encoder)
+
+set(LIBS common encoder)
+if(ENABLE_PRIMITIVES_VEC)
+    set(LIBS ${LIBS} PrimitivesVec)
+endif(ENABLE_PRIMITIVES_VEC)
+
+if(ENABLE_PRIMITIVES_ASM)
+    set(LIBS ${LIBS} PrimitivesASM)
+endif(ENABLE_PRIMITIVES_ASM)
+
+if(NOT XCODE)
+    include(mergestaticlibs)
+    merge_static_libs(x265 ${LIBS})
+endif()
+
+# Test applications
+option(ENABLE_TESTS "Enable Unit Tests" OFF)
+if(ENABLE_TESTS)
+    add_subdirectory(test)
+endif(ENABLE_TESTS)
+
+# Main CLI application
+option(ENABLE_CLI "Build standalone CLI application" ON)
+if(ENABLE_CLI)
+    file(GLOB InputFiles input/*.cpp input/*.h)
+    file(GLOB OutputFiles output/*.cpp output/*.h)
+    source_group(input FILES ${InputFiles})
+    source_group(output FILES ${OutputFiles})
+    if (MSVC)
+        set_source_files_properties(compat/msvc/getopt.c PROPERTIES COMPILE_FLAGS "/wd4100")
+    elseif(GCC)
+        set_source_files_properties(compat/msvc/getopt.c PROPERTIES COMPILE_FLAGS "-Wno-unused-parameter")
+    endif()
+
+    include(version) # determine X265_VERSION
+    set_source_files_properties(x265.cpp PROPERTIES COMPILE_FLAGS -DX265_VERSION=${X265_VERSION})
+    add_executable(cli ${EXTRAS} ../COPYING ${InputFiles} ${OutputFiles}
+        x265.cpp x265opts.h x265.h
+        compat/msvc/getopt.c compat/msvc/getopt.h)
+if(XCODE)
+    target_link_libraries(cli ${LIBS} ${PLATFORM_LIBS} ${OPENCL_LIBRARIES})
+else()
+    target_link_libraries(cli x265 ${PLATFORM_LIBS} ${OPENCL_LIBRARIES})
+endif()
+    SET_TARGET_PROPERTIES(cli PROPERTIES OUTPUT_NAME x265)
+endif(ENABLE_CLI)
diff -r 0d33ff236f68 -r 97e1bed019a6 source/cmake/FindOpenCL.cmake
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/cmake/FindOpenCL.cmake	Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,58 @@
+# modified from OpenCV
+
+if(APPLE)
+  set(OPENCL_FOUND YES)
+  set(OPENCL_LIBRARY "-framework OpenCL" CACHE STRING "OpenCL library")
+  set(OPENCL_INCLUDE_DIR "" CACHE STRING "OpenCL include directory")
+  mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
+else(APPLE)
+  #find_package(OpenCL QUIET)
+
+  if (NOT OPENCL_FOUND)
+    find_path(OPENCL_ROOT_DIR
+              NAMES OpenCL/cl.h CL/cl.h include/CL/cl.h include/nvidia-current/CL/cl.h
+              PATHS ENV OCLROOT ENV AMDAPPSDKROOT ENV CUDA_PATH ENV INTELOCLSDKROOT
+              DOC "OpenCL root directory"
+              NO_DEFAULT_PATH)
+
+    find_path(OPENCL_INCLUDE_DIR
+              NAMES OpenCL/cl.h CL/cl.h
+              HINTS ${OPENCL_ROOT_DIR}
+              PATH_SUFFIXES include include/nvidia-current
+              DOC "OpenCL include directory"
+              NO_DEFAULT_PATH)
+
+    if(WIN32)
+      if(X64)
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win64 lib/x86_64 lib/x64)
+      else()
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib/Win32 lib/x86)
+      endif()
+    elseif(UNIX)
+      if(X64)
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib64 lib)
+      else()
+        set(OPENCL_POSSIBLE_LIB_SUFFIXES lib32 lib)
+      endif()
+    else()
+      set(OPENCL_POSSIBLE_LIB_SUFFIXES lib)
+    endif()
+
+    find_library(OPENCL_LIBRARY
+              NAMES OpenCL
+              HINTS ${OPENCL_ROOT_DIR}
+              PATH_SUFFIXES ${OPENCL_POSSIBLE_LIB_SUFFIXES}
+              DOC "OpenCL library"
+              NO_DEFAULT_PATH)
+
+    mark_as_advanced(OPENCL_INCLUDE_DIR OPENCL_LIBRARY)
+    include(FindPackageHandleStandardArgs)
+    FIND_PACKAGE_HANDLE_STANDARD_ARGS(OPENCL DEFAULT_MSG OPENCL_LIBRARY OPENCL_INCLUDE_DIR )
+  endif()
+endif(APPLE)
+
+if(OPENCL_FOUND)
+  set(HAVE_OPENCL 1)
+  set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR})
+  set(OPENCL_LIBRARIES    ${OPENCL_LIBRARY})
+endif()
diff -r 0d33ff236f68 -r 97e1bed019a6 source/cmake/cl2cpp.cmake
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/cmake/cl2cpp.cmake	Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,35 @@
+# modified from OpenCV
+
+file(GLOB cl_list "${CL_DIR}/*.h" "${CL_DIR}/*.cl" )
+
+file(WRITE ${OUTPUT} "// This file is auto-generated. Do not edit!
+
+namespace x265
+{
+
+")
+
+file(APPEND ${OUTPUT} "const char* x265_opencl_source=\n")
+
+foreach(cl ${cl_list})
+  file(READ "${cl}" lines)
+
+  string(REPLACE "\r" "" lines "${lines}\n")
+  string(REPLACE "\t" "  " lines "${lines}")
+
+  string(REGEX REPLACE "/\\*([^*]/|\\*[^/]|[^*/])*\\*/" ""   lines "${lines}") # multiline comments
+  string(REGEX REPLACE "/\\*([^\n])*\\*/"               ""   lines "${lines}") # single-line comments
+  string(REGEX REPLACE "[ ]*//[^\n]*\n"                 "\n" lines "${lines}") # single-line comments
+  string(REGEX REPLACE "\n[ ]*(\n[ ]*)*"                "\n" lines "${lines}") # empty lines & leading whitespace
+  string(REGEX REPLACE "^\n"                            ""   lines "${lines}") # leading new line
+
+  string(REPLACE "\\" "\\\\" lines "${lines}")
+  string(REPLACE "\"" "\\\"" lines "${lines}")
+  string(REPLACE "\n" "\\n\"\n\"" lines "${lines}")
+
+  string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof
+
+  file(APPEND ${OUTPUT} "\"${lines}")
+endforeach()
+
+file(APPEND ${OUTPUT} ";\n\n}\n")
\ No newline at end of file
diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt	Mon Sep 16 21:06:49 2013 +0800
+++ b/source/common/CMakeLists.txt	Tue Sep 17 13:41:27 2013 -0500
@@ -1,59 +1,73 @@
-if(GCC)
-    if (NOT X64)
-        # force gcc to generate code for sync primitives
-        set_source_files_properties(threadpool.cpp reference.cpp wavefront.cpp PROPERTIES COMPILE_FLAGS -march=i686)
-    endif()
-endif(GCC)
-
-if(ENABLE_PRIMITIVES_VEC)
-    add_definitions(-DENABLE_VECTOR_PRIMITIVES=1)
-endif(ENABLE_PRIMITIVES_VEC)
-if(ENABLE_PRIMITIVES_ASM)
-    add_definitions(-DENABLE_ASM_PRIMITIVES=1)
-endif(ENABLE_PRIMITIVES_ASM)
-
-include_directories(../VectorClass)
-
-file(GLOB LIBCOMMON_HDR ../Lib/TLibCommon/*.h)
-file(GLOB LIBCOMMON_SRC ../Lib/TLibCommon/*.cpp)
-source_group(TLibCommon FILES ${LIBCOMMON_SRC})
-source_group(TLibCommonH FILES ${LIBCOMMON_HDR})
-if(GCC)
-    set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS 
-        "-Wno-sign-compare")
-endif(GCC)
-if(MSVC)
-    # ignore these warnings from HM source
-    # /wd4244 type conversion, possible loss of data
-    # /wd4512 assignment operator could not be generated
-    # /wd4127 conditional expression is constant
-    # /wd4389 signed/unsigned mismatch
-    # /wd4018 '<' signed/unsigned mismatch
-    # /wd4800 performance warning: bool coersion
-    set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS 
-        "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
-    if ("$ENV{CXX}" STREQUAL "icl")
-        add_definitions(/Qwd2557) # signed/unsigned mismatch
-    endif()
-endif(MSVC)
-
-add_library(common STATIC ../../COPYING
-    ${LIBCOMMON_SRC} ${LIBCOMMON_HDR}
-    primitives.cpp primitives.h
-    pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp
-    ../VectorClass/instrset_detect.cpp
-    threading.cpp threading.h
-    threadpool.cpp threadpool.h
-    wavefront.h wavefront.cpp
-    md5.cpp md5.h
-    TShortYUV.cpp TShortYUV.h mv.h
-    reference.cpp reference.h
-    common.cpp common.h
-    lowres.cpp lowres.h)
-    
-if(ENABLE_PRIMITIVES_VEC)
-    add_subdirectory(vec)
-endif(ENABLE_PRIMITIVES_VEC)
-if(ENABLE_PRIMITIVES_ASM)
-    add_subdirectory(x86)
-endif(ENABLE_PRIMITIVES_ASM)
+if(GCC)
+    if (NOT X64)
+        # force gcc to generate code for sync primitives
+        set_source_files_properties(threadpool.cpp reference.cpp wavefront.cpp PROPERTIES COMPILE_FLAGS -march=i686)
+    endif()
+endif(GCC)
+
+if(ENABLE_PRIMITIVES_VEC)
+    add_definitions(-DENABLE_VECTOR_PRIMITIVES=1)
+endif(ENABLE_PRIMITIVES_VEC)
+if(ENABLE_PRIMITIVES_ASM)
+    add_definitions(-DENABLE_ASM_PRIMITIVES=1)
+endif(ENABLE_PRIMITIVES_ASM)
+
+include_directories(../VectorClass)
+
+file(GLOB LIBCOMMON_HDR ../Lib/TLibCommon/*.h)
+file(GLOB LIBCOMMON_SRC ../Lib/TLibCommon/*.cpp)
+source_group(TLibCommon FILES ${LIBCOMMON_SRC})
+source_group(TLibCommonH FILES ${LIBCOMMON_HDR})
+if(GCC)
+    set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS 
+        "-Wno-sign-compare")
+endif(GCC)
+if(MSVC)
+    # ignore these warnings from HM source
+    # /wd4244 type conversion, possible loss of data
+    # /wd4512 assignment operator could not be generated
+    # /wd4127 conditional expression is constant
+    # /wd4389 signed/unsigned mismatch
+    # /wd4018 '<' signed/unsigned mismatch
+    # /wd4800 performance warning: bool coersion
+    set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS 
+        "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
+    if ("$ENV{CXX}" STREQUAL "icl")
+        add_definitions(/Qwd2557) # signed/unsigned mismatch
+    endif()
+endif(MSVC)
+
+if(HAVE_OPENCL)
+    include_directories(${OPENCL_INCLUDE_DIRS})
+    set(OPENCL_KERNEL_HDR ${CMAKE_CURRENT_SOURCE_DIR}/oclkernels.h)
+    add_custom_command(
+      OUTPUT ${OPENCL_KERNEL_HDR}
+      COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/opencl" 
+              -DOUTPUT=${OPENCL_KERNEL_HDR} 
+              -P "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/cl2cpp.cmake"
+      DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/cl2cpp.cmake")
+    set(OPENCL_SRC opencl.cpp)
+    set(OPENCL_HDR opencl.h ${OPENCL_KERNEL_HDR}) 
+endif(HAVE_OPENCL)
+
+add_library(common STATIC ../../COPYING
+    ${LIBCOMMON_SRC} ${LIBCOMMON_HDR}
+    primitives.cpp primitives.h
+    pixel.cpp dct.cpp ipfilter.cpp intrapred.cpp
+    ../VectorClass/instrset_detect.cpp
+    threading.cpp threading.h
+    threadpool.cpp threadpool.h
+    wavefront.h wavefront.cpp
+    md5.cpp md5.h
+    TShortYUV.cpp TShortYUV.h mv.h
+    reference.cpp reference.h
+    common.cpp common.h
+    lowres.cpp lowres.h
+    ${OPENCL_SRC} ${OPENCL_HDR})
+
+if(ENABLE_PRIMITIVES_VEC)
+    add_subdirectory(vec)
+endif(ENABLE_PRIMITIVES_VEC)
+if(ENABLE_PRIMITIVES_ASM)
+    add_subdirectory(x86)
+endif(ENABLE_PRIMITIVES_ASM)
diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/opencl.cpp	Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,631 @@
+/*****************************************************************************
+ * opencl.c: OpenCL initialization and kernel compilation
+ *****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve at borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at multicorewareinc.com.
+ *****************************************************************************/
+
+#include "TLibCommon/CommonDef.h"
+#include "common.h"
+
+#if defined WIN32 || defined _WIN32
+#include <Windows.h>
+#else
+#include <dlfcn.h> //dlopen, dlsym, dlclose
+#endif
+#include "opencl.h"
+#include "oclkernels.h"
+#include "stdio.h"
+
+/* define from recent cl_ext.h, copied here in case headers are old */
+#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD        0x4042
+
+using namespace x265;
+
+// global variable
+static ocl g_ocl;
+
+static void CALLBACK oclErrorNotify( const char *errinfo, const void *private_info, size_t cb, void *user_data )
+{
+    /* Any error notification can be assumed to be fatal to the OpenCL context.
+     * We need to stop using it immediately to prevent further damage. */
+    x265_param_t *param = (x265_param_t*)user_data;
+    param->useOpenCL = 0;
+    //h->opencl.b_fatal_error = 1;
+    x265_log(param, X265_LOG_ERROR, "OpenCL: %s\n", errinfo );
+    x265_log(param, X265_LOG_ERROR, "OpenCL: fatal error, aborting encode\n" );
+    (void)cb;
+    (void)private_info;
+}
+
+ocl::ocl()
+    : m_context(NULL)
+    , m_device(NULL)
+    , m_queue(NULL)
+    , m_program(NULL)
+    , m_initialized(false)
+    , m_deviceAMDSI(false)
+    , m_fatalError(false)
+    , m_openclThreadPri(0)
+    , m_kernelMeStar(NULL)
+{}
+
+ocl::~ocl()
+{}
+
+
+bool ocl::init(x265_param_t *param)
+{
+    cl_int status;
+    cl_uint numPlatforms;
+    bool ret = false;
+
+    if (m_initialized)
+    {
+        return true;
+    }
+
+    status = clGetPlatformIDs(0, NULL, &numPlatforms);
+    if( status != CL_SUCCESS || numPlatforms == 0 )
+    {
+        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
+        return false;
+    }
+
+    cl_platform_id *platforms = (cl_platform_id*)X265_MALLOC(cl_platform_id, numPlatforms);
+    status = clGetPlatformIDs( numPlatforms, platforms, NULL );
+    if( status != CL_SUCCESS )
+    {
+        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to query installed platforms\n");
+        X265_FREE( platforms );
+        return false;
+    }
+
+    /* Select the first OpenCL platform with a GPU device that supports our
+     * required image (texture) formats */
+    for( cl_uint i = 0; i < numPlatforms; ++i )
+    {
+        cl_uint gpu_count = 0;
+        status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &gpu_count );
+        if( status != CL_SUCCESS || !gpu_count )
+            continue;
+
+        cl_device_id *devices = (cl_device_id*) X265_MALLOC( cl_device_id, gpu_count );
+        if( !devices )
+            continue;
+
+        status = clGetDeviceIDs( platforms[i], CL_DEVICE_TYPE_GPU, gpu_count, devices, NULL );
+        if( status != CL_SUCCESS )
+        {
+            X265_FREE( devices );
+            continue;
+        }
+
+        /* Find a GPU device that supports our image formats */
+        for( cl_uint gpu = 0; gpu < gpu_count; gpu++ )
+        {
+            cl_device_id device = devices[gpu];
+
+            /* if the user has specified an exact device ID, skip all other
+             * GPUs.  If this device matches, allow it to continue through the
+             * checks for supported images, etc.  */
+            if( param->OpenCLDeviceId && devices[gpu] != (cl_device_id) param->OpenCLDeviceId )
+                continue;
+
+            cl_bool image_support;
+            clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, NULL );
+            if( !image_support )
+                continue;
+
+            cl_context context = clCreateContext( NULL, 1, &device, oclErrorNotify, (void*)param, &status );
+            if( status != CL_SUCCESS )
+                continue;
+
+            cl_uint imagecount = 0;
+            clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &imagecount );
+            if( !imagecount )
+            {
+                clReleaseContext( context );
+                continue;
+            }
+
+            cl_image_format *imageType = (cl_image_format*)X265_MALLOC(cl_image_format, imagecount);
+            if( !imageType )
+            {
+                clReleaseContext( context );
+                continue;
+            }
+
+            clGetSupportedImageFormats( context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, imagecount, imageType, NULL );
+
+            int b_has_r = 0;
+            int b_has_rgba = 0;
+            for( cl_uint j = 0; j < imagecount; j++ )
+            {
+                if( imageType[j].image_channel_order == CL_R &&
+                    imageType[j].image_channel_data_type == CL_UNSIGNED_INT32 )
+                    b_has_r = 1;
+                else if( imageType[j].image_channel_order == CL_RGBA &&
+                         imageType[j].image_channel_data_type == CL_UNSIGNED_INT8 )
+                    b_has_rgba = 1;
+            }
+            X265_FREE( imageType );
+            if( !b_has_r || !b_has_rgba )
+            {
+                char devname[64];
+                status = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(devname), devname, NULL);
+                if( status == CL_SUCCESS )
+                {
+                    /* emit warning if we are discarding the user's explicit choice */
+                    int level = param->OpenCLDeviceId ? X265_LOG_WARNING : X265_LOG_DEBUG;
+                    x265_log(param, level, "OpenCL: %s does not support required image formats\n", devname);
+                }
+                clReleaseContext( context );
+                continue;
+            }
+
+            /* user selection of GPU device, skip N first matches */
+            if( param->iOpenCLDevice )
+            {
+                param->iOpenCLDevice--;
+                clReleaseContext( context );
+                continue;
+            }
+
+            cl_command_queue queue = clCreateCommandQueue( context, device, 0, &status );
+            if( status != CL_SUCCESS )
+            {
+                clReleaseContext( context );
+                continue;
+            }
+
+            m_context = context;
+            m_device = device;
+            m_queue = queue;
+
+            ret = true;
+            break;
+        }
+
+        X265_FREE( devices );
+
+        if (ret)
+        {
+            break;
+        }
+    }
+
+    X265_FREE( platforms );
+
+    if (!param->pszCLbinFile)
+    {
+        param->pszCLbinFile = "x265_kernels.clbin";
+    }
+
+    if (!ret)
+    {
+        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to find a compatible device\n");
+    }
+    else
+    {
+        ret = createKernels(param);
+    }
+
+    m_initialized = ret;
+
+    return ret;
+}
+
+bool ocl::createKernels(x265_param_t *param)
+{
+    static const char *kernelnames[] = {
+        "motion_estimate"
+    };
+    cl_kernel *kernels[] = {
+        &m_kernelMeStar
+    };
+    cl_int status;
+
+    compile(param);
+    if( !m_program )
+    {
+        return false;
+    }
+
+    for( int i = 0; i < sizeof(kernelnames)/sizeof(kernelnames[0]); i++ )
+    {
+        *kernels[i] = clCreateKernel(m_program, kernelnames[i], &status);
+        if( status != CL_SUCCESS )
+        {
+            x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to compile kernel '%s' (%d)\n", kernelnames[i], status );
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void ocl::destroy()
+{
+    if (m_queue)
+    {
+        clFinish(m_queue);
+    }
+
+    if (m_kernelMeStar)
+    {
+        clReleaseKernel(m_kernelMeStar);
+        m_kernelMeStar = NULL;
+    }
+
+    if (m_program)
+    {
+        clReleaseProgram(m_program);
+        m_program = NULL;
+    }
+
+    if (m_queue)
+    {
+        clReleaseCommandQueue( m_queue );
+        m_queue = NULL;
+    }
+
+    if (m_context)
+    {
+        clReleaseContext( m_context );
+        m_context = NULL;
+    }
+
+    m_initialized = false;
+}
+
+/* Try to load the cached compiled program binary, verify the device context is
+ * still valid before reuse */
+void ocl::cache_load(x265_param_t *param, char *devname, char *devvendor, char *driverversion)
+{
+    cl_int status;
+
+    /* try to load cached program binary */
+    FILE *fp = fopen( param->pszCLbinFile, "rb" );
+    if( !fp )
+        return;
+
+    fseek( fp, 0L, SEEK_END );
+    size_t size = ftell( fp );
+    rewind( fp );
+    uint8_t *binary = (uint8_t*)X265_MALLOC( uint8_t, size );
+    if (!binary)
+        return;
+
+    fread( binary, 1, size, fp );
+    const uint8_t *ptr = (const uint8_t*)binary;
+
+#define CHECK_STRING( STR )\
+    {\
+        size_t len = strlen( STR );\
+        if( size <= len || strncmp( (char*)ptr, STR, len ) )\
+            goto fail;\
+        else {\
+            size -= (len+1); ptr += (len+1);\
+        }\
+    }
+
+    CHECK_STRING( devname );
+    CHECK_STRING( devvendor );
+    CHECK_STRING( driverversion );
+    //CHECK_STRING( x265_opencl_source_hash );
+#undef CHECK_STRING
+
+    m_program = clCreateProgramWithBinary( m_context, 1, &m_device, &size, &ptr, NULL, &status );
+    if (status != CL_SUCCESS)
+    {
+        m_program = NULL;
+    }
+
+fail:
+    fclose( fp );
+    X265_FREE( binary );
+}
+
+/* Save the compiled program binary to a file for later reuse.  Device context
+ * is also saved in the cache file so we do not reuse stale binaries */
+void ocl::cache_save(x265_param_t *param, char *devname, char *devvendor, char *driverversion)
+{
+    FILE *fp = fopen(param->pszCLbinFile, "wb");
+    if( !fp )
+    {
+        x265_log(param, X265_LOG_INFO, "OpenCL: unable to open clbin file for write");
+        return;
+    }
+
+    size_t size;
+    cl_int status = clGetProgramInfo( m_program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &size, NULL );
+    if( status == CL_SUCCESS )
+    {
+        uint8_t *binary = (uint8_t*)X265_MALLOC(uint8_t, size);
+        status = clGetProgramInfo( m_program, CL_PROGRAM_BINARIES, sizeof(uint8_t *), &binary, NULL );
+        if( status == CL_SUCCESS )
+        {
+            fputs( devname, fp );
+            fputc( '\n', fp );
+            fputs( devvendor, fp );
+            fputc( '\n', fp );
+            fputs( driverversion, fp );
+            fputc( '\n', fp );
+            //fputs( x265_opencl_source_hash, fp );
+            //fputc( '\n', fp );
+            fwrite( binary, 1, size, fp );
+        }
+        else
+            x265_log(param, X265_LOG_INFO, "OpenCL: Unable to query program binary, no cache file generated");
+        X265_FREE( binary );
+    }
+    else
+        x265_log(param, X265_LOG_INFO, "OpenCL: Unable to query program binary size, no cache file generated");
+    fclose( fp );
+
+    return;
+}
+
+
+oclME::oclME(x265_param_t *param)
+    : m_pageLockedBuffer(NULL)
+    , m_pageLockedPtr(NULL)
+    , m_plOccupancy(0)
+    , m_numCopies(0)
+    , m_mvCostsBuffer(NULL)
+    , m_mvpBuffer(NULL)
+{
+    for (int i = 0; i < 2; i++)
+    {
+        m_mvBuffer[i];
+        m_costsBuffer[i];
+        m_frameStatsBuffer[i];
+    }
+
+    m_ocl = &g_ocl;
+    init(param);
+}
+
+/* The OpenCL source under common/opencl will be merged into common/oclkernels.h 
+ * by CMake. It defines a x265_opencl_source string which we will pass
+ * to clCreateProgramWithSource().  We also attempt to use a cache file for the
+ * compiled binary, stored in the current working folder. */
+bool ocl::compile(x265_param_t *param)
+{
+    cl_int status;
+    char devname[64];
+    char devvendor[64];
+    char driverversion[64];
+
+    status  = clGetDeviceInfo( m_device, CL_DEVICE_NAME,    sizeof(devname), devname, NULL );
+    status |= clGetDeviceInfo( m_device, CL_DEVICE_VENDOR,  sizeof(devvendor), devvendor, NULL );
+    status |= clGetDeviceInfo( m_device, CL_DRIVER_VERSION, sizeof(driverversion), driverversion, NULL );
+    if (status != CL_SUCCESS)
+    {
+        return false;
+    }
+
+    // Most AMD GPUs have vector registers
+    int vectorize = !strcmp( devvendor, "Advanced Micro Devices, Inc." );
+    m_deviceAMDSI = false;
+
+    if (vectorize)
+    {
+        /* Disable OpenCL on Intel/AMD switchable graphics devices */
+        if (detectSwitchableGraphics())
+        {
+            x265_log(param, X265_LOG_INFO, "OpenCL acceleration disabled, switchable graphics detected\n" );
+            return false;
+        }
+
+        /* Detect AMD SouthernIsland or newer device (single-width registers) */
+        cl_uint simdwidth = 4;
+        status = clGetDeviceInfo( m_device, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(cl_uint), &simdwidth, NULL );
+        if( status == CL_SUCCESS && simdwidth == 1 )
+        {
+            vectorize = 0;
+            m_deviceAMDSI = true;
+        }
+    }
+
+    x265_log(param, X265_LOG_INFO, "OpenCL acceleration enabled with %s %s %s\n", devvendor, devname, m_deviceAMDSI ? "(SI)" : "" );
+
+    cache_load(param, devname, devvendor, driverversion);
+    if( !m_program )
+    {
+        /* clCreateProgramWithSource() requires a pointer variable, you cannot just use &x265_opencl_source */
+        x265_log(param, X265_LOG_INFO, "Compiling OpenCL kernels...\n" );
+        size_t size = strlen(x265_opencl_source);
+        m_program = clCreateProgramWithSource( m_context, 1, &x265_opencl_source, &size, &status );
+        if( status != CL_SUCCESS || !m_program )
+        {
+            x265_log(param, X265_LOG_WARNING, "OpenCL: unable to create program\n" );
+            return false;
+        }
+    }
+
+    /* Build the program binary for the OpenCL device */
+    const char *buildopts = vectorize ? "-DVECTORIZE=1" : "";
+    status = clBuildProgram( m_program, 1, &m_device, buildopts, NULL, NULL );
+    if( status == CL_SUCCESS )
+    {
+        cache_save(param, devname, devvendor, driverversion);
+        return true;
+    }
+
+    /* Compile failure, should not happen with production code. */
+
+    size_t build_log_len = 0;
+
+    status = clGetProgramBuildInfo( m_program, m_device, CL_PROGRAM_BUILD_LOG, build_log_len, NULL, &build_log_len );
+    if( status != CL_SUCCESS )
+    {
+        x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed, unable to query build log\n" );
+        return false;
+    }
+
+    char *build_log = (char*)X265_MALLOC(char, build_log_len);
+    if( !build_log )
+    {
+        x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed, unable to alloc build log\n" );
+        return false;
+    }
+
+    status = clGetProgramBuildInfo( m_program, m_device, CL_PROGRAM_BUILD_LOG, build_log_len, build_log, NULL );
+    if( status != CL_SUCCESS )
+    {
+        x265_log(param, X265_LOG_WARNING, "OpenCL: Compilation failed, unable to get build log\n" );
+        X265_FREE( build_log );
+        return false;
+    }
+
+    FILE *lg = fopen( "x265_kernel_build_log.txt", "w" );
+    if( lg )
+    {
+        fwrite( build_log, 1, build_log_len, lg );
+        fclose( lg );
+        x265_log(param, X265_LOG_WARNING, "OpenCL: kernel build errors written to x265_kernel_build_log.txt\n" );
+    }
+
+    X265_FREE( build_log );
+
+    clReleaseProgram(m_program);
+    m_program = NULL;
+
+    return false;
+}
+
+oclME::~oclME()
+{
+}
+
+void oclME::destroy()
+{
+}
+
+bool oclME::init(x265_param_t *param)
+{
+    cl_int status;
+
+    if (!m_ocl)
+    {
+        return false;
+    }
+    m_ocl->init(param);
+
+    m_pageLockedBuffer = clCreateBuffer(m_ocl->m_context, CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR, PAGE_LOCKED_BUF_SIZE, NULL, &status);
+    if( status != CL_SUCCESS )
+    {
+        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to allocate page-locked buffer, error '%d'\n", status );
+        return false;
+    }
+    m_pageLockedPtr = (char*) clEnqueueMapBuffer(m_ocl->m_queue, m_pageLockedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
+                                                 0, PAGE_LOCKED_BUF_SIZE, 0, NULL, NULL, &status);
+    if( status != CL_SUCCESS )
+    {
+        x265_log(param, X265_LOG_WARNING, "OpenCL: Unable to map page-locked buffer, error '%d'\n", status );
+        return false;
+    }
+
+    return true;
+}
+
+/* OpenCL misbehaves on hybrid laptops with Intel iGPU and AMD dGPU, so
+ * we consult AMD's ADL interface to detect this situation and disable
+ * OpenCL on these machines (Linux and Windows) */
+#ifndef _WIN32
+#define __stdcall
+#define HINSTANCE void *
+#endif
+typedef void* ( __stdcall *ADL_MAIN_MALLOC_CALLBACK )( int );
+typedef int ( *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int );
+typedef int ( *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* );
+typedef int ( *ADL_POWERXPRESS_SCHEME_GET ) ( int, int *, int *, int * );
+typedef int ( *ADL_MAIN_CONTROL_DESTROY )();
+#define ADL_OK 0
+#define ADL_PX_SCHEME_DYNAMIC 2
+
+void* __stdcall adl_malloc_wrapper( int iSize ) { return X265_MALLOC(char, iSize); }
+
+int ocl::detectSwitchableGraphics()
+{
+    ADL_MAIN_CONTROL_CREATE          ADL_Main_Control_Create;
+    ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get;
+    ADL_POWERXPRESS_SCHEME_GET       ADL_PowerXpress_Scheme_Get;
+    ADL_MAIN_CONTROL_DESTROY         ADL_Main_Control_Destroy;
+    HINSTANCE hDLL;
+    int ret = 0;
+
+#if _WIN32
+    hDLL = LoadLibrary( "atiadlxx.dll" );
+    if( !hDLL )
+        hDLL = LoadLibrary( "atiadlxy.dll" );
+#else
+    hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL );
+#define GetProcAddress dlsym
+#endif
+    if( !hDLL )
+        return ret;
+
+    ADL_Main_Control_Create = (ADL_MAIN_CONTROL_CREATE) GetProcAddress(hDLL, "ADL_Main_Control_Create");
+    ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY) GetProcAddress(hDLL, "ADL_Main_Control_Destroy");
+    ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET) GetProcAddress(hDLL, "ADL_Adapter_NumberOfAdapters_Get");
+    ADL_PowerXpress_Scheme_Get = (ADL_POWERXPRESS_SCHEME_GET) GetProcAddress(hDLL, "ADL_PowerXpress_Scheme_Get");
+    if( !ADL_Main_Control_Destroy || !ADL_Main_Control_Destroy || !ADL_Adapter_NumberOfAdapters_Get ||
+        !ADL_PowerXpress_Scheme_Get )
+        goto bail;
+
+    if( ADL_OK != ADL_Main_Control_Create( adl_malloc_wrapper, 1) )
+        goto bail;
+
+    int numAdapters = 0;
+    if( ADL_OK != ADL_Adapter_NumberOfAdapters_Get( &numAdapters ) )
+    {
+        ADL_Main_Control_Destroy();
+        goto bail;
+    }
+
+    for( int i = 0; i < numAdapters; i++ )
+    {
+        int PXSchemeRange, PXSchemeCurrentState, PXSchemeDefaultState;
+        if( ADL_OK != ADL_PowerXpress_Scheme_Get( i, &PXSchemeRange, &PXSchemeCurrentState, &PXSchemeDefaultState) )
+            break;
+
+        if( PXSchemeRange >= ADL_PX_SCHEME_DYNAMIC )
+        {
+            ret = 1;
+            break;
+        }
+    }
+
+    ADL_Main_Control_Destroy();
+
+bail:
+#if _WIN32
+    FreeLibrary( hDLL );
+#else
+    dlclose( hDLL );
+#endif
+
+    return ret;
+}
+
diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/opencl.h	Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,128 @@
+/*****************************************************************************
+ * opencl.h: OpenCL structures and defines
+ *****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Steve Borho <steve at borho.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at multicorewareinc.com.
+ *****************************************************************************/
+
+#ifndef X265_OPENCL_H
+#define X265_OPENCL_H
+
+#if defined __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif
+
+#include "x265.h"
+
+namespace x265
+{
+
+/* Number of PCIe copies that can be queued before requiring a flush */
+#define MAX_FINISH_COPIES 1024
+
+/* Size (in bytes) of the page-locked buffer used for PCIe xfers */
+#define PAGE_LOCKED_BUF_SIZE 32 * 1024 * 1024
+
+/* We define CL_QUEUE_THREAD_HANDLE_AMD here because it is not defined
+ * in the OpenCL headers shipped with NVIDIA drivers.  We need to be
+ * able to compile on an NVIDIA machine and run optimally on an AMD GPU. */
+#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
+
+#define OCLCHECK( method, ... )\
+    status = method( __VA_ARGS__ );\
+    if( status != CL_SUCCESS ) {\
+        param->useOpenCL = 0;\
+        m_ocl->m_fatalError = 1;\
+        x264_log( param, X264_LOG_ERROR, # method " error '%d'\n", status );\
+        return status;\
+    }
+
+class ocl
+{
+public:
+
+    ocl();
+    ~ocl();
+
+    cl_context       m_context;
+    cl_device_id     m_device;
+    cl_command_queue m_queue;
+    cl_program       m_program;
+
+    bool             m_initialized;
+    bool             m_deviceAMDSI;
+    bool             m_fatalError;
+    int              m_openclThreadPri;
+
+    cl_kernel        m_kernelMeStar;
+
+    bool       init(x265_param_t *param);
+    void       destroy();
+    int        detectSwitchableGraphics();
+
+private:
+
+    bool       createKernels(x265_param_t *param);
+    bool       compile(x265_param_t *param);
+    void       cache_load(x265_param_t *param, char *devname, char *devvendor, char *driverversion);
+    void       cache_save(x265_param_t *param, char *devname, char *devvendor, char *driverversion);
+};
+
+
+/* motion estimation */
+class oclME
+{
+public:
+
+    oclME(x265_param_t *param);
+    ~oclME();
+
+    ocl*             m_ocl;
+
+    cl_mem           m_pageLockedBuffer;
+    char            *m_pageLockedPtr;
+    int              m_plOccupancy;
+
+    struct
+    {
+        void *src;
+        void *dest;
+        int   bytes;
+    } m_copies[MAX_FINISH_COPIES];
+    int              m_numCopies;
+
+    cl_mem           m_mvBuffer[2];
+    cl_mem           m_mvCostsBuffer;
+    cl_mem           m_mvpBuffer;
+
+    cl_mem           m_costsBuffer[2];
+    cl_mem           m_frameStatsBuffer[2]; /* cost_est, cost_est_aq, intra_mbs */
+
+    bool       init(x265_param_t *param);
+    void       destroy();
+
+};
+
+} // end namespace x265
+
+#endif
diff -r 0d33ff236f68 -r 97e1bed019a6 source/common/opencl/me.cl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/opencl/me.cl	Tue Sep 17 13:41:27 2013 -0500
@@ -0,0 +1,15 @@
+/* motion estimate */
+
+__kernel void motion_estimate( read_only image2d_t    fenc,
+                               read_only image2d_t    fref,
+                               const __global short2 *in_mvs,
+                               __global short2       *out_mvs,
+                               __global short        *out_mv_costs,
+                               __global short2       *mvp_buffer,
+                               __local short         *cost_local,
+                               __local short2        *mvc_local,
+                               int                    mb_width,
+                               int                    lambda,
+                               int                    me_range)
+{
+}
diff -r 0d33ff236f68 -r 97e1bed019a6 source/encoder/CMakeLists.txt
--- a/source/encoder/CMakeLists.txt	Mon Sep 16 21:06:49 2013 +0800
+++ b/source/encoder/CMakeLists.txt	Tue Sep 17 13:41:27 2013 -0500
@@ -1,43 +1,47 @@
-if(GCC)
-    # encoder.cpp must include HM headers which are not careful about named parameters
-    set_source_files_properties(encoder.cpp PROPERTIES COMPILE_FLAGS -Wno-unused-parameter)
-endif(GCC)
-
-if(GCC)
-    if (NOT X64)
-        # force gcc to generate code for sync primitives
-        set_source_files_properties(framefilter.cpp PROPERTIES COMPILE_FLAGS -march=i686)
-    endif()
-endif(GCC)
-
-file(GLOB LIBENCODER_HDR  ../Lib/TLibEncoder/*.h)
-file(GLOB LIBENCODER_SRC  ../Lib/TLibEncoder/*.cpp)
-source_group(TLibEncoder FILES ${LIBENCODER_SRC})
-source_group(TLibEncoderH FILES ${LIBENCODER_HDR})
-if(GCC)
-    set_source_files_properties(compress.cpp ${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS 
-        "-Wno-sign-compare")
-endif(GCC)
-if(MSVC)
-    # ignore these warnings from HM source
-    # /wd4244 type conversion, possible loss of data
-    # /wd4512 assignment operator could not be generated
-    # /wd4127 conditional expression is constant
-    # /wd4389 signed/unsigned mismatch
-    # /wd4018 '<' signed/unsigned mismatch
-    # /wd4800 performance warning: bool coersion
-    set_source_files_properties(${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS 
-        "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
-endif(MSVC)
-
-add_library(encoder STATIC ../../COPYING ../x265.h
-    ${LIBENCODER_SRC} ${LIBENCODER_HDR}
-    bitcost.cpp bitcost.h
-    motion.cpp motion.h
-    slicetype.cpp slicetype.h
-    frameencoder.cpp frameencoder.h
-    framefilter.cpp framefilter.h
-    cturow.cpp cturow.h
-    dpb.cpp dpb.h
-    ratecontrol.cpp ratecontrol.h
-    compress.cpp encoder.cpp)
+if(GCC)
+    # encoder.cpp must include HM headers which are not careful about named parameters
+    set_source_files_properties(encoder.cpp PROPERTIES COMPILE_FLAGS -Wno-unused-parameter)
+endif(GCC)
+
+if(GCC)
+    if (NOT X64)
+        # force gcc to generate code for sync primitives
+        set_source_files_properties(framefilter.cpp PROPERTIES COMPILE_FLAGS -march=i686)
+    endif()
+endif(GCC)
+
+file(GLOB LIBENCODER_HDR  ../Lib/TLibEncoder/*.h)
+file(GLOB LIBENCODER_SRC  ../Lib/TLibEncoder/*.cpp)
+source_group(TLibEncoder FILES ${LIBENCODER_SRC})
+source_group(TLibEncoderH FILES ${LIBENCODER_HDR})
+if(GCC)
+    set_source_files_properties(compress.cpp ${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS 
+        "-Wno-sign-compare")
+endif(GCC)
+if(MSVC)
+    # ignore these warnings from HM source
+    # /wd4244 type conversion, possible loss of data
+    # /wd4512 assignment operator could not be generated
+    # /wd4127 conditional expression is constant
+    # /wd4389 signed/unsigned mismatch
+    # /wd4018 '<' signed/unsigned mismatch
+    # /wd4800 performance warning: bool coersion
+    set_source_files_properties(${LIBENCODER_SRC} PROPERTIES COMPILE_FLAGS 
+        "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
+endif(MSVC)
+
+if(HAVE_OPENCL)
+    include_directories(${OPENCL_INCLUDE_DIRS})
+endif(HAVE_OPENCL)
+
+add_library(encoder STATIC ../../COPYING ../x265.h
+    ${LIBENCODER_SRC} ${LIBENCODER_HDR}
+    bitcost.cpp bitcost.h
+    motion.cpp motion.h
+    slicetype.cpp slicetype.h
+    frameencoder.cpp frameencoder.h
+    framefilter.cpp framefilter.h
+    cturow.cpp cturow.h
+    dpb.cpp dpb.h
+    ratecontrol.cpp ratecontrol.h
+    compress.cpp encoder.cpp)
diff -r 0d33ff236f68 -r 97e1bed019a6 source/x265.h
--- a/source/x265.h	Mon Sep 16 21:06:49 2013 +0800
+++ b/source/x265.h	Tue Sep 17 13:41:27 2013 -0500
@@ -277,6 +277,12 @@
         int       qp;                          ///< Constant QP base value
         int       rateFactor;                  ///< Constant rate factor (CRF)
     } rc;
+
+    // opencl
+    int       useOpenCL;                       ///<  use OpenCL when available
+    int       iOpenCLDevice;                   ///<  specify count of GPU devices to skip, for CLI users
+    void     *OpenCLDeviceId;                  ///<  pass explicit cl_device_id as void*, for API users
+    char     *pszCLbinFile;                    ///<  compiled OpenCL kernel cache file
 }
 x265_param_t;
 


More information about the x265-devel mailing list