[x265] [PATCH 307 of 307] x86:AVX512 Set run time flag to enable/disable avx512

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:35:05 CEST 2018


# HG changeset patch
# User Jayashree <jayashree.c at multicorewareinc.com>
# Date 1522928767 -19800
#      Thu Apr 05 17:16:07 2018 +0530
# Node ID f6ad2fa637fd3c8f9e2811982b89aa28228e9f6b
# Parent  876b6e006f2080072c0684dbf75e7cfde974ba79
x86:AVX512 Set run time flag to enable/disable avx512

diff -r 876b6e006f20 -r f6ad2fa637fd source/common/cpu.cpp
--- a/source/common/cpu.cpp	Mon Feb 05 10:39:00 2018 -0800
+++ b/source/common/cpu.cpp	Thu Apr 05 17:16:07 2018 +0530
@@ -122,7 +122,7 @@
 #pragma warning(disable: 4309) // truncation of constant value
 #endif
 
-uint32_t cpu_detect(void)
+uint32_t cpu_detect(bool benableavx512 )
 {
     uint32_t cpu = 0;
 
@@ -184,11 +184,13 @@
         {
             if (ebx & 0x00000020)
                 cpu |= X265_CPU_AVX2;
-
-            if ((xcr0 & 0xE0) == 0xE0) /* OPMASK/ZMM state */
+            if (benableavx512)
             {
-                if ((ebx & 0xD0030000) == 0xD0030000)
-                    cpu |= X265_CPU_AVX512;
+                if ((xcr0 & 0xE0) == 0xE0) /* OPMASK/ZMM state */
+                {
+                    if ((ebx & 0xD0030000) == 0xD0030000)
+                        cpu |= X265_CPU_AVX512;
+                }
             }
         }
     }
@@ -327,7 +329,7 @@
 int PFX(cpu_fast_neon_mrc_test)(void);
 }
 
-uint32_t cpu_detect(void)
+uint32_t cpu_detect(bool benableavx512)
 {
     int flags = 0;
 
@@ -370,7 +372,7 @@
 
 #elif X265_ARCH_POWER8
 
-uint32_t cpu_detect(void)
+uint32_t cpu_detect(bool benableavx512)
 {
 #if HAVE_ALTIVEC
     return X265_CPU_ALTIVEC;
@@ -381,7 +383,7 @@
 
 #else // if X265_ARCH_POWER8
 
-uint32_t cpu_detect(void)
+uint32_t cpu_detect(bool benableavx512)
 {
     return 0;
 }
diff -r 876b6e006f20 -r f6ad2fa637fd source/common/cpu.h
--- a/source/common/cpu.h	Mon Feb 05 10:39:00 2018 -0800
+++ b/source/common/cpu.h	Thu Apr 05 17:16:07 2018 +0530
@@ -50,7 +50,7 @@
 #endif
 
 namespace X265_NS {
-uint32_t cpu_detect(void);
+uint32_t cpu_detect(bool);
 
 struct cpu_name_t
 {
diff -r 876b6e006f20 -r f6ad2fa637fd source/common/param.cpp
--- a/source/common/param.cpp	Mon Feb 05 10:39:00 2018 -0800
+++ b/source/common/param.cpp	Thu Apr 05 17:16:07 2018 +0530
@@ -99,13 +99,13 @@
 {
     x265_free(p);
 }
-
+bool  benableavx512 = false;
 void x265_param_default(x265_param* param)
 {
     memset(param, 0, sizeof(x265_param));
 
     /* Applying default values to all elements in the param structure */
-    param->cpuid = X265_NS::cpu_detect();
+    param->cpuid = X265_NS::cpu_detect(benableavx512);
     param->bEnableWavefront = 1;
     param->frameNumThreads = 0;
 
@@ -609,6 +609,17 @@
     if (0) ;
     OPT("asm")
     {
+        sscanf(value, "%s", p->asmname);
+        if (strcmp(value, "avx512")==0)
+        {
+            p->bEnableavx512 = 1;
+            benableavx512 = true;
+        }
+        else
+        {
+            p->bEnableavx512 = 0;
+            benableavx512 = false;
+        }
         if (bValueWasNull)
             p->cpuid = atobool(value);
         else
@@ -1072,7 +1083,7 @@
     if (isdigit(value[0]))
         cpu = x265_atoi(value, bError);
     else
-        cpu = !strcmp(value, "auto") || x265_atobool(value, bError) ? X265_NS::cpu_detect() : 0;
+        cpu = !strcmp(value, "auto") || x265_atobool(value, bError) ? X265_NS::cpu_detect(benableavx512) : 0;
 
     if (bError)
     {
diff -r 876b6e006f20 -r f6ad2fa637fd source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Mon Feb 05 10:39:00 2018 -0800
+++ b/source/test/pixelharness.cpp	Thu Apr 05 17:16:07 2018 +0530
@@ -332,8 +332,9 @@
     memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
     memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
     int j = 0;
+    bool enableavx512 = true;
     int width = 16 * (rand() % 4 + 1);
-    int cpuid = X265_NS::cpu_detect();
+    int cpuid = X265_NS::cpu_detect(enableavx512);
     if (cpuid & X265_CPU_AVX512)
         width = 32 * (rand() % 2 + 1);
     int height = 8;
diff -r 876b6e006f20 -r f6ad2fa637fd source/test/testbench.cpp
--- a/source/test/testbench.cpp	Mon Feb 05 10:39:00 2018 -0800
+++ b/source/test/testbench.cpp	Thu Apr 05 17:16:07 2018 +0530
@@ -96,7 +96,8 @@
 
 int main(int argc, char *argv[])
 {
-    int cpuid = X265_NS::cpu_detect();
+    bool enableavx512 = true;
+    int cpuid = X265_NS::cpu_detect(enableavx512);
     const char *testname = 0;
 
     if (!(argc & 1))
diff -r 876b6e006f20 -r f6ad2fa637fd source/x265.h
--- a/source/x265.h	Mon Feb 05 10:39:00 2018 -0800
+++ b/source/x265.h	Thu Apr 05 17:16:07 2018 +0530
@@ -585,7 +585,14 @@
      * somehow flawed on your target hardware. The asm function tables are
      * process global, the first encoder configures them for all encoders */
     int       cpuid;
-
+    /*== Assembly Features ==*/
+    /* x265_param_parse() detects whether AVX-512 was requested on the CLI and
+     * sets bEnableavx512 to 1 to use AVX-512 SIMD. By default this flag is not set,
+     * so encoding runs without AVX-512 assembly primitives even if the CPU has
+     * AVX-512 capabilities.
+     * Use --asm avx512 if you need to encode with AVX-512 assembly primitives. */
+    int     bEnableavx512;
+    char*   asmname;
     /*== Parallelism Features ==*/
 
     /* Number of concurrently encoded frames between 1 and X265_MAX_FRAME_THREADS


More information about the x265-devel mailing list