[x265] [PATCH 307 of 307] x86:AVX512 Set run time flag to enable/disable avx512

Mateusz mateuszb at poczta.onet.pl
Fri Apr 13 16:09:10 CEST 2018


W dniu 13.04.2018 o 15:43, Ashok Kumar Mishra pisze:
> We are not seeing this issue on our side; could you please share your machine configuration?
> Thanks.

CPU i7 8700, 64-bit Win10 Home 1709 16299.371, 16 GB RAM.

The bug is fixed in the current stable tip (by the commit "remove unused asmname from x265_param").

f:\t>x265 -V
x265 [info]: HEVC encoder version 2.7+336-07defe235cde
x265 [info]: build info [Windows][MSVC 1914][64 bit] 8bit
x265 [info]: using cpu capabilities: MMX2 SSE2Fast LZCNT SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2

f:\t>x265 --asm sse4 -V
***** HANGS *****

f:\t>x265n -V
x265 [info]: HEVC encoder version 2.7+340-aa9102400f24
x265 [info]: build info [Windows][MSVC 1914][64 bit] 8bit
x265 [info]: using cpu capabilities: MMX2 SSE2Fast LZCNT SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2

f:\t>x265n --asm sse4 -V
x265 [info]: HEVC encoder version 2.7+340-aa9102400f24
x265 [info]: build info [Windows][MSVC 1914][64 bit] 8bit
x265 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.1

Old GCC builds are not affected (probably because sscanf in GCC builds checks whether the last argument is NULL).

Mateusz


> 
> On Fri, Apr 13, 2018 at 10:05 AM, Mateusz <mateuszb at poczta.onet.pl <mailto:mateuszb at poczta.onet.pl>> wrote:
> 
>     W dniu 07.04.2018 o 04:35, mythreyi at multicorewareinc.com <mailto:mythreyi at multicorewareinc.com> pisze:
>     > # HG changeset patch
>     > # User Jayashree <jayashree.c at multicorewareinc.com <mailto:jayashree.c at multicorewareinc.com>>
>     > # Date 1522928767 -19800
>     > #      Thu Apr 05 17:16:07 2018 +0530
>     > # Node ID f6ad2fa637fd3c8f9e2811982b89aa28228e9f6b
>     > # Parent  876b6e006f2080072c0684dbf75e7cfde974ba79
>     > x86:AVX512 Set run time flag to enable/disable avx512
>     >
>     > diff -r 876b6e006f20 -r f6ad2fa637fd source/common/cpu.cpp
>     > --- a/source/common/cpu.cpp   Mon Feb 05 10:39:00 2018 -0800
>     > +++ b/source/common/cpu.cpp   Thu Apr 05 17:16:07 2018 +0530
>     > @@ -122,7 +122,7 @@
>     >  #pragma warning(disable: 4309) // truncation of constant value
>     >  #endif
>     >
>     > -uint32_t cpu_detect(void)
>     > +uint32_t cpu_detect(bool benableavx512 )
>     >  {
>     >      uint32_t cpu = 0;
>     >
>     > @@ -184,11 +184,13 @@
>     >          {
>     >              if (ebx & 0x00000020)
>     >                  cpu |= X265_CPU_AVX2;
>     > -
>     > -            if ((xcr0 & 0xE0) == 0xE0) /* OPMASK/ZMM state */
>     > +            if (benableavx512)
>     >              {
>     > -                if ((ebx & 0xD0030000) == 0xD0030000)
>     > -                    cpu |= X265_CPU_AVX512;
>     > +                if ((xcr0 & 0xE0) == 0xE0) /* OPMASK/ZMM state */
>     > +                {
>     > +                    if ((ebx & 0xD0030000) == 0xD0030000)
>     > +                        cpu |= X265_CPU_AVX512;
>     > +                }
>     >              }
>     >          }
>     >      }
>     > @@ -327,7 +329,7 @@
>     >  int PFX(cpu_fast_neon_mrc_test)(void);
>     >  }
>     >
>     > -uint32_t cpu_detect(void)
>     > +uint32_t cpu_detect(bool benableavx512)
>     >  {
>     >      int flags = 0;
>     >
>     > @@ -370,7 +372,7 @@
>     >
>     >  #elif X265_ARCH_POWER8
>     >
>     > -uint32_t cpu_detect(void)
>     > +uint32_t cpu_detect(bool benableavx512)
>     >  {
>     >  #if HAVE_ALTIVEC
>     >      return X265_CPU_ALTIVEC;
>     > @@ -381,7 +383,7 @@
>     >
>     >  #else // if X265_ARCH_POWER8
>     >
>     > -uint32_t cpu_detect(void)
>     > +uint32_t cpu_detect(bool benableavx512)
>     >  {
>     >      return 0;
>     >  }
>     > diff -r 876b6e006f20 -r f6ad2fa637fd source/common/cpu.h
>     > --- a/source/common/cpu.h     Mon Feb 05 10:39:00 2018 -0800
>     > +++ b/source/common/cpu.h     Thu Apr 05 17:16:07 2018 +0530
>     > @@ -50,7 +50,7 @@
>     >  #endif
>     >
>     >  namespace X265_NS {
>     > -uint32_t cpu_detect(void);
>     > +uint32_t cpu_detect(bool);
>     >
>     >  struct cpu_name_t
>     >  {
>     > diff -r 876b6e006f20 -r f6ad2fa637fd source/common/param.cpp
>     > --- a/source/common/param.cpp Mon Feb 05 10:39:00 2018 -0800
>     > +++ b/source/common/param.cpp Thu Apr 05 17:16:07 2018 +0530
>     > @@ -99,13 +99,13 @@
>     >  {
>     >      x265_free(p);
>     >  }
>     > -
>     > +bool  benableavx512 = false;
>     >  void x265_param_default(x265_param* param)
>     >  {
>     >      memset(param, 0, sizeof(x265_param));
>     >
>     >      /* Applying default values to all elements in the param structure */
>     > -    param->cpuid = X265_NS::cpu_detect();
>     > +    param->cpuid = X265_NS::cpu_detect(benableavx512);
>     >      param->bEnableWavefront = 1;
>     >      param->frameNumThreads = 0;
>     >
>     > @@ -609,6 +609,17 @@
>     >      if (0) ;
>     >      OPT("asm")
>     >      {
>     > +        sscanf(value, "%s", p->asmname);
> 
>     p->asmname is a pointer to unallocated memory, and it is not used anywhere in x265.
>     VS 2015 and VS 2017 builds hang when run with this command line:
>     x265 --asm sse4 -V
> 
>     I think we should remove 'asmname' from x265_param
>     or allocate memory before we copy anything to this location.
> 
> 
>     > +        if (strcmp(value, "avx512")==0)
>     > +        {
>     > +            p->bEnableavx512 = 1;
>     > +            benableavx512 = true;
>     > +        }
>     > +        else
>     > +        {
>     > +            p->bEnableavx512 = 0;
>     > +            benableavx512 = false;
>     > +        }
>     >          if (bValueWasNull)
>     >              p->cpuid = atobool(value);
>     >          else
>     > @@ -1072,7 +1083,7 @@
>     >      if (isdigit(value[0]))
>     >          cpu = x265_atoi(value, bError);
>     >      else
>     > -        cpu = !strcmp(value, "auto") || x265_atobool(value, bError) ? X265_NS::cpu_detect() : 0;
>     > +        cpu = !strcmp(value, "auto") || x265_atobool(value, bError) ? X265_NS::cpu_detect(benableavx512) : 0;
>     >
>     >      if (bError)
>     >      {
>     > diff -r 876b6e006f20 -r f6ad2fa637fd source/test/pixelharness.cpp
>     > --- a/source/test/pixelharness.cpp    Mon Feb 05 10:39:00 2018 -0800
>     > +++ b/source/test/pixelharness.cpp    Thu Apr 05 17:16:07 2018 +0530
>     > @@ -332,8 +332,9 @@
>     >      memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
>     >      memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
>     >      int j = 0;
>     > +    bool enableavx512 = true;
>     >      int width = 16 * (rand() % 4 + 1);
>     > -    int cpuid = X265_NS::cpu_detect();
>     > +    int cpuid = X265_NS::cpu_detect(enableavx512);
>     >      if (cpuid & X265_CPU_AVX512)
>     >          width = 32 * (rand() % 2 + 1);
>     >      int height = 8;
>     > diff -r 876b6e006f20 -r f6ad2fa637fd source/test/testbench.cpp
>     > --- a/source/test/testbench.cpp       Mon Feb 05 10:39:00 2018 -0800
>     > +++ b/source/test/testbench.cpp       Thu Apr 05 17:16:07 2018 +0530
>     > @@ -96,7 +96,8 @@
>     >
>     >  int main(int argc, char *argv[])
>     >  {
>     > -    int cpuid = X265_NS::cpu_detect();
>     > +    bool enableavx512 = true;
>     > +    int cpuid = X265_NS::cpu_detect(enableavx512);
>     >      const char *testname = 0;
>     >
>     >      if (!(argc & 1))
>     > diff -r 876b6e006f20 -r f6ad2fa637fd source/x265.h
>     > --- a/source/x265.h   Mon Feb 05 10:39:00 2018 -0800
>     > +++ b/source/x265.h   Thu Apr 05 17:16:07 2018 +0530
>     > @@ -585,7 +585,14 @@
>     >       * somehow flawed on your target hardware. The asm function tables are
>     >       * process global, the first encoder configures them for all encoders */
>     >      int       cpuid;
>     > -
>     > +     /*==Assembly features ==*/
>     > +     /*  x265_param_parse() will detect if the avx512 is enabled (in cli )and set
>     > +     *  bEnableavx512 to 1 to use avx512 SIMD. By default this flag will not be set ,
>     > +     *  hence the encoding will happen without avx512 assembly primitives even if the cpu has
>     > +     *  avx512 capabilities.
>     > +     *  Ensure to use --asm avx512 if you need to encode with avx512 assembly primitives*/
>     > +    int     bEnableavx512;
>     > +    char*   asmname;
>     >      /*== Parallelism Features ==*/
>     >
>     >      /* Number of concurrently encoded frames between 1 and X265_MAX_FRAME_THREADS
>     > _______________________________________________
>     > x265-devel mailing list
>     > x265-devel at videolan.org <mailto:x265-devel at videolan.org>
>     > https://mailman.videolan.org/listinfo/x265-devel <https://mailman.videolan.org/listinfo/x265-devel>
> 
>     _______________________________________________
>     x265-devel mailing list
>     x265-devel at videolan.org <mailto:x265-devel at videolan.org>
>     https://mailman.videolan.org/listinfo/x265-devel <https://mailman.videolan.org/listinfo/x265-devel>
> 
> 
> 
> 
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
> 




More information about the x265-devel mailing list