[x265] [PATCH v2] testbench.cpp: Add --cpuid list to list valid SIMD architectures
George Steed
george.steed at arm.com
Tue Mar 18 09:44:23 UTC 2025
Gentle ping :)
I think checking the exact string length is good in general since it
makes it easier to avoid accidentally matching in other cases where a
substring may also be a valid option, e.g. strncmp(value, "SVE", 3)
would match both "SVE" and "SVE2", this behaviour is not desirable.
In future we could also migrate this to using C++ std::string or
std::string_view which would allow us to just use `==` for comparison
instead which may be simplier. Let me know if you have a preference for
this.
Thanks,
George
On Tue, Feb 25, 2025 at 01:22:00PM +0800, chen wrote:
> Hi George,
>
>
>
>
> Thank for the revised patch, it looks good now.
>
>
>
>
> btw: I don't think treated 'lists' as 'list' is bad idea, it is option to shows help information, typo is acceptanceable.
>
>
>
>
> Regards,
> Chen
>
>
>
>
> At 2025-02-24 18:50:33, "George Steed" <george.steed at arm.com> wrote:
> >Hi Chen,
> >
> >Thanks for your comments. I've changed `strlen(name)` to 5: the length
> >of "list" including the null-terminator. Note that we need to include
> >the null-terminator in the length here to avoid also matching e.g.
> >`--cpuid lista` (with trailing characters).
> >
> >Please find revised patch below and attached.
> >
> >Thanks,
> >George
> >
> >-- >8 --
> >Rather than needing to guess what architecture features are available to
> >be tested, simply allow the user to query the testbench to discover the
> >available features. Since the list of architectures is now a global
> >variable, adjust the naming to camelCase instead of snake_case to match
> >the rest of the file.
> >
> >Also add the relevant explanation line to --help text.
> >
> >Also take this opportunity to reorder SVE/SVE2 in the Arm architecture
> >list such that they occur below the group of Neon extensions.
> >---
> > source/test/testbench.cpp | 92 ++++++++++++++++++++++-----------------
> > 1 file changed, 53 insertions(+), 39 deletions(-)
> >
> >diff --git a/source/test/testbench.cpp b/source/test/testbench.cpp
> >index b8ef760f2..f651dc51b 100644
> >--- a/source/test/testbench.cpp
> >+++ b/source/test/testbench.cpp
> >@@ -80,13 +80,52 @@ const char* const* chromaPartStr[X265_CSP_COUNT] =
> > lumaPartStr
> > };
> >
> >+struct test_arch_t
> >+{
> >+ char name[13];
> >+ int flag;
> >+} testArch[] =
> >+{
> >+#if X265_ARCH_X86
> >+ { "SSE2", X265_CPU_SSE2 },
> >+ { "SSE3", X265_CPU_SSE3 },
> >+ { "SSSE3", X265_CPU_SSSE3 },
> >+ { "SSE4", X265_CPU_SSE4 },
> >+ { "AVX", X265_CPU_AVX },
> >+ { "XOP", X265_CPU_XOP },
> >+ { "AVX2", X265_CPU_AVX2 },
> >+ { "BMI2", X265_CPU_AVX2 | X265_CPU_BMI1 | X265_CPU_BMI2 },
> >+ { "AVX512", X265_CPU_AVX512 },
> >+#else
> >+ { "ARMv6", X265_CPU_ARMV6 },
> >+ { "NEON", X265_CPU_NEON },
> >+ { "Neon_DotProd", X265_CPU_NEON_DOTPROD },
> >+ { "Neon_I8MM", X265_CPU_NEON_I8MM },
> >+ { "SVE", X265_CPU_SVE },
> >+ { "SVE2", X265_CPU_SVE2 },
> >+ { "FastNeonMRC", X265_CPU_FAST_NEON_MRC },
> >+#endif
> >+ { "", 0 },
> >+};
> >+
> >+void do_cpuid_list(int cpuid)
> >+{
> >+ printf("x265 detected --cpuid architectures:\n");
> >+ for (int i = 0; testArch[i].flag; i++)
> >+ {
> >+ if ((testArch[i].flag & cpuid) == testArch[i].flag)
> >+ printf(" %s\n", testArch[i].name);
> >+ }
> >+}
> >+
> > void do_help()
> > {
> > printf("x265 optimized primitive testbench\n\n");
> > printf("usage: TestBench [--cpuid CPU] [--testbench BENCH] [--nobench] [--help]\n\n");
> >- printf(" CPU is comma separated SIMD arch list, example: SSE4,AVX\n");
> >+ printf(" CPU is comma separated SIMD architecture list, for example: SSE4,AVX\n");
> >+ printf(" Use `--cpuid list` to print a list of detected SIMD architectures\n\n");
> > printf(" BENCH is one of (pixel,transforms,interp,intrapred)\n\n");
> >- printf(" --nobench disables running benchmarks, only run correctness tests\n\n");
> >+ printf(" `--nobench` disables running benchmarks, only run correctness tests\n\n");
> > printf("By default, the test bench will test all benches on detected CPU architectures\n");
> > printf("Options and testbench name may be truncated.\n");
> > }
> >@@ -120,6 +159,11 @@ int main(int argc, char *argv[])
> > }
> > else if (!strncmp(name, "cpuid", strlen(name)))
> > {
> >+ if (!strncmp(value, "list", 5))
> >+ {
> >+ do_cpuid_list(cpuid);
> >+ return 0;
> >+ }
> > int cpu_detect_cpuid = cpuid;
> > bool bError = false;
> > cpuid = parseCpuName(value, bError, enableavx512);
> >@@ -173,48 +217,18 @@ int main(int argc, char *argv[])
> > setupCPrimitives(cprim);
> > setupAliasPrimitives(cprim);
> >
> >- struct test_arch_t
> >- {
> >- char name[13];
> >- int flag;
> >- } test_arch[] =
> >+ for (int i = 0; testArch[i].flag; i++)
> > {
> >-#if X265_ARCH_X86
> >- { "SSE2", X265_CPU_SSE2 },
> >- { "SSE3", X265_CPU_SSE3 },
> >- { "SSSE3", X265_CPU_SSSE3 },
> >- { "SSE4", X265_CPU_SSE4 },
> >- { "AVX", X265_CPU_AVX },
> >- { "XOP", X265_CPU_XOP },
> >- { "AVX2", X265_CPU_AVX2 },
> >- { "BMI2", X265_CPU_AVX2 | X265_CPU_BMI1 | X265_CPU_BMI2 },
> >- { "AVX512", X265_CPU_AVX512 },
> >-#else
> >- { "ARMv6", X265_CPU_ARMV6 },
> >- { "NEON", X265_CPU_NEON },
> >- { "SVE2", X265_CPU_SVE2 },
> >- { "SVE", X265_CPU_SVE },
> >- { "Neon_DotProd", X265_CPU_NEON_DOTPROD },
> >- { "Neon_I8MM", X265_CPU_NEON_I8MM },
> >- { "FastNeonMRC", X265_CPU_FAST_NEON_MRC },
> >-#endif
> >- { "", 0 },
> >- };
> >-
> >- for (int i = 0; test_arch[i].flag; i++)
> >- {
> >- if ((test_arch[i].flag & cpuid) == test_arch[i].flag)
> >- {
> >- printf("Testing primitives: %s\n", test_arch[i].name);
> >- fflush(stdout);
> >- }
> >- else
> >+ if ((testArch[i].flag & cpuid) != testArch[i].flag)
> > continue;
> >
> >+ printf("Testing primitives: %s\n", testArch[i].name);
> >+ fflush(stdout);
> >+
> > #if defined(X265_ARCH_X86) || defined(X265_ARCH_ARM64)
> > EncoderPrimitives vecprim;
> > memset(&vecprim, 0, sizeof(vecprim));
> >- setupIntrinsicPrimitives(vecprim, test_arch[i].flag);
> >+ setupIntrinsicPrimitives(vecprim, testArch[i].flag);
> > setupAliasPrimitives(vecprim);
> > for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++)
> > {
> >@@ -232,7 +246,7 @@ int main(int argc, char *argv[])
> > EncoderPrimitives asmprim;
> > memset(&asmprim, 0, sizeof(asmprim));
> >
> >- setupAssemblyPrimitives(asmprim, test_arch[i].flag);
> >+ setupAssemblyPrimitives(asmprim, testArch[i].flag);
> > setupAliasPrimitives(asmprim);
> > memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives));
> > for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++)
> >--
> >2.34.1
> >
More information about the x265-devel
mailing list