[x265] [PATCH 1/2] pixelharness.cpp: Enable tests for 64x64 TU primitives

Gerda Zsejke More gerdazsejke.more at arm.com
Fri Feb 7 13:49:46 UTC 2025


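Enable unit tests for the 64x64 variants of the calcresidual, transpose,
SSD (ssd_s), copy_cnt, cpy2Dto1D and cpy1Dto2D primitives by removing
the `i < BLOCK_64x64` guard around the TU-only checks. Enabling these
unit tests exposed bugs in cpy1Dto2D_shl_64x64_sve and
cpy2Dto1D_shl_64x64_sve: after each row the pointer into the 1D buffer
was advanced with `addvl #1` (one SVE vector length in bytes) instead of
by x8 16-bit elements (`x8, lsl #1`), where x8 is the element-count
bound used by the row loop's `whilelt`.
This patch also fixes these issues.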
---
 source/common/aarch64/blockcopy8-sve.S |   4 +-
 source/test/pixelharness.cpp           | 135 ++++++++++++-------------
 2 files changed, 68 insertions(+), 71 deletions(-)

diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S
index 1d742a64c..ba7168c26 100644
--- a/source/common/aarch64/blockcopy8-sve.S
+++ b/source/common/aarch64/blockcopy8-sve.S
@@ -965,7 +965,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve)
     whilelt         p0.h, x9, x8
     b.first         .L_cpy2Dto1D_shl_64x64
     add             x1, x1, x2, lsl #1
-    addvl           x0, x0, #1
+    add             x0, x0, x8, lsl #1
     cbnz            w12, .L_init_cpy2Dto1D_shl_64x64
     ret
 endfunc
@@ -1204,7 +1204,7 @@ function PFX(cpy1Dto2D_shl_64x64_sve)
     inch            x9
     whilelt         p0.h, x9, x8
     b.first         .L_cpy1Dto2D_shl_64x64
-    addvl           x1, x1, #1
+    add             x1, x1, x8, lsl #1
     add             x0, x0, x2, lsl #1
     cbnz            w12, .L_init_cpy1Dto2D_shl_64x64
     ret
diff --git a/source/test/pixelharness.cpp b/source/test/pixelharness.cpp
index 311985d83..b730c15c0 100644
--- a/source/test/pixelharness.cpp
+++ b/source/test/pixelharness.cpp
@@ -2686,102 +2686,99 @@ bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPr
             }
         }
 
-        if (i < BLOCK_64x64)
-        {
-            /* TU only primitives */
+        /* TU only primitives */
 
-            if (opt.cu[i].calcresidual[NONALIGNED])
+        if (opt.cu[i].calcresidual[NONALIGNED])
+        {
+            if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED]))
             {
-                if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED]))
-                {
-                    printf("calcresidual width: %d failed!\n", 4 << i);
-                    return false;
-                }
+                printf("calcresidual width: %d failed!\n", 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].calcresidual[ALIGNED])
+        if (opt.cu[i].calcresidual[ALIGNED])
+        {
+            if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED]))
             {
-                if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED]))
-                {
-                    printf("calcresidual_aligned width: %d failed!\n", 4 << i);
-                    return false;
-                }
+                printf("calcresidual_aligned width: %d failed!\n", 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].transpose)
+        if (opt.cu[i].transpose)
+        {
+            if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
             {
-                if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
-                {
-                    printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].ssd_s[NONALIGNED])
+        }
+        if (opt.cu[i].ssd_s[NONALIGNED])
+        {
+            if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED]))
             {
-                if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED]))
-                {
-                    printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].ssd_s[ALIGNED])
+        }
+        if (opt.cu[i].ssd_s[ALIGNED])
+        {
+            if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED]))
             {
-                if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED]))
-                {
-                    printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].copy_cnt)
+        }
+        if (opt.cu[i].copy_cnt)
+        {
+            if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
             {
-                if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
-                {
-                    printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy2Dto1D_shl)
+        if (opt.cu[i].cpy2Dto1D_shl)
+        {
+            if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl))
             {
-                if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl))
-                {
-                    printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy2Dto1D_shr)
+        if (opt.cu[i].cpy2Dto1D_shr)
+        {
+            if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr))
             {
-                if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr))
-                {
-                    printf("cpy2Dto1D_shr failed!\n");
-                    return false;
-                }
+                printf("cpy2Dto1D_shr failed!\n");
+                return false;
             }
-            if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+        }
+        if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+        {
+            if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
             {
-                if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
-                {
-                    printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+        }
+        if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+        {
+            if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
             {
-                if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
-                {
-                    printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy1Dto2D_shr)
+        if (opt.cu[i].cpy1Dto2D_shr)
+        {
+            if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr))
             {
-                if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr))
-                {
-                    printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
         }
     }
-- 
2.39.5 (Apple Git-154)

-------------- next part --------------
From c0811fad3fbde513aefa823edb6516b07df46c9c Mon Sep 17 00:00:00 2001
Message-Id: <c0811fad3fbde513aefa823edb6516b07df46c9c.1738934102.git.gerdazsejke.more at arm.com>
In-Reply-To: <cover.1738934102.git.gerdazsejke.more at arm.com>
References: <cover.1738934102.git.gerdazsejke.more at arm.com>
From: Arpad Panyik <Arpad.Panyik at arm.com>
Date: Thu, 30 Jan 2025 15:54:43 +0100
Subject: [PATCH 1/2] pixelharness.cpp: Enable tests for 64x64 TU primitives

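Enable unit tests for the 64x64 variants of the calcresidual, transpose,
SSD (ssd_s), copy_cnt, cpy2Dto1D and cpy1Dto2D primitives by removing
the `i < BLOCK_64x64` guard around the TU-only checks. Enabling these
unit tests exposed bugs in cpy1Dto2D_shl_64x64_sve and
cpy2Dto1D_shl_64x64_sve: after each row the pointer into the 1D buffer
was advanced with `addvl #1` (one SVE vector length in bytes) instead of
by x8 16-bit elements (`x8, lsl #1`), where x8 is the element-count
bound used by the row loop's `whilelt`.
This patch also fixes these issues.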
---
 source/common/aarch64/blockcopy8-sve.S |   4 +-
 source/test/pixelharness.cpp           | 135 ++++++++++++-------------
 2 files changed, 68 insertions(+), 71 deletions(-)

diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S
index 1d742a64c..ba7168c26 100644
--- a/source/common/aarch64/blockcopy8-sve.S
+++ b/source/common/aarch64/blockcopy8-sve.S
@@ -965,7 +965,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve)
     whilelt         p0.h, x9, x8
     b.first         .L_cpy2Dto1D_shl_64x64
     add             x1, x1, x2, lsl #1
-    addvl           x0, x0, #1
+    add             x0, x0, x8, lsl #1
     cbnz            w12, .L_init_cpy2Dto1D_shl_64x64
     ret
 endfunc
@@ -1204,7 +1204,7 @@ function PFX(cpy1Dto2D_shl_64x64_sve)
     inch            x9
     whilelt         p0.h, x9, x8
     b.first         .L_cpy1Dto2D_shl_64x64
-    addvl           x1, x1, #1
+    add             x1, x1, x8, lsl #1
     add             x0, x0, x2, lsl #1
     cbnz            w12, .L_init_cpy1Dto2D_shl_64x64
     ret
diff --git a/source/test/pixelharness.cpp b/source/test/pixelharness.cpp
index 311985d83..b730c15c0 100644
--- a/source/test/pixelharness.cpp
+++ b/source/test/pixelharness.cpp
@@ -2686,102 +2686,99 @@ bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPr
             }
         }
 
-        if (i < BLOCK_64x64)
-        {
-            /* TU only primitives */
+        /* TU only primitives */
 
-            if (opt.cu[i].calcresidual[NONALIGNED])
+        if (opt.cu[i].calcresidual[NONALIGNED])
+        {
+            if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED]))
             {
-                if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED]))
-                {
-                    printf("calcresidual width: %d failed!\n", 4 << i);
-                    return false;
-                }
+                printf("calcresidual width: %d failed!\n", 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].calcresidual[ALIGNED])
+        if (opt.cu[i].calcresidual[ALIGNED])
+        {
+            if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED]))
             {
-                if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED]))
-                {
-                    printf("calcresidual_aligned width: %d failed!\n", 4 << i);
-                    return false;
-                }
+                printf("calcresidual_aligned width: %d failed!\n", 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].transpose)
+        if (opt.cu[i].transpose)
+        {
+            if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
             {
-                if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
-                {
-                    printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].ssd_s[NONALIGNED])
+        }
+        if (opt.cu[i].ssd_s[NONALIGNED])
+        {
+            if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED]))
             {
-                if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED]))
-                {
-                    printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].ssd_s[ALIGNED])
+        }
+        if (opt.cu[i].ssd_s[ALIGNED])
+        {
+            if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED]))
             {
-                if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED]))
-                {
-                    printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].copy_cnt)
+        }
+        if (opt.cu[i].copy_cnt)
+        {
+            if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
             {
-                if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
-                {
-                    printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy2Dto1D_shl)
+        if (opt.cu[i].cpy2Dto1D_shl)
+        {
+            if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl))
             {
-                if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl))
-                {
-                    printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy2Dto1D_shr)
+        if (opt.cu[i].cpy2Dto1D_shr)
+        {
+            if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr))
             {
-                if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr))
-                {
-                    printf("cpy2Dto1D_shr failed!\n");
-                    return false;
-                }
+                printf("cpy2Dto1D_shr failed!\n");
+                return false;
             }
-            if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+        }
+        if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+        {
+            if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
             {
-                if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
-                {
-                    printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
-            if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+        }
+        if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+        {
+            if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
             {
-                if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
-                {
-                    printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
+        }
 
-            if (opt.cu[i].cpy1Dto2D_shr)
+        if (opt.cu[i].cpy1Dto2D_shr)
+        {
+            if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr))
             {
-                if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr))
-                {
-                    printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
-                    return false;
-                }
+                printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
+                return false;
             }
         }
     }
-- 
2.39.5 (Apple Git-154)



More information about the x265-devel mailing list