[x265] [PATCH 1/2] pixelharness.cpp: Enable tests for 64x64 TU primitives
Gerda Zsejke More
gerdazsejke.more at arm.com
Fri Feb 7 13:49:46 UTC 2025
Enable unit tests for 64x64 variants of calcresidual, transpose,
cpy1Dto2D, cpy2Dto1D and SSD primitives. Enabling these unit tests
exposed bugs in cpy1Dto2D_shl_64x64_sve and cpy2Dto1D_shl_64x64_sve.
This patch also fixes these issues.
---
source/common/aarch64/blockcopy8-sve.S | 4 +-
source/test/pixelharness.cpp | 135 ++++++++++++-------------
2 files changed, 68 insertions(+), 71 deletions(-)
diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S
index 1d742a64c..ba7168c26 100644
--- a/source/common/aarch64/blockcopy8-sve.S
+++ b/source/common/aarch64/blockcopy8-sve.S
@@ -965,7 +965,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve)
whilelt p0.h, x9, x8
b.first .L_cpy2Dto1D_shl_64x64
add x1, x1, x2, lsl #1
- addvl x0, x0, #1
+ add x0, x0, x8, lsl #1
cbnz w12, .L_init_cpy2Dto1D_shl_64x64
ret
endfunc
@@ -1204,7 +1204,7 @@ function PFX(cpy1Dto2D_shl_64x64_sve)
inch x9
whilelt p0.h, x9, x8
b.first .L_cpy1Dto2D_shl_64x64
- addvl x1, x1, #1
+ add x1, x1, x8, lsl #1
add x0, x0, x2, lsl #1
cbnz w12, .L_init_cpy1Dto2D_shl_64x64
ret
diff --git a/source/test/pixelharness.cpp b/source/test/pixelharness.cpp
index 311985d83..b730c15c0 100644
--- a/source/test/pixelharness.cpp
+++ b/source/test/pixelharness.cpp
@@ -2686,102 +2686,99 @@ bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPr
}
}
- if (i < BLOCK_64x64)
- {
- /* TU only primitives */
+ /* TU only primitives */
- if (opt.cu[i].calcresidual[NONALIGNED])
+ if (opt.cu[i].calcresidual[NONALIGNED])
+ {
+ if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED]))
{
- if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED]))
- {
- printf("calcresidual width: %d failed!\n", 4 << i);
- return false;
- }
+ printf("calcresidual width: %d failed!\n", 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].calcresidual[ALIGNED])
+ if (opt.cu[i].calcresidual[ALIGNED])
+ {
+ if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED]))
{
- if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED]))
- {
- printf("calcresidual_aligned width: %d failed!\n", 4 << i);
- return false;
- }
+ printf("calcresidual_aligned width: %d failed!\n", 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].transpose)
+ if (opt.cu[i].transpose)
+ {
+ if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
{
- if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
- {
- printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
- return false;
- }
+ printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
+ return false;
}
- if (opt.cu[i].ssd_s[NONALIGNED])
+ }
+ if (opt.cu[i].ssd_s[NONALIGNED])
+ {
+ if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED]))
{
- if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED]))
- {
- printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
+ return false;
}
- if (opt.cu[i].ssd_s[ALIGNED])
+ }
+ if (opt.cu[i].ssd_s[ALIGNED])
+ {
+ if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED]))
{
- if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED]))
- {
- printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
+ return false;
}
- if (opt.cu[i].copy_cnt)
+ }
+ if (opt.cu[i].copy_cnt)
+ {
+ if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
{
- if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
- {
- printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].cpy2Dto1D_shl)
+ if (opt.cu[i].cpy2Dto1D_shl)
+ {
+ if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl))
{
- if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl))
- {
- printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].cpy2Dto1D_shr)
+ if (opt.cu[i].cpy2Dto1D_shr)
+ {
+ if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr))
{
- if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr))
- {
- printf("cpy2Dto1D_shr failed!\n");
- return false;
- }
+ printf("cpy2Dto1D_shr failed!\n");
+ return false;
}
- if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+ }
+ if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+ {
+ if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
{
- if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
- {
- printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
- if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+ }
+ if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+ {
+ if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
{
- if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
- {
- printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].cpy1Dto2D_shr)
+ if (opt.cu[i].cpy1Dto2D_shr)
+ {
+ if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr))
{
- if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr))
- {
- printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
}
}
--
2.39.5 (Apple Git-154)
-------------- next part --------------
From c0811fad3fbde513aefa823edb6516b07df46c9c Mon Sep 17 00:00:00 2001
Message-Id: <c0811fad3fbde513aefa823edb6516b07df46c9c.1738934102.git.gerdazsejke.more at arm.com>
In-Reply-To: <cover.1738934102.git.gerdazsejke.more at arm.com>
References: <cover.1738934102.git.gerdazsejke.more at arm.com>
From: Arpad Panyik <Arpad.Panyik at arm.com>
Date: Thu, 30 Jan 2025 15:54:43 +0100
Subject: [PATCH 1/2] pixelharness.cpp: Enable tests for 64x64 TU primitives
Enable unit tests for 64x64 variants of calcresidual, transpose,
cpy1Dto2D, cpy2Dto1D and SSD primitives. Enabling these unit tests
exposed bugs in cpy1Dto2D_shl_64x64_sve and cpy2Dto1D_shl_64x64_sve.
This patch also fixes these issues.
---
source/common/aarch64/blockcopy8-sve.S | 4 +-
source/test/pixelharness.cpp | 135 ++++++++++++-------------
2 files changed, 68 insertions(+), 71 deletions(-)
diff --git a/source/common/aarch64/blockcopy8-sve.S b/source/common/aarch64/blockcopy8-sve.S
index 1d742a64c..ba7168c26 100644
--- a/source/common/aarch64/blockcopy8-sve.S
+++ b/source/common/aarch64/blockcopy8-sve.S
@@ -965,7 +965,7 @@ function PFX(cpy2Dto1D_shl_64x64_sve)
whilelt p0.h, x9, x8
b.first .L_cpy2Dto1D_shl_64x64
add x1, x1, x2, lsl #1
- addvl x0, x0, #1
+ add x0, x0, x8, lsl #1
cbnz w12, .L_init_cpy2Dto1D_shl_64x64
ret
endfunc
@@ -1204,7 +1204,7 @@ function PFX(cpy1Dto2D_shl_64x64_sve)
inch x9
whilelt p0.h, x9, x8
b.first .L_cpy1Dto2D_shl_64x64
- addvl x1, x1, #1
+ add x1, x1, x8, lsl #1
add x0, x0, x2, lsl #1
cbnz w12, .L_init_cpy1Dto2D_shl_64x64
ret
diff --git a/source/test/pixelharness.cpp b/source/test/pixelharness.cpp
index 311985d83..b730c15c0 100644
--- a/source/test/pixelharness.cpp
+++ b/source/test/pixelharness.cpp
@@ -2686,102 +2686,99 @@ bool PixelHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPr
}
}
- if (i < BLOCK_64x64)
- {
- /* TU only primitives */
+ /* TU only primitives */
- if (opt.cu[i].calcresidual[NONALIGNED])
+ if (opt.cu[i].calcresidual[NONALIGNED])
+ {
+ if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED]))
{
- if (!check_calresidual(ref.cu[i].calcresidual[NONALIGNED], opt.cu[i].calcresidual[NONALIGNED]))
- {
- printf("calcresidual width: %d failed!\n", 4 << i);
- return false;
- }
+ printf("calcresidual width: %d failed!\n", 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].calcresidual[ALIGNED])
+ if (opt.cu[i].calcresidual[ALIGNED])
+ {
+ if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED]))
{
- if (!check_calresidual_aligned(ref.cu[i].calcresidual[ALIGNED], opt.cu[i].calcresidual[ALIGNED]))
- {
- printf("calcresidual_aligned width: %d failed!\n", 4 << i);
- return false;
- }
+ printf("calcresidual_aligned width: %d failed!\n", 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].transpose)
+ if (opt.cu[i].transpose)
+ {
+ if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
{
- if (!check_transpose(ref.cu[i].transpose, opt.cu[i].transpose))
- {
- printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
- return false;
- }
+ printf("transpose[%dx%d] failed\n", 4 << i, 4 << i);
+ return false;
}
- if (opt.cu[i].ssd_s[NONALIGNED])
+ }
+ if (opt.cu[i].ssd_s[NONALIGNED])
+ {
+ if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED]))
{
- if (!check_ssd_s(ref.cu[i].ssd_s[NONALIGNED], opt.cu[i].ssd_s[NONALIGNED]))
- {
- printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("ssd_s[%dx%d]: failed!\n", 4 << i, 4 << i);
+ return false;
}
- if (opt.cu[i].ssd_s[ALIGNED])
+ }
+ if (opt.cu[i].ssd_s[ALIGNED])
+ {
+ if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED]))
{
- if (!check_ssd_s_aligned(ref.cu[i].ssd_s[ALIGNED], opt.cu[i].ssd_s[ALIGNED]))
- {
- printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("ssd_s_aligned[%dx%d]: failed!\n", 4 << i, 4 << i);
+ return false;
}
- if (opt.cu[i].copy_cnt)
+ }
+ if (opt.cu[i].copy_cnt)
+ {
+ if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
{
- if (!check_copy_cnt_t(ref.cu[i].copy_cnt, opt.cu[i].copy_cnt))
- {
- printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].cpy2Dto1D_shl)
+ if (opt.cu[i].cpy2Dto1D_shl)
+ {
+ if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl))
{
- if (!check_cpy2Dto1D_shl_t(ref.cu[i].cpy2Dto1D_shl, opt.cu[i].cpy2Dto1D_shl))
- {
- printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("cpy2Dto1D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].cpy2Dto1D_shr)
+ if (opt.cu[i].cpy2Dto1D_shr)
+ {
+ if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr))
{
- if (!check_cpy2Dto1D_shr_t(ref.cu[i].cpy2Dto1D_shr, opt.cu[i].cpy2Dto1D_shr))
- {
- printf("cpy2Dto1D_shr failed!\n");
- return false;
- }
+ printf("cpy2Dto1D_shr failed!\n");
+ return false;
}
- if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+ }
+ if (opt.cu[i].cpy1Dto2D_shl[NONALIGNED])
+ {
+ if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
{
- if (!check_cpy1Dto2D_shl_t(ref.cu[i].cpy1Dto2D_shl[NONALIGNED], opt.cu[i].cpy1Dto2D_shl[NONALIGNED]))
- {
- printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("cpy1Dto2D_shl[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
- if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+ }
+ if (opt.cu[i].cpy1Dto2D_shl[ALIGNED])
+ {
+ if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
{
- if (!check_cpy1Dto2D_shl_aligned_t(ref.cu[i].cpy1Dto2D_shl[ALIGNED], opt.cu[i].cpy1Dto2D_shl[ALIGNED]))
- {
- printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("cpy1Dto2D_shl_aligned[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
+ }
- if (opt.cu[i].cpy1Dto2D_shr)
+ if (opt.cu[i].cpy1Dto2D_shr)
+ {
+ if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr))
{
- if (!check_cpy1Dto2D_shr_t(ref.cu[i].cpy1Dto2D_shr, opt.cu[i].cpy1Dto2D_shr))
- {
- printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
- return false;
- }
+ printf("cpy1Dto2D_shr[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
}
}
}
--
2.39.5 (Apple Git-154)
More information about the x265-devel mailing list