[x265] [PATCH 2/2] Rename CLZ/CTZ to BSR/BSF
Micro Daryl Robles
microdaryl.robles at arm.com
Tue Apr 8 15:13:44 UTC 2025
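The CLZ/CTZ macros are actually synonymous with MSVC's BitScanReverse and
BitScanForward intrinsics, so rename them to BSR/BSF accordingly. In
particular, CLZ does not return a count of leading zeros: it evaluates to
__builtin_clz(x) ^ 31 (or __builtin_clzll(x) ^ 63), i.e. the index of the
highest set bit, which is exactly what BitScanReverse produces.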
---
source/common/aarch64/dct-prim.cpp | 4 ++--
source/common/aarch64/dct-prim.h | 2 +-
source/common/bitstream.cpp | 2 +-
source/common/dct.cpp | 4 ++--
source/common/ppc/dct_altivec.cpp | 2 +-
source/common/quant.cpp | 8 ++++----
source/common/threading.h | 24 ++++++++++++------------
source/common/threadpool.cpp | 10 +++++-----
source/common/wavefront.cpp | 2 +-
source/common/x86/pixel-util8.asm | 4 ++--
source/encoder/entropy.cpp | 10 +++++-----
source/encoder/frameencoder.cpp | 4 ++--
source/encoder/slicetype.cpp | 2 +-
source/encoder/weightPrediction.cpp | 2 +-
14 files changed, 40 insertions(+), 40 deletions(-)
diff --git a/source/common/aarch64/dct-prim.cpp b/source/common/aarch64/dct-prim.cpp
index 6a3d95e91..5ac15bdd3 100644
--- a/source/common/aarch64/dct-prim.cpp
+++ b/source/common/aarch64/dct-prim.cpp
@@ -1904,9 +1904,9 @@ uint32_t findPosFirstLast_neon(const int16_t *coeff, const intptr_t trSize,
}
unsigned long id_first, id_last;
- CTZ64(id_first, cmp_4bit);
+ BSF64(id_first, cmp_4bit);
uint32_t firstNZPosInCG = (uint32_t)id_first >> 2;
- CLZ64(id_last, cmp_4bit);
+ BSR64(id_last, cmp_4bit);
uint32_t lastNZPosInCG = (uint32_t)id_last >> 2;
// Add long not needed, we only need LSB.
diff --git a/source/common/aarch64/dct-prim.h b/source/common/aarch64/dct-prim.h
index dc296962b..ec09482a3 100644
--- a/source/common/aarch64/dct-prim.h
+++ b/source/common/aarch64/dct-prim.h
@@ -5,7 +5,7 @@
#include "common.h"
#include "primitives.h"
#include "contexts.h" // costCoeffNxN_c
-#include "threading.h" // CLZ
+#include "threading.h" // BSR
#include <arm_neon.h>
namespace X265_NS
diff --git a/source/common/bitstream.cpp b/source/common/bitstream.cpp
index b844749f5..d4a5c2da9 100644
--- a/source/common/bitstream.cpp
+++ b/source/common/bitstream.cpp
@@ -118,7 +118,7 @@ void SyntaxElementWriter::writeUvlc(uint32_t code)
X265_CHECK(code, "writing -1 code, will cause infinite loop\n");
unsigned long idx;
- CLZ(idx, code);
+ BSR(idx, code);
uint32_t length = (uint32_t)idx * 2 + 1;
// Take care of cases where length > 32
diff --git a/source/common/dct.cpp b/source/common/dct.cpp
index a0b977f4a..932dc7dd5 100644
--- a/source/common/dct.cpp
+++ b/source/common/dct.cpp
@@ -30,7 +30,7 @@
#include "common.h"
#include "primitives.h"
#include "contexts.h" // costCoeffNxN_c
-#include "threading.h" // CLZ
+#include "threading.h" // BSR
using namespace X265_NS;
@@ -911,7 +911,7 @@ static uint32_t costCoeffRemain_c(uint16_t *absCoeff, int numNonZero, int idx)
{
{
unsigned long cidx;
- CLZ(cidx, codeNumber + 1);
+ BSR(cidx, codeNumber + 1);
length = cidx;
}
X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
diff --git a/source/common/ppc/dct_altivec.cpp b/source/common/ppc/dct_altivec.cpp
index 92a4c59f4..b8c0c03b8 100644
--- a/source/common/ppc/dct_altivec.cpp
+++ b/source/common/ppc/dct_altivec.cpp
@@ -25,7 +25,7 @@
#include "common.h"
#include "primitives.h"
#include "contexts.h" // costCoeffNxN_c
-#include "threading.h" // CLZ
+#include "threading.h" // BSR
#include "ppccommon.h"
using namespace X265_NS;
diff --git a/source/common/quant.cpp b/source/common/quant.cpp
index 303b2a5e9..b6521912b 100644
--- a/source/common/quant.cpp
+++ b/source/common/quant.cpp
@@ -80,7 +80,7 @@ inline int getICRate(uint32_t absLevel, int32_t diffLevel, const int* greaterOne
// NOTE: mapping to x86 hardware instruction BSR
unsigned long size;
- CLZ(size, absLevel);
+ BSR(size, absLevel);
int egs = size * 2 + 1;
rate += egs << 15;
@@ -164,7 +164,7 @@ inline uint32_t getICRateCost(uint32_t absLevel, int32_t diffLevel, const int* g
if (symbol)
{
unsigned long idx;
- CLZ(idx, symbol + 1);
+ BSR(idx, symbol + 1);
length = idx;
}
@@ -293,10 +293,10 @@ uint32_t Quant::signBitHidingHDQ(int16_t* coeff, int32_t* deltaU, uint32_t numSi
int firstNZPosInCG0 = n;
#endif
- CLZ(tmp, coeffFlag[cg]);
+ BSR(tmp, coeffFlag[cg]);
const int firstNZPosInCG = (15 ^ tmp);
- CTZ(tmp, coeffFlag[cg]);
+ BSF(tmp, coeffFlag[cg]);
const int lastNZPosInCG = (15 ^ tmp);
X265_CHECK(firstNZPosInCG0 == firstNZPosInCG, "firstNZPosInCG0 check failure\n");
diff --git a/source/common/threading.h b/source/common/threading.h
index 2a1743738..2fa62bcc2 100644
--- a/source/common/threading.h
+++ b/source/common/threading.h
@@ -58,10 +58,10 @@ int no_atomic_dec(int* ptr);
int no_atomic_add(int* ptr, int val);
}
-#define CLZ(id, x) id = (unsigned long)__builtin_clz(x) ^ 31
-#define CTZ(id, x) id = (unsigned long)__builtin_ctz(x)
-#define CLZ64(id, x) id = (unsigned long)__builtin_clzll(x) ^ 63
-#define CTZ64(id, x) id = (unsigned long)__builtin_ctzll(x)
+#define BSR(id, x) (id) = ((unsigned long)__builtin_clz(x) ^ 31)
+#define BSF(id, x) (id) = ((unsigned long)__builtin_ctz(x))
+#define BSR64(id, x) (id) = ((unsigned long)__builtin_clzll(x) ^ 63)
+#define BSF64(id, x) (id) = ((unsigned long)__builtin_ctzll(x))
#define ATOMIC_OR(ptr, mask) no_atomic_or((int*)ptr, mask)
#define ATOMIC_AND(ptr, mask) no_atomic_and((int*)ptr, mask)
#define ATOMIC_INC(ptr) no_atomic_inc((int*)ptr)
@@ -74,10 +74,10 @@ int no_atomic_add(int* ptr, int val);
#include <sys/time.h>
#include <unistd.h>
-#define CLZ(id, x) id = (unsigned long)__builtin_clz(x) ^ 31
-#define CTZ(id, x) id = (unsigned long)__builtin_ctz(x)
-#define CLZ64(id, x) id = (unsigned long)__builtin_clzll(x) ^ 63
-#define CTZ64(id, x) id = (unsigned long)__builtin_ctzll(x)
+#define BSR(id, x) (id) = ((unsigned long)__builtin_clz(x) ^ 31)
+#define BSF(id, x) (id) = ((unsigned long)__builtin_ctz(x))
+#define BSR64(id, x) (id) = ((unsigned long)__builtin_clzll(x) ^ 63)
+#define BSF64(id, x) (id) = ((unsigned long)__builtin_ctzll(x))
#define ATOMIC_OR(ptr, mask) __sync_fetch_and_or(ptr, mask)
#define ATOMIC_AND(ptr, mask) __sync_fetch_and_and(ptr, mask)
#define ATOMIC_INC(ptr) __sync_add_and_fetch((volatile int32_t*)ptr, 1)
@@ -89,10 +89,10 @@ int no_atomic_add(int* ptr, int val);
#include <intrin.h>
-#define CLZ(id, x) _BitScanReverse(&id, x)
-#define CTZ(id, x) _BitScanForward(&id, x)
-#define CLZ64(id, x) _BitScanReverse64(&id, x)
-#define CTZ64(id, x) _BitScanForward64(&id, x)
+#define BSR(id, x) _BitScanReverse(&id, x)
+#define BSF(id, x) _BitScanForward(&id, x)
+#define BSR64(id, x) _BitScanReverse64(&id, x)
+#define BSF64(id, x) _BitScanForward64(&id, x)
#define ATOMIC_INC(ptr) InterlockedIncrement((volatile LONG*)ptr)
#define ATOMIC_DEC(ptr) InterlockedDecrement((volatile LONG*)ptr)
#define ATOMIC_ADD(ptr, val) InterlockedExchangeAdd((volatile LONG*)ptr, val)
diff --git a/source/common/threadpool.cpp b/source/common/threadpool.cpp
index b3505e5c0..ec96f18b1 100644
--- a/source/common/threadpool.cpp
+++ b/source/common/threadpool.cpp
@@ -36,13 +36,13 @@
#ifdef __GNUC__
-#define SLEEPBITMAP_CTZ(id, x) id = (unsigned long)__builtin_ctzll(x)
+#define SLEEPBITMAP_BSF(id, x) (id) = ((unsigned long)__builtin_ctzll(x))
#define SLEEPBITMAP_OR(ptr, mask) __sync_fetch_and_or(ptr, mask)
#define SLEEPBITMAP_AND(ptr, mask) __sync_fetch_and_and(ptr, mask)
#elif defined(_MSC_VER)
-#define SLEEPBITMAP_CTZ(id, x) _BitScanForward64(&id, x)
+#define SLEEPBITMAP_BSF(id, x) _BitScanForward64(&id, x)
#define SLEEPBITMAP_OR(ptr, mask) InterlockedOr64((volatile LONG64*)ptr, (LONG)mask)
#define SLEEPBITMAP_AND(ptr, mask) InterlockedAnd64((volatile LONG64*)ptr, (LONG)mask)
@@ -51,7 +51,7 @@
#else
/* use 32-bit primitives defined in threading.h */
-#define SLEEPBITMAP_CTZ CTZ
+#define SLEEPBITMAP_BSF BSF
#define SLEEPBITMAP_OR ATOMIC_OR
#define SLEEPBITMAP_AND ATOMIC_AND
@@ -206,7 +206,7 @@ int ThreadPool::tryAcquireSleepingThread(sleepbitmap_t firstTryBitmap, sleepbitm
sleepbitmap_t masked = m_sleepBitmap & firstTryBitmap;
while (masked)
{
- SLEEPBITMAP_CTZ(id, masked);
+ SLEEPBITMAP_BSF(id, masked);
sleepbitmap_t bit = (sleepbitmap_t)1 << id;
if (SLEEPBITMAP_AND(&m_sleepBitmap, ~bit) & bit)
@@ -218,7 +218,7 @@ int ThreadPool::tryAcquireSleepingThread(sleepbitmap_t firstTryBitmap, sleepbitm
masked = m_sleepBitmap & secondTryBitmap;
while (masked)
{
- SLEEPBITMAP_CTZ(id, masked);
+ SLEEPBITMAP_BSF(id, masked);
sleepbitmap_t bit = (sleepbitmap_t)1 << id;
if (SLEEPBITMAP_AND(&m_sleepBitmap, ~bit) & bit)
diff --git a/source/common/wavefront.cpp b/source/common/wavefront.cpp
index c3f2d81a8..d2d03efe9 100644
--- a/source/common/wavefront.cpp
+++ b/source/common/wavefront.cpp
@@ -102,7 +102,7 @@ void WaveFront::findJob(int threadId)
uint32_t oldval = m_internalDependencyBitmap[w] & m_externalDependencyBitmap[w];
while (oldval)
{
- CTZ(id, oldval);
+ BSF(id, oldval);
uint32_t bit = 1 << id;
if (ATOMIC_AND(&m_internalDependencyBitmap[w], ~bit) & bit)
diff --git a/source/common/x86/pixel-util8.asm b/source/common/x86/pixel-util8.asm
index 6ad2852d3..2d3c6e5b8 100644
--- a/source/common/x86/pixel-util8.asm
+++ b/source/common/x86/pixel-util8.asm
@@ -8764,7 +8764,7 @@ cglobal costCoeffNxN, 6,10,5
; {
; {
; unsigned long cidx;
-; CLZ(cidx, codeNumber + 1);
+; BSR(cidx, codeNumber + 1);
; length = cidx;
; }
; codeNumber = (length + length);
@@ -8823,7 +8823,7 @@ cglobal costCoeffRemain, 0,7,1
shr eax, t3b ; codeNumber = ((uint32_t)codeNumber >> goRiceParam) - COEF_REMAIN_BIN_REDUCTION
- lea r2d, [rax - 3 + 1] ; CLZ(cidx, codeNumber + 1);
+ lea r2d, [rax - 3 + 1] ; BSR(cidx, codeNumber + 1);
bsr r2d, r2d
add r2d, r2d ; codeNumber = (length + length)
diff --git a/source/encoder/entropy.cpp b/source/encoder/entropy.cpp
index ece8d6bc9..2fb14f41a 100644
--- a/source/encoder/entropy.cpp
+++ b/source/encoder/entropy.cpp
@@ -1893,7 +1893,7 @@ void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
{
unsigned long idx;
- CLZ(idx, codeNumber + 1);
+ BSR(idx, codeNumber + 1);
length = idx;
X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
codeNumber -= (1 << idx) - 1;
@@ -2206,7 +2206,7 @@ uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
{
{
unsigned long cidx;
- CLZ(cidx, codeNumber + 1);
+ BSR(cidx, codeNumber + 1);
length = cidx;
}
X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
@@ -2490,8 +2490,8 @@ void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absP
{
uint32_t idx;
X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
- CLZ(lastNZPosInCG, subCoeffFlag);
- CTZ(firstNZPosInCG, subCoeffFlag);
+ BSR(lastNZPosInCG, subCoeffFlag);
+ BSF(firstNZPosInCG, subCoeffFlag);
bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
@@ -2887,7 +2887,7 @@ void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
// NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
//numBits = g_renormTable[lps >> 3];
unsigned long idx;
- CLZ(idx, lps);
+ BSR(idx, lps);
X265_CHECK(state != 63 || idx == 1, "state failure\n");
numBits = 8 - idx;
diff --git a/source/encoder/frameencoder.cpp b/source/encoder/frameencoder.cpp
index 9188f4c8c..74ef45eae 100644
--- a/source/encoder/frameencoder.cpp
+++ b/source/encoder/frameencoder.cpp
@@ -196,7 +196,7 @@ bool FrameEncoder::init(Encoder *top, int numRows, int numCols)
// 7.4.7.1 - Ceil( Log2( PicSizeInCtbsY ) ) bits
{
unsigned long tmp;
- CLZ(tmp, (numRows * numCols - 1));
+ BSR(tmp, (numRows * numCols - 1));
m_sliceAddrBits = (uint16_t)(tmp + 1);
}
@@ -374,7 +374,7 @@ uint32_t getBsLength( int32_t code )
++ucode;
unsigned long idx;
- CLZ( idx, ucode );
+ BSR( idx, ucode );
uint32_t length = (uint32_t)idx * 2 + 1;
return length;
diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
index e94a7eb9c..56c4ad79c 100644
--- a/source/encoder/slicetype.cpp
+++ b/source/encoder/slicetype.cpp
@@ -937,7 +937,7 @@ void LookaheadTLD::weightsAnalyse(Lowres& fenc, Lowres& ref)
if (mindenom > 0 && !(minscale & 1))
{
unsigned long idx;
- CTZ(idx, minscale);
+ BSF(idx, minscale);
int shift = X265_MIN((int)idx, mindenom);
mindenom -= shift;
minscale >>= shift;
diff --git a/source/encoder/weightPrediction.cpp b/source/encoder/weightPrediction.cpp
index a3136d056..bc842f173 100644
--- a/source/encoder/weightPrediction.cpp
+++ b/source/encoder/weightPrediction.cpp
@@ -459,7 +459,7 @@ void weightAnalyse(Slice& slice, Frame& frame, x265_param& param)
if (mindenom > 0 && !(minscale & 1))
{
unsigned long idx;
- CTZ(idx, minscale);
+ BSF(idx, minscale);
int shift = X265_MIN((int)idx, mindenom);
mindenom -= shift;
minscale >>= shift;
--
2.34.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0002-Rename-CLZ-CTZ-to-BSR-BSF.patch
Type: text/x-diff
Size: 15011 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250408/d32c81a0/attachment-0001.patch>
More information about the x265-devel
mailing list