[x265] [PATCH 2/2] Rename CLZ/CTZ to BSR/BSF

Tue Apr 8 15:13:44 UTC 2025

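The CLZ/CTZ macros are actually synonymous with MSVC's BitScanReverse and
BitScanForward intrinsics, so rename them appropriately. In particular, CLZ
does not return a count of leading zeros: it returns the index of the
highest set bit (__builtin_clz(x) ^ 31), which is what BSR computes.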
---
 source/common/aarch64/dct-prim.cpp  |  4 ++--
 source/common/aarch64/dct-prim.h    |  2 +-
 source/common/bitstream.cpp         |  2 +-
 source/common/dct.cpp               |  4 ++--
 source/common/ppc/dct_altivec.cpp   |  2 +-
 source/common/quant.cpp             |  8 ++++----
 source/common/threading.h           | 24 ++++++++++++------------
 source/common/threadpool.cpp        | 10 +++++-----
 source/common/wavefront.cpp         |  2 +-
 source/common/x86/pixel-util8.asm   |  4 ++--
 source/encoder/entropy.cpp          | 10 +++++-----
 source/encoder/frameencoder.cpp     |  4 ++--
 source/encoder/slicetype.cpp        |  2 +-
 source/encoder/weightPrediction.cpp |  2 +-
 14 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/source/common/aarch64/dct-prim.cpp b/source/common/aarch64/dct-prim.cpp
index 6a3d95e91..5ac15bdd3 100644
--- a/source/common/aarch64/dct-prim.cpp
+++ b/source/common/aarch64/dct-prim.cpp
@@ -1904,9 +1904,9 @@ uint32_t findPosFirstLast_neon(const int16_t *coeff, const intptr_t trSize,
     }
 
     unsigned long id_first, id_last;
-    CTZ64(id_first, cmp_4bit);
+    BSF64(id_first, cmp_4bit);
     uint32_t firstNZPosInCG = (uint32_t)id_first >> 2;
-    CLZ64(id_last, cmp_4bit);
+    BSR64(id_last, cmp_4bit);
     uint32_t lastNZPosInCG = (uint32_t)id_last >> 2;
 
     // Add long not needed, we only need LSB.
diff --git a/source/common/aarch64/dct-prim.h b/source/common/aarch64/dct-prim.h
index dc296962b..ec09482a3 100644
--- a/source/common/aarch64/dct-prim.h
+++ b/source/common/aarch64/dct-prim.h
@@ -5,7 +5,7 @@
 #include "common.h"
 #include "primitives.h"
 #include "contexts.h"   // costCoeffNxN_c
-#include "threading.h"  // CLZ
+#include "threading.h"  // BSR
 #include <arm_neon.h>
 
 namespace X265_NS
diff --git a/source/common/bitstream.cpp b/source/common/bitstream.cpp
index b844749f5..d4a5c2da9 100644
--- a/source/common/bitstream.cpp
+++ b/source/common/bitstream.cpp
@@ -118,7 +118,7 @@ void SyntaxElementWriter::writeUvlc(uint32_t code)
     X265_CHECK(code, "writing -1 code, will cause infinite loop\n");
 
     unsigned long idx;
-    CLZ(idx, code);
+    BSR(idx, code);
     uint32_t length = (uint32_t)idx * 2 + 1;
 
     // Take care of cases where length > 32
diff --git a/source/common/dct.cpp b/source/common/dct.cpp
index a0b977f4a..932dc7dd5 100644
--- a/source/common/dct.cpp
+++ b/source/common/dct.cpp
@@ -30,7 +30,7 @@
 #include "common.h"
 #include "primitives.h"
 #include "contexts.h"   // costCoeffNxN_c
-#include "threading.h"  // CLZ
+#include "threading.h"  // BSR
 
 using namespace X265_NS;
 
@@ -911,7 +911,7 @@ static uint32_t costCoeffRemain_c(uint16_t *absCoeff, int numNonZero, int idx)
             {
                 {
                     unsigned long cidx;
-                    CLZ(cidx, codeNumber + 1);
+                    BSR(cidx, codeNumber + 1);
                     length = cidx;
                 }
                 X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
diff --git a/source/common/ppc/dct_altivec.cpp b/source/common/ppc/dct_altivec.cpp
index 92a4c59f4..b8c0c03b8 100644
--- a/source/common/ppc/dct_altivec.cpp
+++ b/source/common/ppc/dct_altivec.cpp
@@ -25,7 +25,7 @@
 #include "common.h"
 #include "primitives.h"
 #include "contexts.h"   // costCoeffNxN_c
-#include "threading.h"  // CLZ
+#include "threading.h"  // BSR
 #include "ppccommon.h"
 
 using namespace X265_NS;
diff --git a/source/common/quant.cpp b/source/common/quant.cpp
index 303b2a5e9..b6521912b 100644
--- a/source/common/quant.cpp
+++ b/source/common/quant.cpp
@@ -80,7 +80,7 @@ inline int getICRate(uint32_t absLevel, int32_t diffLevel, const int* greaterOne
 
             // NOTE: mapping to x86 hardware instruction BSR
             unsigned long size;
-            CLZ(size, absLevel);
+            BSR(size, absLevel);
             int egs = size * 2 + 1;
 
             rate += egs << 15;
@@ -164,7 +164,7 @@ inline uint32_t getICRateCost(uint32_t absLevel, int32_t diffLevel, const int* g
             if (symbol)
             {
                 unsigned long idx;
-                CLZ(idx, symbol + 1);
+                BSR(idx, symbol + 1);
                 length = idx;
             }
 
@@ -293,10 +293,10 @@ uint32_t Quant::signBitHidingHDQ(int16_t* coeff, int32_t* deltaU, uint32_t numSi
         int firstNZPosInCG0 = n;
 #endif
 
-        CLZ(tmp, coeffFlag[cg]);
+        BSR(tmp, coeffFlag[cg]);
         const int firstNZPosInCG = (15 ^ tmp);
 
-        CTZ(tmp, coeffFlag[cg]);
+        BSF(tmp, coeffFlag[cg]);
         const int lastNZPosInCG = (15 ^ tmp);
 
         X265_CHECK(firstNZPosInCG0 == firstNZPosInCG, "firstNZPosInCG0 check failure\n");
diff --git a/source/common/threading.h b/source/common/threading.h
index 2a1743738..2fa62bcc2 100644
--- a/source/common/threading.h
+++ b/source/common/threading.h
@@ -58,10 +58,10 @@ int no_atomic_dec(int* ptr);
 int no_atomic_add(int* ptr, int val);
 }
 
-#define CLZ(id, x)            id = (unsigned long)__builtin_clz(x) ^ 31
-#define CTZ(id, x)            id = (unsigned long)__builtin_ctz(x)
-#define CLZ64(id, x)          id = (unsigned long)__builtin_clzll(x) ^ 63
-#define CTZ64(id, x)          id = (unsigned long)__builtin_ctzll(x)
+#define BSR(id, x)            (id) = ((unsigned long)__builtin_clz(x) ^ 31)
+#define BSF(id, x)            (id) = ((unsigned long)__builtin_ctz(x))
+#define BSR64(id, x)          (id) = ((unsigned long)__builtin_clzll(x) ^ 63)
+#define BSF64(id, x)          (id) = ((unsigned long)__builtin_ctzll(x))
 #define ATOMIC_OR(ptr, mask)  no_atomic_or((int*)ptr, mask)
 #define ATOMIC_AND(ptr, mask) no_atomic_and((int*)ptr, mask)
 #define ATOMIC_INC(ptr)       no_atomic_inc((int*)ptr)
@@ -74,10 +74,10 @@ int no_atomic_add(int* ptr, int val);
 #include <sys/time.h>
 #include <unistd.h>
 
-#define CLZ(id, x)            id = (unsigned long)__builtin_clz(x) ^ 31
-#define CTZ(id, x)            id = (unsigned long)__builtin_ctz(x)
-#define CLZ64(id, x)          id = (unsigned long)__builtin_clzll(x) ^ 63
-#define CTZ64(id, x)          id = (unsigned long)__builtin_ctzll(x)
+#define BSR(id, x)            (id) = ((unsigned long)__builtin_clz(x) ^ 31)
+#define BSF(id, x)            (id) = ((unsigned long)__builtin_ctz(x))
+#define BSR64(id, x)          (id) = ((unsigned long)__builtin_clzll(x) ^ 63)
+#define BSF64(id, x)          (id) = ((unsigned long)__builtin_ctzll(x))
 #define ATOMIC_OR(ptr, mask)  __sync_fetch_and_or(ptr, mask)
 #define ATOMIC_AND(ptr, mask) __sync_fetch_and_and(ptr, mask)
 #define ATOMIC_INC(ptr)       __sync_add_and_fetch((volatile int32_t*)ptr, 1)
@@ -89,10 +89,10 @@ int no_atomic_add(int* ptr, int val);
 
 #include <intrin.h>
 
-#define CLZ(id, x)            _BitScanReverse(&id, x)
-#define CTZ(id, x)            _BitScanForward(&id, x)
-#define CLZ64(id, x)          _BitScanReverse64(&id, x)
-#define CTZ64(id, x)          _BitScanForward64(&id, x)
+#define BSR(id, x)            _BitScanReverse(&id, x)
+#define BSF(id, x)            _BitScanForward(&id, x)
+#define BSR64(id, x)          _BitScanReverse64(&id, x)
+#define BSF64(id, x)          _BitScanForward64(&id, x)
 #define ATOMIC_INC(ptr)       InterlockedIncrement((volatile LONG*)ptr)
 #define ATOMIC_DEC(ptr)       InterlockedDecrement((volatile LONG*)ptr)
 #define ATOMIC_ADD(ptr, val)  InterlockedExchangeAdd((volatile LONG*)ptr, val)
diff --git a/source/common/threadpool.cpp b/source/common/threadpool.cpp
index b3505e5c0..ec96f18b1 100644
--- a/source/common/threadpool.cpp
+++ b/source/common/threadpool.cpp
@@ -36,13 +36,13 @@
 
 #ifdef __GNUC__
 
-#define SLEEPBITMAP_CTZ(id, x)     id = (unsigned long)__builtin_ctzll(x)
+#define SLEEPBITMAP_BSF(id, x)     (id) = ((unsigned long)__builtin_ctzll(x))
 #define SLEEPBITMAP_OR(ptr, mask)  __sync_fetch_and_or(ptr, mask)
 #define SLEEPBITMAP_AND(ptr, mask) __sync_fetch_and_and(ptr, mask)
 
 #elif defined(_MSC_VER)
 
-#define SLEEPBITMAP_CTZ(id, x)     _BitScanForward64(&id, x)
+#define SLEEPBITMAP_BSF(id, x)     _BitScanForward64(&id, x)
 #define SLEEPBITMAP_OR(ptr, mask)  InterlockedOr64((volatile LONG64*)ptr, (LONG)mask)
 #define SLEEPBITMAP_AND(ptr, mask) InterlockedAnd64((volatile LONG64*)ptr, (LONG)mask)
 
@@ -51,7 +51,7 @@
 #else
 
 /* use 32-bit primitives defined in threading.h */
-#define SLEEPBITMAP_CTZ CTZ
+#define SLEEPBITMAP_BSF BSF
 #define SLEEPBITMAP_OR  ATOMIC_OR
 #define SLEEPBITMAP_AND ATOMIC_AND
 
@@ -206,7 +206,7 @@ int ThreadPool::tryAcquireSleepingThread(sleepbitmap_t firstTryBitmap, sleepbitm
     sleepbitmap_t masked = m_sleepBitmap & firstTryBitmap;
     while (masked)
     {
-        SLEEPBITMAP_CTZ(id, masked);
+        SLEEPBITMAP_BSF(id, masked);
 
         sleepbitmap_t bit = (sleepbitmap_t)1 << id;
         if (SLEEPBITMAP_AND(&m_sleepBitmap, ~bit) & bit)
@@ -218,7 +218,7 @@ int ThreadPool::tryAcquireSleepingThread(sleepbitmap_t firstTryBitmap, sleepbitm
     masked = m_sleepBitmap & secondTryBitmap;
     while (masked)
     {
-        SLEEPBITMAP_CTZ(id, masked);
+        SLEEPBITMAP_BSF(id, masked);
 
         sleepbitmap_t bit = (sleepbitmap_t)1 << id;
         if (SLEEPBITMAP_AND(&m_sleepBitmap, ~bit) & bit)
diff --git a/source/common/wavefront.cpp b/source/common/wavefront.cpp
index c3f2d81a8..d2d03efe9 100644
--- a/source/common/wavefront.cpp
+++ b/source/common/wavefront.cpp
@@ -102,7 +102,7 @@ void WaveFront::findJob(int threadId)
         uint32_t oldval = m_internalDependencyBitmap[w] & m_externalDependencyBitmap[w];
         while (oldval)
         {
-            CTZ(id, oldval);
+            BSF(id, oldval);
 
             uint32_t bit = 1 << id;
             if (ATOMIC_AND(&m_internalDependencyBitmap[w], ~bit) & bit)
diff --git a/source/common/x86/pixel-util8.asm b/source/common/x86/pixel-util8.asm
index 6ad2852d3..2d3c6e5b8 100644
--- a/source/common/x86/pixel-util8.asm
+++ b/source/common/x86/pixel-util8.asm
@@ -8764,7 +8764,7 @@ cglobal costCoeffNxN, 6,10,5
 ;        {
 ;            {
 ;                unsigned long cidx;
-;                CLZ(cidx, codeNumber + 1);
+;                BSR(cidx, codeNumber + 1);
 ;                length = cidx;
 ;            }
 ;            codeNumber = (length + length);
@@ -8823,7 +8823,7 @@ cglobal costCoeffRemain, 0,7,1
 
     shr         eax, t3b                ; codeNumber = ((uint32_t)codeNumber >> goRiceParam) - COEF_REMAIN_BIN_REDUCTION
 
-    lea         r2d, [rax - 3 + 1]      ; CLZ(cidx, codeNumber + 1);
+    lea         r2d, [rax - 3 + 1]      ; BSR(cidx, codeNumber + 1);
     bsr         r2d, r2d
     add         r2d, r2d                ; codeNumber = (length + length)
 
diff --git a/source/encoder/entropy.cpp b/source/encoder/entropy.cpp
index ece8d6bc9..2fb14f41a 100644
--- a/source/encoder/entropy.cpp
+++ b/source/encoder/entropy.cpp
@@ -1893,7 +1893,7 @@ void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
         codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
         {
             unsigned long idx;
-            CLZ(idx, codeNumber + 1);
+            BSR(idx, codeNumber + 1);
             length = idx;
             X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
             codeNumber -= (1 << idx) - 1;
@@ -2206,7 +2206,7 @@ uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
             {
                 {
                     unsigned long cidx;
-                    CLZ(cidx, codeNumber + 1);
+                    BSR(cidx, codeNumber + 1);
                     length = cidx;
                 }
                 X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
@@ -2490,8 +2490,8 @@ void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absP
         {
             uint32_t idx;
             X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
-            CLZ(lastNZPosInCG, subCoeffFlag);
-            CTZ(firstNZPosInCG, subCoeffFlag);
+            BSR(lastNZPosInCG, subCoeffFlag);
+            BSF(firstNZPosInCG, subCoeffFlag);
 
             bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
             const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
@@ -2887,7 +2887,7 @@ void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
         // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
         //numBits = g_renormTable[lps >> 3];
         unsigned long idx;
-        CLZ(idx, lps);
+        BSR(idx, lps);
         X265_CHECK(state != 63 || idx == 1, "state failure\n");
 
         numBits = 8 - idx;
diff --git a/source/encoder/frameencoder.cpp b/source/encoder/frameencoder.cpp
index 9188f4c8c..74ef45eae 100644
--- a/source/encoder/frameencoder.cpp
+++ b/source/encoder/frameencoder.cpp
@@ -196,7 +196,7 @@ bool FrameEncoder::init(Encoder *top, int numRows, int numCols)
     // 7.4.7.1 - Ceil( Log2( PicSizeInCtbsY ) ) bits
     {
         unsigned long tmp;
-        CLZ(tmp, (numRows * numCols - 1));
+        BSR(tmp, (numRows * numCols - 1));
         m_sliceAddrBits = (uint16_t)(tmp + 1);
     }
 
@@ -374,7 +374,7 @@ uint32_t getBsLength( int32_t code )
 
     ++ucode;
     unsigned long idx;
-    CLZ( idx, ucode );
+    BSR( idx, ucode );
     uint32_t length = (uint32_t)idx * 2 + 1;
 
     return length;
diff --git a/source/encoder/slicetype.cpp b/source/encoder/slicetype.cpp
index e94a7eb9c..56c4ad79c 100644
--- a/source/encoder/slicetype.cpp
+++ b/source/encoder/slicetype.cpp
@@ -937,7 +937,7 @@ void LookaheadTLD::weightsAnalyse(Lowres& fenc, Lowres& ref)
     if (mindenom > 0 && !(minscale & 1))
     {
         unsigned long idx;
-        CTZ(idx, minscale);
+        BSF(idx, minscale);
         int shift = X265_MIN((int)idx, mindenom);
         mindenom -= shift;
         minscale >>= shift;
diff --git a/source/encoder/weightPrediction.cpp b/source/encoder/weightPrediction.cpp
index a3136d056..bc842f173 100644
--- a/source/encoder/weightPrediction.cpp
+++ b/source/encoder/weightPrediction.cpp
@@ -459,7 +459,7 @@ void weightAnalyse(Slice& slice, Frame& frame, x265_param& param)
                 if (mindenom > 0 && !(minscale & 1))
                 {
                     unsigned long idx;
-                    CTZ(idx, minscale);
+                    BSF(idx, minscale);
                     int shift = X265_MIN((int)idx, mindenom);
                     mindenom -= shift;
                     minscale >>= shift;
-- 
2.34.1

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0002-Rename-CLZ-CTZ-to-BSR-BSF.patch
Type: text/x-diff
Size: 15011 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20250408/d32c81a0/attachment-0001.patch>