相关文章推荐
慷慨大方的皮带  ·  VBA textbox ...·  1 年前    · 
坏坏的小熊猫  ·  无法对 ...·  2 年前    · 
宽容的苦咖啡  ·  【Day 28】Cmd ...·  2 年前    · 
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Show hidden characters

There are no __lzcnt64() and _BitScanForward64() functions for x86 AFAICS. How about:

--- a/ctz_clz.cpp 2021-10-16 11:52:42
+++ b/ctz_clz.cpp 2021-10-19 09:15:54
@@ -14,7 +14,15 @@
 int __builtin_ctzll(unsigned long long x) {
     unsigned long ret;
+#ifdef _WIN64
     _BitScanForward64(&ret, x);
+#else
+    if (_BitScanForward(&ret, (uint32_t)x) == 0) { // low 32 bits are all zero.
+      _BitScanForward(&ret, (uint32_t)(x >> 32));
+      ret += 32;
+#endif
     return (int)ret;
@@ -30,10 +38,17 @@
 int __builtin_clzll(unsigned long long x) {
-    //unsigned long ret;
-    //_BitScanReverse64(&ret, x);
-    //return (int)(63 ^ ret);
+#ifdef _WIN64
     return (int)__lzcnt64(x);
+#else
+    unsigned long ret;
+    // Scan the high 32 bits.
+    if (_BitScanReverse(&ret, (uint32_t)(x >> 32)))
+       return (63 ^ (ret + 32));
+    // Scan the low 32 bits.
+    _BitScanReverse(&ret, (uint32_t)x);
+    return (63 ^ (int)ret);
+#endif
 int __builtin_clzl(unsigned long x) {

Is this correct?

@gvanem yes something like that (sorry for my code with highly decorated indirections)

edit (better to see in context)

https://github.com/mu578/mu0/blob/master/mu0/mu0_definition/mu0_bitwiseop.h

I updated the code to support 32/64 bit builds. It's BMI+ though (doesn't try to use bsr )

https://godbolt.org/z/G766KEM6j

@pps83 thanks see my updates too, still digging up between targets ; that's funny or not, this simple thing is @$##^&&% to get right ; just for getting a bit_width , all I want that's a portable bit_width operator jeez ; soft emulation takes no ROM ; maybe with a small lookup table I could shrink down the number of ops... whatever just want bit_width, can I get that easy... it seems not. ICC have _lzcnt_u64, _lzcnt_u32 and IBM XL __cntlz4, __cntlz8, but they have now a clang front-end so ; clang forever. ```update ARMCC has __rbit() and __clz() however that's a clang fork so... just logging for posterity.

For a soft emulation fallback (according to my CPU benchmarks) the following is the best
player:

__mu0_static_inline__
int __mu0_cntlz_i__(const unsigned int __x)
	__mu0_static__
	const unsigned char s_table[32] =
		   0,  9,  1, 10, 13, 21,  2, 29
		, 11, 14, 16, 18, 22, 25,  3, 30
		,  8, 12, 20, 28, 15, 17, 24,  7
		, 19, 27, 23,  6, 26,  5,  4, 31
	const unsigned int d = __mu0_bit_digits_i__();
	      unsigned int x = __x;
	if (x) {
		x = x | (x >>  1U);
		x = x | (x >>  2U);
		x = x | (x >>  4U);
		x = x | (x >>  8U);
		x = x | (x >> 16U);
		return (d - 1) - s_table[((x * 0x07C4ACDD) >> 27U) % d];
	return d;
#	if 0
__mu0_static_inline__
int __mu0_cntlz_i__(const unsigned int __x)
	unsigned int x, d, y;
	if (__x) {
		x = __x;
		d = __mu0_bit_digits_i__();
		y = x >> 16U; if (y != 0U) { d = d - 16U; x = y; }
		y = x >>  8U; if (y != 0U) { d = d -  8U; x = y; }
		y = x >>  4U; if (y != 0U) { d = d -  4U; x = y; }
		y = x >>  2U; if (y != 0U) { d = d -  2U; x = y; }
		y = x >>  1U;
		return __mu0_cast__(int, ((y != 0U) ? d - 2U : d - x));
	return __mu0_bit_digits_i__();
#	endif
__mu0_static_inline__
int __mu0_cntlz_ll__(const unsigned long long __x)
	const unsigned int d = __mu0_bit_digits_i__();
#	if MU0_HAVE_C99 || MU0_HAVE_CPP11
	return ((__x & 0xFFFFFFFF00000000ULL)
		?     __mu0_cntlz_i__(__x >> d)
		: d + __mu0_cntlz_i__(__x & 0xFFFFFFFFULL)
#	else
	return ((__x & 0xFFFFFFFF00000000U)
		?     __mu0_cntlz_i__(__x >> d)
		: d + __mu0_cntlz_i__(__x & 0xFFFFFFFFU)
#	endif
__mu0_static_inline__
int __mu0_cntlz_l__(const unsigned long __x)
#	if defined(__LP64__) || defined(__LP64)
	return __mu0_cntlz_ll__(__x);
#	else
	return __mu0_cntlz_i__(__x);
#	endif
      

Hi Pavel,

This is really useful! I'd like to include this as a support file in my C container library.
https://github.com/nickpelling/C_STD

Is there a copyright or licence notice you'd like this published under? I use the MIT license because it's most permissive (I think):
https://github.com/nickpelling/C_STD?tab=MIT-1-ov-file

Hi Pavel,

This is really useful! I'd like to include this as a support file in my C container library. https://github.com/nickpelling/C_STD

Is there a copyright or licence notice you'd like this published under? I use the MIT license because it's most permissive (I think): https://github.com/nickpelling/C_STD?tab=MIT-1-ov-file

OK to take it as is. Hopefully it's useful for users of C_STD.
Note, that the code uses _tzcnt_u32, _tzcnt_u64, _lzcnt_u32 _lzcnt_u64. AFAIK, these are BMI (or BMI2), so, it might be a good idea to use bsf variants instead for generic targets.

  • check for __AVX2__ builds to use lzcnt/tzcnt vs bsf/bsr
  • add code to handle arm/arm64
  • add guard for clang-cl
  • The code was tested to verify that it's bit exact with the code that clang/gcc emits for these builtins.