@@ -1130,16 +1130,6 @@ enum bisect_error bisect_next_all(struct repository *r, const char *prefix)
return res;
}
-static inline int log2i(int n)
-{
- int log2 = 0;
-
- for (; n > 1; n >>= 1)
- log2++;
-
- return log2;
-}
-
static inline int exp2i(int n)
{
return 1 << n;
@@ -1162,7 +1152,7 @@ int estimate_bisect_steps(int all)
if (all < 3)
return 0;
- n = log2i(all);
+ n = log2u(all);
e = exp2i(n);
x = all - e;
@@ -140,4 +140,22 @@ int csprng_bytes(void *buf, size_t len);
*/
uint32_t git_rand(void);
+/* Provide log2 of the given `size_t`. */
+static inline unsigned log2u(uintmax_t sz)
+{
+ unsigned l = 0;
+
+ /*
+ * Technically this isn't required, but it helps the compiler optimize
+ * this to a `bsr` instruction.
+ */
+ if (!sz)
+ return 0;
+
+ for (; sz; sz >>= 1)
+ l++;
+
+ return l - 1;
+}
+
#endif /* WRAPPER_H */
We have an implementation of a function that computes the log2 for an integer. While we could instead use log2(3P), that involves floating point numbers and is thus needlessly complex and inefficient. We're about to add a second caller that wants to compute log2 for a `size_t`. Let's thus move the function into "wrapper.h" such that it becomes generally available. While at it, tweak the implementation a bit: - The parameter is converted from `int` to `uintmax_t`. This conversion is safe to do in "bisect.c" because we already check that the argument is positive. - The return value is an `unsigned`. It cannot ever be negative, so it is pointless for it to be a signed integer. - Loop until `!n` instead of requiring that `n > 1` and then subtract 1 from the result and add a special case for `!sz`. This helps compilers to generate more efficient code. Compilers recognize the pattern of this function and optimize accordingly. On GCC 14.2 x86_64: log2u(unsigned long): test rdi, rdi je .L3 bsr rax, rdi ret .L3: mov eax, -1 ret Clang 18.1 does not yet recognize the pattern, but starts to do so on Clang trunk x86_64. The code isn't quite as efficient as the one generated by GCC, but still manages to optimize away the loop: log2u(unsigned long): test rdi, rdi je .LBB0_1 shr rdi bsr rcx, rdi mov eax, 127 cmovne rax, rcx xor eax, -64 add eax, 65 ret .LBB0_1: mov eax, -1 ret The pattern is also recognized on other platforms like ARM64 GCC 14.2.0, where we end up using `clz`: log2u(unsigned long): clz x2, x0 cmp x0, 0 mov w1, 63 sub w0, w1, w2 csinv w0, w0, wzr, ne ret Note that we have a similar function `fastlog2()` in the reftable code. As that codebase is separate from the Git codebase we do not adapt it to use the new function. Signed-off-by: Patrick Steinhardt <ps@pks.im> --- bisect.c | 12 +----------- wrapper.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 11 deletions(-)