diff mbox series

[v2,1/3] wrapper: introduce `log2u()`

Message ID df8c5dffffed0edd9068406fcf39ac6608bc930f.1725439407.git.ps@pks.im (mailing list archive)
State Accepted
Commit d343068e4abc5e43d1ef1d5fed42bf4d7aa8cff4
Headers show
Series refs/files: use heuristics to decide whether to repack with `--auto` | expand

Commit Message

Patrick Steinhardt Sept. 4, 2024, 8:53 a.m. UTC
We have an implementation of a function that computes the log2 for an
integer. While we could instead use log2(3P), that involves floating
point numbers and is thus needlessly complex and inefficient.

We're about to add a second caller that wants to compute log2 for a
`size_t`. Let's thus move the function into "wrapper.h" such that it
becomes generally available.

While at it, tweak the implementation a bit:

  - The parameter is converted from `int` to `uintmax_t`. This
    conversion is safe to do in "bisect.c" because we already check that
    the argument is positive.

  - The return value is an `unsigned`. It cannot ever be negative, so it
    is pointless for it to be a signed integer.

  - Loop until `!n` instead of requiring that `n > 1` and then subtract
    1 from the result and add a special case for `!sz`. This helps
    compilers to generate more efficient code.

Compilers recognize the pattern of this function and optimize
accordingly. On GCC 14.2 x86_64:

    log2u(unsigned long):
            test    rdi, rdi
            je      .L3
            bsr     rax, rdi
            ret
    .L3:
            mov     eax, -1
            ret

Clang 18.1 does not yet recognize the pattern, but starts to do so on
Clang trunk x86_64. The code isn't quite as efficient as the one
generated by GCC, but still manages to optimize away the loop:

    log2u(unsigned long):
            test    rdi, rdi
            je      .LBB0_1
            shr     rdi
            bsr     rcx, rdi
            mov     eax, 127
            cmovne  rax, rcx
            xor     eax, -64
            add     eax, 65
            ret
    .LBB0_1:
            mov     eax, -1
            ret

The pattern is also recognized on other platforms like ARM64 GCC 14.2.0,
where we end up using `clz`:

    log2u(unsigned long):
            clz     x2, x0
            cmp     x0, 0
            mov     w1, 63
            sub     w0, w1, w2
            csinv   w0, w0, wzr, ne
            ret

Note that we have a similar function `fastlog2()` in the reftable code.
As that codebase is separate from the Git codebase we do not adapt it to
use the new function.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 bisect.c  | 12 +-----------
 wrapper.h | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/bisect.c b/bisect.c
index 4406fc44cf9..4713226fad4 100644
--- a/bisect.c
+++ b/bisect.c
@@ -1130,16 +1130,6 @@  enum bisect_error bisect_next_all(struct repository *r, const char *prefix)
 	return res;
 }
 
-static inline int log2i(int n)
-{
-	int log2 = 0;
-
-	for (; n > 1; n >>= 1)
-		log2++;
-
-	return log2;
-}
-
 static inline int exp2i(int n)
 {
 	return 1 << n;
@@ -1162,7 +1152,7 @@  int estimate_bisect_steps(int all)
 	if (all < 3)
 		return 0;
 
-	n = log2i(all);
+	n = log2u(all);
 	e = exp2i(n);
 	x = all - e;
 
diff --git a/wrapper.h b/wrapper.h
index 1b2b047ea06..a6b3e1f09ec 100644
--- a/wrapper.h
+++ b/wrapper.h
@@ -140,4 +140,22 @@  int csprng_bytes(void *buf, size_t len);
  */
 uint32_t git_rand(void);
 
+/* Provide log2 of the given `size_t`. */
+static inline unsigned log2u(uintmax_t sz)
+{
+	unsigned l = 0;
+
+	/*
+	 * Technically this isn't required, but it helps the compiler optimize
+	 * this to a `bsr` instruction.
+	 */
+	if (!sz)
+		return 0;
+
+	for (; sz; sz >>= 1)
+		l++;
+
+	return l - 1;
+}
+
 #endif /* WRAPPER_H */