mbox series

[v2,0/8] refs/reftable: reuse iterators when reading refs

Message ID cover.1730792627.git.ps@pks.im (mailing list archive)
Headers show
Series refs/reftable: reuse iterators when reading refs | expand

Message

Patrick Steinhardt Nov. 5, 2024, 9:11 a.m. UTC
Hi,

this is the second version of my patch series that refactors the
reftable backend to reuse iterators when reading random references. This
removes the overhead of having to recreate the iterator on every read
and thus leads to better performance and less allocation churn.

The only change compared to v1 is that I've rebased the series on top of
8f8d6eee53 (The seventh batch, 2024-11-01) with ps/reftable-detach at
3740325472 (reftable/system: provide thin wrapper for lockfile
subsystem, 2024-10-23) merged into it. This was done to fix textual and
semantic conflicts with that series.

Thanks!

Patrick

Patrick Steinhardt (8):
  refs/reftable: encapsulate reftable stack
  refs/reftable: handle reloading stacks in the reftable backend
  refs/reftable: read references via `struct reftable_backend`
  refs/reftable: refactor reading symbolic refs to use reftable backend
  refs/reftable: refactor reflog expiry to use reftable backend
  reftable/stack: add mechanism to notify callers on reload
  reftable/merged: drain priority queue on reseek
  refs/reftable: reuse iterators when reading refs

 refs/reftable-backend.c          | 369 +++++++++++++++++++------------
 reftable/merged.c                |   2 +
 reftable/reftable-stack.h        |   3 +
 reftable/reftable-writer.h       |   9 +
 reftable/stack.c                 |   9 +
 t/unit-tests/t-reftable-merged.c |  73 ++++++
 6 files changed, 321 insertions(+), 144 deletions(-)

Range-diff against v1:
1:  b599bcdac1 = 1:  ac01c06c41 refs/reftable: encapsulate reftable stack
2:  b81ce63589 = 2:  bab837e373 refs/reftable: handle reloading stacks in the reftable backend
3:  00fdf392a6 ! 3:  1b50655202 refs/reftable: read references via `struct reftable_backend`
    @@ refs/reftable-backend.c: static void reftable_backend_release(struct reftable_ba
     +		strbuf_addstr(referent, ref.value.symref);
     +		*type |= REF_ISSYMREF;
     +	} else if (reftable_ref_record_val1(&ref)) {
    ++		unsigned int hash_id;
    ++
    ++		switch (reftable_stack_hash_id(be->stack)) {
    ++		case REFTABLE_HASH_SHA1:
    ++			hash_id = GIT_HASH_SHA1;
    ++			break;
    ++		case REFTABLE_HASH_SHA256:
    ++			hash_id = GIT_HASH_SHA256;
    ++			break;
    ++		default:
    ++			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
    ++		}
    ++
     +		oidread(oid, reftable_ref_record_val1(&ref),
    -+			&hash_algos[hash_algo_by_id(reftable_stack_hash_id(be->stack))]);
    ++			&hash_algos[hash_id]);
     +	} else {
     +		/* We got a tombstone, which should not happen. */
     +		BUG("unhandled reference value type %d", ref.value_type);
    @@ reftable/reftable-stack.h: struct reftable_compaction_stats {
      struct reftable_compaction_stats *
      reftable_stack_compaction_stats(struct reftable_stack *st);
      
    -+/* return the hash ID of the merged table. */
    -+uint32_t reftable_stack_hash_id(struct reftable_stack *st);
    ++/* Return the hash of the stack. */
    ++enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st);
     +
      #endif
     
    @@ reftable/stack.c: int reftable_stack_clean(struct reftable_stack *st)
      	return err;
      }
     +
    -+uint32_t reftable_stack_hash_id(struct reftable_stack *st)
    ++enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
     +{
     +	return reftable_merged_table_hash_id(st->merged);
     +}
4:  142081cb0c = 4:  0906b04fc6 refs/reftable: refactor reading symbolic refs to use reftable backend
5:  44f4adce9a = 5:  355557ec95 refs/reftable: refactor reflog expiry to use reftable backend
6:  0a294b577f ! 6:  71ad6c80b0 reftable/stack: add mechanism to notify callers on reload
    @@ Commit message
     
      ## reftable/reftable-writer.h ##
     @@ reftable/reftable-writer.h: struct reftable_write_options {
    - 	 * negative value will cause us to block indefinitely.
    + 	 * fsync(3P) when unset.
      	 */
    - 	long lock_timeout_ms;
    + 	int (*fsync)(int fd);
     +
     +	/*
     +	 * Callback function to execute whenever the stack is being reloaded.
7:  45f397b563 ! 7:  93efd11886 reftable/merged: drain priority queue on reseek
    @@ t/unit-tests/t-reftable-merged.c: static void t_merged_seek_multiple_times(void)
     +	check(!err);
     +	err = reftable_iterator_next_ref(&it, &rec);
     +	check(!err);
    -+	err = reftable_ref_record_equal(&rec, &r2[0], GIT_SHA1_RAWSZ);
    ++	err = reftable_ref_record_equal(&rec, &r2[0], REFTABLE_HASH_SIZE_SHA1);
     +	check(err == 1);
     +
     +	err = reftable_iterator_seek_ref(&it, "a");
     +	check(!err);
     +	err = reftable_iterator_next_ref(&it, &rec);
     +	check(!err);
    -+	err = reftable_ref_record_equal(&rec, &r1[0], GIT_SHA1_RAWSZ);
    ++	err = reftable_ref_record_equal(&rec, &r1[0], REFTABLE_HASH_SIZE_SHA1);
     +	check(err == 1);
     +
     +	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)
8:  feb4e6a36f = 8:  276c27e770 refs/reftable: reuse iterators when reading refs