diff mbox series

[v2,08/10] reftable/writer: optimize allocations by using a scratch buffer

Message ID 20241120-pks-refs-optimize-migrations-v2-8-a233374b7452@pks.im (mailing list archive)
State New
Headers show
Series refs: optimize ref format migrations | expand

Commit Message

Patrick Steinhardt Nov. 20, 2024, 7:51 a.m. UTC
Both `writer_add_record()` and `reftable_writer_add_ref()` get executed
for every single ref record we're adding to the reftable writer. And as
both functions use a local buffer to write data, the allocations we have
to do here add up during larger transactions.

Refactor the code to use a scratch buffer part of the `reftable_writer`
itself such that we can reuse it. This signifcantly reduces the number
of allocations during large transactions, e.g. when migrating refs from
the "files" backend to the "reftable" backend. Before this change:

    HEAP SUMMARY:
        in use at exit: 80,048 bytes in 49 blocks
      total heap usage: 5,032,171 allocs, 5,032,122 frees, 418,792,092 bytes allocated

After this change:

    HEAP SUMMARY:
        in use at exit: 80,048 bytes in 49 blocks
      total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated

This also translate into a small speedup:

    Benchmark 1: migrate files:reftable (refcount = 1000000, revision = HEAD~)
      Time (mean ± σ):     827.2 ms ±  16.5 ms    [User: 689.4 ms, System: 124.9 ms]
      Range (min … max):   809.0 ms … 924.7 ms    50 runs

    Benchmark 2: migrate files:reftable (refcount = 1000000, revision = HEAD)
      Time (mean ± σ):     813.6 ms ±  11.6 ms    [User: 679.0 ms, System: 123.4 ms]
      Range (min … max):   786.7 ms … 833.5 ms    50 runs

    Summary
      migrate files:reftable (refcount = 1000000, revision = HEAD) ran
        1.02 ± 0.02 times faster than migrate files:reftable (refcount = 1000000, revision = HEAD~)

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/writer.c | 23 +++++++++++------------
 reftable/writer.h |  1 +
 2 files changed, 12 insertions(+), 12 deletions(-)

Comments

Christian Couder Nov. 20, 2024, 10:21 a.m. UTC | #1
On Wed, Nov 20, 2024 at 9:07 AM Patrick Steinhardt <ps@pks.im> wrote:

> diff --git a/reftable/writer.h b/reftable/writer.h
> index e8a6fbb78543e6e56920a2999601db0db9fe4d97..421a897dccd85ad0532860ff1b4f38b2813d438d 100644
> --- a/reftable/writer.h
> +++ b/reftable/writer.h
> @@ -20,6 +20,7 @@ struct reftable_writer {
>         void *write_arg;
>         int pending_padding;
>         struct reftable_buf last_key;
> +       struct reftable_buf buf;

Nit: It would be nice to add a comment, so that readers don't have to
look at .c files, or the commit message, to find what this field is
used for.


>         /* offset of next block to write. */
>         uint64_t next;
diff mbox series

Patch

diff --git a/reftable/writer.c b/reftable/writer.c
index fd136794d5a27b33b5017f36fbd6b095ab8dac5b..6501376ce826469556a7dfa3c39258847300ae66 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -148,6 +148,7 @@  int reftable_writer_new(struct reftable_writer **out,
 
 	reftable_buf_init(&wp->block_writer_data.last_key);
 	reftable_buf_init(&wp->last_key);
+	reftable_buf_init(&wp->buf);
 	REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size);
 	if (!wp->block) {
 		reftable_free(wp);
@@ -180,6 +181,7 @@  static void writer_release(struct reftable_writer *w)
 		w->block_writer = NULL;
 		writer_clear_index(w);
 		reftable_buf_release(&w->last_key);
+		reftable_buf_release(&w->buf);
 	}
 }
 
@@ -249,20 +251,19 @@  static int writer_index_hash(struct reftable_writer *w, struct reftable_buf *has
 static int writer_add_record(struct reftable_writer *w,
 			     struct reftable_record *rec)
 {
-	struct reftable_buf key = REFTABLE_BUF_INIT;
 	int err;
 
-	err = reftable_record_key(rec, &key);
+	err = reftable_record_key(rec, &w->buf);
 	if (err < 0)
 		goto done;
 
-	if (reftable_buf_cmp(&w->last_key, &key) >= 0) {
+	if (reftable_buf_cmp(&w->last_key, &w->buf) >= 0) {
 		err = REFTABLE_API_ERROR;
 		goto done;
 	}
 
 	reftable_buf_reset(&w->last_key);
-	err = reftable_buf_add(&w->last_key, key.buf, key.len);
+	err = reftable_buf_add(&w->last_key, w->buf.buf, w->buf.len);
 	if (err < 0)
 		goto done;
 
@@ -312,7 +313,6 @@  static int writer_add_record(struct reftable_writer *w,
 	}
 
 done:
-	reftable_buf_release(&key);
 	return err;
 }
 
@@ -325,7 +325,6 @@  int reftable_writer_add_ref(struct reftable_writer *w,
 			.ref = *ref
 		},
 	};
-	struct reftable_buf buf = REFTABLE_BUF_INIT;
 	int err;
 
 	if (!ref->refname ||
@@ -340,24 +339,25 @@  int reftable_writer_add_ref(struct reftable_writer *w,
 		goto out;
 
 	if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) {
-		err = reftable_buf_add(&buf, (char *)reftable_ref_record_val1(ref),
+		reftable_buf_reset(&w->buf);
+		err = reftable_buf_add(&w->buf, (char *)reftable_ref_record_val1(ref),
 				       hash_size(w->opts.hash_id));
 		if (err < 0)
 			goto out;
 
-		err = writer_index_hash(w, &buf);
+		err = writer_index_hash(w, &w->buf);
 		if (err < 0)
 			goto out;
 	}
 
 	if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) {
-		reftable_buf_reset(&buf);
-		err = reftable_buf_add(&buf, reftable_ref_record_val2(ref),
+		reftable_buf_reset(&w->buf);
+		err = reftable_buf_add(&w->buf, reftable_ref_record_val2(ref),
 				       hash_size(w->opts.hash_id));
 		if (err < 0)
 			goto out;
 
-		err = writer_index_hash(w, &buf);
+		err = writer_index_hash(w, &w->buf);
 		if (err < 0)
 			goto out;
 	}
@@ -365,7 +365,6 @@  int reftable_writer_add_ref(struct reftable_writer *w,
 	err = 0;
 
 out:
-	reftable_buf_release(&buf);
 	return err;
 }
 
diff --git a/reftable/writer.h b/reftable/writer.h
index e8a6fbb78543e6e56920a2999601db0db9fe4d97..421a897dccd85ad0532860ff1b4f38b2813d438d 100644
--- a/reftable/writer.h
+++ b/reftable/writer.h
@@ -20,6 +20,7 @@  struct reftable_writer {
 	void *write_arg;
 	int pending_padding;
 	struct reftable_buf last_key;
+	struct reftable_buf buf;
 
 	/* offset of next block to write. */
 	uint64_t next;