diff mbox series

[v2,10/10] reftable/block: optimize allocations by using scratch buffer

Message ID 20241120-pks-refs-optimize-migrations-v2-10-a233374b7452@pks.im (mailing list archive)
State New
Headers show
Series refs: optimize ref format migrations | expand

Commit Message

Patrick Steinhardt Nov. 20, 2024, 7:51 a.m. UTC
The block writer needs to compute the key for every record that one adds
to the writer. The buffer for this key is stored on the stack and thus
reallocated on every call to `block_writer_add()`, which is inefficient.

Refactor the code so that we store the buffer in the `block_writer`
struct itself so that we can reuse it. This reduces the number of
allocations when writing many refs, e.g. when migrating one million refs
from the "files" backend to the "reftable" backend. Before this change:

    HEAP SUMMARY:
        in use at exit: 80,048 bytes in 49 blocks
      total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated

After this change:

    HEAP SUMMARY:
        in use at exit: 80,048 bytes in 49 blocks
      total heap usage: 2,013,250 allocs, 2,013,201 frees, 347,543,583 bytes allocated

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/block.c | 13 +++++--------
 reftable/block.h |  1 +
 2 files changed, 6 insertions(+), 8 deletions(-)

Comments

Christian Couder Nov. 20, 2024, 10:22 a.m. UTC | #1
On Wed, Nov 20, 2024 at 8:56 AM Patrick Steinhardt <ps@pks.im> wrote:

> diff --git a/reftable/block.h b/reftable/block.h
> index b3f837d612a8f0fbe98430b04e2dddaa975a15ab..d76f00553073c10185e716e71e2f415ce5dcf7e2 100644
> --- a/reftable/block.h
> +++ b/reftable/block.h
> @@ -39,6 +39,7 @@ struct block_writer {
>         uint32_t restart_cap;
>
>         struct reftable_buf last_key;
> +       struct reftable_buf buf;

Nit (similar to patch 8/10): It would be nice to add a comment, so
that readers don't have to look at .c files, or the commit message, to
find what this field is used for.

>         int entries;
>  };
diff mbox series

Patch

diff --git a/reftable/block.c b/reftable/block.c
index 3fa36c002a0c1852790780e74a6e055161f857d9..1aa7e8cd3cbf0980f6bc20262be89e755d0a4b4b 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -110,24 +110,21 @@  int block_writer_add(struct block_writer *w, struct reftable_record *rec)
 		.buf = w->block + w->next,
 		.len = w->block_size - w->next,
 	};
-
 	struct string_view start = out;
-
 	int is_restart = 0;
-	struct reftable_buf key = REFTABLE_BUF_INIT;
 	int n = 0;
 	int err;
 
-	err = reftable_record_key(rec, &key);
+	err = reftable_record_key(rec, &w->buf);
 	if (err < 0)
 		goto done;
 
-	if (!key.len) {
+	if (!w->buf.len) {
 		err = REFTABLE_API_ERROR;
 		goto done;
 	}
 
-	n = reftable_encode_key(&is_restart, out, last, key,
+	n = reftable_encode_key(&is_restart, out, last, w->buf,
 				reftable_record_val_type(rec));
 	if (n < 0) {
 		err = -1;
@@ -143,9 +140,8 @@  int block_writer_add(struct block_writer *w, struct reftable_record *rec)
 	string_view_consume(&out, n);
 
 	err = block_writer_register_restart(w, start.len - out.len, is_restart,
-					    &key);
+					    &w->buf);
 done:
-	reftable_buf_release(&key);
 	return err;
 }
 
@@ -569,6 +565,7 @@  void block_writer_release(struct block_writer *bw)
 	REFTABLE_FREE_AND_NULL(bw->zstream);
 	REFTABLE_FREE_AND_NULL(bw->restarts);
 	REFTABLE_FREE_AND_NULL(bw->compressed);
+	reftable_buf_release(&bw->buf);
 	reftable_buf_release(&bw->last_key);
 	/* the block is not owned. */
 }
diff --git a/reftable/block.h b/reftable/block.h
index b3f837d612a8f0fbe98430b04e2dddaa975a15ab..d76f00553073c10185e716e71e2f415ce5dcf7e2 100644
--- a/reftable/block.h
+++ b/reftable/block.h
@@ -39,6 +39,7 @@  struct block_writer {
 	uint32_t restart_cap;
 
 	struct reftable_buf last_key;
+	struct reftable_buf buf;
 	int entries;
 };