diff mbox series

[4/9] refs/reftable: don't recompute committer ident

Message ID a9a6795c025b23035bfdd3e23b0113df9f6c5e4b.1712078736.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series reftable: optimize write performance | expand

Commit Message

Patrick Steinhardt April 2, 2024, 5:30 p.m. UTC
In order to write reflog entries we need to compute the committer's
identity as it becomes encoded in the log record itself. In the reftable
backend, computing the identity is repeated for every single reflog
entry which we are about to write in a transaction. Needless to say,
this can be quite a waste of effort when writing many refs with reflog
entries in a single transaction.

Refactor the code to pre-compute the committer information. This results
in a small speedup when writing 100000 refs in a single transaction:

  Benchmark 1: update-ref: create many refs (HEAD~)
    Time (mean ± σ):      2.895 s ±  0.020 s    [User: 1.516 s, System: 1.374 s]
    Range (min … max):    2.868 s …  2.983 s    100 runs

  Benchmark 2: update-ref: create many refs (HEAD)
    Time (mean ± σ):      2.845 s ±  0.017 s    [User: 1.461 s, System: 1.379 s]
    Range (min … max):    2.803 s …  2.913 s    100 runs

  Summary
    update-ref: create many refs (HEAD) ran
      1.02 ± 0.01 times faster than update-ref: create many refs (HEAD~)

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 52 +++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 18 deletions(-)

Comments

Junio C Hamano April 3, 2024, 6:58 p.m. UTC | #1
Patrick Steinhardt <ps@pks.im> writes:

> In order to write reflog entries we need to compute the committer's
> identity as it becomes encoded in the log record itself. In the reftable
> backend, computing the identity is repeated for every single reflog
> entry which we are about to write in a transaction. Needless to say,
> this can be quite a waste of effort when writing many refs with reflog
> entries in a single transaction.

It would have been nice to mention which caller benefits from this
rewrite in the above.

There are four callers of the fill_reftable_log_record() function.
The patch moves the split_ident() call from the callee to these four
callers.  The write_transaction_table() function calls it in a loop,
which should give us a big boost.  For other three callers, they
call it at most twice (i.e. write_copy_table() when deleting the old
one), so their contribution to the boost should be minimal.

Makes sense.
Patrick Steinhardt April 4, 2024, 5:36 a.m. UTC | #2
On Wed, Apr 03, 2024 at 11:58:36AM -0700, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > In order to write reflog entries we need to compute the committer's
> > identity as it becomes encoded in the log record itself. In the reftable
> > backend, computing the identity is repeated for every single reflog
> > entry which we are about to write in a transaction. Needless to say,
> > this can be quite a waste of effort when writing many refs with reflog
> > entries in a single transaction.
> 
> It would have been nice to mention which caller benefits from this
> rewrite in the above.
> 
> There are four callers of the fill_reftable_log_record() function.
> The patch moves the split_ident() call from the callee to these four
> callers.  The write_transaction_table() function calls it in a loop,
> which should give us a big boost.  For other three callers, they
> call it at most twice (i.e. write_copy_table() when deleting the old
> one), so their contribution to the boost should be minimal.
> 
> Makes sense.

I do say it by saying "write in a transaction", but I agree that this is
not exactly obvious. Will rephrase.

Patrick
diff mbox series

Patch

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 7515dd3019..9e8967e82f 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -171,32 +171,30 @@  static int should_write_log(struct ref_store *refs, const char *refname)
 	}
 }
 
-static void fill_reftable_log_record(struct reftable_log_record *log)
+static void fill_reftable_log_record(struct reftable_log_record *log, const struct ident_split *split)
 {
-	const char *info = git_committer_info(0);
-	struct ident_split split = {0};
+	const char *tz_begin;
 	int sign = 1;
 
-	if (split_ident_line(&split, info, strlen(info)))
-		BUG("failed splitting committer info");
-
 	reftable_log_record_release(log);
 	log->value_type = REFTABLE_LOG_UPDATE;
 	log->value.update.name =
-		xstrndup(split.name_begin, split.name_end - split.name_begin);
+		xstrndup(split->name_begin, split->name_end - split->name_begin);
 	log->value.update.email =
-		xstrndup(split.mail_begin, split.mail_end - split.mail_begin);
-	log->value.update.time = atol(split.date_begin);
-	if (*split.tz_begin == '-') {
+		xstrndup(split->mail_begin, split->mail_end - split->mail_begin);
+	log->value.update.time = atol(split->date_begin);
+
+	tz_begin = split->tz_begin;
+	if (*tz_begin == '-') {
 		sign = -1;
-		split.tz_begin++;
+		tz_begin++;
 	}
-	if (*split.tz_begin == '+') {
+	if (*tz_begin == '+') {
 		sign = 1;
-		split.tz_begin++;
+		tz_begin++;
 	}
 
-	log->value.update.tz_offset = sign * atoi(split.tz_begin);
+	log->value.update.tz_offset = sign * atoi(tz_begin);
 }
 
 static int read_ref_without_reload(struct reftable_stack *stack,
@@ -1023,9 +1021,15 @@  static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 		reftable_stack_merged_table(arg->stack);
 	uint64_t ts = reftable_stack_next_update_index(arg->stack);
 	struct reftable_log_record *logs = NULL;
+	struct ident_split committer_ident = {0};
 	size_t logs_nr = 0, logs_alloc = 0, i;
+	const char *committer_info;
 	int ret = 0;
 
+	committer_info = git_committer_info(0);
+	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
+		BUG("failed splitting committer info");
+
 	QSORT(arg->updates, arg->updates_nr, transaction_update_cmp);
 
 	reftable_writer_set_limits(writer, ts, ts);
@@ -1091,7 +1095,7 @@  static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 			log = &logs[logs_nr++];
 			memset(log, 0, sizeof(*log));
 
-			fill_reftable_log_record(log);
+			fill_reftable_log_record(log, &committer_ident);
 			log->update_index = ts;
 			log->refname = xstrdup(u->refname);
 			memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ);
@@ -1238,9 +1242,11 @@  static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
 		.value.symref = (char *)create->target,
 		.update_index = ts,
 	};
+	struct ident_split committer_ident = {0};
 	struct reftable_log_record log = {0};
 	struct object_id new_oid;
 	struct object_id old_oid;
+	const char *committer_info;
 	int ret;
 
 	reftable_writer_set_limits(writer, ts, ts);
@@ -1268,7 +1274,11 @@  static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
 	    !should_write_log(&create->refs->base, create->refname))
 		return 0;
 
-	fill_reftable_log_record(&log);
+	committer_info = git_committer_info(0);
+	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
+		BUG("failed splitting committer info");
+
+	fill_reftable_log_record(&log, &committer_ident);
 	log.refname = xstrdup(create->refname);
 	log.update_index = ts;
 	log.value.update.message = xstrndup(create->logmsg,
@@ -1344,10 +1354,16 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	struct reftable_log_record old_log = {0}, *logs = NULL;
 	struct reftable_iterator it = {0};
 	struct string_list skip = STRING_LIST_INIT_NODUP;
+	struct ident_split committer_ident = {0};
 	struct strbuf errbuf = STRBUF_INIT;
 	size_t logs_nr = 0, logs_alloc = 0, i;
+	const char *committer_info;
 	int ret;
 
+	committer_info = git_committer_info(0);
+	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
+		BUG("failed splitting committer info");
+
 	if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
 		ret = error(_("refname %s not found"), arg->oldname);
 		goto done;
@@ -1422,7 +1438,7 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 
 		ALLOC_GROW(logs, logs_nr + 1, logs_alloc);
 		memset(&logs[logs_nr], 0, sizeof(logs[logs_nr]));
-		fill_reftable_log_record(&logs[logs_nr]);
+		fill_reftable_log_record(&logs[logs_nr], &committer_ident);
 		logs[logs_nr].refname = (char *)arg->newname;
 		logs[logs_nr].update_index = deletion_ts;
 		logs[logs_nr].value.update.message =
@@ -1454,7 +1470,7 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 */
 	ALLOC_GROW(logs, logs_nr + 1, logs_alloc);
 	memset(&logs[logs_nr], 0, sizeof(logs[logs_nr]));
-	fill_reftable_log_record(&logs[logs_nr]);
+	fill_reftable_log_record(&logs[logs_nr], &committer_ident);
 	logs[logs_nr].refname = (char *)arg->newname;
 	logs[logs_nr].update_index = creation_ts;
 	logs[logs_nr].value.update.message =