diff mbox series

[2/7] reftable/record: convert old and new object IDs to arrays

Message ID b63722b35dbf44c408517b6b657b8ff7c81df325.1709640322.git.ps@pks.im (mailing list archive)
State Accepted
Commit 87ff723018bfca588b5d68e110ab04494c451ebd
Headers show
Series reftable: memory optimizations for reflog iteration | expand

Commit Message

Patrick Steinhardt March 5, 2024, 12:10 p.m. UTC
In 7af607c58d (reftable/record: store "val1" hashes as static arrays,
2024-01-03) and b31e3cc620 (reftable/record: store "val2" hashes as
static arrays, 2024-01-03) we have converted ref records to store their
object IDs in a static array. Convert log records to do the same so that
their old and new object IDs are arrays, too.

This change results in two allocations less per log record that we're
iterating over. Before:

    HEAP SUMMARY:
        in use at exit: 13,473 bytes in 122 blocks
      total heap usage: 8,068,495 allocs, 8,068,373 frees, 401,011,862 bytes allocated

After:

    HEAP SUMMARY:
        in use at exit: 13,473 bytes in 122 blocks
      total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c    | 39 ++++++------------------
 reftable/merged_test.c     | 11 +++----
 reftable/readwrite_test.c  | 62 +++++++++++++++-----------------------
 reftable/record.c          | 59 ++++++------------------------------
 reftable/record_test.c     | 11 -------
 reftable/reftable-record.h |  4 +--
 reftable/stack_test.c      | 26 +++++++---------
 7 files changed, 61 insertions(+), 151 deletions(-)
diff mbox series

Patch

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index f04be942ac..2de57c047a 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -171,23 +171,6 @@  static int should_write_log(struct ref_store *refs, const char *refname)
 	}
 }
 
-static void clear_reftable_log_record(struct reftable_log_record *log)
-{
-	switch (log->value_type) {
-	case REFTABLE_LOG_UPDATE:
-		/*
-		 * When we write log records, the hashes are owned by the
-		 * caller and thus shouldn't be free'd.
-		 */
-		log->value.update.old_hash = NULL;
-		log->value.update.new_hash = NULL;
-		break;
-	case REFTABLE_LOG_DELETION:
-		break;
-	}
-	reftable_log_record_release(log);
-}
-
 static void fill_reftable_log_record(struct reftable_log_record *log)
 {
 	const char *info = git_committer_info(0);
@@ -1102,8 +1085,8 @@  static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 			fill_reftable_log_record(log);
 			log->update_index = ts;
 			log->refname = xstrdup(u->refname);
-			log->value.update.new_hash = u->new_oid.hash;
-			log->value.update.old_hash = tx_update->current_oid.hash;
+			memcpy(log->value.update.new_hash, u->new_oid.hash, GIT_MAX_RAWSZ);
+			memcpy(log->value.update.old_hash, tx_update->current_oid.hash, GIT_MAX_RAWSZ);
 			log->value.update.message =
 				xstrndup(u->msg, arg->refs->write_options.block_size / 2);
 		}
@@ -1158,7 +1141,7 @@  static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 done:
 	assert(ret != REFTABLE_API_ERROR);
 	for (i = 0; i < logs_nr; i++)
-		clear_reftable_log_record(&logs[i]);
+		reftable_log_record_release(&logs[i]);
 	free(logs);
 	return ret;
 }
@@ -1275,13 +1258,13 @@  static int write_create_symref_table(struct reftable_writer *writer, void *cb_da
 	log.update_index = ts;
 	log.value.update.message = xstrndup(create->logmsg,
 					    create->refs->write_options.block_size / 2);
-	log.value.update.new_hash = new_oid.hash;
+	memcpy(log.value.update.new_hash, new_oid.hash, GIT_MAX_RAWSZ);
 	if (refs_resolve_ref_unsafe(&create->refs->base, create->refname,
 				    RESOLVE_REF_READING, &old_oid, NULL))
-		log.value.update.old_hash = old_oid.hash;
+		memcpy(log.value.update.old_hash, old_oid.hash, GIT_MAX_RAWSZ);
 
 	ret = reftable_writer_add_log(writer, &log);
-	clear_reftable_log_record(&log);
+	reftable_log_record_release(&log);
 	return ret;
 }
 
@@ -1420,7 +1403,7 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 		logs[logs_nr].update_index = deletion_ts;
 		logs[logs_nr].value.update.message =
 			xstrndup(arg->logmsg, arg->refs->write_options.block_size / 2);
-		logs[logs_nr].value.update.old_hash = old_ref.value.val1;
+		memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
 		logs_nr++;
 
 		ret = read_ref_without_reload(arg->stack, "HEAD", &head_oid, &head_referent, &head_type);
@@ -1452,7 +1435,7 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	logs[logs_nr].update_index = creation_ts;
 	logs[logs_nr].value.update.message =
 		xstrndup(arg->logmsg, arg->refs->write_options.block_size / 2);
-	logs[logs_nr].value.update.new_hash = old_ref.value.val1;
+	memcpy(logs[logs_nr].value.update.new_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
 	logs_nr++;
 
 	/*
@@ -1515,10 +1498,6 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	for (i = 0; i < logs_nr; i++) {
 		if (!strcmp(logs[i].refname, "HEAD"))
 			continue;
-		if (logs[i].value.update.old_hash == old_ref.value.val1)
-			logs[i].value.update.old_hash = NULL;
-		if (logs[i].value.update.new_hash == old_ref.value.val1)
-			logs[i].value.update.new_hash = NULL;
 		logs[i].refname = NULL;
 		reftable_log_record_release(&logs[i]);
 	}
@@ -2180,7 +2159,7 @@  static int reftable_be_reflog_expire(struct ref_store *ref_store,
 			dest->value_type = REFTABLE_LOG_DELETION;
 		} else {
 			if ((flags & EXPIRE_REFLOGS_REWRITE) && last_hash)
-				dest->value.update.old_hash = last_hash;
+				memcpy(dest->value.update.old_hash, last_hash, GIT_MAX_RAWSZ);
 			last_hash = logs[i].value.update.new_hash;
 		}
 	}
diff --git a/reftable/merged_test.c b/reftable/merged_test.c
index d0f77a3b8f..530fc82d1c 100644
--- a/reftable/merged_test.c
+++ b/reftable/merged_test.c
@@ -289,16 +289,13 @@  merged_table_from_log_records(struct reftable_log_record **logs,
 
 static void test_merged_logs(void)
 {
-	uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 };
-	uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 };
-	uint8_t hash3[GIT_SHA1_RAWSZ] = { 3 };
 	struct reftable_log_record r1[] = {
 		{
 			.refname = "a",
 			.update_index = 2,
 			.value_type = REFTABLE_LOG_UPDATE,
 			.value.update = {
-				.old_hash = hash2,
+				.old_hash = { 2 },
 				/* deletion */
 				.name = "jane doe",
 				.email = "jane@invalid",
@@ -310,8 +307,8 @@  static void test_merged_logs(void)
 			.update_index = 1,
 			.value_type = REFTABLE_LOG_UPDATE,
 			.value.update = {
-				.old_hash = hash1,
-				.new_hash = hash2,
+				.old_hash = { 1 },
+				.new_hash = { 2 },
 				.name = "jane doe",
 				.email = "jane@invalid",
 				.message = "message1",
@@ -324,7 +321,7 @@  static void test_merged_logs(void)
 			.update_index = 3,
 			.value_type = REFTABLE_LOG_UPDATE,
 			.value.update = {
-				.new_hash = hash3,
+				.new_hash = { 3 },
 				.name = "jane doe",
 				.email = "jane@invalid",
 				.message = "message3",
diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index 363fe0f998..a6dbd214c5 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -77,18 +77,15 @@  static void write_table(char ***names, struct strbuf *buf, int N,
 	}
 
 	for (i = 0; i < N; i++) {
-		uint8_t hash[GIT_SHA256_RAWSZ] = { 0 };
 		char name[100];
 		int n;
 
-		set_test_hash(hash, i);
-
 		snprintf(name, sizeof(name), "refs/heads/branch%02d", i);
 
 		log.refname = name;
 		log.update_index = update_index;
 		log.value_type = REFTABLE_LOG_UPDATE;
-		log.value.update.new_hash = hash;
+		set_test_hash(log.value.update.new_hash, i);
 		log.value.update.message = "message";
 
 		n = reftable_writer_add_log(w, &log);
@@ -137,13 +134,10 @@  static void test_log_buffer_size(void)
 	/* This tests buffer extension for log compression. Must use a random
 	   hash, to ensure that the compressed part is larger than the original.
 	*/
-	uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ];
 	for (i = 0; i < GIT_SHA1_RAWSZ; i++) {
-		hash1[i] = (uint8_t)(git_rand() % 256);
-		hash2[i] = (uint8_t)(git_rand() % 256);
+		log.value.update.old_hash[i] = (uint8_t)(git_rand() % 256);
+		log.value.update.new_hash[i] = (uint8_t)(git_rand() % 256);
 	}
-	log.value.update.old_hash = hash1;
-	log.value.update.new_hash = hash2;
 	reftable_writer_set_limits(w, update_index, update_index);
 	err = reftable_writer_add_log(w, &log);
 	EXPECT_ERR(err);
@@ -161,25 +155,26 @@  static void test_log_overflow(void)
 		.block_size = ARRAY_SIZE(msg),
 	};
 	int err;
-	struct reftable_log_record
-		log = { .refname = "refs/heads/master",
-			.update_index = 0xa,
-			.value_type = REFTABLE_LOG_UPDATE,
-			.value = { .update = {
-					   .name = "Han-Wen Nienhuys",
-					   .email = "hanwen@google.com",
-					   .tz_offset = 100,
-					   .time = 0x5e430672,
-					   .message = msg,
-				   } } };
+	struct reftable_log_record log = {
+		.refname = "refs/heads/master",
+		.update_index = 0xa,
+		.value_type = REFTABLE_LOG_UPDATE,
+		.value = {
+			.update = {
+				.old_hash = { 1 },
+				.new_hash = { 2 },
+				.name = "Han-Wen Nienhuys",
+				.email = "hanwen@google.com",
+				.tz_offset = 100,
+				.time = 0x5e430672,
+				.message = msg,
+			},
+		},
+	};
 	struct reftable_writer *w =
 		reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts);
 
-	uint8_t hash1[GIT_SHA1_RAWSZ]  = {1}, hash2[GIT_SHA1_RAWSZ] = { 2 };
-
 	memset(msg, 'x', sizeof(msg) - 1);
-	log.value.update.old_hash = hash1;
-	log.value.update.new_hash = hash2;
 	reftable_writer_set_limits(w, update_index, update_index);
 	err = reftable_writer_add_log(w, &log);
 	EXPECT(err == REFTABLE_ENTRY_TOO_BIG_ERROR);
@@ -219,16 +214,13 @@  static void test_log_write_read(void)
 		EXPECT_ERR(err);
 	}
 	for (i = 0; i < N; i++) {
-		uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ];
 		struct reftable_log_record log = { NULL };
-		set_test_hash(hash1, i);
-		set_test_hash(hash2, i + 1);
 
 		log.refname = names[i];
 		log.update_index = i;
 		log.value_type = REFTABLE_LOG_UPDATE;
-		log.value.update.old_hash = hash1;
-		log.value.update.new_hash = hash2;
+		set_test_hash(log.value.update.old_hash, i);
+		set_test_hash(log.value.update.new_hash, i + 1);
 
 		err = reftable_writer_add_log(w, &log);
 		EXPECT_ERR(err);
@@ -298,18 +290,15 @@  static void test_log_zlib_corruption(void)
 	struct reftable_writer *w =
 		reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts);
 	const struct reftable_stats *stats = NULL;
-	uint8_t hash1[GIT_SHA1_RAWSZ] = { 1 };
-	uint8_t hash2[GIT_SHA1_RAWSZ] = { 2 };
 	char message[100] = { 0 };
 	int err, i, n;
-
 	struct reftable_log_record log = {
 		.refname = "refname",
 		.value_type = REFTABLE_LOG_UPDATE,
 		.value = {
 			.update = {
-				.new_hash = hash1,
-				.old_hash = hash2,
+				.new_hash = { 1 },
+				.old_hash = { 2 },
 				.name = "My Name",
 				.email = "myname@invalid",
 				.message = message,
@@ -821,13 +810,12 @@  static void test_write_multiple_indices(void)
 	}
 
 	for (i = 0; i < 100; i++) {
-		unsigned char hash[GIT_SHA1_RAWSZ] = {i};
 		struct reftable_log_record log = {
 			.update_index = 1,
 			.value_type = REFTABLE_LOG_UPDATE,
 			.value.update = {
-				.old_hash = hash,
-				.new_hash = hash,
+				.old_hash = { i },
+				.new_hash = { i },
 			},
 		};
 
diff --git a/reftable/record.c b/reftable/record.c
index 367de04600..8d2cd6b081 100644
--- a/reftable/record.c
+++ b/reftable/record.c
@@ -763,16 +763,10 @@  static void reftable_log_record_copy_from(void *rec, const void *src_rec,
 				xstrdup(dst->value.update.message);
 		}
 
-		if (dst->value.update.new_hash) {
-			REFTABLE_ALLOC_ARRAY(dst->value.update.new_hash, hash_size);
-			memcpy(dst->value.update.new_hash,
-			       src->value.update.new_hash, hash_size);
-		}
-		if (dst->value.update.old_hash) {
-			REFTABLE_ALLOC_ARRAY(dst->value.update.old_hash, hash_size);
-			memcpy(dst->value.update.old_hash,
-			       src->value.update.old_hash, hash_size);
-		}
+		memcpy(dst->value.update.new_hash,
+		       src->value.update.new_hash, hash_size);
+		memcpy(dst->value.update.old_hash,
+		       src->value.update.old_hash, hash_size);
 		break;
 	}
 }
@@ -790,8 +784,6 @@  void reftable_log_record_release(struct reftable_log_record *r)
 	case REFTABLE_LOG_DELETION:
 		break;
 	case REFTABLE_LOG_UPDATE:
-		reftable_free(r->value.update.new_hash);
-		reftable_free(r->value.update.old_hash);
 		reftable_free(r->value.update.name);
 		reftable_free(r->value.update.email);
 		reftable_free(r->value.update.message);
@@ -808,33 +800,20 @@  static uint8_t reftable_log_record_val_type(const void *rec)
 	return reftable_log_record_is_deletion(log) ? 0 : 1;
 }
 
-static uint8_t zero[GIT_SHA256_RAWSZ] = { 0 };
-
 static int reftable_log_record_encode(const void *rec, struct string_view s,
 				      int hash_size)
 {
 	const struct reftable_log_record *r = rec;
 	struct string_view start = s;
 	int n = 0;
-	uint8_t *oldh = NULL;
-	uint8_t *newh = NULL;
 	if (reftable_log_record_is_deletion(r))
 		return 0;
 
-	oldh = r->value.update.old_hash;
-	newh = r->value.update.new_hash;
-	if (!oldh) {
-		oldh = zero;
-	}
-	if (!newh) {
-		newh = zero;
-	}
-
 	if (s.len < 2 * hash_size)
 		return -1;
 
-	memcpy(s.buf, oldh, hash_size);
-	memcpy(s.buf + hash_size, newh, hash_size);
+	memcpy(s.buf, r->value.update.old_hash, hash_size);
+	memcpy(s.buf + hash_size, r->value.update.new_hash, hash_size);
 	string_view_consume(&s, 2 * hash_size);
 
 	n = encode_string(r->value.update.name ? r->value.update.name : "", s);
@@ -891,8 +870,6 @@  static int reftable_log_record_decode(void *rec, struct strbuf key,
 	if (val_type != r->value_type) {
 		switch (r->value_type) {
 		case REFTABLE_LOG_UPDATE:
-			FREE_AND_NULL(r->value.update.old_hash);
-			FREE_AND_NULL(r->value.update.new_hash);
 			FREE_AND_NULL(r->value.update.message);
 			FREE_AND_NULL(r->value.update.email);
 			FREE_AND_NULL(r->value.update.name);
@@ -909,11 +886,6 @@  static int reftable_log_record_decode(void *rec, struct strbuf key,
 	if (in.len < 2 * hash_size)
 		return REFTABLE_FORMAT_ERROR;
 
-	r->value.update.old_hash =
-		reftable_realloc(r->value.update.old_hash, hash_size);
-	r->value.update.new_hash =
-		reftable_realloc(r->value.update.new_hash, hash_size);
-
 	memcpy(r->value.update.old_hash, in.buf, hash_size);
 	memcpy(r->value.update.new_hash, in.buf + hash_size, hash_size);
 
@@ -983,17 +955,6 @@  static int null_streq(char *a, char *b)
 	return 0 == strcmp(a, b);
 }
 
-static int zero_hash_eq(uint8_t *a, uint8_t *b, int sz)
-{
-	if (!a)
-		a = zero;
-
-	if (!b)
-		b = zero;
-
-	return !memcmp(a, b, sz);
-}
-
 static int reftable_log_record_equal_void(const void *a,
 					  const void *b, int hash_size)
 {
@@ -1037,10 +998,10 @@  int reftable_log_record_equal(const struct reftable_log_record *a,
 				  b->value.update.email) &&
 		       null_streq(a->value.update.message,
 				  b->value.update.message) &&
-		       zero_hash_eq(a->value.update.old_hash,
-				    b->value.update.old_hash, hash_size) &&
-		       zero_hash_eq(a->value.update.new_hash,
-				    b->value.update.new_hash, hash_size);
+		       !memcmp(a->value.update.old_hash,
+			       b->value.update.old_hash, hash_size) &&
+		       !memcmp(a->value.update.new_hash,
+			       b->value.update.new_hash, hash_size);
 	}
 
 	abort();
diff --git a/reftable/record_test.c b/reftable/record_test.c
index 89209894d8..57e56138c0 100644
--- a/reftable/record_test.c
+++ b/reftable/record_test.c
@@ -183,8 +183,6 @@  static void test_reftable_log_record_roundtrip(void)
 			.value_type = REFTABLE_LOG_UPDATE,
 			.value = {
 				.update = {
-					.old_hash = reftable_malloc(GIT_SHA1_RAWSZ),
-					.new_hash = reftable_malloc(GIT_SHA1_RAWSZ),
 					.name = xstrdup("han-wen"),
 					.email = xstrdup("hanwen@google.com"),
 					.message = xstrdup("test"),
@@ -202,13 +200,6 @@  static void test_reftable_log_record_roundtrip(void)
 			.refname = xstrdup("branch"),
 			.update_index = 33,
 			.value_type = REFTABLE_LOG_UPDATE,
-			.value = {
-				.update = {
-					.old_hash = reftable_malloc(GIT_SHA1_RAWSZ),
-					.new_hash = reftable_malloc(GIT_SHA1_RAWSZ),
-					/* rest of fields left empty. */
-				},
-			},
 		}
 	};
 	set_test_hash(in[0].value.update.new_hash, 1);
@@ -231,8 +222,6 @@  static void test_reftable_log_record_roundtrip(void)
 				.value_type = REFTABLE_LOG_UPDATE,
 				.value = {
 					.update = {
-						.new_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1),
-						.old_hash = reftable_calloc(GIT_SHA1_RAWSZ, 1),
 						.name = xstrdup("old name"),
 						.email = xstrdup("old@email"),
 						.message = xstrdup("old message"),
diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h
index e657001d42..2659ea008c 100644
--- a/reftable/reftable-record.h
+++ b/reftable/reftable-record.h
@@ -88,8 +88,8 @@  struct reftable_log_record {
 
 	union {
 		struct {
-			uint8_t *new_hash;
-			uint8_t *old_hash;
+			unsigned char new_hash[GIT_MAX_RAWSZ];
+			unsigned char old_hash[GIT_MAX_RAWSZ];
 			char *name;
 			char *email;
 			uint64_t time;
diff --git a/reftable/stack_test.c b/reftable/stack_test.c
index 509f486623..7336757cf5 100644
--- a/reftable/stack_test.c
+++ b/reftable/stack_test.c
@@ -468,8 +468,6 @@  static void test_reftable_stack_add(void)
 		logs[i].refname = xstrdup(buf);
 		logs[i].update_index = N + i + 1;
 		logs[i].value_type = REFTABLE_LOG_UPDATE;
-
-		logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ);
 		logs[i].value.update.email = xstrdup("identity@invalid");
 		set_test_hash(logs[i].value.update.new_hash, i);
 	}
@@ -547,16 +545,17 @@  static void test_reftable_stack_log_normalize(void)
 	};
 	struct reftable_stack *st = NULL;
 	char *dir = get_tmp_dir(__LINE__);
-
-	uint8_t h1[GIT_SHA1_RAWSZ] = { 0x01 }, h2[GIT_SHA1_RAWSZ] = { 0x02 };
-
-	struct reftable_log_record input = { .refname = "branch",
-					     .update_index = 1,
-					     .value_type = REFTABLE_LOG_UPDATE,
-					     .value = { .update = {
-								.new_hash = h1,
-								.old_hash = h2,
-							} } };
+	struct reftable_log_record input = {
+		.refname = "branch",
+		.update_index = 1,
+		.value_type = REFTABLE_LOG_UPDATE,
+		.value = {
+			.update = {
+				.new_hash = { 1 },
+				.old_hash = { 2 },
+			},
+		},
+	};
 	struct reftable_log_record dest = {
 		.update_index = 0,
 	};
@@ -627,8 +626,6 @@  static void test_reftable_stack_tombstone(void)
 		logs[i].update_index = 42;
 		if (i % 2 == 0) {
 			logs[i].value_type = REFTABLE_LOG_UPDATE;
-			logs[i].value.update.new_hash =
-				reftable_malloc(GIT_SHA1_RAWSZ);
 			set_test_hash(logs[i].value.update.new_hash, i);
 			logs[i].value.update.email =
 				xstrdup("identity@invalid");
@@ -810,7 +807,6 @@  static void test_reflog_expire(void)
 		logs[i].update_index = i;
 		logs[i].value_type = REFTABLE_LOG_UPDATE;
 		logs[i].value.update.time = i;
-		logs[i].value.update.new_hash = reftable_malloc(GIT_SHA1_RAWSZ);
 		logs[i].value.update.email = xstrdup("identity@invalid");
 		set_test_hash(logs[i].value.update.new_hash, i);
 	}