[v2,3/7] midx.c: extract `struct midx_fanout`

Message ID ae2077acb795311c87ce3bbcef60bffb66a6aa79.1661197803.git.me@ttaylorr.com (mailing list archive)
State Accepted
Commit 989d9cbd5c32fad54391a4ed06b1271f0a891077
Series midx: permit changing the preferred pack when reusing the MIDX

Commit Message

Taylor Blau Aug. 22, 2022, 7:50 p.m. UTC
To build up a list of objects (along with their packs, and the offsets
within those packs at which each object appears), the MIDX code
implements `get_sorted_entries()`, which gathers a list of candidates,
sorts them, and then removes duplicate entries.

To do this, it keeps an array of `pack_midx_entry` structures that it
builds up once for each fanout level (i.e., for all possible values of
the first byte of each object's ID).
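
As a point of reference (illustrative, not code from this patch), the
fanout value of an object is simply the first byte of its object ID;
assuming Git's `struct object_id`, the bucketing amounts to:

	/* An OID whose hex form begins with "1a" falls into fanout
	 * bucket 0x1a == 26; there are 256 buckets in total.
	 */
	static uint32_t fanout_of(const struct object_id *oid)
	{
		return oid->hash[0];
	}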

This array is a function-local variable of `get_sorted_entries()`.
Since it uses the ALLOC_GROW() macro, it is convenient for the
`alloc_fanout` variable to also be local to that function, and to be
modified only within it.
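
For readers unfamiliar with it, ALLOC_GROW() (from git-compat-util.h)
grows the array in place and updates the caller's capacity variable,
which is why that variable must be an lvalue in scope. Paraphrasing
its definition:

	/* Grow `x` to hold at least `nr` elements, over-allocating
	 * (via alloc_nr()) to amortize repeated reallocations, and
	 * update the capacity `alloc` in place.
	 */
	#define alloc_nr(x) (((x)+16)*3/2)
	#define ALLOC_GROW(x, nr, alloc) \
		do { \
			if ((nr) > alloc) { \
				if (alloc_nr(alloc) < (nr)) \
					alloc = (nr); \
				else \
					alloc = alloc_nr(alloc); \
				REALLOC_ARRAY(x, alloc); \
			} \
		} while (0)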

However, subsequent changes will extract the two ways this array is
filled (from a pack at some fanout value, and from an existing MIDX at
some fanout value) into separate functions. Rather than passing around
a pointer to the entries array along with `nr_fanout` and
`alloc_fanout`, encapsulate all three into a structure, and pass around
a pointer to that structure instead; a sketch of the eventual helpers
follows.
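
To give a sense of where this is headed, the extracted helpers might
look something like the following. These signatures are a sketch only;
the actual names and parameters land in later patches of this series:

	/* Hypothetical shape of the two fill paths once extracted:
	 * one copies the entries for `cur_fanout` out of an existing
	 * MIDX, the other reads them from a single pack.
	 */
	static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout,
						struct multi_pack_index *m,
						uint32_t cur_fanout,
						int preferred_pack);
	static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
						struct pack_info *info,
						uint32_t cur_pack,
						int preferred,
						uint32_t cur_fanout);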

This patch does not yet extract the above two functions, but sets us up
to begin doing so in the following commit. For now, the implementation
of `get_sorted_entries()` is only modified to replace `entries_by_fanout`
with `fanout.entries`, `nr_fanout` with `fanout.nr`, and so on.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 midx.c | 54 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 19 deletions(-)

Patch

diff --git a/midx.c b/midx.c
index 4e956cacb7..cdb6c481c7 100644
--- a/midx.c
+++ b/midx.c
@@ -577,6 +577,22 @@  static void fill_pack_entry(uint32_t pack_int_id,
 	entry->preferred = !!preferred;
 }
 
+struct midx_fanout {
+	struct pack_midx_entry *entries;
+	uint32_t nr;
+	uint32_t alloc;
+};
+
+static void midx_fanout_grow(struct midx_fanout *fanout, uint32_t nr)
+{
+	ALLOC_GROW(fanout->entries, nr, fanout->alloc);
+}
+
+static void midx_fanout_sort(struct midx_fanout *fanout)
+{
+	QSORT(fanout->entries, fanout->nr, midx_oid_compare);
+}
+
 /*
  * It is possible to artificially get into a state where there are many
  * duplicate copies of objects. That can create high memory pressure if
@@ -595,8 +611,8 @@  static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 						  int preferred_pack)
 {
 	uint32_t cur_fanout, cur_pack, cur_object;
-	uint32_t alloc_fanout, alloc_objects, total_objects = 0;
-	struct pack_midx_entry *entries_by_fanout = NULL;
+	uint32_t alloc_objects, total_objects = 0;
+	struct midx_fanout fanout = { 0 };
 	struct pack_midx_entry *deduplicated_entries = NULL;
 	uint32_t start_pack = m ? m->num_packs : 0;
 
@@ -608,14 +624,14 @@  static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 	 * slices to be evenly distributed, with some noise. Hence,
 	 * allocate slightly more than one 256th.
 	 */
-	alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16;
+	alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;
 
-	ALLOC_ARRAY(entries_by_fanout, alloc_fanout);
+	ALLOC_ARRAY(fanout.entries, fanout.alloc);
 	ALLOC_ARRAY(deduplicated_entries, alloc_objects);
 	*nr_objects = 0;
 
 	for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
-		uint32_t nr_fanout = 0;
+		fanout.nr = 0;
 
 		if (m) {
 			uint32_t start = 0, end;
@@ -625,15 +641,15 @@  static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 			end = ntohl(m->chunk_oid_fanout[cur_fanout]);
 
 			for (cur_object = start; cur_object < end; cur_object++) {
-				ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
+				midx_fanout_grow(&fanout, fanout.nr + 1);
 				nth_midxed_pack_midx_entry(m,
-							   &entries_by_fanout[nr_fanout],
+							   &fanout.entries[fanout.nr],
 							   cur_object);
 				if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack)
-					entries_by_fanout[nr_fanout].preferred = 1;
+					fanout.entries[fanout.nr].preferred = 1;
 				else
-					entries_by_fanout[nr_fanout].preferred = 0;
-				nr_fanout++;
+					fanout.entries[fanout.nr].preferred = 0;
+				fanout.nr++;
 			}
 		}
 
@@ -646,36 +662,36 @@  static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 			end = get_pack_fanout(info[cur_pack].p, cur_fanout);
 
 			for (cur_object = start; cur_object < end; cur_object++) {
-				ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
+				midx_fanout_grow(&fanout, fanout.nr + 1);
 				fill_pack_entry(cur_pack,
 						info[cur_pack].p,
 						cur_object,
-						&entries_by_fanout[nr_fanout],
+						&fanout.entries[fanout.nr],
 						preferred);
-				nr_fanout++;
+				fanout.nr++;
 			}
 		}
 
-		QSORT(entries_by_fanout, nr_fanout, midx_oid_compare);
+		midx_fanout_sort(&fanout);
 
 		/*
 		 * The batch is now sorted by OID and then mtime (descending).
 		 * Take only the first duplicate.
 		 */
-		for (cur_object = 0; cur_object < nr_fanout; cur_object++) {
-			if (cur_object && oideq(&entries_by_fanout[cur_object - 1].oid,
-						&entries_by_fanout[cur_object].oid))
+		for (cur_object = 0; cur_object < fanout.nr; cur_object++) {
+			if (cur_object && oideq(&fanout.entries[cur_object - 1].oid,
+						&fanout.entries[cur_object].oid))
 				continue;
 
 			ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects);
 			memcpy(&deduplicated_entries[*nr_objects],
-			       &entries_by_fanout[cur_object],
+			       &fanout.entries[cur_object],
 			       sizeof(struct pack_midx_entry));
 			(*nr_objects)++;
 		}
 	}
 
-	free(entries_by_fanout);
+	free(fanout.entries);
 	return deduplicated_entries;
 }