diff mbox series

[v2,09/13] reftable/generic: move seeking of records into the iterator

Message ID 3e91c3830e42063a74dac983152b7b8f0053bafa.1715589670.git.ps@pks.im (mailing list archive)
State Accepted
Commit 5bf96e0c393827481afab55d3b73ae09ec1ea0de
Headers show
Series reftable: prepare for re-seekable iterators | expand

Commit Message

Patrick Steinhardt May 13, 2024, 8:47 a.m. UTC
Reftable iterators are created by seeking on the parent structure of a
corresponding record. For example, to create an iterator for the merged
table you would call `reftable_merged_table_seek_ref()`. Most notably,
it is not posible to create an iterator and then seek it afterwards.

While this may be a bit easier to reason about, it comes with two
significant downsides. The first downside is that the logic to find
records is split up between the parent data structure and the iterator
itself. Conceptually, it is more straight forward if all that logic was
contained in a single place, which should be the iterator.

The second and more significant downside is that it is impossible to
reuse iterators for multiple seeks. Whenever you want to look up a
record, you need to re-create the whole infrastructure again, which is
quite a waste of time. Furthermore, it is impossible to optimize seeks,
such as when seeking the same record multiple times.

To address this, we essentially split up the concerns properly such that
the parent data structure is responsible for setting up the iterator via
a new `init_iter()` callback, whereas the iterator handles seeks via a
new `seek()` callback. This will eventually allow us to call `seek()` on
the iterator multiple times, where every iterator can potentially
optimize for certain cases.

Note that at this point in time we are not yet ready to reuse the
iterators. This will be left for a future patch series.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/generic.c |  47 +++++++++++++----
 reftable/generic.h |   9 +++-
 reftable/iter.c    |  15 ++++++
 reftable/merged.c  |  97 +++++++++++++++++-----------------
 reftable/reader.c  | 126 +++++++++++++++++++++++++--------------------
 5 files changed, 177 insertions(+), 117 deletions(-)
diff mbox series

Patch

diff --git a/reftable/generic.c b/reftable/generic.c
index b9f1c7c18a..1cf68fe124 100644
--- a/reftable/generic.c
+++ b/reftable/generic.c
@@ -12,25 +12,39 @@  license that can be found in the LICENSE file or at
 #include "reftable-iterator.h"
 #include "reftable-generic.h"
 
+void table_init_iter(struct reftable_table *tab,
+		     struct reftable_iterator *it,
+		     uint8_t typ)
+{
+
+	tab->ops->init_iter(tab->table_arg, it, typ);
+}
+
 int reftable_table_seek_ref(struct reftable_table *tab,
 			    struct reftable_iterator *it, const char *name)
 {
-	struct reftable_record rec = { .type = BLOCK_TYPE_REF,
-				       .u.ref = {
-					       .refname = (char *)name,
-				       } };
-	return tab->ops->seek_record(tab->table_arg, it, &rec);
+	struct reftable_record want = {
+		.type = BLOCK_TYPE_REF,
+		.u.ref = {
+			.refname = (char *)name,
+		},
+	};
+	table_init_iter(tab, it, BLOCK_TYPE_REF);
+	return it->ops->seek(it->iter_arg, &want);
 }
 
 int reftable_table_seek_log(struct reftable_table *tab,
 			    struct reftable_iterator *it, const char *name)
 {
-	struct reftable_record rec = { .type = BLOCK_TYPE_LOG,
-				       .u.log = {
-					       .refname = (char *)name,
-					       .update_index = ~((uint64_t)0),
-				       } };
-	return tab->ops->seek_record(tab->table_arg, it, &rec);
+	struct reftable_record want = {
+		.type = BLOCK_TYPE_LOG,
+		.u.log = {
+			.refname = (char *)name,
+			.update_index = ~((uint64_t)0),
+		},
+	};
+	table_init_iter(tab, it, BLOCK_TYPE_LOG);
+	return it->ops->seek(it->iter_arg, &want);
 }
 
 int reftable_table_read_ref(struct reftable_table *tab, const char *name,
@@ -152,11 +166,21 @@  int reftable_iterator_next_log(struct reftable_iterator *it,
 	return err;
 }
 
+int iterator_seek(struct reftable_iterator *it, struct reftable_record *want)
+{
+	return it->ops->seek(it->iter_arg, want);
+}
+
 int iterator_next(struct reftable_iterator *it, struct reftable_record *rec)
 {
 	return it->ops->next(it->iter_arg, rec);
 }
 
+static int empty_iterator_seek(void *arg, struct reftable_record *want)
+{
+	return 0;
+}
+
 static int empty_iterator_next(void *arg, struct reftable_record *rec)
 {
 	return 1;
@@ -167,6 +191,7 @@  static void empty_iterator_close(void *arg)
 }
 
 static struct reftable_iterator_vtable empty_vtable = {
+	.seek = &empty_iterator_seek,
 	.next = &empty_iterator_next,
 	.close = &empty_iterator_close,
 };
diff --git a/reftable/generic.h b/reftable/generic.h
index 98886a0640..8341fa570e 100644
--- a/reftable/generic.h
+++ b/reftable/generic.h
@@ -14,19 +14,24 @@  license that can be found in the LICENSE file or at
 
 /* generic interface to reftables */
 struct reftable_table_vtable {
-	int (*seek_record)(void *tab, struct reftable_iterator *it,
-			   struct reftable_record *);
+	void (*init_iter)(void *tab, struct reftable_iterator *it, uint8_t typ);
 	uint32_t (*hash_id)(void *tab);
 	uint64_t (*min_update_index)(void *tab);
 	uint64_t (*max_update_index)(void *tab);
 };
 
+void table_init_iter(struct reftable_table *tab,
+		     struct reftable_iterator *it,
+		     uint8_t typ);
+
 struct reftable_iterator_vtable {
+	int (*seek)(void *iter_arg, struct reftable_record *want);
 	int (*next)(void *iter_arg, struct reftable_record *rec);
 	void (*close)(void *iter_arg);
 };
 
 void iterator_set_empty(struct reftable_iterator *it);
+int iterator_seek(struct reftable_iterator *it, struct reftable_record *want);
 int iterator_next(struct reftable_iterator *it, struct reftable_record *rec);
 
 #endif
diff --git a/reftable/iter.c b/reftable/iter.c
index aa9ac199b1..b4528fab47 100644
--- a/reftable/iter.c
+++ b/reftable/iter.c
@@ -23,6 +23,13 @@  static void filtering_ref_iterator_close(void *iter_arg)
 	reftable_iterator_destroy(&fri->it);
 }
 
+static int filtering_ref_iterator_seek(void *iter_arg,
+				       struct reftable_record *want)
+{
+	struct filtering_ref_iterator *fri = iter_arg;
+	return iterator_seek(&fri->it, want);
+}
+
 static int filtering_ref_iterator_next(void *iter_arg,
 				       struct reftable_record *rec)
 {
@@ -73,6 +80,7 @@  static int filtering_ref_iterator_next(void *iter_arg,
 }
 
 static struct reftable_iterator_vtable filtering_ref_iterator_vtable = {
+	.seek = &filtering_ref_iterator_seek,
 	.next = &filtering_ref_iterator_next,
 	.close = &filtering_ref_iterator_close,
 };
@@ -119,6 +127,12 @@  static int indexed_table_ref_iter_next_block(struct indexed_table_ref_iter *it)
 	return 0;
 }
 
+static int indexed_table_ref_iter_seek(void *p, struct reftable_record *want)
+{
+	BUG("seeking indexed table is not supported");
+	return -1;
+}
+
 static int indexed_table_ref_iter_next(void *p, struct reftable_record *rec)
 {
 	struct indexed_table_ref_iter *it = p;
@@ -175,6 +189,7 @@  int new_indexed_table_ref_iter(struct indexed_table_ref_iter **dest,
 }
 
 static struct reftable_iterator_vtable indexed_table_ref_iter_vtable = {
+	.seek = &indexed_table_ref_iter_seek,
 	.next = &indexed_table_ref_iter_next,
 	.close = &indexed_table_ref_iter_close,
 };
diff --git a/reftable/merged.c b/reftable/merged.c
index 18a2a6f09b..fc7946d90d 100644
--- a/reftable/merged.c
+++ b/reftable/merged.c
@@ -31,12 +31,18 @@  struct merged_iter {
 };
 
 static void merged_iter_init(struct merged_iter *mi,
-			     struct reftable_merged_table *mt)
+			     struct reftable_merged_table *mt,
+			     uint8_t typ)
 {
 	memset(mi, 0, sizeof(*mi));
 	mi->advance_index = -1;
 	mi->suppress_deletions = mt->suppress_deletions;
+
 	REFTABLE_CALLOC_ARRAY(mi->subiters, mt->stack_len);
+	for (size_t i = 0; i < mt->stack_len; i++) {
+		reftable_record_init(&mi->subiters[i].rec, typ);
+		table_init_iter(&mt->stack[i], &mi->subiters[i].iter, typ);
+	}
 	mi->stack_len = mt->stack_len;
 }
 
@@ -68,6 +74,27 @@  static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
 	return 0;
 }
 
+static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want)
+{
+	int err;
+
+	mi->advance_index = -1;
+
+	for (size_t i = 0; i < mi->stack_len; i++) {
+		err = iterator_seek(&mi->subiters[i].iter, want);
+		if (err < 0)
+			return err;
+		if (err > 0)
+			continue;
+
+		err = merged_iter_advance_subiter(mi, i);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
 static int merged_iter_next_entry(struct merged_iter *mi,
 				  struct reftable_record *rec)
 {
@@ -133,6 +160,11 @@  static int merged_iter_next_entry(struct merged_iter *mi,
 	return 0;
 }
 
+static int merged_iter_seek_void(void *it, struct reftable_record *want)
+{
+	return merged_iter_seek(it, want);
+}
+
 static int merged_iter_next_void(void *p, struct reftable_record *rec)
 {
 	struct merged_iter *mi = p;
@@ -147,6 +179,7 @@  static int merged_iter_next_void(void *p, struct reftable_record *rec)
 }
 
 static struct reftable_iterator_vtable merged_iter_vtable = {
+	.seek = merged_iter_seek_void,
 	.next = &merged_iter_next_void,
 	.close = &merged_iter_close,
 };
@@ -220,47 +253,13 @@  reftable_merged_table_min_update_index(struct reftable_merged_table *mt)
 	return mt->min;
 }
 
-static int reftable_table_seek_record(struct reftable_table *tab,
-				      struct reftable_iterator *it,
-				      struct reftable_record *rec)
-{
-	return tab->ops->seek_record(tab->table_arg, it, rec);
-}
-
-static int merged_table_seek_record(struct reftable_merged_table *mt,
-				    struct reftable_iterator *it,
-				    struct reftable_record *rec)
+static void merged_table_init_iter(struct reftable_merged_table *mt,
+				   struct reftable_iterator *it,
+				   uint8_t typ)
 {
-	struct merged_iter merged, *p;
-	int err;
-
-	merged_iter_init(&merged, mt);
-
-	for (size_t i = 0; i < mt->stack_len; i++) {
-		reftable_record_init(&merged.subiters[i].rec,
-				     reftable_record_type(rec));
-
-		err = reftable_table_seek_record(&mt->stack[i],
-						 &merged.subiters[i].iter, rec);
-		if (err < 0)
-			goto out;
-		if (err > 0)
-			continue;
-
-		err = merged_iter_advance_subiter(&merged, i);
-		if (err < 0)
-			goto out;
-	}
-
-	p = reftable_malloc(sizeof(*p));
-	*p = merged;
-	iterator_from_merged_iter(it, p);
-	err = 0;
-
-out:
-	if (err < 0)
-		merged_iter_close(&merged);
-	return err;
+	struct merged_iter *mi = reftable_malloc(sizeof(*mi));
+	merged_iter_init(mi, mt, typ);
+	iterator_from_merged_iter(it, mi);
 }
 
 int reftable_merged_table_seek_ref(struct reftable_merged_table *mt,
@@ -273,7 +272,8 @@  int reftable_merged_table_seek_ref(struct reftable_merged_table *mt,
 			.refname = (char *)name,
 		},
 	};
-	return merged_table_seek_record(mt, it, &rec);
+	merged_table_init_iter(mt, it, BLOCK_TYPE_REF);
+	return iterator_seek(it, &rec);
 }
 
 int reftable_merged_table_seek_log_at(struct reftable_merged_table *mt,
@@ -285,7 +285,8 @@  int reftable_merged_table_seek_log_at(struct reftable_merged_table *mt,
 					       .refname = (char *)name,
 					       .update_index = update_index,
 				       } };
-	return merged_table_seek_record(mt, it, &rec);
+	merged_table_init_iter(mt, it, BLOCK_TYPE_LOG);
+	return iterator_seek(it, &rec);
 }
 
 int reftable_merged_table_seek_log(struct reftable_merged_table *mt,
@@ -301,11 +302,11 @@  uint32_t reftable_merged_table_hash_id(struct reftable_merged_table *mt)
 	return mt->hash_id;
 }
 
-static int reftable_merged_table_seek_void(void *tab,
-					   struct reftable_iterator *it,
-					   struct reftable_record *rec)
+static void reftable_merged_table_init_iter_void(void *tab,
+						 struct reftable_iterator *it,
+						 uint8_t typ)
 {
-	return merged_table_seek_record(tab, it, rec);
+	merged_table_init_iter(tab, it, typ);
 }
 
 static uint32_t reftable_merged_table_hash_id_void(void *tab)
@@ -324,7 +325,7 @@  static uint64_t reftable_merged_table_max_update_index_void(void *tab)
 }
 
 static struct reftable_table_vtable merged_table_vtable = {
-	.seek_record = reftable_merged_table_seek_void,
+	.init_iter = reftable_merged_table_init_iter_void,
 	.hash_id = reftable_merged_table_hash_id_void,
 	.min_update_index = reftable_merged_table_min_update_index_void,
 	.max_update_index = reftable_merged_table_max_update_index_void,
diff --git a/reftable/reader.c b/reftable/reader.c
index 021608f638..a5a13cb0b9 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -369,29 +369,6 @@  static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
 	}
 }
 
-static int table_iter_next_void(void *ti, struct reftable_record *rec)
-{
-	return table_iter_next(ti, rec);
-}
-
-static void table_iter_close_void(void *ti)
-{
-	table_iter_close(ti);
-}
-
-static struct reftable_iterator_vtable table_iter_vtable = {
-	.next = &table_iter_next_void,
-	.close = &table_iter_close_void,
-};
-
-static void iterator_from_table_iter(struct reftable_iterator *it,
-				     struct table_iter *ti)
-{
-	assert(!it->ops);
-	it->iter_arg = ti;
-	it->ops = &table_iter_vtable;
-}
-
 static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ)
 {
 	int err;
@@ -576,43 +553,74 @@  static int table_iter_seek_indexed(struct table_iter *ti,
 	return err;
 }
 
-static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it,
-		       struct reftable_record *rec)
+static int table_iter_seek(struct table_iter *ti,
+			   struct reftable_record *want)
 {
-	uint8_t typ = reftable_record_type(rec);
-	struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
-	struct table_iter ti, *p;
+	uint8_t typ = reftable_record_type(want);
+	struct reftable_reader_offsets *offs = reader_offsets_for(ti->r, typ);
 	int err;
 
-	if (!offs->is_present) {
-		iterator_set_empty(it);
-		return 0;
-	}
-
-	table_iter_init(&ti, r);
-
-	err = table_iter_seek_start(&ti, reftable_record_type(rec),
+	err = table_iter_seek_start(ti, reftable_record_type(want),
 				    !!offs->index_offset);
 	if (err < 0)
 		goto out;
 
 	if (offs->index_offset)
-		err = table_iter_seek_indexed(&ti, rec);
+		err = table_iter_seek_indexed(ti, want);
 	else
-		err = table_iter_seek_linear(&ti, rec);
+		err = table_iter_seek_linear(ti, want);
 	if (err)
 		goto out;
 
-	REFTABLE_ALLOC_ARRAY(p, 1);
-	*p = ti;
-	iterator_from_table_iter(it, p);
-
 out:
-	if (err)
-		table_iter_close(&ti);
 	return err;
 }
 
+static int table_iter_seek_void(void *ti, struct reftable_record *want)
+{
+	return table_iter_seek(ti, want);
+}
+
+static int table_iter_next_void(void *ti, struct reftable_record *rec)
+{
+	return table_iter_next(ti, rec);
+}
+
+static void table_iter_close_void(void *ti)
+{
+	table_iter_close(ti);
+}
+
+static struct reftable_iterator_vtable table_iter_vtable = {
+	.seek = &table_iter_seek_void,
+	.next = &table_iter_next_void,
+	.close = &table_iter_close_void,
+};
+
+static void iterator_from_table_iter(struct reftable_iterator *it,
+				     struct table_iter *ti)
+{
+	assert(!it->ops);
+	it->iter_arg = ti;
+	it->ops = &table_iter_vtable;
+}
+
+static void reader_init_iter(struct reftable_reader *r,
+			     struct reftable_iterator *it,
+			     uint8_t typ)
+{
+	struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
+
+	if (offs->is_present) {
+		struct table_iter *ti;
+		REFTABLE_ALLOC_ARRAY(ti, 1);
+		table_iter_init(ti, r);
+		iterator_from_table_iter(it, ti);
+	} else {
+		iterator_set_empty(it);
+	}
+}
+
 int reftable_reader_seek_ref(struct reftable_reader *r,
 			     struct reftable_iterator *it, const char *name)
 {
@@ -622,19 +630,23 @@  int reftable_reader_seek_ref(struct reftable_reader *r,
 			.refname = (char *)name,
 		},
 	};
-	return reader_seek(r, it, &rec);
+	reader_init_iter(r, it, BLOCK_TYPE_REF);
+	return iterator_seek(it, &rec);
 }
 
 int reftable_reader_seek_log_at(struct reftable_reader *r,
 				struct reftable_iterator *it, const char *name,
 				uint64_t update_index)
 {
-	struct reftable_record rec = { .type = BLOCK_TYPE_LOG,
-				       .u.log = {
-					       .refname = (char *)name,
-					       .update_index = update_index,
-				       } };
-	return reader_seek(r, it, &rec);
+	struct reftable_record rec = {
+		.type = BLOCK_TYPE_LOG,
+		.u.log = {
+			.refname = (char *)name,
+			.update_index = update_index,
+		},
+	};
+	reader_init_iter(r, it, BLOCK_TYPE_LOG);
+	return iterator_seek(it, &rec);
 }
 
 int reftable_reader_seek_log(struct reftable_reader *r,
@@ -692,7 +704,8 @@  static int reftable_reader_refs_for_indexed(struct reftable_reader *r,
 	struct indexed_table_ref_iter *itr = NULL;
 
 	/* Look through the reverse index. */
-	err = reader_seek(r, &oit, &want);
+	reader_init_iter(r, &oit, BLOCK_TYPE_OBJ);
+	err = iterator_seek(&oit, &want);
 	if (err != 0)
 		goto done;
 
@@ -773,10 +786,11 @@  uint64_t reftable_reader_min_update_index(struct reftable_reader *r)
 
 /* generic table interface. */
 
-static int reftable_reader_seek_void(void *tab, struct reftable_iterator *it,
-				     struct reftable_record *rec)
+static void reftable_reader_init_iter_void(void *tab,
+					   struct reftable_iterator *it,
+					   uint8_t typ)
 {
-	return reader_seek(tab, it, rec);
+	reader_init_iter(tab, it, typ);
 }
 
 static uint32_t reftable_reader_hash_id_void(void *tab)
@@ -795,7 +809,7 @@  static uint64_t reftable_reader_max_update_index_void(void *tab)
 }
 
 static struct reftable_table_vtable reader_vtable = {
-	.seek_record = reftable_reader_seek_void,
+	.init_iter = reftable_reader_init_iter_void,
 	.hash_id = reftable_reader_hash_id_void,
 	.min_update_index = reftable_reader_min_update_index_void,
 	.max_update_index = reftable_reader_max_update_index_void,