diff mbox series

[5/6] reftable/reader: make table iterator reseekable

Message ID a281f936a2b3a697b32f57652a2120afd54f7e4f.1725881266.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series refs/reftable: wire up exclude patterns | expand

Commit Message

Patrick Steinhardt Sept. 9, 2024, 11:31 a.m. UTC
In 67ce50ba26 (Merge branch 'ps/reftable-reusable-iterator', 2024-05-30)
we have refactored the interface of reftable iterators such that they
can be reused in theory. This patch series only landed the required
changes on the interface level, but didn't yet implement the actual
logic to make iterators reusable.

As it turns out almost all of the infrastructure already does support
re-seeking. The only exception is the table iterator, which does not
reset its `is_finished` bit. Do so and add a couple of tests that verify
that we can re-seek iterators.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Makefile                         |  1 +
 reftable/reader.c                |  1 +
 t/unit-tests/t-reftable-merged.c | 76 +++++++++++++++++++++++++
 t/unit-tests/t-reftable-reader.c | 96 ++++++++++++++++++++++++++++++++
 4 files changed, 174 insertions(+)
 create mode 100644 t/unit-tests/t-reftable-reader.c

Comments

karthik nayak Sept. 13, 2024, 12:11 p.m. UTC | #1
Patrick Steinhardt <ps@pks.im> writes:

[snip]

> diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
> index b8c92fdb365..19e54bdfb8b 100644
> --- a/t/unit-tests/t-reftable-merged.c
> +++ b/t/unit-tests/t-reftable-merged.c
> @@ -194,6 +194,81 @@ static void t_merged_refs(void)
>  	reftable_free(bs);
>  }
>
> +static void t_merged_seek_multiple_times(void)
> +{
> +	struct reftable_ref_record r1[] = {
> +		{
> +			.refname = (char *) "a",
> +			.update_index = 1,
> +			.value_type = REFTABLE_REF_VAL1,
> +			.value.val1 = { 1 },
> +		},
> +		{
> +			.refname = (char *) "c",
> +			.update_index = 1,
> +			.value_type = REFTABLE_REF_VAL1,
> +			.value.val1 = { 2 },
> +		}
> +	};
> +	struct reftable_ref_record r2[] = {
> +		{
> +			.refname = (char *) "b",
> +			.update_index = 2,
> +			.value_type = REFTABLE_REF_VAL1,
> +			.value.val1 = { 3 },
> +		},
> +		{
> +			.refname = (char *) "d",
> +			.update_index = 2,
> +			.value_type = REFTABLE_REF_VAL1,
> +			.value.val1 = { 4 },
> +		},
> +	};
> +	struct reftable_ref_record *refs[] = {
> +		r1, r2,
> +	};
> +	size_t sizes[] = {
> +		ARRAY_SIZE(r1), ARRAY_SIZE(r2),
> +	};
> +	struct strbuf bufs[] = {
> +		STRBUF_INIT, STRBUF_INIT,
> +	};
> +	struct reftable_block_source *sources = NULL;
> +	struct reftable_reader **readers = NULL;
> +	struct reftable_ref_record rec = { 0 };
> +	struct reftable_iterator it = { 0 };
> +	struct reftable_merged_table *mt;
> +
> +	mt = merged_table_from_records(refs, &sources, &readers, sizes, bufs, 2);
> +	merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
> +
> +	for (size_t i = 0; i < 5; i++) {
> +		int err = reftable_iterator_seek_ref(&it, "c");
> +		check(!err);
> +
> +		err = reftable_iterator_next_ref(&it, &rec);
> +		check(!err);
> +		err = reftable_ref_record_equal(&rec, &r1[1], GIT_SHA1_RAWSZ);
> +		check(err == 1);
> +
> +		err = reftable_iterator_next_ref(&it, &rec);
> +		check(!err);
> +		err = reftable_ref_record_equal(&rec, &r2[1], GIT_SHA1_RAWSZ);
> +		check(err == 1);
> +

So this skips r2[0] because when we seek, we seek all sub-iterators. So
in r2 we seek to 'd' since that is the next alphabetical value.

[snip]

> diff --git a/t/unit-tests/t-reftable-reader.c b/t/unit-tests/t-reftable-reader.c
> new file mode 100644
> index 00000000000..7a46580b6f1
> --- /dev/null
> +++ b/t/unit-tests/t-reftable-reader.c
> @@ -0,0 +1,96 @@
> +#include "test-lib.h"
> +#include "lib-reftable.h"
> +#include "reftable/blocksource.h"
> +#include "reftable/reader.h"
> +
> +static int t_reader_seek_once(void)
> +{
> +	struct reftable_ref_record records[] = {
> +		{
> +			.refname = (char *) "refs/heads/main",
> +			.value_type = REFTABLE_REF_VAL1,
> +			.value.val1 = { 42 },
> +		},
> +	};
> +	struct reftable_block_source source = { 0 };
> +	struct reftable_ref_record ref = { 0 };
> +	struct reftable_iterator it = { 0 };
> +	struct reftable_reader *reader;
> +	struct strbuf buf = STRBUF_INIT;
> +	int ret;
> +
> +	t_reftable_write_to_buf(&buf, records, ARRAY_SIZE(records), NULL, 0, NULL);
> +	block_source_from_strbuf(&source, &buf);
> +
> +	ret = reftable_reader_new(&reader, &source, "name");
> +	check_int(ret, ==, 0);
> +
> +	reftable_reader_init_ref_iterator(reader, &it);
> +	ret = reftable_iterator_seek_ref(&it, "");
> +	check_int(ret, ==, 0);
> +	ret = reftable_iterator_next_ref(&it, &ref);
> +	check_int(ret, ==, 0);
> +
> +	ret = reftable_ref_record_equal(&ref, &records[0], 20);

s/20/GIT_SHA1_RAWSZ

Also here and elsewhere, shouldn't we just do
`check(reftable_ref_record_equal(...))` or even
`!check(reftable_iterator_seek_ref(...))` ?

[snip]
Patrick Steinhardt Sept. 16, 2024, 6:56 a.m. UTC | #2
On Fri, Sep 13, 2024 at 07:11:54AM -0500, karthik nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> > diff --git a/t/unit-tests/t-reftable-reader.c b/t/unit-tests/t-reftable-reader.c
> > new file mode 100644
> > index 00000000000..7a46580b6f1
> > --- /dev/null
> > +++ b/t/unit-tests/t-reftable-reader.c
> > @@ -0,0 +1,96 @@
> > +#include "test-lib.h"
> > +#include "lib-reftable.h"
> > +#include "reftable/blocksource.h"
> > +#include "reftable/reader.h"
> > +
> > +static int t_reader_seek_once(void)
> > +{
> > +	struct reftable_ref_record records[] = {
> > +		{
> > +			.refname = (char *) "refs/heads/main",
> > +			.value_type = REFTABLE_REF_VAL1,
> > +			.value.val1 = { 42 },
> > +		},
> > +	};
> > +	struct reftable_block_source source = { 0 };
> > +	struct reftable_ref_record ref = { 0 };
> > +	struct reftable_iterator it = { 0 };
> > +	struct reftable_reader *reader;
> > +	struct strbuf buf = STRBUF_INIT;
> > +	int ret;
> > +
> > +	t_reftable_write_to_buf(&buf, records, ARRAY_SIZE(records), NULL, 0, NULL);
> > +	block_source_from_strbuf(&source, &buf);
> > +
> > +	ret = reftable_reader_new(&reader, &source, "name");
> > +	check_int(ret, ==, 0);
> > +
> > +	reftable_reader_init_ref_iterator(reader, &it);
> > +	ret = reftable_iterator_seek_ref(&it, "");
> > +	check_int(ret, ==, 0);
> > +	ret = reftable_iterator_next_ref(&it, &ref);
> > +	check_int(ret, ==, 0);
> > +
> > +	ret = reftable_ref_record_equal(&ref, &records[0], 20);
> 
> s/20/GIT_SHA1_RAWSZ

Indeed.

> Also here and elsewhere, shouldn't we just do
> `check(reftable_ref_record_equal(...))` or even
> `!check(reftable_iterator_seek_ref(...))` ?

I guess you mean `check(!reftable_iteraror_seek_ref())`, right?

In the case where we just expect a zero error code I can certainly adapt
the code to use `check(...)`. But the other cases shouldn't use
`check(!...)` because it is important that the returnd error code is
positive.

Patrick
karthik nayak Sept. 17, 2024, 4:44 p.m. UTC | #3
Patrick Steinhardt <ps@pks.im> writes:

> On Fri, Sep 13, 2024 at 07:11:54AM -0500, karthik nayak wrote:
>> Patrick Steinhardt <ps@pks.im> writes:
>> > diff --git a/t/unit-tests/t-reftable-reader.c b/t/unit-tests/t-reftable-reader.c
>> > new file mode 100644
>> > index 00000000000..7a46580b6f1
>> > --- /dev/null
>> > +++ b/t/unit-tests/t-reftable-reader.c
>> > @@ -0,0 +1,96 @@
>> > +#include "test-lib.h"
>> > +#include "lib-reftable.h"
>> > +#include "reftable/blocksource.h"
>> > +#include "reftable/reader.h"
>> > +
>> > +static int t_reader_seek_once(void)
>> > +{
>> > +	struct reftable_ref_record records[] = {
>> > +		{
>> > +			.refname = (char *) "refs/heads/main",
>> > +			.value_type = REFTABLE_REF_VAL1,
>> > +			.value.val1 = { 42 },
>> > +		},
>> > +	};
>> > +	struct reftable_block_source source = { 0 };
>> > +	struct reftable_ref_record ref = { 0 };
>> > +	struct reftable_iterator it = { 0 };
>> > +	struct reftable_reader *reader;
>> > +	struct strbuf buf = STRBUF_INIT;
>> > +	int ret;
>> > +
>> > +	t_reftable_write_to_buf(&buf, records, ARRAY_SIZE(records), NULL, 0, NULL);
>> > +	block_source_from_strbuf(&source, &buf);
>> > +
>> > +	ret = reftable_reader_new(&reader, &source, "name");
>> > +	check_int(ret, ==, 0);
>> > +
>> > +	reftable_reader_init_ref_iterator(reader, &it);
>> > +	ret = reftable_iterator_seek_ref(&it, "");
>> > +	check_int(ret, ==, 0);
>> > +	ret = reftable_iterator_next_ref(&it, &ref);
>> > +	check_int(ret, ==, 0);
>> > +
>> > +	ret = reftable_ref_record_equal(&ref, &records[0], 20);
>>
>> s/20/GIT_SHA1_RAWSZ
>
> Indeed.
>
>> Also here and elsewhere, shouldn't we just do
>> `check(reftable_ref_record_equal(...))` or even
>> `!check(reftable_iterator_seek_ref(...))` ?
>
> I guess you mean `check(!reftable_iteraror_seek_ref())`, right?
>

Yes.

> In the case where we just expect a zero error code I can certainly adapt
> the code to use `check(...)`. But the other cases shouldn't use
> `check(!...)` because it is important that the returnd error code is
> positive.
>

Perfect!

> Patrick

Karthik
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index 9460a80d0dd..4039e355b09 100644
--- a/Makefile
+++ b/Makefile
@@ -1346,6 +1346,7 @@  UNIT_TEST_PROGRAMS += t-reftable-basics
 UNIT_TEST_PROGRAMS += t-reftable-block
 UNIT_TEST_PROGRAMS += t-reftable-merged
 UNIT_TEST_PROGRAMS += t-reftable-pq
+UNIT_TEST_PROGRAMS += t-reftable-reader
 UNIT_TEST_PROGRAMS += t-reftable-readwrite
 UNIT_TEST_PROGRAMS += t-reftable-record
 UNIT_TEST_PROGRAMS += t-reftable-stack
diff --git a/reftable/reader.c b/reftable/reader.c
index f8770990876..6494ce2e327 100644
--- a/reftable/reader.c
+++ b/reftable/reader.c
@@ -328,6 +328,7 @@  static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ)
 	ti->typ = block_reader_type(&ti->br);
 	ti->block_off = off;
 	block_iter_seek_start(&ti->bi, &ti->br);
+	ti->is_finished = 0;
 	return 0;
 }
 
diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
index b8c92fdb365..19e54bdfb8b 100644
--- a/t/unit-tests/t-reftable-merged.c
+++ b/t/unit-tests/t-reftable-merged.c
@@ -194,6 +194,81 @@  static void t_merged_refs(void)
 	reftable_free(bs);
 }
 
+static void t_merged_seek_multiple_times(void)
+{
+	struct reftable_ref_record r1[] = {
+		{
+			.refname = (char *) "a",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 1 },
+		},
+		{
+			.refname = (char *) "c",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 2 },
+		}
+	};
+	struct reftable_ref_record r2[] = {
+		{
+			.refname = (char *) "b",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 3 },
+		},
+		{
+			.refname = (char *) "d",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 4 },
+		},
+	};
+	struct reftable_ref_record *refs[] = {
+		r1, r2,
+	};
+	size_t sizes[] = {
+		ARRAY_SIZE(r1), ARRAY_SIZE(r2),
+	};
+	struct strbuf bufs[] = {
+		STRBUF_INIT, STRBUF_INIT,
+	};
+	struct reftable_block_source *sources = NULL;
+	struct reftable_reader **readers = NULL;
+	struct reftable_ref_record rec = { 0 };
+	struct reftable_iterator it = { 0 };
+	struct reftable_merged_table *mt;
+
+	mt = merged_table_from_records(refs, &sources, &readers, sizes, bufs, 2);
+	merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
+
+	for (size_t i = 0; i < 5; i++) {
+		int err = reftable_iterator_seek_ref(&it, "c");
+		check(!err);
+
+		err = reftable_iterator_next_ref(&it, &rec);
+		check(!err);
+		err = reftable_ref_record_equal(&rec, &r1[1], GIT_SHA1_RAWSZ);
+		check(err == 1);
+
+		err = reftable_iterator_next_ref(&it, &rec);
+		check(!err);
+		err = reftable_ref_record_equal(&rec, &r2[1], GIT_SHA1_RAWSZ);
+		check(err == 1);
+
+		err = reftable_iterator_next_ref(&it, &rec);
+		check(err > 0);
+	}
+
+	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)
+		strbuf_release(&bufs[i]);
+	readers_destroy(readers, ARRAY_SIZE(refs));
+	reftable_ref_record_release(&rec);
+	reftable_iterator_destroy(&it);
+	reftable_merged_table_free(mt);
+	reftable_free(sources);
+}
+
 static struct reftable_merged_table *
 merged_table_from_log_records(struct reftable_log_record **logs,
 			      struct reftable_block_source **source,
@@ -383,6 +458,7 @@  int cmd_main(int argc UNUSED, const char *argv[] UNUSED)
 	TEST(t_default_write_opts(), "merged table with default write opts");
 	TEST(t_merged_logs(), "merged table with multiple log updates for same ref");
 	TEST(t_merged_refs(), "merged table with multiple updates to same ref");
+	TEST(t_merged_seek_multiple_times(), "merged table can seek multiple times");
 	TEST(t_merged_single_record(), "ref ocurring in only one record can be fetched");
 
 	return test_done();
diff --git a/t/unit-tests/t-reftable-reader.c b/t/unit-tests/t-reftable-reader.c
new file mode 100644
index 00000000000..7a46580b6f1
--- /dev/null
+++ b/t/unit-tests/t-reftable-reader.c
@@ -0,0 +1,96 @@ 
+#include "test-lib.h"
+#include "lib-reftable.h"
+#include "reftable/blocksource.h"
+#include "reftable/reader.h"
+
+static int t_reader_seek_once(void)
+{
+	struct reftable_ref_record records[] = {
+		{
+			.refname = (char *) "refs/heads/main",
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 42 },
+		},
+	};
+	struct reftable_block_source source = { 0 };
+	struct reftable_ref_record ref = { 0 };
+	struct reftable_iterator it = { 0 };
+	struct reftable_reader *reader;
+	struct strbuf buf = STRBUF_INIT;
+	int ret;
+
+	t_reftable_write_to_buf(&buf, records, ARRAY_SIZE(records), NULL, 0, NULL);
+	block_source_from_strbuf(&source, &buf);
+
+	ret = reftable_reader_new(&reader, &source, "name");
+	check_int(ret, ==, 0);
+
+	reftable_reader_init_ref_iterator(reader, &it);
+	ret = reftable_iterator_seek_ref(&it, "");
+	check_int(ret, ==, 0);
+	ret = reftable_iterator_next_ref(&it, &ref);
+	check_int(ret, ==, 0);
+
+	ret = reftable_ref_record_equal(&ref, &records[0], 20);
+	check_int(ret, ==, 1);
+
+	ret = reftable_iterator_next_ref(&it, &ref);
+	check_int(ret, ==, 1);
+
+	reftable_ref_record_release(&ref);
+	reftable_iterator_destroy(&it);
+	reftable_reader_decref(reader);
+	strbuf_release(&buf);
+	return 0;
+}
+
+static int t_reader_reseek(void)
+{
+	struct reftable_ref_record records[] = {
+		{
+			.refname = (char *) "refs/heads/main",
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 42 },
+		},
+	};
+	struct reftable_block_source source = { 0 };
+	struct reftable_ref_record ref = { 0 };
+	struct reftable_iterator it = { 0 };
+	struct reftable_reader *reader;
+	struct strbuf buf = STRBUF_INIT;
+	int ret;
+
+	t_reftable_write_to_buf(&buf, records, ARRAY_SIZE(records), NULL, 0, NULL);
+	block_source_from_strbuf(&source, &buf);
+
+	ret = reftable_reader_new(&reader, &source, "name");
+	check_int(ret, ==, 0);
+
+	reftable_reader_init_ref_iterator(reader, &it);
+
+	for (size_t i = 0; i < 5; i++) {
+		ret = reftable_iterator_seek_ref(&it, "");
+		check_int(ret, ==, 0);
+		ret = reftable_iterator_next_ref(&it, &ref);
+		check_int(ret, ==, 0);
+
+		ret = reftable_ref_record_equal(&ref, &records[0], GIT_SHA1_RAWSZ);
+		check_int(ret, ==, 1);
+
+		ret = reftable_iterator_next_ref(&it, &ref);
+		check_int(ret, ==, 1);
+	}
+
+	reftable_ref_record_release(&ref);
+	reftable_iterator_destroy(&it);
+	reftable_reader_decref(reader);
+	strbuf_release(&buf);
+	return 0;
+}
+
+int cmd_main(int argc UNUSED, const char *argv[] UNUSED)
+{
+	TEST(t_reader_seek_once(), "reader can seek once");
+	TEST(t_reader_reseek(), "reader can reseek multiple times");
+	return test_done();
+}