diff mbox series

[07/13] reftable/merged: split up initialization and seeking of records

Message ID 21b3e3ab5f04e66fdd352187b1da699d1ab67cee.1715166175.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series reftable: prepare for re-seekable iterators | expand

Commit Message

Patrick Steinhardt May 8, 2024, 11:04 a.m. UTC
To initialize a `struct merged_iter`, we need to seek all subiterators
to the wanted record and then add their results to the priority queue
used to sort the records. This logic is split up across two functions,
`merged_table_seek_record()` and `merged_table_iter()`. The scope of
these functions is somewhat weird though, where `merged_table_iter()` is
only responsible for adding the records of the subiterators to the
priority queue.

Clarify the scope of those functions such that `merged_table_iter()` is
only responsible for initializing the iterator's structure. Performing
the subiterator seeks are now part of `merged_table_seek_record()`.

This step is required to move seeking of records into the generic
`struct reftable_iterator` infrastructure.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/merged.c | 59 ++++++++++++++++++-----------------------------
 1 file changed, 22 insertions(+), 37 deletions(-)

Comments

Justin Tobler May 10, 2024, 7:18 p.m. UTC | #1
On 24/05/08 01:04PM, Patrick Steinhardt wrote:
> To initialize a `struct merged_iter`, we need to seek all subiterators
> to the wanted record and then add their results to the priority queue
> used to sort the records. This logic is split up across two functions,
> `merged_table_seek_record()` and `merged_table_iter()`. The scope of

Did we mean `merged_iter_init` instead of `merged_table_iter()` here?

> these functions is somewhat weird though, where `merged_table_iter()` is
> only responsible for adding the records of the subiterators to the
> priority queue.
> 
> Clarify the scope of those functions such that `merged_table_iter()` is
> only responsible for initializing the iterator's structure. Performing
> the subiterator seeks are now part of `merged_table_seek_record()`.
> 
> This step is required to move seeking of records into the generic
> `struct reftable_iterator` infrastructure.
> 
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
...
> @@ -246,32 +230,33 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
>  				    struct reftable_iterator *it,
>  				    struct reftable_record *rec)
>  {
> -	struct merged_iter merged = {
> -		.typ = reftable_record_type(rec),
> -		.hash_id = mt->hash_id,
> -		.suppress_deletions = mt->suppress_deletions,
> -		.advance_index = -1,
> -	};
> -	struct merged_iter *p;
> +	struct merged_iter merged, *p;
>  	int err;
>  
> -	REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len);
> +	merged_iter_init(&merged, mt);
> +
>  	for (size_t i = 0; i < mt->stack_len; i++) {
> +		reftable_record_init(&merged.subiters[merged.stack_len].rec,
> +				     reftable_record_type(rec));

I find it somewhat confusing how we increment the subiterator index
here. I assume this is because when a record is not found we don't want
to add it to the index. Might be nice to add a comment here explaining
this.

Edit: Looks like you address this in the next commit so I guess we are
good :)

> +
>  		err = reftable_table_seek_record(&mt->stack[i],
>  						 &merged.subiters[merged.stack_len].iter, rec);
>  		if (err < 0)
>  			goto out;
> -		if (!err)
> -			merged.stack_len++;
> -	}
> +		if (err > 0)
> +			continue;
>  
> -	err = merged_iter_init(&merged);
> -	if (err < 0)
> -		goto out;
> +		err = merged_iter_advance_subiter(&merged, merged.stack_len);
> +		if (err < 0)
> +			goto out;
> +
> +		merged.stack_len++;
> +	}
>  
> -	p = reftable_malloc(sizeof(struct merged_iter));
> +	p = reftable_malloc(sizeof(*p));
>  	*p = merged;
>  	iterator_from_merged_iter(it, p);
> +	err = 0;
>  
>  out:
>  	if (err < 0)
> -- 
> 2.45.0
>
Patrick Steinhardt May 13, 2024, 8:36 a.m. UTC | #2
On Fri, May 10, 2024 at 02:18:16PM -0500, Justin Tobler wrote:
> On 24/05/08 01:04PM, Patrick Steinhardt wrote:
> > To initialize a `struct merged_iter`, we need to seek all subiterators
> > to the wanted record and then add their results to the priority queue
> > used to sort the records. This logic is split up across two functions,
> > `merged_table_seek_record()` and `merged_table_iter()`. The scope of
> 
> Did we mean `merged_iter_init` instead of `merged_table_iter()` here?

Indeed, good catch.

> > @@ -246,32 +230,33 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
> >  				    struct reftable_iterator *it,
> >  				    struct reftable_record *rec)
> >  {
> > -	struct merged_iter merged = {
> > -		.typ = reftable_record_type(rec),
> > -		.hash_id = mt->hash_id,
> > -		.suppress_deletions = mt->suppress_deletions,
> > -		.advance_index = -1,
> > -	};
> > -	struct merged_iter *p;
> > +	struct merged_iter merged, *p;
> >  	int err;
> >  
> > -	REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len);
> > +	merged_iter_init(&merged, mt);
> > +
> >  	for (size_t i = 0; i < mt->stack_len; i++) {
> > +		reftable_record_init(&merged.subiters[merged.stack_len].rec,
> > +				     reftable_record_type(rec));
> 
> I find it somewhat confusing how we increment the subiterator index
> here. I assume this is because when a record is not found we don't want
> to add it to the index. Might be nice to add a comment here explaining
> this.
> 
> Edit: Looks like you address this in the next commit so I guess we are
> good :)

Seconded. And yes, as you noticed, this is one of the reasons why I
change this in the next patch.

Patrick
diff mbox series

Patch

diff --git a/reftable/merged.c b/reftable/merged.c
index f85a24c678..4e1b78e93f 100644
--- a/reftable/merged.c
+++ b/reftable/merged.c
@@ -25,34 +25,18 @@  struct merged_subiter {
 struct merged_iter {
 	struct merged_subiter *subiters;
 	struct merged_iter_pqueue pq;
-	uint32_t hash_id;
 	size_t stack_len;
-	uint8_t typ;
 	int suppress_deletions;
 	ssize_t advance_index;
 };
 
-static int merged_iter_init(struct merged_iter *mi)
+static void merged_iter_init(struct merged_iter *mi,
+			     struct reftable_merged_table *mt)
 {
-	for (size_t i = 0; i < mi->stack_len; i++) {
-		struct pq_entry e = {
-			.index = i,
-			.rec = &mi->subiters[i].rec,
-		};
-		int err;
-
-		reftable_record_init(&mi->subiters[i].rec, mi->typ);
-		err = iterator_next(&mi->subiters[i].iter,
-				    &mi->subiters[i].rec);
-		if (err < 0)
-			return err;
-		if (err > 0)
-			continue;
-
-		merged_iter_pqueue_add(&mi->pq, &e);
-	}
-
-	return 0;
+	memset(mi, 0, sizeof(*mi));
+	mi->advance_index = -1;
+	mi->suppress_deletions = mt->suppress_deletions;
+	REFTABLE_CALLOC_ARRAY(mi->subiters, mt->stack_len);
 }
 
 static void merged_iter_close(void *p)
@@ -246,32 +230,33 @@  static int merged_table_seek_record(struct reftable_merged_table *mt,
 				    struct reftable_iterator *it,
 				    struct reftable_record *rec)
 {
-	struct merged_iter merged = {
-		.typ = reftable_record_type(rec),
-		.hash_id = mt->hash_id,
-		.suppress_deletions = mt->suppress_deletions,
-		.advance_index = -1,
-	};
-	struct merged_iter *p;
+	struct merged_iter merged, *p;
 	int err;
 
-	REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len);
+	merged_iter_init(&merged, mt);
+
 	for (size_t i = 0; i < mt->stack_len; i++) {
+		reftable_record_init(&merged.subiters[merged.stack_len].rec,
+				     reftable_record_type(rec));
+
 		err = reftable_table_seek_record(&mt->stack[i],
 						 &merged.subiters[merged.stack_len].iter, rec);
 		if (err < 0)
 			goto out;
-		if (!err)
-			merged.stack_len++;
-	}
+		if (err > 0)
+			continue;
 
-	err = merged_iter_init(&merged);
-	if (err < 0)
-		goto out;
+		err = merged_iter_advance_subiter(&merged, merged.stack_len);
+		if (err < 0)
+			goto out;
+
+		merged.stack_len++;
+	}
 
-	p = reftable_malloc(sizeof(struct merged_iter));
+	p = reftable_malloc(sizeof(*p));
 	*p = merged;
 	iterator_from_merged_iter(it, p);
+	err = 0;
 
 out:
 	if (err < 0)