diff mbox series

[12/16] mktree: use iterator struct to add tree entries to index

Message ID 94d6615d634c4f78c88d3e01abbb27f13f85828c.1718130288.git.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series mktree: support more flexible usage | expand

Commit Message

Victoria Dye June 11, 2024, 6:24 p.m. UTC
From: Victoria Dye <vdye@github.com>

Create 'struct tree_entry_iterator' to manage iteration through a 'struct
tree_entry_array'. Using an iterator allows for conditional iteration; this
functionality will be necessary in later commits when performing parallel
iteration through multiple sets of tree entries.

Signed-off-by: Victoria Dye <vdye@github.com>
---
 builtin/mktree.c | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

Comments

Patrick Steinhardt June 12, 2024, 9:40 a.m. UTC | #1
On Tue, Jun 11, 2024 at 06:24:44PM +0000, Victoria Dye via GitGitGadget wrote:
> From: Victoria Dye <vdye@github.com>
> 
> Create 'struct tree_entry_iterator' to manage iteration through a 'struct
> tree_entry_array'. Using an iterator allows for conditional iteration; this
> functionality will be necessary in later commits when performing parallel
> iteration through multiple sets of tree entries.
> 
> Signed-off-by: Victoria Dye <vdye@github.com>
> ---
>  builtin/mktree.c | 40 +++++++++++++++++++++++++++++++++++++---
>  1 file changed, 37 insertions(+), 3 deletions(-)
> 
> diff --git a/builtin/mktree.c b/builtin/mktree.c
> index 12f68187221..bee359e9978 100644
> --- a/builtin/mktree.c
> +++ b/builtin/mktree.c
> @@ -137,6 +137,38 @@ static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
>  	QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
>  }
>  
> +struct tree_entry_iterator {
> +	struct tree_entry *current;
> +
> +	/* private */
> +	struct {
> +		struct tree_entry_array *arr;
> +		size_t idx;
> +	} priv;
> +};
> +
> +static void init_tree_entry_iterator(struct tree_entry_iterator *iter,
> +				     struct tree_entry_array *arr)
> +{
> +	iter->priv.arr = arr;
> +	iter->priv.idx = 0;
> +	iter->current = 0 < arr->nr ? arr->entries[0] : NULL;
> +}

Nit: Same comment as before, I think these should rather be named
`tree_entry_iterator_init()` and `tree_entry_iterator_advance()`.

> +/*
> + * Advance the tree entry iterator to the next entry in the array. If no entries
> + * remain, 'current' is set to NULL. Returns the previous 'current' value of the
> + * iterator.
> + */
> +static struct tree_entry *advance_tree_entry_iterator(struct tree_entry_iterator *iter)
> +{
> +	struct tree_entry *prev = iter->current;
> +	iter->current = (iter->priv.idx + 1) < iter->priv.arr->nr
> +			? iter->priv.arr->entries[++iter->priv.idx]
> +			: NULL;
> +	return prev;
> +}

I think it's somewhat confusing to have this return a different value
than `current`. When I call `next()`, then I expect the iterator to
return the next item. And after having called `next()`, I expect that
the current value is the one that the previous call to `next()` has
returned.

To avoid confusion, I'd propose to get rid of the `current` member
altogether. It's not needed, as we already save the current index, and
removing it avoids the confusion.

Patrick
Victoria Dye June 13, 2024, 6:38 p.m. UTC | #2
Patrick Steinhardt wrote:
> On Tue, Jun 11, 2024 at 06:24:44PM +0000, Victoria Dye via GitGitGadget wrote:
>> From: Victoria Dye <vdye@github.com>
>>
>> Create 'struct tree_entry_iterator' to manage iteration through a 'struct
>> tree_entry_array'. Using an iterator allows for conditional iteration; this
>> functionality will be necessary in later commits when performing parallel
>> iteration through multiple sets of tree entries.
>>
>> Signed-off-by: Victoria Dye <vdye@github.com>
>> ---
>>  builtin/mktree.c | 40 +++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 37 insertions(+), 3 deletions(-)
>>
>> diff --git a/builtin/mktree.c b/builtin/mktree.c
>> index 12f68187221..bee359e9978 100644
>> --- a/builtin/mktree.c
>> +++ b/builtin/mktree.c
>> @@ -137,6 +137,38 @@ static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
>>  	QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
>>  }
>>  
>> +struct tree_entry_iterator {
>> +	struct tree_entry *current;
>> +
>> +	/* private */
>> +	struct {
>> +		struct tree_entry_array *arr;
>> +		size_t idx;
>> +	} priv;
>> +};
>> +
>> +static void init_tree_entry_iterator(struct tree_entry_iterator *iter,
>> +				     struct tree_entry_array *arr)
>> +{
>> +	iter->priv.arr = arr;
>> +	iter->priv.idx = 0;
>> +	iter->current = 0 < arr->nr ? arr->entries[0] : NULL;
>> +}
> 
> Nit: Same comment as before, I think these should rather be named
> `tree_entry_iterator_init()` and `tree_entry_iterator_advance()`.

That works for me. I'm not attached to the naming convention I used and your
justification for changing it in [1] is reasonable.

[1] https://lore.kernel.org/git/ZmltDQ5SlVvrEDGP@tanuki/

>> +/*
>> + * Advance the tree entry iterator to the next entry in the array. If no entries
>> + * remain, 'current' is set to NULL. Returns the previous 'current' value of the
>> + * iterator.
>> + */
>> +static struct tree_entry *advance_tree_entry_iterator(struct tree_entry_iterator *iter)
>> +{
>> +	struct tree_entry *prev = iter->current;
>> +	iter->current = (iter->priv.idx + 1) < iter->priv.arr->nr
>> +			? iter->priv.arr->entries[++iter->priv.idx]
>> +			: NULL;
>> +	return prev;
>> +}
> 
> I think it's somewhat confusing to have this return a different value
> than `current`. When I call `next()`, then I expect the iterator to
> return the next item. And after having called `next()`, I expect that
> the current value is the one that the previous call to `next()` has
> returned.

I do see how it's confusing. I was attempting to mimic the various
array/stack "pop" methods throughout the codebase (which return the "popped"
value while moving the stack pointer), but that doesn't really work here
with an iterator. 

The only real benefit of this was that it simplified a loop somewhere later
on, but not by a ton. I'll drop the 'tree_entry *' return value from the
method and access 'iter->current' directly where it's needed.

> To avoid confusion, I'd propose to get rid of the `current` member
> altogether. It's not needed, as we already save the current index, and
> removing it avoids the confusion.

The idea of the iterator is to have callers only ever reference the
'current' value to avoid needing to deal with the array & current index
directly; I find that it majorly simplifies the parallel iteration through
the base tree and entry array in [2]. IOW, in a language with support for
it, 'idx' would be private & 'current' would be public. So I would like to
keep the 'current' value as the publicly-accessible way of interacting with
the iterator (although, as mentioned above, I'm happy to drop it from the
'advance' method return value).

[2] https://lore.kernel.org/git/df0c50dfea3cb77e0070246efdf7a3f070b2ad97.1718130288.git.gitgitgadget@gmail.com/

> 
> Patrick
diff mbox series

Patch

diff --git a/builtin/mktree.c b/builtin/mktree.c
index 12f68187221..bee359e9978 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -137,6 +137,38 @@  static void sort_and_dedup_tree_entry_array(struct tree_entry_array *arr)
 	QSORT_S(arr->entries, arr->nr, ent_compare, &ignore_mode);
 }
 
+struct tree_entry_iterator {
+	struct tree_entry *current;
+
+	/* private */
+	struct {
+		struct tree_entry_array *arr;
+		size_t idx;
+	} priv;
+};
+
+static void init_tree_entry_iterator(struct tree_entry_iterator *iter,
+				     struct tree_entry_array *arr)
+{
+	iter->priv.arr = arr;
+	iter->priv.idx = 0;
+	iter->current = 0 < arr->nr ? arr->entries[0] : NULL;
+}
+
+/*
+ * Advance the tree entry iterator to the next entry in the array. If no entries
+ * remain, 'current' is set to NULL. Returns the previous 'current' value of the
+ * iterator.
+ */
+static struct tree_entry *advance_tree_entry_iterator(struct tree_entry_iterator *iter)
+{
+	struct tree_entry *prev = iter->current;
+	iter->current = (iter->priv.idx + 1) < iter->priv.arr->nr
+			? iter->priv.arr->entries[++iter->priv.idx]
+			: NULL;
+	return prev;
+}
+
 static int add_tree_entry_to_index(struct index_state *istate,
 				   struct tree_entry *ent)
 {
@@ -155,15 +187,17 @@  static int add_tree_entry_to_index(struct index_state *istate,
 
 static void write_tree(struct tree_entry_array *arr, struct object_id *oid)
 {
+	struct tree_entry_iterator iter = { NULL };
+	struct tree_entry *ent;
 	struct index_state istate = INDEX_STATE_INIT(the_repository);
 	istate.sparse_index = 1;
 
 	sort_and_dedup_tree_entry_array(arr);
 
-	/* Construct an in-memory index from the provided entries */
-	for (size_t i = 0; i < arr->nr; i++) {
-		struct tree_entry *ent = arr->entries[i];
+	init_tree_entry_iterator(&iter, arr);
 
+	/* Construct an in-memory index from the provided entries & base tree */
+	while ((ent = advance_tree_entry_iterator(&iter))) {
 		if (add_tree_entry_to_index(&istate, ent))
 			die(_("failed to add tree entry '%s'"), ent->name);
 	}