diff mbox series

[7/8] builtin/repack.c: drop `DELETE_PACK` macro

Message ID 559b487e2ab056c79367a673188764e4cdce3c96.1693946195.git.me@ttaylorr.com (mailing list archive)
State Superseded
Headers show
Series repack: refactor pack snapshot-ing logic | expand

Commit Message

Taylor Blau Sept. 5, 2023, 8:36 p.m. UTC
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 builtin/repack.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

Comments

Junio C Hamano Sept. 6, 2023, 5:05 p.m. UTC | #1
Taylor Blau <me@ttaylorr.com> writes:

> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
>  builtin/repack.c | 18 ++++++++++--------
>  1 file changed, 10 insertions(+), 8 deletions(-)

The reason being...?

> @@ -130,7 +132,7 @@ static void mark_packs_for_deletion_1(struct string_list *names,
>  		 * (if `-d` was given).
>  		 */
>  		if (!string_list_has_string(names, sha1))
> -			item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK);
> +			item->util = (void*)1;

Presumably the literal "1" is promoted to an appropriate type
(uintptr_t) here?


    We originally planned to use the .util member to store a bitset
    to represent various attributes for each pack, and defined
    DELETE_PACK as one of the possible bits, but over the N years of
    its existence, we never found the need for the second bit.

or something?

It is not a bad idea to demote .util from a set of bits to just a
"is it NULL?" boolean, but updating the above to something like

	#define DELETE_PACK ((void*)(uintptr_t)1)
	item->util = DELETE_PACK;

may still reap the same simplification benefit without introducing
the "what is that _one_ about?  can it be _two_ without changing the
meaning or what?" puzzlement.

Other than that, everything else in the series looked quite
straight-forward and sensible.

Thanks.
Taylor Blau Sept. 6, 2023, 5:22 p.m. UTC | #2
On Wed, Sep 06, 2023 at 10:05:37AM -0700, Junio C Hamano wrote:
> Taylor Blau <me@ttaylorr.com> writes:
>
> > Signed-off-by: Taylor Blau <me@ttaylorr.com>
> > ---
> >  builtin/repack.c | 18 ++++++++++--------
> >  1 file changed, 10 insertions(+), 8 deletions(-)
>
> The reason being...?

Wow, I have no idea how this got sent out without a commit message! At
least it was signed off ;-).

Here's a replacement that I cooked up, with your Helped-by. Let me know
if you want to replace this patch manually, or if you'd prefer a reroll
of the series. Either is fine with me! :-)

--- 8< ---
Subject: [PATCH] builtin/repack.c: treat string_list_item util as booleans

The `->util` field corresponding to each string_list_item used to track
the existence of some pack at the beginning of a repack operation was
originally intended to be used as a bitfield.

This bitfield tracked:

  - (1 << 0): whether or not the pack should be deleted
  - (1 << 1): whether or not the pack is cruft

The previous commit removed the use of the second bit, meaning that we
can treat this field as a boolean instead of a bitset.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 builtin/repack.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/builtin/repack.c b/builtin/repack.c
index 478fab96c9..575e69b020 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -26,7 +26,7 @@
 #define LOOSEN_UNREACHABLE 2
 #define PACK_CRUFT 4

-#define DELETE_PACK 1
+#define DELETE_PACK ((void*)(uintptr_t)1)

 static int pack_everything;
 static int delta_base_offset = 1;
@@ -96,6 +96,10 @@ static int repack_config(const char *var, const char *value,

 struct existing_packs {
 	struct string_list kept_packs;
+	/*
+	 * for both non_kept_packs, and cruft_packs, a non-NULL
+	 * 'util' field indicates the pack should be deleted.
+	 */
 	struct string_list non_kept_packs;
 	struct string_list cruft_packs;
 };
@@ -130,7 +134,7 @@ static void mark_packs_for_deletion_1(struct string_list *names,
 		 * (if `-d` was given).
 		 */
 		if (!string_list_has_string(names, sha1))
-			item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK);
+			item->util = DELETE_PACK;
 	}
 }

@@ -158,7 +162,7 @@ static void remove_redundant_packs_1(struct string_list *packs)
 {
 	struct string_list_item *item;
 	for_each_string_list_item(item, packs) {
-		if (!((uintptr_t)item->util & DELETE_PACK))
+		if (!item->util)
 			continue;
 		remove_redundant_pack(packdir, item->string);
 	}
@@ -695,20 +699,20 @@ static void midx_included_packs(struct string_list *include,

 		for_each_string_list_item(item, &existing->cruft_packs) {
 			/*
-			 * no need to check DELETE_PACK, since we're not
-			 * doing an ALL_INTO_ONE repack
+			 * no need to check for deleted packs, since we're
+			 * not doing an ALL_INTO_ONE repack
 			 */
 			string_list_insert(include, xstrfmt("%s.idx", item->string));
 		}
 	} else {
 		for_each_string_list_item(item, &existing->non_kept_packs) {
-			if ((uintptr_t)item->util & DELETE_PACK)
+			if (item->util)
 				continue;
 			string_list_insert(include, xstrfmt("%s.idx", item->string));
 		}

 		for_each_string_list_item(item, &existing->cruft_packs) {
-			if ((uintptr_t)item->util & DELETE_PACK)
+			if (item->util)
 				continue;
 			string_list_insert(include, xstrfmt("%s.idx", item->string));
 		}
--
2.38.0.16.g393fd4c6db

--- >8 ---

Thanks,
Taylor
Patrick Steinhardt Sept. 7, 2023, 8:19 a.m. UTC | #3
On Wed, Sep 06, 2023 at 01:22:59PM -0400, Taylor Blau wrote:
> On Wed, Sep 06, 2023 at 10:05:37AM -0700, Junio C Hamano wrote:
> > Taylor Blau <me@ttaylorr.com> writes:
> >
> > > Signed-off-by: Taylor Blau <me@ttaylorr.com>
> > > ---
> > >  builtin/repack.c | 18 ++++++++++--------
> > >  1 file changed, 10 insertions(+), 8 deletions(-)
> >
> > The reason being...?
> 
> Wow, I have no idea how this got sent out without a commit message! At
> least it was signed off ;-).
> 
> Here's a replacement that I cooked up, with your Helped-by. Let me know
> if you want to replace this patch manually, or if you'd prefer a reroll
> of the series. Either is fine with me! :-)

Personally I think that the original version is still more readable. If
you simply see `if (item->util)` you don't really have any indicator
what this actually means, whereas previously the more verbose check for
`if ((uintptr)item->util & DELETE_PACK)` gives the reader a nice hint
what this may be about.

If the intent is to make this check a bit prettier, how about we instead
introduce a helper function like the following:

```
static inline int pack_marked_for_deletion(const struct string_list_item *item)
{
        return (uintptr) item->util & DELETE_PACK;
}
```

Other than that the rest of this series looks good to me, thanks.

Patrick

> --- 8< ---
> Subject: [PATCH] builtin/repack.c: treat string_list_item util as booleans
> 
> The `->util` field corresponding to each string_list_item used to track
> the existence of some pack at the beginning of a repack operation was
> originally intended to be used as a bitfield.
> 
> This bitfield tracked:
> 
>   - (1 << 0): whether or not the pack should be deleted
>   - (1 << 1): whether or not the pack is cruft
> 
> The previous commit removed the use of the second bit, meaning that we
> can treat this field as a boolean instead of a bitset.
> 
> Helped-by: Junio C Hamano <gitster@pobox.com>
> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
>  builtin/repack.c | 18 +++++++++++-------
>  1 file changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/builtin/repack.c b/builtin/repack.c
> index 478fab96c9..575e69b020 100644
> --- a/builtin/repack.c
> +++ b/builtin/repack.c
> @@ -26,7 +26,7 @@
>  #define LOOSEN_UNREACHABLE 2
>  #define PACK_CRUFT 4
> 
> -#define DELETE_PACK 1
> +#define DELETE_PACK ((void*)(uintptr_t)1)
> 
>  static int pack_everything;
>  static int delta_base_offset = 1;
> @@ -96,6 +96,10 @@ static int repack_config(const char *var, const char *value,
> 
>  struct existing_packs {
>  	struct string_list kept_packs;
> +	/*
> +	 * for both non_kept_packs, and cruft_packs, a non-NULL
> +	 * 'util' field indicates the pack should be deleted.
> +	 */
>  	struct string_list non_kept_packs;
>  	struct string_list cruft_packs;
>  };
> @@ -130,7 +134,7 @@ static void mark_packs_for_deletion_1(struct string_list *names,
>  		 * (if `-d` was given).
>  		 */
>  		if (!string_list_has_string(names, sha1))
> -			item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK);
> +			item->util = DELETE_PACK;
>  	}
>  }
> 
> @@ -158,7 +162,7 @@ static void remove_redundant_packs_1(struct string_list *packs)
>  {
>  	struct string_list_item *item;
>  	for_each_string_list_item(item, packs) {
> -		if (!((uintptr_t)item->util & DELETE_PACK))
> +		if (!item->util)
>  			continue;
>  		remove_redundant_pack(packdir, item->string);
>  	}
> @@ -695,20 +699,20 @@ static void midx_included_packs(struct string_list *include,
> 
>  		for_each_string_list_item(item, &existing->cruft_packs) {
>  			/*
> -			 * no need to check DELETE_PACK, since we're not
> -			 * doing an ALL_INTO_ONE repack
> +			 * no need to check for deleted packs, since we're
> +			 * not doing an ALL_INTO_ONE repack
>  			 */
>  			string_list_insert(include, xstrfmt("%s.idx", item->string));
>  		}
>  	} else {
>  		for_each_string_list_item(item, &existing->non_kept_packs) {
> -			if ((uintptr_t)item->util & DELETE_PACK)
> +			if (item->util)
>  				continue;
>  			string_list_insert(include, xstrfmt("%s.idx", item->string));
>  		}
> 
>  		for_each_string_list_item(item, &existing->cruft_packs) {
> -			if ((uintptr_t)item->util & DELETE_PACK)
> +			if (item->util)
>  				continue;
>  			string_list_insert(include, xstrfmt("%s.idx", item->string));
>  		}
> --
> 2.38.0.16.g393fd4c6db
> 
> --- >8 ---
> 
> Thanks,
> Taylor
Jeff King Sept. 7, 2023, 8:58 a.m. UTC | #4
On Wed, Sep 06, 2023 at 01:22:59PM -0400, Taylor Blau wrote:

> --- 8< ---
> Subject: [PATCH] builtin/repack.c: treat string_list_item util as booleans
> 
> The `->util` field corresponding to each string_list_item used to track
> the existence of some pack at the beginning of a repack operation was
> originally intended to be used as a bitfield.
> 
> This bitfield tracked:
> 
>   - (1 << 0): whether or not the pack should be deleted
>   - (1 << 1): whether or not the pack is cruft
> 
> The previous commit removed the use of the second bit, meaning that we
> can treat this field as a boolean instead of a bitset.

I do think the boolean is syntactically a little nicer than the bit-set,
just because of the casting we have to with the void pointer). After
reading the earlier parts, I was hoping the culmination of this series
would be dropping the use of util entirely (presumably in favor of
separate lists). But maybe that would be too disruptive; I didn't try
it.

> -#define DELETE_PACK 1
> +#define DELETE_PACK ((void*)(uintptr_t)1)
> [...]
> @@ -130,7 +134,7 @@ static void mark_packs_for_deletion_1(struct string_list *names,
>  		 * (if `-d` was given).
>  		 */
>  		if (!string_list_has_string(names, sha1))
> -			item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK);
> +			item->util = DELETE_PACK;
>  	}
>  }

I do like the use of the macro here to make the meaning of the boolean
more plain, since the name "util" is totally meaningless (but we are
stuck with it).

But on the other side, things get more mysterious:

> @@ -158,7 +162,7 @@ static void remove_redundant_packs_1(struct string_list *packs)
>  {
>  	struct string_list_item *item;
>  	for_each_string_list_item(item, packs) {
> -		if (!((uintptr_t)item->util & DELETE_PACK))
> +		if (!item->util)
>  			continue;

This is syntactically much nicer, but the meaning of "util" as a boolean
for "we should delete this" is lost.

So I dunno. The end result of this patch is more readable syntactically,
but arguably less so semantically. Unless we have a good reason to ditch
the bit-set entirely, I wonder if we could have the best of both with
some macro helpers. Or even a set of matching helper functions like:

  void pack_mark_for_deletion(struct string_list_item *item)
  {
	(uintptr_t)item->util = 1;
  }

  int pack_should_deleted(const struct string_list_item *item)
  {
	return item->util;
  }

which could be a bool or a bit-set; the callers no longer need to care
and get to use human-readable names.

I dunno. It is a file-local convention and there aren't that many spots,
so maybe it is not worth worrying too much about. I'm pretty sure that I
got confused by the use of "util" here when looking at the code before,
as it did not have the DELETE_PACK name until your 72263ffc32
(builtin/repack.c: use named flags for existing_packs, 2022-05-20). But
maybe the comment that you added is sufficient.

If we had generic pointer-bitset macros, then perhaps other string-list
users could benefit, too. I thought maybe fast-export could use this for
its mark_to_ptr() stuff, but that is storing a whole 32-bit value, not a
bitset. So maybe this is just a weird localized thing.

A more radical idea is that we don't care very much about the data
structure as long as it is ordered. So we could just do:

  struct existing_pack {
          struct list_head list;
          int to_delete;
          char name[FLEX_ARRAY];
  };

and ditch string-list entirely. That lets us use "to_delete" in a
natural way. Though I suspect it makes all the _other_ code unreadable,
as we have to allocate them, deal with list_entry(), and so on.

So I guess I'm fine with any path (including the one in this patch).

-Peff
Junio C Hamano Sept. 7, 2023, 6:16 p.m. UTC | #5
Patrick Steinhardt <ps@pks.im> writes:

> If the intent is to make this check a bit prettier, how about we instead
> introduce a helper function like the following:
>
> ```
> static inline int pack_marked_for_deletion(const struct string_list_item *item)
> {
>         return (uintptr) item->util & DELETE_PACK;
> }
> ```

Good suggestion.

Or just check if it is NULL (with the new code that only cares about
the NULL-ness).  Regardless of the implementation, having such a
helper would document the intent of the code better, especially if
there are multiple places that make that check.

> Other than that the rest of this series looks good to me, thanks.
>
> Patrick

Thanks.
diff mbox series

Patch

diff --git a/builtin/repack.c b/builtin/repack.c
index 478fab96c9..6110598a69 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -26,8 +26,6 @@ 
 #define LOOSEN_UNREACHABLE 2
 #define PACK_CRUFT 4
 
-#define DELETE_PACK 1
-
 static int pack_everything;
 static int delta_base_offset = 1;
 static int pack_kept_objects = -1;
@@ -96,6 +94,10 @@  static int repack_config(const char *var, const char *value,
 
 struct existing_packs {
 	struct string_list kept_packs;
+	/*
+	 * for both non_kept_packs, and cruft_packs, a non-NULL
+	 * 'util' field indicates the pack should be deleted.
+	 */
 	struct string_list non_kept_packs;
 	struct string_list cruft_packs;
 };
@@ -130,7 +132,7 @@  static void mark_packs_for_deletion_1(struct string_list *names,
 		 * (if `-d` was given).
 		 */
 		if (!string_list_has_string(names, sha1))
-			item->util = (void*)(uintptr_t)((size_t)item->util | DELETE_PACK);
+			item->util = (void*)1;
 	}
 }
 
@@ -158,7 +160,7 @@  static void remove_redundant_packs_1(struct string_list *packs)
 {
 	struct string_list_item *item;
 	for_each_string_list_item(item, packs) {
-		if (!((uintptr_t)item->util & DELETE_PACK))
+		if (!item->util)
 			continue;
 		remove_redundant_pack(packdir, item->string);
 	}
@@ -695,20 +697,20 @@  static void midx_included_packs(struct string_list *include,
 
 		for_each_string_list_item(item, &existing->cruft_packs) {
 			/*
-			 * no need to check DELETE_PACK, since we're not
-			 * doing an ALL_INTO_ONE repack
+			 * no need to check for deleted packs, since we're
+			 * not doing an ALL_INTO_ONE repack
 			 */
 			string_list_insert(include, xstrfmt("%s.idx", item->string));
 		}
 	} else {
 		for_each_string_list_item(item, &existing->non_kept_packs) {
-			if ((uintptr_t)item->util & DELETE_PACK)
+			if (item->util)
 				continue;
 			string_list_insert(include, xstrfmt("%s.idx", item->string));
 		}
 
 		for_each_string_list_item(item, &existing->cruft_packs) {
-			if ((uintptr_t)item->util & DELETE_PACK)
+			if (item->util)
 				continue;
 			string_list_insert(include, xstrfmt("%s.idx", item->string));
 		}