diff mbox series

[3/7] pack-revindex: make `load_pack_revindex` take a repository

Message ID be4faf11011efcfab479e5785e6c2bbac95309bd.1681166596.git.me@ttaylorr.com (mailing list archive)
State Superseded
Headers show
Series pack-revindex: enable on-disk reverse indexes by default | expand

Commit Message

Taylor Blau April 10, 2023, 10:53 p.m. UTC
In a future commit, we will introduce a `pack.readReverseIndex`
configuration, which forces Git to generate the reverse index from
scratch instead of loading it from disk.

In order to avoid reading this configuration value more than once, we'll
use the `repo_settings` struct to lazily load this value.

In order to access the `struct repo_settings`, add a repository argument
to `load_pack_revindex`, and update all callers to pass the correct
instance (in all cases, `the_repository`).

Signed-off-by: Taylor Blauy <me@ttaylorr.com>
---
 pack-bitmap.c   | 5 +++--
 pack-revindex.c | 4 ++--
 pack-revindex.h | 3 ++-
 packfile.c      | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)

Comments

Derrick Stolee April 11, 2023, 1:45 p.m. UTC | #1
On 4/10/2023 6:53 PM, Taylor Blau wrote:
> In a future commit, we will introduce a `pack.readReverseIndex`
> configuration, which forces Git to generate the reverse index from
> scratch instead of loading it from disk.
> 
> In order to avoid reading this configuration value more than once, we'll
> use the `repo_settings` struct to lazily load this value.
> 
> In order to access the `struct repo_settings`, add a repository argument
> to `load_pack_revindex`, and update all callers to pass the correct
> instance (in all cases, `the_repository`).

If all callers use the_repository, then we could presumably use
the_repository within the method directly. However, there are some
cases where the call chain is less obvious that we have already
entered something that is "repository scoped".

The patch below applies on top of this one and is the result of
exploring the two callers within pack-bitmap.c. Since they are
static, I was able to only modify things within that file, but
found two callers to _those_ methods that were repository scoped,
so without making this connection we are losing that scope.

There are other non-static methods in pack-bitmap.c that might
benefit from wiring a repository pointer through (or adding a
repository pointer to struct bitmap_index to get it for free),
but I used the trick of defining a local repository pointer at
the top of the method to make it easier to change in the future.

Thanks,
-Stolee


--- >8 ---

From 9816f7026199981b86d9f3e2188036e1b20bc2f9 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <derrickstolee@github.com>
Date: Tue, 11 Apr 2023 09:34:42 -0400
Subject: [PATCH] pack-bitmap: use struct repository more often

The previous change introduced a 'struct repository *' parameter to
load_pack_revindex(). To satisfy the callers within pack-bitmap.c, these
parameters were filled with 'the_repository'.

However, these callers are sometimes included in methods that are
already scoped to a 'struct repository *' parameter. By dropping the
link from that repository and using the_repository, we are giving a
false impression that this portion of the rev-index API is properly
scoped to a single repository.

Expand the static methods in pack-bitmap.c that call
load_pack_revindex() to include a 'struct repository *' parameter.
Modify the callers of those methods to pass a repository as appropriate.
For the methods without an appropriate repository, create a local
variable equal to the_repository so it is easier to convert them to
using a parameter in the future.

In the case of prepare_bitmap_git(), we already have a repository
pointer parameter that can be used. In prepare_bitmap_walk(), the
rev_info struct contains a repository pointer.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 pack-bitmap.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index 8e3bb00931..38b35c4823 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -463,7 +463,7 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
 	return 0;
 }
 
-static int load_reverse_index(struct bitmap_index *bitmap_git)
+static int load_reverse_index(struct repository *r, struct bitmap_index *bitmap_git)
 {
 	if (bitmap_is_midx(bitmap_git)) {
 		uint32_t i;
@@ -477,24 +477,23 @@ static int load_reverse_index(struct bitmap_index *bitmap_git)
 		 * since we will need to make use of them in pack-objects.
 		 */
 		for (i = 0; i < bitmap_git->midx->num_packs; i++) {
-			ret = load_pack_revindex(the_repository,
-						 bitmap_git->midx->packs[i]);
+			ret = load_pack_revindex(r, bitmap_git->midx->packs[i]);
 			if (ret)
 				return ret;
 		}
 		return 0;
 	}
-	return load_pack_revindex(the_repository, bitmap_git->pack);
+	return load_pack_revindex(r, bitmap_git->pack);
 }
 
-static int load_bitmap(struct bitmap_index *bitmap_git)
+static int load_bitmap(struct repository *r, struct bitmap_index *bitmap_git)
 {
 	assert(bitmap_git->map);
 
 	bitmap_git->bitmaps = kh_init_oid_map();
 	bitmap_git->ext_index.positions = kh_init_oid_pos();
 
-	if (load_reverse_index(bitmap_git))
+	if (load_reverse_index(r, bitmap_git))
 		goto failed;
 
 	if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) ||
@@ -581,7 +580,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
 {
 	struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
 
-	if (!open_bitmap(r, bitmap_git) && !load_bitmap(bitmap_git))
+	if (!open_bitmap(r, bitmap_git) && !load_bitmap(r, bitmap_git))
 		return bitmap_git;
 
 	free_bitmap_index(bitmap_git);
@@ -590,9 +589,10 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
 
 struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
 {
+	struct repository *r = the_repository;
 	struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
 
-	if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(bitmap_git))
+	if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git))
 		return bitmap_git;
 
 	free_bitmap_index(bitmap_git);
@@ -1593,7 +1593,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
 	 * from disk. this is the point of no return; after this the rev_list
 	 * becomes invalidated and we must perform the revwalk through bitmaps
 	 */
-	if (load_bitmap(bitmap_git) < 0)
+	if (load_bitmap(revs->repo, bitmap_git) < 0)
 		goto cleanup;
 
 	object_array_clear(&revs->pending);
@@ -1743,6 +1743,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
 				       uint32_t *entries,
 				       struct bitmap **reuse_out)
 {
+	struct repository *r = the_repository;
 	struct packed_git *pack;
 	struct bitmap *result = bitmap_git->result;
 	struct bitmap *reuse;
@@ -1753,7 +1754,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
 
 	assert(result);
 
-	load_reverse_index(bitmap_git);
+	load_reverse_index(r, bitmap_git);
 
 	if (bitmap_is_midx(bitmap_git))
 		pack = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)];
@@ -2133,11 +2134,12 @@ int rebuild_bitmap(const uint32_t *reposition,
 uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git,
 				struct packing_data *mapping)
 {
+	struct repository *r = the_repository;
 	uint32_t i, num_objects;
 	uint32_t *reposition;
 
 	if (!bitmap_is_midx(bitmap_git))
-		load_reverse_index(bitmap_git);
+		load_reverse_index(r, bitmap_git);
 	else if (load_midx_revindex(bitmap_git->midx) < 0)
 		BUG("rebuild_existing_bitmaps: missing required rev-cache "
 		    "extension");
Taylor Blau April 11, 2023, 9:30 p.m. UTC | #2
On Tue, Apr 11, 2023 at 09:45:21AM -0400, Derrick Stolee wrote:
> On 4/10/2023 6:53 PM, Taylor Blau wrote:
> > In a future commit, we will introduce a `pack.readReverseIndex`
> > configuration, which forces Git to generate the reverse index from
> > scratch instead of loading it from disk.
> >
> > In order to avoid reading this configuration value more than once, we'll
> > use the `repo_settings` struct to lazily load this value.
> >
> > In order to access the `struct repo_settings`, add a repository argument
> > to `load_pack_revindex`, and update all callers to pass the correct
> > instance (in all cases, `the_repository`).
>
> If all callers use the_repository, then we could presumably use
> the_repository within the method directly. However, there are some
> cases where the call chain is less obvious that we have already
> entered something that is "repository scoped".
>
> The patch below applies on top of this one and is the result of
> exploring the two callers within pack-bitmap.c. Since they are
> static, I was able to only modify things within that file, but
> found two callers to _those_ methods that were repository scoped,
> so without making this connection we are losing that scope.
>
> There are other non-static methods in pack-bitmap.c that might
> benefit from wiring a repository pointer through (or adding a
> repository pointer to struct bitmap_index to get it for free),
> but I used the trick of defining a local repository pointer at
> the top of the method to make it easier to change in the future.
>
> Thanks,
> -Stolee

> @@ -581,7 +580,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
>  {
>  	struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
>
> -	if (!open_bitmap(r, bitmap_git) && !load_bitmap(bitmap_git))
> +	if (!open_bitmap(r, bitmap_git) && !load_bitmap(r, bitmap_git))
>  		return bitmap_git;
>
>  	free_bitmap_index(bitmap_git);

Oops; we are indeed dropping the repository pointer that was given to
prepare_bitmap_git() here. It's unfortunate that we have to work through
so many layers to propagate it back down, but I agree that it's the
right thing to do.

> @@ -590,9 +589,10 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
>
>  struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
>  {
> +	struct repository *r = the_repository;

OK; and here we're using the trick you mentioned in the patch message to
avoid having to propagate this even further out. The rest of the patch
looks sensible to me.

In terms of working this one in, it feels odd to include it as a
separate commit since we know the one immediately prior to it is kind of
broken.

Do you want to squash these together? Something else? Anything is fine
with me here.

Thanks,
Taylor
Derrick Stolee April 12, 2023, 5:33 p.m. UTC | #3
On 4/11/2023 5:30 PM, Taylor Blau wrote:
> On Tue, Apr 11, 2023 at 09:45:21AM -0400, Derrick Stolee wrote:

>> @@ -581,7 +580,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
>>  {
>>  	struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
>>
>> -	if (!open_bitmap(r, bitmap_git) && !load_bitmap(bitmap_git))
>> +	if (!open_bitmap(r, bitmap_git) && !load_bitmap(r, bitmap_git))
>>  		return bitmap_git;
>>
>>  	free_bitmap_index(bitmap_git);
> 
> Oops; we are indeed dropping the repository pointer that was given to
> prepare_bitmap_git() here. It's unfortunate that we have to work through
> so many layers to propagate it back down, but I agree that it's the
> right thing to do.
> 
>> @@ -590,9 +589,10 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r)
>>
>>  struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx)
>>  {
>> +	struct repository *r = the_repository;
> 
> OK; and here we're using the trick you mentioned in the patch message to
> avoid having to propagate this even further out. The rest of the patch
> looks sensible to me.
> 
> In terms of working this one in, it feels odd to include it as a
> separate commit since we know the one immediately prior to it is kind of
> broken.
> 
> Do you want to squash these together? Something else? Anything is fine
> with me here.

Feel free to squash it in, to avoid having a commit where the chain is
broken.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/pack-bitmap.c b/pack-bitmap.c
index b2e7d06d60..8e3bb00931 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -477,13 +477,14 @@  static int load_reverse_index(struct bitmap_index *bitmap_git)
 		 * since we will need to make use of them in pack-objects.
 		 */
 		for (i = 0; i < bitmap_git->midx->num_packs; i++) {
-			ret = load_pack_revindex(bitmap_git->midx->packs[i]);
+			ret = load_pack_revindex(the_repository,
+						 bitmap_git->midx->packs[i]);
 			if (ret)
 				return ret;
 		}
 		return 0;
 	}
-	return load_pack_revindex(bitmap_git->pack);
+	return load_pack_revindex(the_repository, bitmap_git->pack);
 }
 
 static int load_bitmap(struct bitmap_index *bitmap_git)
diff --git a/pack-revindex.c b/pack-revindex.c
index 03c7e81f9d..e3d69cc0f7 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -283,7 +283,7 @@  static int load_pack_revindex_from_disk(struct packed_git *p)
 	return ret;
 }
 
-int load_pack_revindex(struct packed_git *p)
+int load_pack_revindex(struct repository *r, struct packed_git *p)
 {
 	if (p->revindex || p->revindex_data)
 		return 0;
@@ -356,7 +356,7 @@  int offset_to_pack_pos(struct packed_git *p, off_t ofs, uint32_t *pos)
 {
 	unsigned lo, hi;
 
-	if (load_pack_revindex(p) < 0)
+	if (load_pack_revindex(the_repository, p) < 0)
 		return -1;
 
 	lo = 0;
diff --git a/pack-revindex.h b/pack-revindex.h
index 4974e75eb4..3d1969ce8b 100644
--- a/pack-revindex.h
+++ b/pack-revindex.h
@@ -39,6 +39,7 @@ 
 
 struct packed_git;
 struct multi_pack_index;
+struct repository;
 
 /*
  * load_pack_revindex populates the revindex's internal data-structures for the
@@ -47,7 +48,7 @@  struct multi_pack_index;
  * If a '.rev' file is present it is mmap'd, and pointers are assigned into it
  * (instead of using the in-memory variant).
  */
-int load_pack_revindex(struct packed_git *p);
+int load_pack_revindex(struct repository *r, struct packed_git *p);
 
 /*
  * load_midx_revindex loads the '.rev' file corresponding to the given
diff --git a/packfile.c b/packfile.c
index b120405ccc..717fd685c9 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2151,7 +2151,7 @@  int for_each_object_in_pack(struct packed_git *p,
 	int r = 0;
 
 	if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
-		if (load_pack_revindex(p))
+		if (load_pack_revindex(the_repository, p))
 			return -1;
 	}