diff mbox series

[2/5] object.c: lookup_unknown_object() accept 'r' as parameter

Message ID 16dd64618ab6a086727685e9eca3850fabc46437.1581535151.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series object.c: localize global the_repository variable into r | expand

Commit Message

Johannes Schindelin via GitGitGadget Feb. 12, 2020, 7:19 p.m. UTC
From: Parth Gala <parthpgala@gmail.com>

'lookup_unknown_object()' and its callers are modified to enable
passing 'r' as an argument to 'lookup_unknown_object()' in an
effort to reduce dependence on the global 'the_repository' variable.

Signed-off-by: Parth Gala <parthpgala@gmail.com>
---
 builtin/fsck.c                   | 3 ++-
 builtin/pack-objects.c           | 3 ++-
 http-push.c                      | 3 ++-
 object.c                         | 8 ++++----
 object.h                         | 2 +-
 refs.c                           | 3 ++-
 t/helper/test-example-decorate.c | 7 ++++---
 upload-pack.c                    | 3 ++-
 walker.c                         | 3 ++-
 9 files changed, 21 insertions(+), 14 deletions(-)

Comments

Taylor Blau Feb. 12, 2020, 8:25 p.m. UTC | #1
On Wed, Feb 12, 2020 at 07:19:08PM +0000, Parth Gala via GitGitGadget wrote:
> From: Parth Gala <parthpgala@gmail.com>
>
> 'lookup_unknown_object()' and its callers are modified to enable
> passing 'r' as an argument to 'lookup_unknown_object()' in an
> effort to reduce dependence on the global 'the_repository' variable.

The changes in 'object.[ch]' look sane to me here, again, but I have the
same question about why assigning:

  struct repository *r = the_repository;

and passing 'r' everywhere is preferable to simply passing
'the_repository' in directly.

> Signed-off-by: Parth Gala <parthpgala@gmail.com>
> ---
>  builtin/fsck.c                   | 3 ++-
>  builtin/pack-objects.c           | 3 ++-
>  http-push.c                      | 3 ++-
>  object.c                         | 8 ++++----
>  object.h                         | 2 +-
>  refs.c                           | 3 ++-
>  t/helper/test-example-decorate.c | 7 ++++---
>  upload-pack.c                    | 3 ++-
>  walker.c                         | 3 ++-
>  9 files changed, 21 insertions(+), 14 deletions(-)
>
> diff --git a/builtin/fsck.c b/builtin/fsck.c
> index d2b4336f7e..cd0b67f3bc 100644
> --- a/builtin/fsck.c
> +++ b/builtin/fsck.c
> @@ -745,7 +745,8 @@ static int fsck_cache_tree(struct cache_tree *it)
>
>  static void mark_object_for_connectivity(const struct object_id *oid)
>  {
> -	struct object *obj = lookup_unknown_object(oid);
> +	struct repository *r = the_repository;
> +	struct object *obj = lookup_unknown_object(r, oid);
>  	obj->flags |= HAS_OBJ;
>  }
>
> diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> index 393c20a2d7..b03f4378a0 100644
> --- a/builtin/pack-objects.c
> +++ b/builtin/pack-objects.c
> @@ -2891,6 +2891,7 @@ static void add_objects_in_unpacked_packs(void)
>  {
>  	struct packed_git *p;
>  	struct in_pack in_pack;
> +	struct repository *r = the_repository;
>  	uint32_t i;
>
>  	memset(&in_pack, 0, sizeof(in_pack));
> @@ -2910,7 +2911,7 @@ static void add_objects_in_unpacked_packs(void)
>
>  		for (i = 0; i < p->num_objects; i++) {
>  			nth_packed_object_oid(&oid, p, i);
> -			o = lookup_unknown_object(&oid);
> +			o = lookup_unknown_object(r, &oid);
>  			if (!(o->flags & OBJECT_ADDED))
>  				mark_in_pack_object(o, p, &in_pack);
>  			o->flags |= OBJECT_ADDED;
> diff --git a/http-push.c b/http-push.c
> index 822f326599..c26d03b21b 100644
> --- a/http-push.c
> +++ b/http-push.c
> @@ -1416,6 +1416,7 @@ static void one_remote_ref(const char *refname)
>  {
>  	struct ref *ref;
>  	struct object *obj;
> +	struct repository *r = the_repository;
>
>  	ref = alloc_ref(refname);
>
> @@ -1432,7 +1433,7 @@ static void one_remote_ref(const char *refname)
>  	 * may be required for updating server info later.
>  	 */
>  	if (repo->can_update_info_refs && !has_object_file(&ref->old_oid)) {
> -		obj = lookup_unknown_object(&ref->old_oid);
> +		obj = lookup_unknown_object(r, &ref->old_oid);
>  		fprintf(stderr,	"  fetch %s for %s\n",
>  			oid_to_hex(&ref->old_oid), refname);
>  		add_fetch_request(obj);
> diff --git a/object.c b/object.c
> index 549fbe69ca..90338a509c 100644
> --- a/object.c
> +++ b/object.c
> @@ -177,12 +177,12 @@ void *object_as_type(struct repository *r, struct object *obj, enum object_type
>  	}
>  }
>
> -struct object *lookup_unknown_object(const struct object_id *oid)
> +struct object *lookup_unknown_object(struct repository *r, const struct object_id *oid)
>  {
> -	struct object *obj = lookup_object(the_repository, oid);
> +	struct object *obj = lookup_object(r, oid);
>  	if (!obj)
> -		obj = create_object(the_repository, oid,
> -				    alloc_object_node(the_repository));
> +		obj = create_object(r, oid,
> +				    alloc_object_node(r));
>  	return obj;
>  }
>
> diff --git a/object.h b/object.h
> index 5a8ae274ee..375236cec3 100644
> --- a/object.h
> +++ b/object.h
> @@ -144,7 +144,7 @@ struct object *parse_object_or_die(const struct object_id *oid, const char *name
>  struct object *parse_object_buffer(struct repository *r, const struct object_id *oid, enum object_type type, unsigned long size, void *buffer, int *eaten_p);
>
>  /** Returns the object, with potentially excess memory allocated. **/
> -struct object *lookup_unknown_object(const struct object_id *oid);
> +struct object *lookup_unknown_object(struct repository *, const struct object_id *oid);
>
>  struct object_list *object_list_insert(struct object *item,
>  				       struct object_list **list_p);
> diff --git a/refs.c b/refs.c
> index 1ab0bb54d3..a630a8c271 100644
> --- a/refs.c
> +++ b/refs.c
> @@ -379,7 +379,8 @@ static int filter_refs(const char *refname, const struct object_id *oid,
>
>  enum peel_status peel_object(const struct object_id *name, struct object_id *oid)
>  {
> -	struct object *o = lookup_unknown_object(name);
> +	struct repository *r = the_repository;
> +	struct object *o = lookup_unknown_object(r, name);
>
>  	if (o->type == OBJ_NONE) {
>  		int type = oid_object_info(the_repository, name, NULL);
> diff --git a/t/helper/test-example-decorate.c b/t/helper/test-example-decorate.c
> index c8a1cde7d2..6b3262a9d3 100644
> --- a/t/helper/test-example-decorate.c
> +++ b/t/helper/test-example-decorate.c
> @@ -10,6 +10,7 @@ int cmd__example_decorate(int argc, const char **argv)
>  	struct object_id two_oid = { {2} };
>  	struct object_id three_oid = { {3} };
>  	struct object *one, *two, *three;
> +	struct repository *r = the_repository;
>
>  	int decoration_a, decoration_b;
>
> @@ -26,8 +27,8 @@ int cmd__example_decorate(int argc, const char **argv)
>  	 * Add 2 objects, one with a non-NULL decoration and one with a NULL
>  	 * decoration.
>  	 */
> -	one = lookup_unknown_object(&one_oid);
> -	two = lookup_unknown_object(&two_oid);
> +	one = lookup_unknown_object(r, &one_oid);
> +	two = lookup_unknown_object(r, &two_oid);
>  	ret = add_decoration(&n, one, &decoration_a);
>  	if (ret)
>  		BUG("when adding a brand-new object, NULL should be returned");
> @@ -56,7 +57,7 @@ int cmd__example_decorate(int argc, const char **argv)
>  	ret = lookup_decoration(&n, two);
>  	if (ret != &decoration_b)
>  		BUG("lookup should return added declaration");
> -	three = lookup_unknown_object(&three_oid);
> +	three = lookup_unknown_object(r, &three_oid);
>  	ret = lookup_decoration(&n, three);
>  	if (ret)
>  		BUG("lookup for unknown object should return NULL");
> diff --git a/upload-pack.c b/upload-pack.c
> index cb7312268f..6d196e275b 100644
> --- a/upload-pack.c
> +++ b/upload-pack.c
> @@ -962,7 +962,8 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan
>  static int mark_our_ref(const char *refname, const char *refname_full,
>  			const struct object_id *oid)
>  {
> -	struct object *o = lookup_unknown_object(oid);
> +	struct repository *r = the_repository;
> +	struct object *o = lookup_unknown_object(r, oid);
>
>  	if (ref_is_hidden(refname, refname_full)) {
>  		o->flags |= HIDDEN_REF;
> diff --git a/walker.c b/walker.c
> index 06cd2bd569..098c69ebe1 100644
> --- a/walker.c
> +++ b/walker.c
> @@ -258,6 +258,7 @@ void walker_targets_free(int targets, char **target, const char **write_ref)
>  int walker_fetch(struct walker *walker, int targets, char **target,
>  		 const char **write_ref, const char *write_ref_log_details)
>  {
> +	struct repository *r = the_repository;
>  	struct strbuf refname = STRBUF_INIT;
>  	struct strbuf err = STRBUF_INIT;
>  	struct ref_transaction *transaction = NULL;
> @@ -285,7 +286,7 @@ int walker_fetch(struct walker *walker, int targets, char **target,
>  			error("Could not interpret response from server '%s' as something to pull", target[i]);
>  			goto done;
>  		}
> -		if (process(walker, lookup_unknown_object(&oids[i])))
> +		if (process(walker, lookup_unknown_object(r, &oids[i])))
>  			goto done;
>  	}
>
> --
> gitgitgadget
>

Thanks,
Taylor
Junio C Hamano Feb. 12, 2020, 9:11 p.m. UTC | #2
Taylor Blau <me@ttaylorr.com> writes:

> ... same question about why assigning:
>
>   struct repository *r = the_repository;
>
> and passing 'r' everywhere is preferable to simply passing
> 'the_repository' in directly.
> ...
>>  static void mark_object_for_connectivity(const struct object_id *oid)
>>  {
>> -	struct object *obj = lookup_unknown_object(oid);
>> +	struct repository *r = the_repository;
>> +	struct object *obj = lookup_unknown_object(r, oid);
>>  	obj->flags |= HAS_OBJ;
>>  }

I do not claim that it applies to this particular function, and the
function is too small for it to matter, but when a function is large
enough and it always works on one single repository, it would make
it easier to later update the function further if we set up 'r'
upfront, making it point at the_repository for now, and use 'r'
throughout the function.  That way, when the time comes to update
the function to work on an arbitrary repository, the only change
required will be to turn the local variable 'r' to an incoming
parameter the caller can supply.
Taylor Blau Feb. 13, 2020, 6 p.m. UTC | #3
On Wed, Feb 12, 2020 at 01:11:02PM -0800, Junio C Hamano wrote:
> Taylor Blau <me@ttaylorr.com> writes:
>
> > ... same question about why assigning:
> >
> >   struct repository *r = the_repository;
> >
> > and passing 'r' everywhere is preferable to simply passing
> > 'the_repository' in directly.
> > ...
> >>  static void mark_object_for_connectivity(const struct object_id *oid)
> >>  {
> >> -	struct object *obj = lookup_unknown_object(oid);
> >> +	struct repository *r = the_repository;
> >> +	struct object *obj = lookup_unknown_object(r, oid);
> >>  	obj->flags |= HAS_OBJ;
> >>  }
>
> I do not claim that it applies to this particular function, and the
> function is too small for it to matter, but when a function is large
> enough and it always works on one single repository, it would make
> it easier to later update the function further if we set up 'r'
> upfront, making it point at the_repository for now, and use 'r'
> throughout the function.  That way, when the time comes to update
> the function to work on an arbitrary repository, the only change
> required will be to turn the local variable 'r' to an incoming
> parameter the caller can supply.

Right, but my suggestion was that this advice doesn't apply to this
particular instance since I don't expect that we'd ever be passing
something other than 'the_repository'.

Specifically, I was worried that we'd get bitten by re-assigning 'r' in
the middle of the function and then end up in some odd broken state.

Maybe I'm worrying too much.


Thanks,
Taylor
Junio C Hamano Feb. 13, 2020, 6:10 p.m. UTC | #4
Taylor Blau <me@ttaylorr.com> writes:

> On Wed, Feb 12, 2020 at 01:11:02PM -0800, Junio C Hamano wrote:
>> Taylor Blau <me@ttaylorr.com> writes:
>>
>> > ... same question about why assigning:
>> >
>> >   struct repository *r = the_repository;
>> >
>> > and passing 'r' everywhere is preferable to simply passing
>> > 'the_repository' in directly.
>> > ...
>> >>  static void mark_object_for_connectivity(const struct object_id *oid)
>> >>  {
>> >> -	struct object *obj = lookup_unknown_object(oid);
>> >> +	struct repository *r = the_repository;
>> >> +	struct object *obj = lookup_unknown_object(r, oid);
>> >>  	obj->flags |= HAS_OBJ;
>> >>  }
>> ...
> Right, but my suggestion was that this advice doesn't apply to this
> particular instance since I don't expect that we'd ever be passing
> something other than 'the_repository'.
>
> Specifically, I was worried that we'd get bitten by re-assigning 'r' in
> the middle of the function and then end up in some odd broken state.

"git fsck" works only in a single, "the", repository, so I guess you
are right to be worried about unnecessary complexity here.
Jeff King Feb. 13, 2020, 6:52 p.m. UTC | #5
On Thu, Feb 13, 2020 at 10:10:45AM -0800, Junio C Hamano wrote:

> > Right, but my suggestion was that this advice doesn't apply to this
> > particular instance since I don't expect that we'd ever be passing
> > something other than 'the_repository'.
> >
> > Specifically, I was worried that we'd get bitten by re-assigning 'r' in
> > the middle of the function and then end up in some odd broken state.
> 
> "git fsck" works only in a single, "the", repository, so I guess you
> are right to be worried about unnecessary complexity here.

I think the end-game for this whole repository transition would be to
get rid of the_repository, though. I.e., I'd envision the progression
something like this:

  1. Teach all of the library code to take (and operate on) "struct
     repository".

  2. Teach static local functions like this to pass in the_repository.

  3. Teach top-level commands like cmd_fsck() to pass the_repository to
     all of those static local helpers.

  4. Teach top-level commands to get a real repository pointer, either
     from the git.c wrapper (when RUN_SETUP is used) or by calling
     setup_git_repository() themselves.

  5. Grep for the_repository and drop it everywhere.

Here we're at step 2 now, but declaring "r" makes moving to step 3 just
a little easier. And I think the existence of steps 4 and 5 implies that
it would eventually be worth going through step 3.

Of course I just wrote those steps down for the first time, so maybe
nobody else shares my vision. ;)

-Peff
Taylor Blau Feb. 15, 2020, midnight UTC | #6
On Thu, Feb 13, 2020 at 01:52:35PM -0500, Jeff King wrote:
> On Thu, Feb 13, 2020 at 10:10:45AM -0800, Junio C Hamano wrote:
>
> > > Right, but my suggestion was that this advice doesn't apply to this
> > > particular instance since I don't expect that we'd ever be passing
> > > something other than 'the_repository'.
> > >
> > > Specifically, I was worried that we'd get bitten by re-assigning 'r' in
> > > the middle of the function and then end up in some odd broken state.
> >
> > "git fsck" works only in a single, "the", repository, so I guess you
> > are right to be worried about unnecessary complexity here.
>
> I think the end-game for this whole repository transition would be to
> get rid of the_repository, though. I.e., I'd envision the progression
> something like this:
>
>   1. Teach all of the library code to take (and operate on) "struct
>      repository".
>
>   2. Teach static local functions like this to pass in the_repository.
>
>   3. Teach top-level commands like cmd_fsck() to pass the_repository to
>      all of those static local helpers.
>
>   4. Teach top-level commands to get a real repository pointer, either
>      from the git.c wrapper (when RUN_SETUP is used) or by calling
>      setup_git_repository() themselves.
>
>   5. Grep for the_repository and drop it everywhere.
>
> Here we're at step 2 now, but declaring "r" makes moving to step 3 just
> a little easier. And I think the existence of steps 4 and 5 implies that
> it would eventually be worth going through step 3.

Ah, the transition to step 3 justifies this, I think. I wasn't aware
that steps 3+ existed. If they didn't, I'd stand by my original advice,
but given that they do, the approach here makes more sense long-term.

> Of course I just wrote those steps down for the first time, so maybe
> nobody else shares my vision. ;)

Thanks for writing it down. I'm sure that it has been loosely discussed
for a while, but this is the first time that I've seen it all in one
place.

> -Peff

Thanks,
Taylor
diff mbox series

Patch

diff --git a/builtin/fsck.c b/builtin/fsck.c
index d2b4336f7e..cd0b67f3bc 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -745,7 +745,8 @@  static int fsck_cache_tree(struct cache_tree *it)
 
 static void mark_object_for_connectivity(const struct object_id *oid)
 {
-	struct object *obj = lookup_unknown_object(oid);
+	struct repository *r = the_repository;
+	struct object *obj = lookup_unknown_object(r, oid);
 	obj->flags |= HAS_OBJ;
 }
 
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 393c20a2d7..b03f4378a0 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -2891,6 +2891,7 @@  static void add_objects_in_unpacked_packs(void)
 {
 	struct packed_git *p;
 	struct in_pack in_pack;
+	struct repository *r = the_repository;
 	uint32_t i;
 
 	memset(&in_pack, 0, sizeof(in_pack));
@@ -2910,7 +2911,7 @@  static void add_objects_in_unpacked_packs(void)
 
 		for (i = 0; i < p->num_objects; i++) {
 			nth_packed_object_oid(&oid, p, i);
-			o = lookup_unknown_object(&oid);
+			o = lookup_unknown_object(r, &oid);
 			if (!(o->flags & OBJECT_ADDED))
 				mark_in_pack_object(o, p, &in_pack);
 			o->flags |= OBJECT_ADDED;
diff --git a/http-push.c b/http-push.c
index 822f326599..c26d03b21b 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1416,6 +1416,7 @@  static void one_remote_ref(const char *refname)
 {
 	struct ref *ref;
 	struct object *obj;
+	struct repository *r = the_repository;
 
 	ref = alloc_ref(refname);
 
@@ -1432,7 +1433,7 @@  static void one_remote_ref(const char *refname)
 	 * may be required for updating server info later.
 	 */
 	if (repo->can_update_info_refs && !has_object_file(&ref->old_oid)) {
-		obj = lookup_unknown_object(&ref->old_oid);
+		obj = lookup_unknown_object(r, &ref->old_oid);
 		fprintf(stderr,	"  fetch %s for %s\n",
 			oid_to_hex(&ref->old_oid), refname);
 		add_fetch_request(obj);
diff --git a/object.c b/object.c
index 549fbe69ca..90338a509c 100644
--- a/object.c
+++ b/object.c
@@ -177,12 +177,12 @@  void *object_as_type(struct repository *r, struct object *obj, enum object_type
 	}
 }
 
-struct object *lookup_unknown_object(const struct object_id *oid)
+struct object *lookup_unknown_object(struct repository *r, const struct object_id *oid)
 {
-	struct object *obj = lookup_object(the_repository, oid);
+	struct object *obj = lookup_object(r, oid);
 	if (!obj)
-		obj = create_object(the_repository, oid,
-				    alloc_object_node(the_repository));
+		obj = create_object(r, oid,
+				    alloc_object_node(r));
 	return obj;
 }
 
diff --git a/object.h b/object.h
index 5a8ae274ee..375236cec3 100644
--- a/object.h
+++ b/object.h
@@ -144,7 +144,7 @@  struct object *parse_object_or_die(const struct object_id *oid, const char *name
 struct object *parse_object_buffer(struct repository *r, const struct object_id *oid, enum object_type type, unsigned long size, void *buffer, int *eaten_p);
 
 /** Returns the object, with potentially excess memory allocated. **/
-struct object *lookup_unknown_object(const struct object_id *oid);
+struct object *lookup_unknown_object(struct repository *, const struct object_id *oid);
 
 struct object_list *object_list_insert(struct object *item,
 				       struct object_list **list_p);
diff --git a/refs.c b/refs.c
index 1ab0bb54d3..a630a8c271 100644
--- a/refs.c
+++ b/refs.c
@@ -379,7 +379,8 @@  static int filter_refs(const char *refname, const struct object_id *oid,
 
 enum peel_status peel_object(const struct object_id *name, struct object_id *oid)
 {
-	struct object *o = lookup_unknown_object(name);
+	struct repository *r = the_repository;
+	struct object *o = lookup_unknown_object(r, name);
 
 	if (o->type == OBJ_NONE) {
 		int type = oid_object_info(the_repository, name, NULL);
diff --git a/t/helper/test-example-decorate.c b/t/helper/test-example-decorate.c
index c8a1cde7d2..6b3262a9d3 100644
--- a/t/helper/test-example-decorate.c
+++ b/t/helper/test-example-decorate.c
@@ -10,6 +10,7 @@  int cmd__example_decorate(int argc, const char **argv)
 	struct object_id two_oid = { {2} };
 	struct object_id three_oid = { {3} };
 	struct object *one, *two, *three;
+	struct repository *r = the_repository;
 
 	int decoration_a, decoration_b;
 
@@ -26,8 +27,8 @@  int cmd__example_decorate(int argc, const char **argv)
 	 * Add 2 objects, one with a non-NULL decoration and one with a NULL
 	 * decoration.
 	 */
-	one = lookup_unknown_object(&one_oid);
-	two = lookup_unknown_object(&two_oid);
+	one = lookup_unknown_object(r, &one_oid);
+	two = lookup_unknown_object(r, &two_oid);
 	ret = add_decoration(&n, one, &decoration_a);
 	if (ret)
 		BUG("when adding a brand-new object, NULL should be returned");
@@ -56,7 +57,7 @@  int cmd__example_decorate(int argc, const char **argv)
 	ret = lookup_decoration(&n, two);
 	if (ret != &decoration_b)
 		BUG("lookup should return added declaration");
-	three = lookup_unknown_object(&three_oid);
+	three = lookup_unknown_object(r, &three_oid);
 	ret = lookup_decoration(&n, three);
 	if (ret)
 		BUG("lookup for unknown object should return NULL");
diff --git a/upload-pack.c b/upload-pack.c
index cb7312268f..6d196e275b 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -962,7 +962,8 @@  static void receive_needs(struct packet_reader *reader, struct object_array *wan
 static int mark_our_ref(const char *refname, const char *refname_full,
 			const struct object_id *oid)
 {
-	struct object *o = lookup_unknown_object(oid);
+	struct repository *r = the_repository;
+	struct object *o = lookup_unknown_object(r, oid);
 
 	if (ref_is_hidden(refname, refname_full)) {
 		o->flags |= HIDDEN_REF;
diff --git a/walker.c b/walker.c
index 06cd2bd569..098c69ebe1 100644
--- a/walker.c
+++ b/walker.c
@@ -258,6 +258,7 @@  void walker_targets_free(int targets, char **target, const char **write_ref)
 int walker_fetch(struct walker *walker, int targets, char **target,
 		 const char **write_ref, const char *write_ref_log_details)
 {
+	struct repository *r = the_repository;
 	struct strbuf refname = STRBUF_INIT;
 	struct strbuf err = STRBUF_INIT;
 	struct ref_transaction *transaction = NULL;
@@ -285,7 +286,7 @@  int walker_fetch(struct walker *walker, int targets, char **target,
 			error("Could not interpret response from server '%s' as something to pull", target[i]);
 			goto done;
 		}
-		if (process(walker, lookup_unknown_object(&oids[i])))
+		if (process(walker, lookup_unknown_object(r, &oids[i])))
 			goto done;
 	}