diff mbox series

[v1,06/10] packfile: packed_object_info avoids packed_to_object_type

Message ID 20240715003519.2671385-7-e@80x24.org (mailing list archive)
State New, archived
Headers show
Series cat-file speedups | expand

Commit Message

Eric Wong July 15, 2024, 12:35 a.m. UTC
For calls the delta base cache, packed_to_object_type calls
can be omitted.  This prepares us to bypass content_limit for
non-blob types in the following commit.

Signed-off-by: Eric Wong <e@80x24.org>
---
 packfile.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

Comments

Patrick Steinhardt July 24, 2024, 8:36 a.m. UTC | #1
On Mon, Jul 15, 2024 at 12:35:15AM +0000, Eric Wong wrote:
> For calls the delta base cache, packed_to_object_type calls
> can be omitted.  This prepares us to bypass content_limit for
> non-blob types in the following commit.
> 
> Signed-off-by: Eric Wong <e@80x24.org>
> ---
>  packfile.c | 18 ++++++++++--------
>  1 file changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/packfile.c b/packfile.c
> index b2660e14f9..c2ba6ab203 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -1522,7 +1522,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
>  {
>  	struct pack_window *w_curs = NULL;
>  	off_t curpos = obj_offset;
> -	enum object_type type;
> +	enum object_type type, final_type = OBJ_BAD;
>  	struct delta_base_cache_entry *ent;

I think it might help this patch to move `type` to the scopes where it's
used to demonstrate that all code paths set `final_type` as expected.

>  	/*
> @@ -1534,7 +1534,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
>  	ent = get_delta_base_cache_entry(p, obj_offset);
>  	if (ent) {
>  		oi->whence = OI_DBCACHED;
> -		type = ent->type;
> +		final_type = type = ent->type;
>  		if (oi->sizep)
>  			*oi->sizep = ent->size;
>  		if (oi->contentp) {
> @@ -1552,6 +1552,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
>  	} else if (oi->contentp && !oi->content_limit) {
>  		*oi->contentp = unpack_entry(r, p, obj_offset, &type,
>  						oi->sizep);
> +		final_type = type;
>  		if (!*oi->contentp)
>  			type = OBJ_BAD;
>  	} else {
> @@ -1581,6 +1582,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
>  			if (oi->sizep && *oi->sizep <= oi->content_limit) {
>  				*oi->contentp = unpack_entry(r, p, obj_offset,
>  							&type, oi->sizep);
> +				final_type = type;
>  				if (!*oi->contentp)
>  					type = OBJ_BAD;
>  			} else {
> @@ -1602,17 +1604,17 @@ int packed_object_info(struct repository *r, struct packed_git *p,
>  	}
>  
>  	if (oi->typep || oi->type_name) {
> -		enum object_type ptot;
> -		ptot = packed_to_object_type(r, p, obj_offset,
> -					     type, &w_curs, curpos);
> +		if (final_type < 0)
> +			final_type = packed_to_object_type(r, p, obj_offset,
> +						     type, &w_curs, curpos);

So this is the actual change we're interested in, right? Instead of
unconditionally calling `packed_to_object_type()`, we skip that call in
case we know that we have already figured out the correct object type.

Wouldn't it be easier to manage this with a single `type` variable,
only, and then conditionally call `packed_to_object_type()` only in the
cases where `type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA`? Not sure
whether that would be all that useful though given that the function
already knows to exit without doing anything in case the type is already
properly resolved. So maybe the next patch will enlighten me.

Patrick

>  		if (oi->typep)
> -			*oi->typep = ptot;
> +			*oi->typep = final_type;
>  		if (oi->type_name) {
> -			const char *tn = type_name(ptot);
> +			const char *tn = type_name(final_type);
>  			if (tn)
>  				strbuf_addstr(oi->type_name, tn);
>  		}
> -		if (ptot < 0) {
> +		if (final_type < 0) {
>  			type = OBJ_BAD;
>  			goto out;
>  		}
>
Eric Wong July 26, 2024, 8:01 a.m. UTC | #2
Patrick Steinhardt <ps@pks.im> wrote:
> On Mon, Jul 15, 2024 at 12:35:15AM +0000, Eric Wong wrote:
> > For calls the delta base cache, packed_to_object_type calls
> > can be omitted.  This prepares us to bypass content_limit for
> > non-blob types in the following commit.
> > 
> > Signed-off-by: Eric Wong <e@80x24.org>
> > ---
> >  packfile.c | 18 ++++++++++--------
> >  1 file changed, 10 insertions(+), 8 deletions(-)
> > 
> > diff --git a/packfile.c b/packfile.c
> > index b2660e14f9..c2ba6ab203 100644
> > --- a/packfile.c
> > +++ b/packfile.c
> > @@ -1522,7 +1522,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
> >  {
> >  	struct pack_window *w_curs = NULL;
> >  	off_t curpos = obj_offset;
> > -	enum object_type type;
> > +	enum object_type type, final_type = OBJ_BAD;
> >  	struct delta_base_cache_entry *ent;
> 
> I think it might help this patch to move `type` to the scopes where it's
> used to demonstrate that all code paths set `final_type` as expected.

The condition at the end of packed_object_info() requires the original
`type' to keep its top-level scope:

        if (oi->delta_base_oid) {
                if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {

But yeah, the whole function is huge and remains a bit convoluted.
Inlining cache_or_unpack_entry in 4/10 helped some, I think.

> >  	/*
> > @@ -1534,7 +1534,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
> >  	ent = get_delta_base_cache_entry(p, obj_offset);
> >  	if (ent) {
> >  		oi->whence = OI_DBCACHED;
> > -		type = ent->type;
> > +		final_type = type = ent->type;
> >  		if (oi->sizep)
> >  			*oi->sizep = ent->size;
> >  		if (oi->contentp) {
> > @@ -1552,6 +1552,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
> >  	} else if (oi->contentp && !oi->content_limit) {
> >  		*oi->contentp = unpack_entry(r, p, obj_offset, &type,
> >  						oi->sizep);
> > +		final_type = type;
> >  		if (!*oi->contentp)
> >  			type = OBJ_BAD;
> >  	} else {
> > @@ -1581,6 +1582,7 @@ int packed_object_info(struct repository *r, struct packed_git *p,
> >  			if (oi->sizep && *oi->sizep <= oi->content_limit) {
> >  				*oi->contentp = unpack_entry(r, p, obj_offset,
> >  							&type, oi->sizep);
> > +				final_type = type;
> >  				if (!*oi->contentp)
> >  					type = OBJ_BAD;
> >  			} else {
> > @@ -1602,17 +1604,17 @@ int packed_object_info(struct repository *r, struct packed_git *p,
> >  	}
> >  
> >  	if (oi->typep || oi->type_name) {
> > -		enum object_type ptot;
> > -		ptot = packed_to_object_type(r, p, obj_offset,
> > -					     type, &w_curs, curpos);
> > +		if (final_type < 0)
> > +			final_type = packed_to_object_type(r, p, obj_offset,
> > +						     type, &w_curs, curpos);
> 
> So this is the actual change we're interested in, right? Instead of
> unconditionally calling `packed_to_object_type()`, we skip that call in
> case we know that we have already figured out the correct object type.
> 
> Wouldn't it be easier to manage this with a single `type` variable,
> only, and then conditionally call `packed_to_object_type()` only in the
> cases where `type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA`? Not sure
> whether that would be all that useful though given that the function
> already knows to exit without doing anything in case the type is already
> properly resolved. So maybe the next patch will enlighten me.

As I mentioned above, I think the `type' var remains necessary.
diff mbox series

Patch

diff --git a/packfile.c b/packfile.c
index b2660e14f9..c2ba6ab203 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1522,7 +1522,7 @@  int packed_object_info(struct repository *r, struct packed_git *p,
 {
 	struct pack_window *w_curs = NULL;
 	off_t curpos = obj_offset;
-	enum object_type type;
+	enum object_type type, final_type = OBJ_BAD;
 	struct delta_base_cache_entry *ent;
 
 	/*
@@ -1534,7 +1534,7 @@  int packed_object_info(struct repository *r, struct packed_git *p,
 	ent = get_delta_base_cache_entry(p, obj_offset);
 	if (ent) {
 		oi->whence = OI_DBCACHED;
-		type = ent->type;
+		final_type = type = ent->type;
 		if (oi->sizep)
 			*oi->sizep = ent->size;
 		if (oi->contentp) {
@@ -1552,6 +1552,7 @@  int packed_object_info(struct repository *r, struct packed_git *p,
 	} else if (oi->contentp && !oi->content_limit) {
 		*oi->contentp = unpack_entry(r, p, obj_offset, &type,
 						oi->sizep);
+		final_type = type;
 		if (!*oi->contentp)
 			type = OBJ_BAD;
 	} else {
@@ -1581,6 +1582,7 @@  int packed_object_info(struct repository *r, struct packed_git *p,
 			if (oi->sizep && *oi->sizep <= oi->content_limit) {
 				*oi->contentp = unpack_entry(r, p, obj_offset,
 							&type, oi->sizep);
+				final_type = type;
 				if (!*oi->contentp)
 					type = OBJ_BAD;
 			} else {
@@ -1602,17 +1604,17 @@  int packed_object_info(struct repository *r, struct packed_git *p,
 	}
 
 	if (oi->typep || oi->type_name) {
-		enum object_type ptot;
-		ptot = packed_to_object_type(r, p, obj_offset,
-					     type, &w_curs, curpos);
+		if (final_type < 0)
+			final_type = packed_to_object_type(r, p, obj_offset,
+						     type, &w_curs, curpos);
 		if (oi->typep)
-			*oi->typep = ptot;
+			*oi->typep = final_type;
 		if (oi->type_name) {
-			const char *tn = type_name(ptot);
+			const char *tn = type_name(final_type);
 			if (tn)
 				strbuf_addstr(oi->type_name, tn);
 		}
-		if (ptot < 0) {
+		if (final_type < 0) {
 			type = OBJ_BAD;
 			goto out;
 		}