diff mbox series

[v2] server-info: do not list unlinked packs

Message ID 20190523172723.eny6smdt57zxau6z@dcvr (mailing list archive)
State New, archived
Headers show
Series [v2] server-info: do not list unlinked packs | expand

Commit Message

Eric Wong May 23, 2019, 5:27 p.m. UTC
Jeff King <peff@peff.net> wrote:
> On Thu, May 23, 2019 at 08:59:59AM +0000, Eric Wong wrote:
> 
> > > We never delete entries from the in-memory packed_git list; a reprepare
> > > only adds to the list. You'd need to teach update_server_info() to
> > > ignore packs which are no longer present (or switch to exec-ing a
> > > separate update-server-info binary).
> > 
> > Ah, checking files_exists() and setting a bit seems sufficient.
> 
> Yes, though we do we even need to store the bit?

I wanted to avoid the over-allocation, and I hit a bounds error
because I forgot to adjust num_pack as you mentioned below.

> I.e.,
> 
> > @@ -199,12 +200,16 @@ static void init_pack_info(const char *infofile, int force)
> >  		 */
> >  		if (!p->pack_local)
> >  			continue;
> > +		if (!file_exists(p->pack_name)) {
> > +			p->pack_unlinked = 1;
> > +			continue;
> > +		}
> >  		i++;
> >  	}
> >  	num_pack = i;
> >  	info = xcalloc(num_pack, sizeof(struct pack_info *));
> >  	for (i = 0, p = get_all_packs(the_repository); p; p = p->next) {
> > -		if (!p->pack_local)
> > +		if (!p->pack_local || p->pack_unlinked)
> >  			continue;
> >  		assert(i < num_pack);
> >  		info[i] = xcalloc(1, sizeof(struct pack_info));
> 
> If we just check file_exists() in the second loop, then this is entirely
> local to update_server_info(). And other users of packed_git do not have
> to wonder who is responsible for setting that flag in the global list.
> 
> It does mean you'd over-allocate the array (and num_pack would have to
> be adjusted down to "i" after the second loop), but that's not a big
> deal.  I do think the whole two-loop thing would be more readable if we
> simply grew it on the fly with ALLOC_GROW().

ALLOC_GROW makes the whole thing much nicer.
Thanks for the hint :>

---------------------8<---------------------
Subject: [PATCH] server-info: do not list unlinked packs

Having non-existent packs in objects/info/packs causes
dumb HTTP clients to abort.

v2: use single loop with ALLOC_GROW as suggested by Jeff King

Signed-off-by: Eric Wong <e@80x24.org>
Helped-by: Jeff King <peff@peff.net>
---
Interdiff:
  diff --git a/object-store.h b/object-store.h
  index 2c9facc8f2..272e01e452 100644
  --- a/object-store.h
  +++ b/object-store.h
  @@ -77,7 +77,6 @@ struct packed_git {
   		 freshened:1,
   		 do_not_close:1,
   		 pack_promisor:1,
  -		 pack_unlinked:1,
   		 multi_pack_index:1;
   	unsigned char hash[GIT_MAX_RAWSZ];
   	struct revindex_entry *revindex;
  diff --git a/server-info.c b/server-info.c
  index 69e2c5279b..92187c70db 100644
  --- a/server-info.c
  +++ b/server-info.c
  @@ -192,30 +192,21 @@ static void init_pack_info(const char *infofile, int force)
   {
   	struct packed_git *p;
   	int stale;
  -	int i = 0;
  +	int i;
  +	size_t alloc = 0;
   
   	for (p = get_all_packs(the_repository); p; p = p->next) {
   		/* we ignore things on alternate path since they are
   		 * not available to the pullers in general.
   		 */
  -		if (!p->pack_local)
  -			continue;
  -		if (!file_exists(p->pack_name)) {
  -			p->pack_unlinked = 1;
  -			continue;
  -		}
  -		i++;
  -	}
  -	num_pack = i;
  -	info = xcalloc(num_pack, sizeof(struct pack_info *));
  -	for (i = 0, p = get_all_packs(the_repository); p; p = p->next) {
  -		if (!p->pack_local || p->pack_unlinked)
  +		if (!p->pack_local || !file_exists(p->pack_name))
   			continue;
  -		assert(i < num_pack);
  +
  +		i = num_pack++;
  +		ALLOC_GROW(info, num_pack, alloc);
   		info[i] = xcalloc(1, sizeof(struct pack_info));
   		info[i]->p = p;
   		info[i]->old_num = -1;
  -		i++;
   	}
   
   	if (infofile && !force)

 server-info.c | 18 +++++++-----------
 t/t6500-gc.sh |  2 ++
 2 files changed, 9 insertions(+), 11 deletions(-)

Comments

Jeff King May 24, 2019, 6:05 a.m. UTC | #1
On Thu, May 23, 2019 at 05:27:23PM +0000, Eric Wong wrote:

> ALLOC_GROW makes the whole thing much nicer.
> Thanks for the hint :>
> 
> ---------------------8<---------------------
> Subject: [PATCH] server-info: do not list unlinked packs

This looks fine to me (though the "v2" note is in your commit message).
I'd probably break it into two separate patches, but I can live with it
either way.

-Peff
Ævar Arnfjörð Bjarmason May 24, 2019, 7:34 a.m. UTC | #2
On Thu, May 23 2019, Eric Wong wrote:

> Jeff King <peff@peff.net> wrote:
>> On Thu, May 23, 2019 at 08:59:59AM +0000, Eric Wong wrote:
>>
>> > > We never delete entries from the in-memory packed_git list; a reprepare
>> > > only adds to the list. You'd need to teach update_server_info() to
>> > > ignore packs which are no longer present (or switch to exec-ing a
>> > > separate update-server-info binary).
>> >
>> > Ah, checking files_exists() and setting a bit seems sufficient.
>>
>> Yes, though we do we even need to store the bit?
>
> I wanted to avoid the over-allocation, and I hit a bounds error
> because I forgot to adjust num_pack as you mentioned
> below. [...]ALLOC_GROW makes the whole thing much nicer.

If you want to avoid over-allocation the last thing you want is
ALLOC_GROW() :)

I.e. see alloc_nr() in cache.h, we explicitly over-allocate with it.

But as our extensive use of it shows this sort of pattern is the right
trade-off, both in terms of performance on modern hardware, and code
readability in cases like this where we're never going to realistically
have to worry about memory pressure.
diff mbox series

Patch

diff --git a/server-info.c b/server-info.c
index 41274d098b..92187c70db 100644
--- a/server-info.c
+++ b/server-info.c
@@ -1,4 +1,5 @@ 
 #include "cache.h"
+#include "dir.h"
 #include "repository.h"
 #include "refs.h"
 #include "object.h"
@@ -191,26 +192,21 @@  static void init_pack_info(const char *infofile, int force)
 {
 	struct packed_git *p;
 	int stale;
-	int i = 0;
+	int i;
+	size_t alloc = 0;
 
 	for (p = get_all_packs(the_repository); p; p = p->next) {
 		/* we ignore things on alternate path since they are
 		 * not available to the pullers in general.
 		 */
-		if (!p->pack_local)
-			continue;
-		i++;
-	}
-	num_pack = i;
-	info = xcalloc(num_pack, sizeof(struct pack_info *));
-	for (i = 0, p = get_all_packs(the_repository); p; p = p->next) {
-		if (!p->pack_local)
+		if (!p->pack_local || !file_exists(p->pack_name))
 			continue;
-		assert(i < num_pack);
+
+		i = num_pack++;
+		ALLOC_GROW(info, num_pack, alloc);
 		info[i] = xcalloc(1, sizeof(struct pack_info));
 		info[i]->p = p;
 		info[i]->old_num = -1;
-		i++;
 	}
 
 	if (infofile && !force)
diff --git a/t/t6500-gc.sh b/t/t6500-gc.sh
index 515c6735e9..c0f04dc6b0 100755
--- a/t/t6500-gc.sh
+++ b/t/t6500-gc.sh
@@ -71,6 +71,8 @@  test_expect_success 'gc --keep-largest-pack' '
 		git gc --keep-largest-pack &&
 		( cd .git/objects/pack && ls *.pack ) >pack-list &&
 		test_line_count = 2 pack-list &&
+		awk "/^P /{print \$2}" <.git/objects/info/packs >pack-info &&
+		test_line_count = 2 pack-info &&
 		test_path_is_file $BASE_PACK &&
 		git fsck
 	)