diff mbox series

[3/9] pack-write: refactor renaming in finish_tmp_packfile()

Message ID 35052ef494dbc55119614f3e22742d8d814b21b1.1631157880.git.me@ttaylorr.com (mailing list archive)
State Superseded
Headers show
Series packfile: avoid .idx rename races | expand

Commit Message

Taylor Blau Sept. 9, 2021, 3:24 a.m. UTC
From: Ævar Arnfjörð Bjarmason <avarab@gmail.com>

Refactor the renaming in finish_tmp_packfile() into a helper function.
The callers are now expected to pass a "name_buffer" ending in
"pack-OID." instead of the previous "pack-", we then append "pack",
"idx" or "rev" to it.

By doing the strbuf_setlen() in rename_tmp_packfile() we reuse the
buffer and avoid the repeated allocations we'd get if that function had
its own temporary "struct strbuf".

This approach of reusing the buffer does make the last user in
pack-object.c's write_pack_file() slightly awkward, since we needlessly
do a strbuf_setlen() before calling strbuf_release() for consistency. In
subsequent changes we'll move that bitmap writing code around, so let's
not skip the strbuf_setlen() now.

The previous strbuf_reset() idiom originated with 5889271114a
(finish_tmp_packfile():use strbuf for pathname construction,
2014-03-03), which in turn was a minimal adjustment of pre-strbuf code
added in 0e990530ae (finish_tmp_packfile(): a helper function,
2011-10-28).

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 builtin/pack-objects.c |  7 +++++--
 bulk-checkin.c         |  3 ++-
 pack-write.c           | 37 ++++++++++++++++---------------------
 3 files changed, 23 insertions(+), 24 deletions(-)

Comments

Junio C Hamano Sept. 9, 2021, 7:29 p.m. UTC | #1
Taylor Blau <me@ttaylorr.com> writes:

> @@ -1250,8 +1251,9 @@ static void write_pack_file(void)
>  					    &pack_idx_opts, hash);
>  
>  			if (write_bitmap_index) {
> -				strbuf_addf(&tmpname, "%s.bitmap", hash_to_hex(hash));
> +				size_t tmpname_len = tmpname.len;
>  
> +				strbuf_addstr(&tmpname, "bitmap");
>  				stop_progress(&progress_state);
>  
>  				bitmap_writer_show_progress(progress);
> @@ -1260,6 +1262,7 @@ static void write_pack_file(void)
>  				bitmap_writer_finish(written_list, nr_written,
>  						     tmpname.buf, write_bitmap_options);
>  				write_bitmap_index = 0;
> +				strbuf_setlen(&tmpname, tmpname_len);
>  			}
>  
>  			strbuf_release(&tmpname);

This runs setlen on tmpname strbuf and immediately releases (the
close brace before release closes the "if (write_bitmap_index)", not
any loop.  If we plan to insert more code to use tmpname where the
blank line we see above is in the future, it may make sense, but
even in such a case, adding setlen() only when it starts to matter
may make it easier to follow.

In any case, the above correctly adjusts tmpname to have a <hash>
plus '.' at the end of tmpname to call finish_tmp_packfile().

> diff --git a/bulk-checkin.c b/bulk-checkin.c
> index 6283bc8bd9..c19d471f0b 100644
> --- a/bulk-checkin.c
> +++ b/bulk-checkin.c
> @@ -46,7 +46,8 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
>  		close(fd);
>  	}
>  
> -	strbuf_addf(&packname, "%s/pack/pack-", get_object_directory());
> +	strbuf_addf(&packname, "%s/pack/pack-%s.", get_object_directory(),
> +		    hash_to_hex(hash));
>  	finish_tmp_packfile(&packname, state->pack_tmp_name,
>  			    state->written, state->nr_written,
>  			    &state->pack_idx_opts, hash);

OK.

> diff --git a/pack-write.c b/pack-write.c
> index 2767b78619..95b063be94 100644
> --- a/pack-write.c
> +++ b/pack-write.c
> @@ -458,6 +458,18 @@ struct hashfile *create_tmp_packfile(char **pack_tmp_name)
>  	return hashfd(fd, *pack_tmp_name);
>  }
>  
> +static void rename_tmp_packfile(struct strbuf *nb, const char *source,
> +				const char *ext)
> +{
> +	size_t nb_len = nb->len;
> +
> +	strbuf_addstr(nb, ext);
> +	if (rename(source, nb->buf))
> +		die_errno("unable to rename temporary '*.%s' file to '%s'",
> +			  ext, nb->buf);

I do not like '*.%s' here.  Without '*' it is clear enough, and
because nb->buf has already the same ext information, 

	unable to rename temporary file to '%s'.

would be even simpler without losing any information than this
rewrite does.

The original explicitly used the more human-facing terms like "pack
file", so we are losing information by this refactoring, but the
loss is not too bad.  In one case, namely ".idx" files, it is even
better, as the original says "temporary index file" to refer to the
new .idx file, which makes it ambiguous with _the_ index file.

> +	strbuf_setlen(nb, nb_len);
> +}

In the longer term if we were to add more auxiliary files next to
each of the .pack file, it makes perfect sense that the common
prefix is fed to rename_tmp_packfile() and the function reverts
contents of the prefix buffer back when it is done with it.  But it
would be more friendly to those adding more calls to this function
in the future to document the convention in a comment before the
function.

Also, the name "nb" would need rethinking.  As far as the callers
are concerned, that is not a full name, but they are supplying the
common prefix to the function.  Perhaps "struct strbuf *name_prefix"
or soemthing might be better?  I dunno.

>  void finish_tmp_packfile(struct strbuf *name_buffer,
>  			 const char *pack_tmp_name,
>  			 struct pack_idx_entry **written_list,
> @@ -466,7 +478,6 @@ void finish_tmp_packfile(struct strbuf *name_buffer,
>  			 unsigned char hash[])
>  {
>  	const char *idx_tmp_name, *rev_tmp_name = NULL;
> -	int basename_len = name_buffer->len;
>  
>  	if (adjust_shared_perm(pack_tmp_name))
>  		die_errno("unable to make temporary pack file readable");
> @@ -479,26 +490,10 @@ void finish_tmp_packfile(struct strbuf *name_buffer,
>  	rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash,
>  				      pack_idx_opts->flags);

It is unfortunate that write_idx_file() and write_rev_file() take
hash and come up with the temporary filename on their own (which
means their resulting filenames may not share the same prefix as
.pack and .idx files), but it is just the naming of temporaries and
inconsistencies among them is, eh, temporary, so it is OK.

> +	rename_tmp_packfile(name_buffer, pack_tmp_name, "pack");
> +	rename_tmp_packfile(name_buffer, idx_tmp_name, "idx");
> +	if (rev_tmp_name)
> +		rename_tmp_packfile(name_buffer, rev_tmp_name, "rev");
>  
>  	free((void *)idx_tmp_name);
>  }
Taylor Blau Sept. 9, 2021, 9:07 p.m. UTC | #2
On Thu, Sep 09, 2021 at 12:29:01PM -0700, Junio C Hamano wrote:
> Taylor Blau <me@ttaylorr.com> writes:
>
> > @@ -1250,8 +1251,9 @@ static void write_pack_file(void)
> >  					    &pack_idx_opts, hash);
> >
> >  			if (write_bitmap_index) {
> > -				strbuf_addf(&tmpname, "%s.bitmap", hash_to_hex(hash));
> > +				size_t tmpname_len = tmpname.len;
> >
> > +				strbuf_addstr(&tmpname, "bitmap");
> >  				stop_progress(&progress_state);
> >
> >  				bitmap_writer_show_progress(progress);
> > @@ -1260,6 +1262,7 @@ static void write_pack_file(void)
> >  				bitmap_writer_finish(written_list, nr_written,
> >  						     tmpname.buf, write_bitmap_options);
> >  				write_bitmap_index = 0;
> > +				strbuf_setlen(&tmpname, tmpname_len);
> >  			}
> >
> >  			strbuf_release(&tmpname);
>
> This runs setlen on tmpname strbuf and immediately releases (the
> close brace before release closes the "if (write_bitmap_index)", not
> any loop.  If we plan to insert more code to use tmpname where the
> blank line we see above is in the future, it may make sense, but
> even in such a case, adding setlen() only when it starts to matter
> may make it easier to follow.
>
> In any case, the above correctly adjusts tmpname to have a <hash>
> plus '.' at the end of tmpname to call finish_tmp_packfile().

Ævar makes a slight mention of this in their commit message:

    This approach of reusing the buffer does make the last user in
    pack-object.c's write_pack_file() slightly awkward, since we
    needlessly do a strbuf_setlen() before calling strbuf_release() for
    consistency. In subsequent changes we'll move that bitmap writing
    code around, so let's not skip the strbuf_setlen() now.

And this is to set us up for the final patch which moves the call to
rename_tmp_packfile_idx(&tmpname, &idx_tmp_name) after the closing brace
in the quoted portion of your message and the strbuf_release(). There,
we'll want the strbuf reset to the appropriate contents and length, and
not released.

But in the meantime it is awkward.

> I do not like '*.%s' here.  Without '*' it is clear enough, and
> because nb->buf has already the same ext information,
>
> [...] But it would be more friendly to those adding more calls to this
> function in the future to document the convention in a comment before
> the function.
>
> Also, the name "nb" would need rethinking.  As far as the callers
> are concerned, that is not a full name, but they are supplying the
> common prefix to the function.  Perhaps "struct strbuf *name_prefix"
> or soemthing might be better?  I dunno.

In each of these three, I wasn't able to decide if you wanted these
addressed in a newer version of this series, or if you were happy enough
with the result to pick it up. I'm happy to send you a new version, but
don't want to clog your inbox if you were already planning on picking
this up.

Thanks,
Taylor
Junio C Hamano Sept. 9, 2021, 11:30 p.m. UTC | #3
Taylor Blau <me@ttaylorr.com> writes:

> In each of these three, I wasn't able to decide if you wanted these
> addressed in a newer version of this series, or if you were happy enough
> with the result to pick it up. I'm happy to send you a new version, but
> don't want to clog your inbox if you were already planning on picking
> this up.

Well, it is often a no-cost operation to replace a topic that has
been in 'seen' with a newer round, so you do not have to worry about
my inbox.  As I often say, if it turns out to be a bad idea, I can
just drop it from 'seen' as if I didn't see it ;-)

Anyway.

If a newer version will come, I'd love to see the review comments at
least considered (be it from me or from anybody else) --- after all,
if the original were good enough, reviewer(s) wouldn't have raised
them as potential issues.

Of course, "considered" is the key word.  No need to blindly follow;
as long as there is a solid reason to justify why changing would be
worsening the well-written original.

Thanks.
Taylor Blau Sept. 9, 2021, 11:31 p.m. UTC | #4
On Thu, Sep 09, 2021 at 04:30:38PM -0700, Junio C Hamano wrote:
> Taylor Blau <me@ttaylorr.com> writes:
>
> > In each of these three, I wasn't able to decide if you wanted these
> > addressed in a newer version of this series, or if you were happy enough
> > with the result to pick it up. I'm happy to send you a new version, but
> > don't want to clog your inbox if you were already planning on picking
> > this up.
>
> Well, it is often a no-cost operation to replace a topic that has
> been in 'seen' with a newer round, so you do not have to worry about
> my inbox.  As I often say, if it turns out to be a bad idea, I can
> just drop it from 'seen' as if I didn't see it ;-)
>
> Anyway.
>
> If a newer version will come, I'd love to see the review comments at
> least considered (be it from me or from anybody else) --- after all,
> if the original were good enough, reviewer(s) wouldn't have raised
> them as potential issues.
>
> Of course, "considered" is the key word.  No need to blindly follow;
> as long as there is a solid reason to justify why changing would be
> worsening the well-written original.

Agreed strongly :-). After reading through your review again, I felt
like the individual components were greater than the sum of their parts,
and that the v2 of this combined series is on the whole better than v1.

Thanks for taking the time to review it.

Thanks,
Taylor
Junio C Hamano Sept. 10, 2021, 1:29 a.m. UTC | #5
Taylor Blau <me@ttaylorr.com> writes:

> Ævar makes a slight mention of this in their commit message:
>
>     This approach of reusing the buffer does make the last user in
>     pack-object.c's write_pack_file() slightly awkward, since we
>     needlessly do a strbuf_setlen() before calling strbuf_release() for
>     consistency. In subsequent changes we'll move that bitmap writing
>     code around, so let's not skip the strbuf_setlen() now.
>
> And this is to set us up for the final patch which moves the call to
> rename_tmp_packfile_idx(&tmpname, &idx_tmp_name) after the closing brace
> in the quoted portion of your message and the strbuf_release(). There,
> we'll want the strbuf reset to the appropriate contents and length, and
> not released.
>
> But in the meantime it is awkward.

That is why I said "adding setlen only when t starts to matter may
make it easier to follow".  It won't make it awkward at this step,
and it will make it stand out why it is needed when it is added,
presumably at the very end, when rename_tmp_packfile_idx() call is
added after this if() block.
diff mbox series

Patch

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index df49f656b9..2a105c8d6e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1237,7 +1237,8 @@  static void write_pack_file(void)
 					warning_errno(_("failed utime() on %s"), pack_tmp_name);
 			}
 
-			strbuf_addf(&tmpname, "%s-", base_name);
+			strbuf_addf(&tmpname, "%s-%s.", base_name,
+				    hash_to_hex(hash));
 
 			if (write_bitmap_index) {
 				bitmap_writer_set_checksum(hash);
@@ -1250,8 +1251,9 @@  static void write_pack_file(void)
 					    &pack_idx_opts, hash);
 
 			if (write_bitmap_index) {
-				strbuf_addf(&tmpname, "%s.bitmap", hash_to_hex(hash));
+				size_t tmpname_len = tmpname.len;
 
+				strbuf_addstr(&tmpname, "bitmap");
 				stop_progress(&progress_state);
 
 				bitmap_writer_show_progress(progress);
@@ -1260,6 +1262,7 @@  static void write_pack_file(void)
 				bitmap_writer_finish(written_list, nr_written,
 						     tmpname.buf, write_bitmap_options);
 				write_bitmap_index = 0;
+				strbuf_setlen(&tmpname, tmpname_len);
 			}
 
 			strbuf_release(&tmpname);
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 6283bc8bd9..c19d471f0b 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -46,7 +46,8 @@  static void finish_bulk_checkin(struct bulk_checkin_state *state)
 		close(fd);
 	}
 
-	strbuf_addf(&packname, "%s/pack/pack-", get_object_directory());
+	strbuf_addf(&packname, "%s/pack/pack-%s.", get_object_directory(),
+		    hash_to_hex(hash));
 	finish_tmp_packfile(&packname, state->pack_tmp_name,
 			    state->written, state->nr_written,
 			    &state->pack_idx_opts, hash);
diff --git a/pack-write.c b/pack-write.c
index 2767b78619..95b063be94 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -458,6 +458,18 @@  struct hashfile *create_tmp_packfile(char **pack_tmp_name)
 	return hashfd(fd, *pack_tmp_name);
 }
 
+static void rename_tmp_packfile(struct strbuf *nb, const char *source,
+				const char *ext)
+{
+	size_t nb_len = nb->len;
+
+	strbuf_addstr(nb, ext);
+	if (rename(source, nb->buf))
+		die_errno("unable to rename temporary '*.%s' file to '%s'",
+			  ext, nb->buf);
+	strbuf_setlen(nb, nb_len);
+}
+
 void finish_tmp_packfile(struct strbuf *name_buffer,
 			 const char *pack_tmp_name,
 			 struct pack_idx_entry **written_list,
@@ -466,7 +478,6 @@  void finish_tmp_packfile(struct strbuf *name_buffer,
 			 unsigned char hash[])
 {
 	const char *idx_tmp_name, *rev_tmp_name = NULL;
-	int basename_len = name_buffer->len;
 
 	if (adjust_shared_perm(pack_tmp_name))
 		die_errno("unable to make temporary pack file readable");
@@ -479,26 +490,10 @@  void finish_tmp_packfile(struct strbuf *name_buffer,
 	rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash,
 				      pack_idx_opts->flags);
 
-	strbuf_addf(name_buffer, "%s.pack", hash_to_hex(hash));
-
-	if (rename(pack_tmp_name, name_buffer->buf))
-		die_errno("unable to rename temporary pack file");
-
-	strbuf_setlen(name_buffer, basename_len);
-
-	strbuf_addf(name_buffer, "%s.idx", hash_to_hex(hash));
-	if (rename(idx_tmp_name, name_buffer->buf))
-		die_errno("unable to rename temporary index file");
-
-	strbuf_setlen(name_buffer, basename_len);
-
-	if (rev_tmp_name) {
-		strbuf_addf(name_buffer, "%s.rev", hash_to_hex(hash));
-		if (rename(rev_tmp_name, name_buffer->buf))
-			die_errno("unable to rename temporary reverse-index file");
-	}
-
-	strbuf_setlen(name_buffer, basename_len);
+	rename_tmp_packfile(name_buffer, pack_tmp_name, "pack");
+	rename_tmp_packfile(name_buffer, idx_tmp_name, "idx");
+	if (rev_tmp_name)
+		rename_tmp_packfile(name_buffer, rev_tmp_name, "rev");
 
 	free((void *)idx_tmp_name);
 }