mbox series

[v2,0/4] rename *.idx file into place last (also after *.bitmap)

Message ID cover-v2-0.4-0000000000-20210908T003631Z-avarab@gmail.com (mailing list archive)
Headers show
Series rename *.idx file into place last (also after *.bitmap) | expand

Message

Ævar Arnfjörð Bjarmason Sept. 8, 2021, 12:38 a.m. UTC
I came up with this on top of Taylor's series which fixes the order in
which we write files associated with pack files[1]. His series fixes a
race where we write *.idx before *.rev, but left the issue of writing
*.bitmap after *.idx, this series fixes that. Now we'll really write
the *.idx last.

This v2 attempts to fix the comments Taylor had on v1, in particular I
think the borderline readability improvements in v1 are now pretty
unambiguously more readable in 2/4 of this series. I also converted
the bulk-checkin.c caller, split up the wrapping change in pack.h into
its own commit etc.

1. https://lore.kernel.org/git/cover.1630461918.git.me@ttaylorr.com/ [1]

Ævar Arnfjörð Bjarmason (4):
  pack.h: line-wrap the definition of finish_tmp_packfile()
  pack-write: refactor renaming in finish_tmp_packfile()
  pack-write: split up finish_tmp_packfile() function
  pack-write: rename *.idx file into place last (really!)

 builtin/pack-objects.c | 16 +++++++++---
 bulk-checkin.c         | 16 ++++++++++++
 pack-write.c           | 59 +++++++++++++++++++++---------------------
 pack.h                 | 10 ++++++-
 4 files changed, 67 insertions(+), 34 deletions(-)

Range-diff against v1:
-:  ---------- > 1:  29f5787651 pack.h: line-wrap the definition of finish_tmp_packfile()
1:  0e6ef07ce0 ! 2:  7b39f4599b pack-write: use more idiomatic strbuf usage for packname construction
    @@ Metadata
     Author: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
     
      ## Commit message ##
    -    pack-write: use more idiomatic strbuf usage for packname construction
    +    pack-write: refactor renaming in finish_tmp_packfile()
     
    -    Change code added in 5889271114a (finish_tmp_packfile():use strbuf for
    -    pathname construction, 2014-03-03) to do strbuf_reset() instead of
    -    noting the length of the base template, and doing a strbuf_setlen() to
    -    reset it, also change the spacing in the finish_tmp_packfile() so that
    -    each setup of the template, rename, and strbuf_reset() is grouped
    -    together.
    +    Refactor the renaming in finish_tmp_packfile() so that it takes a
    +    "const struct strbuf *" instead of a non-const, and refactor the
    +    repetitive renaming pattern in finish_tmp_packfile() to use a new
    +    static rename_tmp_packfile() helper function.
     
    -    Since the prototype of the previous "name_buffer" now has a "const"
    -    use this chance to wrap the overly long definition of the
    -    finish_tmp_packfile() function.
    +    The previous strbuf_reset() idiom originated with
    +    5889271114a (finish_tmp_packfile():use strbuf for pathname
    +    construction, 2014-03-03), which in turn was a minimal adjustment of
    +    pre-strbuf code added in 0e990530ae (finish_tmp_packfile(): a helper
    +    function, 2011-10-28).
     
    -    This doesn't really matter for now, but as we'll see makes the
    -    subsequent change much easier, as we won't need to juggle the basename
    -    template v.s. its current contents anymore when writing bitmaps.
    +    Since the memory allocation is not a bottleneck here we can afford a
    +    bit more readability at the cost of re-allocating this new "struct
    +    strbuf sb".
     
         Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
     
      ## builtin/pack-objects.c ##
     @@ builtin/pack-objects.c: static void write_pack_file(void)
    - 
    - 		if (!pack_to_stdout) {
    - 			struct stat st;
    --			struct strbuf tmpname = STRBUF_INIT;
    -+			struct strbuf tmp_basename = STRBUF_INIT;
    - 
    - 			/*
    - 			 * Packs are runtime accessed in their mtime
    -@@ builtin/pack-objects.c: static void write_pack_file(void)
    - 					warning_errno(_("failed utime() on %s"), pack_tmp_name);
    - 			}
    - 
    --			strbuf_addf(&tmpname, "%s-", base_name);
    -+			strbuf_addf(&tmp_basename, "%s-", base_name);
    - 
    - 			if (write_bitmap_index) {
    - 				bitmap_writer_set_checksum(hash);
    -@@ builtin/pack-objects.c: static void write_pack_file(void)
    - 					&to_pack, written_list, nr_written);
    - 			}
    - 
    --			finish_tmp_packfile(&tmpname, pack_tmp_name,
    -+			finish_tmp_packfile(&tmp_basename, pack_tmp_name,
    - 					    written_list, nr_written,
      					    &pack_idx_opts, hash);
      
      			if (write_bitmap_index) {
     -				strbuf_addf(&tmpname, "%s.bitmap", hash_to_hex(hash));
     +				struct strbuf sb = STRBUF_INIT;
     +
    -+				strbuf_addf(&sb, "%s%s.bitmap",
    -+					    tmp_basename.buf,
    ++				strbuf_addf(&sb, "%s%s.bitmap", tmpname.buf,
     +					    hash_to_hex(hash));
      
      				stop_progress(&progress_state);
    @@ builtin/pack-objects.c: static void write_pack_file(void)
     +				strbuf_release(&sb);
      			}
      
    --			strbuf_release(&tmpname);
    -+			strbuf_release(&tmp_basename);
    - 			free(pack_tmp_name);
    - 			puts(hash_to_hex(hash));
    - 		}
    + 			strbuf_release(&tmpname);
     
      ## pack-write.c ##
     @@ pack-write.c: struct hashfile *create_tmp_packfile(char **pack_tmp_name)
    @@ pack-write.c: struct hashfile *create_tmp_packfile(char **pack_tmp_name)
      }
      
     -void finish_tmp_packfile(struct strbuf *name_buffer,
    -+void finish_tmp_packfile(const struct strbuf *tmp_basename,
    ++static void rename_tmp_packfile(const char *source,
    ++				 const struct strbuf *basename,
    ++				 const unsigned char hash[],
    ++				 const char *ext)
    ++{
    ++	struct strbuf sb = STRBUF_INIT;
    ++
    ++	strbuf_addf(&sb, "%s%s.%s", basename->buf, hash_to_hex(hash), ext);
    ++	if (rename(source, sb.buf))
    ++		die_errno("unable to rename temporary '*.%s' file to '%s'",
    ++			  ext, sb.buf);
    ++	strbuf_release(&sb);
    ++}
    ++
    ++void finish_tmp_packfile(const struct strbuf *basename,
      			 const char *pack_tmp_name,
      			 struct pack_idx_entry **written_list,
      			 uint32_t nr_written,
    - 			 struct pack_idx_option *pack_idx_opts,
    +@@ pack-write.c: void finish_tmp_packfile(struct strbuf *name_buffer,
      			 unsigned char hash[])
      {
    -+	struct strbuf sb = STRBUF_INIT;
      	const char *idx_tmp_name, *rev_tmp_name = NULL;
     -	int basename_len = name_buffer->len;
      
    @@ pack-write.c: void finish_tmp_packfile(struct strbuf *name_buffer,
     -	strbuf_addf(name_buffer, "%s.pack", hash_to_hex(hash));
     -
     -	if (rename(pack_tmp_name, name_buffer->buf))
    -+	strbuf_addf(&sb, "%s%s.pack", tmp_basename->buf, hash_to_hex(hash));
    -+	if (rename(pack_tmp_name, sb.buf))
    - 		die_errno("unable to rename temporary pack file");
    +-		die_errno("unable to rename temporary pack file");
     -
     -	strbuf_setlen(name_buffer, basename_len);
    -+	strbuf_reset(&sb);
    - 
    - 	if (rev_tmp_name) {
    +-
    +-	if (rev_tmp_name) {
     -		strbuf_addf(name_buffer, "%s.rev", hash_to_hex(hash));
     -		if (rename(rev_tmp_name, name_buffer->buf))
    -+		strbuf_addf(&sb, "%s%s.rev", tmp_basename->buf,
    -+			    hash_to_hex(hash));
    -+		if (rename(rev_tmp_name, sb.buf))
    - 			die_errno("unable to rename temporary reverse-index file");
    +-			die_errno("unable to rename temporary reverse-index file");
     -
     -		strbuf_setlen(name_buffer, basename_len);
    -+		strbuf_reset(&sb);
    - 	}
    - 
    +-	}
    +-
     -	strbuf_addf(name_buffer, "%s.idx", hash_to_hex(hash));
     -	if (rename(idx_tmp_name, name_buffer->buf))
    -+	strbuf_addf(&sb, "%s%s.idx", tmp_basename->buf, hash_to_hex(hash));
    -+	if (rename(idx_tmp_name, sb.buf))
    - 		die_errno("unable to rename temporary index file");
    +-		die_errno("unable to rename temporary index file");
     -
     -	strbuf_setlen(name_buffer, basename_len);
    -+	strbuf_reset(&sb);
    ++	rename_tmp_packfile(pack_tmp_name, basename, hash, "pack");
    ++	if (rev_tmp_name)
    ++		rename_tmp_packfile(rev_tmp_name, basename, hash, "rev");
    ++	rename_tmp_packfile(idx_tmp_name, basename, hash, "idx");
      
      	free((void *)idx_tmp_name);
      }
    @@ pack.h: int encode_in_pack_object_header(unsigned char *hdr, int hdr_len,
      int read_pack_header(int fd, struct pack_header *);
      
      struct hashfile *create_tmp_packfile(char **pack_tmp_name);
    --void finish_tmp_packfile(struct strbuf *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[]);
    -+void finish_tmp_packfile(const struct strbuf *name_buffer,
    -+			 const char *pack_tmp_name,
    -+			 struct pack_idx_entry **written_list,
    -+			 uint32_t nr_written,
    -+			 struct pack_idx_option *pack_idx_opts,
    -+			 unsigned char sha1[]);
    - 
    - #endif
    +-void finish_tmp_packfile(struct strbuf *name_buffer,
    ++void finish_tmp_packfile(const struct strbuf *basename,
    + 			 const char *pack_tmp_name,
    + 			 struct pack_idx_entry **written_list,
    + 			 uint32_t nr_written,
2:  42f83774fe ! 3:  1205f9d0c2 pack-write: split up finish_tmp_packfile() function
    @@ Commit message
         pack-write: split up finish_tmp_packfile() function
     
         Split up the finish_tmp_packfile() function and use the split-up
    -    version in pack-objects.c. This change should not change any
    -    functionality, but sets up code flow for a bug fix where we'll be able
    -    to move the *.idx in-place after the *.bitmap is written.
    +    version in pack-objects.c in preparation for moving the step of
    +    renaming the *.idx file later as part of a function change.
    +
    +    Since the only other caller of finish_tmp_packfile() was in
    +    bulk-checkin.c, and it won't be needing a change to its *.idx
    +    renaming, provide a thin wrapper for the old function as a static
    +    function in that file. If other callers end up needing the simpler
    +    version it could be moved back to "pack-write.c" and "pack.h".
     
         Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
     
    @@ builtin/pack-objects.c
     @@ builtin/pack-objects.c: static void write_pack_file(void)
      		if (!pack_to_stdout) {
      			struct stat st;
    - 			struct strbuf tmp_basename = STRBUF_INIT;
    + 			struct strbuf tmpname = STRBUF_INIT;
     +			char *idx_tmp_name = NULL;
      
      			/*
    @@ builtin/pack-objects.c: static void write_pack_file(void)
      					&to_pack, written_list, nr_written);
      			}
      
    --			finish_tmp_packfile(&tmp_basename, pack_tmp_name,
    --					    written_list, nr_written,
    +-			finish_tmp_packfile(&tmpname, pack_tmp_name,
    ++			stage_tmp_packfiles(&tmpname, pack_tmp_name,
    + 					    written_list, nr_written,
     -					    &pack_idx_opts, hash);
    -+			stage_tmp_packfile(&tmp_basename, pack_tmp_name,
    -+					   written_list, nr_written,
    -+					   &pack_idx_opts, hash, &idx_tmp_name);
    -+			rename_tmp_packfile_idx(&tmp_basename, hash, &idx_tmp_name);
    ++					    &pack_idx_opts, hash, &idx_tmp_name);
    ++			rename_tmp_packfile_idx(&tmpname, hash, &idx_tmp_name);
      
      			if (write_bitmap_index) {
      				struct strbuf sb = STRBUF_INIT;
    @@ builtin/pack-objects.c: static void write_pack_file(void)
      			}
      
     +			free(idx_tmp_name);
    - 			strbuf_release(&tmp_basename);
    + 			strbuf_release(&tmpname);
      			free(pack_tmp_name);
      			puts(hash_to_hex(hash));
     
    - ## pack-write.c ##
    -@@ pack-write.c: void finish_tmp_packfile(const struct strbuf *tmp_basename,
    - 			 uint32_t nr_written,
    - 			 struct pack_idx_option *pack_idx_opts,
    - 			 unsigned char hash[])
    + ## bulk-checkin.c ##
    +@@ bulk-checkin.c: static struct bulk_checkin_state {
    + 	uint32_t nr_written;
    + } state;
    + 
    ++static void finish_tmp_packfile(const struct strbuf *basename,
    ++				const char *pack_tmp_name,
    ++				struct pack_idx_entry **written_list,
    ++				uint32_t nr_written,
    ++				struct pack_idx_option *pack_idx_opts,
    ++				unsigned char hash[])
     +{
     +	char *idx_tmp_name = NULL;
     +
    -+	stage_tmp_packfile(tmp_basename, pack_tmp_name, written_list,
    -+			   nr_written, pack_idx_opts, hash, &idx_tmp_name);
    -+	rename_tmp_packfile_idx(tmp_basename, hash, &idx_tmp_name);
    ++	stage_tmp_packfiles(basename, pack_tmp_name, written_list, nr_written,
    ++			    pack_idx_opts, hash, &idx_tmp_name);
    ++	rename_tmp_packfile_idx(basename, hash, &idx_tmp_name);
     +
     +	free(idx_tmp_name);
     +}
     +
    -+void stage_tmp_packfile(const struct strbuf *tmp_basename,
    -+			const char *pack_tmp_name,
    -+			struct pack_idx_entry **written_list,
    -+			uint32_t nr_written,
    -+			struct pack_idx_option *pack_idx_opts,
    -+			unsigned char hash[],
    -+			char **idx_tmp_name)
    + static void finish_bulk_checkin(struct bulk_checkin_state *state)
    + {
    + 	struct object_id oid;
    +
    + ## pack-write.c ##
    +@@ pack-write.c: static void rename_tmp_packfile(const char *source,
    + 	strbuf_release(&sb);
    + }
    + 
    +-void finish_tmp_packfile(const struct strbuf *basename,
    ++void rename_tmp_packfile_idx(const struct strbuf *basename,
    ++			      unsigned char hash[], char **idx_tmp_name)
    ++{
    ++	rename_tmp_packfile(*idx_tmp_name, basename, hash, "idx");
    ++}
    ++
    ++void stage_tmp_packfiles(const struct strbuf *basename,
    + 			 const char *pack_tmp_name,
    + 			 struct pack_idx_entry **written_list,
    + 			 uint32_t nr_written,
    + 			 struct pack_idx_option *pack_idx_opts,
    +-			 unsigned char hash[])
    ++			 unsigned char hash[],
    ++			 char **idx_tmp_name)
      {
    - 	struct strbuf sb = STRBUF_INIT;
     -	const char *idx_tmp_name, *rev_tmp_name = NULL;
     +	const char *rev_tmp_name = NULL;
      
    @@ pack-write.c: void finish_tmp_packfile(const struct strbuf *tmp_basename,
      		die_errno("unable to make temporary index file readable");
      
      	rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash,
    -@@ pack-write.c: void finish_tmp_packfile(const struct strbuf *tmp_basename,
    - 		strbuf_reset(&sb);
    - 	}
    - 
    -+	strbuf_release(&sb);
    -+}
    -+
    -+void rename_tmp_packfile_idx(const struct strbuf *tmp_basename,
    -+			     unsigned char hash[], char **idx_tmp_name)
    -+{
    -+	struct strbuf sb = STRBUF_INIT;
    -+
    - 	strbuf_addf(&sb, "%s%s.idx", tmp_basename->buf, hash_to_hex(hash));
    --	if (rename(idx_tmp_name, sb.buf))
    -+	if (rename(*idx_tmp_name, sb.buf))
    - 		die_errno("unable to rename temporary index file");
    --	strbuf_reset(&sb);
    +@@ pack-write.c: void finish_tmp_packfile(const struct strbuf *basename,
    + 	rename_tmp_packfile(pack_tmp_name, basename, hash, "pack");
    + 	if (rev_tmp_name)
    + 		rename_tmp_packfile(rev_tmp_name, basename, hash, "rev");
    +-	rename_tmp_packfile(idx_tmp_name, basename, hash, "idx");
     -
     -	free((void *)idx_tmp_name);
    -+	strbuf_release(&sb);
      }
      
      void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought)
    @@ pack.h: int encode_in_pack_object_header(unsigned char *hdr, int hdr_len,
      int read_pack_header(int fd, struct pack_header *);
      
      struct hashfile *create_tmp_packfile(char **pack_tmp_name);
    -+void stage_tmp_packfile(const struct strbuf *tmp_basename,
    -+			const char *pack_tmp_name,
    -+			struct pack_idx_entry **written_list,
    -+			uint32_t nr_written,
    -+			struct pack_idx_option *pack_idx_opts,
    -+			unsigned char hash[],
    -+			char **idx_tmp_name);
    -+void rename_tmp_packfile_idx(const struct strbuf *tmp_basename,
    -+			     unsigned char hash[], char **idx_tmp_name);
    - void finish_tmp_packfile(const struct strbuf *name_buffer,
    +-void finish_tmp_packfile(const struct strbuf *basename,
    ++void stage_tmp_packfiles(const struct strbuf *basename,
      			 const char *pack_tmp_name,
      			 struct pack_idx_entry **written_list,
    + 			 uint32_t nr_written,
    + 			 struct pack_idx_option *pack_idx_opts,
    +-			 unsigned char sha1[]);
    ++			 unsigned char hash[],
    ++			 char **idx_tmp_name);
    ++void rename_tmp_packfile_idx(const struct strbuf *tmp_basename,
    ++			     unsigned char hash[], char **idx_tmp_name);
    + 
    + #endif
3:  78976fcb7b ! 4:  70f4a9767d pack-write: rename *.idx file into place last (really!)
    @@ Commit message
         those files are written out, nothing in that commit contradicts what's
         being done here.
     
    -    While we're at it let's add cross-commentary to both builtin/repack.c
    -    and builtin/pack-objects.c to point out the two places where we write
    -    out these sets of files in sequence.
    +    Note that the referenced earlier commit[1] is overly optimistic about
    +    "clos[ing the] race", i.e. yes we'll now write the files in the right
    +    order, but we might still race due to our sloppy use of fsync(). See
    +    the thread at [2] for a rabbit hole of various discussions about
    +    filesystem races in the face of doing and not doing fsync() (and if
    +    doing fsync(), not doing it properly).
     
         1. https://lore.kernel.org/git/a6a4d2154e83d41c10986c5f455279ab249a910c.1630461918.git.me@ttaylorr.com/
    +    2. https://lore.kernel.org/git/8735qgkvv1.fsf@evledraar.gmail.com/
     
         Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
     
      ## builtin/pack-objects.c ##
     @@ builtin/pack-objects.c: static void write_pack_file(void)
    - 			stage_tmp_packfile(&tmp_basename, pack_tmp_name,
    - 					   written_list, nr_written,
    - 					   &pack_idx_opts, hash, &idx_tmp_name);
    --			rename_tmp_packfile_idx(&tmp_basename, hash, &idx_tmp_name);
    + 			stage_tmp_packfiles(&tmpname, pack_tmp_name,
    + 					    written_list, nr_written,
    + 					    &pack_idx_opts, hash, &idx_tmp_name);
    +-			rename_tmp_packfile_idx(&tmpname, hash, &idx_tmp_name);
      
      			if (write_bitmap_index) {
      				struct strbuf sb = STRBUF_INIT;
    @@ builtin/pack-objects.c: static void write_pack_file(void)
      				strbuf_release(&sb);
      			}
      
    -+			/*
    -+			 * We must write the *.idx last, so that anything that expects
    -+			 * an accompanying *.rev, *.bitmap etc. can count on it being
    -+			 * present.
    -+			 *
    -+			 * See also corresponding logic in the "exts"
    -+			 * struct in builtin/repack.c
    -+			 */
    -+			rename_tmp_packfile_idx(&tmp_basename, hash, &idx_tmp_name);
    ++			rename_tmp_packfile_idx(&tmpname, hash, &idx_tmp_name);
     +
      			free(idx_tmp_name);
    - 			strbuf_release(&tmp_basename);
    + 			strbuf_release(&tmpname);
      			free(pack_tmp_name);
    -
    - ## builtin/repack.c ##
    -@@ builtin/repack.c: static struct {
    - 	{".rev", 1},
    - 	{".bitmap", 1},
    - 	{".promisor", 1},
    -+	/*
    -+	 * We must write the *.idx last, so that anything that expects
    -+	 * an accompanying *.rev, *.bitmap etc. can count on it being
    -+	 * present.
    -+	 *
    -+	 * See also corresponding logic in write_pack_file() in
    -+	 * builtin/pack-objects.c
    -+	 */
    - 	{".idx"},
    - };
    -