diff mbox series

[v3] merge-ort: fix segmentation fault in read-only repositories

Message ID pull.1362.v3.git.1663875999939.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v3] merge-ort: fix segmentation fault in read-only repositories | expand

Commit Message

Johannes Schindelin Sept. 22, 2022, 7:46 p.m. UTC
From: Johannes Schindelin <johannes.schindelin@gmx.de>

If the blob/tree objects cannot be written, we really need the merge
operations to fail, and not to continue (and then try to access the tree
object which is however still set to `NULL`).

Let's stop ignoring the return value of `write_object_file()` and
`write_tree()` and set `clean = -1` in the error case.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
    merge-tree: fix segmentation fault in read-only repositories
    
    Turns out that the segmentation fault reported by Taylor
    [https://lore.kernel.org/git/YyopQD+LvPucnz3w@nand.local/] happened
    while testing merge-ort in a read-only repository, and that the upstream
    version of git merge-tree is as affected as GitHub's internal version.
    
    Changes since v2:
    
     * Completely changed the approach by no longer touching
       builtin/merge-tree.c but instead fixing the underlying merge-ort
       layer: we no longer ignore the return value of write_tree() and
       write_object_file().
    
    Changes since v1:
    
     * Using the SANITY prereq now
     * If the merge was aborted due to a write error, exit with a non-zero
       code even if the (potentially partial) merge is clean
     * The test case now also verifies that the git merge-tree command fails
       in a read-only repository even if the merge would have succeeded

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1362%2Fdscho%2Fmerge-tree-in-read-only-repos-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1362/dscho/merge-tree-in-read-only-repos-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/1362

Range-diff vs v2:

 1:  1de4df3f471 ! 1:  198ff455f90 merge-tree: fix segmentation fault in read-only repositories
     @@ Metadata
      Author: Johannes Schindelin <Johannes.Schindelin@gmx.de>
      
       ## Commit message ##
     -    merge-tree: fix segmentation fault in read-only repositories
     +    merge-ort: fix segmentation fault in read-only repositories
      
     -    Independent of the question whether we want `git merge-tree` to report
     -    the tree name even when it failed to write the tree objects in a
     -    read-only repository, there is no question that we should avoid a
     -    segmentation fault.
     +    If the blob/tree objects cannot be written, we really need the merge
     +    operations to fail, and not to continue (and then try to access the tree
     +    object which is however still set to `NULL`).
      
     -    And when we report an invalid tree name (because the tree could not be
     -    written), we need the exit status to be non-zero.
     -
     -    Let's make it so.
     +    Let's stop ignoring the return value of `write_object_file()` and
     +    `write_tree()` and set `clean = -1` in the error case.
      
          Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
      
     - ## builtin/merge-tree.c ##
     -@@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
     - 	struct commit_list *merge_bases = NULL;
     - 	struct merge_options opt;
     - 	struct merge_result result = { 0 };
     -+	const struct object_id *tree_oid;
     - 
     - 	parent1 = get_merge_parent(branch1);
     - 	if (!parent1)
     -@@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
     - 	if (o->show_messages == -1)
     - 		o->show_messages = !result.clean;
     - 
     --	printf("%s%c", oid_to_hex(&result.tree->object.oid), line_termination);
     -+	tree_oid = result.tree ? &result.tree->object.oid : null_oid();
     -+	printf("%s%c", oid_to_hex(tree_oid), line_termination);
     - 	if (!result.clean) {
     - 		struct string_list conflicted_files = STRING_LIST_INIT_NODUP;
     - 		const char *last = NULL;
     -@@ builtin/merge-tree.c: static int real_merge(struct merge_tree_options *o,
     - 					      &result);
     + ## merge-ort.c ##
     +@@ merge-ort.c: static int tree_entry_order(const void *a_, const void *b_)
     + 				 b->string, strlen(b->string), bmi->result.mode);
     + }
     + 
     +-static void write_tree(struct object_id *result_oid,
     +-		       struct string_list *versions,
     +-		       unsigned int offset,
     +-		       size_t hash_size)
     ++static int write_tree(struct object_id *result_oid,
     ++		      struct string_list *versions,
     ++		      unsigned int offset,
     ++		      size_t hash_size)
     + {
     + 	size_t maxlen = 0, extra;
     + 	unsigned int nr;
     + 	struct strbuf buf = STRBUF_INIT;
     +-	int i;
     ++	int i, ret;
     + 
     + 	assert(offset <= versions->nr);
     + 	nr = versions->nr - offset;
     +@@ merge-ort.c: static void write_tree(struct object_id *result_oid,
     + 	}
     + 
     + 	/* Write this object file out, and record in result_oid */
     +-	write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid);
     ++	ret = write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid);
     + 	strbuf_release(&buf);
     ++	return ret;
     + }
     + 
     + static void record_entry_for_tree(struct directory_versions *dir_metadata,
     +@@ merge-ort.c: static void record_entry_for_tree(struct directory_versions *dir_metadata,
     + 			   basename)->util = &mi->result;
     + }
     + 
     +-static void write_completed_directory(struct merge_options *opt,
     +-				      const char *new_directory_name,
     +-				      struct directory_versions *info)
     ++static int write_completed_directory(struct merge_options *opt,
     ++				     const char *new_directory_name,
     ++				     struct directory_versions *info)
     + {
     + 	const char *prev_dir;
     + 	struct merged_info *dir_info = NULL;
     +-	unsigned int offset;
     ++	unsigned int offset, ret = 0;
     + 
     + 	/*
     + 	 * Some explanation of info->versions and info->offsets...
     +@@ merge-ort.c: static void write_completed_directory(struct merge_options *opt,
     + 	 * strcmp here.)
     + 	 */
     + 	if (new_directory_name == info->last_directory)
     +-		return;
     ++		return 0;
     + 
     + 	/*
     + 	 * If we are just starting (last_directory is NULL), or last_directory
     +@@ merge-ort.c: static void write_completed_directory(struct merge_options *opt,
     + 		 */
     + 		string_list_append(&info->offsets,
     + 				   info->last_directory)->util = (void*)offset;
     +-		return;
     ++		return 0;
     + 	}
     + 
     + 	/*
     +@@ merge-ort.c: static void write_completed_directory(struct merge_options *opt,
     + 		 */
     + 		dir_info->is_null = 0;
     + 		dir_info->result.mode = S_IFDIR;
     +-		write_tree(&dir_info->result.oid, &info->versions, offset,
     +-			   opt->repo->hash_algo->rawsz);
     ++		if (write_tree(&dir_info->result.oid, &info->versions, offset,
     ++			       opt->repo->hash_algo->rawsz) < 0)
     ++			ret = -1;
     + 	}
     + 
     + 	/*
     +@@ merge-ort.c: static void write_completed_directory(struct merge_options *opt,
     + 	/* And, of course, we need to update last_directory to match. */
     + 	info->last_directory = new_directory_name;
     + 	info->last_directory_len = strlen(info->last_directory);
     ++
     ++	return ret;
     + }
     + 
     + /* Per entry merge function */
     +@@ merge-ort.c: static void prefetch_for_content_merges(struct merge_options *opt,
     + 	oid_array_clear(&to_fetch);
     + }
     + 
     +-static void process_entries(struct merge_options *opt,
     +-			    struct object_id *result_oid)
     ++static int process_entries(struct merge_options *opt,
     ++			   struct object_id *result_oid)
     + {
     + 	struct hashmap_iter iter;
     + 	struct strmap_entry *e;
     +@@ merge-ort.c: static void process_entries(struct merge_options *opt,
     + 	struct directory_versions dir_metadata = { STRING_LIST_INIT_NODUP,
     + 						   STRING_LIST_INIT_NODUP,
     + 						   NULL, 0 };
     ++	int ret;
     + 
     + 	trace2_region_enter("merge", "process_entries setup", opt->repo);
     + 	if (strmap_empty(&opt->priv->paths)) {
     + 		oidcpy(result_oid, opt->repo->hash_algo->empty_tree);
     +-		return;
     ++		return 0;
       	}
     - 	merge_finalize(&opt, &result);
     --	return !result.clean; /* result.clean < 0 handled above */
     -+	return !result.tree || !result.clean; /* result.clean < 0 handled above */
     + 
     + 	/* Hack to pre-allocate plist to the desired size */
     +@@ merge-ort.c: static void process_entries(struct merge_options *opt,
     + 		 */
     + 		struct merged_info *mi = entry->util;
     + 
     +-		write_completed_directory(opt, mi->directory_name,
     +-					  &dir_metadata);
     ++		if (write_completed_directory(opt, mi->directory_name,
     ++					      &dir_metadata) < 0) {
     ++			ret = -1;
     ++			goto cleanup;
     ++		}
     + 		if (mi->clean)
     + 			record_entry_for_tree(&dir_metadata, path, mi);
     + 		else {
     +@@ merge-ort.c: static void process_entries(struct merge_options *opt,
     + 		fflush(stdout);
     + 		BUG("dir_metadata accounting completely off; shouldn't happen");
     + 	}
     +-	write_tree(result_oid, &dir_metadata.versions, 0,
     +-		   opt->repo->hash_algo->rawsz);
     ++	ret = write_tree(result_oid, &dir_metadata.versions, 0,
     ++			 opt->repo->hash_algo->rawsz);
     ++cleanup:
     + 	string_list_clear(&plist, 0);
     + 	string_list_clear(&dir_metadata.versions, 0);
     + 	string_list_clear(&dir_metadata.offsets, 0);
     + 	trace2_region_leave("merge", "process_entries cleanup", opt->repo);
     ++
     ++	return ret;
       }
       
     - int cmd_merge_tree(int argc, const char **argv, const char *prefix)
     + /*** Function Grouping: functions related to merge_switch_to_result() ***/
     +@@ merge-ort.c: redo:
     + 	}
     + 
     + 	trace2_region_enter("merge", "process_entries", opt->repo);
     +-	process_entries(opt, &working_tree_oid);
     ++	if (process_entries(opt, &working_tree_oid) < 0)
     ++		result->clean = -1;
     + 	trace2_region_leave("merge", "process_entries", opt->repo);
     + 
     + 	/* Set return values */
     + 	result->path_messages = &opt->priv->conflicts;
     + 
     +-	result->tree = parse_tree_indirect(&working_tree_oid);
     +-	/* existence of conflicted entries implies unclean */
     +-	result->clean &= strmap_empty(&opt->priv->conflicted);
     ++	if (result->clean >= 0) {
     ++		result->tree = parse_tree_indirect(&working_tree_oid);
     ++		/* existence of conflicted entries implies unclean */
     ++		result->clean &= strmap_empty(&opt->priv->conflicted);
     ++	}
     + 	if (!opt->priv->call_depth) {
     + 		result->priv = opt->priv;
     + 		result->_properly_initialized = RESULT_INITIALIZED;
      
       ## t/t4301-merge-tree-write-tree.sh ##
      @@ t/t4301-merge-tree-write-tree.sh: test_expect_success 'can override merge of unrelated histories' '


 merge-ort.c                      | 64 +++++++++++++++++++-------------
 t/t4301-merge-tree-write-tree.sh |  9 +++++
 2 files changed, 48 insertions(+), 25 deletions(-)


base-commit: dda7228a83e2e9ff584bf6adbf55910565b41e14

Comments

Elijah Newren Sept. 23, 2022, 1:38 a.m. UTC | #1
On Thu, Sep 22, 2022 at 12:46 PM Johannes Schindelin via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
>
> If the blob/tree objects cannot be written, we really need the merge
> operations to fail, and not to continue (and then try to access the tree
> object which is however still set to `NULL`).
>
> Let's stop ignoring the return value of `write_object_file()` and
> `write_tree()` and set `clean = -1` in the error case.

:-)

>  merge-ort.c                      | 64 +++++++++++++++++++-------------
>  t/t4301-merge-tree-write-tree.sh |  9 +++++
>  2 files changed, 48 insertions(+), 25 deletions(-)
>
> diff --git a/merge-ort.c b/merge-ort.c
> index 99dcee2db8a..f654296220e 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -3571,15 +3571,15 @@ static int tree_entry_order(const void *a_, const void *b_)
>                                  b->string, strlen(b->string), bmi->result.mode);
>  }
>
> -static void write_tree(struct object_id *result_oid,
> -                      struct string_list *versions,
> -                      unsigned int offset,
> -                      size_t hash_size)
> +static int write_tree(struct object_id *result_oid,
> +                     struct string_list *versions,
> +                     unsigned int offset,
> +                     size_t hash_size)
>  {
>         size_t maxlen = 0, extra;
>         unsigned int nr;
>         struct strbuf buf = STRBUF_INIT;
> -       int i;
> +       int i, ret;
>
>         assert(offset <= versions->nr);
>         nr = versions->nr - offset;
> @@ -3605,8 +3605,9 @@ static void write_tree(struct object_id *result_oid,
>         }
>
>         /* Write this object file out, and record in result_oid */
> -       write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid);
> +       ret = write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid);

This sent me into a bit of a hunt to try to figure out what the return
value of write_object_file() is.  I know it's non-zero on failure, but
don't know if it's always positive or always negative or possibly a
mix and how that maps to the idea of "clean_merge".  I gave up after a
few indirections, to be honest.

Anyway, regardless of my inability to find the answer to the above
question, would this be a bit easier to read if we initialized ret to
0 and then did something like

    if (write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid))
        ret = -1;

?

>         strbuf_release(&buf);
> +       return ret;
>  }
>
>  static void record_entry_for_tree(struct directory_versions *dir_metadata,
> @@ -3625,13 +3626,13 @@ static void record_entry_for_tree(struct directory_versions *dir_metadata,
>                            basename)->util = &mi->result;
>  }
>
> -static void write_completed_directory(struct merge_options *opt,
> -                                     const char *new_directory_name,
> -                                     struct directory_versions *info)
> +static int write_completed_directory(struct merge_options *opt,
> +                                    const char *new_directory_name,
> +                                    struct directory_versions *info)
>  {
>         const char *prev_dir;
>         struct merged_info *dir_info = NULL;
> -       unsigned int offset;
> +       unsigned int offset, ret = 0;
>
>         /*
>          * Some explanation of info->versions and info->offsets...
> @@ -3717,7 +3718,7 @@ static void write_completed_directory(struct merge_options *opt,
>          * strcmp here.)
>          */
>         if (new_directory_name == info->last_directory)
> -               return;
> +               return 0;
>
>         /*
>          * If we are just starting (last_directory is NULL), or last_directory
> @@ -3739,7 +3740,7 @@ static void write_completed_directory(struct merge_options *opt,
>                  */
>                 string_list_append(&info->offsets,
>                                    info->last_directory)->util = (void*)offset;
> -               return;
> +               return 0;
>         }
>
>         /*
> @@ -3769,8 +3770,9 @@ static void write_completed_directory(struct merge_options *opt,
>                  */
>                 dir_info->is_null = 0;
>                 dir_info->result.mode = S_IFDIR;
> -               write_tree(&dir_info->result.oid, &info->versions, offset,
> -                          opt->repo->hash_algo->rawsz);
> +               if (write_tree(&dir_info->result.oid, &info->versions, offset,
> +                              opt->repo->hash_algo->rawsz) < 0)
> +                       ret = -1;

This makes me wonder about write_tree() again.  What if its call to
write_object_file() returns a value greater than 0?  Is that possible?
 If it is, are those error cases or not?  About half the callers in
the code base that check the return value of write_object_file() check
for a non-zero value, the other half check for a value less than 0.
And I can't find any documentation.  And it seemed like too much time
for me to figure out what its range of return values was.

>         }
>
>         /*
> @@ -3798,6 +3800,8 @@ static void write_completed_directory(struct merge_options *opt,
>         /* And, of course, we need to update last_directory to match. */
>         info->last_directory = new_directory_name;
>         info->last_directory_len = strlen(info->last_directory);
> +
> +       return ret;
>  }
>
>  /* Per entry merge function */
> @@ -4214,8 +4218,8 @@ static void prefetch_for_content_merges(struct merge_options *opt,
>         oid_array_clear(&to_fetch);
>  }
>
> -static void process_entries(struct merge_options *opt,
> -                           struct object_id *result_oid)
> +static int process_entries(struct merge_options *opt,
> +                          struct object_id *result_oid)
>  {
>         struct hashmap_iter iter;
>         struct strmap_entry *e;
> @@ -4224,11 +4228,12 @@ static void process_entries(struct merge_options *opt,
>         struct directory_versions dir_metadata = { STRING_LIST_INIT_NODUP,
>                                                    STRING_LIST_INIT_NODUP,
>                                                    NULL, 0 };
> +       int ret;
>
>         trace2_region_enter("merge", "process_entries setup", opt->repo);
>         if (strmap_empty(&opt->priv->paths)) {
>                 oidcpy(result_oid, opt->repo->hash_algo->empty_tree);
> -               return;
> +               return 0;
>         }
>
>         /* Hack to pre-allocate plist to the desired size */
> @@ -4270,8 +4275,11 @@ static void process_entries(struct merge_options *opt,
>                  */
>                 struct merged_info *mi = entry->util;
>
> -               write_completed_directory(opt, mi->directory_name,
> -                                         &dir_metadata);
> +               if (write_completed_directory(opt, mi->directory_name,
> +                                             &dir_metadata) < 0) {
> +                       ret = -1;
> +                       goto cleanup;
> +               }
>                 if (mi->clean)
>                         record_entry_for_tree(&dir_metadata, path, mi);
>                 else {
> @@ -4291,12 +4299,15 @@ static void process_entries(struct merge_options *opt,
>                 fflush(stdout);
>                 BUG("dir_metadata accounting completely off; shouldn't happen");
>         }
> -       write_tree(result_oid, &dir_metadata.versions, 0,
> -                  opt->repo->hash_algo->rawsz);
> +       ret = write_tree(result_oid, &dir_metadata.versions, 0,
> +                        opt->repo->hash_algo->rawsz);
> +cleanup:
>         string_list_clear(&plist, 0);
>         string_list_clear(&dir_metadata.versions, 0);
>         string_list_clear(&dir_metadata.offsets, 0);
>         trace2_region_leave("merge", "process_entries cleanup", opt->repo);
> +
> +       return ret;
>  }
>
>  /*** Function Grouping: functions related to merge_switch_to_result() ***/
> @@ -4928,15 +4939,18 @@ redo:
>         }
>
>         trace2_region_enter("merge", "process_entries", opt->repo);
> -       process_entries(opt, &working_tree_oid);
> +       if (process_entries(opt, &working_tree_oid) < 0)
> +               result->clean = -1;
>         trace2_region_leave("merge", "process_entries", opt->repo);
>
>         /* Set return values */
>         result->path_messages = &opt->priv->conflicts;
>
> -       result->tree = parse_tree_indirect(&working_tree_oid);
> -       /* existence of conflicted entries implies unclean */
> -       result->clean &= strmap_empty(&opt->priv->conflicted);
> +       if (result->clean >= 0) {
> +               result->tree = parse_tree_indirect(&working_tree_oid);
> +               /* existence of conflicted entries implies unclean */
> +               result->clean &= strmap_empty(&opt->priv->conflicted);
> +       }
>         if (!opt->priv->call_depth) {
>                 result->priv = opt->priv;
>                 result->_properly_initialized = RESULT_INITIALIZED;
> diff --git a/t/t4301-merge-tree-write-tree.sh b/t/t4301-merge-tree-write-tree.sh
> index 28ca5c38bb5..013b77144bd 100755
> --- a/t/t4301-merge-tree-write-tree.sh
> +++ b/t/t4301-merge-tree-write-tree.sh
> @@ -810,4 +810,13 @@ test_expect_success 'can override merge of unrelated histories' '
>         test_cmp expect actual
>  '
>
> +test_expect_success SANITY 'merge-ort fails gracefully in a read-only repository' '
> +       git init --bare read-only &&
> +       git push read-only side1 side2 side3 &&
> +       test_when_finished "chmod -R u+w read-only" &&
> +       chmod -R a-w read-only &&
> +       test_must_fail git -C read-only merge-tree side1 side3 &&
> +       test_must_fail git -C read-only merge-tree side1 side2
> +'
> +
>  test_done
>
> base-commit: dda7228a83e2e9ff584bf6adbf55910565b41e14
> --
> gitgitgadget

Nice to see that it didn't take too much work to propagate the -1
value from write_tree() up the hierarchy.  Nice work!

I think we've still got some separate problems related to propagating
the return value of the other write_object_file() call, from
handle_content_merge().  The direct call in that other case is okay,
but the higher levels at process_renames() and process_entry() seem to
fumble it.  Your fix for failed tree writes is still valid without
fixing the failed blob writes, and I'm happy to tackle the other half
if you'd prefer to hand it off.  But, if you'd like to tackle that
half too, you might see fewer error messages when it fails to write to
the read-only repository, since it'll fail early on the first blob
write instead of only failing when it gets around to trying to write a
new tree.
Johannes Schindelin Sept. 23, 2022, 9:55 a.m. UTC | #2
Hi Elijah,

On Thu, 22 Sep 2022, Elijah Newren wrote:

> On Thu, Sep 22, 2022 at 12:46 PM Johannes Schindelin via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > diff --git a/merge-ort.c b/merge-ort.c
> > index 99dcee2db8a..f654296220e 100644
> > --- a/merge-ort.c
> > +++ b/merge-ort.c
> > @@ -3571,15 +3571,15 @@ static int tree_entry_order(const void *a_, const void *b_)
> >                                  b->string, strlen(b->string), bmi->result.mode);
> >  }
> >
> > -static void write_tree(struct object_id *result_oid,
> > -                      struct string_list *versions,
> > -                      unsigned int offset,
> > -                      size_t hash_size)
> > +static int write_tree(struct object_id *result_oid,
> > +                     struct string_list *versions,
> > +                     unsigned int offset,
> > +                     size_t hash_size)
> >  {
> >         size_t maxlen = 0, extra;
> >         unsigned int nr;
> >         struct strbuf buf = STRBUF_INIT;
> > -       int i;
> > +       int i, ret;
> >
> >         assert(offset <= versions->nr);
> >         nr = versions->nr - offset;
> > @@ -3605,8 +3605,9 @@ static void write_tree(struct object_id *result_oid,
> >         }
> >
> >         /* Write this object file out, and record in result_oid */
> > -       write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid);
> > +       ret = write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid);
>
> This sent me into a bit of a hunt to try to figure out what the return
> value of write_object_file() is.  I know it's non-zero on failure, but
> don't know if it's always positive or always negative or possibly a
> mix and how that maps to the idea of "clean_merge".  I gave up after a
> few indirections, to be honest.
>
> Anyway, regardless of my inability to find the answer to the above
> question, would this be a bit easier to read if we initialized ret to
> 0 and then did something like
>
>     if (write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid))
>         ret = -1;
>
> ?

You're right, this is a better pattern to follow because it makes it
easier to wrap your head around.

> > [...]
> > @@ -3769,8 +3770,9 @@ static void write_completed_directory(struct merge_options *opt,
> >                  */
> >                 dir_info->is_null = 0;
> >                 dir_info->result.mode = S_IFDIR;
> > -               write_tree(&dir_info->result.oid, &info->versions, offset,
> > -                          opt->repo->hash_algo->rawsz);
> > +               if (write_tree(&dir_info->result.oid, &info->versions, offset,
> > +                              opt->repo->hash_algo->rawsz) < 0)
> > +                       ret = -1;
>
> This makes me wonder about write_tree() again.  What if its call to
> write_object_file() returns a value greater than 0?  Is that possible?

No, `write_object_file()` returns either 0 or -1, always. I followed all
the code paths.

Here is a tangent (feel free to skip over this lengthy analysis):

These code paths include a _very, very_ misleading one. In
`write_loose_object()` (which is called via the call chain
`write_object_file()` -> `write_object_file_flags()` ->
`write_object_file_flags()`), we call `finalize_object_file()`, see
https://github.com/git/git/blob/v2.37.3/object-file.c#L2030). And in that
`finalize_object_file()` function, we set `ret = errno;` in two places
(https://github.com/git/git/blob/v2.37.3/object-file.c#L1833 and
https://github.com/git/git/blob/v2.37.3/object-file.c#L1850).

When I read that, I chased down what the conventions are for `errno`
values. Now, while
https://pubs.opengroup.org/onlinepubs/007908799/xsh/errno.h.html does not
say anything about `errno` values being non-negative,
https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
very much says:

	Values for errno are now required to be distinct positive values
	rather than non-zero values. This change is for alignment with the
	ISO/IEC 9899:1999 standard.

Terrible, right? If we set `ret = errno`, that means that we return
something positive!

I then even dug up the origins of the `ret = errno` statements. Turns out
that yours truly introduced the first of these `ret = errno` statements in
9e48b389990 (Work around missing hard links on FAT formatted media,
2005-10-26). :facepalm:

So why did I do such a thing? Well, back then, the `ret` value _already_
contained an `errno`, thanks to calling `link_temp_to_file()`, which _did_
return an `errno`. In fact, there was no code path in
`move_temp_to_file()` (as the function was called back then) that would
return -1, it always returned either 0 or an `errno`.

And after coming so far, after one hour of analyzing, I finally, finally
realized that there is no `return ret;` in that function. That function
has a `ret` variable that is never returned _from_ that function, but it
used to be returned _to_ that function, by the `link_temp_to_file()`
function that does not even exist any longer.

In my defense, this terribly misleading code was not even introduced by
myself. It was introduced in 230f13225df (Create object subdirectories on
demand, 2005-10-08).

tl;dr all is good, `finalize_object_file()` returns 0 upon success, -1
upon failure.

> If it is, are those error cases or not?  About half the callers in the
> code base that check the return value of write_object_file() check for a
> non-zero value, the other half check for a value less than 0.

And then there is `apply.c` where the return value isn't even checked at
all most of the time.

> And I can't find any documentation.  And it seemed like too much time
> for me to figure out what its range of return values was.
>
> [...]
>
> Nice to see that it didn't take too much work to propagate the -1 value
>from write_tree() up the hierarchy.  Nice work!

Thank you!

> I think we've still got some separate problems related to propagating
> the return value of the other write_object_file() call, from
> handle_content_merge().  The direct call in that other case is okay, but
> the higher levels at process_renames() and process_entry() seem to
> fumble it.  Your fix for failed tree writes is still valid without
> fixing the failed blob writes, and I'm happy to tackle the other half if
> you'd prefer to hand it off.  But, if you'd like to tackle that half
> too, you might see fewer error messages when it fails to write to the
> read-only repository, since it'll fail early on the first blob write
> instead of only failing when it gets around to trying to write a new
> tree.

It'll be part of v4 ;-)

Ciao,
Dscho
diff mbox series

Patch

diff --git a/merge-ort.c b/merge-ort.c
index 99dcee2db8a..f654296220e 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -3571,15 +3571,15 @@  static int tree_entry_order(const void *a_, const void *b_)
 				 b->string, strlen(b->string), bmi->result.mode);
 }
 
-static void write_tree(struct object_id *result_oid,
-		       struct string_list *versions,
-		       unsigned int offset,
-		       size_t hash_size)
+static int write_tree(struct object_id *result_oid,
+		      struct string_list *versions,
+		      unsigned int offset,
+		      size_t hash_size)
 {
 	size_t maxlen = 0, extra;
 	unsigned int nr;
 	struct strbuf buf = STRBUF_INIT;
-	int i;
+	int i, ret;
 
 	assert(offset <= versions->nr);
 	nr = versions->nr - offset;
@@ -3605,8 +3605,9 @@  static void write_tree(struct object_id *result_oid,
 	}
 
 	/* Write this object file out, and record in result_oid */
-	write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid);
+	ret = write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid);
 	strbuf_release(&buf);
+	return ret;
 }
 
 static void record_entry_for_tree(struct directory_versions *dir_metadata,
@@ -3625,13 +3626,13 @@  static void record_entry_for_tree(struct directory_versions *dir_metadata,
 			   basename)->util = &mi->result;
 }
 
-static void write_completed_directory(struct merge_options *opt,
-				      const char *new_directory_name,
-				      struct directory_versions *info)
+static int write_completed_directory(struct merge_options *opt,
+				     const char *new_directory_name,
+				     struct directory_versions *info)
 {
 	const char *prev_dir;
 	struct merged_info *dir_info = NULL;
-	unsigned int offset;
+	unsigned int offset, ret = 0;
 
 	/*
 	 * Some explanation of info->versions and info->offsets...
@@ -3717,7 +3718,7 @@  static void write_completed_directory(struct merge_options *opt,
 	 * strcmp here.)
 	 */
 	if (new_directory_name == info->last_directory)
-		return;
+		return 0;
 
 	/*
 	 * If we are just starting (last_directory is NULL), or last_directory
@@ -3739,7 +3740,7 @@  static void write_completed_directory(struct merge_options *opt,
 		 */
 		string_list_append(&info->offsets,
 				   info->last_directory)->util = (void*)offset;
-		return;
+		return 0;
 	}
 
 	/*
@@ -3769,8 +3770,9 @@  static void write_completed_directory(struct merge_options *opt,
 		 */
 		dir_info->is_null = 0;
 		dir_info->result.mode = S_IFDIR;
-		write_tree(&dir_info->result.oid, &info->versions, offset,
-			   opt->repo->hash_algo->rawsz);
+		if (write_tree(&dir_info->result.oid, &info->versions, offset,
+			       opt->repo->hash_algo->rawsz) < 0)
+			ret = -1;
 	}
 
 	/*
@@ -3798,6 +3800,8 @@  static void write_completed_directory(struct merge_options *opt,
 	/* And, of course, we need to update last_directory to match. */
 	info->last_directory = new_directory_name;
 	info->last_directory_len = strlen(info->last_directory);
+
+	return ret;
 }
 
 /* Per entry merge function */
@@ -4214,8 +4218,8 @@  static void prefetch_for_content_merges(struct merge_options *opt,
 	oid_array_clear(&to_fetch);
 }
 
-static void process_entries(struct merge_options *opt,
-			    struct object_id *result_oid)
+static int process_entries(struct merge_options *opt,
+			   struct object_id *result_oid)
 {
 	struct hashmap_iter iter;
 	struct strmap_entry *e;
@@ -4224,11 +4228,12 @@  static void process_entries(struct merge_options *opt,
 	struct directory_versions dir_metadata = { STRING_LIST_INIT_NODUP,
 						   STRING_LIST_INIT_NODUP,
 						   NULL, 0 };
+	int ret;
 
 	trace2_region_enter("merge", "process_entries setup", opt->repo);
 	if (strmap_empty(&opt->priv->paths)) {
 		oidcpy(result_oid, opt->repo->hash_algo->empty_tree);
-		return;
+		return 0;
 	}
 
 	/* Hack to pre-allocate plist to the desired size */
@@ -4270,8 +4275,11 @@  static void process_entries(struct merge_options *opt,
 		 */
 		struct merged_info *mi = entry->util;
 
-		write_completed_directory(opt, mi->directory_name,
-					  &dir_metadata);
+		if (write_completed_directory(opt, mi->directory_name,
+					      &dir_metadata) < 0) {
+			ret = -1;
+			goto cleanup;
+		}
 		if (mi->clean)
 			record_entry_for_tree(&dir_metadata, path, mi);
 		else {
@@ -4291,12 +4299,15 @@  static void process_entries(struct merge_options *opt,
 		fflush(stdout);
 		BUG("dir_metadata accounting completely off; shouldn't happen");
 	}
-	write_tree(result_oid, &dir_metadata.versions, 0,
-		   opt->repo->hash_algo->rawsz);
+	ret = write_tree(result_oid, &dir_metadata.versions, 0,
+			 opt->repo->hash_algo->rawsz);
+cleanup:
 	string_list_clear(&plist, 0);
 	string_list_clear(&dir_metadata.versions, 0);
 	string_list_clear(&dir_metadata.offsets, 0);
 	trace2_region_leave("merge", "process_entries cleanup", opt->repo);
+
+	return ret;
 }
 
 /*** Function Grouping: functions related to merge_switch_to_result() ***/
@@ -4928,15 +4939,18 @@  redo:
 	}
 
 	trace2_region_enter("merge", "process_entries", opt->repo);
-	process_entries(opt, &working_tree_oid);
+	if (process_entries(opt, &working_tree_oid) < 0)
+		result->clean = -1;
 	trace2_region_leave("merge", "process_entries", opt->repo);
 
 	/* Set return values */
 	result->path_messages = &opt->priv->conflicts;
 
-	result->tree = parse_tree_indirect(&working_tree_oid);
-	/* existence of conflicted entries implies unclean */
-	result->clean &= strmap_empty(&opt->priv->conflicted);
+	if (result->clean >= 0) {
+		result->tree = parse_tree_indirect(&working_tree_oid);
+		/* existence of conflicted entries implies unclean */
+		result->clean &= strmap_empty(&opt->priv->conflicted);
+	}
 	if (!opt->priv->call_depth) {
 		result->priv = opt->priv;
 		result->_properly_initialized = RESULT_INITIALIZED;
diff --git a/t/t4301-merge-tree-write-tree.sh b/t/t4301-merge-tree-write-tree.sh
index 28ca5c38bb5..013b77144bd 100755
--- a/t/t4301-merge-tree-write-tree.sh
+++ b/t/t4301-merge-tree-write-tree.sh
@@ -810,4 +810,13 @@  test_expect_success 'can override merge of unrelated histories' '
 	test_cmp expect actual
 '
 
+test_expect_success SANITY 'merge-ort fails gracefully in a read-only repository' '
+	git init --bare read-only &&
+	git push read-only side1 side2 side3 &&
+	test_when_finished "chmod -R u+w read-only" &&
+	chmod -R a-w read-only &&
+	test_must_fail git -C read-only merge-tree side1 side3 &&
+	test_must_fail git -C read-only merge-tree side1 side2
+'
+
 test_done