apply: reload .gitattributes after patching it
diff mbox series

Message ID 20190809100217.427178-1-sandals@crustytoothpaste.net
State New
Headers show
Series
  • apply: reload .gitattributes after patching it
Related show

Commit Message

brian m. carlson Aug. 9, 2019, 10:02 a.m. UTC
When applying multiple patches with git am, or when rebasing using the
am backend, it's possible that one of our patches has updated a
gitattributes file. Currently, we cache this information, so if a
file in a subsequent patch has attributes applied, the file will be
written out with the attributes in place as of the time we started the
rebase or am operation, not with the attributes applied by the previous
patch. This problem does not occur when using the -m or -i flags to
rebase.

To ensure we write the correct data into the working tree, expire the
cache after each patch that touches a path ending in ".gitattributes".

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
---
 apply.c           | 11 +++++++++++
 convert.c         |  9 ++++++++-
 convert.h         |  6 ++++++
 t/t3400-rebase.sh | 23 +++++++++++++++++++++++
 4 files changed, 48 insertions(+), 1 deletion(-)

Comments

Taylor Blau Aug. 9, 2019, 11:14 a.m. UTC | #1
Hi brian,

On Fri, Aug 09, 2019 at 10:02:17AM +0000, brian m. carlson wrote:
> When applying multiple patches with git am, or when rebasing using the
> am backend, it's possible that one of our patches has updated a
> gitattributes file. Currently, we cache this information, so if a
> file in a subsequent patch has attributes applied, the file will be
> written out with the attributes in place as of the time we started the
> rebase or am operation, not with the attributes applied by the previous
> patch. This problem does not occur when using the -m or -i flags to
> rebase.

Ah. Thanks for a description of the issue.

> To ensure we write the correct data into the working tree, expire the
> cache after each patch that touches a path ending in ".gitattributes".

OK, that seems like a sensible direction...

> Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
> ---
>  apply.c           | 11 +++++++++++
>  convert.c         |  9 ++++++++-
>  convert.h         |  6 ++++++
>  t/t3400-rebase.sh | 23 +++++++++++++++++++++++
>  4 files changed, 48 insertions(+), 1 deletion(-)
>
> diff --git a/apply.c b/apply.c
> index cde95369bb..b959b88b8e 100644
> --- a/apply.c
> +++ b/apply.c
> @@ -4643,6 +4643,7 @@ static int apply_patch(struct apply_state *state,
>  	struct patch *list = NULL, **listp = &list;
>  	int skipped_patch = 0;
>  	int res = 0;
> +	int flush_attributes = 0;
>
>  	state->patch_input_file = filename;
>  	if (read_patch_file(&buf, fd) < 0)
> @@ -4670,6 +4671,14 @@ static int apply_patch(struct apply_state *state,
>  			patch_stats(state, patch);
>  			*listp = patch;
>  			listp = &patch->next;
> +
> +			if (!flush_attributes && patch->new_name) {
> +				char *dummy = strip_path_suffix(patch->new_name, GITATTRIBUTES_FILE);

It's a shame that 'strip_path_suffix' doesn't take a 'char *out', and
accept NULL for that (which would save us the assignment and subsequent
'free'). In either case, this is certainly the appropriate function.

But, I'm not sure if 'dummy' is the best name for this variable or not.
My first thought was that I'd expect this to be named 'suffix' (or the
less descriptive 'p'), but I don't know if the change is warranted.
What *are* we supposed to call this variable?
"path_after_gitattributes" ;-)?

> +				/* The patch applied to a .gitattributes file. */
> +				if (dummy)
> +					flush_attributes = 1;
> +				free(dummy);
> +			}
>  		}
>  		else {
>  			if (state->apply_verbosity > verbosity_normal)
> @@ -4746,6 +4755,8 @@ static int apply_patch(struct apply_state *state,
>  	if (state->summary && state->apply_verbosity > verbosity_silent)
>  		summary_patch_list(list);
>
> +	if (flush_attributes)
> +		reset_parsed_attributes();
>  end:
>  	free_patch_list(list);
>  	strbuf_release(&buf);
> diff --git a/convert.c b/convert.c
> index 94ff837649..030e9b81b9 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -1293,10 +1293,11 @@ struct conv_attrs {
>  	const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */
>  };
>
> +static struct attr_check *check;
> +
>  static void convert_attrs(const struct index_state *istate,
>  			  struct conv_attrs *ca, const char *path)
>  {
> -	static struct attr_check *check;
>  	struct attr_check_item *ccheck = NULL;
>
>  	if (!check) {
> @@ -1339,6 +1340,12 @@ static void convert_attrs(const struct index_state *istate,
>  		ca->crlf_action = CRLF_AUTO_INPUT;
>  }
>
> +void reset_parsed_attributes(void)
> +{
> +	attr_check_free(check);
> +	check = NULL;
> +}
> +

Makes sense.

>  int would_convert_to_git_filter_fd(const struct index_state *istate, const char *path)
>  {
>  	struct conv_attrs ca;
> diff --git a/convert.h b/convert.h
> index 831559f10d..3710969d43 100644
> --- a/convert.h
> +++ b/convert.h
> @@ -94,6 +94,12 @@ void convert_to_git_filter_fd(const struct index_state *istate,
>  int would_convert_to_git_filter_fd(const struct index_state *istate,
>  				   const char *path);
>
> +/*
> + * Reset the internal list of attributes used by convert_to_git and
> + * convert_to_working_tree.
> + */
> +void reset_parsed_attributes(void);
> +
>  /*****************************************************************
>   *
>   * Streaming conversion support
> diff --git a/t/t3400-rebase.sh b/t/t3400-rebase.sh
> index 80b23fd326..062dc41df7 100755
> --- a/t/t3400-rebase.sh
> +++ b/t/t3400-rebase.sh
> @@ -301,6 +301,29 @@ test_expect_success 'rebase --am and --show-current-patch' '
>  	)
>  '
>
> +test_expect_success 'rebase --am and .gitattributes' '
> +	test_create_repo attributes &&
> +	(
> +		cd attributes &&
> +		test_commit init &&
> +		test_config filter.test.clean "sed -e '\''s/smudged/clean/g'\''" &&
> +		test_config filter.test.smudge "sed -e '\''s/clean/smudged/g'\''" &&
> +
> +		test_commit second &&
> +		git checkout -b test HEAD^ &&
> +
> +		echo "*.txt filter=test" >.gitattributes &&
> +		git add .gitattributes &&
> +		test_commit third &&
> +
> +		echo "This text is smudged." >a.txt &&
> +		git add a.txt &&
> +		test_commit fourth &&
> +		git rebase master &&
> +		grep "smudged" a.txt
> +	)
> +'
> +

This definitely exercise the behavior here. Thanks for adding a test
while you're at it.

>  test_expect_success 'rebase--merge.sh and --show-current-patch' '
>  	test_create_repo conflict-merge &&
>  	(

I wouldn't be opposed to renaming 'dummy' to be something else, but in
the case that you don't feel a re-roll is necessary, please consider my:

  Reviewed-by: Taylor Blau <me@ttaylorr.com>

Thanks,
Taylor
brian m. carlson Aug. 9, 2019, 11:25 a.m. UTC | #2
On 2019-08-09 at 11:14:52, Taylor Blau wrote:
> > diff --git a/apply.c b/apply.c
> > index cde95369bb..b959b88b8e 100644
> > --- a/apply.c
> > +++ b/apply.c
> > @@ -4643,6 +4643,7 @@ static int apply_patch(struct apply_state *state,
> >  	struct patch *list = NULL, **listp = &list;
> >  	int skipped_patch = 0;
> >  	int res = 0;
> > +	int flush_attributes = 0;
> >
> >  	state->patch_input_file = filename;
> >  	if (read_patch_file(&buf, fd) < 0)
> > @@ -4670,6 +4671,14 @@ static int apply_patch(struct apply_state *state,
> >  			patch_stats(state, patch);
> >  			*listp = patch;
> >  			listp = &patch->next;
> > +
> > +			if (!flush_attributes && patch->new_name) {
> > +				char *dummy = strip_path_suffix(patch->new_name, GITATTRIBUTES_FILE);
> 
> It's a shame that 'strip_path_suffix' doesn't take a 'char *out', and
> accept NULL for that (which would save us the assignment and subsequent
> 'free'). In either case, this is certainly the appropriate function.

Yeah, I felt the same way. We could refactor this out into two separate
functions, one which returns an ssize_t and one which actually
allocates, but I'm not sure it makes a huge amount of sense with just
one caller. The allocation is relatively small, and I've tried to make
sure it's called exactly once per patch so as not to be wasteful and
inefficient.

> But, I'm not sure if 'dummy' is the best name for this variable or not.
> My first thought was that I'd expect this to be named 'suffix' (or the
> less descriptive 'p'), but I don't know if the change is warranted.
> What *are* we supposed to call this variable?
> "path_after_gitattributes" ;-)?

I struggled with this variable name, as I'm sure you guessed. I could
call it "has_suffix" or something similar. I have mixed feelings about
naming pointer variables like booleans since it makes things prone to
misreading, but I think that might be the sanest thing to do here. I can
certainly do "p" as well.

I'll see if there are other comments about directions to take here, and
then reroll.
Jeff King Aug. 9, 2019, 11:36 a.m. UTC | #3
On Fri, Aug 09, 2019 at 11:25:52AM +0000, brian m. carlson wrote:

> > > +			if (!flush_attributes && patch->new_name) {
> > > +				char *dummy = strip_path_suffix(patch->new_name, GITATTRIBUTES_FILE);
> > 
> > It's a shame that 'strip_path_suffix' doesn't take a 'char *out', and
> > accept NULL for that (which would save us the assignment and subsequent
> > 'free'). In either case, this is certainly the appropriate function.
> 
> Yeah, I felt the same way. We could refactor this out into two separate
> functions, one which returns an ssize_t and one which actually
> allocates, but I'm not sure it makes a huge amount of sense with just
> one caller. The allocation is relatively small, and I've tried to make
> sure it's called exactly once per patch so as not to be wasteful and
> inefficient.

I think you could do this with:

  size_t len;
  if (strip_suffix(patch->new_name, GITATTRIBUTES_FILE, &len) &&
      len > 0 && is_dir_sep(patch->new_name[len-1]))
          flush_attributes = 1;

Not sure if that is better or worse. It avoids the extra memory
boilerplate, but the logic is a slightly more subtle.

-Peff
brian m. carlson Aug. 9, 2019, 11:47 a.m. UTC | #4
On 2019-08-09 at 11:36:14, Jeff King wrote:
> I think you could do this with:
> 
>   size_t len;
>   if (strip_suffix(patch->new_name, GITATTRIBUTES_FILE, &len) &&
>       len > 0 && is_dir_sep(patch->new_name[len-1]))
>           flush_attributes = 1;
> 
> Not sure if that is better or worse. It avoids the extra memory
> boilerplate, but the logic is a slightly more subtle.

I think an approach like that would be fine with a comment. I'm not sure
this exact approach works with -p0, though, so I'll have to tweak it a
bit.
SZEDER Gábor Aug. 9, 2019, 12:43 p.m. UTC | #5
On Fri, Aug 09, 2019 at 07:36:14AM -0400, Jeff King wrote:
> On Fri, Aug 09, 2019 at 11:25:52AM +0000, brian m. carlson wrote:
> 
> > > > +			if (!flush_attributes && patch->new_name) {
> > > > +				char *dummy = strip_path_suffix(patch->new_name, GITATTRIBUTES_FILE);
> > > 
> > > It's a shame that 'strip_path_suffix' doesn't take a 'char *out', and
> > > accept NULL for that (which would save us the assignment and subsequent
> > > 'free'). In either case, this is certainly the appropriate function.
> > 
> > Yeah, I felt the same way. We could refactor this out into two separate
> > functions, one which returns an ssize_t and one which actually
> > allocates, but I'm not sure it makes a huge amount of sense with just
> > one caller. The allocation is relatively small, and I've tried to make
> > sure it's called exactly once per patch so as not to be wasteful and
> > inefficient.
> 
> I think you could do this with:
> 
>   size_t len;
>   if (strip_suffix(patch->new_name, GITATTRIBUTES_FILE, &len) &&
>       len > 0 && is_dir_sep(patch->new_name[len-1]))
>           flush_attributes = 1;
> 
> Not sure if that is better or worse. It avoids the extra memory
> boilerplate, but the logic is a slightly more subtle.

Subtle indeed :) because you have to allow len == 0 to catch the
toplevel .gitattributes file.

But there is an other subtlety here: when I read the commit message
saying "patch that touches a path ending in ".gitattributes"." and saw
the new call to strip_path_suffix(), I immediately thought what would
happen with a file called 'foo.gitattributes'.  Only when I looked
into strip_path_suffix() became it clear that it only removes full
path components, so such a filename won't cause any trouble (though
perhaps the worst thing that could happen is that we unnecessarily
flush the attributes cache).
Jeff King Aug. 9, 2019, 1:51 p.m. UTC | #6
On Fri, Aug 09, 2019 at 02:43:18PM +0200, SZEDER Gábor wrote:

> > I think you could do this with:
> > 
> >   size_t len;
> >   if (strip_suffix(patch->new_name, GITATTRIBUTES_FILE, &len) &&
> >       len > 0 && is_dir_sep(patch->new_name[len-1]))
> >           flush_attributes = 1;
> > 
> > Not sure if that is better or worse. It avoids the extra memory
> > boilerplate, but the logic is a slightly more subtle.
> 
> Subtle indeed :) because you have to allow len == 0 to catch the
> toplevel .gitattributes file.

I'll pretend I was leaving that as a puzzle for the readers. :)

> But there is an other subtlety here: when I read the commit message
> saying "patch that touches a path ending in ".gitattributes"." and saw
> the new call to strip_path_suffix(), I immediately thought what would
> happen with a file called 'foo.gitattributes'.  Only when I looked
> into strip_path_suffix() became it clear that it only removes full
> path components, so such a filename won't cause any trouble (though
> perhaps the worst thing that could happen is that we unnecessarily
> flush the attributes cache).

Right. I think the term we want here is really "basename". So in fact:

  if (!strcmp(basename(patch->new_name), GITATTRIBUTES_FILE))

would do what we want, except for the annoying caveat that basename() is
allowed to modify its parameter (to remove trailing directory
separators, but we know we wouldn't have them here).

-Peff
brian m. carlson Aug. 9, 2019, 10:08 p.m. UTC | #7
On 2019-08-09 at 13:51:49, Jeff King wrote:
> On Fri, Aug 09, 2019 at 02:43:18PM +0200, SZEDER Gábor wrote:
> > But there is an other subtlety here: when I read the commit message
> > saying "patch that touches a path ending in ".gitattributes"." and saw
> > the new call to strip_path_suffix(), I immediately thought what would
> > happen with a file called 'foo.gitattributes'.  Only when I looked
> > into strip_path_suffix() became it clear that it only removes full
> > path components, so such a filename won't cause any trouble (though
> > perhaps the worst thing that could happen is that we unnecessarily
> > flush the attributes cache).
> 
> Right. I think the term we want here is really "basename". So in fact:
> 
>   if (!strcmp(basename(patch->new_name), GITATTRIBUTES_FILE))
> 
> would do what we want, except for the annoying caveat that basename() is
> allowed to modify its parameter (to remove trailing directory
> separators, but we know we wouldn't have them here).

I think this is exactly the function I'm looking for. I'm a little
uncomfortable relying on the fact that typical implementations don't
modify the string when there's no trailing slash, though.

I think I'm going to end up refactoring out the strip_path_suffix
function into a has_path_suffix and use that. That will avoid the
allocation and it doesn't rely on the generosity of OS implementers.

Patch
diff mbox series

diff --git a/apply.c b/apply.c
index cde95369bb..b959b88b8e 100644
--- a/apply.c
+++ b/apply.c
@@ -4643,6 +4643,7 @@  static int apply_patch(struct apply_state *state,
 	struct patch *list = NULL, **listp = &list;
 	int skipped_patch = 0;
 	int res = 0;
+	int flush_attributes = 0;
 
 	state->patch_input_file = filename;
 	if (read_patch_file(&buf, fd) < 0)
@@ -4670,6 +4671,14 @@  static int apply_patch(struct apply_state *state,
 			patch_stats(state, patch);
 			*listp = patch;
 			listp = &patch->next;
+
+			if (!flush_attributes && patch->new_name) {
+				char *dummy = strip_path_suffix(patch->new_name, GITATTRIBUTES_FILE);
+				/* The patch applied to a .gitattributes file. */
+				if (dummy)
+					flush_attributes = 1;
+				free(dummy);
+			}
 		}
 		else {
 			if (state->apply_verbosity > verbosity_normal)
@@ -4746,6 +4755,8 @@  static int apply_patch(struct apply_state *state,
 	if (state->summary && state->apply_verbosity > verbosity_silent)
 		summary_patch_list(list);
 
+	if (flush_attributes)
+		reset_parsed_attributes();
 end:
 	free_patch_list(list);
 	strbuf_release(&buf);
diff --git a/convert.c b/convert.c
index 94ff837649..030e9b81b9 100644
--- a/convert.c
+++ b/convert.c
@@ -1293,10 +1293,11 @@  struct conv_attrs {
 	const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */
 };
 
+static struct attr_check *check;
+
 static void convert_attrs(const struct index_state *istate,
 			  struct conv_attrs *ca, const char *path)
 {
-	static struct attr_check *check;
 	struct attr_check_item *ccheck = NULL;
 
 	if (!check) {
@@ -1339,6 +1340,12 @@  static void convert_attrs(const struct index_state *istate,
 		ca->crlf_action = CRLF_AUTO_INPUT;
 }
 
+void reset_parsed_attributes(void)
+{
+	attr_check_free(check);
+	check = NULL;
+}
+
 int would_convert_to_git_filter_fd(const struct index_state *istate, const char *path)
 {
 	struct conv_attrs ca;
diff --git a/convert.h b/convert.h
index 831559f10d..3710969d43 100644
--- a/convert.h
+++ b/convert.h
@@ -94,6 +94,12 @@  void convert_to_git_filter_fd(const struct index_state *istate,
 int would_convert_to_git_filter_fd(const struct index_state *istate,
 				   const char *path);
 
+/*
+ * Reset the internal list of attributes used by convert_to_git and
+ * convert_to_working_tree.
+ */
+void reset_parsed_attributes(void);
+
 /*****************************************************************
  *
  * Streaming conversion support
diff --git a/t/t3400-rebase.sh b/t/t3400-rebase.sh
index 80b23fd326..062dc41df7 100755
--- a/t/t3400-rebase.sh
+++ b/t/t3400-rebase.sh
@@ -301,6 +301,29 @@  test_expect_success 'rebase --am and --show-current-patch' '
 	)
 '
 
+test_expect_success 'rebase --am and .gitattributes' '
+	test_create_repo attributes &&
+	(
+		cd attributes &&
+		test_commit init &&
+		test_config filter.test.clean "sed -e '\''s/smudged/clean/g'\''" &&
+		test_config filter.test.smudge "sed -e '\''s/clean/smudged/g'\''" &&
+
+		test_commit second &&
+		git checkout -b test HEAD^ &&
+
+		echo "*.txt filter=test" >.gitattributes &&
+		git add .gitattributes &&
+		test_commit third &&
+
+		echo "This text is smudged." >a.txt &&
+		git add a.txt &&
+		test_commit fourth &&
+		git rebase master &&
+		grep "smudged" a.txt
+	)
+'
+
 test_expect_success 'rebase--merge.sh and --show-current-patch' '
 	test_create_repo conflict-merge &&
 	(