diff mbox series

[v3,2/2] diff: add -I<regex> that ignores matching changes

Message ID 20201015072406.4506-3-michal@isc.org (mailing list archive)
State New, archived
Headers show
Series [v2,1/3] merge-base, xdiff: zero out xpparam_t structures | expand

Commit Message

Michał Kępień Oct. 15, 2020, 7:24 a.m. UTC
Add a new diff option that enables ignoring changes whose all lines
(changed, removed, and added) match a given regular expression.  This is
similar to the -I/--ignore-matching-lines option in standalone diff
utilities and can be used e.g. to ignore changes which only affect code
comments or to look for unrelated changes in commits containing a large
number of automatically applied modifications (e.g. a tree-wide string
replacement).  The difference between -G/-S and the new -I option is
that the latter filters output on a per-change basis.

Use the 'ignore' field of xdchange_t for marking a change as ignored or
not.  Since the same field is used by --ignore-blank-lines, identical
hunk emitting rules apply for --ignore-blank-lines and -I.  These two
options can also be used together in the same git invocation (they are
complementary to each other).

Rename xdl_mark_ignorable() to xdl_mark_ignorable_lines(), to indicate
that it is logically a "sibling" of xdl_mark_ignorable_regex() rather
than its "parent".

Signed-off-by: Michał Kępień <michal@isc.org>
---
 Documentation/diff-options.txt |  5 ++++
 diff.c                         | 28 ++++++++++++++++++++
 diff.h                         |  4 +++
 t/t4013-diff-various.sh        | 33 ++++++++++++++++++++++++
 xdiff/xdiff.h                  |  4 +++
 xdiff/xdiffi.c                 | 47 ++++++++++++++++++++++++++++++++--
 6 files changed, 119 insertions(+), 2 deletions(-)

Comments

Phillip Wood Oct. 16, 2020, 3:32 p.m. UTC | #1
Hi Michał

Thanks for working on this, it will be a useful addition. Unfortunately 
there's a use-after-free error see below

On 15/10/2020 08:24, Michał Kępień wrote:
> Add a new diff option that enables ignoring changes whose all lines
> (changed, removed, and added) match a given regular expression.  This is
> similar to the -I/--ignore-matching-lines option in standalone diff
> utilities and can be used e.g. to ignore changes which only affect code
> comments or to look for unrelated changes in commits containing a large
> number of automatically applied modifications (e.g. a tree-wide string
> replacement).  The difference between -G/-S and the new -I option is
> that the latter filters output on a per-change basis.
> 
> Use the 'ignore' field of xdchange_t for marking a change as ignored or
> not.  Since the same field is used by --ignore-blank-lines, identical
> hunk emitting rules apply for --ignore-blank-lines and -I.  These two
> options can also be used together in the same git invocation (they are
> complementary to each other).
> 
> Rename xdl_mark_ignorable() to xdl_mark_ignorable_lines(), to indicate
> that it is logically a "sibling" of xdl_mark_ignorable_regex() rather
> than its "parent".
> 
> Signed-off-by: Michał Kępień <michal@isc.org>
> ---
>   Documentation/diff-options.txt |  5 ++++
>   diff.c                         | 28 ++++++++++++++++++++
>   diff.h                         |  4 +++
>   t/t4013-diff-various.sh        | 33 ++++++++++++++++++++++++
>   xdiff/xdiff.h                  |  4 +++
>   xdiff/xdiffi.c                 | 47 ++++++++++++++++++++++++++++++++--
>   6 files changed, 119 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
> index 573fb9bb71..ee52b65e46 100644
> --- a/Documentation/diff-options.txt
> +++ b/Documentation/diff-options.txt
> @@ -687,6 +687,11 @@ endif::git-format-patch[]
>   --ignore-blank-lines::
>   	Ignore changes whose lines are all blank.
>   
> +-I<regex>::
> +--ignore-matching-lines=<regex>::
> +	Ignore changes whose all lines match <regex>.  This option may
> +	be specified more than once.
> +
>   --inter-hunk-context=<lines>::
>   	Show the context between diff hunks, up to the specified number
>   	of lines, thereby fusing hunks that are close to each other.
> diff --git a/diff.c b/diff.c
> index 2bb2f8f57e..677de23352 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -3587,6 +3587,8 @@ static void builtin_diff(const char *name_a,
>   		if (header.len && !o->flags.suppress_diff_headers)
>   			ecbdata.header = &header;
>   		xpp.flags = o->xdl_opts;
> +		xpp.ignore_regex = o->ignore_regex;
> +		xpp.ignore_regex_nr = o->ignore_regex_nr;
>   		xpp.anchors = o->anchors;
>   		xpp.anchors_nr = o->anchors_nr;
>   		xecfg.ctxlen = o->context;
> @@ -3716,6 +3718,8 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
>   		memset(&xpp, 0, sizeof(xpp));
>   		memset(&xecfg, 0, sizeof(xecfg));
>   		xpp.flags = o->xdl_opts;
> +		xpp.ignore_regex = o->ignore_regex;
> +		xpp.ignore_regex_nr = o->ignore_regex_nr;
>   		xpp.anchors = o->anchors;
>   		xpp.anchors_nr = o->anchors_nr;
>   		xecfg.ctxlen = o->context;
> @@ -5203,6 +5207,22 @@ static int diff_opt_patience(const struct option *opt,
>   	return 0;
>   }
>   
> +static int diff_opt_ignore_regex(const struct option *opt,
> +				 const char *arg, int unset)
> +{
> +	struct diff_options *options = opt->value;
> +	regex_t *regex;
> +
> +	BUG_ON_OPT_NEG(unset);
> +	regex = xmalloc(sizeof(*regex));
> +	if (regcomp(regex, arg, REG_EXTENDED | REG_NEWLINE))
> +		return error(_("invalid regex given to -I: '%s'"), arg);
> +	ALLOC_GROW(options->ignore_regex, options->ignore_regex_nr + 1,
> +		   options->ignore_regex_alloc);
> +	options->ignore_regex[options->ignore_regex_nr++] = regex;
> +	return 0;
> +}
> +
>   static int diff_opt_pickaxe_regex(const struct option *opt,
>   				  const char *arg, int unset)
>   {
> @@ -5491,6 +5511,9 @@ static void prep_parse_options(struct diff_options *options)
>   		OPT_BIT_F(0, "ignore-blank-lines", &options->xdl_opts,
>   			  N_("ignore changes whose lines are all blank"),
>   			  XDF_IGNORE_BLANK_LINES, PARSE_OPT_NONEG),
> +		OPT_CALLBACK_F('I', "ignore-matching-lines", options, N_("<regex>"),
> +			       N_("ignore changes whose all lines match <regex>"),
> +			       0, diff_opt_ignore_regex),
>   		OPT_BIT(0, "indent-heuristic", &options->xdl_opts,
>   			N_("heuristic to shift diff hunk boundaries for easy reading"),
>   			XDF_INDENT_HEURISTIC),
> @@ -6405,6 +6428,11 @@ void diff_flush(struct diff_options *options)
>   	DIFF_QUEUE_CLEAR(q);
>   	if (options->close_file)
>   		fclose(options->file);
> +	for (i = 0; i < options->ignore_regex_nr; i++) {
> +		regfree(options->ignore_regex[i]);
> +		free(options->ignore_regex[i]);
> +	}
> +	free(options->ignore_regex);

If I run `git log -p -I foo` then the address sanitizer reports

AddressSanitizer: heap-use-after-free xdiff/xdiffi.c:1027 in 
record_matches_regex

after it has printed the diff for the first commit. I think freeing the 
regex here is the cause of the problem.

Best Wishes

Phillip

>   	/*
>   	 * Report the content-level differences with HAS_CHANGES;
> diff --git a/diff.h b/diff.h
> index 11de52e9e9..a402227b80 100644
> --- a/diff.h
> +++ b/diff.h
> @@ -234,6 +234,10 @@ struct diff_options {
>   	 */
>   	const char *pickaxe;
>   
> +	/* -I<regex> */
> +	regex_t **ignore_regex;
> +	size_t ignore_regex_nr, ignore_regex_alloc;
> +
>   	const char *single_follow;
>   	const char *a_prefix, *b_prefix;
>   	const char *line_prefix;
> diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
> index 5c7b0122b4..efaaee2ef0 100755
> --- a/t/t4013-diff-various.sh
> +++ b/t/t4013-diff-various.sh
> @@ -6,6 +6,7 @@
>   test_description='Various diff formatting options'
>   
>   . ./test-lib.sh
> +. "$TEST_DIRECTORY"/diff-lib.sh
>   
>   test_expect_success setup '
>   
> @@ -473,4 +474,36 @@ test_expect_success 'diff-tree --stdin with log formatting' '
>   	test_cmp expect actual
>   '
>   
> +test_expect_success 'diff -I<regex>' '
> +	test_seq 50 >I.txt &&
> +	sed -e "s/13/ten and three/" -e "/7\$/d" <I.txt >J.txt &&
> +	echo >>J.txt &&
> +
> +	test_expect_code 1 git diff --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&
> +	cat >expect <<-\EOF &&
> +	diff --git a/I.txt b/J.txt
> +	--- a/I.txt
> +	+++ b/J.txt
> +	@@ -34,7 +31,6 @@
> +	 34
> +	 35
> +	 36
> +	-37
> +	 38
> +	 39
> +	 40
> +	EOF
> +	compare_diff_patch expect actual &&
> +
> +	test_expect_code 1 git diff --stat --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&
> +	cat >expect <<-\EOF &&
> +	 I.txt => J.txt | 1 -
> +	 1 file changed, 1 deletion(-)
> +	EOF
> +	test_cmp expect actual &&
> +
> +	test_expect_code 129 git diff --no-index --ignore-matching-lines="^[124-9]" --ignore-matching-lines="^[124-9" I.txt J.txt >output 2>&1 &&
> +	test_i18ngrep "invalid regex given to -I: " output
> +'
> +
>   test_done
> diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
> index 032e3a9f41..7a04605146 100644
> --- a/xdiff/xdiff.h
> +++ b/xdiff/xdiff.h
> @@ -79,6 +79,10 @@ typedef struct s_mmbuffer {
>   typedef struct s_xpparam {
>   	unsigned long flags;
>   
> +	/* -I<regex> */
> +	regex_t **ignore_regex;
> +	size_t ignore_regex_nr;
> +
>   	/* See Documentation/diff-options.txt. */
>   	char **anchors;
>   	size_t anchors_nr;
> diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
> index bd035139f9..380eb728ed 100644
> --- a/xdiff/xdiffi.c
> +++ b/xdiff/xdiffi.c
> @@ -998,7 +998,7 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
>   	return 0;
>   }
>   
> -static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
> +static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags)
>   {
>   	xdchange_t *xch;
>   
> @@ -1019,6 +1019,46 @@ static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
>   	}
>   }
>   
> +static int record_matches_regex(xrecord_t *rec, xpparam_t const *xpp) {
> +	regmatch_t regmatch;
> +	int i;
> +
> +	for (i = 0; i < xpp->ignore_regex_nr; i++)
> +		if (!regexec_buf(xpp->ignore_regex[i], rec->ptr, rec->size, 1,
> +				 &regmatch, 0))
> +			return 1;
> +
> +	return 0;
> +}
> +
> +static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
> +				     xpparam_t const *xpp)
> +{
> +	xdchange_t *xch;
> +
> +	for (xch = xscr; xch; xch = xch->next) {
> +		xrecord_t **rec;
> +		int ignore = 1;
> +		long i;
> +
> +		/*
> +		 * Do not override --ignore-blank-lines.
> +		 */
> +		if (xch->ignore)
> +			continue;
> +
> +		rec = &xe->xdf1.recs[xch->i1];
> +		for (i = 0; i < xch->chg1 && ignore; i++)
> +			ignore = record_matches_regex(rec[i], xpp);
> +
> +		rec = &xe->xdf2.recs[xch->i2];
> +		for (i = 0; i < xch->chg2 && ignore; i++)
> +			ignore = record_matches_regex(rec[i], xpp);
> +
> +		xch->ignore = ignore;
> +	}
> +}
> +
>   int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
>   	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
>   	xdchange_t *xscr;
> @@ -1038,7 +1078,10 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
>   	}
>   	if (xscr) {
>   		if (xpp->flags & XDF_IGNORE_BLANK_LINES)
> -			xdl_mark_ignorable(xscr, &xe, xpp->flags);
> +			xdl_mark_ignorable_lines(xscr, &xe, xpp->flags);
> +
> +		if (xpp->ignore_regex)
> +			xdl_mark_ignorable_regex(xscr, &xe, xpp);
>   
>   		if (ef(&xe, xscr, ecb, xecfg) < 0) {
>   
>
Junio C Hamano Oct. 16, 2020, 6:04 p.m. UTC | #2
Phillip Wood <phillip.wood123@gmail.com> writes:

>> @@ -6405,6 +6428,11 @@ void diff_flush(struct diff_options *options)
>>   	DIFF_QUEUE_CLEAR(q);
>>   	if (options->close_file)
>>   		fclose(options->file);
>> +	for (i = 0; i < options->ignore_regex_nr; i++) {
>> +		regfree(options->ignore_regex[i]);
>> +		free(options->ignore_regex[i]);
>> +	}
>> +	free(options->ignore_regex);
>
> If I run `git log -p -I foo` then the address sanitizer reports
>
> AddressSanitizer: heap-use-after-free xdiff/xdiffi.c:1027 in
> record_matches_regex
>
> after it has printed the diff for the first commit. I think freeing
> the regex here is the cause of the problem.

Yes, this is a good location to clear the diff_queue, which is
per-invocation.  But diff_options->ignore_regex[] can and should be
shared across multiple invocations of the diff machinery, as we are
parsing upfront in the command line option parser just once to be
used throughout the life of the program.  It is wrong to free them
early like this.

I really hate to suggest this, one common API call made into the
diff machinery from various consumers, when they are absolutely done
with diff_options, is probably diff_result_code().  Its purpose is
to collect bits computed to participate in the overall result into
the final result code and anybody who ran one or more diff and wants
to learn the outcome must call it, so it is unlikely that callers
that matter are missing a call to finalize their uses of the diff
machinery.  So if freeing .ignore_regex[] is really necessary, it
could be used to tell us where to do so [*1*].

Unlike per-invocation resources that MUST be freed for repeated
invocations of the diff machinery made in "git log" family,
resources held for and repeatedly used during the entire session
like this may not have to be freed, to be honest, though.

Thanks.


[Footnote]

*1* Adding calls to free() in diff_result_code() directly is
    probably not a good idea.  Some callers may not even need result
    code (e.g.  "blame") and do not call it at all, but we may want
    to free the resource in diff_options that is not per-invocation.
    So if we were to do this, we probably need a new API function,
    perhaps diff_options_clear() or something, and call that from
    diff_result_code(), and then find callers that do not care about
    the result code and call diff_options_clear() directly from
    them.

    If a caller does a single diff_setup_done(), makes repeated
    calls to the diff machinery, and calls diff_result_code() once
    per each iteration (e.g. imagine "git log -p" that detects the
    status for each commit), then having the diff_options_clear()
    call in diff_result_code() will break the machinery, so if we
    were to follow the outline given in the previous paragraph, we
    need to make sure there is no such caller.  But I see that
    diff_result_code() does not clear the fields it looks at in the
    diff_options fields after it uses them, so the second and
    subsequent calls into the diff machinery by such a caller may
    already be broken (not in the "resource leakage" sense, but in
    the correctness sense).
Junio C Hamano Oct. 16, 2020, 6:16 p.m. UTC | #3
Michał Kępień <michal@isc.org> writes:

> +test_expect_success 'diff -I<regex>' '
> +	test_seq 50 >I.txt &&
> +	sed -e "s/13/ten and three/" -e "/7\$/d" <I.txt >J.txt &&
> +	echo >>J.txt &&
> +
> +	test_expect_code 1 git diff --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&

Please wrap an overly long line like this oned, perhaps like:

	test_expect_code 1 git diff --no-index --ignore-blank-lines \
		-I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&

> +	test_expect_code 1 git diff --stat --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&

> +	test_expect_code 129 git diff --no-index --ignore-matching-lines="^[124-9]" --ignore-matching-lines="^[124-9" I.txt J.txt >output 2>&1 &&

Cramming unrelated tests into a single one made me puzzled, staring
at this line for longer than necessary before realizing that this is
an attempt to catch a malformed regexp.  If this were in a separate
test with its own title, e.g.

	test_expect_success 'diff -I<regex>: setup' '
		... set up the sample input, perhaps sample history ...
		... perhaps prepare the expected output in "expect" ...
	'

	test_expect_success 'diff -I<regex> -p' '
		git diff --I"ten.*e" ... >actual &&
		test_cmp expect actual
	'

	test_expect_success 'diff -I<regex> --stat' '
		git diff --stat --I"ten.*e" ... >actual &&
		test_cmp expect actual
	'

	test_expect_success 'diff -I<regex>: detect malformed regex'
		test_must_fail git diff -I="[^124-9" ... 2>error &&
		test_i18ngrep "invalid regex" error
	'
		
It would have been much easier to follow.

It also is curious that it only tests --no-index, which is a bolted
on feature that may not exercise the codepath the users depend on
working correctly.  If this were tested with "git log -p" for a few
commits, the early destruction of regexp may have been caught,
especially with ASAN build.

Other than that, and also the premature destruction of regexp
pointed out by Phillip already, looks reasonably done.

Thanks.
Michał Kępień Oct. 19, 2020, 9:48 a.m. UTC | #4
First, thanks to Phillip for testing this.

> Yes, this is a good location to clear the diff_queue, which is
> per-invocation.  But diff_options->ignore_regex[] can and should be
> shared across multiple invocations of the diff machinery, as we are
> parsing upfront in the command line option parser just once to be
> used throughout the life of the program.  It is wrong to free them
> early like this.

Ugh, sorry, I am only now starting to see the big picture.

> I really hate to suggest this, one common API call made into the
> diff machinery from various consumers, when they are absolutely done
> with diff_options, is probably diff_result_code().  Its purpose is
> to collect bits computed to participate in the overall result into
> the final result code and anybody who ran one or more diff and wants
> to learn the outcome must call it, so it is unlikely that callers
> that matter are missing a call to finalize their uses of the diff
> machinery.  So if freeing .ignore_regex[] is really necessary, it
> could be used to tell us where to do so [*1*].
> 
> Unlike per-invocation resources that MUST be freed for repeated
> invocations of the diff machinery made in "git log" family,
> resources held for and repeatedly used during the entire session
> like this may not have to be freed, to be honest, though.

After reading your message, taking a closer look, and doing a few
Valgrind runs, I tend to agree that perhaps there is no need to free the
'ignore_regex' array after all - it does *not* get allocated repeatedly
during each invocation of the diff machinery and when Git exits, the OS
will reclaim any memory allocated by the process anyway.  On my laptop,
a Valgrind run done for a -DSUPPRESS_ANNOTATED_LEAKS build claims that
'ignore_regex' is one of 1,300+ memory blocks still reachable at exit
time, which amount to about 2.5 MB of memory in total.  Given this, it
feels slightly weird to single 'ignore_regex' out.

I will revert the diff_flush() changes in v4 unless somebody disagrees
with the above reasoning.
Michał Kępień Oct. 19, 2020, 9:55 a.m. UTC | #5
> Michał Kępień <michal@isc.org> writes:
> 
> > +test_expect_success 'diff -I<regex>' '
> > +	test_seq 50 >I.txt &&
> > +	sed -e "s/13/ten and three/" -e "/7\$/d" <I.txt >J.txt &&
> > +	echo >>J.txt &&
> > +
> > +	test_expect_code 1 git diff --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&
> 
> Please wrap an overly long line like this oned, perhaps like:
> 
> 	test_expect_code 1 git diff --no-index --ignore-blank-lines \
> 		-I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&

Yeah, thanks, I was unsure what the line length limit for test code is.
I will fix this in v4.

> > +	test_expect_code 1 git diff --stat --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&
> 
> > +	test_expect_code 129 git diff --no-index --ignore-matching-lines="^[124-9]" --ignore-matching-lines="^[124-9" I.txt J.txt >output 2>&1 &&
> 
> Cramming unrelated tests into a single one made me puzzled, staring
> at this line for longer than necessary before realizing that this is
> an attempt to catch a malformed regexp.  If this were in a separate
> test with its own title, e.g.
> 
> (...)
> 		
> It would have been much easier to follow.
> 
> It also is curious that it only tests --no-index, which is a bolted
> on feature that may not exercise the codepath the users depend on
> working correctly.  If this were tested with "git log -p" for a few
> commits, the early destruction of regexp may have been caught,
> especially with ASAN build.

Sorry, I think I got a bit carried away trying to use the test suggested
by Johannes while also extending it with the missing bits.  I will try
to come up with something more balanced in v4.
Junio C Hamano Oct. 19, 2020, 5:29 p.m. UTC | #6
Michał Kępień <michal@isc.org> writes:

>> Cramming unrelated tests into a single one made me puzzled, staring
>> at this line for longer than necessary before realizing that this is
>> an attempt to catch a malformed regexp.  If this were in a separate
>> test with its own title, e.g.
>> 		
>> It would have been much easier to follow.
> ... I will try
> to come up with something more balanced in v4.

We want to avoid going overboard and doing a full test matrix.  

But having it tested fully with one variant while testing just the
basic for other variants may give us a good balance, e.g. "git log
-p -I<regex>" is tested but we do not bother testing interactions
with other options", while we try different combinations when
testing "git diff --no-index -I<regex>", like multiple -I options
etc.

Thanks.
diff mbox series

Patch

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 573fb9bb71..ee52b65e46 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -687,6 +687,11 @@  endif::git-format-patch[]
 --ignore-blank-lines::
 	Ignore changes whose lines are all blank.
 
+-I<regex>::
+--ignore-matching-lines=<regex>::
+	Ignore changes whose all lines match <regex>.  This option may
+	be specified more than once.
+
 --inter-hunk-context=<lines>::
 	Show the context between diff hunks, up to the specified number
 	of lines, thereby fusing hunks that are close to each other.
diff --git a/diff.c b/diff.c
index 2bb2f8f57e..677de23352 100644
--- a/diff.c
+++ b/diff.c
@@ -3587,6 +3587,8 @@  static void builtin_diff(const char *name_a,
 		if (header.len && !o->flags.suppress_diff_headers)
 			ecbdata.header = &header;
 		xpp.flags = o->xdl_opts;
+		xpp.ignore_regex = o->ignore_regex;
+		xpp.ignore_regex_nr = o->ignore_regex_nr;
 		xpp.anchors = o->anchors;
 		xpp.anchors_nr = o->anchors_nr;
 		xecfg.ctxlen = o->context;
@@ -3716,6 +3718,8 @@  static void builtin_diffstat(const char *name_a, const char *name_b,
 		memset(&xpp, 0, sizeof(xpp));
 		memset(&xecfg, 0, sizeof(xecfg));
 		xpp.flags = o->xdl_opts;
+		xpp.ignore_regex = o->ignore_regex;
+		xpp.ignore_regex_nr = o->ignore_regex_nr;
 		xpp.anchors = o->anchors;
 		xpp.anchors_nr = o->anchors_nr;
 		xecfg.ctxlen = o->context;
@@ -5203,6 +5207,22 @@  static int diff_opt_patience(const struct option *opt,
 	return 0;
 }
 
+static int diff_opt_ignore_regex(const struct option *opt,
+				 const char *arg, int unset)
+{
+	struct diff_options *options = opt->value;
+	regex_t *regex;
+
+	BUG_ON_OPT_NEG(unset);
+	regex = xmalloc(sizeof(*regex));
+	if (regcomp(regex, arg, REG_EXTENDED | REG_NEWLINE))
+		return error(_("invalid regex given to -I: '%s'"), arg);
+	ALLOC_GROW(options->ignore_regex, options->ignore_regex_nr + 1,
+		   options->ignore_regex_alloc);
+	options->ignore_regex[options->ignore_regex_nr++] = regex;
+	return 0;
+}
+
 static int diff_opt_pickaxe_regex(const struct option *opt,
 				  const char *arg, int unset)
 {
@@ -5491,6 +5511,9 @@  static void prep_parse_options(struct diff_options *options)
 		OPT_BIT_F(0, "ignore-blank-lines", &options->xdl_opts,
 			  N_("ignore changes whose lines are all blank"),
 			  XDF_IGNORE_BLANK_LINES, PARSE_OPT_NONEG),
+		OPT_CALLBACK_F('I', "ignore-matching-lines", options, N_("<regex>"),
+			       N_("ignore changes whose all lines match <regex>"),
+			       0, diff_opt_ignore_regex),
 		OPT_BIT(0, "indent-heuristic", &options->xdl_opts,
 			N_("heuristic to shift diff hunk boundaries for easy reading"),
 			XDF_INDENT_HEURISTIC),
@@ -6405,6 +6428,11 @@  void diff_flush(struct diff_options *options)
 	DIFF_QUEUE_CLEAR(q);
 	if (options->close_file)
 		fclose(options->file);
+	for (i = 0; i < options->ignore_regex_nr; i++) {
+		regfree(options->ignore_regex[i]);
+		free(options->ignore_regex[i]);
+	}
+	free(options->ignore_regex);
 
 	/*
 	 * Report the content-level differences with HAS_CHANGES;
diff --git a/diff.h b/diff.h
index 11de52e9e9..a402227b80 100644
--- a/diff.h
+++ b/diff.h
@@ -234,6 +234,10 @@  struct diff_options {
 	 */
 	const char *pickaxe;
 
+	/* -I<regex> */
+	regex_t **ignore_regex;
+	size_t ignore_regex_nr, ignore_regex_alloc;
+
 	const char *single_follow;
 	const char *a_prefix, *b_prefix;
 	const char *line_prefix;
diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index 5c7b0122b4..efaaee2ef0 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -6,6 +6,7 @@ 
 test_description='Various diff formatting options'
 
 . ./test-lib.sh
+. "$TEST_DIRECTORY"/diff-lib.sh
 
 test_expect_success setup '
 
@@ -473,4 +474,36 @@  test_expect_success 'diff-tree --stdin with log formatting' '
 	test_cmp expect actual
 '
 
+test_expect_success 'diff -I<regex>' '
+	test_seq 50 >I.txt &&
+	sed -e "s/13/ten and three/" -e "/7\$/d" <I.txt >J.txt &&
+	echo >>J.txt &&
+
+	test_expect_code 1 git diff --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&
+	cat >expect <<-\EOF &&
+	diff --git a/I.txt b/J.txt
+	--- a/I.txt
+	+++ b/J.txt
+	@@ -34,7 +31,6 @@
+	 34
+	 35
+	 36
+	-37
+	 38
+	 39
+	 40
+	EOF
+	compare_diff_patch expect actual &&
+
+	test_expect_code 1 git diff --stat --no-index --ignore-blank-lines -I"ten.*e" -I"^[124-9]" I.txt J.txt >actual &&
+	cat >expect <<-\EOF &&
+	 I.txt => J.txt | 1 -
+	 1 file changed, 1 deletion(-)
+	EOF
+	test_cmp expect actual &&
+
+	test_expect_code 129 git diff --no-index --ignore-matching-lines="^[124-9]" --ignore-matching-lines="^[124-9" I.txt J.txt >output 2>&1 &&
+	test_i18ngrep "invalid regex given to -I: " output
+'
+
 test_done
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 032e3a9f41..7a04605146 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -79,6 +79,10 @@  typedef struct s_mmbuffer {
 typedef struct s_xpparam {
 	unsigned long flags;
 
+	/* -I<regex> */
+	regex_t **ignore_regex;
+	size_t ignore_regex_nr;
+
 	/* See Documentation/diff-options.txt. */
 	char **anchors;
 	size_t anchors_nr;
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index bd035139f9..380eb728ed 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -998,7 +998,7 @@  static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 	return 0;
 }
 
-static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
+static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags)
 {
 	xdchange_t *xch;
 
@@ -1019,6 +1019,46 @@  static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
 	}
 }
 
+static int record_matches_regex(xrecord_t *rec, xpparam_t const *xpp) {
+	regmatch_t regmatch;
+	int i;
+
+	for (i = 0; i < xpp->ignore_regex_nr; i++)
+		if (!regexec_buf(xpp->ignore_regex[i], rec->ptr, rec->size, 1,
+				 &regmatch, 0))
+			return 1;
+
+	return 0;
+}
+
+static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
+				     xpparam_t const *xpp)
+{
+	xdchange_t *xch;
+
+	for (xch = xscr; xch; xch = xch->next) {
+		xrecord_t **rec;
+		int ignore = 1;
+		long i;
+
+		/*
+		 * Do not override --ignore-blank-lines.
+		 */
+		if (xch->ignore)
+			continue;
+
+		rec = &xe->xdf1.recs[xch->i1];
+		for (i = 0; i < xch->chg1 && ignore; i++)
+			ignore = record_matches_regex(rec[i], xpp);
+
+		rec = &xe->xdf2.recs[xch->i2];
+		for (i = 0; i < xch->chg2 && ignore; i++)
+			ignore = record_matches_regex(rec[i], xpp);
+
+		xch->ignore = ignore;
+	}
+}
+
 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
 	xdchange_t *xscr;
@@ -1038,7 +1078,10 @@  int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	}
 	if (xscr) {
 		if (xpp->flags & XDF_IGNORE_BLANK_LINES)
-			xdl_mark_ignorable(xscr, &xe, xpp->flags);
+			xdl_mark_ignorable_lines(xscr, &xe, xpp->flags);
+
+		if (xpp->ignore_regex)
+			xdl_mark_ignorable_regex(xscr, &xe, xpp);
 
 		if (ef(&xe, xscr, ecb, xecfg) < 0) {