diff mbox series

[RFC] grep: allow for run time disabling of JIT in PCRE

Message ID 20190728235427.41425-1-carenas@gmail.com (mailing list archive)
State New, archived
Headers show
Series [RFC] grep: allow for run time disabling of JIT in PCRE | expand

Commit Message

Carlo Marcelo Arenas Belón July 28, 2019, 11:54 p.m. UTC
PCRE1 allowed for a compile time flag to disable JIT, but PCRE2 never
had one, forcing the use of JIT if -P was requested.

After ed0479ce3d (Merge branch 'ab/no-kwset' into next, 2019-07-15)
the PCRE2 engine will be used more broadly and therefore adding this
knob will give users a fallback for situations like the one observed
in OpenBSD with a JIT enabled PCRE2, because of W^X restrictions:

  $ git grep 'foo bar'
  fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
  $ git grep -G 'foo bar'
  fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
  $ git grep -E 'foo bar'
  fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
  $ git grep -F 'foo bar'
  fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'

Signed-off-by: Carlo Marcelo Arenas Belón <carenas@gmail.com>
---
 Documentation/git-grep.txt |  4 ++++
 grep.c                     | 15 +++++++++++++--
 grep.h                     |  1 +
 3 files changed, 18 insertions(+), 2 deletions(-)

Comments

Carlo Marcelo Arenas Belón July 29, 2019, 12:09 a.m. UTC | #1
On Sun, Jul 28, 2019 at 4:54 PM Carlo Marcelo Arenas Belón
<carenas@gmail.com> wrote:
> @@ -125,6 +126,12 @@ int grep_config(const char *var, const char *value, void *cb)
>                 return 0;
>         }
>
> +       if (!strcmp(var, "pcre.jit")) {
> +               int is_bool;
> +               opt->pcre_jit = git_config_bool_or_int(var, value, &is_bool);
> +               return 0;
> +       }
> +
>         if (!strcmp(var, "color.grep"))
>                 opt->color = git_config_colorbool(var, value);
>         if (!strcmp(var, "color.grep.match")) {

using git_config_bool_or_int, as I am hoping a future version will use
a third value (maybe -1) to
indicate JIT will be tried first, but then the interpreter will be
used in case JIT is not available (as
recommended in PCRE)

not sure also about the right name and where to document this flag, as
this is not only restricted to
the grep subcommand and the issue it is working around will be also
relevant for log (including pickaxe)

Carlo
Junio C Hamano July 29, 2019, 4:57 a.m. UTC | #2
Carlo Marcelo Arenas Belón  <carenas@gmail.com> writes:

> PCRE1 allowed for a compile time flag to disable JIT, but PCRE2 never
> had one, forcing the use of JIT if -P was requested.
>
> After ed0479ce3d (Merge branch 'ab/no-kwset' into next, 2019-07-15)
> the PCRE2 engine will be used more broadly and therefore adding this
> knob will give users a fallback for situations like the one observed
> in OpenBSD with a JIT enabled PCRE2, because of W^X restrictions:
>
>   $ git grep 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>   $ git grep -G 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>   $ git grep -E 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>   $ git grep -F 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'

;-) Yeah, we should have known that security-paranoid distros would
have W^X issues with this series, too.

I am not sure I like a config-only knob like this,
though---shouldn't we have a command line knob to turn jit off
first, and then for those who gets tired of having to type it all
the time add the configuration to flip the default for them?

Other than that, the feature itself makes quite a lot of sense.

>
> Signed-off-by: Carlo Marcelo Arenas Belón <carenas@gmail.com>
> ---
>  Documentation/git-grep.txt |  4 ++++
>  grep.c                     | 15 +++++++++++++--
>  grep.h                     |  1 +
>  3 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
> index c89fb569e3..ff544bdeec 100644
> --- a/Documentation/git-grep.txt
> +++ b/Documentation/git-grep.txt
> @@ -69,6 +69,10 @@ grep.fallbackToNoIndex::
>  	If set to true, fall back to git grep --no-index if git grep
>  	is executed outside of a git repository.  Defaults to false.
>  
> +pcre.jit::
> +	If set to false, disable JIT when using PCRE.  Defaults to
> +	true.
> +
>  
>  OPTIONS
>  -------
> diff --git a/grep.c b/grep.c
> index c7c06ae08d..3524d353dd 100644
> --- a/grep.c
> +++ b/grep.c
> @@ -56,6 +56,7 @@ void init_grep_defaults(struct repository *repo)
>  	opt->repo = repo;
>  	opt->relative = 1;
>  	opt->pathname = 1;
> +	opt->pcre_jit = 1;
>  	opt->max_depth = -1;
>  	opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
>  	color_set(opt->colors[GREP_COLOR_CONTEXT], "");
> @@ -125,6 +126,12 @@ int grep_config(const char *var, const char *value, void *cb)
>  		return 0;
>  	}
>  
> +	if (!strcmp(var, "pcre.jit")) {
> +		int is_bool;
> +		opt->pcre_jit = git_config_bool_or_int(var, value, &is_bool);
> +		return 0;
> +	}
> +
>  	if (!strcmp(var, "color.grep"))
>  		opt->color = git_config_colorbool(var, value);
>  	if (!strcmp(var, "color.grep.match")) {
> @@ -163,6 +170,7 @@ void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix
>  	opt->pattern_tail = &opt->pattern_list;
>  	opt->header_tail = &opt->header_list;
>  
> +	opt->pcre_jit = def->pcre_jit;
>  	opt->only_matching = def->only_matching;
>  	opt->color = def->color;
>  	opt->extended_regexp_option = def->extended_regexp_option;
> @@ -393,7 +401,8 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
>  		die("%s", error);
>  
>  #ifdef GIT_PCRE1_USE_JIT
> -	pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
> +	if (opt->pcre_jit)
> +		pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
>  #endif
>  }
>  
> @@ -489,7 +498,9 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
>  		compile_regexp_failed(p, (const char *)&errbuf);
>  	}
>  
> -	pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
> +	if (opt->pcre_jit)
> +		pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
> +
>  	if (p->pcre2_jit_on) {
>  		jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
>  		if (jitret)
> diff --git a/grep.h b/grep.h
> index c0c71eb4a9..fff152e606 100644
> --- a/grep.h
> +++ b/grep.h
> @@ -151,6 +151,7 @@ struct grep_opt {
>  	int allow_textconv;
>  	int extended;
>  	int use_reflog_filter;
> +	int pcre_jit;
>  	int pcre1;
>  	int pcre2;
>  	int relative;
Carlo Marcelo Arenas Belón July 29, 2019, 5:29 a.m. UTC | #3
On Sun, Jul 28, 2019 at 9:57 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> I am not sure I like a config-only knob like this,
> though---shouldn't we have a command line knob to turn jit off
> first, and then for those who gets tired of having to type it all
> the time add the configuration to flip the default for them?

are you suggesting to add a --pcre-jit parameter to both grep and log?

guess using pcre.jit for the configuration makes sense as far as
it is documented on both man pages, or would this imply this should go
instead into core?

I was expecting this to be set system (or at least global) wide most
of the time like
(ex: core.ignorecase, credential.helper) neither of which I can relate
to a command line parameter.

> Other than that, the feature itself makes quite a lot of sense.

should I target maint/master?, next conflicts because of ab/no-kwset and it also
conflicts with other in fly features too (some not even in pu), which is why now
is based on pu and depends on ab/pcre-jit-fixes

Carlo
Ævar Arnfjörð Bjarmason July 29, 2019, 8:55 a.m. UTC | #4
On Mon, Jul 29 2019, Carlo Marcelo Arenas Belón wrote:

> PCRE1 allowed for a compile time flag to disable JIT, but PCRE2 never
> had one, forcing the use of JIT if -P was requested.

What's that PCRE1 compile-time flag?

> After ed0479ce3d (Merge branch 'ab/no-kwset' into next, 2019-07-15)
> the PCRE2 engine will be used more broadly and therefore adding this
> knob will give users a fallback for situations like the one observed
> in OpenBSD with a JIT enabled PCRE2, because of W^X restrictions:
>
>   $ git grep 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>   $ git grep -G 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>   $ git grep -E 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>   $ git grep -F 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'

Yeah that obviously sucks more with ab/no-kwset, but that seems like a
case where -P would have been completely broken before, and therefore I
can't imagine the package ever passed "make test". Or is W^X also
exposed as some run-time option on OpenBSD?

I.e. aside from the merits of such a setting in general these examples
seem like just working around something that should be fixed at make
all/test time, or maybe I'm missing something.

To the extent that we'd want to make this sort of thing configurable, I
wonder if a continuation of my (*NO_JIT) patch isn't better, i.e. just
adding the ability to configure some string we'd inject at the start of
every pattern.

That would allow for setting any other number of options in
pcre2syntax(3) without us needing to carry config for each one,
e.g. (*LIMIT_HEAP=d), (*LIMIT_DEPTH=d) etc. It does present a larger
foot-gun surface though...
Carlo Marcelo Arenas Belón July 29, 2019, 10:26 a.m. UTC | #5
On Mon, Jul 29, 2019 at 1:55 AM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
>
> On Mon, Jul 29 2019, Carlo Marcelo Arenas Belón wrote:
>
> > PCRE1 allowed for a compile time flag to disable JIT, but PCRE2 never
> > had one, forcing the use of JIT if -P was requested.
>
> What's that PCRE1 compile-time flag?

NO_LIBPCRE1_JIT at GIT compile time (regardless of JIT support in the
PCRE1 library you are using)

> > After ed0479ce3d (Merge branch 'ab/no-kwset' into next, 2019-07-15)
> > the PCRE2 engine will be used more broadly and therefore adding this
> > knob will give users a fallback for situations like the one observed
> > in OpenBSD with a JIT enabled PCRE2, because of W^X restrictions:
> >
> >   $ git grep 'foo bar'
> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
> >   $ git grep -G 'foo bar'
> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
> >   $ git grep -E 'foo bar'
> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
> >   $ git grep -F 'foo bar'
> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>
> Yeah that obviously sucks more with ab/no-kwset, but that seems like a
> case where -P would have been completely broken before, and therefore I
> can't imagine the package ever passed "make test". Or is W^X also
> exposed as some run-time option on OpenBSD?

ironically, you could use PCRE1 since that is not using the JIT fast
path and therefore will fallback automatically to the interpreter

there is also a convoluted way to make your binary work by moving
it into a mount point that has been specially exempted from that W^X
restriction.

> I.e. aside from the merits of such a setting in general these examples
> seem like just working around something that should be fixed at make
> all/test time, or maybe I'm missing something.

1) before you could just avoid using -P and still be able to grep
2) there is no way to tell PCRE2 to get out of the way even if you are
    not using -P

you are right though that this is not a new problem and was reported
before with patches and the last comment saying a configuration
should be provided.

> To the extent that we'd want to make this sort of thing configurable, I
> wonder if a continuation of my (*NO_JIT) patch isn't better, i.e. just
> adding the ability to configure some string we'd inject at the start of
> every pattern.

looking at the number of lines of code, it would seem the configuration
approach is simpler.

> That would allow for setting any other number of options in
> pcre2syntax(3) without us needing to carry config for each one,
> e.g. (*LIMIT_HEAP=d), (*LIMIT_DEPTH=d) etc. It does present a larger
> foot-gun surface though...

the parameters I suspect users might need are not really accessible through
that (ex: jit stacksize).

it is important to note that currently we are not preventing any user to use
those flags themselves in their patterns either.

Carlo
Ævar Arnfjörð Bjarmason July 29, 2019, 12:38 p.m. UTC | #6
On Mon, Jul 29 2019, Carlo Arenas wrote:

> On Mon, Jul 29, 2019 at 1:55 AM Ævar Arnfjörð Bjarmason
> <avarab@gmail.com> wrote:
>>
>> On Mon, Jul 29 2019, Carlo Marcelo Arenas Belón wrote:
>>
>> > PCRE1 allowed for a compile time flag to disable JIT, but PCRE2 never
>> > had one, forcing the use of JIT if -P was requested.
>>
>> What's that PCRE1 compile-time flag?
>
> NO_LIBPCRE1_JIT at GIT compile time (regardless of JIT support in the
> PCRE1 library you are using)

Ah of course, I was reading this as "regexp
compile-time". I.e. something like (*NO_JIT). No *such* thing exists for
PCRE v1 JIT AFAIK as exposed by git-grep.

>> > After ed0479ce3d (Merge branch 'ab/no-kwset' into next, 2019-07-15)
>> > the PCRE2 engine will be used more broadly and therefore adding this
>> > knob will give users a fallback for situations like the one observed
>> > in OpenBSD with a JIT enabled PCRE2, because of W^X restrictions:
>> >
>> >   $ git grep 'foo bar'
>> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>> >   $ git grep -G 'foo bar'
>> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>> >   $ git grep -E 'foo bar'
>> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>> >   $ git grep -F 'foo bar'
>> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>>
>> Yeah that obviously sucks more with ab/no-kwset, but that seems like a
>> case where -P would have been completely broken before, and therefore I
>> can't imagine the package ever passed "make test". Or is W^X also
>> exposed as some run-time option on OpenBSD?
>
> ironically, you could use PCRE1 since that is not using the JIT fast
> path and therefore will fallback automatically to the interpreter

...because OpenBSD PCRE v1 was compiled with --disable-jit before, but
their v2 package has --enable-jit, it just doesn't work at all? Is this
your custom built git + OpenBSD packages of PCRE coming with the OS?

I don't use OpenBSD, but isn't this their recipe? Seems they use "make
test", and don't compile with PCRE at all if I'm reading it right:
https://github.com/openbsd/ports/blob/master/devel/git/Makefile

> there is also a convoluted way to make your binary work by moving
> it into a mount point that has been specially exempted from that W^X
> restriction.
>
>> I.e. aside from the merits of such a setting in general these examples
>> seem like just working around something that should be fixed at make
>> all/test time, or maybe I'm missing something.
>
> 1) before you could just avoid using -P and still be able to grep
> 2) there is no way to tell PCRE2 to get out of the way even if you are
>     not using -P

Right, no arguments at all about ab/no-kwset making this worse (re: your
#1). I just really prefer not to expose/document config for what
*should* be something purely internal if the X-Y problem is a bug being
exposed that we should just fix.

Particularly because I think it's a losing battle to provide run-time
options for what are surely a *lot* of "make test" failures.

If it really is unavoidable to detect this until runtime in some common
configurations I have no problem with it, I just haven't encountered
that so far.

> you are right though that this is not a new problem and was reported
> before with patches and the last comment saying a configuration
> should be provided.

patches = your recent
https://public-inbox.org/git/20181209230024.43444-2-carenas@gmail.com/
or something earlier?

That patch seems sane without having tested it. Seems like the
equivalent of what we do with v1 with PCRE2_JIT_COMPLETE.

I *am* curious if there's setups where fixing the code for PCRE v1 isn't
purely an academic exercise. Is there a reason for why these platforms
can't just move to PCRE v2 in principle (dumpster fires in "next"
non-withstanding)?

>> To the extent that we'd want to make this sort of thing configurable, I
>> wonder if a continuation of my (*NO_JIT) patch isn't better, i.e. just
>> adding the ability to configure some string we'd inject at the start of
>> every pattern.
>
> looking at the number of lines of code, it would seem the configuration
> approach is simpler.
>
>> That would allow for setting any other number of options in
>> pcre2syntax(3) without us needing to carry config for each one,
>> e.g. (*LIMIT_HEAP=d), (*LIMIT_DEPTH=d) etc. It does present a larger
>> foot-gun surface though...
>
> the parameters I suspect users might need are not really accessible through
> that (ex: jit stacksize).
>
> it is important to note that currently we are not preventing any user to use
> those flags themselves in their patterns either.
Carlo Marcelo Arenas Belón July 30, 2019, 1:01 p.m. UTC | #7
On Mon, Jul 29, 2019 at 5:38 AM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
> On Mon, Jul 29 2019, Carlo Arenas wrote:
> > On Mon, Jul 29, 2019 at 1:55 AM Ævar Arnfjörð Bjarmason
> > <avarab@gmail.com> wrote:
> >>
> >> On Mon, Jul 29 2019, Carlo Marcelo Arenas Belón wrote:
> >>
> >> > PCRE1 allowed for a compile time flag to disable JIT, but PCRE2 never
> >> > had one, forcing the use of JIT if -P was requested.
> >>
> >> What's that PCRE1 compile-time flag?
> >
> > NO_LIBPCRE1_JIT at GIT compile time (regardless of JIT support in the
> > PCRE1 library you are using)
>
> Ah of course, I was reading this as "regexp
> compile-time". I.e. something like (*NO_JIT). No *such* thing exists for
> PCRE v1 JIT AFAIK as exposed by git-grep.

correct, but there are still other knobs like (*UTF), (*UCP), (?m) or
(?i) that also
affect some of our assumptions.

> >> > After ed0479ce3d (Merge branch 'ab/no-kwset' into next, 2019-07-15)
> >> > the PCRE2 engine will be used more broadly and therefore adding this
> >> > knob will give users a fallback for situations like the one observed
> >> > in OpenBSD with a JIT enabled PCRE2, because of W^X restrictions:
> >> >
> >> >   $ git grep 'foo bar'
> >> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
> >> >   $ git grep -G 'foo bar'
> >> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
> >> >   $ git grep -E 'foo bar'
> >> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
> >> >   $ git grep -F 'foo bar'
> >> >   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
> >>
> >> Yeah that obviously sucks more with ab/no-kwset, but that seems like a
> >> case where -P would have been completely broken before, and therefore I
> >> can't imagine the package ever passed "make test". Or is W^X also
> >> exposed as some run-time option on OpenBSD?
> >
> > ironically, you could use PCRE1 since that is not using the JIT fast
> > path and therefore will fallback automatically to the interpreter
>
> ...because OpenBSD PCRE v1 was compiled with --disable-jit before, but
> their v2 package has --enable-jit, it just doesn't work at all? Is this
> your custom built git + OpenBSD packages of PCRE coming with the OS?

sorry for the confusion, custom builds of both PCRE and git.
and I was referring to the fact that after 685668faaa (grep: stop
using a custom JIT stack with PCRE v1, 2019-07-26)
and with my patches to avoid UTF-8 issues, git + PCRE1 was a much pleasant
experience than git + PCRE2 in OpenBSD (which is also why I didn't
even care about fixing it for pcre.jit=!1 yet)

as shown by :
$ git grep 'foo bar' Documentation | cat
Documentation/RelNotes/1.8.0.txt:   when the user says "git checkout
-b -t foo bar" (e.g. "-t" is not a
Documentation/git-rev-list.txt: $ git rev-list foo bar ^baz
Documentation/gitattributes.txt:abc foo bar baz
$ git grep -P 'foo bar' Documentation | cat
Documentation/RelNotes/1.8.0.txt:   when the user says "git checkout
-b -t foo bar" (e.g. "-t" is not a
Documentation/git-rev-list.txt: $ git rev-list foo bar ^baz
Documentation/gitattributes.txt:abc foo bar baz
$ git grep -P 'foo[ ]bar' Documentation | cat
Documentation/RelNotes/1.8.0.txt:   when the user says "git checkout
-b -t foo bar" (e.g. "-t" is not a
Documentation/git-rev-list.txt: $ git rev-list foo bar ^baz
Documentation/gitattributes.txt:abc foo bar baz
$ dmesg | grep git
git-grep(87484): mmap W^X violation

the last of which might be suppressed with `-c pcre.jit=0` once I get
to fix that

> I don't use OpenBSD, but isn't this their recipe? Seems they use "make
> test", and don't compile with PCRE at all if I'm reading it right:
> https://github.com/openbsd/ports/blob/master/devel/git/Makefile

yes, I was using OpenBSD as a testbase where issues with JIT would
be easily reproducible; their packagers are smart enough not to enable
JIT by default in PCRE or even link it with git as you pointed out.

NetBSD/HardenedBSD might be a better example of a native default package that
would be affected in its standard configuration, if that is what you
were looking for.

> >> I.e. aside from the merits of such a setting in general these examples
> >> seem like just working around something that should be fixed at make
> >> all/test time, or maybe I'm missing something.
> >
> > 1) before you could just avoid using -P and still be able to grep
> > 2) there is no way to tell PCRE2 to get out of the way even if you are
> >     not using -P
>
> Right, no arguments at all about ab/no-kwset making this worse (re: your
> #1). I just really prefer not to expose/document config for what
> *should* be something purely internal if the X-Y problem is a bug being
> exposed that we should just fix.
>
> Particularly because I think it's a losing battle to provide run-time
> options for what are surely a *lot* of "make test" failures.

not sure I understand.  The knob was there to give flexibility to the user
to decide for himself how he wants to use the application and how that
use fits in their environment.

we can't predict either of those with 100% certainty and while it makes
sense we will impose some constrains we should understand that the
more inflexible those are, the more users will be alienated.

this specific constrain is particularly silly, even PCRE recommends using
JIT only as an optimization and fallback to the interpreter but we don't
follow that advice and the least we could do is give a escape hatch to
the users.

> If it really is unavoidable to detect this until runtime in some common
> configurations I have no problem with it, I just haven't encountered
> that so far.

guess it is sort of a chicken/egg problem and we would rather have
OpenBSD never linking their git with PCRE.

FWIW we can't know ahead of time if someone setup will include
running a PAX/SELinux enabled kernel on their otherwise regular
userspace encountering this problem.

there is also the case of Linux distributions without official packages
(like Gentoo and Arch) where each user decides which options they
want to use on their packages at their own time.

> patches = your recent
> https://public-inbox.org/git/20181209230024.43444-2-carenas@gmail.com/
> or something earlier?
>
> That patch seems sane without having tested it. Seems like the
> equivalent of what we do with v1 with PCRE2_JIT_COMPLETE.

I am missing context here; that patch was obsoleted by your ab/pcre-jit-fixes
branch and has nothing to do with PCRE2_JIT_COMPLETE.

if I recall correctly, was waiting on feedback on the series on top of your
original pcre-jit-fixes branch (with only 3 patches) and that was posted in:
https://public-inbox.org/git/20190726202642.7986-1-carenas@gmail.com/T/#u

> I *am* curious if there's setups where fixing the code for PCRE v1 isn't
> purely an academic exercise. Is there a reason for why these platforms
> can't just move to PCRE v2 in principle (dumpster fires in "next"
> non-withstanding)?

how can you expect me (or anyone else) to answer that question? obviously
we don't know, I personally for sure have no problem.

brian mentioned[1] some CentOS 6 users that don't have PCRE2 in their systems,
I mentioned Xcode's git in macOS as likely not updating since it uses
a system library
and that is also used by several other applications (like Safari)

but regardless of that, I see no reason for making their life more
difficult; PCRE1 is
widely used, it is still being supported, and they can use PCRE2 as an
alternative
most of the time (at least with git)

Carlo

[1] https://public-inbox.org/git/20190615191514.GD8616@genre.crustytoothpaste.net/
Johannes Schindelin July 31, 2019, 12:24 p.m. UTC | #8
Hi,

On Sun, 28 Jul 2019, Carlo Marcelo Arenas Belón wrote:

> PCRE1 allowed for a compile time flag to disable JIT, but PCRE2 never
> had one, forcing the use of JIT if -P was requested.
>
> After ed0479ce3d (Merge branch 'ab/no-kwset' into next, 2019-07-15)
> the PCRE2 engine will be used more broadly and therefore adding this
> knob will give users a fallback for situations like the one observed
> in OpenBSD with a JIT enabled PCRE2, because of W^X restrictions:

Just so that nobody else needs to feel like an idiot for not knowing
what on the burning planet "W^X" is supposed to mean:
https://en.wikipedia.org/wiki/W%5EX says it is essentially an OS-level
mechanism to prevent executing code that was written into memory by the
same process, i.e. JIT.

Makes me wonder whether node.js works on OpenBSD, or any decently fast
web browser like Firefox or Chromium...

But I digress. I just wanted to chime in with the results of my web hunt
for that "W^X" term.

Ciao,
Dscho

>
>   $ git grep 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>   $ git grep -G 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>   $ git grep -E 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>   $ git grep -F 'foo bar'
>   fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
>
> Signed-off-by: Carlo Marcelo Arenas Belón <carenas@gmail.com>
> ---
>  Documentation/git-grep.txt |  4 ++++
>  grep.c                     | 15 +++++++++++++--
>  grep.h                     |  1 +
>  3 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
> index c89fb569e3..ff544bdeec 100644
> --- a/Documentation/git-grep.txt
> +++ b/Documentation/git-grep.txt
> @@ -69,6 +69,10 @@ grep.fallbackToNoIndex::
>  	If set to true, fall back to git grep --no-index if git grep
>  	is executed outside of a git repository.  Defaults to false.
>
> +pcre.jit::
> +	If set to false, disable JIT when using PCRE.  Defaults to
> +	true.
> +
>
>  OPTIONS
>  -------
> diff --git a/grep.c b/grep.c
> index c7c06ae08d..3524d353dd 100644
> --- a/grep.c
> +++ b/grep.c
> @@ -56,6 +56,7 @@ void init_grep_defaults(struct repository *repo)
>  	opt->repo = repo;
>  	opt->relative = 1;
>  	opt->pathname = 1;
> +	opt->pcre_jit = 1;
>  	opt->max_depth = -1;
>  	opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
>  	color_set(opt->colors[GREP_COLOR_CONTEXT], "");
> @@ -125,6 +126,12 @@ int grep_config(const char *var, const char *value, void *cb)
>  		return 0;
>  	}
>
> +	if (!strcmp(var, "pcre.jit")) {
> +		int is_bool;
> +		opt->pcre_jit = git_config_bool_or_int(var, value, &is_bool);
> +		return 0;
> +	}
> +
>  	if (!strcmp(var, "color.grep"))
>  		opt->color = git_config_colorbool(var, value);
>  	if (!strcmp(var, "color.grep.match")) {
> @@ -163,6 +170,7 @@ void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix
>  	opt->pattern_tail = &opt->pattern_list;
>  	opt->header_tail = &opt->header_list;
>
> +	opt->pcre_jit = def->pcre_jit;
>  	opt->only_matching = def->only_matching;
>  	opt->color = def->color;
>  	opt->extended_regexp_option = def->extended_regexp_option;
> @@ -393,7 +401,8 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
>  		die("%s", error);
>
>  #ifdef GIT_PCRE1_USE_JIT
> -	pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
> +	if (opt->pcre_jit)
> +		pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
>  #endif
>  }
>
> @@ -489,7 +498,9 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
>  		compile_regexp_failed(p, (const char *)&errbuf);
>  	}
>
> -	pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
> +	if (opt->pcre_jit)
> +		pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
> +
>  	if (p->pcre2_jit_on) {
>  		jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
>  		if (jitret)
> diff --git a/grep.h b/grep.h
> index c0c71eb4a9..fff152e606 100644
> --- a/grep.h
> +++ b/grep.h
> @@ -151,6 +151,7 @@ struct grep_opt {
>  	int allow_textconv;
>  	int extended;
>  	int use_reflog_filter;
> +	int pcre_jit;
>  	int pcre1;
>  	int pcre2;
>  	int relative;
> --
> 2.22.0
>
>
diff mbox series

Patch

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index c89fb569e3..ff544bdeec 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -69,6 +69,10 @@  grep.fallbackToNoIndex::
 	If set to true, fall back to git grep --no-index if git grep
 	is executed outside of a git repository.  Defaults to false.
 
+pcre.jit::
+	If set to false, disable JIT when using PCRE.  Defaults to
+	true.
+
 
 OPTIONS
 -------
diff --git a/grep.c b/grep.c
index c7c06ae08d..3524d353dd 100644
--- a/grep.c
+++ b/grep.c
@@ -56,6 +56,7 @@  void init_grep_defaults(struct repository *repo)
 	opt->repo = repo;
 	opt->relative = 1;
 	opt->pathname = 1;
+	opt->pcre_jit = 1;
 	opt->max_depth = -1;
 	opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
 	color_set(opt->colors[GREP_COLOR_CONTEXT], "");
@@ -125,6 +126,12 @@  int grep_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp(var, "pcre.jit")) {
+		int is_bool;
+		opt->pcre_jit = git_config_bool_or_int(var, value, &is_bool);
+		return 0;
+	}
+
 	if (!strcmp(var, "color.grep"))
 		opt->color = git_config_colorbool(var, value);
 	if (!strcmp(var, "color.grep.match")) {
@@ -163,6 +170,7 @@  void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix
 	opt->pattern_tail = &opt->pattern_list;
 	opt->header_tail = &opt->header_list;
 
+	opt->pcre_jit = def->pcre_jit;
 	opt->only_matching = def->only_matching;
 	opt->color = def->color;
 	opt->extended_regexp_option = def->extended_regexp_option;
@@ -393,7 +401,8 @@  static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 		die("%s", error);
 
 #ifdef GIT_PCRE1_USE_JIT
-	pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
+	if (opt->pcre_jit)
+		pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
 #endif
 }
 
@@ -489,7 +498,9 @@  static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 		compile_regexp_failed(p, (const char *)&errbuf);
 	}
 
-	pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
+	if (opt->pcre_jit)
+		pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
+
 	if (p->pcre2_jit_on) {
 		jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
 		if (jitret)
diff --git a/grep.h b/grep.h
index c0c71eb4a9..fff152e606 100644
--- a/grep.h
+++ b/grep.h
@@ -151,6 +151,7 @@  struct grep_opt {
 	int allow_textconv;
 	int extended;
 	int use_reflog_filter;
+	int pcre_jit;
 	int pcre1;
 	int pcre2;
 	int relative;