@@ -365,6 +365,8 @@ struct diff_options {
struct repository *repo;
struct option *parseopts;
+
+ struct grep_opt *grep_filter;
};
unsigned diff_filter_bit(char status);
@@ -520,6 +522,8 @@ int git_config_rename(const char *var, const char *value);
#define DIFF_PICKAXE_KIND_G 8 /* grep in the patch */
#define DIFF_PICKAXE_KIND_OBJFIND 16 /* specific object IDs */
+#define DIFF_PICKAXE_KIND_GS_MASK (DIFF_PICKAXE_KIND_S | \
+ DIFF_PICKAXE_KIND_G)
#define DIFF_PICKAXE_KINDS_MASK (DIFF_PICKAXE_KIND_S | \
DIFF_PICKAXE_KIND_G | \
DIFF_PICKAXE_KIND_OBJFIND)
@@ -6,16 +6,16 @@
#include "diff.h"
#include "diffcore.h"
#include "xdiff-interface.h"
-#include "kwset.h"
+#include "grep.h"
#include "commit.h"
#include "quote.h"
typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
struct diff_options *o,
- regex_t *regexp, kwset_t kws);
+ struct grep_opt *grep_filter);
struct diffgrep_cb {
- regex_t *regexp;
+ struct grep_opt *grep_filter;
int hit;
};
@@ -23,6 +23,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
{
struct diffgrep_cb *data = priv;
regmatch_t regmatch;
+ struct grep_opt *grep_filter = data->grep_filter;
+ struct grep_pat *grep_pat = grep_filter->pattern_list;
if (line[0] != '+' && line[0] != '-')
return;
@@ -32,25 +34,25 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len)
* caller early.
*/
return;
- data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
- ®match, 0);
+ data->hit = patmatch(grep_pat, line + 1, line + len + 1, ®match, 0);
}
static int diff_grep(mmfile_t *one, mmfile_t *two,
struct diff_options *o,
- regex_t *regexp, kwset_t kws)
+ struct grep_opt *grep_filter)
{
- regmatch_t regmatch;
struct diffgrep_cb ecbdata;
xpparam_t xpp;
xdemitconf_t xecfg;
+ regmatch_t regmatch;
+ struct grep_pat *grep_pat = grep_filter->pattern_list;
if (!one)
- return !regexec_buf(regexp, two->ptr, two->size,
- 1, ®match, 0);
+ return patmatch(grep_pat, two->ptr, two->ptr + two->size,
+ ®match, 0);
if (!two)
- return !regexec_buf(regexp, one->ptr, one->size,
- 1, ®match, 0);
+ return patmatch(grep_pat, one->ptr, one->ptr + one->size,
+ ®match, 0);
/*
* We have both sides; need to run textual diff and see if
@@ -58,7 +60,7 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
*/
memset(&xpp, 0, sizeof(xpp));
memset(&xecfg, 0, sizeof(xecfg));
- ecbdata.regexp = regexp;
+ ecbdata.grep_filter = grep_filter;
ecbdata.hit = 0;
xecfg.ctxlen = 0;
xecfg.interhunkctxlen = o->interhunkcontext;
@@ -68,52 +70,40 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
return ecbdata.hit;
}
-static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
+static unsigned int contains(mmfile_t *mf, struct grep_opt *grep_filter)
{
+
unsigned int cnt = 0;
unsigned long sz = mf->size;
- const char *data = mf->ptr;
-
- if (regexp) {
- regmatch_t regmatch;
- int flags = 0;
-
- while (!regexec_buf(regexp, data, sz, 1, ®match, flags)) {
- flags |= REG_NOTBOL;
- data += regmatch.rm_eo;
- sz -= regmatch.rm_eo;
- if (regmatch.rm_so == regmatch.rm_eo) {
- data++;
- sz--;
- }
- cnt++;
- }
-
- } else { /* Classic exact string match */
- while (sz) {
- struct kwsmatch kwsm;
- size_t offset = kwsexec(kws, data, sz, &kwsm);
- if (offset == -1)
- break;
- sz -= offset + kwsm.size[0];
- data += offset + kwsm.size[0];
- cnt++;
+ char *data = mf->ptr;
+ regmatch_t regmatch;
+ int flags = 0;
+ struct grep_pat *grep_pat = grep_filter->pattern_list;
+
+ while (patmatch(grep_pat, data, data + sz, ®match, flags)) {
+ flags |= REG_NOTBOL;
+ data += regmatch.rm_eo;
+ sz -= regmatch.rm_eo;
+ if (regmatch.rm_so == regmatch.rm_eo) {
+ data++;
+ sz--;
}
+ cnt++;
}
return cnt;
}
static int has_changes(mmfile_t *one, mmfile_t *two,
struct diff_options *o,
- regex_t *regexp, kwset_t kws)
+ struct grep_opt *grep_filter)
{
- unsigned int one_contains = one ? contains(one, regexp, kws) : 0;
- unsigned int two_contains = two ? contains(two, regexp, kws) : 0;
+ unsigned int one_contains = one ? contains(one, grep_filter) : 0;
+ unsigned int two_contains = two ? contains(two, grep_filter) : 0;
return one_contains != two_contains;
}
static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
- regex_t *regexp, kwset_t kws, pickaxe_fn fn)
+ struct grep_opt *grep_filter, pickaxe_fn fn)
{
struct userdiff_driver *textconv_one = NULL;
struct userdiff_driver *textconv_two = NULL;
@@ -160,7 +150,7 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
ret = fn(DIFF_FILE_VALID(p->one) ? &mf1 : NULL,
DIFF_FILE_VALID(p->two) ? &mf2 : NULL,
- o, regexp, kws);
+ o, grep_filter);
if (textconv_one)
free(mf1.ptr);
@@ -173,7 +163,7 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
}
static void pickaxe(struct diff_queue_struct *q, struct diff_options *o,
- regex_t *regexp, kwset_t kws, pickaxe_fn fn)
+ struct grep_opt *grep_filter, pickaxe_fn fn)
{
int i;
struct diff_queue_struct outq;
@@ -184,7 +174,7 @@ static void pickaxe(struct diff_queue_struct *q, struct diff_options *o,
/* Showing the whole changeset if needle exists */
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
- if (pickaxe_match(p, o, regexp, kws, fn))
+ if (pickaxe_match(p, o, grep_filter, fn))
return; /* do not munge the queue */
}
@@ -199,7 +189,7 @@ static void pickaxe(struct diff_queue_struct *q, struct diff_options *o,
/* Showing only the filepairs that has the needle */
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
- if (pickaxe_match(p, o, regexp, kws, fn))
+ if (pickaxe_match(p, o, grep_filter, fn))
diff_q(&outq, p);
else
diff_free_filepair(p);
@@ -210,54 +200,35 @@ static void pickaxe(struct diff_queue_struct *q, struct diff_options *o,
*q = outq;
}
-static void regcomp_or_die(regex_t *regex, const char *needle, int cflags)
-{
- int err = regcomp(regex, needle, cflags);
- if (err) {
- /* The POSIX.2 people are surely sick */
- char errbuf[1024];
- regerror(err, regex, errbuf, 1024);
- die("invalid regex: %s", errbuf);
- }
-}
-
void diffcore_pickaxe(struct diff_options *o)
{
const char *needle = o->pickaxe;
int opts = o->pickaxe_opts;
- regex_t regex, *regexp = NULL;
- kwset_t kws = NULL;
+ struct grep_opt opt;
+
+ if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_GS_MASK)) {
+ grep_init(&opt, the_repository, NULL);
+#ifdef USE_LIBPCRE2
+ grep_commit_pattern_type(GREP_PATTERN_TYPE_PCRE, &opt);
+#else
+ grep_commit_pattern_type(GREP_PATTERN_TYPE_ERE, &opt);
+#endif
- if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
- int cflags = REG_EXTENDED | REG_NEWLINE;
if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE)
- cflags |= REG_ICASE;
- regcomp_or_die(®ex, needle, cflags);
- regexp = ®ex;
- } else if (opts & DIFF_PICKAXE_KIND_S) {
- if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE &&
- has_non_ascii(needle)) {
- struct strbuf sb = STRBUF_INIT;
- int cflags = REG_NEWLINE | REG_ICASE;
-
- basic_regex_quote_buf(&sb, needle);
- regcomp_or_die(®ex, sb.buf, cflags);
- strbuf_release(&sb);
- regexp = ®ex;
- } else {
- kws = kwsalloc(o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE
- ? tolower_trans_tbl : NULL);
- kwsincr(kws, needle, strlen(needle));
- kwsprep(kws);
- }
+ opt.ignore_case = 1;
+ if (opts & DIFF_PICKAXE_KIND_S &&
+ !(opts & DIFF_PICKAXE_REGEX))
+ opt.fixed = 1;
+
+ append_grep_pattern(&opt, needle, "diffcore-pickaxe", 0, GREP_PATTERN);
+ compile_grep_patterns(&opt);
}
- pickaxe(&diff_queued_diff, o, regexp, kws,
+ pickaxe(&diff_queued_diff, o, &opt,
(opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes);
- if (regexp)
- regfree(regexp);
- if (kws)
- kwsfree(kws);
+ if (opts & ~DIFF_PICKAXE_KIND_OBJFIND)
+ free_grep_patterns(&opt);
+
return;
}
Follow-up b65abcafc7a (grep: use PCRE v2 for optimized fixed-string search, 2019-07-01) and remove the use of kwset in the pickaxe code for fixed-string search, in favor of optimistically using PCRE v2. This does mean that the semantics of the -G option subtly change, before it's an ERE, whereas now it'll be a PCRE if we're compiled with PCRE. Since PCRE is almost entirely a strict superset of ERE syntax I think this is OK. Now when running the newly added t/perf/p4209-pickaxe.sh[1] and the latest PCRE v2 we'll get the following performance improvements (well, mostly improvements): Test origin/next HEAD ----------------------------------------------------------------------------------------------------------------------- 4209.1: git log --pickaxe-regex -S'[þæö]' <limit-rev>.. 0.41(0.37+0.03) 0.42(0.37+0.05) +2.4% 4209.2: git log -G'a' <limit-rev>.. 0.61(0.52+0.08) 0.50(0.45+0.05) -18.0% 4209.3: git log --pickaxe-regex -S'a' <limit-rev>.. 0.73(0.66+0.07) 0.42(0.37+0.05) -42.5% 4209.4: git log --text -G'a' <limit-rev>.. 0.61(0.54+0.06) 0.50(0.44+0.05) -18.0% 4209.5: git log --text --pickaxe-all -G'a' <limit-rev>.. 0.44(0.37+0.06) 0.46(0.36+0.06) +4.5% 4209.6: git log --pickaxe-all -G'a' <limit-rev>.. 0.46(0.39+0.07) 0.38(0.33+0.05) -17.4% 4209.7: git log --pickaxe-all --pickaxe-regex -S'a' <limit-rev>.. 0.55(0.48+0.06) 0.35(0.30+0.04) -36.4% 4209.8: git log -G'uncommon' <limit-rev>.. 0.68(0.60+0.07) 0.59(0.53+0.05) -13.2% 4209.9: git log --pickaxe-regex -S'uncommon' <limit-rev>.. 0.48(0.45+0.03) 0.36(0.31+0.05) -25.0% 4209.10: git log --text -G'uncommon' <limit-rev>.. 0.66(0.58+0.07) 0.58(0.52+0.04) -12.1% 4209.11: git log --text --pickaxe-all -G'uncommon' <limit-rev>.. 0.67(0.61+0.05) 0.57(0.52+0.05) -14.9% 4209.12: git log --pickaxe-all -G'uncommon' <limit-rev>.. 0.62(0.55+0.06) 0.52(0.46+0.06) -16.1% 4209.13: git log --pickaxe-all --pickaxe-regex -S'uncommon' <limit-rev>.. 0.49(0.43+0.06) 0.31(0.26+0.04) -36.7% 4209.14: git log -G'[þæö]' <limit-rev>.. 0.71(0.64+0.07) 0.51(0.47+0.04) -28.2% 4209.15: git log --pickaxe-regex -S'[þæö]' <limit-rev>.. 0.45(0.40+0.05) 0.42(0.37+0.04) -6.7% 4209.16: git log --text -G'[þæö]' <limit-rev>.. 0.64(0.56+0.07) 0.50(0.44+0.06) -21.9% 4209.17: git log --text --pickaxe-all -G'[þæö]' <limit-rev>.. 0.64(0.54+0.09) 0.50(0.47+0.03) -21.9% 4209.18: git log --pickaxe-all -G'[þæö]' <limit-rev>.. 0.66(0.59+0.07) 0.50(0.45+0.05) -24.2% 4209.19: git log --pickaxe-all --pickaxe-regex -S'[þæö]' <limit-rev>.. 0.42(0.38+0.04) 0.41(0.37+0.04) -2.4% 4209.20: git log -i --pickaxe-regex -S'[þæö]' <limit-rev>.. 0.41(0.38+0.03) 0.49(0.43+0.05) +19.5% 4209.21: git log -i -G'a' <limit-rev>.. 0.63(0.61+0.02) 0.50(0.45+0.05) -20.6% 4209.22: git log -i --pickaxe-regex -S'a' <limit-rev>.. 0.81(0.75+0.06) 0.60(0.58+0.02) -25.9% 4209.23: git log -i --text -G'a' <limit-rev>.. 0.64(0.54+0.10) 0.49(0.44+0.05) -23.4% 4209.24: git log -i --text --pickaxe-all -G'a' <limit-rev>.. 0.47(0.43+0.04) 0.37(0.31+0.06) -21.3% 4209.25: git log -i --pickaxe-all -G'a' <limit-rev>.. 0.51(0.43+0.08) 0.37(0.31+0.06) -27.5% 4209.26: git log -i --pickaxe-all --pickaxe-regex -S'a' <limit-rev>.. 0.62(0.55+0.05) 0.46(0.41+0.04) -25.8% 4209.27: git log -i -G'uncommon' <limit-rev>.. 0.64(0.58+0.05) 0.51(0.48+0.03) -20.3% 4209.28: git log -i --pickaxe-regex -S'uncommon' <limit-rev>.. 0.48(0.44+0.04) 0.47(0.42+0.05) -2.1% 4209.29: git log -i --text -G'uncommon' <limit-rev>.. 0.62(0.53+0.08) 0.51(0.44+0.07) -17.7% 4209.30: git log -i --text --pickaxe-all -G'uncommon' <limit-rev>.. 0.61(0.53+0.08) 0.51(0.45+0.06) -16.4% 4209.31: git log -i --pickaxe-all -G'uncommon' <limit-rev>.. 0.63(0.57+0.05) 0.51(0.47+0.04) -19.0% 4209.32: git log -i --pickaxe-all --pickaxe-regex -S'uncommon' <limit-rev>.. 0.47(0.42+0.04) 0.47(0.45+0.01) +0.0% 4209.33: git log -i -G'[þæö]' <limit-rev>.. 0.60(0.54+0.05) 0.51(0.47+0.04) -15.0% 4209.34: git log -i --pickaxe-regex -S'[þæö]' <limit-rev>.. 0.39(0.33+0.05) 0.49(0.45+0.03) +25.6% 4209.35: git log -i --text -G'[þæö]' <limit-rev>.. 0.59(0.53+0.06) 0.51(0.47+0.02) -13.6% 4209.36: git log -i --text --pickaxe-all -G'[þæö]' <limit-rev>.. 0.62(0.56+0.05) 0.51(0.45+0.04) -17.7% 4209.37: git log -i --pickaxe-all -G'[þæö]' <limit-rev>.. 0.61(0.55+0.06) 0.51(0.47+0.04) -16.4% 4209.38: git log -i --pickaxe-all --pickaxe-regex -S'[þæö]' <limit-rev>.. 0.42(0.39+0.02) 0.49(0.46+0.03) +16.7% 1. With these options: GIT_PERF_EXTRA=1 GIT_PERF_REPEAT_COUNT=10 GIT_PERF_MAKE_OPTS='-j8 USE_LIBPCRE=Y CFLAGS=-O3 LIBPCREDIR=/home/avar/g/pcre2/inst' ./run origin/next HEAD -- p4209-pickaxe.sh Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> --- diff.h | 4 ++ diffcore-pickaxe.c | 143 ++++++++++++++++++--------------------------- 2 files changed, 61 insertions(+), 86 deletions(-)