Message ID | 50c69a988b2afca68f06e36949e4a8cc3a93940a.1628888668.git.jonathantanmy@google.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | In grep, no adding submodule ODB as alternates | expand |
On Fri, Aug 13, 2021 at 6:05 PM Jonathan Tan <jonathantanmy@google.com> wrote: > > Record the repository whenever an OID grep source is created, and teach > the worker threads to explicitly provide the repository when accessing > objects. > > Signed-off-by: Jonathan Tan <jonathantanmy@google.com> > --- > builtin/grep.c | 15 ++++++--------- > grep.c | 7 +++++-- > grep.h | 13 ++++++++++++- > 3 files changed, 23 insertions(+), 12 deletions(-) > > diff --git a/builtin/grep.c b/builtin/grep.c > index 69d8ea0808..d27e95e092 100644 > --- a/builtin/grep.c > +++ b/builtin/grep.c > @@ -349,7 +349,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid, > struct grep_source gs; > > grep_source_name(opt, filename, tree_name_len, &pathbuf); > - grep_source_init_oid(&gs, pathbuf.buf, path, oid); > + grep_source_init_oid(&gs, pathbuf.buf, path, oid, opt->repo); > strbuf_release(&pathbuf); > > if (num_threads > 1) { > @@ -462,14 +462,11 @@ static int grep_submodule(struct grep_opt *opt, > repo_read_gitmodules(subrepo, 0); > > /* > - * NEEDSWORK: This adds the submodule's object directory to the list of > - * alternates for the single in-memory object store. This has some bad > - * consequences for memory (processed objects will never be freed) and > - * performance (this increases the number of pack files git has to pay > - * attention to, to the sum of the number of pack files in all the > - * repositories processed so far). This can be removed once the object > - * store is no longer global and instead is a member of the repository > - * object. > + * All code paths tested by test code no longer need submodule ODBs to > + * be added as alternates, but add it to the list just in case. > + * Submodule ODBs added through add_submodule_odb_by_path() will be > + * lazily registered as alternates when needed (and except in an > + * unexpected code interaction, it won't be needed). Nice. > */ > add_submodule_odb_by_path(subrepo->objects->odb->path); > obj_read_unlock(); > diff --git a/grep.c b/grep.c > index 8a8105c2eb..79598f245f 100644 > --- a/grep.c > +++ b/grep.c > @@ -1863,7 +1863,8 @@ void grep_source_init_file(struct grep_source *gs, const char *name, > } > > void grep_source_init_oid(struct grep_source *gs, const char *name, > - const char *path, const struct object_id *oid) > + const char *path, const struct object_id *oid, > + struct repository *repo) > { > gs->type = GREP_SOURCE_OID; > gs->name = xstrdup_or_null(name); > @@ -1872,6 +1873,7 @@ void grep_source_init_oid(struct grep_source *gs, const char *name, > gs->size = 0; > gs->driver = NULL; > gs->identifier = oiddup(oid); > + gs->repo = repo; > } > > void grep_source_clear(struct grep_source *gs) > @@ -1900,7 +1902,8 @@ static int grep_source_load_oid(struct grep_source *gs) > { > enum object_type type; > > - gs->buf = read_object_file(gs->identifier, &type, &gs->size); > + gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type, > + &gs->size); > if (!gs->buf) > return error(_("'%s': unable to read %s"), > gs->name, > diff --git a/grep.h b/grep.h > index 480b3f5bba..24c6b64cea 100644 > --- a/grep.h > +++ b/grep.h > @@ -120,7 +120,16 @@ struct grep_opt { > struct grep_pat *header_list; > struct grep_pat **header_tail; > struct grep_expr *pattern_expression; > + > + /* > + * NEEDSWORK: See if we can remove this field, because the repository > + * should probably be per-source, not per-repo. Hmm, I think the "not per-repo" part is a bit confusing, as it refers to "the repository" ("the repository should not be per-repo"?) Could we remove that part? Maybe we could also be a bit more specific regarding the suggested conversion: "See if we can remove this field, because the repository should probably be per-source. That is, grep.c functions using `grep_opt.repo` should probably start using `grep_source.repo` instead." (But that's nitpicking from my part, feel free to ignore it.) > [...] This is potentially the > + * cause of at least one bug - "git grep" ignoring the textconv > + * attributes from submodules. See [1] for more information. > + * [1] https://lore.kernel.org/git/CAHd-oW5iEQarYVxEXoTG-ua2zdoybTrSjCBKtO0YT292fm0NQQ@mail.gmail.com/ > + */ > struct repository *repo; > + > const char *prefix; > int prefix_length; > regex_t regexp; > @@ -187,6 +196,7 @@ struct grep_source { > GREP_SOURCE_BUF, > } type; > void *identifier; > + struct repository *repo; /* if GREP_SOURCE_OID */ > > char *buf; > unsigned long size; > @@ -198,7 +208,8 @@ struct grep_source { > void grep_source_init_file(struct grep_source *gs, const char *name, > const char *path); > void grep_source_init_oid(struct grep_source *gs, const char *name, > - const char *path, const struct object_id *oid); > + const char *path, const struct object_id *oid, > + struct repository *repo); > void grep_source_clear_data(struct grep_source *gs); > void grep_source_clear(struct grep_source *gs); > void grep_source_load_driver(struct grep_source *gs, > -- > 2.33.0.rc1.237.g0d66db33f3-goog >
> > @@ -120,7 +120,16 @@ struct grep_opt { > > struct grep_pat *header_list; > > struct grep_pat **header_tail; > > struct grep_expr *pattern_expression; > > + > > + /* > > + * NEEDSWORK: See if we can remove this field, because the repository > > + * should probably be per-source, not per-repo. > > Hmm, I think the "not per-repo" part is a bit confusing, as it refers > to "the repository" ("the repository should not be per-repo"?) Could > we remove that part? > > Maybe we could also be a bit more specific regarding the suggested > conversion: "See if we can remove this field, because the repository > should probably be per-source. That is, grep.c functions using > `grep_opt.repo` should probably start using `grep_source.repo` > instead." (But that's nitpicking from my part, feel free to ignore > it.) Thanks - your suggestion is much clearer, so I'll use it.
diff --git a/builtin/grep.c b/builtin/grep.c index 69d8ea0808..d27e95e092 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -349,7 +349,7 @@ static int grep_oid(struct grep_opt *opt, const struct object_id *oid, struct grep_source gs; grep_source_name(opt, filename, tree_name_len, &pathbuf); - grep_source_init_oid(&gs, pathbuf.buf, path, oid); + grep_source_init_oid(&gs, pathbuf.buf, path, oid, opt->repo); strbuf_release(&pathbuf); if (num_threads > 1) { @@ -462,14 +462,11 @@ static int grep_submodule(struct grep_opt *opt, repo_read_gitmodules(subrepo, 0); /* - * NEEDSWORK: This adds the submodule's object directory to the list of - * alternates for the single in-memory object store. This has some bad - * consequences for memory (processed objects will never be freed) and - * performance (this increases the number of pack files git has to pay - * attention to, to the sum of the number of pack files in all the - * repositories processed so far). This can be removed once the object - * store is no longer global and instead is a member of the repository - * object. + * All code paths tested by test code no longer need submodule ODBs to + * be added as alternates, but add it to the list just in case. + * Submodule ODBs added through add_submodule_odb_by_path() will be + * lazily registered as alternates when needed (and except in an + * unexpected code interaction, it won't be needed). */ add_submodule_odb_by_path(subrepo->objects->odb->path); obj_read_unlock(); diff --git a/grep.c b/grep.c index 8a8105c2eb..79598f245f 100644 --- a/grep.c +++ b/grep.c @@ -1863,7 +1863,8 @@ void grep_source_init_file(struct grep_source *gs, const char *name, } void grep_source_init_oid(struct grep_source *gs, const char *name, - const char *path, const struct object_id *oid) + const char *path, const struct object_id *oid, + struct repository *repo) { gs->type = GREP_SOURCE_OID; gs->name = xstrdup_or_null(name); @@ -1872,6 +1873,7 @@ void grep_source_init_oid(struct grep_source *gs, const char *name, gs->size = 0; gs->driver = NULL; gs->identifier = oiddup(oid); + gs->repo = repo; } void grep_source_clear(struct grep_source *gs) @@ -1900,7 +1902,8 @@ static int grep_source_load_oid(struct grep_source *gs) { enum object_type type; - gs->buf = read_object_file(gs->identifier, &type, &gs->size); + gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type, + &gs->size); if (!gs->buf) return error(_("'%s': unable to read %s"), gs->name, diff --git a/grep.h b/grep.h index 480b3f5bba..24c6b64cea 100644 --- a/grep.h +++ b/grep.h @@ -120,7 +120,16 @@ struct grep_opt { struct grep_pat *header_list; struct grep_pat **header_tail; struct grep_expr *pattern_expression; + + /* + * NEEDSWORK: See if we can remove this field, because the repository + * should probably be per-source, not per-repo. This is potentially the + * cause of at least one bug - "git grep" ignoring the textconv + * attributes from submodules. See [1] for more information. + * [1] https://lore.kernel.org/git/CAHd-oW5iEQarYVxEXoTG-ua2zdoybTrSjCBKtO0YT292fm0NQQ@mail.gmail.com/ + */ struct repository *repo; + const char *prefix; int prefix_length; regex_t regexp; @@ -187,6 +196,7 @@ struct grep_source { GREP_SOURCE_BUF, } type; void *identifier; + struct repository *repo; /* if GREP_SOURCE_OID */ char *buf; unsigned long size; @@ -198,7 +208,8 @@ struct grep_source { void grep_source_init_file(struct grep_source *gs, const char *name, const char *path); void grep_source_init_oid(struct grep_source *gs, const char *name, - const char *path, const struct object_id *oid); + const char *path, const struct object_id *oid, + struct repository *repo); void grep_source_clear_data(struct grep_source *gs); void grep_source_clear(struct grep_source *gs); void grep_source_load_driver(struct grep_source *gs,
Record the repository whenever an OID grep source is created, and teach the worker threads to explicitly provide the repository when accessing objects. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> --- builtin/grep.c | 15 ++++++--------- grep.c | 7 +++++-- grep.h | 13 ++++++++++++- 3 files changed, 23 insertions(+), 12 deletions(-)