mbox series

[v2,0/8] In grep, no adding submodule ODB as alternates

Message ID cover.1628888668.git.jonathantanmy@google.com (mailing list archive)
Headers show
Series In grep, no adding submodule ODB as alternates | expand

Message

Jonathan Tan Aug. 13, 2021, 9:05 p.m. UTC
Thanks everyone for your reviews. I believe I've addressed all of them.

The main change is that I've also made a change in submodule-config to
avoid registering submodule ODBs as alternates (thanks, Matheus, for
noticing this).

Jonathan Tan (8):
  submodule: lazily add submodule ODBs as alternates
  grep: use submodule-ODB-as-alternate lazy-addition
  grep: typesafe versions of grep_source_init
  grep: read submodule entry with explicit repo
  grep: allocate subrepos on heap
  grep: add repository to OID grep sources
  submodule-config: pass repo upon blob config read
  t7814: show lack of alternate ODB-adding

 builtin/grep.c                     | 64 +++++++++++++++++++-----------
 config.c                           | 18 ++++++---
 config.h                           |  3 ++
 grep.c                             | 51 +++++++++++++++---------
 grep.h                             | 18 +++++++--
 object-file.c                      |  5 +++
 submodule-config.c                 |  5 ++-
 submodule.c                        | 25 +++++++++++-
 submodule.h                        |  8 ++++
 t/README                           | 10 +++++
 t/t7814-grep-recurse-submodules.sh |  3 ++
 11 files changed, 156 insertions(+), 54 deletions(-)

Range-diff against v1:
1:  5994a517e8 = 1:  5994a517e8 submodule: lazily add submodule ODBs as alternates
2:  31e9b914c4 = 2:  31e9b914c4 grep: use submodule-ODB-as-alternate lazy-addition
3:  e5e6a0dc1e ! 3:  aa3f1f3c89 grep: typesafe versions of grep_source_init
    @@ builtin/grep.c: static int grep_file(struct grep_opt *opt, const char *filename)
      	if (num_threads > 1) {
     
      ## grep.c ##
    -@@ grep.c: int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
    +@@ grep.c: int grep_source(struct grep_opt *opt, struct grep_source *gs)
    + 	return grep_source_1(opt, gs, 0);
    + }
    + 
    ++static void grep_source_init_buf(struct grep_source *gs, char *buf,
    ++				 unsigned long size)
    ++{
    ++	gs->type = GREP_SOURCE_BUF;
    ++	gs->name = NULL;
    ++	gs->path = NULL;
    ++	gs->buf = buf;
    ++	gs->size = size;
    ++	gs->driver = NULL;
    ++	gs->identifier = NULL;
    ++}
    ++
    + int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
    + {
      	struct grep_source gs;
      	int r;
      
     -	grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
    -+	grep_source_init_buf(&gs);
    - 	gs.buf = buf;
    - 	gs.size = size;
    +-	gs.buf = buf;
    +-	gs.size = size;
    ++	grep_source_init_buf(&gs, buf, size);
    + 
    + 	r = grep_source(opt, &gs);
      
     @@ grep.c: int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
      	return r;
    @@ grep.c: int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
     +	gs->size = 0;
     +	gs->driver = NULL;
     +	gs->identifier = oiddup(oid);
    -+}
    -+
    -+void grep_source_init_buf(struct grep_source *gs)
    -+{
    -+	gs->type = GREP_SOURCE_BUF;
    -+	gs->name = NULL;
    -+	gs->path = NULL;
    -+	gs->buf = NULL;
    -+	gs->size = 0;
    -+	gs->driver = NULL;
    -+	gs->identifier = NULL;
      }
      
      void grep_source_clear(struct grep_source *gs)
    @@ grep.h: struct grep_source {
     +			   const char *path);
     +void grep_source_init_oid(struct grep_source *gs, const char *name,
     +			  const char *path, const struct object_id *oid);
    -+void grep_source_init_buf(struct grep_source *gs);
      void grep_source_clear_data(struct grep_source *gs);
      void grep_source_clear(struct grep_source *gs);
      void grep_source_load_driver(struct grep_source *gs,
4:  30ead880b3 = 4:  050deacfb7 grep: read submodule entry with explicit repo
5:  f62608e88f ! 5:  3f24815224 grep: allocate subrepos on heap
    @@ builtin/grep.c: static void work_done(struct work_item *w)
     +		free(repos_to_free[i]);
     +	}
     +	free(repos_to_free);
    ++	repos_to_free_nr = 0;
    ++	repos_to_free_alloc = 0;
     +}
     +
      static void *run(void *arg)
6:  b2df245587 ! 6:  50c69a988b grep: add repository to OID grep sources
    @@ builtin/grep.c: static int grep_oid(struct grep_opt *opt, const struct object_id
      	strbuf_release(&pathbuf);
      
      	if (num_threads > 1) {
    +@@ builtin/grep.c: static int grep_submodule(struct grep_opt *opt,
    + 	repo_read_gitmodules(subrepo, 0);
    + 
    + 	/*
    +-	 * NEEDSWORK: This adds the submodule's object directory to the list of
    +-	 * alternates for the single in-memory object store.  This has some bad
    +-	 * consequences for memory (processed objects will never be freed) and
    +-	 * performance (this increases the number of pack files git has to pay
    +-	 * attention to, to the sum of the number of pack files in all the
    +-	 * repositories processed so far).  This can be removed once the object
    +-	 * store is no longer global and instead is a member of the repository
    +-	 * object.
    ++	 * All code paths tested by test code no longer need submodule ODBs to
    ++	 * be added as alternates, but add it to the list just in case.
    ++	 * Submodule ODBs added through add_submodule_odb_by_path() will be
    ++	 * lazily registered as alternates when needed (and except in an
    ++	 * unexpected code interaction, it won't be needed).
    + 	 */
    + 	add_submodule_odb_by_path(subrepo->objects->odb->path);
    + 	obj_read_unlock();
     
      ## grep.c ##
     @@ grep.c: void grep_source_init_file(struct grep_source *gs, const char *name,
    @@ grep.c: void grep_source_init_oid(struct grep_source *gs, const char *name,
     +	gs->repo = repo;
      }
      
    - void grep_source_init_buf(struct grep_source *gs)
    + void grep_source_clear(struct grep_source *gs)
     @@ grep.c: static int grep_source_load_oid(struct grep_source *gs)
      {
      	enum object_type type;
    @@ grep.c: static int grep_source_load_oid(struct grep_source *gs)
      			     gs->name,
     
      ## grep.h ##
    +@@ grep.h: struct grep_opt {
    + 	struct grep_pat *header_list;
    + 	struct grep_pat **header_tail;
    + 	struct grep_expr *pattern_expression;
    ++
    ++	/*
    ++	 * NEEDSWORK: See if we can remove this field, because the repository
    ++	 * should probably be per-source, not per-repo. This is potentially the
    ++	 * cause of at least one bug - "git grep" ignoring the textconv
    ++	 * attributes from submodules. See [1] for more information.
    ++	 * [1] https://lore.kernel.org/git/CAHd-oW5iEQarYVxEXoTG-ua2zdoybTrSjCBKtO0YT292fm0NQQ@mail.gmail.com/
    ++	 */
    + 	struct repository *repo;
    ++
    + 	const char *prefix;
    + 	int prefix_length;
    + 	regex_t regexp;
     @@ grep.h: struct grep_source {
      		GREP_SOURCE_BUF,
      	} type;
    @@ grep.h: struct grep_source {
     -			  const char *path, const struct object_id *oid);
     +			  const char *path, const struct object_id *oid,
     +			  struct repository *repo);
    - void grep_source_init_buf(struct grep_source *gs);
      void grep_source_clear_data(struct grep_source *gs);
      void grep_source_clear(struct grep_source *gs);
    + void grep_source_load_driver(struct grep_source *gs,
-:  ---------- > 7:  94db10a4e5 submodule-config: pass repo upon blob config read
7:  f1fc89894b = 8:  4a51fcfb77 t7814: show lack of alternate ODB-adding

Comments

Matheus Tavares Aug. 16, 2021, 3:14 p.m. UTC | #1
On Fri, Aug 13, 2021 at 6:05 PM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> Thanks everyone for your reviews. I believe I've addressed all of them.
>
> The main change is that I've also made a change in submodule-config to
> avoid registering submodule ODBs as alternates (thanks, Matheus, for
> noticing this).

Thanks for working on this! I reviewed the entire series and left a
few comments on some small things, but overall it looks good to me.

Reviewed-by: Matheus Tavares <matheus.bernardino@usp.br>