diff mbox series

[1/8] built-in add -i: allow filtering the modified files list

Message ID 1844c4d55e21c40cbdbfdd73c82b4a1a074ff184.1573821382.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series [1/8] built-in add -i: allow filtering the modified files list | expand

Commit Message

Philip Peterson via GitGitGadget Nov. 15, 2019, 12:36 p.m. UTC
From: Johannes Schindelin <johannes.schindelin@gmx.de>

In the `update` command of `git add -i`, we are primarily interested in the
list of modified files that have worktree (i.e. unstaged) changes.

At the same time, we need to determine _also_ the staged changes, to be
able to produce the full added/deleted information.

The Perl script version of `git add -i` has a parameter of the
`list_modified()` function for that matter. In C, we can be a lot more
precise, using an `enum`.

The C implementation of the filter also has an easier time to avoid
unnecessary work, simply by using an adaptive order of the `diff-index`
and `diff-files` phases, and then skipping files in the second phase
when they have not been seen in the first phase.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 add-interactive.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

Comments

Junio C Hamano Nov. 21, 2019, 8:06 a.m. UTC | #1
"Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
writes:

> +enum modified_files_filter {
> +	NO_FILTER = 0,
> +	WORKTREE_ONLY = 1,
> +	INDEX_ONLY = 2,
> +};
> +
> +static int get_modified_files(struct repository *r,
> +			      enum modified_files_filter filter,
> +			      struct string_list *files,
>  			      const struct pathspec *ps)
>  {
>  	struct object_id head_oid;
>  	int is_initial = !resolve_ref_unsafe("HEAD", RESOLVE_REF_READING,
>  					     &head_oid, NULL);
>  	struct collection_status s = { FROM_WORKTREE };

Will have a comment on this later.

> +	int i;
>  
>  	if (discard_index(r->index) < 0 ||
>  	    repo_read_index_preload(r, ps, 0) < 0)
> @@ -411,10 +424,16 @@ static int get_modified_files(struct repository *r, struct string_list *files,
>  	s.files = files;
>  	hashmap_init(&s.file_map, pathname_entry_cmp, NULL, 0);
>  
> -	for (s.phase = FROM_WORKTREE; s.phase <= FROM_INDEX; s.phase++) {
> +	for (i = 0; i < 2; i++) {
>  		struct rev_info rev;
>  		struct setup_revision_opt opt = { 0 };
>  
> +		if (filter == INDEX_ONLY)
> +			s.phase = i ? FROM_WORKTREE : FROM_INDEX;
> +		else
> +			s.phase = i ? FROM_INDEX : FROM_WORKTREE;
> +		s.skip_unseen = filter && i;

;-)

Looks somewhat crazy but it works---when the caller wants to do
'index-only' for example we are not interested in paths that did not
appear in the INDEX side of the diff, so we run FROM_INDEX diff first
and then the let next one (i.e. FROM_WORKTREE diff) be skipped for
paths that are not in the result of the first one.  To me personally,
I would find the tertially expession written like this

	s.phase = (i == 0) ? FROM_INDEX : FROM_WORKTREE;

much easier to follow, as it matches the order which ones are done
first.

Also I notice two things.

 - It used to make 100% sense to call this field .phase because we
   always processed the first phase and then on to the second phase,
   where the first one was called WORKTREE and the second one was
   called INDEX.  In the new world order after this step, the name
   .phase no longer makes any sense.  Sometimes we run diff-files in
   phase 0 and diff-index in phase 1, but some other times we run
   diff-index in phase 0 and diff-files in phase 0.  The variable
   that has the closest meaning to the 'phase' is the newly
   introduced 'i'.

 - The initialization of the local "struct collection_status s" at
   the beginning of the function still uses .phase = FROM_WORKTREE
   which might be somewhat misleading.  We cannot remove the whole
   initialization, as the original used to nul initialize the other
   fields in this structure and I suspect the code still relies on
   it, but the initialization of .phase in particular no longer has
   any effect; it always is assigned inside the loop before the
   field gets used.


>  		opt.def = is_initial ?
>  			empty_tree_oid_hex() : oid_to_hex(&head_oid);

By the way, this is not a new issue introduced by this patch, but I
notice copy_pathspec() is used twice on the same rev.prune_data in
this functino.  Who is responsible for releasing the resource held
in this struct?
Johannes Schindelin Nov. 25, 2019, 3:20 p.m. UTC | #2
Hi Junio,

On Thu, 21 Nov 2019, Junio C Hamano wrote:

> "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com>
> writes:
>
> > +enum modified_files_filter {
> > +	NO_FILTER = 0,
> > +	WORKTREE_ONLY = 1,
> > +	INDEX_ONLY = 2,
> > +};
> > +
> > +static int get_modified_files(struct repository *r,
> > +			      enum modified_files_filter filter,
> > +			      struct string_list *files,
> >  			      const struct pathspec *ps)
> >  {
> >  	struct object_id head_oid;
> >  	int is_initial = !resolve_ref_unsafe("HEAD", RESOLVE_REF_READING,
> >  					     &head_oid, NULL);
> >  	struct collection_status s = { FROM_WORKTREE };
>
> Will have a comment on this later.

Yes, you're right, this initialization does not make sense. I changed it
to `{ 0 }` because I still want the struct to be zero'ed out.

> > +	int i;
> >
> >  	if (discard_index(r->index) < 0 ||
> >  	    repo_read_index_preload(r, ps, 0) < 0)
> > @@ -411,10 +424,16 @@ static int get_modified_files(struct repository *r, struct string_list *files,
> >  	s.files = files;
> >  	hashmap_init(&s.file_map, pathname_entry_cmp, NULL, 0);
> >
> > -	for (s.phase = FROM_WORKTREE; s.phase <= FROM_INDEX; s.phase++) {
> > +	for (i = 0; i < 2; i++) {
> >  		struct rev_info rev;
> >  		struct setup_revision_opt opt = { 0 };
> >
> > +		if (filter == INDEX_ONLY)
> > +			s.phase = i ? FROM_WORKTREE : FROM_INDEX;
> > +		else
> > +			s.phase = i ? FROM_INDEX : FROM_WORKTREE;
> > +		s.skip_unseen = filter && i;
>
> ;-)
>
> Looks somewhat crazy but it works---when the caller wants to do
> 'index-only' for example we are not interested in paths that did not
> appear in the INDEX side of the diff, so we run FROM_INDEX diff first
> and then the let next one (i.e. FROM_WORKTREE diff) be skipped for
> paths that are not in the result of the first one.  To me personally,
> I would find the tertially expession written like this
>
> 	s.phase = (i == 0) ? FROM_INDEX : FROM_WORKTREE;
>
> much easier to follow, as it matches the order which ones are done
> first.

Sure, that reverses the order, but it makes it more intuitive because `i
== 0` happens first. Changed.

> Also I notice two things.
>
>  - It used to make 100% sense to call this field .phase because we
>    always processed the first phase and then on to the second phase,
>    where the first one was called WORKTREE and the second one was
>    called INDEX.  In the new world order after this step, the name
>    .phase no longer makes any sense.  Sometimes we run diff-files in
>    phase 0 and diff-index in phase 1, but some other times we run
>    diff-index in phase 0 and diff-files in phase 0.  The variable
>    that has the closest meaning to the 'phase' is the newly
>    introduced 'i'.

I renamed it to `mode` in this commit. While I usually frown on renaming
fields in the same commit as introducing changes in behavior, I think in
this case it's kind of okay because it does not add many lines.

>  - The initialization of the local "struct collection_status s" at
>    the beginning of the function still uses .phase = FROM_WORKTREE
>    which might be somewhat misleading.  We cannot remove the whole
>    initialization, as the original used to nul initialize the other
>    fields in this structure and I suspect the code still relies on
>    it, but the initialization of .phase in particular no longer has
>    any effect; it always is assigned inside the loop before the
>    field gets used.
>
>
> >  		opt.def = is_initial ?
> >  			empty_tree_oid_hex() : oid_to_hex(&head_oid);
>
> By the way, this is not a new issue introduced by this patch, but I
> notice copy_pathspec() is used twice on the same rev.prune_data in
> this functino.  Who is responsible for releasing the resource held
> in this struct?

Good point. I assumed that the diff machinery would take care of this, but
it does not. So I introduced another patch to fix up what is already in
`next`.

Ciao,
Dscho
diff mbox series

Patch

diff --git a/add-interactive.c b/add-interactive.c
index 5d89863bab..8ec930ac15 100644
--- a/add-interactive.c
+++ b/add-interactive.c
@@ -347,6 +347,7 @@  struct collection_status {
 
 	const char *reference;
 
+	unsigned skip_unseen:1;
 	struct string_list *files;
 	struct hashmap file_map;
 };
@@ -374,6 +375,9 @@  static void collect_changes_cb(struct diff_queue_struct *q,
 		entry = hashmap_get_entry_from_hash(&s->file_map, hash, name,
 						    struct pathname_entry, ent);
 		if (!entry) {
+			if (s->skip_unseen)
+				continue;
+
 			add_file_item(s->files, name);
 
 			entry = xcalloc(sizeof(*entry), 1);
@@ -395,13 +399,22 @@  static void collect_changes_cb(struct diff_queue_struct *q,
 	free_diffstat_info(&stat);
 }
 
-static int get_modified_files(struct repository *r, struct string_list *files,
+enum modified_files_filter {
+	NO_FILTER = 0,
+	WORKTREE_ONLY = 1,
+	INDEX_ONLY = 2,
+};
+
+static int get_modified_files(struct repository *r,
+			      enum modified_files_filter filter,
+			      struct string_list *files,
 			      const struct pathspec *ps)
 {
 	struct object_id head_oid;
 	int is_initial = !resolve_ref_unsafe("HEAD", RESOLVE_REF_READING,
 					     &head_oid, NULL);
 	struct collection_status s = { FROM_WORKTREE };
+	int i;
 
 	if (discard_index(r->index) < 0 ||
 	    repo_read_index_preload(r, ps, 0) < 0)
@@ -411,10 +424,16 @@  static int get_modified_files(struct repository *r, struct string_list *files,
 	s.files = files;
 	hashmap_init(&s.file_map, pathname_entry_cmp, NULL, 0);
 
-	for (s.phase = FROM_WORKTREE; s.phase <= FROM_INDEX; s.phase++) {
+	for (i = 0; i < 2; i++) {
 		struct rev_info rev;
 		struct setup_revision_opt opt = { 0 };
 
+		if (filter == INDEX_ONLY)
+			s.phase = i ? FROM_WORKTREE : FROM_INDEX;
+		else
+			s.phase = i ? FROM_INDEX : FROM_WORKTREE;
+		s.skip_unseen = filter && i;
+
 		opt.def = is_initial ?
 			empty_tree_oid_hex() : oid_to_hex(&head_oid);
 
@@ -498,7 +517,7 @@  static void print_file_item(int i, struct string_list_item *item,
 static int run_status(struct add_i_state *s, const struct pathspec *ps,
 		      struct string_list *files, struct list_options *opts)
 {
-	if (get_modified_files(s->r, files, ps) < 0)
+	if (get_modified_files(s->r, NO_FILTER, files, ps) < 0)
 		return -1;
 
 	list(s, files, opts);