diff mbox series

[01/11] Start to implement a built-in version of `git add --interactive`

Message ID 12978dc248a2cd07c90559691b8a2add84f45394.1554917868.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series git add -i: add a rudimentary version in C (supporting only status and help so far) | expand

Commit Message

John Cai via GitGitGadget April 10, 2019, 5:37 p.m. UTC
From: Johannes Schindelin <johannes.schindelin@gmx.de>

This is hardly the first conversion of a Git command that is implemented
as a script to a built-in. So far, the most successful strategy for such
conversions has been to add a built-in helper and call that for more and
more functionality from the script, as more and more parts are
converted.

With the interactive add, we choose a different strategy. The sole
reason for this is that on Windows (where such a conversion has the most
benefits in terms of speed and robustness) we face the very specific
problem that a `system()` call in Perl seems to close `stdin` in the
parent process when the spawned process consumes even one character from
`stdin`. And that just does not work for us here, as it would stop the
main loop as soon as any interactive command was performed by the
helper. Which is almost all of the commands in `git add -i`.

It is almost as if Perl told us once again that it does not want us to
use it on Windows.

Instead, we follow the opposite route where we start with a bare-bones
version of the built-in interactive add, guarded by the new
`add.interactive.useBuiltin` config variable, and then add more and more
functionality to it, until it is feature complete.

At this point, the built-in version of `git add -i` only states that it
cannot do anything yet ;-)

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 Documentation/config/add.txt |  5 +++++
 Makefile                     |  1 +
 add-interactive.c            | 13 +++++++++++++
 add-interactive.h            | 10 ++++++++++
 builtin/add.c                | 16 +++++++++++++++-
 t/README                     |  4 ++++
 6 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 add-interactive.c
 create mode 100644 add-interactive.h

Comments

Jeff Hostetler April 18, 2019, 2:31 p.m. UTC | #1
On 4/10/2019 1:37 PM, Johannes Schindelin via GitGitGadget wrote:
> From: Johannes Schindelin <johannes.schindelin@gmx.de>
> 
> This is hardly the first conversion of a Git command that is implemented
> as a script to a built-in. So far, the most successful strategy for such
> conversions has been to add a built-in helper and call that for more and
> more functionality from the script, as more and more parts are
> converted.
> 
> With the interactive add, we choose a different strategy. The sole
> reason for this is that on Windows (where such a conversion has the most
> benefits in terms of speed and robustness) we face the very specific
> problem that a `system()` call in Perl seems to close `stdin` in the
> parent process when the spawned process consumes even one character from
> `stdin`. And that just does not work for us here, as it would stop the
> main loop as soon as any interactive command was performed by the
> helper. Which is almost all of the commands in `git add -i`.
> 
> It is almost as if Perl told us once again that it does not want us to
> use it on Windows.
> 
> Instead, we follow the opposite route where we start with a bare-bones
> version of the built-in interactive add, guarded by the new
> `add.interactive.useBuiltin` config variable, and then add more and more
> functionality to it, until it is feature complete.
> 
> At this point, the built-in version of `git add -i` only states that it
> cannot do anything yet ;-)
> 
> Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
> ---
>   Documentation/config/add.txt |  5 +++++
>   Makefile                     |  1 +
>   add-interactive.c            | 13 +++++++++++++
>   add-interactive.h            | 10 ++++++++++
>   builtin/add.c                | 16 +++++++++++++++-
>   t/README                     |  4 ++++
>   6 files changed, 48 insertions(+), 1 deletion(-)
>   create mode 100644 add-interactive.c
>   create mode 100644 add-interactive.h
> 
> diff --git a/Documentation/config/add.txt b/Documentation/config/add.txt
> index 4d753f006e..c9f748f81c 100644
> --- a/Documentation/config/add.txt
> +++ b/Documentation/config/add.txt
> @@ -5,3 +5,8 @@ add.ignore-errors (deprecated)::
>   	option of linkgit:git-add[1].  `add.ignore-errors` is deprecated,
>   	as it does not follow the usual naming convention for configuration
>   	variables.
> +
> +add.interactive.useBuiltin::
> +	[EXPERIMENTAL] Set to `true` to use the experimental built-in
> +	implementation of the interactive version of linkgit:git-add[1]
> +	instead of the Perl script version. Is `false` by default.
> diff --git a/Makefile b/Makefile
> index c5240942f2..18e656a32f 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -848,6 +848,7 @@ LIB_H = $(shell $(FIND) . \
>   	-name '*.h' -print)
>   
>   LIB_OBJS += abspath.o
> +LIB_OBJS += add-interactive.o
>   LIB_OBJS += advice.o
>   LIB_OBJS += alias.o
>   LIB_OBJS += alloc.o
> diff --git a/add-interactive.c b/add-interactive.c
> new file mode 100644
> index 0000000000..540bf185d8
> --- /dev/null
> +++ b/add-interactive.c
> @@ -0,0 +1,13 @@
> +#include "cache.h"
> +#include "add-interactive.h"
> +#include "config.h"
> +
> +int add_i_config(const char *var, const char *value, void *cb)
> +{
> +	return git_default_config(var, value, cb);
> +}
[...]
> diff --git a/builtin/add.c b/builtin/add.c
> index db2dfa4350..5a32a755c8 100644
> --- a/builtin/add.c
> +++ b/builtin/add.c
[...]
> +static int use_builtin_add_i;
[...]
> @@ -319,7 +324,12 @@ static int add_config(const char *var, const char *value, void *cb)
>   		ignore_add_errors = git_config_bool(var, value);
>   		return 0;
>   	}
> -	return git_default_config(var, value, cb);
> +	if (!strcmp(var, "add.interactive.usebuiltin")) {
> +		use_builtin_add_i = git_config_bool(var, value);
> +		return 0;
> +	}
> +
> +	return add_i_config(var, value, cb);
 >   }

Something about this split between add_config() in this file
and add_i_config() in add-interactive.c bothers me.  I'm not
saying it is wrong, but it bothers me.

Perhaps it is that we moved the call to git_default_config()
to add-interactive.c -- so correct behavior of the non-interactive
case depends on add-interactive.c to do the right thing.

Also, since we can't control the order of observed the k/v pairs,
we can't do "if (use_builtin_add_i) return add_i_config(...);"
And that wouldn't allow both versions to pickup a common config
setting (in their own static variables).

Currently, neither function looks at any other k/v pairs, so
this is a bit of a moot point, but I'm wondering if this should
look like this:

     int add_config(...)
     {
         // give add-interactive.c a chance to look at k/v pair, but
         // do not short-cut because we don't know yet whether we
         // will be interactive or not yet.
         (void)add_i_config(...);

         ...ignore_add_errors...
         ...use_builtin_add_i...

         return git_default_config(...);
     }

     int add_i_config(...)
     {
         return 0;
     }

or just inline everything here in add_config() and be done with it.


Jeff
Jeff King April 18, 2019, 4:06 p.m. UTC | #2
On Thu, Apr 18, 2019 at 10:31:30AM -0400, Jeff Hostetler wrote:

> Currently, neither function looks at any other k/v pairs, so
> this is a bit of a moot point, but I'm wondering if this should
> look like this:
> 
>     int add_config(...)
>     {
>         // give add-interactive.c a chance to look at k/v pair, but
>         // do not short-cut because we don't know yet whether we
>         // will be interactive or not yet.
>         (void)add_i_config(...);
> 
>         ...ignore_add_errors...
>         ...use_builtin_add_i...
> 
>         return git_default_config(...);
>     }

Yeah, I agree this split seems a bit more natural. It is worth
propagating errors from add_i_config(), though, like:

  if (add_i_config(var, value, data))
	return -1;

so that any key-specific errors (e.g., config_error_nonbool) stop the
parsing in the usual way.

-Peff
Johannes Schindelin April 30, 2019, 11:40 p.m. UTC | #3
Hi Jeff & Jeff,

On Thu, 18 Apr 2019, Jeff King wrote:

> On Thu, Apr 18, 2019 at 10:31:30AM -0400, Jeff Hostetler wrote:
>
> > Currently, neither function looks at any other k/v pairs, so
> > this is a bit of a moot point, but I'm wondering if this should
> > look like this:
> >
> >     int add_config(...)
> >     {
> >         // give add-interactive.c a chance to look at k/v pair, but
> >         // do not short-cut because we don't know yet whether we
> >         // will be interactive or not yet.
> >         (void)add_i_config(...);
> >
> >         ...ignore_add_errors...
> >         ...use_builtin_add_i...
> >
> >         return git_default_config(...);
> >     }
>
> Yeah, I agree this split seems a bit more natural. It is worth
> propagating errors from add_i_config(), though, like:
>
>   if (add_i_config(var, value, data))
> 	return -1;
>
> so that any key-specific errors (e.g., config_error_nonbool) stop the
> parsing in the usual way.

The only problem there is that `add_i_config()` (like all the other
`git_config()` callbacks) does not report whether it consumed the
key/value pair or not. I tried to avoid deviating from the standard
practice to avoid calling `git_default_config()` when we already consumed
the config setting.

And I also tried pretty hard to *not* bleed any internal state of
`add-interactive` into `builtin/add`, as I wanted the new code to be as
libified as possible (in a nearby thread, somebody wished for a new `-p`
mode that would essentially be a combined `git stash -p` and `git add -p`,
and with properly libified code such a beast is a lot more feasible).

Any idea how to deal with that?

I guess I could invert the order, where `add_config()` would be called
as a fall-back from `add_i_config()`...

Or I invent a new convention where `add_i_config()` returns 1 when it
consumed the key/value pair. But that would set a precedent that is
inconsistent with the entire existing code base, something I am
uncomfortable to do for the sake of `add -i`...


Ciao,
Dscho
Jeff King May 1, 2019, 2:21 a.m. UTC | #4
On Tue, Apr 30, 2019 at 07:40:06PM -0400, Johannes Schindelin wrote:

> > Yeah, I agree this split seems a bit more natural. It is worth
> > propagating errors from add_i_config(), though, like:
> >
> >   if (add_i_config(var, value, data))
> > 	return -1;
> >
> > so that any key-specific errors (e.g., config_error_nonbool) stop the
> > parsing in the usual way.
> 
> The only problem there is that `add_i_config()` (like all the other
> `git_config()` callbacks) does not report whether it consumed the
> key/value pair or not. I tried to avoid deviating from the standard
> practice to avoid calling `git_default_config()` when we already consumed
> the config setting.

I don't think it's worth worrying too much about that. We wouldn't match
the keys in multiple places anyway (and even if we did, it would
arguably be the right thing to give every callback a chance to see
them).

The only thing it does is short-circuit the rest of the checks that we
know won't match. But that doesn't really change the performance
substantially; the worst case is already that we have to hit every
possible strcmp().

And most of our config code does not worry about this, and is OK with
branching (it just needs to propagate errors, as above).  For some more
discussion, see 6680a0874f (drop odd return value semantics from
userdiff_config, 2012-02-07).

All that said...

> And I also tried pretty hard to *not* bleed any internal state of
> `add-interactive` into `builtin/add`, as I wanted the new code to be as
> libified as possible (in a nearby thread, somebody wished for a new `-p`
> mode that would essentially be a combined `git stash -p` and `git add -p`,
> and with properly libified code such a beast is a lot more feasible).
> 
> Any idea how to deal with that?

The most lib-ified thing is to just use the configset code. I.e.,
wherever you need the config, just load it on demand via
git_config_get_int or whatever.

> Or I invent a new convention where `add_i_config()` returns 1 when it
> consumed the key/value pair. But that would set a precedent that is
> inconsistent with the entire existing code base, something I am
> uncomfortable to do for the sake of `add -i`...

Yes, don't do that. :) That was the same thing we finally got rid of for
userdiff_config().

-Peff
Johannes Schindelin May 13, 2019, 11:14 a.m. UTC | #5
Hi Peff,

On Tue, 30 Apr 2019, Jeff King wrote:

> On Tue, Apr 30, 2019 at 07:40:06PM -0400, Johannes Schindelin wrote:
>
> > And I also tried pretty hard to *not* bleed any internal state of
> > `add-interactive` into `builtin/add`, as I wanted the new code to be
> > as libified as possible (in a nearby thread, somebody wished for a new
> > `-p` mode that would essentially be a combined `git stash -p` and `git
> > add -p`, and with properly libified code such a beast is a lot more
> > feasible).
> >
> > Any idea how to deal with that?
>
> The most lib-ified thing is to just use the configset code. I.e.,
> wherever you need the config, just load it on demand via
> git_config_get_int or whatever.

True.

And it cost me *quite* a few days to implement the changes. But the result
is definitely a lot better, in my opinion.

> > Or I invent a new convention where `add_i_config()` returns 1 when it
> > consumed the key/value pair. But that would set a precedent that is
> > inconsistent with the entire existing code base, something I am
> > uncomfortable to do for the sake of `add -i`...
>
> Yes, don't do that. :) That was the same thing we finally got rid of for
> userdiff_config().

Thanks for stopping me. I did not remember about the userdiff_config()
thing.

Ciao,
Dscho
diff mbox series

Patch

diff --git a/Documentation/config/add.txt b/Documentation/config/add.txt
index 4d753f006e..c9f748f81c 100644
--- a/Documentation/config/add.txt
+++ b/Documentation/config/add.txt
@@ -5,3 +5,8 @@  add.ignore-errors (deprecated)::
 	option of linkgit:git-add[1].  `add.ignore-errors` is deprecated,
 	as it does not follow the usual naming convention for configuration
 	variables.
+
+add.interactive.useBuiltin::
+	[EXPERIMENTAL] Set to `true` to use the experimental built-in
+	implementation of the interactive version of linkgit:git-add[1]
+	instead of the Perl script version. Is `false` by default.
diff --git a/Makefile b/Makefile
index c5240942f2..18e656a32f 100644
--- a/Makefile
+++ b/Makefile
@@ -848,6 +848,7 @@  LIB_H = $(shell $(FIND) . \
 	-name '*.h' -print)
 
 LIB_OBJS += abspath.o
+LIB_OBJS += add-interactive.o
 LIB_OBJS += advice.o
 LIB_OBJS += alias.o
 LIB_OBJS += alloc.o
diff --git a/add-interactive.c b/add-interactive.c
new file mode 100644
index 0000000000..540bf185d8
--- /dev/null
+++ b/add-interactive.c
@@ -0,0 +1,13 @@ 
+#include "cache.h"
+#include "add-interactive.h"
+#include "config.h"
+
+int add_i_config(const char *var, const char *value, void *cb)
+{
+	return git_default_config(var, value, cb);
+}
+
+int run_add_i(struct repository *r, const struct pathspec *ps)
+{
+	die(_("No commands are available in the built-in `git add -i` yet!"));
+}
diff --git a/add-interactive.h b/add-interactive.h
new file mode 100644
index 0000000000..e6e6e051eb
--- /dev/null
+++ b/add-interactive.h
@@ -0,0 +1,10 @@ 
+#ifndef ADD_INTERACTIVE_H
+#define ADD_INTERACTIVE_H
+
+int add_i_config(const char *var, const char *value, void *cb);
+
+struct repository;
+struct pathspec;
+int run_add_i(struct repository *r, const struct pathspec *ps);
+
+#endif
diff --git a/builtin/add.c b/builtin/add.c
index db2dfa4350..5a32a755c8 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -20,6 +20,7 @@ 
 #include "bulk-checkin.h"
 #include "argv-array.h"
 #include "submodule.h"
+#include "add-interactive.h"
 
 static const char * const builtin_add_usage[] = {
 	N_("git add [<options>] [--] <pathspec>..."),
@@ -28,6 +29,7 @@  static const char * const builtin_add_usage[] = {
 static int patch_interactive, add_interactive, edit_interactive;
 static int take_worktree_changes;
 static int add_renormalize;
+static int use_builtin_add_i;
 
 struct update_callback_data {
 	int flags;
@@ -186,6 +188,9 @@  int run_add_interactive(const char *revision, const char *patch_mode,
 	int status, i;
 	struct argv_array argv = ARGV_ARRAY_INIT;
 
+	if (use_builtin_add_i && !patch_mode)
+		return !!run_add_i(the_repository, pathspec);
+
 	argv_array_push(&argv, "add--interactive");
 	if (patch_mode)
 		argv_array_push(&argv, patch_mode);
@@ -319,7 +324,12 @@  static int add_config(const char *var, const char *value, void *cb)
 		ignore_add_errors = git_config_bool(var, value);
 		return 0;
 	}
-	return git_default_config(var, value, cb);
+	if (!strcmp(var, "add.interactive.usebuiltin")) {
+		use_builtin_add_i = git_config_bool(var, value);
+		return 0;
+	}
+
+	return add_i_config(var, value, cb);
 }
 
 static const char embedded_advice[] = N_(
@@ -394,8 +404,12 @@  int cmd_add(int argc, const char **argv, const char *prefix)
 	int require_pathspec;
 	char *seen = NULL;
 	struct lock_file lock_file = LOCK_INIT;
+	int use_builtin_add_i_env =
+		git_env_bool("GIT_TEST_ADD_I_USE_BUILTIN", -1);
 
 	git_config(add_config, NULL);
+	if (use_builtin_add_i_env >= 0)
+		use_builtin_add_i = use_builtin_add_i_env;
 
 	argc = parse_options(argc, argv, prefix, builtin_add_options,
 			  builtin_add_usage, PARSE_OPT_KEEP_ARGV0);
diff --git a/t/README b/t/README
index 886bbec5bc..6408a1847e 100644
--- a/t/README
+++ b/t/README
@@ -383,6 +383,10 @@  GIT_TEST_REBASE_USE_BUILTIN=<boolean>, when false, disables the
 builtin version of git-rebase. See 'rebase.useBuiltin' in
 git-config(1).
 
+GIT_TEST_ADD_I_USE_BUILTIN=<boolean>, when true, enables the
+builtin version of git add -i. See 'add.interactive.useBuiltin' in
+git-config(1).
+
 GIT_TEST_INDEX_THREADS=<n> enables exercising the multi-threaded loading
 of the index for the whole test suite by bypassing the default number of
 cache entries and thread minimums. Setting this to 1 will make the