diff mbox series

[RFC] bisect: Introduce skip-when to automatically skip commits

Message ID 20240330081026.362962-2-oliver@schinagl.nl (mailing list archive)
State New, archived
Headers show
Series [RFC] bisect: Introduce skip-when to automatically skip commits | expand

Commit Message

Olliver Schinagl March 30, 2024, 8:10 a.m. UTC
Before I go dig myself in deeper, I'd like some feedback and opinions on
whether this is the correct direction.

If I got it right, do say so, as then I can start adding some tests and
update the documentation.

Olliver

---

In some situations, it is needed to skip certain commits when bisecting,
because the compile doesn't work, or tests are known to fail.

For this purpose, we introduce the `--skip-when` flag which takes a
script as an input and is expected to return exit code 125 if a commit
is to be skipped, which uses a regular `git bisect skip` and the commit
thus ends up on the skipped pile.

In addition we also offer a git-hook, to make this as predictable and
painless as possible.

The script can do whatever it wants to to determine if a commit is to be
skipped; From comparing the hash against a known list, to checking git
notes for a keyword or, as the included example, the commit body.

Signed-off-by: Olliver Schinagl <oliver@schinagl.nl>
---
 bisect.c                                 |  2 +
 builtin/bisect.c                         | 93 +++++++++++++++++++++++-
 templates/hooks--bisect-skip_when.sample | 10 +++
 3 files changed, 101 insertions(+), 4 deletions(-)
 create mode 100755 templates/hooks--bisect-skip_when.sample

Comments

Olliver Schinagl April 5, 2024, 6:50 a.m. UTC | #1
Hey all,

I've also got my work on a branch in my repo, if that helps to look at 
things, https://gitlab.com/olliver/git/-/tree/skip_bisect

Also included is a script to be used as an example. I opted to use `git 
show`, which is nice because it works both on commits, but also on notes.

Anyway, any thoughts on the bellow before I send the full series?

Olliver


On 30-03-2024 09:10, Olliver Schinagl wrote:
> Before I go dig myself in deeper, I'd like some feedback and opinions on
> whether this is the correct direction.
>
> If I got it right, do say so, as then I can start adding some tests and
> update the documentation.
>
> Olliver
>
> ---
>
> In some situations, it is needed to skip certain commits when bisecting,
> because the compile doesn't work, or tests are known to fail.
>
> For this purpose, we introduce the `--skip-when` flag which takes a
> script as an input and is expected to return exit code 125 if a commit
> is to be skipped, which uses a regular `git bisect skip` and the commit
> thus ends up on the skipped pile.
>
> In addition we also offer a git-hook, to make this as predictable and
> painless as possible.
>
> The script can do whatever it wants to to determine if a commit is to be
> skipped; From comparing the hash against a known list, to checking git
> notes for a keyword or, as the included example, the commit body.
>
> Signed-off-by: Olliver Schinagl <oliver@schinagl.nl>
> ---
>   bisect.c                                 |  2 +
>   builtin/bisect.c                         | 93 +++++++++++++++++++++++-
>   templates/hooks--bisect-skip_when.sample | 10 +++
>   3 files changed, 101 insertions(+), 4 deletions(-)
>   create mode 100755 templates/hooks--bisect-skip_when.sample
>
> diff --git a/bisect.c b/bisect.c
> index 60aae2fe50..185909cca9 100644
> --- a/bisect.c
> +++ b/bisect.c
> @@ -476,6 +476,7 @@ static GIT_PATH_FUNC(git_path_bisect_names, "BISECT_NAMES")
>   static GIT_PATH_FUNC(git_path_bisect_ancestors_ok, "BISECT_ANCESTORS_OK")
>   static GIT_PATH_FUNC(git_path_bisect_run, "BISECT_RUN")
>   static GIT_PATH_FUNC(git_path_bisect_start, "BISECT_START")
> +static GIT_PATH_FUNC(git_path_bisect_skip_when, "BISECT_SKIP_WHEN")
>   static GIT_PATH_FUNC(git_path_bisect_log, "BISECT_LOG")
>   static GIT_PATH_FUNC(git_path_bisect_terms, "BISECT_TERMS")
>   static GIT_PATH_FUNC(git_path_bisect_first_parent, "BISECT_FIRST_PARENT")
> @@ -1179,6 +1180,7 @@ int bisect_clean_state(void)
>   	unlink_or_warn(git_path_bisect_log());
>   	unlink_or_warn(git_path_bisect_names());
>   	unlink_or_warn(git_path_bisect_run());
> +	unlink_or_warn(git_path_bisect_skip_when());
>   	unlink_or_warn(git_path_bisect_terms());
>   	unlink_or_warn(git_path_bisect_first_parent());
>   	/*
> diff --git a/builtin/bisect.c b/builtin/bisect.c
> index 9891cf2604..6870142b85 100644
> --- a/builtin/bisect.c
> +++ b/builtin/bisect.c
> @@ -4,6 +4,7 @@
>   #include "environment.h"
>   #include "gettext.h"
>   #include "hex.h"
> +#include "hook.h"
>   #include "object-name.h"
>   #include "oid-array.h"
>   #include "parse-options.h"
> @@ -14,19 +15,21 @@
>   #include "revision.h"
>   #include "run-command.h"
>   #include "strvec.h"
> +#include "wrapper.h"
>   
>   static GIT_PATH_FUNC(git_path_bisect_terms, "BISECT_TERMS")
>   static GIT_PATH_FUNC(git_path_bisect_ancestors_ok, "BISECT_ANCESTORS_OK")
>   static GIT_PATH_FUNC(git_path_bisect_start, "BISECT_START")
> +static GIT_PATH_FUNC(git_path_bisect_skip_when, "BISECT_SKIP_WHEN")
>   static GIT_PATH_FUNC(git_path_bisect_log, "BISECT_LOG")
>   static GIT_PATH_FUNC(git_path_bisect_names, "BISECT_NAMES")
>   static GIT_PATH_FUNC(git_path_bisect_first_parent, "BISECT_FIRST_PARENT")
>   static GIT_PATH_FUNC(git_path_bisect_run, "BISECT_RUN")
>   
>   #define BUILTIN_GIT_BISECT_START_USAGE \
> -	N_("git bisect start [--term-(new|bad)=<term> --term-(old|good)=<term>]" \
> -	   "    [--no-checkout] [--first-parent] [<bad> [<good>...]] [--]" \
> -	   "    [<pathspec>...]")
> +	N_("git bisect start [--term-(new|bad)=<term> --term-(old|good)=<term>]\n" \
> +	   "                 [--no-checkout] [--first-parent] [--skip-when=<script>]\n" \
> +	   "                 [<bad> [<good>...]] [--] [<pathspec>...]")
>   #define BUILTIN_GIT_BISECT_STATE_USAGE \
>   	N_("git bisect (good|bad) [<rev>...]")
>   #define BUILTIN_GIT_BISECT_TERMS_USAGE \
> @@ -89,6 +92,7 @@ static const char vocab_bad[] = "bad|new";
>   static const char vocab_good[] = "good|old";
>   
>   static int bisect_autostart(struct bisect_terms *terms);
> +static enum bisect_error bisect_skip(struct bisect_terms *terms, int argc, const char **argv);
>   
>   /*
>    * Check whether the string `term` belongs to the set of strings
> @@ -680,14 +684,74 @@ static enum bisect_error bisect_next(struct bisect_terms *terms, const char *pre
>   	return res;
>   }
>   
> +static int get_skip_when(const char **skip_when)
> +{
> +	struct strbuf str = STRBUF_INIT;
> +	FILE *fp = NULL;
> +	int res = 0;
> +
> +	fp = fopen(git_path_bisect_skip_when(), "r");
> +	if (!fp) {
> +		res = -1;
> +		goto finish;
> +	}
> +
> +	strbuf_getline_lf(&str, fp);
> +	*skip_when = strbuf_detach(&str, NULL);
> +
> +finish:
> +	if (fp)
> +		fclose(fp);
> +	strbuf_release(&str);
> +
> +	return res;
> +}
> +
>   static enum bisect_error bisect_auto_next(struct bisect_terms *terms, const char *prefix)
>   {
> +	int no_checkout = ref_exists("BISECT_HEAD");
> +	enum bisect_error res;
> +	struct object_id oid;
> +
>   	if (bisect_next_check(terms, NULL)) {
>   		bisect_print_status(terms);
>   		return BISECT_OK;
>   	}
>   
> -	return bisect_next(terms, prefix);
> +	res = bisect_next(terms, prefix);
> +	if (res)
> +		return res;
> +
> +	if (!read_ref(no_checkout ? "BISECT_HEAD" : "HEAD", &oid)) {
> +		struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
> +		char *rev = oid_to_hex(&oid);
> +		const char *skip_when = NULL;
> +		int ret = 0;
> +
> +		get_skip_when(&skip_when);
> +		if (skip_when != NULL) {
> +			struct child_process cmd = CHILD_PROCESS_INIT;
> +
> +			cmd.use_shell = 1;
> +			cmd.no_stdin = 1;
> +			strvec_pushl(&cmd.args, skip_when, rev, NULL);
> +
> +			printf(_("running '%s'\n"), skip_when);
> +			ret = run_command(&cmd);
> +		}
> +
> +		strvec_push(&opt.args, rev);
> +		if ((ret == 125) ||
> +		    (run_hooks_opt("bisect-skip_when", &opt) == 125)) {
> +			struct strvec argv = STRVEC_INIT;
> +
> +			printf(_("auto skipping commit [%s]...\n"), rev);
> +			sq_dequote_to_strvec("skip", &argv);
> +			res = bisect_skip(terms, argv.nr, argv.v);
> +		}
> +	}
> +
> +	return res;
>   }
>   
>   static enum bisect_error bisect_start(struct bisect_terms *terms, int argc,
> @@ -703,6 +767,7 @@ static enum bisect_error bisect_start(struct bisect_terms *terms, int argc,
>   	struct strbuf start_head = STRBUF_INIT;
>   	struct strbuf bisect_names = STRBUF_INIT;
>   	struct object_id head_oid;
> +	char *skip_when = NULL;
>   	struct object_id oid;
>   	const char *head;
>   
> @@ -727,6 +792,15 @@ static enum bisect_error bisect_start(struct bisect_terms *terms, int argc,
>   			no_checkout = 1;
>   		} else if (!strcmp(arg, "--first-parent")) {
>   			first_parent_only = 1;
> +		} else if (!strcmp(arg, "--skip-when")) {
> +			i++;
> +
> +			if (argc <= i)
> +				return error(_("'' is not a valid skip-when script"));
> +
> +			skip_when = xstrdup(argv[i]);
> +		} else if (skip_prefix(arg, "--skip-when=", &arg)) {
> +			skip_when = xstrdup(arg);
>   		} else if (!strcmp(arg, "--term-good") ||
>   			 !strcmp(arg, "--term-old")) {
>   			i++;
> @@ -867,11 +941,22 @@ static enum bisect_error bisect_start(struct bisect_terms *terms, int argc,
>   		goto finish;
>   	}
>   
> +	if (skip_when) {
> +		if (access(skip_when, X_OK)) {
> +			res = error(_("%s: no such path in the working tree.\n"), skip_when);
> +			goto finish;
> +		}
> +		write_to_file(git_path_bisect_skip_when(), "%s\n", skip_when);
> +	}
> +
>   	res = bisect_append_log_quoted(argv);
>   	if (res)
>   		res = BISECT_FAILED;
>   
>   finish:
> +	if (skip_when)
> +		free(skip_when);
> +
>   	string_list_clear(&revs, 0);
>   	string_list_clear(&states, 0);
>   	strbuf_release(&start_head);
> diff --git a/templates/hooks--bisect-skip_when.sample b/templates/hooks--bisect-skip_when.sample
> new file mode 100755
> index 0000000000..ff3960841f
> --- /dev/null
> +++ b/templates/hooks--bisect-skip_when.sample
> @@ -0,0 +1,10 @@
> +#!/bin/sh
> +#
> +# usage: ${0} <commit_object_name>
> +# expected to exit with 125 when the commit should be skipped
> +
> +if git cat-file commit "${1:-HEAD}" | grep -q "^GIT_BISECT_SKIP=1$"; then
> +	exit 125
> +fi
> +
> +exit 0
Junio C Hamano April 6, 2024, 1:08 a.m. UTC | #2
Olliver Schinagl <oliver@schinagl.nl> writes:

> Hey all,
>
> I've also got my work on a branch in my repo, if that helps to look at
> things, https://gitlab.com/olliver/git/-/tree/skip_bisect
>
> Also included is a script to be used as an example. I opted to use
> `git show`, which is nice because it works both on commits, but also
> on notes.
>
> Anyway, any thoughts on the bellow before I send the full series?
>
> Olliver

I would not write get_skip_when() before studying the same file to
see if there already is a helper to read the whole file used in the
vicinity (like strbuf_read_file(), perhaps).

I do not have enough concentration to follow changes to
bisect_auto_next() is reasonable.  Especially I do not know why
"bisect-skip_when" wants to exist and what it is trying to do,
besides the fact that its name looks horrible ;-).
Olliver Schinagl April 6, 2024, 10:06 a.m. UTC | #3
On 06-04-2024 03:08, Junio C Hamano wrote:
> Olliver Schinagl <oliver@schinagl.nl> writes:
> 
>> Hey all,
>>
>> I've also got my work on a branch in my repo, if that helps to look at
>> things, https://gitlab.com/olliver/git/-/tree/skip_bisect
>>
>> Also included is a script to be used as an example. I opted to use
>> `git show`, which is nice because it works both on commits, but also
>> on notes.
>>
>> Anyway, any thoughts on the bellow before I send the full series?
>>
>> Olliver
> 
> I would not write get_skip_when() before studying the same file to
> see if there already is a helper to read the whole file used in the
> vicinity (like strbuf_read_file(), perhaps).

Fair enough. I'm a little worried about optimization vs readability. I 
think it makes it mre clear what the code does in its current form; but 
I'll investigate. Bisecting shouldn't be a computational often happening 
thing, so I'm not to worried about performance. But I'm not too familiar 
with the git code base, so I don't know either :p

> 
> I do not have enough concentration to follow changes to
> bisect_auto_next() is reasonable.  Especially I do not know why
> "bisect-skip_when" wants to exist and what it is trying to do,
> besides the fact that its name looks horrible ;-).
> 
naming things, sure. I can look into this absolutly :)

But in short, bisect_auto_next was returning just after checkout It 
seemed. So after checkout, running the script seemed sensible. But I 
look at it as a normal git user. So you checkout, test your commit, skip 
to the next one if applicable.


I'll think of your two comments, and see if I can address them as you 
regain your concentration :p

But seeing that these are your main concerns, I'm more confident I'm not 
completly on the wrong path here.

Olliver
Olliver Schinagl April 6, 2024, 10:22 a.m. UTC | #4
On 06-04-2024 03:08, Junio C Hamano wrote:
> Olliver Schinagl <oliver@schinagl.nl> writes:
> 
>> Hey all,
>>
>> I've also got my work on a branch in my repo, if that helps to look at
>> things, https://gitlab.com/olliver/git/-/tree/skip_bisect
>>
>> Also included is a script to be used as an example. I opted to use
>> `git show`, which is nice because it works both on commits, but also
>> on notes.
>>
>> Anyway, any thoughts on the bellow before I send the full series?
>>
>> Olliver
> 
> I would not write get_skip_when() before studying the same file to
> see if there already is a helper to read the whole file used in the
> vicinity (like strbuf_read_file(), perhaps).

So I just remembered, when I started this journey, I wanted to squeeze 
it all into `get_terms()` and make it part of the terms struct, as that 
was passed around everywhere. I figured, I can rename it into being 
something more generic. But I realized that skip_when doesn't actually 
need to be passed around at all (which we can see in the current 
implementation). With get_terms() in my mind, I just what that function did.

I saw strbuf_read_file() but I didn't quite understand what it was 
doing, it was a bit cryptic at first. Now that you mention it however, I 
see the error of my ways, and that strbuf_read_file() might be good 
enough and do exactly what get_skip_when does.

So thank you for that hint :)

Olliver

> 
> I do not have enough concentration to follow changes to
> bisect_auto_next() is reasonable.  Especially I do not know why
> "bisect-skip_when" wants to exist and what it is trying to do,
> besides the fact that its name looks horrible ;-).
>
Phillip Wood April 6, 2024, 1:50 p.m. UTC | #5
Hi Olliver

On 06/04/2024 11:06, Olliver Schinagl wrote:
> On 06-04-2024 03:08, Junio C Hamano wrote:
>> Olliver Schinagl <oliver@schinagl.nl> writes:
>>
>>> Hey all,
>>>
>>> I've also got my work on a branch in my repo, if that helps to look at
>>> things, https://gitlab.com/olliver/git/-/tree/skip_bisect
>>>
>>> Also included is a script to be used as an example. I opted to use
>>> `git show`, which is nice because it works both on commits, but also
>>> on notes.
>>>
>>> Anyway, any thoughts on the bellow before I send the full series?
>>>
>>> Olliver
>>
>> I would not write get_skip_when() before studying the same file to
>> see if there already is a helper to read the whole file used in the
>> vicinity (like strbuf_read_file(), perhaps).
> 
> Fair enough. I'm a little worried about optimization vs readability. I 
> think it makes it mre clear what the code does in its current form; but 
> I'll investigate. Bisecting shouldn't be a computational often happening 
> thing, so I'm not to worried about performance. But I'm not too familiar 
> with the git code base, so I don't know either :p

If you search builtin/bisect.c you'll see some existing callers of 
strbuf_read_file() that read other files like BISECT_START. Those 
callers should give you an idea of how to use it.

>>
>> I do not have enough concentration to follow changes to
>> bisect_auto_next() is reasonable.  Especially I do not know why
>> "bisect-skip_when" wants to exist and what it is trying to do,
>> besides the fact that its name looks horrible ;-).
>>
> naming things, sure. I can look into this absolutly :)

For me it's not just the name but the whole hook thing - do we really 
need that rather than just the command line option?

The other thing I wondered about is the exit code handling for the 
"--skip-when" script. In Junio's example in an earlier message he used a 
successful exit to mean "skip this commit" and an unsuccessful exit to 
mean "test this commit". To me that matches the name of the option - we 
skip when the script given to "--skip-when" is successful. Copying the 
mechanism used by "git bisect run" seems a bit cumbersome as we only 
need to know whether to skip or not, we don't need a special way of 
distinguishing "skip this commit" from "this commit is good" and "this 
commit is bad"

Best Wishes

Phillip

> But in short, bisect_auto_next was returning just after checkout It 
> seemed. So after checkout, running the script seemed sensible. But I 
> look at it as a normal git user. So you checkout, test your commit, skip 
> to the next one if applicable.
> 
> 
> I'll think of your two comments, and see if I can address them as you 
> regain your concentration :p
> 
> But seeing that these are your main concerns, I'm more confident I'm not 
> completly on the wrong path here.
> 
> Olliver
>
Junio C Hamano April 6, 2024, 5:07 p.m. UTC | #6
Olliver Schinagl <oliver@schinagl.nl> writes:

> But seeing that these are your main concerns, I'm more confident I'm
> not completly on the wrong path here.

Mind you that they are not "MAIN" concerns.  They were the ones that
jumped out at me from your sketch.  After seeing the real thing, I
may find completely different issues that I could have spotted in
this version as well---it is natural that people notice things they
did not initially notice with a richer context.
Olliver Schinagl April 6, 2024, 7:17 p.m. UTC | #7
Hey Phillip,

On 06-04-2024 15:50, Phillip Wood wrote:
> Hi Olliver
> 
> On 06/04/2024 11:06, Olliver Schinagl wrote:
>> On 06-04-2024 03:08, Junio C Hamano wrote:
>>> Olliver Schinagl <oliver@schinagl.nl> writes:
>>>
>>>> Hey all,
>>>>
>>>> I've also got my work on a branch in my repo, if that helps to look at
>>>> things, https://gitlab.com/olliver/git/-/tree/skip_bisect
>>>>
>>>> Also included is a script to be used as an example. I opted to use
>>>> `git show`, which is nice because it works both on commits, but also
>>>> on notes.
>>>>
>>>> Anyway, any thoughts on the bellow before I send the full series?
>>>>
>>>> Olliver
>>>
>>> I would not write get_skip_when() before studying the same file to
>>> see if there already is a helper to read the whole file used in the
>>> vicinity (like strbuf_read_file(), perhaps).
>>
>> Fair enough. I'm a little worried about optimization vs readability. I 
>> think it makes it mre clear what the code does in its current form; 
>> but I'll investigate. Bisecting shouldn't be a computational often 
>> happening thing, so I'm not to worried about performance. But I'm not 
>> too familiar with the git code base, so I don't know either :p
> 
> If you search builtin/bisect.c you'll see some existing callers of 
> strbuf_read_file() that read other files like BISECT_START. Those 
> callers should give you an idea of how to use it.

Yeah, I found after Junio's hint :) What threw me off, as I wrote 
earlier, get_terms(). I wonder now, why is get_terms() implemented as it 
is, and should it not use the same functions? Or is it because terms is 
a multi-line file, whereas the others are all single line (I didn't 
look, though I see addline functions for the strbuf functions. Should 
this be refactored?

> 
>>>
>>> I do not have enough concentration to follow changes to
>>> bisect_auto_next() is reasonable.  Especially I do not know why
>>> "bisect-skip_when" wants to exist and what it is trying to do,
>>> besides the fact that its name looks horrible ;-).
>>>
>> naming things, sure. I can look into this absolutly :)
> 
> For me it's not just the name but the whole hook thing - do we really 
> need that rather than just the command line option?

So with the name, I started to think some more about it, and after 
playing with some names, I settled on 'bisect-post-checkout'. Things 
then sort of fell more into place. It is still a hook/commandline 
option, but it's a much smaller change (since we don't have any special 
code to check the exit code anymore) as we can (obviously) run `git 
bisect skip` instead of `exit 125` as well of course. The `exit 125` 
thinking came from `git bisect run` and maybe a suggestion on the ML 
earlier (I don't quite recall).

> 
> The other thing I wondered about is the exit code handling for the 
> "--skip-when" script. In Junio's example in an earlier message he used a 
> successful exit to mean "skip this commit" and an unsuccessful exit to 
> mean "test this commit". To me that matches the name of the option - we 
> skip when the script given to "--skip-when" is successful. Copying the 
> mechanism used by "git bisect run" seems a bit cumbersome as we only 
> need to know whether to skip or not, we don't need a special way of 
> distinguishing "skip this commit" from "this commit is good" and "this 
> commit is bad"

So as I explained above, I think just offering a 'post-checkout' step, 
and let the user decide what to do there, makes things even simpler and 
more flexible.

I've just pushed my latest changes to 
https://gitlab.com/olliver/git/-/commit/4361a5deb0c5ee4c113c25b57752af61b74aabf3 
and will start working on some tests before offering it for review again.

Thank you!

Olliver
> 
> Best Wishes
> 
> Phillip
> 
>> But in short, bisect_auto_next was returning just after checkout It 
>> seemed. So after checkout, running the script seemed sensible. But I 
>> look at it as a normal git user. So you checkout, test your commit, 
>> skip to the next one if applicable.
>>
>>
>> I'll think of your two comments, and see if I can address them as you 
>> regain your concentration :p
>>
>> But seeing that these are your main concerns, I'm more confident I'm 
>> not completly on the wrong path here.
>>
>> Olliver
>>
Olliver Schinagl April 6, 2024, 7:19 p.m. UTC | #8
Hey Junio,

On 06-04-2024 19:07, Junio C Hamano wrote:
> Olliver Schinagl <oliver@schinagl.nl> writes:
> 
>> But seeing that these are your main concerns, I'm more confident I'm
>> not completly on the wrong path here.
> 
> Mind you that they are not "MAIN" concerns.  They were the ones that
> jumped out at me from your sketch.  After seeing the real thing, I
> may find completely different issues that I could have spotted in
> this version as well---it is natural that people notice things they
> did not initially notice with a richer context.

I completely understand and agree. Just as I changed my design 3 times 
now already after learning new/more things.

Thank you for your time, patience and feedback however, it is much 
appreciated!

Olliver
Phillip Wood April 7, 2024, 2:09 p.m. UTC | #9
On 06/04/2024 20:17, Olliver Schinagl wrote:
> Hey Phillip,
> 
> On 06-04-2024 15:50, Phillip Wood wrote:
>> Hi Olliver
>>
>> On 06/04/2024 11:06, Olliver Schinagl wrote:
>>> On 06-04-2024 03:08, Junio C Hamano wrote:
>>>> Olliver Schinagl <oliver@schinagl.nl> writes:
>> If you search builtin/bisect.c you'll see some existing callers of 
>> strbuf_read_file() that read other files like BISECT_START. Those 
>> callers should give you an idea of how to use it.
> 
> Yeah, I found after Junio's hint :) What threw me off, as I wrote 
> earlier, get_terms(). I wonder now, why is get_terms() implemented as it 
> is, and should it not use the same functions? Or is it because terms is 
> a multi-line file, whereas the others are all single line (I didn't 
> look, though I see addline functions for the strbuf functions. Should 
> this be refactored?

get_terms() wants to read the first line into `term_bad` and the second 
line into `term_good` so it makes sense that it uses two calls to 
`strbuf_getline()` to do that. It does not want to read the whole file 
into a single buffer as we do here.

> So with the name, I started to think some more about it, and after 
> playing with some names, I settled on 'bisect-post-checkout'. Things 
> then sort of fell more into place. It is still a hook/commandline 
> option, but it's a much smaller change (since we don't have any special 
> code to check the exit code anymore) as we can (obviously) run `git 
> bisect skip` instead of `exit 125` as well of course.

Does that mean you will be starting "git bisect skip" from the script 
run by the current "git bisect" process. I don't think calling git 
recursively like that is a good idea as you'll potentially end up with a 
bunch of "git bisect" processes all waiting for their post checkout 
script to finish running.

Best Wishes

Phillip
Olliver Schinagl April 7, 2024, 2:52 p.m. UTC | #10
Hey Phillip,

On 07-04-2024 16:09, phillip.wood123@gmail.com wrote:
> On 06/04/2024 20:17, Olliver Schinagl wrote:
>> Hey Phillip,
>>
>> On 06-04-2024 15:50, Phillip Wood wrote:
>>> Hi Olliver
>>>
>>> On 06/04/2024 11:06, Olliver Schinagl wrote:
>>>> On 06-04-2024 03:08, Junio C Hamano wrote:
>>>>> Olliver Schinagl <oliver@schinagl.nl> writes:
>>> If you search builtin/bisect.c you'll see some existing callers of 
>>> strbuf_read_file() that read other files like BISECT_START. Those 
>>> callers should give you an idea of how to use it.
>>
>> Yeah, I found after Junio's hint :) What threw me off, as I wrote 
>> earlier, get_terms(). I wonder now, why is get_terms() implemented as 
>> it is, and should it not use the same functions? Or is it because 
>> terms is a multi-line file, whereas the others are all single line (I 
>> didn't look, though I see addline functions for the strbuf functions. 
>> Should this be refactored?
> 
> get_terms() wants to read the first line into `term_bad` and the second 
> line into `term_good` so it makes sense that it uses two calls to 
> `strbuf_getline()` to do that. It does not want to read the whole file 
> into a single buffer as we do here.

Right, but I why not use strbuf_getline()?

> 
>> So with the name, I started to think some more about it, and after 
>> playing with some names, I settled on 'bisect-post-checkout'. Things 
>> then sort of fell more into place. It is still a hook/commandline 
>> option, but it's a much smaller change (since we don't have any 
>> special code to check the exit code anymore) as we can (obviously) run 
>> `git bisect skip` instead of `exit 125` as well of course.
> 
> Does that mean you will be starting "git bisect skip" from the script 
> run by the current "git bisect" process. I don't think calling git 
> recursively like that is a good idea as you'll potentially end up with a 
> bunch of "git bisect" processes all waiting for their post checkout 
> script to finish running.

Well the process is inherently recursive, though that's up to the user 
depending on what they put in their script of course. I don't think git 
is 'waiting' is it? In that, git bisect runs the command, the command 
runs git bisect, git bisect stores the commit hash in the skip file and 
'exists', which goes then back to the bisect job, which then continues 
as it normally would.

So technically, we're not doing anything bad in git, but a user might do 
something bad.

Thank you,

Olliver

> 
> Best Wishes
> 
> Phillip
Phillip Wood April 7, 2024, 3:12 p.m. UTC | #11
On 07/04/2024 15:52, Olliver Schinagl wrote:
> Hey Phillip,
> 
> On 07-04-2024 16:09, phillip.wood123@gmail.com wrote:
>> On 06/04/2024 20:17, Olliver Schinagl wrote:
>>> Hey Phillip,
>>>
>>> On 06-04-2024 15:50, Phillip Wood wrote:
>>>> Hi Olliver
>>>>
>>>> On 06/04/2024 11:06, Olliver Schinagl wrote:
>>>>> On 06-04-2024 03:08, Junio C Hamano wrote:
>>>>>> Olliver Schinagl <oliver@schinagl.nl> writes:
>>>> If you search builtin/bisect.c you'll see some existing callers of 
>>>> strbuf_read_file() that read other files like BISECT_START. Those 
>>>> callers should give you an idea of how to use it.
>>>
>>> Yeah, I found after Junio's hint :) What threw me off, as I wrote 
>>> earlier, get_terms(). I wonder now, why is get_terms() implemented as 
>>> it is, and should it not use the same functions? Or is it because 
>>> terms is a multi-line file, whereas the others are all single line (I 
>>> didn't look, though I see addline functions for the strbuf functions. 
>>> Should this be refactored?
>>
>> get_terms() wants to read the first line into `term_bad` and the 
>> second line into `term_good` so it makes sense that it uses two calls 
>> to `strbuf_getline()` to do that. It does not want to read the whole 
>> file into a single buffer as we do here.
> 
> Right, but I why not use strbuf_getline()?

Because you want the whole file, not just one line as the script name 
could potentially contain a newline

>>> So with the name, I started to think some more about it, and after 
>>> playing with some names, I settled on 'bisect-post-checkout'. Things 
>>> then sort of fell more into place. It is still a hook/commandline 
>>> option, but it's a much smaller change (since we don't have any 
>>> special code to check the exit code anymore) as we can (obviously) 
>>> run `git bisect skip` instead of `exit 125` as well of course.
>>
>> Does that mean you will be starting "git bisect skip" from the script 
>> run by the current "git bisect" process. I don't think calling git 
>> recursively like that is a good idea as you'll potentially end up with 
>> a bunch of "git bisect" processes all waiting for their post checkout 
>> script to finish running.
> 
> Well the process is inherently recursive, though that's up to the user 
> depending on what they put in their script of course. I don't think git 
> is 'waiting' is it? In that, git bisect runs the command, the command 
> runs git bisect, git bisect stores the commit hash in the skip file and 
> 'exists', which goes then back to the bisect job, which then continues 
> as it normally would.
> 
> So technically, we're not doing anything bad in git, but a user might do 
> something bad.

If I understand correctly we're encouraging the user to run "git bisect 
skip" from the post checkout script. Doesn't that mean we'll end up with 
a set of processes that look like

	- git bisect start
	  - post checkout script
             - git bisect skip
               - post checkout script
                 - git bisect skip
                   ...

as the "git bisect start" is waiting for the post checkout script to 
finish running, but that script is waiting for "git bisect skip" to 
finish running and so on. Each of those processes takes up system 
resources, similar to how a recursive function can exhaust the available 
stack space by calling itself over and over again.

Best Wishes

Phillip
Olliver Schinagl April 7, 2024, 9:11 p.m. UTC | #12
On 07-04-2024 17:12, phillip.wood123@gmail.com wrote:
> On 07/04/2024 15:52, Olliver Schinagl wrote:
>> Hey Phillip,
>>
>> On 07-04-2024 16:09, phillip.wood123@gmail.com wrote:
>>> On 06/04/2024 20:17, Olliver Schinagl wrote:
>>>> Hey Phillip,
>>>>
>>>> On 06-04-2024 15:50, Phillip Wood wrote:
>>>>> Hi Olliver
>>>>>
>>>>> On 06/04/2024 11:06, Olliver Schinagl wrote:
>>>>>> On 06-04-2024 03:08, Junio C Hamano wrote:
>>>>>>> Olliver Schinagl <oliver@schinagl.nl> writes:
>>>>> If you search builtin/bisect.c you'll see some existing callers of 
>>>>> strbuf_read_file() that read other files like BISECT_START. Those 
>>>>> callers should give you an idea of how to use it.
>>>>
>>>> Yeah, I found after Junio's hint :) What threw me off, as I wrote 
>>>> earlier, get_terms(). I wonder now, why is get_terms() implemented 
>>>> as it is, and should it not use the same functions? Or is it because 
>>>> terms is a multi-line file, whereas the others are all single line 
>>>> (I didn't look, though I see addline functions for the strbuf 
>>>> functions. Should this be refactored?
>>>
>>> get_terms() wants to read the first line into `term_bad` and the 
>>> second line into `term_good` so it makes sense that it uses two calls 
>>> to `strbuf_getline()` to do that. It does not want to read the whole 
>>> file into a single buffer as we do here.
>>
>> Right, but I why not use strbuf_getline()?
> 
> Because you want the whole file, not just one line as the script name 
> could potentially contain a newline

I suppose; I'd think that there would be a strbuf function call to do 
exactly (more or less) of what was needed. But I'll let it go ;)

> 
>>>> So with the name, I started to think some more about it, and after 
>>>> playing with some names, I settled on 'bisect-post-checkout'. Things 
>>>> then sort of fell more into place. It is still a hook/commandline 
>>>> option, but it's a much smaller change (since we don't have any 
>>>> special code to check the exit code anymore) as we can (obviously) 
>>>> run `git bisect skip` instead of `exit 125` as well of course.
>>>
>>> Does that mean you will be starting "git bisect skip" from the script 
>>> run by the current "git bisect" process. I don't think calling git 
>>> recursively like that is a good idea as you'll potentially end up 
>>> with a bunch of "git bisect" processes all waiting for their post 
>>> checkout script to finish running.
>>
>> Well the process is inherently recursive, though that's up to the user 
>> depending on what they put in their script of course. I don't think 
>> git is 'waiting' is it? In that, git bisect runs the command, the 
>> command runs git bisect, git bisect stores the commit hash in the skip 
>> file and 'exists', which goes then back to the bisect job, which then 
>> continues as it normally would.
>>
>> So technically, we're not doing anything bad in git, but a user might 
>> do something bad.
> 
> If I understand correctly we're encouraging the user to run "git bisect 
> skip" from the post checkout script. Doesn't that mean we'll end up with 
> a set of processes that look like
> 
>      - git bisect start
>        - post checkout script
>              - git bisect skip
>                - post checkout script
>                  - git bisect skip
>                    ...
> 
> as the "git bisect start" is waiting for the post checkout script to 
> finish running, but that script is waiting for "git bisect skip" to 
> finish running and so on. Each of those processes takes up system 
> resources, similar to how a recursive function can exhaust the available 
> stack space by calling itself over and over again.

Hmm, you might be right. I was thinking that `git bisect skip` would put 
the hash in the file, and then exit, but of course it also goes to the 
next checkout and thus triggers the script again (potentially), we don't 
want that. We do want the hash to end up in the file, but then not 
continue, as that would be the job of git bisect.

So then I go back to my previous solution, which expects exit code 125, 
like the other case in bisect. That shouldn't cause that behavior, as 
we'd otherwise have the same problem with the other exit code 125.

Thank you,

Olliver

> 
> Best Wishes
> 
> Phillip
Junio C Hamano April 8, 2024, 4:49 p.m. UTC | #13
phillip.wood123@gmail.com writes:

>>> get_terms() wants to read the first line into `term_bad` and the
>>> second line into `term_good` so it makes sense that it uses two
>>> calls to `strbuf_getline()` to do that. It does not want to read
>>> the whole file into a single buffer as we do here.
>> Right, but I why not use strbuf_getline()?
>
> Because you want the whole file, not just one line as the script name
> could potentially contain a newline

It is technically true, but it somehow sounds like an implausible
scenario to me.  The real reason why read_file() is preferrable is
because you do not have to write, and we do not want to see you write,
the whole "open (and handle error), read, chomp, and return" sequence.

I would even suspect that get_terms() is a poorly written
anti-pattern.  If I were adding that function to the system today, I
wouldn't be surprised if I did read_file() the whole thing and
worked in-core to split two items out.

> If I understand correctly we're encouraging the user to run "git
> bisect skip" from the post checkout script. Doesn't that mean we'll
> end up with a set of processes that look like
>
> 	- git bisect start
> 	  - post checkout script
>             - git bisect skip
>               - post checkout script
>                 - git bisect skip
>                   ...
>
> as the "git bisect start" is waiting for the post checkout script to
> finish running, but that script is waiting for "git bisect skip" to
> finish running and so on. Each of those processes takes up system
> resources, similar to how a recursive function can exhaust the
> available stack space by calling itself over and over again.

True.  What such a post-checkout script can do is to only mark the
HEAD as "untestable", just like a run script given to "bisect run"
signals that fact by returnint 125.  And at that point, I doubt it
makes sense to add such a post-checkout script for the purpose of
allowing "bisect skip".

Having said that, a post-checkout script and pre-resume script may
have a huge value in helping those whose tests cannot be automated
(in other words, they cannot do "git bisect run") when they need to
tweak the working tree during bisection.  We all have seen, during a
bisection session that spans a segment of history that has another
bug that affects our test *but* is orthogonal to the bug we are
chasing, that we "cherry-pick --no-commit" the fix for that other
problem inside "git bisect run" script.  It might look something
like

    #!/bin/sh
    if git merge-base --is-ancestor $the_other_bug HEAD
    then
	# we need the fix
	git cherry-pick --no-commit $fix_for_the_other_bug ||
	exit 125
    fi

    make test
    status=$?
    git reset --hard ;# undo the cherry-pick
    exit $status

But to those whose test is not a good match to "git bisect run", if
we had a mechanism to tweak the checked out working tree after the
"bisect next" (which is an internal mechanism that "bisect good",
"bisect bad", and "bisect skip" share to give you the next HEAD and
the working tree to test) checks out the working tree before it
gives the control back to you, we could split the above script into
two parts and throw the "conditionally cherry-pick the fix" part
into that mechanism.  We'd need to have a companion script to "redo
the damage" (the "reset --hard" in the above illustration) if this
were to work seamlessly.  That obviously is totally orthogonal to
what we are discussing in this thread, but may make a good #leftoverbits
material (but not for novices).
Olliver Schinagl April 10, 2024, 10:39 a.m. UTC | #14
On 08-04-2024 18:49, Junio C Hamano wrote:
> phillip.wood123@gmail.com writes:
>
>>>> get_terms() wants to read the first line into `term_bad` and the
>>>> second line into `term_good` so it makes sense that it uses two
>>>> calls to `strbuf_getline()` to do that. It does not want to read
>>>> the whole file into a single buffer as we do here.
>>> Right, but I why not use strbuf_getline()?
>> Because you want the whole file, not just one line as the script name
>> could potentially contain a newline
> It is technically true, but it somehow sounds like an implausible
> scenario to me.  The real reason why read_file() is preferrable is
> because you do not have to write, and we do not want to see you write,
> the whole "open (and handle error), read, chomp, and return" sequence.
>
> I would even suspect that get_terms() is a poorly written
> anti-pattern.  If I were adding that function to the system today, I
> wouldn't be surprised if I did read_file() the whole thing and
> worked in-core to split two items out.

So I've peaked at it, and think something like:

+int bisect_read_terms(const char **read_bad, const char **read_good)
+{
+       struct strbuf sb = STRBUF_INIT;
+
+       if (!strbuf_read_file(&sb, git_path_bisect_terms(), 0)) {
+               *read_bad = "bad";
+               *read_good = "good";
+               return -1;
+       }
+
+       terms = strbuf_split(&sb);
+       *term_bad = strbuf_detach(terms[0], NULL);
+       *term_good = strbuf_detach(terms[1], NULL);
+
+       strbuf_release(&sb);
+
+       return 0;
+}

would do the trick. This function could then be called from 
builtin/bisect.c as well to have a single interface. Right now, there's 
two ways to do the same thing, just because the arguments to the 
function are different, and the body is slightly different, but the same.

Shall I send a MR with this?

>
>> If I understand correctly we're encouraging the user to run "git
>> bisect skip" from the post checkout script. Doesn't that mean we'll
>> end up with a set of processes that look like
>>
>> 	- git bisect start
>> 	  - post checkout script
>>              - git bisect skip
>>                - post checkout script
>>                  - git bisect skip
>>                    ...
>>
>> as the "git bisect start" is waiting for the post checkout script to
>> finish running, but that script is waiting for "git bisect skip" to
>> finish running and so on. Each of those processes takes up system
>> resources, similar to how a recursive function can exhaust the
>> available stack space by calling itself over and over again.
> True.  What such a post-checkout script can do is to only mark the
> HEAD as "untestable", just like a run script given to "bisect run"
> signals that fact by returnint 125.  And at that point, I doubt it
> makes sense to add such a post-checkout script for the purpose of
> allowing "bisect skip".
>
> Having said that, a post-checkout script and pre-resume script may
> have a huge value in helping those whose tests cannot be automated
> (in other words, they cannot do "git bisect run") when they need to
> tweak the working tree during bisection.  We all have seen, during a
> bisection session that spans a segment of history that has another
> bug that affects our test *but* is orthogonal to the bug we are
> chasing, that we "cherry-pick --no-commit" the fix for that other
> problem inside "git bisect run" script.  It might look something
> like
>
>      #!/bin/sh
>      if git merge-base --is-ancestor $the_other_bug HEAD
>      then
> 	# we need the fix
> 	git cherry-pick --no-commit $fix_for_the_other_bug ||
> 	exit 125
>      fi
>
>      make test
>      status=$?
>      git reset --hard ;# undo the cherry-pick
>      exit $status
>
> But to those whose test is not a good match to "git bisect run", if
> we had a mechanism to tweak the checked out working tree after the
> "bisect next" (which is an internal mechanism that "bisect good",
> "bisect bad", and "bisect skip" share to give you the next HEAD and
> the working tree to test) checks out the working tree before it
> gives the control back to you, we could split the above script into
> two parts and throw the "conditionally cherry-pick the fix" part
> into that mechanism.  We'd need to have a companion script to "redo
> the damage" (the "reset --hard" in the above illustration) if this
> were to work seamlessly.  That obviously is totally orthogonal to
> what we are discussing in this thread, but may make a good #leftoverbits
> material (but not for novices).

While completly orthonogal, I agree; it would be nice to have and 
'abuse' for the bisect-skip usecase. So if we ignore the fact that it 
can be abused for this (which I don't think is a bad thing, it just 
risks the recursive issue Phillip mentioned.


As I'm not familiar with the deeps of bisect, I just use it as a dumb 
simple user, e.g. start; good, bad; I'm not sure the usecase you are 
describing is completly clear to me.

Are you saying 'git bisect run` is great, but not useful in all 
situations, and so in some cases, we want what you just said? Or would 
this also be part of git bisect run?

I've drafted the post-checkout and pre-resume here: 
https://gitlab.com/olliver/git/-/commit/6b5415377600551c0d94a359fd4b8ca7a3678dcf 
where I'm not clear on what the best points are for for the pre/post 
points. I've put the 'pre' bit in the bisect_state function, as that was 
being triggered by many suboptions, but might not be correct based on 
your answer to the above 
(https://gitlab.com/olliver/git/-/commit/6b5415377600551c0d94a359fd4b8ca7a3678dcf#46324e17f99db64a67eb9a5983ffc3a680914ee3_1001_1028). 
The post-checkout part, I've put in bisect_next 
(https://gitlab.com/olliver/git/-/commit/43993fca32f174f1005c7a445887c0ba5c4036b5#46324e17f99db64a67eb9a5983ffc3a680914ee3_672_717) 
which seems to match what you described.


Olliver
Junio C Hamano April 10, 2024, 4:47 p.m. UTC | #15
Olliver Schinagl <oliver@schinagl.nl> writes:

> While completly orthonogal, I agree; it would be nice to have and
> 'abuse' for the bisect-skip usecase. So if we ignore the fact that it
> can be abused for this (which I don't think is a bad thing, it just
> risks the recursive issue Phillip mentioned.

I do not see the "recursive" issue here, though.  If we had such a
mechansim, those whose test cannot be driven by "bisect run" can
still use the "--post-checkout" and "--pre-resume" options, where
the post-checkout option names a file that has:

	#!/bin/sh
        if git merge-base --is-ancestor $the_other_bug HEAD
        then
          # we need the fix
          git cherry-pick --no-commit $fix_for_the_other_bug ||
          exit 125
        fi

in it.  There is no "recursive"-ness here.  And then after manually
testing the checked out stuff (with tweak, thanks to the post-checkout
script), they can now say "git bisect good/bad/skip" and that is
when their --pre-resume script kicks in, which may do

	#!/bin/sh
	git reset --hard ;# undo the damage done by post-checkout

before the bisect machinery goes and picks the next commit to test.

Notice that I still kept the "exit 125" in the above post-checkout
example?  That is where the "bisect next" that picked the commit to
test, checked out that commit and updated the working tree, and run
your post-checkout script, can be told that the version checked out
is untestable and to be skipped.  So such a post-checkout script can
be treated as a strict superset of --skip-when script we have been
discussing.

Needless to say, if we were to do this, we probably should let
"bisect run" also pay attention to these two scripts.  They are most
likely to become new parameters specified when "bisect start" is run
to be recorded as one of the many states "git bisect" creates.
Olliver Schinagl April 10, 2024, 7:22 p.m. UTC | #16
On 10-04-2024 18:47, Junio C Hamano wrote:
> Olliver Schinagl <oliver@schinagl.nl> writes:
> 
>> While completly orthonogal, I agree; it would be nice to have and
>> 'abuse' for the bisect-skip usecase. So if we ignore the fact that it
>> can be abused for this (which I don't think is a bad thing, it just
>> risks the recursive issue Phillip mentioned.
> 
> I do not see the "recursive" issue here, though.  If we had such a
> mechansim, those whose test cannot be driven by "bisect run" can
> still use the "--post-checkout" and "--pre-resume" options, where
> the post-checkout option names a file that has:
> 
> 	#!/bin/sh
>          if git merge-base --is-ancestor $the_other_bug HEAD
>          then
>            # we need the fix
>            git cherry-pick --no-commit $fix_for_the_other_bug ||
>            exit 125
>          fi
> 
> in it.  There is no "recursive"-ness here.  And then after manually
> testing the checked out stuff (with tweak, thanks to the post-checkout
> script), they can now say "git bisect good/bad/skip" and that is
> when their --pre-resume script kicks in, which may do
> 
> 	#!/bin/sh
> 	git reset --hard ;# undo the damage done by post-checkout
> 
> before the bisect machinery goes and picks the next commit to test.

Yep, that was all perfectly clear to me :) Though I do admit, I 
initially overlooked the 'not' in your comment on 'those whose test 
can**not** be driven by "bisect run"' bit.

> 
> Notice that I still kept the "exit 125" in the above post-checkout
> example?  That is where the "bisect next" that picked the commit to
> test, checked out that commit and updated the working tree, and run
> your post-checkout script, can be told that the version checked out
> is untestable and to be skipped.

This is where things got stuck for me. I had the 'exit 125' bit for a 
while, but couldn't figure out 'how to mark' stuff. Right now, it just 
calls the script, and if you are a bad user, you can call `git bisect 
skip` and things work as expected, albeit with the aforementioned recursion.

In the past, as I reported here as well, I tried to capture exit 125, 
the above would still be true of course, but exit 125 would be a way to 
'catch' this and respond accordingly. The 'accordingly' is where I get 
stuck.

See, the hook is named 'post-checkout' and thus, it runs after checkout 
has been performed. So we are now on the 'broken' commit we do not want 
to test, git should have skipped this already, and not checked it out.

So ok fine, we can call it 'pre-checkout'. But then what. I experimented 
with marking the skippyness just after `find_bisection()` here: 
https://gitlab.com/olliver/git/-/blob/post_search/bisect.c?ref_type=heads#L1090

with a simple
	strbuf_addf(&skip, "refs/bisect/skip-%s", oid_to_hex(oid));
	oid_array_append(&skipped_revs, &oid);

While this kinda worked, it failed when two (or more) commits in order 
where to be skipped and 'finished' with 'no possible commits to check'

So I kinda gave up here and went back to post-checkout.

> So such a post-checkout script can
> be treated as a strict superset of --skip-when script we have been
> discussing.
> 
> Needless to say, if we were to do this, we probably should let
> "bisect run" also pay attention to these two scripts.  They are most
> likely to become new parameters specified when "bisect start" is run
> to be recorded as one of the many states "git bisect" creates.

The way I've done it now is that it's called from `bisect_next()` here 
https://gitlab.com/olliver/git/-/commit/20dd6f5f0e2a55f940bab1e3aced0686d8dfd0c5#46324e17f99db64a67eb9a5983ffc3a680914ee3_672_717 
but as I said, checkout has already commenced. Doing it here seems to 
work with bisect run as well. Resume is done in `bisect_state()` here 
https://gitlab.com/olliver/git/-/commit/f9b14a66ea5c4c98f48236db119d3eb60427c1bd#46324e17f99db64a67eb9a5983ffc3a680914ee3_1001_1028 
which also happens in the run case.


The whole exit 125 and avoid recursion thing, is to me more like an 
additional 'nice-to-have' feature. Recursion wouldn't be a huge thing 
for modern systems generally anyway in the 'normal/common' case where 
you recurse 10-ish times. It'll of course get worse if there's multiple 
commits that would need to be skipped. So even if the recursion is 
20-ish, it's ugly, but not horrible.

In any case, if the recursion thing is considered bad and must be solved 
if a user does this, as I said before it's not clear to me how to 
trigger git to say 'oh, I have to go back and do something else; or, 
call git bisect skip internally, without causing the recursion; or 'put 
the commit in the queue to be skipped, before it's checked out (which of 
course is not a 'post-checkout' of course.
Junio C Hamano April 10, 2024, 7:31 p.m. UTC | #17
Olliver Schinagl <oliver@schinagl.nl> writes:

> See, the hook is named 'post-checkout' and thus, it runs after
> checkout has been performed. So we are now on the 'broken' commit we
> do not want to test, git should have skipped this already, and not
> checked it out.

You are not the only user of this feature (by the way, do not call
this a "hook".  It should be per "git bisect" session) and others
may need to actually inspect their working tree state before being
able to say "nah, I do not want to test this version, please give me
another one" by exiting with 125.  That is why post-checkout is more
useful in general.  Contrasted with that, a check that happens
before the checkout is useful only in a much narrower "I can tell by
looking only at the commit object name" use case, which I would not
be interested in seeing.

Thanks.
Olliver Schinagl April 10, 2024, 7:39 p.m. UTC | #18
On 10-04-2024 21:31, Junio C Hamano wrote:
> Olliver Schinagl <oliver@schinagl.nl> writes:
> 
>> See, the hook is named 'post-checkout' and thus, it runs after
>> checkout has been performed. So we are now on the 'broken' commit we
>> do not want to test, git should have skipped this already, and not
>> checked it out.
> 
> You are not the only user of this feature (by the way, do not call
> this a "hook".  It should be per "git bisect" session)

Yep, it is stored as part of the bisect session when invoked via as a CLI.

> and others may need to actually inspect their working tree state before being
> able to say "nah, I do not want to test this version, please give me
> another one" by exiting with 125.

So your use of 'You' and 'others' is a bit confusing, if it's me, the 
person, and 'others' the script itself. But yes, I fully agree with what 
you are saying I think. Just 'please give me a nother one' could be done 
in the script itself, or via exit 125 of course (where I totally get 
that doing the 125 routine is much better).


> That is why post-checkout is more
> useful in general.  Contrasted with that, a check that happens
> before the checkout is useful only in a much narrower "I can tell by
> looking only at the commit object name" use case, which I would not
> be interested in seeing.

So that leaves me in the same lack of understanding :p

If we run something post checkout, `bisect_checkout()` has run, and git 
bisect (the application) is (almost) done. The directory has been 
'checked out' (as `bisect_checkout()` of course does `git checkout` 
(unless `no_checkout` is used) The tree is of course in a bisect session.

So if a script runs, and does the 'please give me a nother commit' 
thing, how would that work within `builtin/bisect.c` (or in `bisect.c`). 
This is the part that would puzzle me. The command returns with 125, 
then what? How can git 'go on and give you a nother one'. afaik the only 
return codes from `bisect_next()` etc are BISECT_OK and BISECT_FAIL?

Olliver

> 
> Thanks.
> 
>
Phillip Wood April 12, 2024, 1:35 p.m. UTC | #19
Hi Junio

On 08/04/2024 17:49, Junio C Hamano wrote:
> phillip.wood123@gmail.com writes:
 >
> Having said that, a post-checkout script and pre-resume script may
> have a huge value in helping those whose tests cannot be automated
> (in other words, they cannot do "git bisect run") when they need to
> tweak the working tree during bisection.  We all have seen, during a
> bisection session that spans a segment of history that has another
> bug that affects our test *but* is orthogonal to the bug we are
> chasing, that we "cherry-pick --no-commit" the fix for that other
> problem inside "git bisect run" script.  It might look something
> like
> 
>      #!/bin/sh
>      if git merge-base --is-ancestor $the_other_bug HEAD
>      then
> 	# we need the fix
> 	git cherry-pick --no-commit $fix_for_the_other_bug ||
> 	exit 125
>      fi
> 
>      make test
>      status=$?
>      git reset --hard ;# undo the cherry-pick
>      exit $status

I like this suggestion. A generalized post-checkout script that prepares 
the wortree and can indicate that this commit should be skipped could be 
really useful. When having to test manually being able to automate 
building each checkout would be convenient even if one does not have an 
orthogonal bug to worry about.

Best Wishes

Phillip
diff mbox series

Patch

diff --git a/bisect.c b/bisect.c
index 60aae2fe50..185909cca9 100644
--- a/bisect.c
+++ b/bisect.c
@@ -476,6 +476,7 @@  static GIT_PATH_FUNC(git_path_bisect_names, "BISECT_NAMES")
 static GIT_PATH_FUNC(git_path_bisect_ancestors_ok, "BISECT_ANCESTORS_OK")
 static GIT_PATH_FUNC(git_path_bisect_run, "BISECT_RUN")
 static GIT_PATH_FUNC(git_path_bisect_start, "BISECT_START")
+static GIT_PATH_FUNC(git_path_bisect_skip_when, "BISECT_SKIP_WHEN")
 static GIT_PATH_FUNC(git_path_bisect_log, "BISECT_LOG")
 static GIT_PATH_FUNC(git_path_bisect_terms, "BISECT_TERMS")
 static GIT_PATH_FUNC(git_path_bisect_first_parent, "BISECT_FIRST_PARENT")
@@ -1179,6 +1180,7 @@  int bisect_clean_state(void)
 	unlink_or_warn(git_path_bisect_log());
 	unlink_or_warn(git_path_bisect_names());
 	unlink_or_warn(git_path_bisect_run());
+	unlink_or_warn(git_path_bisect_skip_when());
 	unlink_or_warn(git_path_bisect_terms());
 	unlink_or_warn(git_path_bisect_first_parent());
 	/*
diff --git a/builtin/bisect.c b/builtin/bisect.c
index 9891cf2604..6870142b85 100644
--- a/builtin/bisect.c
+++ b/builtin/bisect.c
@@ -4,6 +4,7 @@ 
 #include "environment.h"
 #include "gettext.h"
 #include "hex.h"
+#include "hook.h"
 #include "object-name.h"
 #include "oid-array.h"
 #include "parse-options.h"
@@ -14,19 +15,21 @@ 
 #include "revision.h"
 #include "run-command.h"
 #include "strvec.h"
+#include "wrapper.h"
 
 static GIT_PATH_FUNC(git_path_bisect_terms, "BISECT_TERMS")
 static GIT_PATH_FUNC(git_path_bisect_ancestors_ok, "BISECT_ANCESTORS_OK")
 static GIT_PATH_FUNC(git_path_bisect_start, "BISECT_START")
+static GIT_PATH_FUNC(git_path_bisect_skip_when, "BISECT_SKIP_WHEN")
 static GIT_PATH_FUNC(git_path_bisect_log, "BISECT_LOG")
 static GIT_PATH_FUNC(git_path_bisect_names, "BISECT_NAMES")
 static GIT_PATH_FUNC(git_path_bisect_first_parent, "BISECT_FIRST_PARENT")
 static GIT_PATH_FUNC(git_path_bisect_run, "BISECT_RUN")
 
 #define BUILTIN_GIT_BISECT_START_USAGE \
-	N_("git bisect start [--term-(new|bad)=<term> --term-(old|good)=<term>]" \
-	   "    [--no-checkout] [--first-parent] [<bad> [<good>...]] [--]" \
-	   "    [<pathspec>...]")
+	N_("git bisect start [--term-(new|bad)=<term> --term-(old|good)=<term>]\n" \
+	   "                 [--no-checkout] [--first-parent] [--skip-when=<script>]\n" \
+	   "                 [<bad> [<good>...]] [--] [<pathspec>...]")
 #define BUILTIN_GIT_BISECT_STATE_USAGE \
 	N_("git bisect (good|bad) [<rev>...]")
 #define BUILTIN_GIT_BISECT_TERMS_USAGE \
@@ -89,6 +92,7 @@  static const char vocab_bad[] = "bad|new";
 static const char vocab_good[] = "good|old";
 
 static int bisect_autostart(struct bisect_terms *terms);
+static enum bisect_error bisect_skip(struct bisect_terms *terms, int argc, const char **argv);
 
 /*
  * Check whether the string `term` belongs to the set of strings
@@ -680,14 +684,74 @@  static enum bisect_error bisect_next(struct bisect_terms *terms, const char *pre
 	return res;
 }
 
+static int get_skip_when(const char **skip_when)
+{
+	struct strbuf str = STRBUF_INIT;
+	FILE *fp = NULL;
+	int res = 0;
+
+	fp = fopen(git_path_bisect_skip_when(), "r");
+	if (!fp) {
+		res = -1;
+		goto finish;
+	}
+
+	strbuf_getline_lf(&str, fp);
+	*skip_when = strbuf_detach(&str, NULL);
+
+finish:
+	if (fp)
+		fclose(fp);
+	strbuf_release(&str);
+
+	return res;
+}
+
 static enum bisect_error bisect_auto_next(struct bisect_terms *terms, const char *prefix)
 {
+	int no_checkout = ref_exists("BISECT_HEAD");
+	enum bisect_error res;
+	struct object_id oid;
+
 	if (bisect_next_check(terms, NULL)) {
 		bisect_print_status(terms);
 		return BISECT_OK;
 	}
 
-	return bisect_next(terms, prefix);
+	res = bisect_next(terms, prefix);
+	if (res)
+		return res;
+
+	if (!read_ref(no_checkout ? "BISECT_HEAD" : "HEAD", &oid)) {
+		struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT;
+		char *rev = oid_to_hex(&oid);
+		const char *skip_when = NULL;
+		int ret = 0;
+
+		get_skip_when(&skip_when);
+		if (skip_when != NULL) {
+			struct child_process cmd = CHILD_PROCESS_INIT;
+
+			cmd.use_shell = 1;
+			cmd.no_stdin = 1;
+			strvec_pushl(&cmd.args, skip_when, rev, NULL);
+
+			printf(_("running '%s'\n"), skip_when);
+			ret = run_command(&cmd);
+		}
+
+		strvec_push(&opt.args, rev);
+		if ((ret == 125) ||
+		    (run_hooks_opt("bisect-skip_when", &opt) == 125)) {
+			struct strvec argv = STRVEC_INIT;
+
+			printf(_("auto skipping commit [%s]...\n"), rev);
+			sq_dequote_to_strvec("skip", &argv);
+			res = bisect_skip(terms, argv.nr, argv.v);
+		}
+	}
+
+	return res;
 }
 
 static enum bisect_error bisect_start(struct bisect_terms *terms, int argc,
@@ -703,6 +767,7 @@  static enum bisect_error bisect_start(struct bisect_terms *terms, int argc,
 	struct strbuf start_head = STRBUF_INIT;
 	struct strbuf bisect_names = STRBUF_INIT;
 	struct object_id head_oid;
+	char *skip_when = NULL;
 	struct object_id oid;
 	const char *head;
 
@@ -727,6 +792,15 @@  static enum bisect_error bisect_start(struct bisect_terms *terms, int argc,
 			no_checkout = 1;
 		} else if (!strcmp(arg, "--first-parent")) {
 			first_parent_only = 1;
+		} else if (!strcmp(arg, "--skip-when")) {
+			i++;
+
+			if (argc <= i)
+				return error(_("'' is not a valid skip-when script"));
+
+			skip_when = xstrdup(argv[i]);
+		} else if (skip_prefix(arg, "--skip-when=", &arg)) {
+			skip_when = xstrdup(arg);
 		} else if (!strcmp(arg, "--term-good") ||
 			 !strcmp(arg, "--term-old")) {
 			i++;
@@ -867,11 +941,22 @@  static enum bisect_error bisect_start(struct bisect_terms *terms, int argc,
 		goto finish;
 	}
 
+	if (skip_when) {
+		if (access(skip_when, X_OK)) {
+			res = error(_("%s: no such path in the working tree.\n"), skip_when);
+			goto finish;
+		}
+		write_to_file(git_path_bisect_skip_when(), "%s\n", skip_when);
+	}
+
 	res = bisect_append_log_quoted(argv);
 	if (res)
 		res = BISECT_FAILED;
 
 finish:
+	if (skip_when)
+		free(skip_when);
+
 	string_list_clear(&revs, 0);
 	string_list_clear(&states, 0);
 	strbuf_release(&start_head);
diff --git a/templates/hooks--bisect-skip_when.sample b/templates/hooks--bisect-skip_when.sample
new file mode 100755
index 0000000000..ff3960841f
--- /dev/null
+++ b/templates/hooks--bisect-skip_when.sample
@@ -0,0 +1,10 @@ 
+#!/bin/sh
+#
+# usage: ${0} <commit_object_name>
+# expected to exit with 125 when the commit should be skipped
+
+if git cat-file commit "${1:-HEAD}" | grep -q "^GIT_BISECT_SKIP=1$"; then
+	exit 125
+fi
+
+exit 0