diff mbox series

[01/15] run-job: create barebones builtin

Message ID 665da239774419074a9bae49b9c92b340885bfa3.1585946894.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series [01/15] run-job: create barebones builtin | expand

Commit Message

Derrick Stolee via GitGitGadget April 3, 2020, 8:48 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

The 'git run-job' command will be used to execute a short-lived set
of maintenance activities by a background job manager. The intention
is to perform small batches of work that reduce the foreground time
taken by repository maintenance such as 'git gc --auto'.

This change does the absolute minimum to create the builtin and show
the usage output.

Provide an explicit warning that this command is experimental. The
set of jobs may change, and each job could alter its behavior in
future versions.

RFC QUESTION: This builtin is based on the background maintenance in
Scalar. Specifically, this builtin is based on the "scalar run <job>"
command [1] [2]. My default thought was to make this a "git run <job>"
command to maximize similarity. However, it seems like "git run" is
too generic. Or, am I being overly verbose for no reason?

[1] https://github.com/microsoft/scalar/blob/master/docs/advanced.md#run-maintenance-in-the-foreground
[2] https://github.com/microsoft/scalar/blob/master/Scalar/CommandLine/RunVerb.cs

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 .gitignore                    |  1 +
 Documentation/git-run-job.txt | 36 +++++++++++++++++++++++++++++++++++
 Makefile                      |  1 +
 builtin.h                     |  1 +
 builtin/run-job.c             | 28 +++++++++++++++++++++++++++
 command-list.txt              |  1 +
 git.c                         |  1 +
 t/t7900-run-job.sh            | 15 +++++++++++++++
 8 files changed, 84 insertions(+)
 create mode 100644 Documentation/git-run-job.txt
 create mode 100644 builtin/run-job.c
 create mode 100755 t/t7900-run-job.sh

Comments

Phillip Wood April 5, 2020, 3:10 p.m. UTC | #1
Hi Stolee

On 03/04/2020 21:48, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <dstolee@microsoft.com>
> 
> The 'git run-job' command will be used to execute a short-lived set
> of maintenance activities by a background job manager. The intention
> is to perform small batches of work that reduce the foreground time
> taken by repository maintenance such as 'git gc --auto'.
> 
> This change does the absolute minimum to create the builtin and show
> the usage output.
> 
> Provide an explicit warning that this command is experimental. The
> set of jobs may change, and each job could alter its behavior in
> future versions.
> 
> RFC QUESTION: This builtin is based on the background maintenance in
> Scalar. Specifically, this builtin is based on the "scalar run <job>"
> command [1] [2]. My default thought was to make this a "git run <job>"
> command to maximize similarity. However, it seems like "git run" is
> too generic. Or, am I being overly verbose for no reason?

Having read through this series I wondered if we wanted a single git 
command such as 'git maintenance' (suggestions of better names welcome) 
and then 'git run-job' could become 'git maintenance run', 'git 
job-runner' would become another subcommand (run-jobs or schedule-jobs?) 
and the 'git please-run-maintenance-on-this-repo' you mentioned in your 
email to Junio could become 'git maintenance init' (or maybe setup)

Best Wishes

Phillip

> [1] https://github.com/microsoft/scalar/blob/master/docs/advanced.md#run-maintenance-in-the-foreground
> [2] https://github.com/microsoft/scalar/blob/master/Scalar/CommandLine/RunVerb.cs
> 
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>   .gitignore                    |  1 +
>   Documentation/git-run-job.txt | 36 +++++++++++++++++++++++++++++++++++
>   Makefile                      |  1 +
>   builtin.h                     |  1 +
>   builtin/run-job.c             | 28 +++++++++++++++++++++++++++
>   command-list.txt              |  1 +
>   git.c                         |  1 +
>   t/t7900-run-job.sh            | 15 +++++++++++++++
>   8 files changed, 84 insertions(+)
>   create mode 100644 Documentation/git-run-job.txt
>   create mode 100644 builtin/run-job.c
>   create mode 100755 t/t7900-run-job.sh
> 
> diff --git a/.gitignore b/.gitignore
> index 188bd1c3de1..5dea9d3b96b 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -144,6 +144,7 @@
>   /git-rev-parse
>   /git-revert
>   /git-rm
> +/git-run-job
>   /git-send-email
>   /git-send-pack
>   /git-serve
> diff --git a/Documentation/git-run-job.txt b/Documentation/git-run-job.txt
> new file mode 100644
> index 00000000000..0627b3ed259
> --- /dev/null
> +++ b/Documentation/git-run-job.txt
> @@ -0,0 +1,36 @@
> +git-run-job(1)
> +==============
> +
> +NAME
> +----
> +git-run-job - Run a maintenance job. Intended for background operation.
> +
> +
> +SYNOPSIS
> +--------
> +[verse]
> +'git run-job <job-name>'
> +
> +
> +DESCRIPTION
> +-----------
> +
> +Run a maintenance job on the current repository. This is available as a
> +command for a few reasons. First, the background job feature can launch
> +these commands on a schedule and each process will completely clear its
> +memory when complete. Second, an expert user could create their own job
> +schedule by running these jobs themselves.
> +
> +THIS COMMAND IS EXPERIMENTAL. THE SET OF AVAILABLE JOBS OR THEIR EXACT
> +BEHAVIOR MAY BE ALTERED IN THE FUTURE.
> +
> +
> +JOBS
> +----
> +
> +TBD
> +
> +
> +GIT
> +---
> +Part of the linkgit:git[1] suite
> diff --git a/Makefile b/Makefile
> index ef1ff2228f0..f5f9c4d9e94 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1125,6 +1125,7 @@ BUILTIN_OBJS += builtin/rev-list.o
>   BUILTIN_OBJS += builtin/rev-parse.o
>   BUILTIN_OBJS += builtin/revert.o
>   BUILTIN_OBJS += builtin/rm.o
> +BUILTIN_OBJS += builtin/run-job.o
>   BUILTIN_OBJS += builtin/send-pack.o
>   BUILTIN_OBJS += builtin/shortlog.o
>   BUILTIN_OBJS += builtin/show-branch.o
> diff --git a/builtin.h b/builtin.h
> index 2b25a80cde3..3e0ddaaf67f 100644
> --- a/builtin.h
> +++ b/builtin.h
> @@ -220,6 +220,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix);
>   int cmd_rev_parse(int argc, const char **argv, const char *prefix);
>   int cmd_revert(int argc, const char **argv, const char *prefix);
>   int cmd_rm(int argc, const char **argv, const char *prefix);
> +int cmd_run_job(int argc, const char **argv, const char *prefix);
>   int cmd_send_pack(int argc, const char **argv, const char *prefix);
>   int cmd_shortlog(int argc, const char **argv, const char *prefix);
>   int cmd_show(int argc, const char **argv, const char *prefix);
> diff --git a/builtin/run-job.c b/builtin/run-job.c
> new file mode 100644
> index 00000000000..2c78d053aa4
> --- /dev/null
> +++ b/builtin/run-job.c
> @@ -0,0 +1,28 @@
> +#include "builtin.h"
> +#include "config.h"
> +#include "parse-options.h"
> +
> +static char const * const builtin_run_job_usage[] = {
> +	N_("git run-job"),
> +	NULL
> +};
> +
> +int cmd_run_job(int argc, const char **argv, const char *prefix)
> +{
> +	static struct option builtin_run_job_options[] = {
> +		OPT_END(),
> +	};
> +
> +	if (argc == 2 && !strcmp(argv[1], "-h"))
> +		usage_with_options(builtin_run_job_usage,
> +				   builtin_run_job_options);
> +
> +	git_config(git_default_config, NULL);
> +	argc = parse_options(argc, argv, prefix,
> +			     builtin_run_job_options,
> +			     builtin_run_job_usage,
> +			     PARSE_OPT_KEEP_UNKNOWN);
> +
> +	usage_with_options(builtin_run_job_usage,
> +			   builtin_run_job_options);
> +}
> diff --git a/command-list.txt b/command-list.txt
> index 20878946558..1cd2b415e46 100644
> --- a/command-list.txt
> +++ b/command-list.txt
> @@ -156,6 +156,7 @@ git-revert                              mainporcelain
>   git-rev-list                            plumbinginterrogators
>   git-rev-parse                           plumbinginterrogators
>   git-rm                                  mainporcelain           worktree
> +git-run-job                             plumbingmanipulators
>   git-send-email                          foreignscminterface             complete
>   git-send-pack                           synchingrepositories
>   git-shell                               synchelpers
> diff --git a/git.c b/git.c
> index b07198fe036..db5a43c8687 100644
> --- a/git.c
> +++ b/git.c
> @@ -566,6 +566,7 @@ static struct cmd_struct commands[] = {
>   	{ "rev-parse", cmd_rev_parse, NO_PARSEOPT },
>   	{ "revert", cmd_revert, RUN_SETUP | NEED_WORK_TREE },
>   	{ "rm", cmd_rm, RUN_SETUP },
> +	{ "run-job", cmd_run_job, RUN_SETUP },
>   	{ "send-pack", cmd_send_pack, RUN_SETUP },
>   	{ "shortlog", cmd_shortlog, RUN_SETUP_GENTLY | USE_PAGER },
>   	{ "show", cmd_show, RUN_SETUP },
> diff --git a/t/t7900-run-job.sh b/t/t7900-run-job.sh
> new file mode 100755
> index 00000000000..1eac80b7ed3
> --- /dev/null
> +++ b/t/t7900-run-job.sh
> @@ -0,0 +1,15 @@
> +#!/bin/sh
> +
> +test_description='git run-job
> +
> +Testing the background jobs, in the foreground
> +'
> +
> +. ./test-lib.sh
> +
> +test_expect_success 'help text' '
> +	test_must_fail git run-job -h 2>err &&
> +	test_i18ngrep "usage: git run-job" err
> +'
> +
> +test_done
>
Junio C Hamano April 5, 2020, 7:21 p.m. UTC | #2
Phillip Wood <phillip.wood123@gmail.com> writes:

> Hi Stolee
>
> On 03/04/2020 21:48, Derrick Stolee via GitGitGadget wrote:
>> From: Derrick Stolee <dstolee@microsoft.com>
>>
>> The 'git run-job' command will be used to execute a short-lived set
>> of maintenance activities by a background job manager. The intention
>> is to perform small batches of work that reduce the foreground time
>> taken by repository maintenance such as 'git gc --auto'.
>>
>> This change does the absolute minimum to create the builtin and show
>> the usage output.
>>
>> Provide an explicit warning that this command is experimental. The
>> set of jobs may change, and each job could alter its behavior in
>> future versions.
>>
>> RFC QUESTION: This builtin is based on the background maintenance in
>> Scalar. Specifically, this builtin is based on the "scalar run <job>"
>> command [1] [2]. My default thought was to make this a "git run <job>"
>> command to maximize similarity. However, it seems like "git run" is
>> too generic. Or, am I being overly verbose for no reason?
>
> Having read through this series I wondered if we wanted a single git
> command such as 'git maintenance' (suggestions of better names
> welcome) and then 'git run-job' could become 'git maintenance run',
> 'git job-runner' would become another subcommand (run-jobs or
> schedule-jobs?) and the 'git please-run-maintenance-on-this-repo' you
> mentioned in you email to Junio could become 'git maintenance init'
> (or maybe setup)

I had a very similar impression.  In addition to what you already
said, a few more were:

 - Why isn't the existing "git repack" such a "maintenance" command?
   IOW why do we even need [01/15]?  After all, "repack" may have
   started its life as a tool to reorganize the PACKFILES, but it is
   no longer limited to '.git/objects/pack/*.pack' files with its
   knowledge about the loose object files and the "--prune" option.
   Consolidating pieces of information spread across multiple .idx
   files, reachability bitmaps and commit graph files, into a newer
   and more performant forms can just be part of "packing the pieces
   of information in a repository for optimum performance", which is
   a better way to understand why "repack" has a word 'pack' in its
   name.

 - Many of the "maintenance" operations this series proposes do make
   sense, just like other "maintenance" operations we already have
   in "repack", "prune", "prune-packed" etc., which are welcome
   additions. 

 - Like the individual steps that appear in e.g. "repack", however,
   some of the individual steps in this series can be triggered by
   calling underlying tools directly, allowing scripted maintenance
   commands that suit individual needs better than the canned
   invocation of "run-job", but I didn't get the impression that the
   series strives to make sure that all knobs of these individual
   steps are available to scripters who want to deviate from what
   "run-job" prescribes.  If it is not doing so, we probably should.

 - Again, I do not think we want a reimplementation of cron, at or
   inetd that is not specific to "git" at all.

Thanks.
Derrick Stolee April 6, 2020, 2:42 p.m. UTC | #3
On 4/5/2020 3:21 PM, Junio C Hamano wrote:
> Phillip Wood <phillip.wood123@gmail.com> writes:
> 
>> Hi Stolee
>>
>> On 03/04/2020 21:48, Derrick Stolee via GitGitGadget wrote:
>>> From: Derrick Stolee <dstolee@microsoft.com>
>>>
>>> The 'git run-job' command will be used to execute a short-lived set
>>> of maintenance activities by a background job manager. The intention
>>> is to perform small batches of work that reduce the foreground time
>>> taken by repository maintenance such as 'git gc --auto'.
>>>
>>> This change does the absolute minimum to create the builtin and show
>>> the usage output.
>>>
>>> Provide an explicit warning that this command is experimental. The
>>> set of jobs may change, and each job could alter its behavior in
>>> future versions.
>>>
>>> RFC QUESTION: This builtin is based on the background maintenance in
>>> Scalar. Specifically, this builtin is based on the "scalar run <job>"
>>> command [1] [2]. My default thought was to make this a "git run <job>"
>>> command to maximize similarity. However, it seems like "git run" is
>>> too generic. Or, am I being overly verbose for no reason?
>>
>> Having read through this series I wondered if we wanted a single git
>> command such as 'git maintenance' (suggestions of better names
>> welcome) and then 'git run-job' could become 'git maintenance run',
>> 'git job-runner' would become another subcommand (run-jobs or
>> schedule-jobs?) and the 'git please-run-maintenance-on-this-repo' you
>> mentioned in you email to Junio could become 'git maintenance init'
>> (or maybe setup)
> 
> I had a very similar impression.  In addition to what you already
> said, a few more were:
> 
>  - Why the existing "git repack" isn't such "maintenance" command?
>    IOW why do we even need [01/15]?  After all, "repack" may have
>    started its life as a tool to reorganize the PACKFILES, but it is
>    no longer limited to 'git/objects/pack/*.pack' files with its
>    knowledge about the loose object files and the "--prune" option.
>    Consolidating pieces of information spread across multiple .idx
>    files, reachability bitmaps and commit graph files, into a newer
>    and more performant forms can just be part of "packing the pieces
>    of information in a repository for optimum performance", which is
>    a better way to understand why "repack" has a word 'pack' in its
>    name.

To me, "git repack" is a specific kind of maintenance. The end result
is a pack-file. Now, "git gc" is a bit more general, because it will
create a pack-file but also update the commit-graph file. Still, its
name is still very specific: it "collects garbage". The goals of this
series are to replace "git gc --auto" with something less invasive.

I'll include an alternate CLI proposal at the end of this message.

>  - Many of the "maintenance" operations this series proposes do make
>    sense, just like other "maintenance" operations we already have
>    in "repack", "prune", "prune-packed" etc., which are welcome
>    additions. 

Thanks. I'm glad these steps make sense. They are definitely more
"incremental" updates than a full repack or GC.
 
>  - Like the individual steps that appear in e.g. "repack", however,
>    some of the individual steps in this series can be triggered by
>    calling underlying tools directly, allowing scripted maintenance
>    commands that suit individual needs better than the canned
>    invocation of "run-job", but I didn't get the impression that the
>    series strives to make sure that all knobs of these individual
>    steps are available to scripters who want to deviate from what
>    "run-job" prescribes.  If it is not doing so, we probably should.
> 
>  - Again, I do not think we want a reimplementation of cron, at or
>    inetd that is not specific to "git" at all.

I expected the job-runner to get some push-back. The design for it in
the current RFC matched how we do it in Scalar more than anything else.
You're probably right that it would be better to leave the "background"
part to the platform.

Of course, not every platform has "cron" but that just means we need a
cross-platform way to launch Git processes on some schedule. That could
be a command that creates a cron job on platforms that have it, and on
Windows it could create a scheduled task instead.

But what should we launch? It should probably be a Git command that
checks config for a list of repositories, then runs "the maintenance
command" on each of those repos.

I'm inserting a break here to draw the eye to a new proposed design:

---

Create a "git maintenance" builtin. This has a few subcommands:

1. "run" will run the configured maintenance on the current repo. This
   should become the single entry point for users to say "please clean
   up my repo." What _exactly_ it does can be altered with config. I'll
   list some possibilities after listing the subcommands.

2. "run-on-repos" uses command-line arguments or config to launch "git
   -C <dir> maintenance run" for all configured directories. The
   intention is that this is launched on some schedule by a platform-
   specific scheduling mechanism (e.g. cron).
   (This subcommand could use a better name.)

3. "schedule" adds the current repository to the configured list of
   repositories for running with "run-on-repos". It will also initialize
   the platform-specific scheduling mechanism. This may be to start the
   schedule for the first time OR to update how frequent "run-on-repos"
   is run, as appropriate.

4. (OPTIONAL) "mode <mode>" adjusts the config for the current repo to
   change the type of maintenance requested for this repo. For example,
   "simple" could just run "git gc --auto" using a normal range.
   "incremental" could run the maintenance tasks from this series.
   Finally, "server" could run maintenance tasks as if we are serving
   the repo to others, so we repack aggressively with full bitmaps, and
   more frequently.

Here are some possible maintenance tasks. Not all of them would
be appropriate to run on the same repo, or at least not with the
same frequency:

* "fetch" : the background fetch from PATCH 3. Appropriate for all modes,
  but perhaps we would want users to opt in to this in the basic mode.

* "commit-graph" : the incremental commit-graph writes from PATCH 2.
  Appropriate whenever the "fetch" command is being run, but also
  valuable for the "server" mode.

* "gc" : Run "git gc --auto". This would be enabled by default, but
  should be disabled for the "incremental" and "server" modes.

* "repack" : Run "git repack <options>" with appropriate options based
  on config. The "server" mode would include custom delta and bitmap
  options. (I will leave the specifics to those who maintain servers to
  recommend the best options for "server" mode.)

* "loose-objects" : see PATCH 4. Appropriate for "incremental" mode.

* "multi-pack-index" or "incremental-repack" : Run the "pack-files" job
  from PATCH 5. Appropriate for "incremental" mode.

* "pack-refs" : create a packed-refs file or repack the reftable as
  appropriate for those features. (I have less familiarity with these.)

Notice that with this new set of options we could do something rather
dramatic: replace all calls to "git gc --auto" with "git maintenance
run --auto". By default, these would be equivalent. However, "git
maintenance run --auto" is more clear that the behavior is less specific
than "git gc" and could be configured to do something different.

I used an "--auto" option in the suggestion above to help distinguish
between the command being run as a foreground operation instead of a
background operation. Part of setting up a schedule would include
disabling these "foreground" maintenance tasks and relying entirely on
the background tasks instead. The best situation would be to avoid
launching the subprocess at all.

---

What do people think of this alternative? Does this get us closer to an
appropriate level of work for Git to do?

Thanks,
-Stolee
Đoàn Trần Công Danh April 7, 2020, 12:58 a.m. UTC | #4
On 2020-04-06 10:42:23-0400, Derrick Stolee <stolee@gmail.com> wrote:
> Of course, not every platform has "cron" but that just means we need a
> cross-platform way to launch Git processes on some schedule. That could
> be a command that creates a cron job on platforms that have it, and on

There are Unix systems that don't have cron.
People could use other scheduling mechanisms.

A lot of systemd users use systemd-timer.
I'm using snooze.
Each of those sets of utilities has a different grammar and
configuration.

> Windows it could create a scheduled task instead.
> 
> But what should we launch? It should probably be a Git command that
> checks config for a list of repositories, then runs "the maintenance
> command" on each of those repos.
> 
> I'm inserting a break here to draw the eye to a new proposed design:
> 
> ---
> 
> Create a "git maintenance" builtin. This has a few subcommands:
> 
> 1. "run" will run the configured maintenance on the current repo. This
>    should become the single entry point for users to say "please clean
>    up my repo." What _exactly_ it does can be altered with config. I'll
>    list some possibilities after listing the subcommands.
> 
> 2. "run-on-repos" uses command-line arguments or config to launch "git
>    -C <dir> maintenance run" for all configured directories. The
>    intention is that this is launched on some schedule by a platform-
>    specific scheduling mechanism (i.e. cron).

So, IIUC, Git will have a _hard_ dependency on cron on *nix?
Otherwise, we're going to receive bug reports that some tools don't work?

I've seen bug reports in our distro that "git add -p" doesn't work
as documented, because it's in the "git-perl" package.
When we merged "git-perl" back into git, other people (who never use
"git add -p" and git-send-email) complained that we added a hard dependency
on perl to git.

>    (This subcommand could use a better name.)
> 
> 3. "schedule" adds the current repository to the configured list of
>    repositories for running with "run-on-repos". It will also initialize
>    the platform-specific scheduling mechanism. This may be to start the
>    schedule for the first time OR to update how frequent "run-on-repos"
>    is run, as appropriate.
> 
> 4. (OPTIONAL) "mode <mode>" adjusts the config for the current repo to
>    change the type of maintenance requested for this repo. For example,
>    "simple" could just run "git gc --auto" using a normal range.
>    "incremental" could run the maintenance tasks from this series.
>    Finally, "server" could run maintenance tasks as if we are serving
>    the repo to others, so we repack aggressively with full bitmaps, and
>    more frequently.
Derrick Stolee April 7, 2020, 10:54 a.m. UTC | #5
On 4/6/2020 8:58 PM, Danh Doan wrote:
> On 2020-04-06 10:42:23-0400, Derrick Stolee <stolee@gmail.com> wrote:
>> Of course, not every platform has "cron" but that just means we need a
>> cross-platform way to launch Git processes on some schedule. That could
>> be a command that creates a cron job on platforms that have it, and on
> 
> There's Unix system that doesn't have cron.
> People could use other scheduler mechanism.
> 
> A lot of systemd users uses systemd-timer.
> I'm using snooze.

Thanks for listing some alternatives. I'll look into these.

> Each of those set of utilities have different grammar and
> configuration.
> 
>> Windows it could create a scheduled task instead.

>> 2. "run-on-repos" uses command-line arguments or config to launch "git
>>    -C <dir> maintenance run" for all configured directories. The
>>    intention is that this is launched on some schedule by a platform-
>>    specific scheduling mechanism (i.e. cron).
> 
> So, IIUC, Git will have a _hard_ dependencies on cron on *nix?
> Else, we're gonna received a bug-report that some tools doesn't work?

No. Such a dependency would be unacceptable. I'm just using cron
as an example when available.
 
> I've seen some bug report in our distro that "git add -p" doesn't work
> like documented, because it's in "git-perl" packages.
> When we merge "git-perl" back to git, other people (who never use
> "git add -p" and git-sendemail) complain why does we add a hard dependencies
> on perl to git.

Good news: "git add -p" is becoming a builtin with a lot of work by
some determined contributors.

Thanks,
-Stolee
Đoàn Trần Công Danh April 7, 2020, 2:16 p.m. UTC | #6
On 2020-04-07 06:54:33-0400, Derrick Stolee <stolee@gmail.com> wrote:
> On 4/6/2020 8:58 PM, Danh Doan wrote:
> > On 2020-04-06 10:42:23-0400, Derrick Stolee <stolee@gmail.com> wrote:
> >> Of course, not every platform has "cron" but that just means we need a
> >> cross-platform way to launch Git processes on some schedule. That could
> >> be a command that creates a cron job on platforms that have it, and on
> > 
> > There's Unix system that doesn't have cron.
> > People could use other scheduler mechanism.
> > 
> > A lot of systemd users uses systemd-timer.
> > I'm using snooze.
> 
> Thanks for listing some alternatives. I'll look into these.

I didn't mean to list those alternatives as only possible
alternatives.

The point is that people have their own preferences and will choose a
scheduler that suits their needs.

Someone could use their own supervisor system with things like:

	#!/bin/sh

	sleep 3600 # 1 hour
	exec git cmd

When "git cmd" exits, the supervisor will start the job again (because
it's down and it needs to be run).

> > Each of those set of utilities have different grammar and
> > configuration.
> > 
> >> Windows it could create a scheduled task instead.
> 
> >> 2. "run-on-repos" uses command-line arguments or config to launch "git
> >>    -C <dir> maintenance run" for all configured directories. The
> >>    intention is that this is launched on some schedule by a platform-
> >>    specific scheduling mechanism (i.e. cron).
> > 
> > So, IIUC, Git will have a _hard_ dependencies on cron on *nix?
> > Else, we're gonna received a bug-report that some tools doesn't work?
> 
> No. Such a dependency would be unacceptable. I'm just using cron
> as an example when available.

There are too many possible solutions out there;
I'm still not convinced about adding a scheduler to Git.

> > I've seen some bug report in our distro that "git add -p" doesn't work
> > like documented, because it's in "git-perl" packages.
> > When we merge "git-perl" back to git, other people (who never use
> > "git add -p" and git-sendemail) complain why does we add a hard dependencies
> > on perl to git.
> 
> Good news: "git add -p" is becoming a builtin with a lot of work by
> some determined contributors.

Yeah, I knew it. t3701.{44,46} is also fixed with the builtin.
But it will take a few more versions before it is enabled by default.

The point is that there are people who don't want to see a new hard
dependency for Git.
Johannes Schindelin April 7, 2020, 2:30 p.m. UTC | #7
Hi,

On Tue, 7 Apr 2020, Danh Doan wrote:

> On 2020-04-07 06:54:33-0400, Derrick Stolee <stolee@gmail.com> wrote:
> > On 4/6/2020 8:58 PM, Danh Doan wrote:
> > > On 2020-04-06 10:42:23-0400, Derrick Stolee <stolee@gmail.com> wrote:
> > >> Of course, not every platform has "cron" but that just means we need a
> > >> cross-platform way to launch Git processes on some schedule. That could
> > >> be a command that creates a cron job on platforms that have it, and on
> > >
> > > There's Unix system that doesn't have cron.
> > > People could use other scheduler mechanism.
> > >
> > > A lot of systemd users uses systemd-timer.
> > > I'm using snooze.
> >
> > Thanks for listing some alternatives. I'll look into these.
>
> I didn't mean to list those alternatives as only possible
> alternatives.

In contrast, I think that they are _really_ alternatives, and they are
only options for people who are dedicated fans of fiddling with the
technical details.

In other words, `cron` is a very viable option for a few people who are
_not_ in the audience of this here patch series.

The audience of this patch series are software engineers who _have_ to use
Git, but do not really want to spend their time learning about the
internal details. For those developers, especially those working on
insanely large repositories, we want to provide some convenient functions
(much like `git gc --auto` tries to help developers who do not want to
bother with Git details, but _better_ because it tries very much to stay
_out_ of the way of the developer, which `git gc --auto` distinctly does
_not_) that were developed using the experience with the world's largest
repository.

> The point is people have their own preference to choose a scheduler
> that suites their need.

And they can!

But again, this here patch series is obviously for those who do not want
to tinker with Git's functionality, yet still want to have decent
performance.

Learning from the experience that led to the invention of `git gc --auto`,
there is _a large_ benefit in doing this: `git gc --auto` was invented
because some prolific Linux contributors were experiencing abysmal
performance because they did not want to spend time learning how to
keep their repositories in a good shape, but rather they wanted to spend
time developing Linux kernel code.

> Someone could use their own supervisor system with things like:
>
> 	#/bin/sh
>
> 	sleep 3600 # 1 hour
> 	exec git cmd
>
> When "git cmd" exit, the supervisor will start the job again (because
> it's down and it needs to be run).

Sure. And that would work for developers who are interested in how the
world works, and have enough time to learn about this.

In my experience the vast majority of enterprise software developers are
not really as excited about Git internals as I am. This patch series is
for them. Because they are good people, too, and deserve our care in
supporting them.

Ciao,
Dscho
diff mbox series

Patch

diff --git a/.gitignore b/.gitignore
index 188bd1c3de1..5dea9d3b96b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -144,6 +144,7 @@ 
 /git-rev-parse
 /git-revert
 /git-rm
+/git-run-job
 /git-send-email
 /git-send-pack
 /git-serve
diff --git a/Documentation/git-run-job.txt b/Documentation/git-run-job.txt
new file mode 100644
index 00000000000..0627b3ed259
--- /dev/null
+++ b/Documentation/git-run-job.txt
@@ -0,0 +1,36 @@ 
+git-run-job(1)
+==============
+
+NAME
+----
+git-run-job - Run a maintenance job. Intended for background operation.
+
+
+SYNOPSIS
+--------
+[verse]
+'git run-job <job-name>'
+
+
+DESCRIPTION
+-----------
+
+Run a maintenance job on the current repository. This is available as a
+command for a few reasons. First, the background job feature can launch
+these commands on a schedule and each process will completely clear its
+memory when complete. Second, an expert user could create their own job
+schedule by running these jobs themselves.
+
+THIS COMMAND IS EXPERIMENTAL. THE SET OF AVAILABLE JOBS OR THEIR EXACT
+BEHAVIOR MAY BE ALTERED IN THE FUTURE.
+
+
+JOBS
+----
+
+TBD
+
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index ef1ff2228f0..f5f9c4d9e94 100644
--- a/Makefile
+++ b/Makefile
@@ -1125,6 +1125,7 @@  BUILTIN_OBJS += builtin/rev-list.o
 BUILTIN_OBJS += builtin/rev-parse.o
 BUILTIN_OBJS += builtin/revert.o
 BUILTIN_OBJS += builtin/rm.o
+BUILTIN_OBJS += builtin/run-job.o
 BUILTIN_OBJS += builtin/send-pack.o
 BUILTIN_OBJS += builtin/shortlog.o
 BUILTIN_OBJS += builtin/show-branch.o
diff --git a/builtin.h b/builtin.h
index 2b25a80cde3..3e0ddaaf67f 100644
--- a/builtin.h
+++ b/builtin.h
@@ -220,6 +220,7 @@  int cmd_rev_list(int argc, const char **argv, const char *prefix);
 int cmd_rev_parse(int argc, const char **argv, const char *prefix);
 int cmd_revert(int argc, const char **argv, const char *prefix);
 int cmd_rm(int argc, const char **argv, const char *prefix);
+int cmd_run_job(int argc, const char **argv, const char *prefix);
 int cmd_send_pack(int argc, const char **argv, const char *prefix);
 int cmd_shortlog(int argc, const char **argv, const char *prefix);
 int cmd_show(int argc, const char **argv, const char *prefix);
diff --git a/builtin/run-job.c b/builtin/run-job.c
new file mode 100644
index 00000000000..2c78d053aa4
--- /dev/null
+++ b/builtin/run-job.c
@@ -0,0 +1,28 @@ 
+#include "builtin.h"
+#include "config.h"
+#include "parse-options.h"
+
+static char const * const builtin_run_job_usage[] = {
+	N_("git run-job"),
+	NULL
+};
+
+int cmd_run_job(int argc, const char **argv, const char *prefix)
+{
+	static struct option builtin_run_job_options[] = {
+		OPT_END(),
+	};
+
+	if (argc == 2 && !strcmp(argv[1], "-h"))
+		usage_with_options(builtin_run_job_usage,
+				   builtin_run_job_options);
+
+	git_config(git_default_config, NULL);
+	argc = parse_options(argc, argv, prefix,
+			     builtin_run_job_options,
+			     builtin_run_job_usage,
+			     PARSE_OPT_KEEP_UNKNOWN);
+
+	usage_with_options(builtin_run_job_usage,
+			   builtin_run_job_options);
+}
diff --git a/command-list.txt b/command-list.txt
index 20878946558..1cd2b415e46 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -156,6 +156,7 @@  git-revert                              mainporcelain
 git-rev-list                            plumbinginterrogators
 git-rev-parse                           plumbinginterrogators
 git-rm                                  mainporcelain           worktree
+git-run-job                             plumbingmanipulators
 git-send-email                          foreignscminterface             complete
 git-send-pack                           synchingrepositories
 git-shell                               synchelpers
diff --git a/git.c b/git.c
index b07198fe036..db5a43c8687 100644
--- a/git.c
+++ b/git.c
@@ -566,6 +566,7 @@  static struct cmd_struct commands[] = {
 	{ "rev-parse", cmd_rev_parse, NO_PARSEOPT },
 	{ "revert", cmd_revert, RUN_SETUP | NEED_WORK_TREE },
 	{ "rm", cmd_rm, RUN_SETUP },
+	{ "run-job", cmd_run_job, RUN_SETUP },
 	{ "send-pack", cmd_send_pack, RUN_SETUP },
 	{ "shortlog", cmd_shortlog, RUN_SETUP_GENTLY | USE_PAGER },
 	{ "show", cmd_show, RUN_SETUP },
diff --git a/t/t7900-run-job.sh b/t/t7900-run-job.sh
new file mode 100755
index 00000000000..1eac80b7ed3
--- /dev/null
+++ b/t/t7900-run-job.sh
@@ -0,0 +1,15 @@ 
+#!/bin/sh
+
+test_description='git run-job
+
+Testing the background jobs, in the foreground
+'
+
+. ./test-lib.sh
+
+test_expect_success 'help text' '
+	test_must_fail git run-job -h 2>err &&
+	test_i18ngrep "usage: git run-job" err
+'
+
+test_done