Message ID | 18429182ffcf4cb56fcf3d8c76e41652a6c45adc.1585946894.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Maintenance jobs and job runner | expand |
Hi Stolee On 03/04/2020 21:48, Derrick Stolee via GitGitGadget wrote: > From: Derrick Stolee <dstolee@microsoft.com> > > We want to run different maintenance tasks at different rates. That > means we cannot rely only on the time between job-runner loop pauses > to reduce the frequency of specific jobs. This means we need to > persist a timestamp for the previous run somewhere. A natural place > is the Git config file for that repo. > > Create the job.<job-namme>.lastRun config option to store the > timestamp of the previous run of that job. Set this config option > after a successful run of 'git -C <repo> run-job <job-name>'. > > To maximize code reuse, we dynamically construct the config key > and parse the config value into a timestamp in a generic way. This > makes the introduction of another config option extremely trivial: > > The job.<job-name>.interval option allows a user to specify a > minimum number of seconds between two calls to 'git run-job > <job-name>' on a given repo. This could be stored in the global > or system config to provide an update on the default for all repos, > or could be updated on a per-repo basis. This is checked on every > iteration of the job loop, so a user could update this and see the > effect without restarting the job-runner process. > > RFC QUESTION: I'm using a 'git -C <repo> config <option>' process > to test if enough time has elapsed before running the 'run-job' > process. These 'config' processes are pretty light-weight, so > hopefully they are not too noisy. An alternative would be to > always run 'git -C <repo> run-job <job-name>' and rely on that > process to no-op based on the configured values and how recently > they were run. You're still executing another process so it doesn't really save anything in the 'noop' case. In the case where something needs to be done I think the extra config process is unlikely to be noticed (especially as 'git job' forks a lot anyway) Best Wishes Phillip > However, that will likely interrupt users who want > to test the jobs in the foreground. Finally, that user disruption > would be mitigated by adding a "--check-latest" option. A user > running a job manually would not include that argument by default > and would succeed. However, any logging that we might do for the > job-runner would make it look like we are running the run-job > commands all the time even if they are no-ops. Advice welcome! > > Signed-off-by: Derrick Stolee <dstolee@microsoft.com> > --- > Documentation/config/job.txt | 10 ++ > Documentation/git-run-job.txt | 3 + > builtin/job-runner.c | 177 +++++++++++++++++++++++++++++++++- > 3 files changed, 189 insertions(+), 1 deletion(-) > > diff --git a/Documentation/config/job.txt b/Documentation/config/job.txt > index 2dfed3935fa..7c799d66221 100644 > --- a/Documentation/config/job.txt > +++ b/Documentation/config/job.txt > @@ -1,3 +1,13 @@ > +job.<job-name>.interval:: > + The minimum number of seconds between runs of > + `git run-job <job-name>` when running `git job-runner`. > + > +job.<job-name>.lastRun:: > + The Unix epoch for the timestamp of the previous run of > + `git run-job <job-name>` when running `git job-runner`. You > + can manually update this to a later time to delay a specific > + job on this repository. > + > job.pack-files.batchSize:: > This string value `<size>` will be passed to the > `git multi-pack-index repack --batch-size=<size>` command as > diff --git a/Documentation/git-run-job.txt b/Documentation/git-run-job.txt > index cdd6417f7c9..c6d5674d699 100644 > --- a/Documentation/git-run-job.txt > +++ b/Documentation/git-run-job.txt > @@ -90,6 +90,9 @@ pack-files, the job does not attempt to repack. Otherwise, the batch > size is the sum of all pack-file sizes minus the largest pack-file size. > The batch size is capped at two gigabytes. This intends to pack all > small pack-files into a single pack-file. > ++ > +The `--batch-size=<size>` option will override the dynamic or configured > +batch size. > > > GIT > diff --git a/builtin/job-runner.c b/builtin/job-runner.c > index bed401f94bf..aee55c106e8 100644 > --- a/builtin/job-runner.c > +++ b/builtin/job-runner.c > @@ -4,11 +4,175 @@ > #include "run-command.h" > #include "string-list.h" > > +#define MAX_TIMESTAMP ((timestamp_t)-1) > + > static char const * const builtin_job_runner_usage[] = { > N_("git job-runner [<options>]"), > NULL > }; > > +static char *config_name(const char *prefix, > + const char *job, > + const char *postfix) > +{ > + int postfix_dot = 0; > + struct strbuf name = STRBUF_INIT; > + > + if (prefix) { > + strbuf_addstr(&name, prefix); > + strbuf_addch(&name, '.'); > + } > + > + if (job) { > + strbuf_addstr(&name, job); > + postfix_dot = 1; > + } > + > + if (postfix) { > + if (postfix_dot) > + strbuf_addch(&name, '.'); > + strbuf_addstr(&name, postfix); > + } > + > + return strbuf_detach(&name, NULL); > +} > + > +static int try_get_config(const char *job, > + const char *repo, > + const char *postfix, > + char **value) > +{ > + int result = 0; > + FILE *proc_out; > + struct strbuf line = STRBUF_INIT; > + char *cname = config_name("job", job, postfix); > + struct child_process *config_proc = xmalloc(sizeof(*config_proc)); > + > + child_process_init(config_proc); > + > + argv_array_push(&config_proc->args, "git"); > + argv_array_push(&config_proc->args, "-C"); > + argv_array_push(&config_proc->args, repo); > + argv_array_push(&config_proc->args, "config"); > + argv_array_push(&config_proc->args, cname); > + free(cname); > + > + config_proc->out = -1; > + > + if (start_command(config_proc)) { > + warning(_("failed to start process for repo '%s'"), > + repo); > + result = 1; > + goto cleanup; > + } > + > + proc_out = xfdopen(config_proc->out, "r"); > + > + /* if there is no line, leave the value as given */ > + if (!strbuf_getline(&line, proc_out)) > + *value = strbuf_detach(&line, NULL); > + else > + *value = NULL; > + > + strbuf_release(&line); > + > + fclose(proc_out); > + > + result = finish_command(config_proc); > + > +cleanup: > + free(config_proc); > + return result; > +} > + > +static int try_get_timestamp(const char *job, > + const char *repo, > + const char *postfix, > + timestamp_t *t) > +{ > + char *value; > + int result = try_get_config(job, repo, postfix, &value); > + > + if (!result) { > + *t = atol(value); > + free(value); > + } > + > + return result; > +} > + > +static timestamp_t get_last_run(const char *job, > + const char *repo) > +{ > + timestamp_t last_run = 0; > + > + /* In an error state, do not run the job */ > + if (try_get_timestamp(job, repo, "lastrun", &last_run)) > + return MAX_TIMESTAMP; > + > + return last_run; > +} > + > +static timestamp_t get_interval(const char *job, > + const char *repo) > +{ > + timestamp_t interval = MAX_TIMESTAMP; > + > + /* In an error state, do not run the job */ > + if (try_get_timestamp(job, repo, "interval", &interval)) > + return MAX_TIMESTAMP; > + > + if (interval == MAX_TIMESTAMP) { > + /* load defaults for each job */ > + if (!strcmp(job, "commit-graph") || !strcmp(job, "fetch")) > + interval = 60 * 60; /* 1 hour */ > + else > + interval = 24 * 60 * 60; /* 1 day */ > + } > + > + return interval; > +} > + > +static int set_last_run(const char *job, > + const char *repo, > + timestamp_t last_run) > +{ > + int result = 0; > + struct strbuf last_run_string = STRBUF_INIT; > + struct child_process *config_proc = xmalloc(sizeof(*config_proc)); > + char *cname = config_name("job", job, "lastrun"); > + > + strbuf_addf(&last_run_string, "%"PRItime, last_run); > + > + child_process_init(config_proc); > + > + argv_array_push(&config_proc->args, "git"); > + argv_array_push(&config_proc->args, "-C"); > + argv_array_push(&config_proc->args, repo); > + argv_array_push(&config_proc->args, "config"); > + argv_array_push(&config_proc->args, cname); > + argv_array_push(&config_proc->args, last_run_string.buf); > + free(cname); > + strbuf_release(&last_run_string); > + > + if (start_command(config_proc)) { > + warning(_("failed to start process for repo '%s'"), > + repo); > + result = 1; > + goto cleanup; > + } > + > + if (finish_command(config_proc)) { > + warning(_("failed to finish process for repo '%s'"), > + repo); > + result = 1; > + } > + > +cleanup: > + free(config_proc); > + return result; > +} > + > static struct string_list arg_repos = STRING_LIST_INIT_DUP; > > static int arg_repos_append(const struct option *opt, > @@ -54,9 +218,20 @@ static int load_active_repos(struct string_list *repos) > > static int run_job(const char *job, const char *repo) > { > + int result; > struct argv_array cmd = ARGV_ARRAY_INIT; > + timestamp_t now = time(NULL); > + timestamp_t last_run = get_last_run(job, repo); > + timestamp_t interval = get_interval(job, repo); > + > + if (last_run + interval > now) > + return 0; > + > argv_array_pushl(&cmd, "-C", repo, "run-job", job, NULL); > - return run_command_v_opt(cmd.argv, RUN_GIT_CMD); > + result = run_command_v_opt(cmd.argv, RUN_GIT_CMD); > + > + set_last_run(job, repo, now); > + return result; > } > > static int run_job_loop_step(struct string_list *list) >
diff --git a/Documentation/config/job.txt b/Documentation/config/job.txt index 2dfed3935fa..7c799d66221 100644 --- a/Documentation/config/job.txt +++ b/Documentation/config/job.txt @@ -1,3 +1,13 @@ +job.<job-name>.interval:: + The minimum number of seconds between runs of + `git run-job <job-name>` when running `git job-runner`. + +job.<job-name>.lastRun:: + The Unix epoch for the timestamp of the previous run of + `git run-job <job-name>` when running `git job-runner`. You + can manually update this to a later time to delay a specific + job on this repository. + job.pack-files.batchSize:: This string value `<size>` will be passed to the `git multi-pack-index repack --batch-size=<size>` command as diff --git a/Documentation/git-run-job.txt b/Documentation/git-run-job.txt index cdd6417f7c9..c6d5674d699 100644 --- a/Documentation/git-run-job.txt +++ b/Documentation/git-run-job.txt @@ -90,6 +90,9 @@ pack-files, the job does not attempt to repack. Otherwise, the batch size is the sum of all pack-file sizes minus the largest pack-file size. The batch size is capped at two gigabytes. This intends to pack all small pack-files into a single pack-file. ++ +The `--batch-size=<size>` option will override the dynamic or configured +batch size. GIT diff --git a/builtin/job-runner.c b/builtin/job-runner.c index bed401f94bf..aee55c106e8 100644 --- a/builtin/job-runner.c +++ b/builtin/job-runner.c @@ -4,11 +4,175 @@ #include "run-command.h" #include "string-list.h" +#define MAX_TIMESTAMP ((timestamp_t)-1) + static char const * const builtin_job_runner_usage[] = { N_("git job-runner [<options>]"), NULL }; +static char *config_name(const char *prefix, + const char *job, + const char *postfix) +{ + int postfix_dot = 0; + struct strbuf name = STRBUF_INIT; + + if (prefix) { + strbuf_addstr(&name, prefix); + strbuf_addch(&name, '.'); + } + + if (job) { + strbuf_addstr(&name, job); + postfix_dot = 1; + } + + if (postfix) { + if (postfix_dot) + strbuf_addch(&name, '.'); + strbuf_addstr(&name, postfix); + } + + return strbuf_detach(&name, NULL); +} + +static int try_get_config(const char *job, + const char *repo, + const char *postfix, + char **value) +{ + int result = 0; + FILE *proc_out; + struct strbuf line = STRBUF_INIT; + char *cname = config_name("job", job, postfix); + struct child_process *config_proc = xmalloc(sizeof(*config_proc)); + + child_process_init(config_proc); + + argv_array_push(&config_proc->args, "git"); + argv_array_push(&config_proc->args, "-C"); + argv_array_push(&config_proc->args, repo); + argv_array_push(&config_proc->args, "config"); + argv_array_push(&config_proc->args, cname); + free(cname); + + config_proc->out = -1; + + if (start_command(config_proc)) { + warning(_("failed to start process for repo '%s'"), + repo); + result = 1; + goto cleanup; + } + + proc_out = xfdopen(config_proc->out, "r"); + + /* if there is no line, leave the value as given */ + if (!strbuf_getline(&line, proc_out)) + *value = strbuf_detach(&line, NULL); + else + *value = NULL; + + strbuf_release(&line); + + fclose(proc_out); + + result = finish_command(config_proc); + +cleanup: + free(config_proc); + return result; +} + +static int try_get_timestamp(const char *job, + const char *repo, + const char *postfix, + timestamp_t *t) +{ + char *value; + int result = try_get_config(job, repo, postfix, &value); + + if (!result) { + *t = atol(value); + free(value); + } + + return result; +} + +static timestamp_t get_last_run(const char *job, + const char *repo) +{ + timestamp_t last_run = 0; + + /* In an error state, do not run the job */ + if (try_get_timestamp(job, repo, "lastrun", &last_run)) + return MAX_TIMESTAMP; + + return last_run; +} + +static timestamp_t get_interval(const char *job, + const char *repo) +{ + timestamp_t interval = MAX_TIMESTAMP; + + /* In an error state, do not run the job */ + if (try_get_timestamp(job, repo, "interval", &interval)) + return MAX_TIMESTAMP; + + if (interval == MAX_TIMESTAMP) { + /* load defaults for each job */ + if (!strcmp(job, "commit-graph") || !strcmp(job, "fetch")) + interval = 60 * 60; /* 1 hour */ + else + interval = 24 * 60 * 60; /* 1 day */ + } + + return interval; +} + +static int set_last_run(const char *job, + const char *repo, + timestamp_t last_run) +{ + int result = 0; + struct strbuf last_run_string = STRBUF_INIT; + struct child_process *config_proc = xmalloc(sizeof(*config_proc)); + char *cname = config_name("job", job, "lastrun"); + + strbuf_addf(&last_run_string, "%"PRItime, last_run); + + child_process_init(config_proc); + + argv_array_push(&config_proc->args, "git"); + argv_array_push(&config_proc->args, "-C"); + argv_array_push(&config_proc->args, repo); + argv_array_push(&config_proc->args, "config"); + argv_array_push(&config_proc->args, cname); + argv_array_push(&config_proc->args, last_run_string.buf); + free(cname); + strbuf_release(&last_run_string); + + if (start_command(config_proc)) { + warning(_("failed to start process for repo '%s'"), + repo); + result = 1; + goto cleanup; + } + + if (finish_command(config_proc)) { + warning(_("failed to finish process for repo '%s'"), + repo); + result = 1; + } + +cleanup: + free(config_proc); + return result; +} + static struct string_list arg_repos = STRING_LIST_INIT_DUP; static int arg_repos_append(const struct option *opt, @@ -54,9 +218,20 @@ static int load_active_repos(struct string_list *repos) static int run_job(const char *job, const char *repo) { + int result; struct argv_array cmd = ARGV_ARRAY_INIT; + timestamp_t now = time(NULL); + timestamp_t last_run = get_last_run(job, repo); + timestamp_t interval = get_interval(job, repo); + + if (last_run + interval > now) + return 0; + argv_array_pushl(&cmd, "-C", repo, "run-job", job, NULL); - return run_command_v_opt(cmd.argv, RUN_GIT_CMD); + result = run_command_v_opt(cmd.argv, RUN_GIT_CMD); + + set_last_run(job, repo, now); + return result; } static int run_job_loop_step(struct string_list *list)