Message ID | 3b17a4e3d3fa620638299294a56adb9237fd5c56.1631108472.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | rebase -i: a couple of small improvements | expand |
Hi Phillip, On Wed, 8 Sep 2021, Phillip Wood via GitGitGadget wrote: > From: Phillip Wood <phillip.wood@dunelm.org.uk> > > 54fd3243da ("rebase -i: reread the todo list if `exec` touched it", > 2017-04-26) sought to reread the todo list after running an exec > command only if it had been changed. To accomplish this it checks the > stat data of the todo list after running an exec command to see if it > has changed. Unfortunately there are two problems, firstly the > implementation is buggy we actually reread the list after each exec > which is quadratic in the number of commit lookups and secondly the > design is predicated on using nanosecond time stamps which are not the > default. > > The implementation bug stems from the fact that we write a new todo > list to disk before running each command but do not update the stat > data to reflect this[1]. > > The design problem is that it is possible for the user to edit the > todo list without changing its size or inode which means we have to > rely on the mtime to tell us if it has changed. Unfortunately unless > git is built with USE_NSEC it is possible for the original and edited > list to share the same mtime. > > Ideally "git rebase --edit-todo" would set a flag that we would then > check in sequencer.c. Unfortunately this is approach will not work as > there are scripts in the wild that write to the todo list directly > without running "git rebase --edit-todo". Instead of relying on stat > data this patch simply reads the possibly edited todo list and > compares it to the original with memcmp(). This is much faster than > reparsing the todo list each time. This patch reduces the time to run > > git rebase -r -xtrue v2.32.0~100 v2.32.0 > > which runs 419 exec commands by 6.6%. For comparison fixing the > implementation bug in stat based approach reduces the time by a > further 1.4% and is indistinguishable from never rereading the todo > list. Thank you for fixing my bug _and_ for improving performance, Dscho > > [1] https://lore.kernel.org/git/20191125131833.GD23183@szeder.dev/ > > Reported-by: SZEDER Gábor <szeder.dev@gmail.com> > Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk> > --- > sequencer.c | 19 ++++++++----------- > sequencer.h | 1 - > 2 files changed, 8 insertions(+), 12 deletions(-) > > diff --git a/sequencer.c b/sequencer.c > index a248c886c27..d51440ddcd9 100644 > --- a/sequencer.c > +++ b/sequencer.c > @@ -2671,7 +2671,6 @@ static int read_populate_todo(struct repository *r, > struct todo_list *todo_list, > struct replay_opts *opts) > { > - struct stat st; > const char *todo_file = get_todo_path(opts); > int res; > > @@ -2679,11 +2678,6 @@ static int read_populate_todo(struct repository *r, > if (strbuf_read_file_or_whine(&todo_list->buf, todo_file) < 0) > return -1; > > - res = stat(todo_file, &st); > - if (res) > - return error(_("could not stat '%s'"), todo_file); > - fill_stat_data(&todo_list->stat, &st); > - > res = todo_list_parse_insn_buffer(r, todo_list->buf.buf, todo_list); > if (res) { > if (is_rebase_i(opts)) > @@ -4258,12 +4252,14 @@ static int reread_todo_if_changed(struct repository *r, > struct todo_list *todo_list, > struct replay_opts *opts) > { > - struct stat st; > + int offset; > + struct strbuf buf = STRBUF_INIT; > > - if (stat(get_todo_path(opts), &st)) { > - return error_errno(_("could not stat '%s'"), > - get_todo_path(opts)); > - } else if (match_stat_data(&todo_list->stat, &st)) { > + if (strbuf_read_file_or_whine(&buf, get_todo_path(opts)) < 0) > + return -1; > + offset = get_item_line_offset(todo_list, todo_list->current + 1); > + if (buf.len != todo_list->buf.len - offset || > + memcmp(buf.buf, todo_list->buf.buf + offset, buf.len)) { > /* Reread the todo file if it has changed. */ > todo_list_release(todo_list); > if (read_populate_todo(r, todo_list, opts)) > @@ -4271,6 +4267,7 @@ static int reread_todo_if_changed(struct repository *r, > /* `current` will be incremented on return */ > todo_list->current = -1; > } > + strbuf_release(&buf); > > return 0; > } > diff --git a/sequencer.h b/sequencer.h > index d57d8ea23d7..cdeb0c6be47 100644 > --- a/sequencer.h > +++ b/sequencer.h > @@ -116,7 +116,6 @@ struct todo_list { > struct todo_item *items; > int nr, alloc, current; > int done_nr, total_nr; > - struct stat_data stat; > }; > > #define TODO_LIST_INIT { STRBUF_INIT } > -- > gitgitgadget > >
diff --git a/sequencer.c b/sequencer.c index a248c886c27..d51440ddcd9 100644 --- a/sequencer.c +++ b/sequencer.c @@ -2671,7 +2671,6 @@ static int read_populate_todo(struct repository *r, struct todo_list *todo_list, struct replay_opts *opts) { - struct stat st; const char *todo_file = get_todo_path(opts); int res; @@ -2679,11 +2678,6 @@ static int read_populate_todo(struct repository *r, if (strbuf_read_file_or_whine(&todo_list->buf, todo_file) < 0) return -1; - res = stat(todo_file, &st); - if (res) - return error(_("could not stat '%s'"), todo_file); - fill_stat_data(&todo_list->stat, &st); - res = todo_list_parse_insn_buffer(r, todo_list->buf.buf, todo_list); if (res) { if (is_rebase_i(opts)) @@ -4258,12 +4252,14 @@ static int reread_todo_if_changed(struct repository *r, struct todo_list *todo_list, struct replay_opts *opts) { - struct stat st; + int offset; + struct strbuf buf = STRBUF_INIT; - if (stat(get_todo_path(opts), &st)) { - return error_errno(_("could not stat '%s'"), - get_todo_path(opts)); - } else if (match_stat_data(&todo_list->stat, &st)) { + if (strbuf_read_file_or_whine(&buf, get_todo_path(opts)) < 0) + return -1; + offset = get_item_line_offset(todo_list, todo_list->current + 1); + if (buf.len != todo_list->buf.len - offset || + memcmp(buf.buf, todo_list->buf.buf + offset, buf.len)) { /* Reread the todo file if it has changed. */ todo_list_release(todo_list); if (read_populate_todo(r, todo_list, opts)) @@ -4271,6 +4267,7 @@ static int reread_todo_if_changed(struct repository *r, /* `current` will be incremented on return */ todo_list->current = -1; } + strbuf_release(&buf); return 0; } diff --git a/sequencer.h b/sequencer.h index d57d8ea23d7..cdeb0c6be47 100644 --- a/sequencer.h +++ b/sequencer.h @@ -116,7 +116,6 @@ struct todo_list { struct todo_item *items; int nr, alloc, current; int done_nr, total_nr; - struct stat_data stat; }; #define TODO_LIST_INIT { STRBUF_INIT }