Message ID | 20181025055142.38077-1-nbelakovski@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | worktree: refactor lock_reason_valid and lock_reason to be more sensible | expand |
nbelakovski@gmail.com writes: > From: Nickolai Belakovski <nbelakovski@gmail.com> > > lock_reason_valid is renamed to is_locked and lock_reason is removed as > a field of the worktree struct. Lock reason can be obtained instead by a > standalone function. > > This is done in order to make the worktree struct more intuitive when it > is used elsewhere in the codebase. So a mere action of getting an in-core worktree instance now has to make an extra call to file_exists(), and in addition, the callers who want to learn why the worktree is locked, they need to open and read the contents of the file in addition? Why is that an improvement? > > Some unused variables are cleaned up as well. > > Signed-off-by: Nickolai Belakovski <nbelakovski@gmail.com> > --- > builtin/worktree.c | 16 ++++++++-------- > worktree.c | 55 ++++++++++++++++++++++++++++-------------------------- > worktree.h | 8 +++----- > 3 files changed, 40 insertions(+), 39 deletions(-) > > diff --git a/builtin/worktree.c b/builtin/worktree.c > index 41e771439..844789a21 100644 > --- a/builtin/worktree.c > +++ b/builtin/worktree.c > @@ -634,8 +634,8 @@ static int lock_worktree(int ac, const char **av, const char *prefix) > if (is_main_worktree(wt)) > die(_("The main working tree cannot be locked or unlocked")); > > - old_reason = is_worktree_locked(wt); > - if (old_reason) { > + if (wt->is_locked) { > + old_reason = worktree_locked_reason(wt); > if (*old_reason) > die(_("'%s' is already locked, reason: %s"), > av[0], old_reason); > @@ -666,7 +666,7 @@ static int unlock_worktree(int ac, const char **av, const char *prefix) > die(_("'%s' is not a working tree"), av[0]); > if (is_main_worktree(wt)) > die(_("The main working tree cannot be locked or unlocked")); > - if (!is_worktree_locked(wt)) > + if (!wt->is_locked) > die(_("'%s' is not locked"), av[0]); > ret = unlink_or_warn(git_common_path("worktrees/%s/locked", wt->id)); > free_worktrees(worktrees); > @@ -734,8 +734,8 @@ static int move_worktree(int ac, 
const char **av, const char *prefix) > > validate_no_submodules(wt); > > - reason = is_worktree_locked(wt); > - if (reason) { > + if (wt->is_locked) { > + reason = worktree_locked_reason(wt); > if (*reason) > die(_("cannot move a locked working tree, lock reason: %s"), > reason); > @@ -860,11 +860,11 @@ static int remove_worktree(int ac, const char **av, const char *prefix) > die(_("'%s' is not a working tree"), av[0]); > if (is_main_worktree(wt)) > die(_("'%s' is a main working tree"), av[0]); > - reason = is_worktree_locked(wt); > - if (reason) { > + if (wt->is_locked) { > + reason = worktree_locked_reason(wt); > if (*reason) > die(_("cannot remove a locked working tree, lock reason: %s"), > - reason); > + reason); > die(_("cannot remove a locked working tree")); > } > if (validate_worktree(wt, &errmsg, WT_VALIDATE_WORKTREE_MISSING_OK)) > diff --git a/worktree.c b/worktree.c > index 97cda5f97..a3082d19d 100644 > --- a/worktree.c > +++ b/worktree.c > @@ -14,7 +14,6 @@ void free_worktrees(struct worktree **worktrees) > free(worktrees[i]->path); > free(worktrees[i]->id); > free(worktrees[i]->head_ref); > - free(worktrees[i]->lock_reason); > free(worktrees[i]); > } > free (worktrees); > @@ -41,13 +40,29 @@ static void add_head_info(struct worktree *wt) > wt->is_detached = 1; > } > > + > +/** > + * Return 1 if the worktree is locked, 0 otherwise > + */ > +static int is_worktree_locked(const struct worktree *wt) > +{ > + struct strbuf path = STRBUF_INIT; > + int locked_file_exists; > + > + assert(!is_main_worktree(wt)); > + > + strbuf_addstr(&path, worktree_git_path(wt, "locked")); > + locked_file_exists = file_exists(path.buf); > + strbuf_release(&path); > + return locked_file_exists; > +} > + > /** > * get the main worktree > */ > static struct worktree *get_main_worktree(void) > { > struct worktree *worktree = NULL; > - struct strbuf path = STRBUF_INIT; > struct strbuf worktree_path = STRBUF_INIT; > int is_bare = 0; > > @@ -56,14 +71,11 @@ static struct worktree 
*get_main_worktree(void) > if (is_bare) > strbuf_strip_suffix(&worktree_path, "/."); > > - strbuf_addf(&path, "%s/HEAD", get_git_common_dir()); > - > worktree = xcalloc(1, sizeof(*worktree)); > worktree->path = strbuf_detach(&worktree_path, NULL); > worktree->is_bare = is_bare; > add_head_info(worktree); > > - strbuf_release(&path); > strbuf_release(&worktree_path); > return worktree; > } > @@ -89,12 +101,10 @@ static struct worktree *get_linked_worktree(const char *id) > strbuf_strip_suffix(&worktree_path, "/."); > } > > - strbuf_reset(&path); > - strbuf_addf(&path, "%s/worktrees/%s/HEAD", get_git_common_dir(), id); > - > worktree = xcalloc(1, sizeof(*worktree)); > worktree->path = strbuf_detach(&worktree_path, NULL); > worktree->id = xstrdup(id); > + worktree->is_locked = is_worktree_locked(worktree); > add_head_info(worktree); > > done: > @@ -231,27 +241,20 @@ int is_main_worktree(const struct worktree *wt) > return !wt->id; > } > > -const char *is_worktree_locked(struct worktree *wt) > +const char *worktree_locked_reason(const struct worktree *wt) > { > - assert(!is_main_worktree(wt)); > + struct strbuf path = STRBUF_INIT; > + struct strbuf lock_reason = STRBUF_INIT; > > - if (!wt->lock_reason_valid) { > - struct strbuf path = STRBUF_INIT; > - > - strbuf_addstr(&path, worktree_git_path(wt, "locked")); > - if (file_exists(path.buf)) { > - struct strbuf lock_reason = STRBUF_INIT; > - if (strbuf_read_file(&lock_reason, path.buf, 0) < 0) > - die_errno(_("failed to read '%s'"), path.buf); > - strbuf_trim(&lock_reason); > - wt->lock_reason = strbuf_detach(&lock_reason, NULL); > - } else > - wt->lock_reason = NULL; > - wt->lock_reason_valid = 1; > - strbuf_release(&path); > - } > + assert(!is_main_worktree(wt)); > + assert(wt->is_locked); > > - return wt->lock_reason; > + strbuf_addstr(&path, worktree_git_path(wt, "locked")); > + if (strbuf_read_file(&lock_reason, path.buf, 0) < 0) > + die_errno(_("failed to read '%s'"), path.buf); > + strbuf_trim(&lock_reason); > + 
strbuf_release(&path); > + return strbuf_detach(&lock_reason, NULL); > } > > /* convenient wrapper to deal with NULL strbuf */ > diff --git a/worktree.h b/worktree.h > index df3fc30f7..6717287e8 100644 > --- a/worktree.h > +++ b/worktree.h > @@ -10,12 +10,11 @@ struct worktree { > char *path; > char *id; > char *head_ref; /* NULL if HEAD is broken or detached */ > - char *lock_reason; /* internal use */ > struct object_id head_oid; > int is_detached; > int is_bare; > int is_current; > - int lock_reason_valid; > + int is_locked; > }; > > /* Functions for acting on the information about worktrees. */ > @@ -57,10 +56,9 @@ extern struct worktree *find_worktree(struct worktree **list, > extern int is_main_worktree(const struct worktree *wt); > > /* > - * Return the reason string if the given worktree is locked or NULL > - * otherwise. > + * Return the reason string if the given worktree is locked or die > */ > -extern const char *is_worktree_locked(struct worktree *wt); > +extern const char *worktree_locked_reason(const struct worktree *wt); > > #define WT_VALIDATE_WORKTREE_MISSING_OK (1 << 0)
This was meant to be a reply to https://public-inbox.org/git/CAC05386F1X7TsPr6kgkuLWEwsmdiQ4VKTF5RxaHvzpkwbmXPBw@mail.gmail.com/T/#m8898c8f7c68e1ea234aca21cb2d7776b375c6f51, please look there for some more context. I think it both did and didn't get listed as a reply? In my mailbox I see two separate threads but in public-inbox.org/git it looks like it correctly got labelled as 1 thread. This whole mailing list thing is new to me, thanks for bearing with me as I figure it out :). Next time I'll make sure to change the subject line on updated patches as PATCH v2 (that's the convention, right?). This is an improvement because it fixes an issue in which the fields lock_reason and lock_reason_valid of the worktree struct were not being populated. This is related to work I'm doing to add a worktree atom to ref-filter.c. I see your concerns about extra hits to the filesystem when calling get_worktrees and about users interested in lock_reason having to make extra calls. As regards hits to the filesystem, I could remove is_locked from the worktree struct entirely. To address the second concern, I could refactor worktree_locked_reason to return null if the wt is not locked. I would still want to keep is_worktree_locked around to provide a facility to check whether or not the worktree is locked without having to go get the reason. There's also been some concerns raised about caching. As I pointed out in the other thread, the current use cases for this information die upon accessing it, so caching is a moot point. For the use case of a worktree atom, caching would be relevant, but it could be done within ref-filter.c. Another option is to add the lock_reason back to the worktree struct and have two functions for populating it: get_worktrees_wo_lock_reason and get_worktrees_with_lock_reason. A bit more verbose, but it makes it clear to the caller what they're getting and what they're not getting. 
I might suggest starting with doing the caching within ref-filter.c first, and if more use cases appear for caching lock_reason we can consider the second option. It could also be get_worktrees and get_worktrees_wo_lock_reason, though I think most callers would be calling the latter name. So, my proposal for driving this patch to completion would be to: -remove is_locked from the worktree struct -refactor worktree_locked_reason to return null if the wt is not locked -refactor calls to is_locked within builtin/worktree.c to call either the refactored worktree_locked_reason or is_worktree_locked In addition to making the worktree code clearer, this patch fixes a bug in which the current is_worktree_locked over-eagerly sets lock_reason_valid. There are currently no consumers of lock_reason_valid within master, but obviously we should fix this before they appear :) Thoughts? On Wed, Oct 24, 2018 at 11:56 PM Junio C Hamano <gitster@pobox.com> wrote: > > nbelakovski@gmail.com writes: > > > From: Nickolai Belakovski <nbelakovski@gmail.com> > > > > lock_reason_valid is renamed to is_locked and lock_reason is removed as > > a field of the worktree struct. Lock reason can be obtained instead by a > > standalone function. > > > > This is done in order to make the worktree struct more intuitive when it > > is used elsewhere in the codebase. > > So a mere action of getting an in-core worktree instance now has to > make an extra call to file_exists(), and in addition, the callers > who want to learn why the worktree is locked, they need to open and > read the contents of the file in addition? > > Why is that an improvement? > > > > > > Some unused variables are cleaned up as well. 
> > > > Signed-off-by: Nickolai Belakovski <nbelakovski@gmail.com> > > --- > > builtin/worktree.c | 16 ++++++++-------- > > worktree.c | 55 ++++++++++++++++++++++++++++-------------------------- > > worktree.h | 8 +++----- > > 3 files changed, 40 insertions(+), 39 deletions(-) > > > > diff --git a/builtin/worktree.c b/builtin/worktree.c > > index 41e771439..844789a21 100644 > > --- a/builtin/worktree.c > > +++ b/builtin/worktree.c > > @@ -634,8 +634,8 @@ static int lock_worktree(int ac, const char **av, const char *prefix) > > if (is_main_worktree(wt)) > > die(_("The main working tree cannot be locked or unlocked")); > > > > - old_reason = is_worktree_locked(wt); > > - if (old_reason) { > > + if (wt->is_locked) { > > + old_reason = worktree_locked_reason(wt); > > if (*old_reason) > > die(_("'%s' is already locked, reason: %s"), > > av[0], old_reason); > > @@ -666,7 +666,7 @@ static int unlock_worktree(int ac, const char **av, const char *prefix) > > die(_("'%s' is not a working tree"), av[0]); > > if (is_main_worktree(wt)) > > die(_("The main working tree cannot be locked or unlocked")); > > - if (!is_worktree_locked(wt)) > > + if (!wt->is_locked) > > die(_("'%s' is not locked"), av[0]); > > ret = unlink_or_warn(git_common_path("worktrees/%s/locked", wt->id)); > > free_worktrees(worktrees); > > @@ -734,8 +734,8 @@ static int move_worktree(int ac, const char **av, const char *prefix) > > > > validate_no_submodules(wt); > > > > - reason = is_worktree_locked(wt); > > - if (reason) { > > + if (wt->is_locked) { > > + reason = worktree_locked_reason(wt); > > if (*reason) > > die(_("cannot move a locked working tree, lock reason: %s"), > > reason); > > @@ -860,11 +860,11 @@ static int remove_worktree(int ac, const char **av, const char *prefix) > > die(_("'%s' is not a working tree"), av[0]); > > if (is_main_worktree(wt)) > > die(_("'%s' is a main working tree"), av[0]); > > - reason = is_worktree_locked(wt); > > - if (reason) { > > + if (wt->is_locked) { > > + reason = 
worktree_locked_reason(wt); > > if (*reason) > > die(_("cannot remove a locked working tree, lock reason: %s"), > > - reason); > > + reason); > > die(_("cannot remove a locked working tree")); > > } > > if (validate_worktree(wt, &errmsg, WT_VALIDATE_WORKTREE_MISSING_OK)) > > diff --git a/worktree.c b/worktree.c > > index 97cda5f97..a3082d19d 100644 > > --- a/worktree.c > > +++ b/worktree.c > > @@ -14,7 +14,6 @@ void free_worktrees(struct worktree **worktrees) > > free(worktrees[i]->path); > > free(worktrees[i]->id); > > free(worktrees[i]->head_ref); > > - free(worktrees[i]->lock_reason); > > free(worktrees[i]); > > } > > free (worktrees); > > @@ -41,13 +40,29 @@ static void add_head_info(struct worktree *wt) > > wt->is_detached = 1; > > } > > > > + > > +/** > > + * Return 1 if the worktree is locked, 0 otherwise > > + */ > > +static int is_worktree_locked(const struct worktree *wt) > > +{ > > + struct strbuf path = STRBUF_INIT; > > + int locked_file_exists; > > + > > + assert(!is_main_worktree(wt)); > > + > > + strbuf_addstr(&path, worktree_git_path(wt, "locked")); > > + locked_file_exists = file_exists(path.buf); > > + strbuf_release(&path); > > + return locked_file_exists; > > +} > > + > > /** > > * get the main worktree > > */ > > static struct worktree *get_main_worktree(void) > > { > > struct worktree *worktree = NULL; > > - struct strbuf path = STRBUF_INIT; > > struct strbuf worktree_path = STRBUF_INIT; > > int is_bare = 0; > > > > @@ -56,14 +71,11 @@ static struct worktree *get_main_worktree(void) > > if (is_bare) > > strbuf_strip_suffix(&worktree_path, "/."); > > > > - strbuf_addf(&path, "%s/HEAD", get_git_common_dir()); > > - > > worktree = xcalloc(1, sizeof(*worktree)); > > worktree->path = strbuf_detach(&worktree_path, NULL); > > worktree->is_bare = is_bare; > > add_head_info(worktree); > > > > - strbuf_release(&path); > > strbuf_release(&worktree_path); > > return worktree; > > } > > @@ -89,12 +101,10 @@ static struct worktree *get_linked_worktree(const 
char *id) > > strbuf_strip_suffix(&worktree_path, "/."); > > } > > > > - strbuf_reset(&path); > > - strbuf_addf(&path, "%s/worktrees/%s/HEAD", get_git_common_dir(), id); > > - > > worktree = xcalloc(1, sizeof(*worktree)); > > worktree->path = strbuf_detach(&worktree_path, NULL); > > worktree->id = xstrdup(id); > > + worktree->is_locked = is_worktree_locked(worktree); > > add_head_info(worktree); > > > > done: > > @@ -231,27 +241,20 @@ int is_main_worktree(const struct worktree *wt) > > return !wt->id; > > } > > > > -const char *is_worktree_locked(struct worktree *wt) > > +const char *worktree_locked_reason(const struct worktree *wt) > > { > > - assert(!is_main_worktree(wt)); > > + struct strbuf path = STRBUF_INIT; > > + struct strbuf lock_reason = STRBUF_INIT; > > > > - if (!wt->lock_reason_valid) { > > - struct strbuf path = STRBUF_INIT; > > - > > - strbuf_addstr(&path, worktree_git_path(wt, "locked")); > > - if (file_exists(path.buf)) { > > - struct strbuf lock_reason = STRBUF_INIT; > > - if (strbuf_read_file(&lock_reason, path.buf, 0) < 0) > > - die_errno(_("failed to read '%s'"), path.buf); > > - strbuf_trim(&lock_reason); > > - wt->lock_reason = strbuf_detach(&lock_reason, NULL); > > - } else > > - wt->lock_reason = NULL; > > - wt->lock_reason_valid = 1; > > - strbuf_release(&path); > > - } > > + assert(!is_main_worktree(wt)); > > + assert(wt->is_locked); > > > > - return wt->lock_reason; > > + strbuf_addstr(&path, worktree_git_path(wt, "locked")); > > + if (strbuf_read_file(&lock_reason, path.buf, 0) < 0) > > + die_errno(_("failed to read '%s'"), path.buf); > > + strbuf_trim(&lock_reason); > > + strbuf_release(&path); > > + return strbuf_detach(&lock_reason, NULL); > > } > > > > /* convenient wrapper to deal with NULL strbuf */ > > diff --git a/worktree.h b/worktree.h > > index df3fc30f7..6717287e8 100644 > > --- a/worktree.h > > +++ b/worktree.h > > @@ -10,12 +10,11 @@ struct worktree { > > char *path; > > char *id; > > char *head_ref; /* NULL if HEAD is 
broken or detached */ > > - char *lock_reason; /* internal use */ > > struct object_id head_oid; > > int is_detached; > > int is_bare; > > int is_current; > > - int lock_reason_valid; > > + int is_locked; > > }; > > > > /* Functions for acting on the information about worktrees. */ > > @@ -57,10 +56,9 @@ extern struct worktree *find_worktree(struct worktree **list, > > extern int is_main_worktree(const struct worktree *wt); > > > > /* > > - * Return the reason string if the given worktree is locked or NULL > > - * otherwise. > > + * Return the reason string if the given worktree is locked or die > > */ > > -extern const char *is_worktree_locked(struct worktree *wt); > > +extern const char *worktree_locked_reason(const struct worktree *wt); > > > > #define WT_VALIDATE_WORKTREE_MISSING_OK (1 << 0)
On Sun, Oct 28, 2018 at 5:55 PM Nickolai Belakovski > <nbelakovski@gmail.com> wrote: This was meant to be a reply to > https://public-inbox.org/git/CAC05386F1X7TsPr6kgkuLWEwsmdiQ4VKTF5RxaHvzpkwbmXPBw@mail.gmail.com/T/#m8898c8f7c68e1ea234aca21cb2d7776b375c6f51, > please look there for some more context. I think it both did and > didn't get listed as a reply? In my mailbox I see two separate > threads but in public-inbox.org/git it looks like it correctly got > labelled as 1 thread. This whole mailing list thing is new to me, > thanks for bearing with me as I figure it out :). Gmail threads messages entirely by subject; it doesn't pay attention to In-Reply-To: or other headers for threading, which is why you see two separate threads. public-inbox.org, on the other hand, does pay attention to the headers, thus understands that all the messages belong to the same thread. Gmail's behavior may be considered anomalous. > Next time I'll make sure to change the subject line on updated > patches as PATCH v2 (that's the convention, right?). That's correct. > This is an improvement because it fixes an issue in which the fields > lock_reason and lock_reason_valid of the worktree struct were not > being populated. This is related to work I'm doing to add a worktree > atom to ref-filter.c. Those fields are considered private/internal. They are not intended to be accessed by calling code. (Unfortunately, only 'lock_reason' is thus marked; 'lock_reason_valid' should be marked "internal".) Clients are expected to retrieve the lock reason only through the provided API, is_worktree_locked(). > I see your concerns about extra hits to the filesystem when calling > get_worktrees and about users interested in lock_reason having to > make extra calls. As regards hits to the filesystem, I could remove > is_locked from the worktree struct entirely. To address the second > concern, I could refactor worktree_locked_reason to return null if > the wt is not locked. 
I would still want to keep is_worktree_locked > around to provide a facility to check whether or not the worktree is > locked without having to go get the reason. > > There's also been some concerns raised about caching. As I pointed > out in the other thread, the current use cases for this information > die upon accessing it, so caching is a moot point. For the use case > of a worktree atom, caching would be relevant, but it could be done > within ref-filter.c. Another option is to add the lock_reason back > to the worktree struct and have two functions for populating it: > get_worktrees_wo_lock_reason and get_worktrees_with_lock_reason. A > bit more verbose, but it makes it clear to the caller what they're > getting and what they're not getting. I might suggest starting with > doing the caching within ref-filter.c first, and if more use cases > appear for caching lock_reason we can consider the second option. It > could also be get_worktrees and get_worktrees_wo_lock_reason, though > I think most callers would be calling the latter name. > > So, my proposal for driving this patch to completion would be to: > -remove is_locked from the worktree struct > -refactor worktree_locked_reason to return null if the wt is not locked > -refactor calls to is_locked within builtin/worktree.c to call > either the refactored worktree_locked_reason or is_worktree_locked My impression, thus far, is that this all seems to be complicating rather than simplifying. These changes also seem entirely unnecessary. In [1], I made the observation that it seemed that your new ref-filter atom could be implemented with the existing is_worktree_locked() API. As far as I can tell, it can indeed be implemented without having to make any changes to the worktree API or implementation at all. The worktree API is both compact and orthogonal, and I haven't yet seen a compelling reason to change it. 
That said, though, the API documentation in worktree.h may be lacking, even if the implementation is not. I'll say a bit more about that below. > In addition to making the worktree code clearer, this patch fixes a > bug in which the current is_worktree_locked over-eagerly sets > lock_reason_valid. There are currently no consumers of > lock_reason_valid within master, but obviously we should fix this > before they appear :) As noted above, 'lock_reason_valid' is private/internal. It's an accident that it is not annotated such (like 'lock_reason', which is correctly annotated as "internal"). So, there should never be any external consumers of that field. It also means that there is no bug in the current code (as far as I can see) since that field is correctly consulted (internally) to determine whether the lock reason has been looked up yet. The missing "internal only" annotation is unfortunate since it may have led you down this road of considering the implementation and API broken. Moreover, the documentation for is_worktree_locked() apparently doesn't convey strongly enough that it serves the dual purpose of (1) telling you whether or not the worktree is locked, and (2) telling you the reason it is locked. A patch which adds the missing "internal only" annotation to 'lock_reason_valid', and which makes it easier to understand the dual purpose of is_worktree_locked() would be welcome, especially if it helps avoid such confusion in the future. Aside from that, it doesn't seem like worktree needs any changes for the ref-filter atom you have in mind. (Don't interpret this observation as me being averse to changes to the API; I'm open to improvements, but haven't seen anything yet indicating a bug or showing that the API is more difficult than it ought to be.) [1]: https://public-inbox.org/git/CAPig+cTvKd2DVu7wW_A31p_o7BaNJszu14kNRz9sqk8h45H4-g@mail.gmail.com/
On Sun, Oct 28, 2018 at 4:03 PM Eric Sunshine <sunshine@sunshineco.com> wrote: > > On Sun, Oct 28, 2018 at 5:55 PM Nickolai Belakovski > > <nbelakovski@gmail.com> wrote: This was meant to be a reply to > > https://public-inbox.org/git/CAC05386F1X7TsPr6kgkuLWEwsmdiQ4VKTF5RxaHvzpkwbmXPBw@mail.gmail.com/T/#m8898c8f7c68e1ea234aca21cb2d7776b375c6f51, > > please look there for some more context. I think it both did and > > didn't get listed as a reply? In my mailbox I see two separate > > threads but in public-inbox.org/git it looks like it correctly got > > labelled as 1 thread. This whole mailing list thing is new to me, > > thanks for bearing with me as I figure it out :). > > Gmail threads messages entirely by subject; it doesn't pay attention > to In-Reply-To: or other headers for threading, which is why you see > two separate threads. public-inbox.org, on the other hand, does pay > attention to the headers, thus understands that all the messages > belong to the same thread. Gmail's behavior may be considered > anomalous. > Got it, thanks! > > Next time I'll make sure to change the subject line on updated > > patches as PATCH v2 (that's the convention, right?). > > That's correct. > (thumbs up) > > This is an improvement because it fixes an issue in which the fields > > lock_reason and lock_reason_valid of the worktree struct were not > > being populated. This is related to work I'm doing to add a worktree > > atom to ref-filter.c. > > Those fields are considered private/internal. They are not intended to > be accessed by calling code. (Unfortunately, only 'lock_reason' is > thus marked; 'lock_reason_valid' should be marked "internal".) Clients > are expected to retrieve the lock reason only through the provided > API, is_worktree_locked(). > > > I see your concerns about extra hits to the filesystem when calling > > get_worktrees and about users interested in lock_reason having to > > make extra calls. 
As regards hits to the filesystem, I could remove > > is_locked from the worktree struct entirely. To address the second > > concern, I could refactor worktree_locked_reason to return null if > > the wt is not locked. I would still want to keep is_worktree_locked > > around to provide a facility to check whether or not the worktree is > > locked without having to go get the reason. > > > > There's also been some concerns raised about caching. As I pointed > > out in the other thread, the current use cases for this information > > die upon accessing it, so caching is a moot point. For the use case > > of a worktree atom, caching would be relevant, but it could be done > > within ref-filter.c. Another option is to add the lock_reason back > > to the worktree struct and have two functions for populating it: > > get_worktrees_wo_lock_reason and get_worktrees_with_lock_reason. A > > bit more verbose, but it makes it clear to the caller what they're > > getting and what they're not getting. I might suggest starting with > > doing the caching within ref-filter.c first, and if more use cases > > appear for caching lock_reason we can consider the second option. It > > could also be get_worktrees and get_worktrees_wo_lock_reason, though > > I think most callers would be calling the latter name. > > > > So, my proposal for driving this patch to completion would be to: > > -remove is_locked from the worktree struct > > -refactor worktree_locked_reason to return null if the wt is not locked > > -refactor calls to is_locked within builtin/worktree.c to call > > either the refactored worktree_locked_reason or is_worktree_locked > > My impression, thus far, is that this all seems to be complicating > rather than simplifying. These changes also seem entirely unnecessary. > In [1], I made the observation that it seemed that your new ref-filter > atom could be implemented with the existing is_worktree_locked() API. 
> As far as I can tell, it can indeed be implemented without having to > make any changes to the worktree API or implementation at all. > > The worktree API is both compact and orthogonal, and I haven't yet > seen a compelling reason to change it. That said, though, the API > documentation in worktree.h may be lacking, even if the implementation > is not. I'll say a bit more about that below. > > > In addition to making the worktree code clearer, this patch fixes a > > bug in which the current is_worktree_locked over-eagerly sets > > lock_reason_valid. There are currently no consumers of > > lock_reason_valid within master, but obviously we should fix this > > before they appear :) > > As noted above, 'lock_reason_valid' is private/internal. It's an > accident that it is not annotated such (like 'lock_reason', which is > correctly annotated as "internal"). So, there should never be any > external consumers of that field. It also means that there is no bug > in the current code (as far as I can see) since that field is > correctly consulted (internally) to determine whether the lock reason > has been looked up yet. Thank you for explaining this. Looking at the code now it seems crystal clear, but, yea I clearly got on the wrong path initially. > > The missing "internal only" annotation is unfortunate since it may > have led you down this road of considering the implementation and API > broken. > > Moreover, the documentation for is_worktree_locked() apparently > doesn't convey strongly enough that it serves the dual purpose of (1) > telling you whether or not the worktree is locked, and (2) telling you > the reason it is locked. > > A patch which adds the missing "internal only" annotation to > 'lock_reason_valid', and which makes it easier to understand the dual > purpose of is_worktree_locked() would be welcome, especially if it > helps avoid such confusion in the future. 
> > Aside from that, it doesn't seem like worktree needs any changes for > the ref-filter atom you have in mind. (Don't interpret this > observation as me being averse to changes to the API; I'm open to > improvements, but haven't seen anything yet indicating a bug or > showing that the API is more difficult than it ought to be.) > > [1]: https://public-inbox.org/git/CAPig+cTvKd2DVu7wW_A31p_o7BaNJszu14kNRz9sqk8h45H4-g@mail.gmail.com/ You're right that these changes are not necessary in order to make a worktree atom. If there's no interest in this patch I'll withdraw it. I had found it really surprising that lock_reason was not populated when I was accessing it while working on the worktree atom. When digging into it, the "internal use" comment told me nothing, both because there's no convention (that I'm aware of) within C to mark fields as such and because it fails to direct the reader to is_worktree_locked. How about this, I can make a patch that changes the comment next to lock_reason to say "/* private - use is_worktree_locked */" (choosing the word "private" since it's a reserved keyword in C++ and other languages for implementation details that are meant to be inaccessible) and a comment next to lock_reason_valid that just says "/* private */"? I would also suggest renaming is_worktree_locked to worktree_lock_reason, the former makes me think the function is returning a boolean, whereas the latter more clearly conveys that a more detailed piece of information is being returned. Lemme know what you think.
Nickolai Belakovski <nbelakovski@gmail.com> writes: > This is an improvement because it fixes an issue in which the fields > lock_reason and lock_reason_valid of the worktree struct were not > being populated. If the field "reason" should always be populated, there is *no* reason why we need the "valid" boolean. They work as a pair to realize lazy population of a rarely used field. The lazy evaluation technique is used as an optimization for the common case, where the majority of operations do not care if worktrees are locked and if so why they are locked, so that only rare operations that do want to find out can ask "is this locked and why?" via the is_worktree_locked() interface, and at that point we lazily find it out by reading the "locked" file. So it is by design that these fields are not always populated, but are populated on demand as book-keeping info internal to the API's implementation. It is not "an issue", and changing it is not a "fix". In addition, if we have already checked, then we do not even do the same check again. If in an earlier call we found out that a worktree is not locked, we flip the _valid bit to true while setting _reason to NULL, so that the next call can say "oh, that's not locked and we can tell that without looking at the filesystem again" [*1*]. You are forcing the callers of get_worktrees() to pay the cost to check, open and read the "why is this worktree locked?" file for all worktrees, whether they care if these worktrees are locked or why they are locked. Such a change can be an improvement *ONLY* if you can demonstrate that in the current code most codepaths that call get_worktrees() end up calling is_worktree_locked() on all worktrees anyways. If that were the case, not having to lazily evaluate the "locked"-ness, but always check upfront, would have a simplification value, as either approach would be spending the same cost to open and read these "locked" files. But I do not think it is the case.
Outside builtin/worktree.c (and you need to admit "git worktree" is a rather rare command in the first place, so you shouldn't be optimizing for that if it hurts other codepaths), builtin/branch.c wants to go to all worktrees and update their HEAD when a branch is renamed (if the old HEAD is pointing at the original name, of course), but that code won't care if the worktree is locked at all. I do not think of any caller of get_worktrees() that want to know if it is locked and why for each and every one of them, and I'd be surprised if that *is* the majority, but as a proposer to burden get_worktrees() with this extra cost, you certainly would have audited the callers and made sure it is worth making them pay the extra cost? If we are going to change anything around this area, I'd not be surprised that the right move is to go in the opposite direction. Right now, you cannot just get "is it locked?" boolean answer (which can be obtained by a simple stat(2) call) without getting "why is it locked?" (which takes open(2) & read(2) & close(2)), and if you are planning a new application that wants to ask "is it locked?" a lot without having to know the reason, you may want to make the lazy evaluation even lazier by splitting _valid field into two (i.e. a "do we know if this is locked?" valid bit covers "is_locked" bit, and another "do we know why this is locked?" valid bit accompanies "locked_reason" string). And the callers would ask two separate questions: is_worktree_locked() that says true or false, and then why_worktree_locked() that yields NULL or string (i.e. essentially that is what we have as is_worktree_locked() today). Of course, such a change must also be justified with a code audit to demonstrate that only minority case of the callers of is-locked? 
wants to know why [Footnote] *1* The codepaths that want to know if a worktree is locked or not (and wants to learn the reason) are so rare and concentrated in builtin/worktree.c, and more importantly, they do not need to ask the same question twice, so we can stop caching and make is_worktree_locked() always go to the filesystem, I think, and that may be a valid change _if_ we allow worktrees to be randomly locked and unlocked while we are looking at them, but if we want to worry about such concurrent and competing uses, we need a big repository-wide lock anyway, and it is the least of our problems that the current caching may go stale without getting invalidated. The code will be racing against such concurrent processes even if you made it to go to the filesystem all the time.
On Sun, Oct 28, 2018 at 9:11 PM Nickolai Belakovski <nbelakovski@gmail.com> wrote: > On Sun, Oct 28, 2018 at 4:03 PM Eric Sunshine <sunshine@sunshineco.com> wrote: > > Aside from that, it doesn't seem like worktree needs any changes for > > the ref-filter atom you have in mind. (Don't interpret this > > observation as me being averse to changes to the API; I'm open to > > improvements, but haven't seen anything yet indicating a bug or > > showing that the API is more difficult than it ought to be.) > > You're right that these changes are not necessary in order to make a > worktree atom. > If there's no interest in this patch I'll withdraw it. Withdrawing this patch seems reasonable. > I had found it really surprising that lock_reason was not populated > when I was accessing it while working on the worktree atom. When > digging into it, the "internal use" comment told me nothing, both > because there's no convention (that I'm aware of) within C to mark > fields as such and because it fails to direct the reader to > is_worktree_locked. > > How about this, I can make a patch that changes the comment next to > lock_reason to say "/* private - use is_worktree_locked */" (choosing > the word "private" since it's a reserved keyword in C++ and other > languages for implementation details that are meant to be > inaccessible) and a comment next to lock_reason_valid that just says > "/* private */"? A patch clarifying the "private" state of 'lock_reason' and 'lock_reason_valid' and pointing the reader at is_worktree_locked() would be welcome. One extra point: It might be a good idea to mention in the documentation of is_worktree_locked() that, in addition to returning NULL or non-NULL indicating not-locked or locked, the returned lock-reason might very well be empty ("") when no reason was given by the locker. 
> I would also suggest renaming is_worktree_locked to > worktree_lock_reason, the former makes me think the function is > returning a boolean, whereas the latter more clearly conveys that a > more detailed piece of information is being returned. I think the "boolean"-sounding name was intentional since most (current) callers only care about that; so, the following reads very naturally for such callers: if (is_worktree_locked(wt)) die(_("worktree locked; aborting")); That said, I wouldn't necessarily oppose renaming the function, but I also don't think it's particularly important to do so.
On Sun, Oct 28, 2018 at 8:52 PM Junio C Hamano <gitster@pobox.com> wrote: > > > If the field "reason" should always be populated, there is *no* > reason why we need the "valid" boolean. They work as a pair to > realize lazy population of rarely used field. The lazy evaluation > technique is used as an optimization for common case, where majority > of operations do not care if worktrees are locked and if so why they > are locked, so that only rare operations that do want to find out > can ask "is this locked and why?" via is_worktree_locked() interface, > and at that point we lazily find it out by reading "locked" file. > > So it is by design that these fields are not always populated, but > are populated on demand as book-keeping info internal to the API's > implementation. It is not "an issue", and changing it is not a > "fix". Having fields in a struct that are not populated by a getter function with no documentation indicating that they are not populated and no documentation explaining how to populate them is the issue here. > > In addition, if we have already checked, then we do not even do the > same check again. If in an earlier call we found out that a worktree > is not locked, we flip the _valid bit to true while setting _reason > to NULL, so that the next call can say "oh, that's not locked and we > can tell that without looking at the filesystem again" [*1*]. I clearly misunderstood the use case of the _valid flag, thanks for pointing it out. > > You are forcing the callers of get_worktrees() to pay the cost to > check, open and read the "why is this worktree locked?" file for all > worktrees, whether they care if these worktrees are locked or why > they are locked. Such a change can be an improvement *ONLY* if you > can demonstrate that in the current code most codepaths that call > get_worktrees() end up calling is_worktree_locked() on all worktrees > anyways. 
If that were the case, not having to lazily evaluate the > "locked"-ness, but always check upfront, would have a simplification > value, as either approach would be spending the same cost to open > and read these "locked" files. > > But I do not think it is the case. Outside builtin/worktree.c (and > you need to admit "git worktree" is a rather rare command in the > first place, so you shouldn't be optimizing for that if it hurts > other codepaths), builtin/branch.c wants to go to all worktrees and > update their HEAD when a branch is renamed (if the old HEAD is > pointing at the original name, of course), but that code won't care > if the worktree is locked at all. I do not think of any caller of > get_worktrees() that want to know if it is locked and why for each > and every one of them, and I'd be surprised if that *is* the > majority, but as a proposer to burden get_worktrees() with this > extra cost, you certainly would have audited the callers and made > sure it is worth making them pay the extra cost? > > If we are going to change anything around this area, I'd not be > surprised that the right move is to go in the opposite direction. > Right now, you cannot just get "is it locked?" boolean answer (which > can be obtained by a simple stat(2) call) without getting "why is it > locked?" (which takes open(2) & read(2) & close(2)), and if you are > planning a new application that wants to ask "is it locked?" a lot > without having to know the reason, you may want to make the lazy > evaluation even lazier by splitting _valid field into two (i.e. a > "do we know if this is locked?" valid bit covers "is_locked" bit, > and another "do we know why this is locked?" valid bit accompanies > "locked_reason" string). And the callers would ask two separate > questions: is_worktree_locked() that says true or false, and then > why_worktree_locked() that yields NULL or string (i.e. essentially > that is what we have as is_worktree_locked() today). 
Of course, > such a change must also be justified with a code audit to > demonstrate that only minority case of the callers of is-locked? > wants to know why > > > [Footnote] > > *1* The codepaths that want to know if a worktree is locked or not > (and wants to learn the reason) are so rare and concentrated in > builtin/worktree.c, and more importantly, they do not need to ask > the same question twice, so we can stop caching and make > is_worktree_locked() always go to the filesystem, I think, and that > may be a valid change _if_ we allow worktrees to be randomly locked > and unlocked while we are looking at them, but if we want to worry > about such concurrent and competing uses, we need a big > repository-wide lock anyway, and it is the least of our problems > that the current caching may go stale without getting invalidated. > The code will be racing against such concurrent processes even if > you made it to go to the filesystem all the time. > Basically, I already implemented most of what you're saying. The v2 proposal does force all callers of get_worktrees to check the lock status, but by calling stat, not open/read/close. That being said you're right that even forcing them to call stat when most don't care is imposing an extra cost for no gain. The v2 proposal no longer caches the lock reason (in fact it removes it from the worktree struct), since not only do current users have no need to ask for the lock_reason twice, none of them ask for it twice in the first place. The v2 proposal provides a standalone function for getting the actual reason (leaving it up to callers to cache the result if they like). I'd be up for removing is_locked from the struct as well and making a separate standalone function for that. Either way, I do see an issue with the current code that anybody who wants to know the lock status and/or lock reason of a worktree gets faced with a confusing, misleading, and opaque piece of code. 
I see two possible remedies: a) Remove these fields from the worktree struct and provide standalone functions for answering these questions. Pros are that we follow single responsibility principle with two standalone functions for doing so, and we make the route for answering these questions less circuitous (IMO). Cons are that we remove the caching currently in place since we're no longer storing this info in the struct, but then again that caching is not currently being used and can be implemented by callers if they really need it b) Update the comments in the code to state that lock_reason and lock_reason_valid are to be considered private fields and to use is_worktree_locked for populating them. Pros are that no actual code changes need to be made. Cons are that, IMO, it's still a strange piece of code in that it's doing some sort of quasi object oriented stuff in C, and if we can take the opportunity to make the code look a bit more canonical I think we should, but that's just my 2 cents. Of course there's also option c, which is that I leave this alone and just go back to making my worktree atom :)
On Sun, Oct 28, 2018 at 9:01 PM Eric Sunshine <sunshine@sunshineco.com> wrote: > > On Sun, Oct 28, 2018 at 9:11 PM Nickolai Belakovski > <nbelakovski@gmail.com> wrote: > > I would also suggest renaming is_worktree_locked to > > worktree_lock_reason, the former makes me think the function is > > returning a boolean, whereas the latter more clearly conveys that a > > more detailed piece of information is being returned. > > I think the "boolean"-sounding name was intentional since most > (current) callers only care about that; so, the following reads very > naturally for such callers: > > if (is_worktree_locked(wt)) > die(_("worktree locked; aborting")); > > That said, I wouldn't necessarily oppose renaming the function, but I > also don't think it's particularly important to do so. Actually it's 3:2 in the current code for callers getting the reason out of the function vs callers checking the value of the pointer for null/not null. This leads to some rather unnatural looking code in the current repo like reason = is_worktree_locked(wt); I think it would look a lot more natural if it were "reason = worktree_lock_reason(wt)". The resulting if-statement wouldn't be too bad, IMO if (worktree_lock_reason(wt)) die(_("worktree locked; aborting")); To me, I would just go lookup the signature of worktree_lock_reason and see that it returns a pointer and I'd be satisfied with that. I could also infer that from looking at the code if I'm just skimming through. But if I see code like "reason = is_worktree_locked(wt)" I'm like hold on, what's going on here?! :P
On Mon, Oct 29, 2018 at 1:45 AM Nickolai Belakovski <nbelakovski@gmail.com> wrote: > On Sun, Oct 28, 2018 at 9:01 PM Eric Sunshine <sunshine@sunshineco.com> wrote: > > That said, I wouldn't necessarily oppose renaming the function, but I > > also don't think it's particularly important to do so. > > To me, I would just go lookup the signature of worktree_lock_reason > and see that it returns a pointer and I'd be satisfied with that. I > could also infer that from looking at the code if I'm just skimming > through. But if I see code like "reason = is_worktree_locked(wt)" I'm > like hold on, what's going on here?! :P I don't feel strongly about it, and, as indicated, wouldn't necessarily be opposed to it. If you do want to make that change, perhaps send it as the second patch of a 2-patch series in which patch 1 just updates the API documentation. That way, if anyone does oppose the rename in patch 2, then that patch can be dropped without having to re-send.
Nickolai Belakovski <nbelakovski@gmail.com> writes: > Either way, I do see an issue with the current code that anybody who > wants to know the lock status and/or lock reason of a worktree gets > faced with a confusing, misleading, and opaque piece of code. Sorry, I don't. I do not mind a better documentation for is_worktree_locked() without doing anything else. I do not see any reason to remove fields, split the helper function into two, drop the caching, etc., especially when the only justification is "I am new to the codebase and find it confusing".
diff --git a/builtin/worktree.c b/builtin/worktree.c index 41e771439..844789a21 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -634,8 +634,8 @@ static int lock_worktree(int ac, const char **av, const char *prefix) if (is_main_worktree(wt)) die(_("The main working tree cannot be locked or unlocked")); - old_reason = is_worktree_locked(wt); - if (old_reason) { + if (wt->is_locked) { + old_reason = worktree_locked_reason(wt); if (*old_reason) die(_("'%s' is already locked, reason: %s"), av[0], old_reason); @@ -666,7 +666,7 @@ static int unlock_worktree(int ac, const char **av, const char *prefix) die(_("'%s' is not a working tree"), av[0]); if (is_main_worktree(wt)) die(_("The main working tree cannot be locked or unlocked")); - if (!is_worktree_locked(wt)) + if (!wt->is_locked) die(_("'%s' is not locked"), av[0]); ret = unlink_or_warn(git_common_path("worktrees/%s/locked", wt->id)); free_worktrees(worktrees); @@ -734,8 +734,8 @@ static int move_worktree(int ac, const char **av, const char *prefix) validate_no_submodules(wt); - reason = is_worktree_locked(wt); - if (reason) { + if (wt->is_locked) { + reason = worktree_locked_reason(wt); if (*reason) die(_("cannot move a locked working tree, lock reason: %s"), reason); @@ -860,11 +860,11 @@ static int remove_worktree(int ac, const char **av, const char *prefix) die(_("'%s' is not a working tree"), av[0]); if (is_main_worktree(wt)) die(_("'%s' is a main working tree"), av[0]); - reason = is_worktree_locked(wt); - if (reason) { + if (wt->is_locked) { + reason = worktree_locked_reason(wt); if (*reason) die(_("cannot remove a locked working tree, lock reason: %s"), - reason); + reason); die(_("cannot remove a locked working tree")); } if (validate_worktree(wt, &errmsg, WT_VALIDATE_WORKTREE_MISSING_OK)) diff --git a/worktree.c b/worktree.c index 97cda5f97..a3082d19d 100644 --- a/worktree.c +++ b/worktree.c @@ -14,7 +14,6 @@ void free_worktrees(struct worktree **worktrees) free(worktrees[i]->path); 
free(worktrees[i]->id); free(worktrees[i]->head_ref); - free(worktrees[i]->lock_reason); free(worktrees[i]); } free (worktrees); @@ -41,13 +40,29 @@ static void add_head_info(struct worktree *wt) wt->is_detached = 1; } + +/** + * Return 1 if the worktree is locked, 0 otherwise + */ +static int is_worktree_locked(const struct worktree *wt) +{ + struct strbuf path = STRBUF_INIT; + int locked_file_exists; + + assert(!is_main_worktree(wt)); + + strbuf_addstr(&path, worktree_git_path(wt, "locked")); + locked_file_exists = file_exists(path.buf); + strbuf_release(&path); + return locked_file_exists; +} + /** * get the main worktree */ static struct worktree *get_main_worktree(void) { struct worktree *worktree = NULL; - struct strbuf path = STRBUF_INIT; struct strbuf worktree_path = STRBUF_INIT; int is_bare = 0; @@ -56,14 +71,11 @@ static struct worktree *get_main_worktree(void) if (is_bare) strbuf_strip_suffix(&worktree_path, "/."); - strbuf_addf(&path, "%s/HEAD", get_git_common_dir()); - worktree = xcalloc(1, sizeof(*worktree)); worktree->path = strbuf_detach(&worktree_path, NULL); worktree->is_bare = is_bare; add_head_info(worktree); - strbuf_release(&path); strbuf_release(&worktree_path); return worktree; } @@ -89,12 +101,10 @@ static struct worktree *get_linked_worktree(const char *id) strbuf_strip_suffix(&worktree_path, "/."); } - strbuf_reset(&path); - strbuf_addf(&path, "%s/worktrees/%s/HEAD", get_git_common_dir(), id); - worktree = xcalloc(1, sizeof(*worktree)); worktree->path = strbuf_detach(&worktree_path, NULL); worktree->id = xstrdup(id); + worktree->is_locked = is_worktree_locked(worktree); add_head_info(worktree); done: @@ -231,27 +241,20 @@ int is_main_worktree(const struct worktree *wt) return !wt->id; } -const char *is_worktree_locked(struct worktree *wt) +const char *worktree_locked_reason(const struct worktree *wt) { - assert(!is_main_worktree(wt)); + struct strbuf path = STRBUF_INIT; + struct strbuf lock_reason = STRBUF_INIT; - if 
(!wt->lock_reason_valid) { - struct strbuf path = STRBUF_INIT; - - strbuf_addstr(&path, worktree_git_path(wt, "locked")); - if (file_exists(path.buf)) { - struct strbuf lock_reason = STRBUF_INIT; - if (strbuf_read_file(&lock_reason, path.buf, 0) < 0) - die_errno(_("failed to read '%s'"), path.buf); - strbuf_trim(&lock_reason); - wt->lock_reason = strbuf_detach(&lock_reason, NULL); - } else - wt->lock_reason = NULL; - wt->lock_reason_valid = 1; - strbuf_release(&path); - } + assert(!is_main_worktree(wt)); + assert(wt->is_locked); - return wt->lock_reason; + strbuf_addstr(&path, worktree_git_path(wt, "locked")); + if (strbuf_read_file(&lock_reason, path.buf, 0) < 0) + die_errno(_("failed to read '%s'"), path.buf); + strbuf_trim(&lock_reason); + strbuf_release(&path); + return strbuf_detach(&lock_reason, NULL); } /* convenient wrapper to deal with NULL strbuf */ diff --git a/worktree.h b/worktree.h index df3fc30f7..6717287e8 100644 --- a/worktree.h +++ b/worktree.h @@ -10,12 +10,11 @@ struct worktree { char *path; char *id; char *head_ref; /* NULL if HEAD is broken or detached */ - char *lock_reason; /* internal use */ struct object_id head_oid; int is_detached; int is_bare; int is_current; - int lock_reason_valid; + int is_locked; }; /* Functions for acting on the information about worktrees. */ @@ -57,10 +56,9 @@ extern struct worktree *find_worktree(struct worktree **list, extern int is_main_worktree(const struct worktree *wt); /* - * Return the reason string if the given worktree is locked or NULL - * otherwise. + * Return the reason string if the given worktree is locked or die */ -extern const char *is_worktree_locked(struct worktree *wt); +extern const char *worktree_locked_reason(const struct worktree *wt); #define WT_VALIDATE_WORKTREE_MISSING_OK (1 << 0)