Message ID | 02bf41d419efd00e510a89a405e1b046b166ba20.1709549619.git.ps@pks.im (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | reftable/stack: register temporary files | expand |
On 24/03/04 12:10PM, Patrick Steinhardt wrote: > We do not register new tables which we're about to add to the stack with > the tempfile API. Those tables will thus not be deleted in case Git gets > killed. > > Refactor the code to register tables as tempfiles. > > Signed-off-by: Patrick Steinhardt <ps@pks.im> > --- > reftable/stack.c | 29 ++++++++++++----------------- > 1 file changed, 12 insertions(+), 17 deletions(-) > > diff --git a/reftable/stack.c b/reftable/stack.c > index b64e55648a..81544fbfa0 100644 > --- a/reftable/stack.c > +++ b/reftable/stack.c > @@ -737,8 +737,9 @@ int reftable_addition_add(struct reftable_addition *add, > struct strbuf tab_file_name = STRBUF_INIT; > struct strbuf next_name = STRBUF_INIT; > struct reftable_writer *wr = NULL; > + struct tempfile *tab_file = NULL; > int err = 0; > - int tab_fd = 0; > + int tab_fd; > > strbuf_reset(&next_name); > format_name(&next_name, add->next_update_index, add->next_update_index); > @@ -746,17 +747,20 @@ int reftable_addition_add(struct reftable_addition *add, > stack_filename(&temp_tab_file_name, add->stack, next_name.buf); > strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); > > - tab_fd = mkstemp(temp_tab_file_name.buf); > - if (tab_fd < 0) { > + tab_file = mks_tempfile(temp_tab_file_name.buf); > + if (!tab_file) { > err = REFTABLE_IO_ERROR; > goto done; > } > if (add->stack->config.default_permissions) { > - if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) { > + if (chmod(get_tempfile_path(tab_file), > + add->stack->config.default_permissions)) { > err = REFTABLE_IO_ERROR; > goto done; > } > } Since the tempfile is now being created through the tempfile API, I think the file mode can be set directly through `mks_tempfile_m()` instead of creating the tempfile and then using chmod. Just something I thought to mention. > + tab_fd = get_tempfile_fd(tab_file); -Justin
On Tue, Mar 05, 2024 at 04:30:18PM -0600, Justin Tobler wrote: > On 24/03/04 12:10PM, Patrick Steinhardt wrote: > > We do not register new tables which we're about to add to the stack with > > the tempfile API. Those tables will thus not be deleted in case Git gets > > killed. > > > > Refactor the code to register tables as tempfiles. > > > > Signed-off-by: Patrick Steinhardt <ps@pks.im> > > --- > > reftable/stack.c | 29 ++++++++++++----------------- > > 1 file changed, 12 insertions(+), 17 deletions(-) > > > > diff --git a/reftable/stack.c b/reftable/stack.c > > index b64e55648a..81544fbfa0 100644 > > --- a/reftable/stack.c > > +++ b/reftable/stack.c > > @@ -737,8 +737,9 @@ int reftable_addition_add(struct reftable_addition *add, > > struct strbuf tab_file_name = STRBUF_INIT; > > struct strbuf next_name = STRBUF_INIT; > > struct reftable_writer *wr = NULL; > > + struct tempfile *tab_file = NULL; > > int err = 0; > > - int tab_fd = 0; > > + int tab_fd; > > > > strbuf_reset(&next_name); > > format_name(&next_name, add->next_update_index, add->next_update_index); > > @@ -746,17 +747,20 @@ int reftable_addition_add(struct reftable_addition *add, > > stack_filename(&temp_tab_file_name, add->stack, next_name.buf); > > strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); > > > > - tab_fd = mkstemp(temp_tab_file_name.buf); > > - if (tab_fd < 0) { > > + tab_file = mks_tempfile(temp_tab_file_name.buf); > > + if (!tab_file) { > > err = REFTABLE_IO_ERROR; > > goto done; > > } > > if (add->stack->config.default_permissions) { > > - if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) { > > + if (chmod(get_tempfile_path(tab_file), > > + add->stack->config.default_permissions)) { > > err = REFTABLE_IO_ERROR; > > goto done; > > } > > } > > Since the tempfile is now being created through the tempfile API, I > think the file mode can be set directly through `mks_tempfile_m()` > instead of creating the tempfile and then using chmod. Just something I > thought to mention. Unfortunately not. The problem is that `mks_tempfile_m()` will munge passed-in permissions via "core.sharedRepository", but we already pre calculated the target mode in `config.default_permissions`. Thus, the result would have wrong permissions if we used `mks_tempfile_m()`. Patrick
On 24/03/06 12:59PM, Patrick Steinhardt wrote: > On Tue, Mar 05, 2024 at 04:30:18PM -0600, Justin Tobler wrote: > > On 24/03/04 12:10PM, Patrick Steinhardt wrote: > > > We do not register new tables which we're about to add to the stack with > > > the tempfile API. Those tables will thus not be deleted in case Git gets > > > killed. > > > > > > Refactor the code to register tables as tempfiles. > > > > > > Signed-off-by: Patrick Steinhardt <ps@pks.im> > > > --- > > > reftable/stack.c | 29 ++++++++++++----------------- > > > 1 file changed, 12 insertions(+), 17 deletions(-) > > > > > > diff --git a/reftable/stack.c b/reftable/stack.c > > > index b64e55648a..81544fbfa0 100644 > > > --- a/reftable/stack.c > > > +++ b/reftable/stack.c > > > @@ -737,8 +737,9 @@ int reftable_addition_add(struct reftable_addition *add, > > > struct strbuf tab_file_name = STRBUF_INIT; > > > struct strbuf next_name = STRBUF_INIT; > > > struct reftable_writer *wr = NULL; > > > + struct tempfile *tab_file = NULL; > > > int err = 0; > > > - int tab_fd = 0; > > > + int tab_fd; > > > > > > strbuf_reset(&next_name); > > > format_name(&next_name, add->next_update_index, add->next_update_index); > > > @@ -746,17 +747,20 @@ int reftable_addition_add(struct reftable_addition *add, > > > stack_filename(&temp_tab_file_name, add->stack, next_name.buf); > > > strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); > > > > > > - tab_fd = mkstemp(temp_tab_file_name.buf); > > > - if (tab_fd < 0) { > > > + tab_file = mks_tempfile(temp_tab_file_name.buf); > > > + if (!tab_file) { > > > err = REFTABLE_IO_ERROR; > > > goto done; > > > } > > > if (add->stack->config.default_permissions) { > > > - if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) { > > > + if (chmod(get_tempfile_path(tab_file), > > > + add->stack->config.default_permissions)) { > > > err = REFTABLE_IO_ERROR; > > > goto done; > > > } > > > } > > > > Since the tempfile is now being created through the tempfile API, I > > think the file mode can be set directly through `mks_tempfile_m()` > > instead of creating the tempfile and then using chmod. Just something I > > thought to mention. > > Unfortunately not. The problem is that `mks_tempfile_m()` will munge > passed-in permissions via "core.sharedRepository", but we already pre > calculated the target mode in `config.default_permissions`. Thus, the > result would have wrong permissions if we used `mks_tempfile_m()`. Ah that makes sense. Thanks for the explanation! -Justin
Patrick Steinhardt <ps@pks.im> writes: >> Since the tempfile is now being created through the tempfile API, I >> think the file mode can be set directly through `mks_tempfile_m()` >> instead of creating the tempfile and then using chmod. Just something I >> thought to mention. > > Unfortunately not. The problem is that `mks_tempfile_m()` will munge > passed-in permissions via "core.sharedRepository", but we already pre > calculated the target mode in `config.default_permissions`. Thus, the > result would have wrong permissions if we used `mks_tempfile_m()`. I somehow found that default_permissions thing always disturbing. Even if we keep a separate mechanism for determining the file permission (perhaps in order to give ourselves a better separation as "an independent library" from the rest of Git), shouldn't the permission setting that is computed by the mechanism and stored in config.default_permissions be consistent with the permission the rest of git computes based on core.sharedRepository?
On Wed, Mar 06, 2024 at 08:36:25AM -0800, Junio C Hamano wrote: > Patrick Steinhardt <ps@pks.im> writes: > > >> Since the tempfile is now being created through the tempfile API, I > >> think the file mode can be set directly through `mks_tempfile_m()` > >> instead of creating the tempfile and then using chmod. Just something I > >> thought to mention. > > > > Unfortunately not. The problem is that `mks_tempfile_m()` will munge > > passed-in permissions via "core.sharedRepository", but we already pre > > calculated the target mode in `config.default_permissions`. Thus, the > > result would have wrong permissions if we used `mks_tempfile_m()`. > > I somehow found that default_permissions thing always disturbing. > > Even if we keep a separate mechanism for determining the file > permission (perhaps in order to give ourselves a better separation > as "an independent library" from the rest of Git), shouldn't the > permission setting that is computed by the mechanism and stored in > config.default_permissions be consistent with the permission the > rest of git computes based on core.sharedRepository? It is consistent. The problem is rather that `mks_tempfile_m()` takes a mode as input, but still ends up applying the umask to that mode. Thus, using that function without a subsequent call to chmod(3P) would end up mishandling "core.sharedRepository". Patrick
Patrick Steinhardt <ps@pks.im> writes: > It is consistent. The problem is rather that `mks_tempfile_m()` takes a > mode as input, but still ends up applying the umask to that mode. Ah, OK. > Thus, > using that function without a subsequent call to chmod(3P) would end up > mishandling "core.sharedRepository". It is understandable because this is meant for tempfile; you want to create it, expect to use it yourself and not by others, before you are done with it. There is no place in this sequence where you need to open access up to those who share access to the repository. The rest of the message is primarily for my own education. I got a bit curious how other parts of the system does this. For example, writing an loose object calls git_mkstemp_mode() with 0444 and then finalize_object_file() calls adjust_shared_perm() after it renames the file to its final name at the end. A tight umask like 0700 may demote the original 0444 to 0400, but adjust_shared_perm() expands the 0400 appropriately. The index file is more interesting. We bypass the whole mkstemp thing (as we know the final filename and add .lock after it), and the call chain looks like this: repo_refresh_and_write_index() -> repo_hold_locked_index() -> hold_lock_file_for_update() -> lock_file() -> create_tempfile_mode() -> open() activate_tempfile() adjust_shared_perm() refresh_index() write_locked_index() -> commit_locked_index() -> commit_lock_file_to() -> rename_tempfile(). After we create the file with open() and before returning to the caller, hold_lock_file_for_update() already sets the permission bits correctly, so the "committing" phase to rename the written lockfile into its final place becomes merely a rename without any futzing with permission bits. So in short, for a file that we intend to create, write, and then commit to the final name, we use at least two approaches: - Let mkstemp_mode() do its thing, and fix the permission bits later with adjust_shared_perm(). - Let hold_lock_file_for_update() take care of the permission bits. I sense there might be some clean-up opportunities around here. After all, lockfile is (or at least pretends to be) built on top of tempfile, and it is for more permanent (as opposed to temporary) files, but it somehow wasn't a good fit to wrap new tables in this series? Thanks.
On Thu, Mar 07, 2024 at 09:59:50AM -0800, Junio C Hamano wrote: > Patrick Steinhardt <ps@pks.im> writes: [snip] > I sense there might be some clean-up opportunities around here. > After all, lockfile is (or at least pretends to be) built on top of > tempfile, and it is for more permanent (as opposed to temporary) > files, but it somehow wasn't a good fit to wrap new tables in this > series? Well, I didn't think lockfiles are a good fit here. I did convert "tables.list" to use a lock because it's a natural fit given that we want to use "table.list.lock". But newly written tables aren't written to a file with ".lock" suffix, but instead to a file ending with ".temp.XXXXXX". This is intentionally so that two processes can write new tables at the same point in time, even though two concurrent writes will end up being mutually exclusive. As lockfiles to me are rather about mutually exclusive locking I think that using tempfiles directly is preferable. As far as I can see there is also no real benefit with lockfiles in our context, except for the mode handling. But given that we have "default_permissions" I'd say it is preferable to consistently use these for now via chmod(3P). I ain't got any strong opinions on this though, I'm rather being pragmatic. So if there is a good reason to use lockfiles that I missed then I wouldn't mind converting the code. Patrick
Patrick Steinhardt <ps@pks.im> writes: > On Thu, Mar 07, 2024 at 09:59:50AM -0800, Junio C Hamano wrote: >> Patrick Steinhardt <ps@pks.im> writes: > [snip] >> I sense there might be some clean-up opportunities around here. >> After all, lockfile is (or at least pretends to be) built on top of >> tempfile, and it is for more permanent (as opposed to temporary) >> files, but it somehow wasn't a good fit to wrap new tables in this >> series? > ... > As lockfiles to me are rather about mutually exclusive locking I think > that using tempfiles directly is preferable. As far as I can see there > is also no real benefit with lockfiles in our context, except for the > mode handling. But given that we have "default_permissions" I'd say it > is preferable to consistently use these for now via chmod(3P). I wasn't talking about the use of temp or lock API in _this_ series, but was talking about the different permission bit handling between the two API. The loose object codepath that uses the tempfile API is closer to what you are doing, which suffers from the same "at creation tempfile API does not bother with permission bits because it may be removed at the end". The index codepath that uses the hold_lock_file_for_update() does not have to care, as it gets the permission right from the start. Because of these differences, the loose object codepath has to do the adjust_perm_bits() itself, and similarly, you have to fix the permission the same. These callers may become simpler if we give an option to the git_mkstemps_mode() to return a file whose permission bits are already correct from the start.
diff --git a/reftable/stack.c b/reftable/stack.c index b64e55648a..81544fbfa0 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -737,8 +737,9 @@ int reftable_addition_add(struct reftable_addition *add, struct strbuf tab_file_name = STRBUF_INIT; struct strbuf next_name = STRBUF_INIT; struct reftable_writer *wr = NULL; + struct tempfile *tab_file = NULL; int err = 0; - int tab_fd = 0; + int tab_fd; strbuf_reset(&next_name); format_name(&next_name, add->next_update_index, add->next_update_index); @@ -746,17 +747,20 @@ int reftable_addition_add(struct reftable_addition *add, stack_filename(&temp_tab_file_name, add->stack, next_name.buf); strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX"); - tab_fd = mkstemp(temp_tab_file_name.buf); - if (tab_fd < 0) { + tab_file = mks_tempfile(temp_tab_file_name.buf); + if (!tab_file) { err = REFTABLE_IO_ERROR; goto done; } if (add->stack->config.default_permissions) { - if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) { + if (chmod(get_tempfile_path(tab_file), + add->stack->config.default_permissions)) { err = REFTABLE_IO_ERROR; goto done; } } + tab_fd = get_tempfile_fd(tab_file); + wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd, &add->stack->config); err = write_table(wr, arg); @@ -771,14 +775,13 @@ int reftable_addition_add(struct reftable_addition *add, if (err < 0) goto done; - err = close(tab_fd); - tab_fd = 0; + err = close_tempfile_gently(tab_file); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } - err = stack_check_addition(add->stack, temp_tab_file_name.buf); + err = stack_check_addition(add->stack, get_tempfile_path(tab_file)); if (err < 0) goto done; @@ -789,14 +792,13 @@ int reftable_addition_add(struct reftable_addition *add, format_name(&next_name, wr->min_update_index, wr->max_update_index); strbuf_addstr(&next_name, ".ref"); - stack_filename(&tab_file_name, add->stack, next_name.buf); /* On windows, this relies on rand() picking a unique destination name. Maybe we should do retry loop as well? */ - err = rename(temp_tab_file_name.buf, tab_file_name.buf); + err = rename_tempfile(&tab_file, tab_file_name.buf); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; @@ -806,14 +808,7 @@ int reftable_addition_add(struct reftable_addition *add, add->new_tables_cap); add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL); done: - if (tab_fd > 0) { - close(tab_fd); - tab_fd = 0; - } - if (temp_tab_file_name.len > 0) { - unlink(temp_tab_file_name.buf); - } - + delete_tempfile(&tab_file); strbuf_release(&temp_tab_file_name); strbuf_release(&tab_file_name); strbuf_release(&next_name);
We do not register new tables which we're about to add to the stack with the tempfile API. Those tables will thus not be deleted in case Git gets killed. Refactor the code to register tables as tempfiles. Signed-off-by: Patrick Steinhardt <ps@pks.im> --- reftable/stack.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-)