diff mbox series

[2/4] reftable/stack: register new tables as tempfiles

Message ID 02bf41d419efd00e510a89a405e1b046b166ba20.1709549619.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series reftable/stack: register temporary files | expand

Commit Message

Patrick Steinhardt March 4, 2024, 11:10 a.m. UTC
We do not register new tables which we're about to add to the stack with
the tempfile API. Those tables will thus not be deleted in case Git gets
killed.

Refactor the code to register tables as tempfiles.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/stack.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

Comments

Justin Tobler March 5, 2024, 10:30 p.m. UTC | #1
On 24/03/04 12:10PM, Patrick Steinhardt wrote:
> We do not register new tables which we're about to add to the stack with
> the tempfile API. Those tables will thus not be deleted in case Git gets
> killed.
> 
> Refactor the code to register tables as tempfiles.
> 
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  reftable/stack.c | 29 ++++++++++++-----------------
>  1 file changed, 12 insertions(+), 17 deletions(-)
> 
> diff --git a/reftable/stack.c b/reftable/stack.c
> index b64e55648a..81544fbfa0 100644
> --- a/reftable/stack.c
> +++ b/reftable/stack.c
> @@ -737,8 +737,9 @@ int reftable_addition_add(struct reftable_addition *add,
>  	struct strbuf tab_file_name = STRBUF_INIT;
>  	struct strbuf next_name = STRBUF_INIT;
>  	struct reftable_writer *wr = NULL;
> +	struct tempfile *tab_file = NULL;
>  	int err = 0;
> -	int tab_fd = 0;
> +	int tab_fd;
>  
>  	strbuf_reset(&next_name);
>  	format_name(&next_name, add->next_update_index, add->next_update_index);
> @@ -746,17 +747,20 @@ int reftable_addition_add(struct reftable_addition *add,
>  	stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
>  	strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX");
>  
> -	tab_fd = mkstemp(temp_tab_file_name.buf);
> -	if (tab_fd < 0) {
> +	tab_file = mks_tempfile(temp_tab_file_name.buf);
> +	if (!tab_file) {
>  		err = REFTABLE_IO_ERROR;
>  		goto done;
>  	}
>  	if (add->stack->config.default_permissions) {
> -		if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) {
> +		if (chmod(get_tempfile_path(tab_file),
> +			  add->stack->config.default_permissions)) {
>  			err = REFTABLE_IO_ERROR;
>  			goto done;
>  		}
>  	}

Since the tempfile is now being created through the tempfile API, I
think the file mode can be set directly through `mks_tempfile_m()`
instead of creating the tempfile and then using chmod. Just something I
thought to mention.

> +	tab_fd = get_tempfile_fd(tab_file);

-Justin
Patrick Steinhardt March 6, 2024, 11:59 a.m. UTC | #2
On Tue, Mar 05, 2024 at 04:30:18PM -0600, Justin Tobler wrote:
> On 24/03/04 12:10PM, Patrick Steinhardt wrote:
> > We do not register new tables which we're about to add to the stack with
> > the tempfile API. Those tables will thus not be deleted in case Git gets
> > killed.
> > 
> > Refactor the code to register tables as tempfiles.
> > 
> > Signed-off-by: Patrick Steinhardt <ps@pks.im>
> > ---
> >  reftable/stack.c | 29 ++++++++++++-----------------
> >  1 file changed, 12 insertions(+), 17 deletions(-)
> > 
> > diff --git a/reftable/stack.c b/reftable/stack.c
> > index b64e55648a..81544fbfa0 100644
> > --- a/reftable/stack.c
> > +++ b/reftable/stack.c
> > @@ -737,8 +737,9 @@ int reftable_addition_add(struct reftable_addition *add,
> >  	struct strbuf tab_file_name = STRBUF_INIT;
> >  	struct strbuf next_name = STRBUF_INIT;
> >  	struct reftable_writer *wr = NULL;
> > +	struct tempfile *tab_file = NULL;
> >  	int err = 0;
> > -	int tab_fd = 0;
> > +	int tab_fd;
> >  
> >  	strbuf_reset(&next_name);
> >  	format_name(&next_name, add->next_update_index, add->next_update_index);
> > @@ -746,17 +747,20 @@ int reftable_addition_add(struct reftable_addition *add,
> >  	stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
> >  	strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX");
> >  
> > -	tab_fd = mkstemp(temp_tab_file_name.buf);
> > -	if (tab_fd < 0) {
> > +	tab_file = mks_tempfile(temp_tab_file_name.buf);
> > +	if (!tab_file) {
> >  		err = REFTABLE_IO_ERROR;
> >  		goto done;
> >  	}
> >  	if (add->stack->config.default_permissions) {
> > -		if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) {
> > +		if (chmod(get_tempfile_path(tab_file),
> > +			  add->stack->config.default_permissions)) {
> >  			err = REFTABLE_IO_ERROR;
> >  			goto done;
> >  		}
> >  	}
> 
> Since the tempfile is now being created through the tempfile API, I
> think the file mode can be set directly through `mks_tempfile_m()`
> instead of creating the tempfile and then using chmod. Just something I
> thought to mention.

Unfortunately not. The problem is that `mks_tempfile_m()` will munge
passed-in permissions via "core.sharedRepository", but we already pre
calculated the target mode in `config.default_permissions`. Thus, the
result would have wrong permissions if we used `mks_tempfile_m()`.

Patrick
Justin Tobler March 6, 2024, 4:34 p.m. UTC | #3
On 24/03/06 12:59PM, Patrick Steinhardt wrote:
> On Tue, Mar 05, 2024 at 04:30:18PM -0600, Justin Tobler wrote:
> > On 24/03/04 12:10PM, Patrick Steinhardt wrote:
> > > We do not register new tables which we're about to add to the stack with
> > > the tempfile API. Those tables will thus not be deleted in case Git gets
> > > killed.
> > > 
> > > Refactor the code to register tables as tempfiles.
> > > 
> > > Signed-off-by: Patrick Steinhardt <ps@pks.im>
> > > ---
> > >  reftable/stack.c | 29 ++++++++++++-----------------
> > >  1 file changed, 12 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/reftable/stack.c b/reftable/stack.c
> > > index b64e55648a..81544fbfa0 100644
> > > --- a/reftable/stack.c
> > > +++ b/reftable/stack.c
> > > @@ -737,8 +737,9 @@ int reftable_addition_add(struct reftable_addition *add,
> > >  	struct strbuf tab_file_name = STRBUF_INIT;
> > >  	struct strbuf next_name = STRBUF_INIT;
> > >  	struct reftable_writer *wr = NULL;
> > > +	struct tempfile *tab_file = NULL;
> > >  	int err = 0;
> > > -	int tab_fd = 0;
> > > +	int tab_fd;
> > >  
> > >  	strbuf_reset(&next_name);
> > >  	format_name(&next_name, add->next_update_index, add->next_update_index);
> > > @@ -746,17 +747,20 @@ int reftable_addition_add(struct reftable_addition *add,
> > >  	stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
> > >  	strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX");
> > >  
> > > -	tab_fd = mkstemp(temp_tab_file_name.buf);
> > > -	if (tab_fd < 0) {
> > > +	tab_file = mks_tempfile(temp_tab_file_name.buf);
> > > +	if (!tab_file) {
> > >  		err = REFTABLE_IO_ERROR;
> > >  		goto done;
> > >  	}
> > >  	if (add->stack->config.default_permissions) {
> > > -		if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) {
> > > +		if (chmod(get_tempfile_path(tab_file),
> > > +			  add->stack->config.default_permissions)) {
> > >  			err = REFTABLE_IO_ERROR;
> > >  			goto done;
> > >  		}
> > >  	}
> > 
> > Since the tempfile is now being created through the tempfile API, I
> > think the file mode can be set directly through `mks_tempfile_m()`
> > instead of creating the tempfile and then using chmod. Just something I
> > thought to mention.
> 
> Unfortunately not. The problem is that `mks_tempfile_m()` will munge
> passed-in permissions via "core.sharedRepository", but we already pre
> calculated the target mode in `config.default_permissions`. Thus, the
> result would have wrong permissions if we used `mks_tempfile_m()`.

Ah that makes sense. Thanks for the explanation!

-Justin
Junio C Hamano March 6, 2024, 4:36 p.m. UTC | #4
Patrick Steinhardt <ps@pks.im> writes:

>> Since the tempfile is now being created through the tempfile API, I
>> think the file mode can be set directly through `mks_tempfile_m()`
>> instead of creating the tempfile and then using chmod. Just something I
>> thought to mention.
>
> Unfortunately not. The problem is that `mks_tempfile_m()` will munge
> passed-in permissions via "core.sharedRepository", but we already pre
> calculated the target mode in `config.default_permissions`. Thus, the
> result would have wrong permissions if we used `mks_tempfile_m()`.

I somehow found that default_permissions thing always disturbing.

Even if we keep a separate mechanism for determining the file
permission (perhaps in order to give ourselves a better separation
as "an independent library" from the rest of Git), shouldn't the
permission setting that is computed by the mechanism and stored in
config.default_permissions be consistent with the permission the
rest of git computes based on core.sharedRepository?
Patrick Steinhardt March 7, 2024, 6:17 a.m. UTC | #5
On Wed, Mar 06, 2024 at 08:36:25AM -0800, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> >> Since the tempfile is now being created through the tempfile API, I
> >> think the file mode can be set directly through `mks_tempfile_m()`
> >> instead of creating the tempfile and then using chmod. Just something I
> >> thought to mention.
> >
> > Unfortunately not. The problem is that `mks_tempfile_m()` will munge
> > passed-in permissions via "core.sharedRepository", but we already pre
> > calculated the target mode in `config.default_permissions`. Thus, the
> > result would have wrong permissions if we used `mks_tempfile_m()`.
> 
> I somehow found that default_permissions thing always disturbing.
> 
> Even if we keep a separate mechanism for determining the file
> permission (perhaps in order to give ourselves a better separation
> as "an independent library" from the rest of Git), shouldn't the
> permission setting that is computed by the mechanism and stored in
> config.default_permissions be consistent with the permission the
> rest of git computes based on core.sharedRepository?

It is consistent. The problem is rather that `mks_tempfile_m()` takes a
mode as input, but still ends up applying the umask to that mode. Thus,
using that function without a subsequent call to chmod(3P) would end up
mishandling "core.sharedRepository".

Patrick
Junio C Hamano March 7, 2024, 5:59 p.m. UTC | #6
Patrick Steinhardt <ps@pks.im> writes:

> It is consistent. The problem is rather that `mks_tempfile_m()` takes a
> mode as input, but still ends up applying the umask to that mode.

Ah, OK.  

> Thus,
> using that function without a subsequent call to chmod(3P) would end up
> mishandling "core.sharedRepository".

It is understandable because this is meant for tempfile; you want to
create it, expect to use it yourself and not by others, before you
are done with it.  There is no place in this sequence where you need
to open access up to those who share access to the repository.

The rest of the message is primarily for my own education.

I got a bit curious how other parts of the system does this.  For
example, writing an loose object calls git_mkstemp_mode() with 0444
and then finalize_object_file() calls adjust_shared_perm() after it
renames the file to its final name at the end.  A tight umask like
0700 may demote the original 0444 to 0400, but adjust_shared_perm()
expands the 0400 appropriately.

The index file is more interesting.  We bypass the whole mkstemp
thing (as we know the final filename and add .lock after it), and
the call chain looks like this:

    repo_refresh_and_write_index() ->
      repo_hold_locked_index() ->
        hold_lock_file_for_update() ->
          lock_file() ->
            create_tempfile_mode() ->
              open()
              activate_tempfile()
              adjust_shared_perm()
      refresh_index()
      write_locked_index() ->
        commit_locked_index() ->
          commit_lock_file_to() ->
            rename_tempfile().

After we create the file with open() and before returning to the
caller, hold_lock_file_for_update() already sets the permission
bits correctly, so the "committing" phase to rename the written
lockfile into its final place becomes merely a rename without any
futzing with permission bits.

So in short, for a file that we intend to create, write, and then
commit to the final name, we use at least two approaches:

 - Let mkstemp_mode() do its thing, and fix the permission bits
   later with adjust_shared_perm().

 - Let hold_lock_file_for_update() take care of the permission bits.

I sense there might be some clean-up opportunities around here.
After all, lockfile is (or at least pretends to be) built on top of
tempfile, and it is for more permanent (as opposed to temporary)
files, but it somehow wasn't a good fit to wrap new tables in this
series?

Thanks.
Patrick Steinhardt March 7, 2024, 8:54 p.m. UTC | #7
On Thu, Mar 07, 2024 at 09:59:50AM -0800, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
[snip]
> I sense there might be some clean-up opportunities around here.
> After all, lockfile is (or at least pretends to be) built on top of
> tempfile, and it is for more permanent (as opposed to temporary)
> files, but it somehow wasn't a good fit to wrap new tables in this
> series?

Well, I didn't think lockfiles are a good fit here. I did convert
"tables.list" to use a lock because it's a natural fit given that we
want to use "table.list.lock". But newly written tables aren't written
to a file with ".lock" suffix, but instead to a file ending with
".temp.XXXXXX". This is intentionally so that two processes can write
new tables at the same point in time, even though two concurrent writes
will end up being mutually exclusive.

As lockfiles to me are rather about mutually exclusive locking I think
that using tempfiles directly is preferable. As far as I can see there
is also no real benefit with lockfiles in our context, except for the
mode handling. But given that we have "default_permissions" I'd say it
is preferable to consistently use these for now via chmod(3P).

I ain't got any strong opinions on this though, I'm rather being
pragmatic. So if there is a good reason to use lockfiles that I missed
then I wouldn't mind converting the code.

Patrick
Junio C Hamano March 7, 2024, 9:06 p.m. UTC | #8
Patrick Steinhardt <ps@pks.im> writes:

> On Thu, Mar 07, 2024 at 09:59:50AM -0800, Junio C Hamano wrote:
>> Patrick Steinhardt <ps@pks.im> writes:
> [snip]
>> I sense there might be some clean-up opportunities around here.
>> After all, lockfile is (or at least pretends to be) built on top of
>> tempfile, and it is for more permanent (as opposed to temporary)
>> files, but it somehow wasn't a good fit to wrap new tables in this
>> series?
> ...
> As lockfiles to me are rather about mutually exclusive locking I think
> that using tempfiles directly is preferable. As far as I can see there
> is also no real benefit with lockfiles in our context, except for the
> mode handling. But given that we have "default_permissions" I'd say it
> is preferable to consistently use these for now via chmod(3P).

I wasn't talking about the use of temp or lock API in _this_ series,
but was talking about the different permission bit handling between
the two API.  The loose object codepath that uses the tempfile API
is closer to what you are doing, which suffers from the same "at
creation tempfile API does not bother with permission bits because
it may be removed at the end".  The index codepath that uses the
hold_lock_file_for_update() does not have to care, as it gets the
permission right from the start.

Because of these differences, the loose object codepath has to do
the adjust_perm_bits() itself, and similarly, you have to fix the
permission the same.

These callers may become simpler if we give an option to the
git_mkstemps_mode() to return a file whose permission bits are
already correct from the start.
diff mbox series

Patch

diff --git a/reftable/stack.c b/reftable/stack.c
index b64e55648a..81544fbfa0 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -737,8 +737,9 @@  int reftable_addition_add(struct reftable_addition *add,
 	struct strbuf tab_file_name = STRBUF_INIT;
 	struct strbuf next_name = STRBUF_INIT;
 	struct reftable_writer *wr = NULL;
+	struct tempfile *tab_file = NULL;
 	int err = 0;
-	int tab_fd = 0;
+	int tab_fd;
 
 	strbuf_reset(&next_name);
 	format_name(&next_name, add->next_update_index, add->next_update_index);
@@ -746,17 +747,20 @@  int reftable_addition_add(struct reftable_addition *add,
 	stack_filename(&temp_tab_file_name, add->stack, next_name.buf);
 	strbuf_addstr(&temp_tab_file_name, ".temp.XXXXXX");
 
-	tab_fd = mkstemp(temp_tab_file_name.buf);
-	if (tab_fd < 0) {
+	tab_file = mks_tempfile(temp_tab_file_name.buf);
+	if (!tab_file) {
 		err = REFTABLE_IO_ERROR;
 		goto done;
 	}
 	if (add->stack->config.default_permissions) {
-		if (chmod(temp_tab_file_name.buf, add->stack->config.default_permissions)) {
+		if (chmod(get_tempfile_path(tab_file),
+			  add->stack->config.default_permissions)) {
 			err = REFTABLE_IO_ERROR;
 			goto done;
 		}
 	}
+	tab_fd = get_tempfile_fd(tab_file);
+
 	wr = reftable_new_writer(reftable_fd_write, reftable_fd_flush, &tab_fd,
 				 &add->stack->config);
 	err = write_table(wr, arg);
@@ -771,14 +775,13 @@  int reftable_addition_add(struct reftable_addition *add,
 	if (err < 0)
 		goto done;
 
-	err = close(tab_fd);
-	tab_fd = 0;
+	err = close_tempfile_gently(tab_file);
 	if (err < 0) {
 		err = REFTABLE_IO_ERROR;
 		goto done;
 	}
 
-	err = stack_check_addition(add->stack, temp_tab_file_name.buf);
+	err = stack_check_addition(add->stack, get_tempfile_path(tab_file));
 	if (err < 0)
 		goto done;
 
@@ -789,14 +792,13 @@  int reftable_addition_add(struct reftable_addition *add,
 
 	format_name(&next_name, wr->min_update_index, wr->max_update_index);
 	strbuf_addstr(&next_name, ".ref");
-
 	stack_filename(&tab_file_name, add->stack, next_name.buf);
 
 	/*
 	  On windows, this relies on rand() picking a unique destination name.
 	  Maybe we should do retry loop as well?
 	 */
-	err = rename(temp_tab_file_name.buf, tab_file_name.buf);
+	err = rename_tempfile(&tab_file, tab_file_name.buf);
 	if (err < 0) {
 		err = REFTABLE_IO_ERROR;
 		goto done;
@@ -806,14 +808,7 @@  int reftable_addition_add(struct reftable_addition *add,
 			    add->new_tables_cap);
 	add->new_tables[add->new_tables_len++] = strbuf_detach(&next_name, NULL);
 done:
-	if (tab_fd > 0) {
-		close(tab_fd);
-		tab_fd = 0;
-	}
-	if (temp_tab_file_name.len > 0) {
-		unlink(temp_tab_file_name.buf);
-	}
-
+	delete_tempfile(&tab_file);
 	strbuf_release(&temp_tab_file_name);
 	strbuf_release(&tab_file_name);
 	strbuf_release(&next_name);