diff mbox series

[1/2] object-file: use futimes rather than utime

Message ID 2c1ddef6057157d85da74a7274e03eacf0374e45.1629856293.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Implement a bulk-checkin option for core.fsyncObjectFiles | expand

Commit Message

Neeraj Singh (WINDOWS-SFS) Aug. 25, 2021, 1:51 a.m. UTC
From: Neeraj Singh <neerajsi@microsoft.com>

Refactor the loose object file creation code and use the futimes(2) API
rather than utime. This should be slightly faster given that we already
have an FD to work with.

Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
---
 compat/mingw.c | 42 +++++++++++++++++++++++++++++-------------
 compat/mingw.h |  2 ++
 object-file.c  | 17 ++++++++---------
 3 files changed, 39 insertions(+), 22 deletions(-)

Comments

Johannes Schindelin Aug. 25, 2021, 1:51 p.m. UTC | #1
Hi Neeraj,

Thank you so much for this patch series! Overall, I am very happy with the
direction this is going.

I will offer a couple of suggestions below, inlined.

On Wed, 25 Aug 2021, Neeraj Singh via GitGitGadget wrote:

> From: Neeraj Singh <neerajsi@microsoft.com>
>
> Refactor the loose object file creation code and use the futimes(2) API
> rather than utime. This should be slightly faster given that we already
> have an FD to work with.

If I were you, I would spell out "file descriptor" here.

>
> Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
> ---
>  compat/mingw.c | 42 +++++++++++++++++++++++++++++-------------
>  compat/mingw.h |  2 ++
>  object-file.c  | 17 ++++++++---------
>  3 files changed, 39 insertions(+), 22 deletions(-)
>
> diff --git a/compat/mingw.c b/compat/mingw.c
> index 9e0cd1e097f..948f4c3428b 100644
> --- a/compat/mingw.c
> +++ b/compat/mingw.c
> @@ -949,19 +949,40 @@ int mingw_fstat(int fd, struct stat *buf)
>  	}
>  }
>
> -static inline void time_t_to_filetime(time_t t, FILETIME *ft)
> +static inline void timeval_to_filetime(const struct timeval *t, FILETIME *ft)
>  {
> -	long long winTime = t * 10000000LL + 116444736000000000LL;
> +	long long winTime = t->tv_sec * 10000000LL + t->tv_usec * 10 + 116444736000000000LL;

Technically, this is a change in behavior, right? We did not use to use
nanosecond precision. But I don't think that we actually make use of this
in this patch.

>  	ft->dwLowDateTime = winTime;
>  	ft->dwHighDateTime = winTime >> 32;
>  }
>
> -int mingw_utime (const char *file_name, const struct utimbuf *times)
> +int mingw_futimes(int fd, const struct timeval times[2])

At first, I wondered whether it would make sense to pass the access time
and the modified time separately, as pointers. I don't think that we pass
around arrays as function parameters in Git anywhere else.

But then I realized that `futimes()` is available in this precise form on
Linux and on the BSDs. Therefore, it is not up to us to decide the
function's signature.

However, now that I looked at the manual page, I noticed that this
function is not part of any POSIX standard.

Which makes me think that we will have to do a bit more than just define
it on Windows: we will have to introduce a `Makefile` knob (just like you
did with `HAVE_SYNC_FILE_RANGE` in patch 2/2) and set that specifically
for Linux and the BSDs, and use `futimes()` only if it is available
(otherwise fall back to `utime()`).

Then, as a separate patch, we should introduce this Windows-specific shim
and declare that it is available via `config.mak.uname`.

I am a _huge_ fan of patches that are so clear and obvious that bugs have
a hard time creeping in without being spotted immediately. And I think
that this organization would help achieve this goal.

>  {
>  	FILETIME mft, aft;
> +
> +	if (times) {
> +		timeval_to_filetime(&times[0], &aft);
> +		timeval_to_filetime(&times[1], &mft);
> +	} else {
> +		GetSystemTimeAsFileTime(&mft);
> +		aft = mft;
> +	}
> +
> +	if (!SetFileTime((HANDLE)_get_osfhandle(fd), NULL, &aft, &mft)) {
> +		errno = EINVAL;
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +int mingw_utime (const char *file_name, const struct utimbuf *times)

Please lose the space between the function name and the opening
parenthesis. I know, the preimage of this diff has it, but that was an
oversight and definitely disagrees with our current coding style.

> +{
>  	int fh, rc;
>  	DWORD attrs;
>  	wchar_t wfilename[MAX_PATH];
> +	struct timeval tvs[2];
> +
>  	if (xutftowcs_path(wfilename, file_name) < 0)
>  		return -1;
>
> @@ -979,17 +1000,12 @@ int mingw_utime (const char *file_name, const struct utimbuf *times)
>  	}
>
>  	if (times) {
> -		time_t_to_filetime(times->modtime, &mft);
> -		time_t_to_filetime(times->actime, &aft);
> -	} else {
> -		GetSystemTimeAsFileTime(&mft);
> -		aft = mft;
> +		memset(tvs, 0, sizeof(tvs));
> +		tvs[0].tv_sec = times->actime;
> +		tvs[1].tv_sec = times->modtime;

It is too bad that we have to copy around those values just to convert
them, but I cannot think of any better way, either. And it's not like
we're in a hot loop: this code will be dominated by I/O anyways.

>  	}
> -	if (!SetFileTime((HANDLE)_get_osfhandle(fh), NULL, &aft, &mft)) {
> -		errno = EINVAL;
> -		rc = -1;
> -	} else
> -		rc = 0;
> +
> +	rc = mingw_futimes(fh, times ? tvs : NULL);
>  	close(fh);
>
>  revert_attrs:
> diff --git a/compat/mingw.h b/compat/mingw.h
> index c9a52ad64a6..1eb14edb2ed 100644
> --- a/compat/mingw.h
> +++ b/compat/mingw.h
> @@ -398,6 +398,8 @@ int mingw_fstat(int fd, struct stat *buf);
>
>  int mingw_utime(const char *file_name, const struct utimbuf *times);
>  #define utime mingw_utime
> +int mingw_futimes(int fd, const struct timeval times[2]);
> +#define futimes mingw_futimes
>  size_t mingw_strftime(char *s, size_t max,
>  		   const char *format, const struct tm *tm);
>  #define strftime mingw_strftime
> diff --git a/object-file.c b/object-file.c
> index a8be8994814..607e9e2f80b 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -1860,12 +1860,13 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
>  }
>
>  /* Finalize a file on disk, and close it. */
> -static void close_loose_object(int fd)
> +static int close_loose_object(int fd, const char *tmpfile, const char *filename)
>  {
>  	if (fsync_object_files)
>  		fsync_or_die(fd, "loose object file");
>  	if (close(fd) != 0)
>  		die_errno(_("error when closing loose object file"));
> +	return finalize_object_file(tmpfile, filename);

While this is a clear change of behavior, this function has only one
caller, and that caller is adjusted accordingly.

Could you add this clarification of context to the commit message? I know
it will help me in the future, when I have to get up to speed again by
reading the commit history.

Thank you,
Johannes

>  }
>
>  /* Size of directory component, including the ending '/' */
> @@ -1973,17 +1974,15 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
>  		die(_("confused by unstable object source data for %s"),
>  		    oid_to_hex(oid));
>
> -	close_loose_object(fd);
> -
>  	if (mtime) {
> -		struct utimbuf utb;
> -		utb.actime = mtime;
> -		utb.modtime = mtime;
> -		if (utime(tmp_file.buf, &utb) < 0)
> -			warning_errno(_("failed utime() on %s"), tmp_file.buf);
> +		struct timeval tvs[2] = {0};
> +		tvs[0].tv_sec = mtime;
> +		tvs[1].tv_sec = mtime;
> +		if (futimes(fd, tvs) < 0)
> +			warning_errno(_("failed futimes() on %s"), tmp_file.buf);
>  	}
>
> -	return finalize_object_file(tmp_file.buf, filename.buf);
> +	return close_loose_object(fd, tmp_file.buf, filename.buf);
>  }
>
>  static int freshen_loose_object(const struct object_id *oid)
> --
> gitgitgadget
>
>
Neeraj Singh Aug. 25, 2021, 10:08 p.m. UTC | #2
On Wed, Aug 25, 2021 at 6:51 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
> If I were you, I would spell out "file descriptor" here.
Will do.

> > diff --git a/compat/mingw.c b/compat/mingw.c
> > index 9e0cd1e097f..948f4c3428b 100644
> > --- a/compat/mingw.c
> > +++ b/compat/mingw.c
> > @@ -949,19 +949,40 @@ int mingw_fstat(int fd, struct stat *buf)
> >       }
> >  }
> >
> > -static inline void time_t_to_filetime(time_t t, FILETIME *ft)
> > +static inline void timeval_to_filetime(const struct timeval *t, FILETIME *ft)
> >  {
> > -     long long winTime = t * 10000000LL + 116444736000000000LL;
> > +     long long winTime = t->tv_sec * 10000000LL + t->tv_usec * 10 + 116444736000000000LL;
>
> Technically, this is a change in behavior, right? We did not use to use
> nanosecond precision. But I don't think that we actually make use of this
> in this patch.
>
> >       ft->dwLowDateTime = winTime;
> >       ft->dwHighDateTime = winTime >> 32;
> >  }
> >
> > -int mingw_utime (const char *file_name, const struct utimbuf *times)
> > +int mingw_futimes(int fd, const struct timeval times[2])
>
> At first, I wondered whether it would make sense to pass the access time
> and the modified time separately, as pointers. I don't think that we pass
> around arrays as function parameters in Git anywhere else.
>
> But then I realized that `futimes()` is available in this precise form on
> Linux and on the BSDs. Therefore, it is not up to us to decide the
> function's signature.
>
> However, now that I looked at the manual page, I noticed that this
> function is not part of any POSIX standard.
>
> Which makes me think that we will have to do a bit more than just define
> it on Windows: we will have to introduce a `Makefile` knob (just like you
> did with `HAVE_SYNC_FILE_RANGE` in patch 2/2) and set that specifically
> for Linux and the BSDs, and use `futimes()` only if it is available
> (otherwise fall back to `utime()`).
>
> Then, as a separate patch, we should introduce this Windows-specific shim
> and declare that it is available via `config.mak.uname`.

Thanks for taking another look at the man pages. I looked again too and saw
that futimens is part of POSIX.1-2008:
https://pubs.opengroup.org/onlinepubs/9699919799/functions/futimens.html.
If I switch to futimens and implement the Windows shim at the same
time, is that sufficient to
address your feedback? I'd rather not ifdef this one since the
codeflow is quite different
depending on the presence of the API.

> > +}
> > +
> > +int mingw_utime (const char *file_name, const struct utimbuf *times)
>
> Please lose the space between the function name and the opening
> parenthesis. I know, the preimage of this diff has it, but that was an
> oversight and definitely disagrees with our current coding style.
Will do.

>
> > +{
> >       int fh, rc;
> >       DWORD attrs;
> >       wchar_t wfilename[MAX_PATH];
> > +     struct timeval tvs[2];
> > +
> >       if (xutftowcs_path(wfilename, file_name) < 0)
> >               return -1;
> >
> > @@ -979,17 +1000,12 @@ int mingw_utime (const char *file_name, const struct utimbuf *times)
> >       }
> >
> >       if (times) {
> > -             time_t_to_filetime(times->modtime, &mft);
> > -             time_t_to_filetime(times->actime, &aft);
> > -     } else {
> > -             GetSystemTimeAsFileTime(&mft);
> > -             aft = mft;
> > +             memset(tvs, 0, sizeof(tvs));
> > +             tvs[0].tv_sec = times->actime;
> > +             tvs[1].tv_sec = times->modtime;
>
> It is too bad that we have to copy around those values just to convert
> them, but I cannot think of any better way, either. And it's not like
> we're in a hot loop: this code will be dominated by I/O anyways.

Yeah, the cost of this is approximately 3-4 cycles (load-to-use
latency), so no one will notice relative to the system call overhead.

> > diff --git a/object-file.c b/object-file.c
> > index a8be8994814..607e9e2f80b 100644
> > --- a/object-file.c
> > +++ b/object-file.c
> > @@ -1860,12 +1860,13 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
> >  }
> >
> >  /* Finalize a file on disk, and close it. */
> > -static void close_loose_object(int fd)
> > +static int close_loose_object(int fd, const char *tmpfile, const char *filename)
> >  {
> >       if (fsync_object_files)
> >               fsync_or_die(fd, "loose object file");
> >       if (close(fd) != 0)
> >               die_errno(_("error when closing loose object file"));
> > +     return finalize_object_file(tmpfile, filename);
>
> While this is a clear change of behavior, this function has only one
> caller, and that caller is adjusted accordingly.
>
> Could you add this clarification of context to the commit message? I know
> it will help me in the future, when I have to get up to speed again by
> reading the commit history.

How does the following revised wording sound?
```
    object-file: use futimens rather than utime

    Make close_loose_object do all of the steps for syncing and correctly
    naming a new loose object so that it can be reimplemented in the
    upcoming bulk-fsync mode.

    Use futimens, which is available in POSIX.1-2008 to update the file
    timestamps. This should be slightly faster than utime, since we have
    a file descriptor already available. This change allows us to update
    the time before closing, renaming, and potentially fsyincing the file
    being refreshed. This code is currently only invoked by git-pack-objects
    via force_object_loose.

    Implement a futimens shim for the Windows port of Git.

    Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
```

Thanks for the detailed and quick feedback!
-Neeraj
diff mbox series

Patch

diff --git a/compat/mingw.c b/compat/mingw.c
index 9e0cd1e097f..948f4c3428b 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -949,19 +949,40 @@  int mingw_fstat(int fd, struct stat *buf)
 	}
 }
 
-static inline void time_t_to_filetime(time_t t, FILETIME *ft)
+static inline void timeval_to_filetime(const struct timeval *t, FILETIME *ft)
 {
-	long long winTime = t * 10000000LL + 116444736000000000LL;
+	long long winTime = t->tv_sec * 10000000LL + t->tv_usec * 10 + 116444736000000000LL;
 	ft->dwLowDateTime = winTime;
 	ft->dwHighDateTime = winTime >> 32;
 }
 
-int mingw_utime (const char *file_name, const struct utimbuf *times)
+int mingw_futimes(int fd, const struct timeval times[2])
 {
 	FILETIME mft, aft;
+
+	if (times) {
+		timeval_to_filetime(&times[0], &aft);
+		timeval_to_filetime(&times[1], &mft);
+	} else {
+		GetSystemTimeAsFileTime(&mft);
+		aft = mft;
+	}
+
+	if (!SetFileTime((HANDLE)_get_osfhandle(fd), NULL, &aft, &mft)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	return 0;
+}
+
+int mingw_utime (const char *file_name, const struct utimbuf *times)
+{
 	int fh, rc;
 	DWORD attrs;
 	wchar_t wfilename[MAX_PATH];
+	struct timeval tvs[2];
+
 	if (xutftowcs_path(wfilename, file_name) < 0)
 		return -1;
 
@@ -979,17 +1000,12 @@  int mingw_utime (const char *file_name, const struct utimbuf *times)
 	}
 
 	if (times) {
-		time_t_to_filetime(times->modtime, &mft);
-		time_t_to_filetime(times->actime, &aft);
-	} else {
-		GetSystemTimeAsFileTime(&mft);
-		aft = mft;
+		memset(tvs, 0, sizeof(tvs));
+		tvs[0].tv_sec = times->actime;
+		tvs[1].tv_sec = times->modtime;
 	}
-	if (!SetFileTime((HANDLE)_get_osfhandle(fh), NULL, &aft, &mft)) {
-		errno = EINVAL;
-		rc = -1;
-	} else
-		rc = 0;
+
+	rc = mingw_futimes(fh, times ? tvs : NULL);
 	close(fh);
 
 revert_attrs:
diff --git a/compat/mingw.h b/compat/mingw.h
index c9a52ad64a6..1eb14edb2ed 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -398,6 +398,8 @@  int mingw_fstat(int fd, struct stat *buf);
 
 int mingw_utime(const char *file_name, const struct utimbuf *times);
 #define utime mingw_utime
+int mingw_futimes(int fd, const struct timeval times[2]);
+#define futimes mingw_futimes
 size_t mingw_strftime(char *s, size_t max,
 		   const char *format, const struct tm *tm);
 #define strftime mingw_strftime
diff --git a/object-file.c b/object-file.c
index a8be8994814..607e9e2f80b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1860,12 +1860,13 @@  int hash_object_file(const struct git_hash_algo *algo, const void *buf,
 }
 
 /* Finalize a file on disk, and close it. */
-static void close_loose_object(int fd)
+static int close_loose_object(int fd, const char *tmpfile, const char *filename)
 {
 	if (fsync_object_files)
 		fsync_or_die(fd, "loose object file");
 	if (close(fd) != 0)
 		die_errno(_("error when closing loose object file"));
+	return finalize_object_file(tmpfile, filename);
 }
 
 /* Size of directory component, including the ending '/' */
@@ -1973,17 +1974,15 @@  static int write_loose_object(const struct object_id *oid, char *hdr,
 		die(_("confused by unstable object source data for %s"),
 		    oid_to_hex(oid));
 
-	close_loose_object(fd);
-
 	if (mtime) {
-		struct utimbuf utb;
-		utb.actime = mtime;
-		utb.modtime = mtime;
-		if (utime(tmp_file.buf, &utb) < 0)
-			warning_errno(_("failed utime() on %s"), tmp_file.buf);
+		struct timeval tvs[2] = {0};
+		tvs[0].tv_sec = mtime;
+		tvs[1].tv_sec = mtime;
+		if (futimes(fd, tvs) < 0)
+			warning_errno(_("failed futimes() on %s"), tmp_file.buf);
 	}
 
-	return finalize_object_file(tmp_file.buf, filename.buf);
+	return close_loose_object(fd, tmp_file.buf, filename.buf);
 }
 
 static int freshen_loose_object(const struct object_id *oid)