diff mbox series

[v5,3/5] core.fsync: introduce granular fsync control

Message ID e31886717b42837f4e1538a13c8954aa07865af5.1646866998.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series A design for future-proofing fsync() configuration | expand

Commit Message

Neeraj Singh (WINDOWS-SFS) March 9, 2022, 11:03 p.m. UTC
From: Neeraj Singh <neerajsi@microsoft.com>

This commit introduces the `core.fsync` configuration
knob which can be used to control how components of the
repository are made durable on disk.

This setting allows future extensibility of the list of
syncable components:
* We issue a warning rather than an error for unrecognized
  components, so new configs can be used with old Git versions.
* We support negation, so users can choose one of the aggregate
  options and then remove components that they don't want.
  Aggregate options are defined in a later patch in this series.

This also supports the common request of doing absolutely no
fysncing with the `core.fsync=none` value, which is expected
to make the test suite faster.

Complete documentation for the new setting is included in a later patch
in the series so that it can be reviewed in final form.

Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
---
 Documentation/config/core.txt |  8 ----
 builtin/fast-import.c         |  2 +-
 builtin/index-pack.c          |  4 +-
 builtin/pack-objects.c        | 24 ++++++++----
 bulk-checkin.c                |  5 ++-
 cache.h                       | 30 +++++++++++++-
 commit-graph.c                |  3 +-
 config.c                      | 73 ++++++++++++++++++++++++++++++++++-
 csum-file.c                   |  5 ++-
 csum-file.h                   |  3 +-
 environment.c                 |  2 +-
 midx.c                        |  3 +-
 object-file.c                 |  3 +-
 pack-bitmap-write.c           |  3 +-
 pack-write.c                  | 13 ++++---
 read-cache.c                  |  2 +-
 16 files changed, 144 insertions(+), 39 deletions(-)

Comments

Junio C Hamano March 10, 2022, 12:21 a.m. UTC | #1
"Neeraj Singh via GitGitGadget" <gitgitgadget@gmail.com> writes:

> +/*
> + * These values are used to help identify parts of a repository to fsync.
> + * FSYNC_COMPONENT_NONE identifies data that will not be a persistent part of the
> + * repository and so shouldn't be fsynced.
> + */
> +enum fsync_component {
> +	FSYNC_COMPONENT_NONE,
> +	FSYNC_COMPONENT_LOOSE_OBJECT		= 1 << 0,
> +	FSYNC_COMPONENT_PACK			= 1 << 1,
> +	FSYNC_COMPONENT_PACK_METADATA		= 1 << 2,
> +	FSYNC_COMPONENT_COMMIT_GRAPH		= 1 << 3,
> +};

OK, so the idea is that Patrick's "we need to fsync refs" will be
done by adding a new component to this list, and sprinkling a call
to fsync_component_or_die() in the code of ref-files backend?

I am wondering if fsync_or_die() interface is abstracted well
enough, or we need things like "the fd is inside this directory; in
addition to doing the fsync of the fd, please sync the parent
directory as well" support before we start adding more components
(if there is such a need, perhaps it comes before this step).

> +#define FSYNC_COMPONENTS_DEFAULT (FSYNC_COMPONENT_PACK | \
> +				  FSYNC_COMPONENT_PACK_METADATA | \
> +				  FSYNC_COMPONENT_COMMIT_GRAPH)

IOW, everything other than loose object, which already has a
separate core.fsyncObjectFiles knob to loosen.  Everything else we
currently sync unconditionally and the default keeps that
arrangement?

> +static inline void fsync_component_or_die(enum fsync_component component, int fd, const char *msg)
> +{
> +	if (fsync_components & component)
> +		fsync_or_die(fd, msg);
> +}

Do we have a compelling reason to have this as a static inline
function?  We are talking about concluding an I/O operation and
I doubt there is a good performance argument for it.

> +static const struct fsync_component_entry {
> +	const char *name;
> +	enum fsync_component component_bits;
> +} fsync_component_table[] = {

thing[] is an array of "thing" (and thing[4] is the "fourth" such
thing), but this is not an array of a table (it is a name-to-bit
mapping).

I wonder if this array works without "_table" suffix in its name.

> +	{ "loose-object", FSYNC_COMPONENT_LOOSE_OBJECT },
> +	{ "pack", FSYNC_COMPONENT_PACK },
> +	{ "pack-metadata", FSYNC_COMPONENT_PACK_METADATA },
> +	{ "commit-graph", FSYNC_COMPONENT_COMMIT_GRAPH },
> +};
> +
> +static enum fsync_component parse_fsync_components(const char *var, const char *string)
> +{
> +	enum fsync_component output = 0;
> +
> +	if (!strcmp(string, "none"))
> +		return FSYNC_COMPONENT_NONE;
> +
> +	while (string) {
> +		int i;
> +		size_t len;
> +		const char *ep;
> +		int negated = 0;
> +		int found = 0;
> +
> +		string = string + strspn(string, ", \t\n\r");
> +		ep = strchrnul(string, ',');
> +		len = ep - string;
> +
> +		if (*string == '-') {
> +			negated = 1;
> +			string++;
> +			len--;
> +			if (!len)
> +				warning(_("invalid value for variable %s"), var);
> +		}
> +
> +		if (!len)
> +			break;
> +
> +		for (i = 0; i < ARRAY_SIZE(fsync_component_table); ++i) {
> +			const struct fsync_component_entry *entry = &fsync_component_table[i];
> +
> +			if (strncmp(entry->name, string, len))
> +				continue;
> +
> +			found = 1;
> +			if (negated)
> +				output &= ~entry->component_bits;
> +			else
> +				output |= entry->component_bits;
> +		}
> +
> +		if (!found) {
> +			char *component = xstrndup(string, len);
> +			warning(_("ignoring unknown core.fsync component '%s'"), component);
> +			free(component);
> +		}
> +
> +		string = ep;
> +	}
> +
> +	return output;
> +}

Hmph.  I would have expected, with built-in default of
pack,pack-metadata,commit-graph,

 - "none,pack" would choose only "pack" by first clearing the
   built-in default (or whatever was set in configuration files that
   are lower precedence than what we are reading) and then OR'ing
   the "pack" bit in.

 - "-pack" would choose "pack-metadata,commit-graph" by first
   starting from the built-in default and then CLR'ing the "pack"
   bit out.  If there were already changes made by the lower
   precedence configuration files like /etc/gitconfig, the result
   might be different and the only definite thing we can say is that
   the pack bit is cleared.

 - "loose-object" would choose all of the bits by first starting
   from the built-in default and then OR'ing the "loose-object" bit
   in.

Otherwise, parsing "none" is more or less pointless, as the above
parser always start from 0 and OR's in or CLR's out the named bit.
Whoever writes "none" can just write an empty string, no?

I wonder you'd rather want to do it this way?

parse_fsync_components(var, value, current) {
	enum fsync_component positive = 0, negative = 0;

	while (string) {
		int negated = 0;
		enum fsync_component bits;

		parse out a single component into <negated, bits>;

		if (bits == 0) { /* "none" given */
                	current = 0;
		} else if (negated) {
			negative |= bits;
		} else {
			positive |= bits;
		}
		advance <string> pointer;
	}

	return (current | positive) & ~negative;
}

And then ...

> +	if (!strcmp(var, "core.fsync")) {
> +		if (!value)
> +			return config_error_nonbool(var);
> +		fsync_components = parse_fsync_components(var, value);
> +		return 0;
> +	}
> +

... this part would pass the current value of fsync_components as
the third parameter to the parse_fsync_components().  The variable
would be initialized to the FSYNC_COMPONENTS_DEFAULT we saw earlier.


> @@ -1613,7 +1684,7 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
>  	}
>  
>  	if (!strcmp(var, "core.fsyncobjectfiles")) {
> -		fsync_object_files = git_config_bool(var, value);
> +		warning(_("core.fsyncobjectfiles is deprecated; use core.fsync instead"));

This is not deprecating but removing the support, which I am not
sure is a sensible thing to do.  Rather we should pretend that
core.fsync = "loose-object" (or "-loose-object") were found in the
configuration, shouldn't we?
Neeraj Singh March 10, 2022, 2:53 a.m. UTC | #2
On Wed, Mar 9, 2022 at 4:21 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> "Neeraj Singh via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
> > +/*
> > + * These values are used to help identify parts of a repository to fsync.
> > + * FSYNC_COMPONENT_NONE identifies data that will not be a persistent part of the
> > + * repository and so shouldn't be fsynced.
> > + */
> > +enum fsync_component {
> > +     FSYNC_COMPONENT_NONE,
> > +     FSYNC_COMPONENT_LOOSE_OBJECT            = 1 << 0,
> > +     FSYNC_COMPONENT_PACK                    = 1 << 1,
> > +     FSYNC_COMPONENT_PACK_METADATA           = 1 << 2,
> > +     FSYNC_COMPONENT_COMMIT_GRAPH            = 1 << 3,
> > +};
>
> OK, so the idea is that Patrick's "we need to fsync refs" will be
> done by adding a new component to this list, and sprinkling a call
> to fsync_component_or_die() in the code of ref-files backend?
>

Yes. Patrick will need to add fsync_component_or_die wherever his
patch series has already added fsync_or_die.

If he follows Ævar's suggestion of treating remote refs differently
from local refs, he might want to define multiple components.

> I am wondering if fsync_or_die() interface is abstracted well
> enough, or we need things like "the fd is inside this directory; in
> addition to doing the fsync of the fd, please sync the parent
> directory as well" support before we start adding more components
> (if there is such a need, perhaps it comes before this step).
>

I think syncing the parent directory is a separate fsyncMethod that
would require changes across the codebase to obtain an appropriate
directory fd. I'd prefer to treat that as a separable concern.

> > +#define FSYNC_COMPONENTS_DEFAULT (FSYNC_COMPONENT_PACK | \
> > +                               FSYNC_COMPONENT_PACK_METADATA | \
> > +                               FSYNC_COMPONENT_COMMIT_GRAPH)
>
> IOW, everything other than loose object, which already has a
> separate core.fsyncObjectFiles knob to loosen.  Everything else we
> currently sync unconditionally and the default keeps that
> arrangement?
>

Yes, trying to keep default behavior identical on non-Windows
platforms.  Windows will be expected to adopt batch mode, and have
loose objects in this set.

> > +static inline void fsync_component_or_die(enum fsync_component component, int fd, const char *msg)
> > +{
> > +     if (fsync_components & component)
> > +             fsync_or_die(fd, msg);
> > +}
>
> Do we have a compelling reason to have this as a static inline
> function?  We are talking about concluding an I/O operation and
> I doubt there is a good performance argument for it.
>

Yeah, this is meant to optimize the case where the component isn't
being fsynced. I'll move this function to write-or-die.c below
fsync_or_die.

> > +static const struct fsync_component_entry {
> > +     const char *name;
> > +     enum fsync_component component_bits;
> > +} fsync_component_table[] = {
>
> thing[] is an array of "thing" (and thing[4] is the "fourth" such
> thing), but this is not an array of a table (it is a name-to-bit
> mapping).
>
> I wonder if this array works without "_table" suffix in its name.

This is modeled after whitespace_rule_names.  What if I change this to
the following?
static const struct fsync_component_name {
...
} fsync_component_names[]

>
> > +     { "loose-object", FSYNC_COMPONENT_LOOSE_OBJECT },
> > +     { "pack", FSYNC_COMPONENT_PACK },
> > +     { "pack-metadata", FSYNC_COMPONENT_PACK_METADATA },
> > +     { "commit-graph", FSYNC_COMPONENT_COMMIT_GRAPH },
> > +};
> > +
> > +static enum fsync_component parse_fsync_components(const char *var, const char *string)
> > +{
> > +     enum fsync_component output = 0;
> > +
> > +     if (!strcmp(string, "none"))
> > +             return FSYNC_COMPONENT_NONE;
> > +
> > +     while (string) {
> > +             int i;
> > +             size_t len;
> > +             const char *ep;
> > +             int negated = 0;
> > +             int found = 0;
> > +
> > +             string = string + strspn(string, ", \t\n\r");
> > +             ep = strchrnul(string, ',');
> > +             len = ep - string;
> > +
> > +             if (*string == '-') {
> > +                     negated = 1;
> > +                     string++;
> > +                     len--;
> > +                     if (!len)
> > +                             warning(_("invalid value for variable %s"), var);
> > +             }
> > +
> > +             if (!len)
> > +                     break;
> > +
> > +             for (i = 0; i < ARRAY_SIZE(fsync_component_table); ++i) {
> > +                     const struct fsync_component_entry *entry = &fsync_component_table[i];
> > +
> > +                     if (strncmp(entry->name, string, len))
> > +                             continue;
> > +
> > +                     found = 1;
> > +                     if (negated)
> > +                             output &= ~entry->component_bits;
> > +                     else
> > +                             output |= entry->component_bits;
> > +             }
> > +
> > +             if (!found) {
> > +                     char *component = xstrndup(string, len);
> > +                     warning(_("ignoring unknown core.fsync component '%s'"), component);
> > +                     free(component);
> > +             }
> > +
> > +             string = ep;
> > +     }
> > +
> > +     return output;
> > +}
>
> Hmph.  I would have expected, with built-in default of
> pack,pack-metadata,commit-graph,
>

At the conclusion of this series, I defined 'default' as an aggregate
option that includes
the platform default.  I'd prefer not to have any statefulness of the
core.fsync setting so
that there is less confusion about the final fsync configuration. My
colleagues had a fair
amount of confusion internally when testing Git performance internally
with regards to
the core.fsyncObjectFiles setting.  Inline this is how your configs
would be written:

>  - "none,pack" would choose only "pack" by first clearing the
>    built-in default (or whatever was set in configuration files that
>    are lower precedence than what we are reading) and then OR'ing
>    the "pack" bit in.
>

"pack" would choose only "pack"

>  - "-pack" would choose "pack-metadata,commit-graph" by first
>    starting from the built-in default and then CLR'ing the "pack"
>    bit out.  If there were already changes made by the lower
>    precedence configuration files like /etc/gitconfig, the result
>    might be different and the only definite thing we can say is that
>    the pack bit is cleared.
>

"default,-pack" would be the platform default, but not packfiles.

>  - "loose-object" would choose all of the bits by first starting
>    from the built-in default and then OR'ing the "loose-object" bit
>    in.
>

"default,loose-object" would add loose objects to the platform config.

> Otherwise, parsing "none" is more or less pointless, as the above
> parser always start from 0 and OR's in or CLR's out the named bit.
> Whoever writes "none" can just write an empty string, no?

I think the empty string would be disallowed since "core.fsync=" would
be entirely
missing a value. But on testing this doesn't seem to be the case. I'll change
this to be more strict in that the user has to pass an explicit value,
such as 'none'.

>
> I wonder you'd rather want to do it this way?
>
> parse_fsync_components(var, value, current) {
>         enum fsync_component positive = 0, negative = 0;
>
>         while (string) {
>                 int negated = 0;
>                 enum fsync_component bits;
>
>                 parse out a single component into <negated, bits>;
>
>                 if (bits == 0) { /* "none" given */
>                         current = 0;
>                 } else if (negated) {
>                         negative |= bits;
>                 } else {
>                         positive |= bits;
>                 }
>                 advance <string> pointer;
>         }
>
>         return (current | positive) & ~negative;
> }
>
> And then ...
>
> > +     if (!strcmp(var, "core.fsync")) {
> > +             if (!value)
> > +                     return config_error_nonbool(var);
> > +             fsync_components = parse_fsync_components(var, value);
> > +             return 0;
> > +     }
> > +
>
> ... this part would pass the current value of fsync_components as
> the third parameter to the parse_fsync_components().  The variable
> would be initialized to the FSYNC_COMPONENTS_DEFAULT we saw earlier.
>

I'm afraid that this method would lead to statefulness between
different levels configuring
core.fsync.  I'd prefer that the user could know what will happen by
just inspecting the value
returned by `git config core.fsync`.

>
> > @@ -1613,7 +1684,7 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
> >       }
> >
> >       if (!strcmp(var, "core.fsyncobjectfiles")) {
> > -             fsync_object_files = git_config_bool(var, value);
> > +             warning(_("core.fsyncobjectfiles is deprecated; use core.fsync instead"));
>
> This is not deprecating but removing the support, which I am not
> sure is a sensible thing to do.  Rather we should pretend that
> core.fsync = "loose-object" (or "-loose-object") were found in the
> configuration, shouldn't we?
>

The problem I anticipate is that figuring out the final configuration
becomes pretty
complex in the face of conflicting configurations of fsyncObjectFiles
and core.fsync.
The user won't know what will happen without reading the Git code or
doing performance
experiments.  I thought we can avoid all of this complexity by just
having a simple warning
that pushes users toward the new configuration value.  Aside from
seeing a warning, a
user's actual usage of Git functionality shouldn't be affected.

Alternatively, what if we just silently ignore the old
core.fsyncObjectFiles setting?
If neither of these is an option, I'll put back some support for the
old setting.

Thanks,
Neeraj
Junio C Hamano March 10, 2022, 7:19 a.m. UTC | #3
Neeraj Singh <nksingh85@gmail.com> writes:

> This is modeled after whitespace_rule_names.  What if I change this to
> the following?
> static const struct fsync_component_name {
> ...
> } fsync_component_names[]

That's better.

> At the conclusion of this series, I defined 'default' as an aggregate
> option that includes
> the platform default.  I'd prefer not to have any statefulness of the
> core.fsync setting so
> that there is less confusion about the final fsync configuration.

Then scratch your preference ;-) 

Our configuration files are designed to be "hierarchical" in that
system-wide default can be set in /etc/gitconfig, which can be
overridden by per-user default in $HOME/.gitconfig, which can in
turn be overridden by per-repository setting in .git/config, so
starting from the compiled-in default, reading/augmenting "the value
we tentatively decided based on what we have read so far" with what
we read from lower-precedence files to higher-precedence files is a
norm.

Don't make this little corner of the system different from
everything else; that will only confuse users.

The git_config() callback should expect to see the same var with
different values for that reason.  Always restarting from zero will
defeat it.

And always restarting from zero will mean "none" is meaningless,
while it would be a quite nice way to say "forget everything we have
read so far and start from scratch" when you really want to refuse
what the system default wants to give you.

>> > @@ -1613,7 +1684,7 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
>> >       }
>> >
>> >       if (!strcmp(var, "core.fsyncobjectfiles")) {
>> > -             fsync_object_files = git_config_bool(var, value);
>> > +             warning(_("core.fsyncobjectfiles is deprecated; use core.fsync instead"));
>>
>> This is not deprecating but removing the support, which I am not
>> sure is a sensible thing to do.  Rather we should pretend that
>> core.fsync = "loose-object" (or "-loose-object") were found in the
>> configuration, shouldn't we?
>>
>
> The problem I anticipate is that figuring out the final configuration
> becomes pretty
> complex in the face of conflicting configurations of fsyncObjectFiles
> and core.fsync.

Don't start your thinking from too complex configuration that mixes
and matches.  Instead, think what happens to folks who are *NOT*
interested in the new way of doing this.  They aren't interested in
setting core.fsync, and they already have core.fsyncObjectFiles set.
You want to make sure their experience does not suck.

The quoted code simply _IGNORES_ their wish and forces whatever
default configuration core.fsync codepath happens to choose, which
is a grave regression from their point of view.

One way to handle this more gracefully is to delay the final
decision until the end of the configuraiton file processing, and
keep track of core.fsyncObjectFiles and core.fsync separately.  If
the latter is never set but the former is, then you are dealing with
such a user who hasn't migrated.  Give them a warning (the text
above is fine---we can tell them "that's deprecated and you should
use the other one instead"), but in the meantime, until deprecation
turns into removal of support, keep honoring their original wish.
If core.fsync is set to something, you can still give them a warning
when you see core.fsyncObjectFiles, saying "that's deprecated and
because you have core.fsync, we'll ignore the old one", and use the
new method exclusively, without having to worry about mixing.
Johannes Schindelin March 10, 2022, 1:11 p.m. UTC | #4
Hi Neeraj,

On Wed, 9 Mar 2022, Neeraj Singh wrote:

> On Wed, Mar 9, 2022 at 4:21 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> > I am wondering if fsync_or_die() interface is abstracted well enough,
> > or we need things like "the fd is inside this directory; in addition
> > to doing the fsync of the fd, please sync the parent directory as
> > well" support before we start adding more components (if there is such
> > a need, perhaps it comes before this step).
> >
>
> I think syncing the parent directory is a separate fsyncMethod that
> would require changes across the codebase to obtain an appropriate
> directory fd. I'd prefer to treat that as a separable concern.

That makes sense to me because I expect further abstraction to be
necessary here because Unix/Linux semantics differ quite a bit more from
Windows semantics when it comes to directory "file" descriptors than when
talking about files' file descriptors.

> > > +#define FSYNC_COMPONENTS_DEFAULT (FSYNC_COMPONENT_PACK | \
> > > +                               FSYNC_COMPONENT_PACK_METADATA | \
> > > +                               FSYNC_COMPONENT_COMMIT_GRAPH)
> >
> > IOW, everything other than loose object, which already has a
> > separate core.fsyncObjectFiles knob to loosen.  Everything else we
> > currently sync unconditionally and the default keeps that
> > arrangement?
> >
>
> Yes, trying to keep default behavior identical on non-Windows
> platforms.  Windows will be expected to adopt batch mode, and have
> loose objects in this set.

We already adopted an early version of this patch series:
https://github.com/git-for-windows/git/commit/98209a5f6e4

And yes, I will gladly adapt that to whatever lands in core Git.

Thank you _so_ much for working on this!
Dscho
Junio C Hamano March 10, 2022, 5:18 p.m. UTC | #5
Neeraj Singh <nksingh85@gmail.com> writes:

>> I am wondering if fsync_or_die() interface is abstracted well
>> enough, or we need things like "the fd is inside this directory; in
>> addition to doing the fsync of the fd, please sync the parent
>> directory as well" support before we start adding more components
>> (if there is such a need, perhaps it comes before this step).
>>
>
> I think syncing the parent directory is a separate fsyncMethod that
> would require changes across the codebase to obtain an appropriate
> directory fd. I'd prefer to treat that as a separable concern.

Yeah, that would be a sensible direction to go.  If we never did the
"sync the parent" thing, we do not need it in the fsyncMethod world
immediately.  It can be added later.

Thanks.
Neeraj Singh March 10, 2022, 6:38 p.m. UTC | #6
On Wed, Mar 9, 2022 at 11:19 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Neeraj Singh <nksingh85@gmail.com> writes:
>
> > This is modeled after whitespace_rule_names.  What if I change this to
> > the following?
> > static const struct fsync_component_name {
> > ...
> > } fsync_component_names[]
>
> That's better.
>
> > At the conclusion of this series, I defined 'default' as an aggregate
> > option that includes
> > the platform default.  I'd prefer not to have any statefulness of the
> > core.fsync setting so
> > that there is less confusion about the final fsync configuration.
>
> Then scratch your preference ;-)

Just to clarify, linguistically, by 'scratch' do you mean that I should drop
my preference (which I can do to get the important parts of the
series in), or are you saying that that your suggestion is in line with
my preference, and I'm just not seeing it properly?

> Our configuration files are designed to be "hierarchical" in that
> system-wide default can be set in /etc/gitconfig, which can be
> overridden by per-user default in $HOME/.gitconfig, which can in
> turn be overridden by per-repository setting in .git/config, so
> starting from the compiled-in default, reading/augmenting "the value
> we tentatively decided based on what we have read so far" with what
> we read from lower-precedence files to higher-precedence files is a
> norm.
>
> Don't make this little corner of the system different from
> everything else; that will only confuse users.
>
> The git_config() callback should expect to see the same var with
> different values for that reason.  Always restarting from zero will
> defeat it.
>

Consider core.whitespace. The parse_whitespace_rule code starts with
the compiled-in default every time it encounters a config value, and then
modifies it according to what the user passed in.  So the user could figure
out what's going to happen by just looking at the value returned by
`git config --get core.whitespace`.  The user doesn't need to call
`git config --get-all core.whitespace` and then reason about the entries
from top to bottom to figure out the actual state that Git will use.

> And always restarting from zero will mean "none" is meaningless,
> while it would be a quite nice way to say "forget everything we have
> read so far and start from scratch" when you really want to refuse
> what the system default wants to give you.
>

The intention, which I've implemented in my local v6 changes, is for an
empty list or empty string to be an illegal value of core.fsync.  It should be
set to one or more legal values.  But I see the advantage in always resetting to
the system default, like core.whitespace does, so that a set of unrecognized
values results in at least default behavior. An empty string would mean
'unconfigured', which will be meaningful when we integrate core.fsync with
core.fsyncObjectFiles.

I'll update the change to start from default, with none as a reset. I'm still in
favor of making it so that the most recent value of core.fsync in the
hierarchical configuration store stands alone without picking up state from
prior values.

> >> > @@ -1613,7 +1684,7 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
> >> >       }
> >> >
> >> >       if (!strcmp(var, "core.fsyncobjectfiles")) {
> >> > -             fsync_object_files = git_config_bool(var, value);
> >> > +             warning(_("core.fsyncobjectfiles is deprecated; use core.fsync instead"));
> >>
> >> This is not deprecating but removing the support, which I am not
> >> sure is a sensible thing to do.  Rather we should pretend that
> >> core.fsync = "loose-object" (or "-loose-object") were found in the
> >> configuration, shouldn't we?
> >>
> >
> > The problem I anticipate is that figuring out the final configuration
> > becomes pretty
> > complex in the face of conflicting configurations of fsyncObjectFiles
> > and core.fsync.
>
> Don't start your thinking from too complex configuration that mixes
> and matches.  Instead, think what happens to folks who are *NOT*
> interested in the new way of doing this.  They aren't interested in
> setting core.fsync, and they already have core.fsyncObjectFiles set.
> You want to make sure their experience does not suck.
>
> The quoted code simply _IGNORES_ their wish and forces whatever
> default configuration core.fsync codepath happens to choose, which
> is a grave regression from their point of view.
>
> One way to handle this more gracefully is to delay the final
> decision until the end of the configuraiton file processing, and
> keep track of core.fsyncObjectFiles and core.fsync separately.  If
> the latter is never set but the former is, then you are dealing with
> such a user who hasn't migrated.  Give them a warning (the text
> above is fine---we can tell them "that's deprecated and you should
> use the other one instead"), but in the meantime, until deprecation
> turns into removal of support, keep honoring their original wish.
> If core.fsync is set to something, you can still give them a warning
> when you see core.fsyncObjectFiles, saying "that's deprecated and
> because you have core.fsync, we'll ignore the old one", and use the
> new method exclusively, without having to worry about mixing.

Is there a well-defined place where we know that configuration processing
is complete?  The most obvious spot to me to integrate these two values would
be the first time we need to figure out the fsync state. Another spot would be
prepare_repo_settings.  Are there any other good candidates?

Once the right spot is picked, I'll implement the integration of the
settings as you
suggested.  For now I'll stick it in prepare_repo_settings.

Thanks for the review.  Please expect a v6 today.
Junio C Hamano March 10, 2022, 6:44 p.m. UTC | #7
Neeraj Singh <nksingh85@gmail.com> writes:

>> > At the conclusion of this series, I defined 'default' as an aggregate
>> > option that includes
>> > the platform default.  I'd prefer not to have any statefulness of the
>> > core.fsync setting so
>> > that there is less confusion about the final fsync configuration.
>>
>> Then scratch your preference ;-)
>
> Just to clarify, linguistically, by 'scratch' do you mean that I should drop
> my preference

Yes.

> Is there a well-defined place where we know that configuration processing
> is complete?  The most obvious spot to me to integrate these two values would
> be the first time we need to figure out the fsync state.

That sounds like a good place.
Junio C Hamano March 10, 2022, 7:57 p.m. UTC | #8
Junio C Hamano <gitster@pobox.com> writes:

> Neeraj Singh <nksingh85@gmail.com> writes:
>
>>> > At the conclusion of this series, I defined 'default' as an aggregate
>>> > option that includes
>>> > the platform default.  I'd prefer not to have any statefulness of the
>>> > core.fsync setting so
>>> > that there is less confusion about the final fsync configuration.
>>>
>>> Then scratch your preference ;-)
>>
>> Just to clarify, linguistically, by 'scratch' do you mean that I should drop
>> my preference
>
> Yes.

Let me take this part back.

I do not mind too deeply if this were "each occurrence of core.fsync
as a whole replaces whatever we saw earlier, i.e. last-one-wins".

But if we were going that route, instead of starting from an empty
set, I'd prefer to see it begin with the built-in default (i.e. the
one you defined to mimic the traditional behaviour before core.fsync
was introduced) and added or deleted by each (possibly '-' prefixed)
element on the comma-separated list, with an explicit way to clear
the built-in default.  E.g. "none,refs" would clear the components
traditionally fsync'ed by default and choose only "refs" component,
while "-pack-metadata" would mean the default ones minus
"pack-metadata" component are subject for fsync'ing.  An empty
string would naturally mean "By having this core.fsync entry, I am
telling you not to pay any attention to what lower-precedence
configuration files said.  But I want the built-in default, without
any additions or subtractions made by this entry, just the default,
please" in such a scheme, so do not forbid it.

Or, we can inherit from the previous configuration file to allow
/etc/gitconfig and the ones shipped by Git for Windows to augment
the built-in default before letting end-user configuration to
further customize the preference.

Either is fine by me.

Thanks.
Neeraj Singh March 10, 2022, 8:25 p.m. UTC | #9
On Thu, Mar 10, 2022 at 11:57 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Junio C Hamano <gitster@pobox.com> writes:
>
> > Neeraj Singh <nksingh85@gmail.com> writes:
> >
> >>> > At the conclusion of this series, I defined 'default' as an aggregate
> >>> > option that includes
> >>> > the platform default.  I'd prefer not to have any statefulness of the
> >>> > core.fsync setting so
> >>> > that there is less confusion about the final fsync configuration.
> >>>
> >>> Then scratch your preference ;-)
> >>
> >> Just to clarify, linguistically, by 'scratch' do you mean that I should drop
> >> my preference
> >
> > Yes.
>
> Let me take this part back.
>
> I do not mind too deeply if this were "each occurrence of core.fsync
> as a whole replaces whatever we saw earlier, i.e. last-one-wins".
>
> But if we were going that route, instead of starting from an empty
> set, I'd prefer to see it begin with the built-in default (i.e. the
> one you defined to mimic the traditional behaviour before core.fsync
> was introduced) and added or deleted by each (possibly '-' prefixed)
> element on the comma-separated list, with an explicit way to clear
> the built-in default.  E.g. "none,refs" would clear the components
> traditionally fsync'ed by default and choose only "refs" component,
> while "-pack-metadata" would mean the default ones minus
> "pack-metadata" component are subject for fsync'ing.  An empty
> string would naturally mean "By having this core.fsync entry, I am
> telling you not to pay any attention to what lower-precedence
> configuration files said.  But I want the built-in default, without
> any additions or subtractions made by this entry, just the default,
> please" in such a scheme, so do not forbid it.
>
> Or, we can inherit from the previous configuration file to allow
> /etc/gitconfig and the ones shipped by Git for Windows to augment
> the built-in default before letting end-user configuration to
> further customize the preference.
>
> Either is fine by me.
>
> Thanks.

Okay, I'll implement this version, since this is close to my preference.

Under this schema, 'default' isn't useful as an aggregate option, so
I'll eliminate
that.
Junio C Hamano March 10, 2022, 9:17 p.m. UTC | #10
Neeraj Singh <nksingh85@gmail.com> writes:

> Under this schema, 'default' isn't useful as an aggregate option, so
> I'll eliminate
> that.

Yeah, the only difference is the starting point.  Either start with
default set of bits and give an option to clear, or start with an
empty set and give an option to set default.  The former may be a
bit less cumbersome to users but there isn't a huge difference.
diff mbox series

Patch

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index dbb134f7136..74399072843 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -556,14 +556,6 @@  core.fsyncMethod::
   filesystem and storage hardware, data added to the repository may not be
   durable in the event of a system crash. This is the default mode on macOS.
 
-core.fsyncObjectFiles::
-	This boolean will enable 'fsync()' when writing object files.
-+
-This is a total waste of time and effort on a filesystem that orders
-data writes properly, but can be useful for filesystems that do not use
-journalling (traditional UNIX filesystems) or that only journal metadata
-and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback").
-
 core.preloadIndex::
 	Enable parallel index preload for operations like 'git diff'
 +
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index b7105fcad9b..f2c036a8955 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -865,7 +865,7 @@  static void end_packfile(void)
 		struct tag *t;
 
 		close_pack_windows(pack_data);
-		finalize_hashfile(pack_file, cur_pack_oid.hash, 0);
+		finalize_hashfile(pack_file, cur_pack_oid.hash, FSYNC_COMPONENT_PACK, 0);
 		fixup_pack_header_footer(pack_data->pack_fd, pack_data->hash,
 					 pack_data->pack_name, object_count,
 					 cur_pack_oid.hash, pack_size);
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index c45273de3b1..c5f12f14df5 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1290,7 +1290,7 @@  static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
 			    nr_objects - nr_objects_initial);
 		stop_progress_msg(&progress, msg.buf);
 		strbuf_release(&msg);
-		finalize_hashfile(f, tail_hash, 0);
+		finalize_hashfile(f, tail_hash, FSYNC_COMPONENT_PACK, 0);
 		hashcpy(read_hash, pack_hash);
 		fixup_pack_header_footer(output_fd, pack_hash,
 					 curr_pack, nr_objects,
@@ -1512,7 +1512,7 @@  static void final(const char *final_pack_name, const char *curr_pack_name,
 	if (!from_stdin) {
 		close(input_fd);
 	} else {
-		fsync_or_die(output_fd, curr_pack_name);
+		fsync_component_or_die(FSYNC_COMPONENT_PACK, output_fd, curr_pack_name);
 		err = close(output_fd);
 		if (err)
 			die_errno(_("error while closing pack file"));
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 178e611f09d..c14fee8e99f 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1199,16 +1199,26 @@  static void write_pack_file(void)
 			display_progress(progress_state, written);
 		}
 
-		/*
-		 * Did we write the wrong # entries in the header?
-		 * If so, rewrite it like in fast-import
-		 */
 		if (pack_to_stdout) {
-			finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
+			/*
+			 * We never fsync when writing to stdout since we may
+			 * not be writing to an actual pack file. For instance,
+			 * the upload-pack code passes a pipe here. Calling
+			 * fsync on a pipe results in unnecessary
+			 * synchronization with the reader on some platforms.
+			 */
+			finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
+					  CSUM_HASH_IN_STREAM | CSUM_CLOSE);
 		} else if (nr_written == nr_remaining) {
-			finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+			finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
+					  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 		} else {
-			int fd = finalize_hashfile(f, hash, 0);
+			/*
+			 * If we wrote the wrong number of entries in the
+			 * header, rewrite it like in fast-import.
+			 */
+
+			int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
 			fixup_pack_header_footer(fd, hash, pack_tmp_name,
 						 nr_written, hash, offset);
 			close(fd);
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 8785b2ac806..a2cf9dcbc8d 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -53,9 +53,10 @@  static void finish_bulk_checkin(struct bulk_checkin_state *state)
 		unlink(state->pack_tmp_name);
 		goto clear_exit;
 	} else if (state->nr_written == 1) {
-		finalize_hashfile(state->f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+		finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
+				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 	} else {
-		int fd = finalize_hashfile(state->f, hash, 0);
+		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
 		fixup_pack_header_footer(fd, hash, state->pack_tmp_name,
 					 state->nr_written, hash,
 					 state->offset);
diff --git a/cache.h b/cache.h
index 82f0194a3dd..a5eaa60a7a8 100644
--- a/cache.h
+++ b/cache.h
@@ -993,8 +993,27 @@  void reset_shared_repository(void);
 extern int read_replace_refs;
 extern char *git_replace_ref_base;
 
-extern int fsync_object_files;
-extern int use_fsync;
+/*
+ * These values are used to help identify parts of a repository to fsync.
+ * FSYNC_COMPONENT_NONE identifies data that will not be a persistent part of the
+ * repository and so shouldn't be fsynced.
+ */
+enum fsync_component {
+	FSYNC_COMPONENT_NONE,
+	FSYNC_COMPONENT_LOOSE_OBJECT		= 1 << 0,
+	FSYNC_COMPONENT_PACK			= 1 << 1,
+	FSYNC_COMPONENT_PACK_METADATA		= 1 << 2,
+	FSYNC_COMPONENT_COMMIT_GRAPH		= 1 << 3,
+};
+
+#define FSYNC_COMPONENTS_DEFAULT (FSYNC_COMPONENT_PACK | \
+				  FSYNC_COMPONENT_PACK_METADATA | \
+				  FSYNC_COMPONENT_COMMIT_GRAPH)
+
+/*
+ * A bitmask indicating which components of the repo should be fsynced.
+ */
+extern enum fsync_component fsync_components;
 
 enum fsync_method {
 	FSYNC_METHOD_FSYNC,
@@ -1002,6 +1021,7 @@  enum fsync_method {
 };
 
 extern enum fsync_method fsync_method;
+extern int use_fsync;
 extern int core_preload_index;
 extern int precomposed_unicode;
 extern int protect_hfs;
@@ -1708,6 +1728,12 @@  int copy_file_with_time(const char *dst, const char *src, int mode);
 void write_or_die(int fd, const void *buf, size_t count);
 void fsync_or_die(int fd, const char *);
 
+static inline void fsync_component_or_die(enum fsync_component component, int fd, const char *msg)
+{
+	if (fsync_components & component)
+		fsync_or_die(fd, msg);
+}
+
 ssize_t read_in_full(int fd, void *buf, size_t count);
 ssize_t write_in_full(int fd, const void *buf, size_t count);
 ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset);
diff --git a/commit-graph.c b/commit-graph.c
index 265c010122e..64897f57d9f 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1942,7 +1942,8 @@  static int write_commit_graph_file(struct write_commit_graph_context *ctx)
 	}
 
 	close_commit_graph(ctx->r->objects);
-	finalize_hashfile(f, file_hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
+	finalize_hashfile(f, file_hash, FSYNC_COMPONENT_COMMIT_GRAPH,
+			  CSUM_HASH_IN_STREAM | CSUM_FSYNC);
 	free_chunkfile(cf);
 
 	if (ctx->split) {
diff --git a/config.c b/config.c
index f3ff80b01c9..51a35715642 100644
--- a/config.c
+++ b/config.c
@@ -1323,6 +1323,70 @@  static int git_parse_maybe_bool_text(const char *value)
 	return -1;
 }
 
+static const struct fsync_component_entry {
+	const char *name;
+	enum fsync_component component_bits;
+} fsync_component_table[] = {
+	{ "loose-object", FSYNC_COMPONENT_LOOSE_OBJECT },
+	{ "pack", FSYNC_COMPONENT_PACK },
+	{ "pack-metadata", FSYNC_COMPONENT_PACK_METADATA },
+	{ "commit-graph", FSYNC_COMPONENT_COMMIT_GRAPH },
+};
+
+static enum fsync_component parse_fsync_components(const char *var, const char *string)
+{
+	enum fsync_component output = 0;
+
+	if (!strcmp(string, "none"))
+		return FSYNC_COMPONENT_NONE;
+
+	while (string) {
+		int i;
+		size_t len;
+		const char *ep;
+		int negated = 0;
+		int found = 0;
+
+		string = string + strspn(string, ", \t\n\r");
+		ep = strchrnul(string, ',');
+		len = ep - string;
+
+		if (*string == '-') {
+			negated = 1;
+			string++;
+			len--;
+			if (!len)
+				warning(_("invalid value for variable %s"), var);
+		}
+
+		if (!len)
+			break;
+
+		for (i = 0; i < ARRAY_SIZE(fsync_component_table); ++i) {
+			const struct fsync_component_entry *entry = &fsync_component_table[i];
+
+			if (strncmp(entry->name, string, len))
+				continue;
+
+			found = 1;
+			if (negated)
+				output &= ~entry->component_bits;
+			else
+				output |= entry->component_bits;
+		}
+
+		if (!found) {
+			char *component = xstrndup(string, len);
+			warning(_("ignoring unknown core.fsync component '%s'"), component);
+			free(component);
+		}
+
+		string = ep;
+	}
+
+	return output;
+}
+
 int git_parse_maybe_bool(const char *value)
 {
 	int v = git_parse_maybe_bool_text(value);
@@ -1600,6 +1664,13 @@  static int git_default_core_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp(var, "core.fsync")) {
+		if (!value)
+			return config_error_nonbool(var);
+		fsync_components = parse_fsync_components(var, value);
+		return 0;
+	}
+
 	if (!strcmp(var, "core.fsyncmethod")) {
 		if (!value)
 			return config_error_nonbool(var);
@@ -1613,7 +1684,7 @@  static int git_default_core_config(const char *var, const char *value, void *cb)
 	}
 
 	if (!strcmp(var, "core.fsyncobjectfiles")) {
-		fsync_object_files = git_config_bool(var, value);
+		warning(_("core.fsyncobjectfiles is deprecated; use core.fsync instead"));
 		return 0;
 	}
 
diff --git a/csum-file.c b/csum-file.c
index 26e8a6df44e..59ef3398ca2 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -58,7 +58,8 @@  static void free_hashfile(struct hashfile *f)
 	free(f);
 }
 
-int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int flags)
+int finalize_hashfile(struct hashfile *f, unsigned char *result,
+		      enum fsync_component component, unsigned int flags)
 {
 	int fd;
 
@@ -69,7 +70,7 @@  int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int fl
 	if (flags & CSUM_HASH_IN_STREAM)
 		flush(f, f->buffer, the_hash_algo->rawsz);
 	if (flags & CSUM_FSYNC)
-		fsync_or_die(f->fd, f->name);
+		fsync_component_or_die(component, f->fd, f->name);
 	if (flags & CSUM_CLOSE) {
 		if (close(f->fd))
 			die_errno("%s: sha1 file error on close", f->name);
diff --git a/csum-file.h b/csum-file.h
index 291215b34eb..0d29f528fbc 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -1,6 +1,7 @@ 
 #ifndef CSUM_FILE_H
 #define CSUM_FILE_H
 
+#include "cache.h"
 #include "hash.h"
 
 struct progress;
@@ -38,7 +39,7 @@  int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *);
 struct hashfile *hashfd(int fd, const char *name);
 struct hashfile *hashfd_check(const char *name);
 struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp);
-int finalize_hashfile(struct hashfile *, unsigned char *, unsigned int);
+int finalize_hashfile(struct hashfile *, unsigned char *, enum fsync_component, unsigned int);
 void hashwrite(struct hashfile *, const void *, unsigned int);
 void hashflush(struct hashfile *f);
 void crc32_begin(struct hashfile *);
diff --git a/environment.c b/environment.c
index 3e3620d759f..378424b9af5 100644
--- a/environment.c
+++ b/environment.c
@@ -42,9 +42,9 @@  const char *git_attributes_file;
 const char *git_hooks_path;
 int zlib_compression_level = Z_BEST_SPEED;
 int pack_compression_level = Z_DEFAULT_COMPRESSION;
-int fsync_object_files;
 int use_fsync = -1;
 enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
+enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
 size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
 size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
 size_t delta_base_cache_limit = 96 * 1024 * 1024;
diff --git a/midx.c b/midx.c
index 865170bad05..107365d2114 100644
--- a/midx.c
+++ b/midx.c
@@ -1438,7 +1438,8 @@  static int write_midx_internal(const char *object_dir,
 	write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
 	write_chunkfile(cf, &ctx);
 
-	finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
+	finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA,
+			  CSUM_FSYNC | CSUM_HASH_IN_STREAM);
 	free_chunkfile(cf);
 
 	if (flags & MIDX_WRITE_REV_INDEX &&
diff --git a/object-file.c b/object-file.c
index 03bd6a3baf3..c9de912faca 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1850,8 +1850,7 @@  int hash_object_file(const struct git_hash_algo *algo, const void *buf,
 static void close_loose_object(int fd)
 {
 	if (!the_repository->objects->odb->will_destroy) {
-		if (fsync_object_files)
-			fsync_or_die(fd, "loose object file");
+		fsync_component_or_die(FSYNC_COMPONENT_LOOSE_OBJECT, fd, "loose object file");
 	}
 
 	if (close(fd) != 0)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index cab3eaa2acd..cf681547f2e 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -719,7 +719,8 @@  void bitmap_writer_finish(struct pack_idx_entry **index,
 	if (options & BITMAP_OPT_HASH_CACHE)
 		write_hash_cache(f, index, index_nr);
 
-	finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+	finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
+			  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 
 	if (adjust_shared_perm(tmp_file.buf))
 		die_errno("unable to make temporary bitmap file readable");
diff --git a/pack-write.c b/pack-write.c
index a5846f3a346..51812cb1299 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -159,9 +159,9 @@  const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
 	}
 
 	hashwrite(f, sha1, the_hash_algo->rawsz);
-	finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
-				    ((opts->flags & WRITE_IDX_VERIFY)
-				    ? 0 : CSUM_FSYNC));
+	finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
+			  CSUM_HASH_IN_STREAM | CSUM_CLOSE |
+			  ((opts->flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
 	return index_name;
 }
 
@@ -281,8 +281,9 @@  const char *write_rev_file_order(const char *rev_name,
 	if (rev_name && adjust_shared_perm(rev_name) < 0)
 		die(_("failed to make %s readable"), rev_name);
 
-	finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
-				    ((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
+	finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
+			  CSUM_HASH_IN_STREAM | CSUM_CLOSE |
+			  ((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
 
 	return rev_name;
 }
@@ -390,7 +391,7 @@  void fixup_pack_header_footer(int pack_fd,
 		the_hash_algo->final_fn(partial_pack_hash, &old_hash_ctx);
 	the_hash_algo->final_fn(new_pack_hash, &new_hash_ctx);
 	write_or_die(pack_fd, new_pack_hash, the_hash_algo->rawsz);
-	fsync_or_die(pack_fd, pack_name);
+	fsync_component_or_die(FSYNC_COMPONENT_PACK, pack_fd, pack_name);
 }
 
 char *index_pack_lockfile(int ip_out, int *is_well_formed)
diff --git a/read-cache.c b/read-cache.c
index 79b9b99ebf7..df869691fd4 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3089,7 +3089,7 @@  static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 			return -1;
 	}
 
-	finalize_hashfile(f, istate->oid.hash, CSUM_HASH_IN_STREAM);
+	finalize_hashfile(f, istate->oid.hash, FSYNC_COMPONENT_NONE, CSUM_HASH_IN_STREAM);
 	if (close_tempfile_gently(tempfile)) {
 		error(_("could not close '%s'"), get_tempfile_path(tempfile));
 		return -1;