@@ -548,12 +548,29 @@ core.whitespace::
errors. The default tab width is 8. Allowed values are 1 to 63.
core.fsyncObjectFiles::
- This boolean will enable 'fsync()' when writing object files.
-+
-This is a total waste of time and effort on a filesystem that orders
-data writes properly, but can be useful for filesystems that do not use
-journalling (traditional UNIX filesystems) or that only journal metadata
-and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback").
+ A value indicating the level of effort Git will expend in
+ trying to make objects added to the repo durable in the event
+ of an unclean system shutdown. This setting currently only
+ controls loose objects in the object store, so updates to any
+ refs or the index may not be equally durable.
++
+* `false` allows data to remain in file system caches according to
+ operating system policy, whence it may be lost if the system loses power
+ or crashes.
+* `true` triggers a data integrity flush for each loose object added to the
+ object store. This is the safest setting that is likely to ensure durability
+ across all operating systems and file systems that honor the 'fsync' system
+ call. However, this setting comes with a significant performance cost on
+ common hardware. Git does not currently fsync parent directories for
+ newly-added files, so some filesystems may still allow data to be lost on
+ system crash.
+* `batch` enables an experimental mode that uses interfaces available in some
+ operating systems to write loose object data with a minimal set of FLUSH
+ CACHE (or equivalent) commands sent to the storage controller. If the
+ operating system interfaces are not available, this mode behaves the same as
+ `true`. This mode is expected to be as safe as `true` on macOS for repos
+ stored on HFS+ or APFS filesystems and on Windows for repos stored on NTFS or
+ ReFS.
core.preloadIndex::
Enable parallel index preload for operations like 'git diff'
@@ -406,6 +406,8 @@ all::
#
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
#
+# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
+#
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
#
@@ -1884,6 +1886,10 @@ ifdef HAVE_CLOCK_MONOTONIC
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
endif
+ifdef HAVE_SYNC_FILE_RANGE
+ BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
+endif
+
ifdef NEEDS_LIBRT
EXTLIBS += -lrt
endif
@@ -3,14 +3,20 @@
*/
#include "cache.h"
#include "bulk-checkin.h"
+#include "lockfile.h"
#include "repository.h"
#include "csum-file.h"
#include "pack.h"
#include "strbuf.h"
+#include "string-list.h"
+#include "tmp-objdir.h"
#include "packfile.h"
#include "object-store.h"
static int bulk_checkin_plugged;
+static int needs_batch_fsync; /* set after a writeout-only sync; do_batch_fsync() then flushes */
+
+static struct tmp_objdir *bulk_fsync_objdir; /* created by plug_bulk_checkin() in batch mode */
static struct bulk_checkin_state {
char *pack_tmp_name;
@@ -79,6 +85,34 @@ clear_exit:
reprepare_packed_git(the_repository);
}
+/*
+ * Finish batch-mode fsync: flush written-out objects, then migrate them.
+ */
+static void do_batch_fsync(void)
+{
+	/*
+	 * Issue a full hardware flush against a temporary file to ensure that
+	 * all objects are durable before any renames occur. The code in
+	 * fsync_loose_object_bulk_checkin has already issued a writeout request,
+	 * but it has not flushed any writeback cache in the storage hardware.
+	 */
+	if (needs_batch_fsync) {
+		struct strbuf temp_path = STRBUF_INIT;
+		struct tempfile *temp;
+
+		strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", get_object_directory());
+		temp = xmks_tempfile(temp_path.buf);
+		fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
+		delete_tempfile(&temp);
+		strbuf_release(&temp_path);
+		needs_batch_fsync = 0; /* this batch is flushed; do not re-flush later */
+	}
+
+	if (bulk_fsync_objdir)
+		tmp_objdir_migrate(bulk_fsync_objdir);
+	bulk_fsync_objdir = NULL; /* never hand the same tmp objdir to a later unplug */
+}
+
static int already_written(struct bulk_checkin_state *state, struct object_id *oid)
{
int i;
@@ -273,6 +307,25 @@ static int deflate_to_pack(struct bulk_checkin_state *state,
return 0;
}
+void fsync_loose_object_bulk_checkin(int fd)
+{
+	assert(fsync_object_files == FSYNC_OBJECT_FILES_BATCH);
+
+	/*
+	 * Not plugged, or the writeout-only sync failed: do a full fsync now.
+	 */
+	if (!bulk_checkin_plugged || git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {
+		fsync_or_die(fd, "loose object file");
+		return;
+	}
+
+	/*
+	 * The object data was written out without a hardware flush command;
+	 * the single hardware flush is issued later, in do_batch_fsync.
+	 */
+	needs_batch_fsync = 1;
+}
+
int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags)
@@ -287,6 +340,19 @@ int index_bulk_checkin(struct object_id *oid,
void plug_bulk_checkin(void)
{
assert(!bulk_checkin_plugged);
+
+ /*
+ * In batch mode, stage new loose objects in a temporary object
+ * directory until do_batch_fsync() has flushed and migrated them.
+ */
+ if (fsync_object_files == FSYNC_OBJECT_FILES_BATCH) {
+ bulk_fsync_objdir = tmp_objdir_create("bulk-fsync");
+ if (!bulk_fsync_objdir)
+ die(_("Could not create temporary object directory for core.fsyncobjectfiles=batch"));
+
+ tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
+ }
+
bulk_checkin_plugged = 1;
}
@@ -296,4 +362,6 @@ void unplug_bulk_checkin(void)
bulk_checkin_plugged = 0;
if (bulk_checkin_state.f)
finish_bulk_checkin(&bulk_checkin_state);
+
+ do_batch_fsync();
}
@@ -6,6 +6,8 @@
#include "cache.h"
+void fsync_loose_object_bulk_checkin(int fd);
+
int index_bulk_checkin(struct object_id *oid,
int fd, size_t size, enum object_type type,
const char *path, unsigned flags);
@@ -985,7 +985,13 @@ void reset_shared_repository(void);
extern int read_replace_refs;
extern char *git_replace_ref_base;
-extern int fsync_object_files;
+enum fsync_object_files_mode {
+ FSYNC_OBJECT_FILES_OFF, /* do not fsync loose objects (zero, so the default) */
+ FSYNC_OBJECT_FILES_ON, /* fsync_or_die() each loose object as it is closed */
+ FSYNC_OBJECT_FILES_BATCH /* defer to fsync_loose_object_bulk_checkin() */
+};
+
+extern enum fsync_object_files_mode fsync_object_files;
extern int core_preload_index;
extern int precomposed_unicode;
extern int protect_hfs;
@@ -1491,7 +1491,12 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
}
if (!strcmp(var, "core.fsyncobjectfiles")) {
- fsync_object_files = git_config_bool(var, value);
+ if (value && !strcmp(value, "batch"))
+ fsync_object_files = FSYNC_OBJECT_FILES_BATCH;
+ else if (git_config_bool(var, value))
+ fsync_object_files = FSYNC_OBJECT_FILES_ON;
+ else
+ fsync_object_files = FSYNC_OBJECT_FILES_OFF;
return 0;
}
@@ -57,6 +57,7 @@ ifeq ($(uname_S),Linux)
HAVE_CLOCK_MONOTONIC = YesPlease
# -lrt is needed for clock_gettime on glibc <= 2.16
NEEDS_LIBRT = YesPlease
+ HAVE_SYNC_FILE_RANGE = YesPlease
HAVE_GETDELIM = YesPlease
SANE_TEXT_GREP=-a
FREAD_READS_DIRECTORIES = UnfortunatelyYes
@@ -1090,6 +1090,14 @@ AC_COMPILE_IFELSE([CLOCK_MONOTONIC_SRC],
[AC_MSG_RESULT([no])
HAVE_CLOCK_MONOTONIC=])
GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC])
+
+#
+# Define HAVE_SYNC_FILE_RANGE=YesPlease if sync_file_range is available.
+GIT_CHECK_FUNC(sync_file_range,
+	[HAVE_SYNC_FILE_RANGE=YesPlease],
+	[HAVE_SYNC_FILE_RANGE=])
+GIT_CONF_SUBST([HAVE_SYNC_FILE_RANGE])
+
#
# Define NO_SETITIMER if you don't have setitimer.
GIT_CHECK_FUNC(setitimer,
@@ -42,7 +42,7 @@ const char *git_attributes_file;
const char *git_hooks_path;
int zlib_compression_level = Z_BEST_SPEED;
int pack_compression_level = Z_DEFAULT_COMPRESSION;
-int fsync_object_files;
+enum fsync_object_files_mode fsync_object_files;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
size_t delta_base_cache_limit = 96 * 1024 * 1024;
@@ -1214,6 +1214,13 @@ __attribute__((format (printf, 1, 2))) NORETURN
void BUG(const char *fmt, ...);
#endif
+enum fsync_action {
+ FSYNC_WRITEOUT_ONLY, /* write back dirty data without a hardware cache flush */
+ FSYNC_HARDWARE_FLUSH /* full flush: fsync(), or F_FULLFSYNC on macOS */
+};
+
+int git_fsync(int fd, enum fsync_action action);
+
/*
* Preserves errno, prints a message, but gives no warning for ENOENT.
* Returns 0 on success, which includes trying to unlink an object that does
@@ -1853,8 +1853,18 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
static void close_loose_object(int fd)
{
if (!the_repository->objects->odb->will_destroy) {
- if (fsync_object_files)
+ switch (fsync_object_files) {
+ case FSYNC_OBJECT_FILES_OFF:
+ break;
+ case FSYNC_OBJECT_FILES_ON:
fsync_or_die(fd, "loose object file");
+ break;
+ case FSYNC_OBJECT_FILES_BATCH:
+ fsync_loose_object_bulk_checkin(fd);
+ break;
+ default:
+ BUG("Invalid fsync_object_files mode.");
+ }
}
if (close(fd) != 0)
@@ -546,6 +546,50 @@ int xmkstemp_mode(char *filename_template, int mode)
return fd;
}
+/*
+ * Sync fd as requested by action and return the underlying call's result.
+ * FSYNC_WRITEOUT_ONLY fails with ENOSYS where no such primitive exists.
+ */
+int git_fsync(int fd, enum fsync_action action)
+{
+	switch (action) {
+	case FSYNC_WRITEOUT_ONLY:
+#if defined(__APPLE__)
+		/*
+		 * On macOS, fsync only writes back the filesystem cache;
+		 * it does not flush hardware caches.
+		 */
+		return fsync(fd);
+#elif defined(HAVE_SYNC_FILE_RANGE)
+		/*
+		 * On Linux 2.6.17 and above, sync_file_range issues a
+		 * writeback without a hardware flush. Offset 0 with size 0
+		 * covers the entire file, and the wait flags ensure that all
+		 * dirty data has reached the disk (potentially only a
+		 * disk-side cache) before we continue.
+		 */
+		return sync_file_range(fd, 0, 0,
+				       SYNC_FILE_RANGE_WAIT_BEFORE |
+				       SYNC_FILE_RANGE_WRITE |
+				       SYNC_FILE_RANGE_WAIT_AFTER);
+#else
+		/* No writeout-only primitive here; let the caller fall back. */
+		errno = ENOSYS;
+		return -1;
+#endif
+
+	case FSYNC_HARDWARE_FLUSH:
+#ifdef __APPLE__
+		return fcntl(fd, F_FULLFSYNC);
+#else
+		return fsync(fd);
+#endif
+
+	default:
+		BUG("unexpected git_fsync(%d) call", action);
+	}
+}
+
static int warn_if_unremovable(const char *op, const char *file, int rc)
{
int err;
@@ -57,7 +57,7 @@ void fprintf_or_die(FILE *f, const char *fmt, ...)
void fsync_or_die(int fd, const char *msg)
{
- while (fsync(fd) < 0) {
+ while (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0) {
if (errno != EINTR)
die_errno("fsync error on '%s'", msg);
}