diff mbox series

[v9,1/9] tmp-objdir: new API for creating temporary writable databases

Message ID 6b27afa60e05c6c0b7752f1bcf6629c446ede520.1637020263.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Implement a batched fsync option for core.fsyncObjectFiles | expand

Commit Message

Neeraj Singh (WINDOWS-SFS) Nov. 15, 2021, 11:50 p.m. UTC
From: Neeraj Singh <neerajsi@microsoft.com>

The tmp_objdir API provides the ability to create temporary object
directories, but was designed with the goal of having subprocesses
access these object stores, followed by the main process migrating
objects from it to the main object store or just deleting it.  The
subprocesses would view it as their primary datastore and write to it.

Here we add the tmp_objdir_replace_primary_odb function that replaces
the current process's writable "main" object directory with the
specified one. The previous main object directory is restored in either
tmp_objdir_migrate or tmp_objdir_destroy.

For the --remerge-diff usecase, add a new `will_destroy` flag in `struct
object_database` to mark ephemeral object databases that do not require
fsync durability.

Add 'git prune' support for removing temporary object databases, and
make sure that they have a name starting with tmp_ and containing an
operation-specific name.

Based-on-patch-by: Elijah Newren <newren@gmail.com>

Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/prune.c        | 23 +++++++++++++++---
 builtin/receive-pack.c |  2 +-
 environment.c          |  5 ++++
 object-file.c          | 44 +++++++++++++++++++++++++++++++--
 object-store.h         | 19 +++++++++++++++
 object.c               |  2 +-
 tmp-objdir.c           | 55 +++++++++++++++++++++++++++++++++++++++---
 tmp-objdir.h           | 29 +++++++++++++++++++---
 8 files changed, 165 insertions(+), 14 deletions(-)

Comments

Elijah Newren Nov. 30, 2021, 9:27 p.m. UTC | #1
On Mon, Nov 15, 2021 at 3:51 PM Neeraj Singh via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Neeraj Singh <neerajsi@microsoft.com>
>
> The tmp_objdir API provides the ability to create temporary object
> directories, but was designed with the goal of having subprocesses
> access these object stores, followed by the main process migrating
> objects from it to the main object store or just deleting it.  The
> subprocesses would view it as their primary datastore and write to it.
>
> Here we add the tmp_objdir_replace_primary_odb function that replaces
> the current process's writable "main" object directory with the
> specified one. The previous main object directory is restored in either
> tmp_objdir_migrate or tmp_objdir_destroy.
>
> For the --remerge-diff usecase, add a new `will_destroy` flag in `struct
> object_database` to mark ephemeral object databases that do not require
> fsync durability.
>
> Add 'git prune' support for removing temporary object databases, and
> make sure that they have a name starting with tmp_ and containing an
> operation-specific name.
>
> Based-on-patch-by: Elijah Newren <newren@gmail.com>
>
> Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
> Signed-off-by: Junio C Hamano <gitster@pobox.com>
> ---
>  builtin/prune.c        | 23 +++++++++++++++---
>  builtin/receive-pack.c |  2 +-
>  environment.c          |  5 ++++
>  object-file.c          | 44 +++++++++++++++++++++++++++++++--
>  object-store.h         | 19 +++++++++++++++
>  object.c               |  2 +-
>  tmp-objdir.c           | 55 +++++++++++++++++++++++++++++++++++++++---
>  tmp-objdir.h           | 29 +++++++++++++++++++---
>  8 files changed, 165 insertions(+), 14 deletions(-)
>
> diff --git a/builtin/prune.c b/builtin/prune.c
> index 485c9a3c56f..a76e6a5f0e8 100644
> --- a/builtin/prune.c
> +++ b/builtin/prune.c
> @@ -18,6 +18,7 @@ static int show_only;
>  static int verbose;
>  static timestamp_t expire;
>  static int show_progress = -1;
> +static struct strbuf remove_dir_buf = STRBUF_INIT;
>
>  static int prune_tmp_file(const char *fullpath)
>  {
> @@ -26,10 +27,20 @@ static int prune_tmp_file(const char *fullpath)
>                 return error("Could not stat '%s'", fullpath);
>         if (st.st_mtime > expire)
>                 return 0;
> -       if (show_only || verbose)
> -               printf("Removing stale temporary file %s\n", fullpath);
> -       if (!show_only)
> -               unlink_or_warn(fullpath);
> +       if (S_ISDIR(st.st_mode)) {
> +               if (show_only || verbose)
> +                       printf("Removing stale temporary directory %s\n", fullpath);
> +               if (!show_only) {
> +                       strbuf_reset(&remove_dir_buf);
> +                       strbuf_addstr(&remove_dir_buf, fullpath);
> +                       remove_dir_recursively(&remove_dir_buf, 0);

Why not just define remove_dir_buf here rather than as a global?  It'd
not only make the code more readable by keeping everything localized,
it would have prevented the forgotten strbuf_reset() bug from the
earlier round of this patch.

Sure, that'd be an extra memory allocation/free for each directory you
hit, which should be negligible compared to the cost of
remove_dir_recursively()...and I'm not sure this is performance
critical anyway (I don't see why we'd expect more than O(1) cruft
temporary directories).

> +               }
> +       } else {
> +               if (show_only || verbose)
> +                       printf("Removing stale temporary file %s\n", fullpath);
> +               if (!show_only)
> +                       unlink_or_warn(fullpath);
> +       }
>         return 0;
>  }
>
> @@ -97,6 +108,9 @@ static int prune_cruft(const char *basename, const char *path, void *data)
>
>  static int prune_subdir(unsigned int nr, const char *path, void *data)
>  {
> +       if (verbose)

Shouldn't this be
    if (show_only || verbose)
?

> +               printf("Removing directory %s\n", path);
> +
>         if (!show_only)
>                 rmdir(path);
>         return 0;
> @@ -184,5 +198,6 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
>                 prune_shallow(show_only ? PRUNE_SHOW_ONLY : 0);
>         }
>
> +       strbuf_release(&remove_dir_buf);
>         return 0;
>  }
> diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
> index 49b846d9605..8815e24cde5 100644
> --- a/builtin/receive-pack.c
> +++ b/builtin/receive-pack.c
> @@ -2213,7 +2213,7 @@ static const char *unpack(int err_fd, struct shallow_info *si)
>                 strvec_push(&child.args, alt_shallow_file);
>         }
>
> -       tmp_objdir = tmp_objdir_create();
> +       tmp_objdir = tmp_objdir_create("incoming");
>         if (!tmp_objdir) {
>                 if (err_fd > 0)
>                         close(err_fd);
> diff --git a/environment.c b/environment.c
> index 9da7f3c1a19..342400fcaad 100644
> --- a/environment.c
> +++ b/environment.c
> @@ -17,6 +17,7 @@
>  #include "commit.h"
>  #include "strvec.h"
>  #include "object-store.h"
> +#include "tmp-objdir.h"
>  #include "chdir-notify.h"
>  #include "shallow.h"
>
> @@ -331,10 +332,14 @@ static void update_relative_gitdir(const char *name,
>                                    void *data)
>  {
>         char *path = reparent_relative_path(old_cwd, new_cwd, get_git_dir());
> +       struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb();
>         trace_printf_key(&trace_setup_key,
>                          "setup: move $GIT_DIR to '%s'",
>                          path);
> +
>         set_git_dir_1(path);
> +       if (tmp_objdir)
> +               tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd);
>         free(path);
>  }
>
> diff --git a/object-file.c b/object-file.c
> index c3d866a287e..0b6a61aeaff 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -683,6 +683,43 @@ void add_to_alternates_memory(const char *reference)
>                              '\n', NULL, 0);
>  }
>
> +struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy)
> +{
> +       struct object_directory *new_odb;
> +
> +       /*
> +        * Make sure alternates are initialized, or else our entry may be
> +        * overwritten when they are.
> +        */
> +       prepare_alt_odb(the_repository);
> +
> +       /*
> +        * Make a new primary odb and link the old primary ODB in as an
> +        * alternate
> +        */
> +       new_odb = xcalloc(1, sizeof(*new_odb));
> +       new_odb->path = xstrdup(dir);
> +       new_odb->will_destroy = will_destroy;
> +       new_odb->next = the_repository->objects->odb;
> +       the_repository->objects->odb = new_odb;
> +       return new_odb->next;
> +}
> +
> +void restore_primary_odb(struct object_directory *restore_odb, const char *old_path)
> +{
> +       struct object_directory *cur_odb = the_repository->objects->odb;
> +
> +       if (strcmp(old_path, cur_odb->path))
> +               BUG("expected %s as primary object store; found %s",
> +                   old_path, cur_odb->path);
> +
> +       if (cur_odb->next != restore_odb)
> +               BUG("we expect the old primary object store to be the first alternate");
> +
> +       the_repository->objects->odb = restore_odb;
> +       free_object_directory(cur_odb);
> +}
> +
>  /*
>   * Compute the exact path an alternate is at and returns it. In case of
>   * error NULL is returned and the human readable error is added to `err`
> @@ -1809,8 +1846,11 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
>  /* Finalize a file on disk, and close it. */
>  static void close_loose_object(int fd)
>  {
> -       if (fsync_object_files)
> -               fsync_or_die(fd, "loose object file");
> +       if (!the_repository->objects->odb->will_destroy) {
> +               if (fsync_object_files)
> +                       fsync_or_die(fd, "loose object file");
> +       }
> +
>         if (close(fd) != 0)
>                 die_errno(_("error when closing loose object file"));
>  }
> diff --git a/object-store.h b/object-store.h
> index 952efb6a4be..cb173e69392 100644
> --- a/object-store.h
> +++ b/object-store.h
> @@ -27,6 +27,11 @@ struct object_directory {
>         uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
>         struct oidtree *loose_objects_cache;
>
> +       /*
> +        * This object store is ephemeral, so there is no need to fsync.
> +        */
> +       int will_destroy;
> +
>         /*
>          * Path to the alternative object store. If this is a relative path,
>          * it is relative to the current working directory.
> @@ -58,6 +63,17 @@ void add_to_alternates_file(const char *dir);
>   */
>  void add_to_alternates_memory(const char *dir);
>
> +/*
> + * Replace the current writable object directory with the specified temporary
> + * object directory; returns the former primary object directory.
> + */
> +struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy);
> +
> +/*
> + * Restore a previous ODB replaced by set_temporary_main_odb.
> + */
> +void restore_primary_odb(struct object_directory *restore_odb, const char *old_path);
> +
>  /*
>   * Populate and return the loose object cache array corresponding to the
>   * given object ID.
> @@ -68,6 +84,9 @@ struct oidtree *odb_loose_cache(struct object_directory *odb,
>  /* Empty the loose object cache for the specified object directory. */
>  void odb_clear_loose_cache(struct object_directory *odb);
>
> +/* Clear and free the specified object directory */
> +void free_object_directory(struct object_directory *odb);
> +
>  struct packed_git {
>         struct hashmap_entry packmap_ent;
>         struct packed_git *next;
> diff --git a/object.c b/object.c
> index 23a24e678a8..048f96a260e 100644
> --- a/object.c
> +++ b/object.c
> @@ -513,7 +513,7 @@ struct raw_object_store *raw_object_store_new(void)
>         return o;
>  }
>
> -static void free_object_directory(struct object_directory *odb)
> +void free_object_directory(struct object_directory *odb)
>  {
>         free(odb->path);
>         odb_clear_loose_cache(odb);
> diff --git a/tmp-objdir.c b/tmp-objdir.c
> index b8d880e3626..3d38eeab66b 100644
> --- a/tmp-objdir.c
> +++ b/tmp-objdir.c
> @@ -1,5 +1,6 @@
>  #include "cache.h"
>  #include "tmp-objdir.h"
> +#include "chdir-notify.h"
>  #include "dir.h"
>  #include "sigchain.h"
>  #include "string-list.h"
> @@ -11,6 +12,8 @@
>  struct tmp_objdir {
>         struct strbuf path;
>         struct strvec env;
> +       struct object_directory *prev_odb;
> +       int will_destroy;
>  };
>
>  /*
> @@ -38,6 +41,9 @@ static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
>         if (t == the_tmp_objdir)
>                 the_tmp_objdir = NULL;
>
> +       if (!on_signal && t->prev_odb)
> +               restore_primary_odb(t->prev_odb, t->path.buf);
> +
>         /*
>          * This may use malloc via strbuf_grow(), but we should
>          * have pre-grown t->path sufficiently so that this
> @@ -52,6 +58,7 @@ static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
>          */
>         if (!on_signal)
>                 tmp_objdir_free(t);
> +
>         return err;
>  }
>
> @@ -121,7 +128,7 @@ static int setup_tmp_objdir(const char *root)
>         return ret;
>  }
>
> -struct tmp_objdir *tmp_objdir_create(void)
> +struct tmp_objdir *tmp_objdir_create(const char *prefix)
>  {
>         static int installed_handlers;
>         struct tmp_objdir *t;
> @@ -129,11 +136,16 @@ struct tmp_objdir *tmp_objdir_create(void)
>         if (the_tmp_objdir)
>                 BUG("only one tmp_objdir can be used at a time");
>
> -       t = xmalloc(sizeof(*t));
> +       t = xcalloc(1, sizeof(*t));
>         strbuf_init(&t->path, 0);
>         strvec_init(&t->env);
>
> -       strbuf_addf(&t->path, "%s/incoming-XXXXXX", get_object_directory());
> +       /*
> +        * Use a string starting with tmp_ so that the builtin/prune.c code
> +        * can recognize any stale objdirs left behind by a crash and delete
> +        * them.
> +        */
> +       strbuf_addf(&t->path, "%s/tmp_objdir-%s-XXXXXX", get_object_directory(), prefix);
>
>         /*
>          * Grow the strbuf beyond any filename we expect to be placed in it.
> @@ -269,6 +281,13 @@ int tmp_objdir_migrate(struct tmp_objdir *t)
>         if (!t)
>                 return 0;
>
> +       if (t->prev_odb) {
> +               if (the_repository->objects->odb->will_destroy)
> +                       BUG("migrating an ODB that was marked for destruction");
> +               restore_primary_odb(t->prev_odb, t->path.buf);
> +               t->prev_odb = NULL;
> +       }
> +
>         strbuf_addbuf(&src, &t->path);
>         strbuf_addstr(&dst, get_object_directory());
>
> @@ -292,3 +311,33 @@ void tmp_objdir_add_as_alternate(const struct tmp_objdir *t)
>  {
>         add_to_alternates_memory(t->path.buf);
>  }
> +
> +void tmp_objdir_replace_primary_odb(struct tmp_objdir *t, int will_destroy)
> +{
> +       if (t->prev_odb)
> +               BUG("the primary object database is already replaced");
> +       t->prev_odb = set_temporary_primary_odb(t->path.buf, will_destroy);
> +       t->will_destroy = will_destroy;
> +}
> +
> +struct tmp_objdir *tmp_objdir_unapply_primary_odb(void)
> +{
> +       if (!the_tmp_objdir || !the_tmp_objdir->prev_odb)
> +               return NULL;
> +
> +       restore_primary_odb(the_tmp_objdir->prev_odb, the_tmp_objdir->path.buf);
> +       the_tmp_objdir->prev_odb = NULL;
> +       return the_tmp_objdir;
> +}
> +
> +void tmp_objdir_reapply_primary_odb(struct tmp_objdir *t, const char *old_cwd,
> +               const char *new_cwd)
> +{
> +       char *path;
> +
> +       path = reparent_relative_path(old_cwd, new_cwd, t->path.buf);
> +       strbuf_reset(&t->path);
> +       strbuf_addstr(&t->path, path);
> +       free(path);
> +       tmp_objdir_replace_primary_odb(t, t->will_destroy);
> +}
> diff --git a/tmp-objdir.h b/tmp-objdir.h
> index b1e45b4c75d..a3145051f25 100644
> --- a/tmp-objdir.h
> +++ b/tmp-objdir.h
> @@ -10,7 +10,7 @@
>   *
>   * Example:
>   *
> - *     struct tmp_objdir *t = tmp_objdir_create();
> + *     struct tmp_objdir *t = tmp_objdir_create("incoming");
>   *     if (!run_command_v_opt_cd_env(cmd, 0, NULL, tmp_objdir_env(t)) &&
>   *         !tmp_objdir_migrate(t))
>   *             printf("success!\n");
> @@ -22,9 +22,10 @@
>  struct tmp_objdir;
>
>  /*
> - * Create a new temporary object directory; returns NULL on failure.
> + * Create a new temporary object directory with the specified prefix;
> + * returns NULL on failure.
>   */
> -struct tmp_objdir *tmp_objdir_create(void);
> +struct tmp_objdir *tmp_objdir_create(const char *prefix);
>
>  /*
>   * Return a list of environment strings, suitable for use with
> @@ -51,4 +52,26 @@ int tmp_objdir_destroy(struct tmp_objdir *);
>   */
>  void tmp_objdir_add_as_alternate(const struct tmp_objdir *);
>
> +/*
> + * Replaces the main object store in the current process with the temporary
> + * object directory and makes the former main object store an alternate.
> + * If will_destroy is nonzero, the object directory may not be migrated.
> + */
> +void tmp_objdir_replace_primary_odb(struct tmp_objdir *, int will_destroy);
> +
> +/*
> + * If the primary object database was replaced by a temporary object directory,
> + * restore it to its original value while keeping the directory contents around.
> + * Returns NULL if the primary object database was not replaced.
> + */
> +struct tmp_objdir *tmp_objdir_unapply_primary_odb(void);
> +
> +/*
> + * Reapplies the former primary temporary object database, after protentially
> + * changing its relative path.
> + */
> +void tmp_objdir_reapply_primary_odb(struct tmp_objdir *, const char *old_cwd,
> +               const char *new_cwd);
> +
> +
>  #endif /* TMP_OBJDIR_H */
> --
> gitgitgadget
Neeraj Singh Nov. 30, 2021, 9:52 p.m. UTC | #2
On Tue, Nov 30, 2021 at 1:27 PM Elijah Newren <newren@gmail.com> wrote:
>
> On Mon, Nov 15, 2021 at 3:51 PM Neeraj Singh via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
> >
> > From: Neeraj Singh <neerajsi@microsoft.com>
> >
> > The tmp_objdir API provides the ability to create temporary object
> > directories, but was designed with the goal of having subprocesses
> > access these object stores, followed by the main process migrating
> > objects from it to the main object store or just deleting it.  The
> > subprocesses would view it as their primary datastore and write to it.
> >
> > Here we add the tmp_objdir_replace_primary_odb function that replaces
> > the current process's writable "main" object directory with the
> > specified one. The previous main object directory is restored in either
> > tmp_objdir_migrate or tmp_objdir_destroy.
> >
> > For the --remerge-diff usecase, add a new `will_destroy` flag in `struct
> > object_database` to mark ephemeral object databases that do not require
> > fsync durability.
> >
> > Add 'git prune' support for removing temporary object databases, and
> > make sure that they have a name starting with tmp_ and containing an
> > operation-specific name.
> >
> > Based-on-patch-by: Elijah Newren <newren@gmail.com>
> >
> > Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
> > Signed-off-by: Junio C Hamano <gitster@pobox.com>
> > ---
> >  builtin/prune.c        | 23 +++++++++++++++---
> >  builtin/receive-pack.c |  2 +-
> >  environment.c          |  5 ++++
> >  object-file.c          | 44 +++++++++++++++++++++++++++++++--
> >  object-store.h         | 19 +++++++++++++++
> >  object.c               |  2 +-
> >  tmp-objdir.c           | 55 +++++++++++++++++++++++++++++++++++++++---
> >  tmp-objdir.h           | 29 +++++++++++++++++++---
> >  8 files changed, 165 insertions(+), 14 deletions(-)
> >
> > diff --git a/builtin/prune.c b/builtin/prune.c
> > index 485c9a3c56f..a76e6a5f0e8 100644
> > --- a/builtin/prune.c
> > +++ b/builtin/prune.c
> > @@ -18,6 +18,7 @@ static int show_only;
> >  static int verbose;
> >  static timestamp_t expire;
> >  static int show_progress = -1;
> > +static struct strbuf remove_dir_buf = STRBUF_INIT;
> >
> >  static int prune_tmp_file(const char *fullpath)
> >  {
> > @@ -26,10 +27,20 @@ static int prune_tmp_file(const char *fullpath)
> >                 return error("Could not stat '%s'", fullpath);
> >         if (st.st_mtime > expire)
> >                 return 0;
> > -       if (show_only || verbose)
> > -               printf("Removing stale temporary file %s\n", fullpath);
> > -       if (!show_only)
> > -               unlink_or_warn(fullpath);
> > +       if (S_ISDIR(st.st_mode)) {
> > +               if (show_only || verbose)
> > +                       printf("Removing stale temporary directory %s\n", fullpath);
> > +               if (!show_only) {
> > +                       strbuf_reset(&remove_dir_buf);
> > +                       strbuf_addstr(&remove_dir_buf, fullpath);
> > +                       remove_dir_recursively(&remove_dir_buf, 0);
>
> Why not just define remove_dir_buf here rather than as a global?  It'd
> not only make the code more readable by keeping everything localized,
> it would have prevented the forgotten strbuf_reset() bug from the
> earlier round of this patch.
>
> Sure, that'd be an extra memory allocation/free for each directory you
> hit, which should be negligible compared to the cost of
> remove_dir_recursively()...and I'm not sure this is performance
> critical anyway (I don't see why we'd expect more than O(1) cruft
> temporary directories).

I'll take this suggestion.

> > +               }
> > +       } else {
> > +               if (show_only || verbose)
> > +                       printf("Removing stale temporary file %s\n", fullpath);
> > +               if (!show_only)
> > +                       unlink_or_warn(fullpath);
> > +       }
> >         return 0;
> >  }
> >
> > @@ -97,6 +108,9 @@ static int prune_cruft(const char *basename, const char *path, void *data)
> >
> >  static int prune_subdir(unsigned int nr, const char *path, void *data)
> >  {
> > +       if (verbose)
>
> Shouldn't this be
>     if (show_only || verbose)
> ?

Doing that breaks one of the tests, since we print extra stuff that's
unexpected. I think I'm going to just revert this change, since it
appears that we call this callback and try to remove the directory
even if it's non-empty.

Do you have any comments or thoughts on how we want to allow the user
to configure fsync settings?

Thanks,
Neeraj
Elijah Newren Nov. 30, 2021, 10:36 p.m. UTC | #3
On Tue, Nov 30, 2021 at 1:52 PM Neeraj Singh <nksingh85@gmail.com> wrote:
>
> On Tue, Nov 30, 2021 at 1:27 PM Elijah Newren <newren@gmail.com> wrote:
> >
> > On Mon, Nov 15, 2021 at 3:51 PM Neeraj Singh via GitGitGadget
> > <gitgitgadget@gmail.com> wrote:
> > >
> > > From: Neeraj Singh <neerajsi@microsoft.com>
> > >
> > > The tmp_objdir API provides the ability to create temporary object
> > > directories, but was designed with the goal of having subprocesses
> > > access these object stores, followed by the main process migrating
> > > objects from it to the main object store or just deleting it.  The
> > > subprocesses would view it as their primary datastore and write to it.
> > >
> > > Here we add the tmp_objdir_replace_primary_odb function that replaces
> > > the current process's writable "main" object directory with the
> > > specified one. The previous main object directory is restored in either
> > > tmp_objdir_migrate or tmp_objdir_destroy.
> > >
> > > For the --remerge-diff usecase, add a new `will_destroy` flag in `struct
> > > object_database` to mark ephemeral object databases that do not require
> > > fsync durability.
> > >
> > > Add 'git prune' support for removing temporary object databases, and
> > > make sure that they have a name starting with tmp_ and containing an
> > > operation-specific name.
> > >
> > > Based-on-patch-by: Elijah Newren <newren@gmail.com>
> > >
> > > Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
> > > Signed-off-by: Junio C Hamano <gitster@pobox.com>
> > > ---
> > >  builtin/prune.c        | 23 +++++++++++++++---
> > >  builtin/receive-pack.c |  2 +-
> > >  environment.c          |  5 ++++
> > >  object-file.c          | 44 +++++++++++++++++++++++++++++++--
> > >  object-store.h         | 19 +++++++++++++++
> > >  object.c               |  2 +-
> > >  tmp-objdir.c           | 55 +++++++++++++++++++++++++++++++++++++++---
> > >  tmp-objdir.h           | 29 +++++++++++++++++++---
> > >  8 files changed, 165 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/builtin/prune.c b/builtin/prune.c
> > > index 485c9a3c56f..a76e6a5f0e8 100644
> > > --- a/builtin/prune.c
> > > +++ b/builtin/prune.c
> > > @@ -18,6 +18,7 @@ static int show_only;
> > >  static int verbose;
> > >  static timestamp_t expire;
> > >  static int show_progress = -1;
> > > +static struct strbuf remove_dir_buf = STRBUF_INIT;
> > >
> > >  static int prune_tmp_file(const char *fullpath)
> > >  {
> > > @@ -26,10 +27,20 @@ static int prune_tmp_file(const char *fullpath)
> > >                 return error("Could not stat '%s'", fullpath);
> > >         if (st.st_mtime > expire)
> > >                 return 0;
> > > -       if (show_only || verbose)
> > > -               printf("Removing stale temporary file %s\n", fullpath);
> > > -       if (!show_only)
> > > -               unlink_or_warn(fullpath);
> > > +       if (S_ISDIR(st.st_mode)) {
> > > +               if (show_only || verbose)
> > > +                       printf("Removing stale temporary directory %s\n", fullpath);
> > > +               if (!show_only) {
> > > +                       strbuf_reset(&remove_dir_buf);
> > > +                       strbuf_addstr(&remove_dir_buf, fullpath);
> > > +                       remove_dir_recursively(&remove_dir_buf, 0);
> >
> > Why not just define remove_dir_buf here rather than as a global?  It'd
> > not only make the code more readable by keeping everything localized,
> > it would have prevented the forgotten strbuf_reset() bug from the
> > earlier round of this patch.
> >
> > Sure, that'd be an extra memory allocation/free for each directory you
> > hit, which should be negligible compared to the cost of
> > remove_dir_recursively()...and I'm not sure this is performance
> > critical anyway (I don't see why we'd expect more than O(1) cruft
> > temporary directories).
>
> I'll take this suggestion.
>
> > > +               }
> > > +       } else {
> > > +               if (show_only || verbose)
> > > +                       printf("Removing stale temporary file %s\n", fullpath);
> > > +               if (!show_only)
> > > +                       unlink_or_warn(fullpath);
> > > +       }
> > >         return 0;
> > >  }
> > >
> > > @@ -97,6 +108,9 @@ static int prune_cruft(const char *basename, const char *path, void *data)
> > >
> > >  static int prune_subdir(unsigned int nr, const char *path, void *data)
> > >  {
> > > +       if (verbose)
> >
> > Shouldn't this be
> >     if (show_only || verbose)
> > ?
>
> Doing that breaks one of the tests, since we print extra stuff that's
> unexpected. I think I'm going to just revert this change, since it
> appears that we call this callback and try to remove the directory
> even if it's non-empty.

Makes sense.

> Do you have any comments or thoughts on how we want to allow the user
> to configure fsync settings?

I don't; sorry.
diff mbox series

Patch

diff --git a/builtin/prune.c b/builtin/prune.c
index 485c9a3c56f..a76e6a5f0e8 100644
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -18,6 +18,7 @@  static int show_only;
 static int verbose;
 static timestamp_t expire;
 static int show_progress = -1;
+static struct strbuf remove_dir_buf = STRBUF_INIT;
 
 static int prune_tmp_file(const char *fullpath)
 {
@@ -26,10 +27,20 @@  static int prune_tmp_file(const char *fullpath)
 		return error("Could not stat '%s'", fullpath);
 	if (st.st_mtime > expire)
 		return 0;
-	if (show_only || verbose)
-		printf("Removing stale temporary file %s\n", fullpath);
-	if (!show_only)
-		unlink_or_warn(fullpath);
+	if (S_ISDIR(st.st_mode)) {
+		if (show_only || verbose)
+			printf("Removing stale temporary directory %s\n", fullpath);
+		if (!show_only) {
+			strbuf_reset(&remove_dir_buf);
+			strbuf_addstr(&remove_dir_buf, fullpath);
+			remove_dir_recursively(&remove_dir_buf, 0);
+		}
+	} else {
+		if (show_only || verbose)
+			printf("Removing stale temporary file %s\n", fullpath);
+		if (!show_only)
+			unlink_or_warn(fullpath);
+	}
 	return 0;
 }
 
@@ -97,6 +108,9 @@  static int prune_cruft(const char *basename, const char *path, void *data)
 
 static int prune_subdir(unsigned int nr, const char *path, void *data)
 {
+	if (verbose)
+		printf("Removing directory %s\n", path);
+
 	if (!show_only)
 		rmdir(path);
 	return 0;
@@ -184,5 +198,6 @@  int cmd_prune(int argc, const char **argv, const char *prefix)
 		prune_shallow(show_only ? PRUNE_SHOW_ONLY : 0);
 	}
 
+	strbuf_release(&remove_dir_buf);
 	return 0;
 }
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 49b846d9605..8815e24cde5 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -2213,7 +2213,7 @@  static const char *unpack(int err_fd, struct shallow_info *si)
 		strvec_push(&child.args, alt_shallow_file);
 	}
 
-	tmp_objdir = tmp_objdir_create();
+	tmp_objdir = tmp_objdir_create("incoming");
 	if (!tmp_objdir) {
 		if (err_fd > 0)
 			close(err_fd);
diff --git a/environment.c b/environment.c
index 9da7f3c1a19..342400fcaad 100644
--- a/environment.c
+++ b/environment.c
@@ -17,6 +17,7 @@ 
 #include "commit.h"
 #include "strvec.h"
 #include "object-store.h"
+#include "tmp-objdir.h"
 #include "chdir-notify.h"
 #include "shallow.h"
 
@@ -331,10 +332,14 @@  static void update_relative_gitdir(const char *name,
 				   void *data)
 {
 	char *path = reparent_relative_path(old_cwd, new_cwd, get_git_dir());
+	struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb();
 	trace_printf_key(&trace_setup_key,
 			 "setup: move $GIT_DIR to '%s'",
 			 path);
+
 	set_git_dir_1(path);
+	if (tmp_objdir)
+		tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd);
 	free(path);
 }
 
diff --git a/object-file.c b/object-file.c
index c3d866a287e..0b6a61aeaff 100644
--- a/object-file.c
+++ b/object-file.c
@@ -683,6 +683,43 @@  void add_to_alternates_memory(const char *reference)
 			     '\n', NULL, 0);
 }
 
+struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy)
+{
+	struct object_directory *new_odb;
+
+	/*
+	 * Make sure alternates are initialized, or else our entry may be
+	 * overwritten when they are.
+	 */
+	prepare_alt_odb(the_repository);
+
+	/*
+	 * Make a new primary odb and link the old primary ODB in as an
+	 * alternate
+	 */
+	new_odb = xcalloc(1, sizeof(*new_odb));
+	new_odb->path = xstrdup(dir);
+	new_odb->will_destroy = will_destroy;
+	new_odb->next = the_repository->objects->odb;
+	the_repository->objects->odb = new_odb;
+	return new_odb->next;
+}
+
+void restore_primary_odb(struct object_directory *restore_odb, const char *old_path)
+{
+	struct object_directory *cur_odb = the_repository->objects->odb;
+
+	if (strcmp(old_path, cur_odb->path))
+		BUG("expected %s as primary object store; found %s",
+		    old_path, cur_odb->path);
+
+	if (cur_odb->next != restore_odb)
+		BUG("we expect the old primary object store to be the first alternate");
+
+	the_repository->objects->odb = restore_odb;
+	free_object_directory(cur_odb);
+}
+
 /*
  * Compute the exact path an alternate is at and returns it. In case of
  * error NULL is returned and the human readable error is added to `err`
@@ -1809,8 +1846,11 @@  int hash_object_file(const struct git_hash_algo *algo, const void *buf,
 /* Finalize a file on disk, and close it. */
 static void close_loose_object(int fd)
 {
-	if (fsync_object_files)
-		fsync_or_die(fd, "loose object file");
+	if (!the_repository->objects->odb->will_destroy) {
+		if (fsync_object_files)
+			fsync_or_die(fd, "loose object file");
+	}
+
 	if (close(fd) != 0)
 		die_errno(_("error when closing loose object file"));
 }
diff --git a/object-store.h b/object-store.h
index 952efb6a4be..cb173e69392 100644
--- a/object-store.h
+++ b/object-store.h
@@ -27,6 +27,11 @@  struct object_directory {
 	uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
 	struct oidtree *loose_objects_cache;
 
+	/*
+	 * This object store is ephemeral, so there is no need to fsync.
+	 */
+	int will_destroy;
+
 	/*
 	 * Path to the alternative object store. If this is a relative path,
 	 * it is relative to the current working directory.
@@ -58,6 +63,17 @@  void add_to_alternates_file(const char *dir);
  */
 void add_to_alternates_memory(const char *dir);
 
+/*
+ * Replace the current writable object directory with the specified temporary
+ * object directory; returns the former primary object directory.
+ */
+struct object_directory *set_temporary_primary_odb(const char *dir, int will_destroy);
+
+/*
+ * Restore a previous ODB replaced by set_temporary_main_odb.
+ */
+void restore_primary_odb(struct object_directory *restore_odb, const char *old_path);
+
 /*
  * Populate and return the loose object cache array corresponding to the
  * given object ID.
@@ -68,6 +84,9 @@  struct oidtree *odb_loose_cache(struct object_directory *odb,
 /* Empty the loose object cache for the specified object directory. */
 void odb_clear_loose_cache(struct object_directory *odb);
 
+/* Clear and free the specified object directory */
+void free_object_directory(struct object_directory *odb);
+
 struct packed_git {
 	struct hashmap_entry packmap_ent;
 	struct packed_git *next;
diff --git a/object.c b/object.c
index 23a24e678a8..048f96a260e 100644
--- a/object.c
+++ b/object.c
@@ -513,7 +513,7 @@  struct raw_object_store *raw_object_store_new(void)
 	return o;
 }
 
-static void free_object_directory(struct object_directory *odb)
+void free_object_directory(struct object_directory *odb)
 {
 	free(odb->path);
 	odb_clear_loose_cache(odb);
diff --git a/tmp-objdir.c b/tmp-objdir.c
index b8d880e3626..3d38eeab66b 100644
--- a/tmp-objdir.c
+++ b/tmp-objdir.c
@@ -1,5 +1,6 @@ 
 #include "cache.h"
 #include "tmp-objdir.h"
+#include "chdir-notify.h"
 #include "dir.h"
 #include "sigchain.h"
 #include "string-list.h"
@@ -11,6 +12,8 @@ 
 struct tmp_objdir {
 	struct strbuf path;
 	struct strvec env;
+	struct object_directory *prev_odb;
+	int will_destroy;
 };
 
 /*
@@ -38,6 +41,9 @@  static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
 	if (t == the_tmp_objdir)
 		the_tmp_objdir = NULL;
 
+	if (!on_signal && t->prev_odb)
+		restore_primary_odb(t->prev_odb, t->path.buf);
+
 	/*
 	 * This may use malloc via strbuf_grow(), but we should
 	 * have pre-grown t->path sufficiently so that this
@@ -52,6 +58,7 @@  static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal)
 	 */
 	if (!on_signal)
 		tmp_objdir_free(t);
+
 	return err;
 }
 
@@ -121,7 +128,7 @@  static int setup_tmp_objdir(const char *root)
 	return ret;
 }
 
-struct tmp_objdir *tmp_objdir_create(void)
+struct tmp_objdir *tmp_objdir_create(const char *prefix)
 {
 	static int installed_handlers;
 	struct tmp_objdir *t;
@@ -129,11 +136,16 @@  struct tmp_objdir *tmp_objdir_create(void)
 	if (the_tmp_objdir)
 		BUG("only one tmp_objdir can be used at a time");
 
-	t = xmalloc(sizeof(*t));
+	t = xcalloc(1, sizeof(*t));
 	strbuf_init(&t->path, 0);
 	strvec_init(&t->env);
 
-	strbuf_addf(&t->path, "%s/incoming-XXXXXX", get_object_directory());
+	/*
+	 * Use a string starting with tmp_ so that the builtin/prune.c code
+	 * can recognize any stale objdirs left behind by a crash and delete
+	 * them.
+	 */
+	strbuf_addf(&t->path, "%s/tmp_objdir-%s-XXXXXX", get_object_directory(), prefix);
 
 	/*
 	 * Grow the strbuf beyond any filename we expect to be placed in it.
@@ -269,6 +281,13 @@  int tmp_objdir_migrate(struct tmp_objdir *t)
 	if (!t)
 		return 0;
 
+	if (t->prev_odb) {
+		if (the_repository->objects->odb->will_destroy)
+			BUG("migrating an ODB that was marked for destruction");
+		restore_primary_odb(t->prev_odb, t->path.buf);
+		t->prev_odb = NULL;
+	}
+
 	strbuf_addbuf(&src, &t->path);
 	strbuf_addstr(&dst, get_object_directory());
 
@@ -292,3 +311,33 @@  void tmp_objdir_add_as_alternate(const struct tmp_objdir *t)
 {
 	add_to_alternates_memory(t->path.buf);
 }
+
+void tmp_objdir_replace_primary_odb(struct tmp_objdir *t, int will_destroy)
+{
+	if (t->prev_odb)
+		BUG("the primary object database is already replaced");
+	t->prev_odb = set_temporary_primary_odb(t->path.buf, will_destroy);
+	t->will_destroy = will_destroy;
+}
+
+struct tmp_objdir *tmp_objdir_unapply_primary_odb(void)
+{
+	if (!the_tmp_objdir || !the_tmp_objdir->prev_odb)
+		return NULL;
+
+	restore_primary_odb(the_tmp_objdir->prev_odb, the_tmp_objdir->path.buf);
+	the_tmp_objdir->prev_odb = NULL;
+	return the_tmp_objdir;
+}
+
+void tmp_objdir_reapply_primary_odb(struct tmp_objdir *t, const char *old_cwd,
+		const char *new_cwd)
+{
+	char *path;
+
+	path = reparent_relative_path(old_cwd, new_cwd, t->path.buf);
+	strbuf_reset(&t->path);
+	strbuf_addstr(&t->path, path);
+	free(path);
+	tmp_objdir_replace_primary_odb(t, t->will_destroy);
+}
diff --git a/tmp-objdir.h b/tmp-objdir.h
index b1e45b4c75d..a3145051f25 100644
--- a/tmp-objdir.h
+++ b/tmp-objdir.h
@@ -10,7 +10,7 @@ 
  *
  * Example:
  *
- *	struct tmp_objdir *t = tmp_objdir_create();
+ *	struct tmp_objdir *t = tmp_objdir_create("incoming");
  *	if (!run_command_v_opt_cd_env(cmd, 0, NULL, tmp_objdir_env(t)) &&
  *	    !tmp_objdir_migrate(t))
  *		printf("success!\n");
@@ -22,9 +22,10 @@ 
 struct tmp_objdir;
 
 /*
- * Create a new temporary object directory; returns NULL on failure.
+ * Create a new temporary object directory with the specified prefix;
+ * returns NULL on failure.
  */
-struct tmp_objdir *tmp_objdir_create(void);
+struct tmp_objdir *tmp_objdir_create(const char *prefix);
 
 /*
  * Return a list of environment strings, suitable for use with
@@ -51,4 +52,26 @@  int tmp_objdir_destroy(struct tmp_objdir *);
  */
 void tmp_objdir_add_as_alternate(const struct tmp_objdir *);
 
+/*
+ * Replaces the main object store in the current process with the temporary
+ * object directory and makes the former main object store an alternate.
+ * If will_destroy is nonzero, the object directory may not be migrated.
+ */
+void tmp_objdir_replace_primary_odb(struct tmp_objdir *, int will_destroy);
+
+/*
+ * If the primary object database was replaced by a temporary object directory,
+ * restore it to its original value while keeping the directory contents around.
+ * Returns NULL if the primary object database was not replaced.
+ */
+struct tmp_objdir *tmp_objdir_unapply_primary_odb(void);
+
+/*
+ * Reapplies the former primary temporary object database, after protentially
+ * changing its relative path.
+ */
+void tmp_objdir_reapply_primary_odb(struct tmp_objdir *, const char *old_cwd,
+		const char *new_cwd);
+
+
 #endif /* TMP_OBJDIR_H */