@@ -9,7 +9,7 @@ git-backfill - Download missing objects in a partial clone
SYNOPSIS
--------
[verse]
-'git backfill' [--batch-size=<n>]
+'git backfill' [--batch-size=<n>] [--[no-]sparse]
DESCRIPTION
-----------
@@ -46,6 +46,10 @@ OPTIONS
from the server. This size may be exceeded by the last set of
blobs seen at a given path. Default batch size is 16,000.
+--[no-]sparse::
+ Only download objects if they appear at a path that matches the
+ current sparse-checkout.
+
SEE ALSO
--------
linkgit:git-clone[1].
@@ -4,6 +4,7 @@
#include "parse-options.h"
#include "repository.h"
#include "commit.h"
+#include "dir.h"
#include "hex.h"
#include "tree.h"
#include "tree-walk.h"
@@ -21,7 +22,7 @@
#include "path-walk.h"
static const char * const builtin_backfill_usage[] = {
- N_("git backfill [--batch-size=<n>]"),
+ N_("git backfill [--batch-size=<n>] [--[no-]sparse]"),
NULL
};
@@ -29,6 +30,7 @@ struct backfill_context {
struct repository *repo;
struct oid_array current_batch;
size_t batch_size;
+ int sparse;
};
static void clear_backfill_context(struct backfill_context *ctx)
@@ -84,6 +86,12 @@ static int do_backfill(struct backfill_context *ctx)
struct path_walk_info info = PATH_WALK_INFO_INIT;
int ret;
+	if (ctx->sparse) {
+		/*
+		 * Restrict the path walk to the current sparse-checkout
+		 * definition. The pattern list is heap-allocated here, so
+		 * it has to be released again if loading the patterns
+		 * fails; otherwise the early return would leak it.
+		 */
+		CALLOC_ARRAY(info.pl, 1);
+		if (get_sparse_checkout_patterns(info.pl)) {
+			clear_pattern_list(info.pl);
+			FREE_AND_NULL(info.pl);
+			return error(_("problem loading sparse-checkout"));
+		}
+	}
+
repo_init_revisions(ctx->repo, &revs, "");
handle_revision_arg("HEAD", &revs, 0, 0);
@@ -107,10 +115,13 @@ int cmd_backfill(int argc, const char **argv, const char *prefix)
.repo = the_repository,
.current_batch = OID_ARRAY_INIT,
.batch_size = 16000,
+ .sparse = 0,
};
struct option options[] = {
+	/*
+	 * NOTE(review): ctx.batch_size is declared as size_t while it is
+	 * registered through OPT_INTEGER -- confirm that the option parser
+	 * stores through a pointer of a matching width.
+	 */
	OPT_INTEGER(0, "batch-size", &ctx.batch_size,
		    N_("Minimum number of objects to request at a time")),
+	/* --[no-]sparse toggles ctx.sparse, which defaults to 0 (off). */
+	OPT_BOOL(0, "sparse", &ctx.sparse,
+		 N_("Restrict the missing objects to the current sparse-checkout")),
	OPT_END(),
};
@@ -10,6 +10,7 @@
#include "hex.h"
#include "object.h"
#include "oid-array.h"
+#include "repository.h"
#include "revision.h"
#include "string-list.h"
#include "strmap.h"
@@ -111,6 +112,23 @@ static int add_children(struct path_walk_context *ctx,
if (type == OBJ_TREE)
strbuf_addch(&path, '/');
+		if (ctx->info->pl) {
+			/*
+			 * The matcher receives dtype by pointer, so it must
+			 * not be handed an indeterminate value. The entry
+			 * kind is already known from the tree walk: trees are
+			 * directories, everything else is treated as a
+			 * regular file for matching purposes.
+			 */
+			int dtype = (type == OBJ_TREE) ? DT_DIR : DT_REG;
+			enum pattern_match_result match;
+
+			match = path_matches_pattern_list(path.buf, path.len,
+							  path.buf + base_len, &dtype,
+							  ctx->info->pl,
+							  ctx->repo->index);
+
+			if (ctx->info->pl->use_cone_patterns) {
+				/*
+				 * Cone mode: anything that does not match,
+				 * trees included, is pruned from the walk.
+				 */
+				if (match == NOT_MATCHED)
+					continue;
+			} else if (type == OBJ_BLOB && match != MATCHED) {
+				/*
+				 * Non-cone mode: only blobs are filtered;
+				 * trees are still added so they get explored.
+				 */
+				continue;
+			}
+		}
+
if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) {
CALLOC_ARRAY(list, 1);
list->type = type;
@@ -6,6 +6,7 @@
struct rev_info;
struct oid_array;
+struct pattern_list;
/**
* The type of a function pointer for the method that is called on a list of
@@ -30,6 +31,16 @@ struct path_walk_info {
*/
path_fn path_fn;
void *path_fn_data;
+
+ /**
+ * Specify a sparse-checkout definition to match our paths to. Do not
+ * walk outside of this sparse definition. If the patterns are in
+ * cone mode, then the search may prune directories that are outside
+ * of the cone. If not in cone mode, then all tree paths will be
+ * explored but the path_fn will only be called when the path matches
+ * the sparse-checkout patterns.
+ */
+ struct pattern_list *pl;
};
#define PATH_WALK_INFO_INIT { 0 }
@@ -80,6 +80,61 @@ test_expect_success 'do partial clone 2, backfill batch size' '
test_line_count = 0 revs2
'
+test_expect_success 'backfill --sparse' '
+ git clone --sparse --filter=blob:none \
+ --single-branch --branch=main \
+ "file://$(pwd)/srv.bare" backfill3 &&
+
+ # Initial checkout includes four files at root; 44 objects are still reported missing.
+ git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 44 missing &&
+
+ # Initial sparse-checkout is just the files at root, so backfill --sparse only fetches
+ # the older versions of the four files at tip (4 fetched, missing drops from 44 to 40).
+ GIT_TRACE2_EVENT="$(pwd)/sparse-trace1" git \
+ -C backfill3 backfill --sparse &&
+ test_trace2_data promisor fetch_count 4 <sparse-trace1 &&
+ test_trace2_data path-walk paths 5 <sparse-trace1 &&
+ git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 40 missing &&
+
+ # Expand the sparse-checkout to include the directory d recursively. This
+ # engages the algorithm to skip the trees for the directory a. Note that
+ # the "sparse-checkout set" command downloads the objects at tip
+ # to satisfy the current checkout, so missing drops by 16 while backfill itself fetches 8.
+ git -C backfill3 sparse-checkout set d &&
+ GIT_TRACE2_EVENT="$(pwd)/sparse-trace2" git \
+ -C backfill3 backfill --sparse &&
+ test_trace2_data promisor fetch_count 8 <sparse-trace2 &&
+ test_trace2_data path-walk paths 15 <sparse-trace2 &&
+ git -C backfill3 rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 24 missing
+'
+
+test_expect_success 'backfill --sparse without cone mode' '
+ git clone --no-checkout --filter=blob:none \
+ --single-branch --branch=main \
+ "file://$(pwd)/srv.bare" backfill4 &&
+
+ # No blobs yet: nothing was checked out, so 48 objects are reported missing.
+ git -C backfill4 rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 48 missing &&
+
+ # Define sparse-checkout by filename regardless of parent directory.
+ # The checkout below downloads 6 blobs to satisfy the working tree.
+ git -C backfill4 sparse-checkout set --no-cone "**/file.1.txt" &&
+ git -C backfill4 checkout main &&
+
+ GIT_TRACE2_EVENT="$(pwd)/no-cone-trace1" git \
+ -C backfill4 backfill --sparse &&
+ test_trace2_data promisor fetch_count 6 <no-cone-trace1 &&
+
+ # This walk needed to visit all directories to search for these paths; 6 + 6 blobs fetched leaves 36 missing.
+ test_trace2_data path-walk paths 12 <no-cone-trace1 &&
+ git -C backfill4 rev-list --quiet --objects --missing=print HEAD >missing &&
+ test_line_count = 36 missing
+'
+
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd