diff mbox series

[v2,2/4] repack: add --filter=<filter-spec> option

Message ID 6e7c8410b8dcd2f4a7e188eb5b55ae8eecb54e40.1644372606.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series repack: add --filter= | expand

Commit Message

John Cai Feb. 9, 2022, 2:10 a.m. UTC
From: John Cai <johncai86@gmail.com>

In order to use a separate HTTP server as a remote to offload large
blobs, imagine the following setup:

A. An HTTP server to use as a generalized object store.
B. A server update hook that uploads large blobs to (A).
C. A Git server.
D. A remote helper that knows how to download objects from the HTTP
server.
E. A regular job that runs `git repack --filter=<filter-spec>` to
remove large blobs from (C).

Clients would need to configure both (C) and (A) as promisor remotes to
be able to get everything. When they push new large blobs, they can
still push them to (C), as (B) will upload them to (A), and (E) will
regularly remove those large blobs from (C).

This way, with a little bit of client and server configuration, we can
have a native way to support offloading large files without Git LFS.
It would also be more flexible, as you can easily change which blobs
are considered large files by adjusting (B) and (E).

A fuller demo can be found at http://tiny.cc/object_storage_demo

Based-on-patch-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: John Cai <johncai86@gmail.com>
---
 Documentation/git-repack.txt |  5 +++++
 builtin/repack.c             | 22 +++++++++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)
diff mbox series

Patch

diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index ee30edc178a..e394ec52ab1 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -126,6 +126,11 @@  depth is 4095.
 	a larger and slower repository; see the discussion in
 	`pack.packSizeLimit`.
 
+--filter=<filter-spec>::
+	Omit certain objects (usually blobs) from the resulting
+	packfile. See linkgit:git-rev-list[1] for valid
+	`<filter-spec>` forms.
+
 -b::
 --write-bitmap-index::
 	Write a reachability bitmap index as part of the repack. This
diff --git a/builtin/repack.c b/builtin/repack.c
index da1e364a756..3f1e8a39a2b 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -152,6 +152,7 @@  struct pack_objects_args {
 	const char *depth;
 	const char *threads;
 	const char *max_pack_size;
+	const char *filter;
 	int no_reuse_delta;
 	int no_reuse_object;
 	int quiet;
@@ -172,6 +173,8 @@  static void prepare_pack_objects(struct child_process *cmd,
 		strvec_pushf(&cmd->args, "--threads=%s", args->threads);
 	if (args->max_pack_size)
 		strvec_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
+	if (args->filter)
+		strvec_pushf(&cmd->args, "--filter=%s", args->filter);
 	if (args->no_reuse_delta)
 		strvec_pushf(&cmd->args, "--no-reuse-delta");
 	if (args->no_reuse_object)
@@ -238,6 +241,13 @@  static unsigned populate_pack_exts(char *name)
 	return ret;
 }
 
+static void write_promisor_file_1(char *p)
+{
+	char *promisor_name = mkpathdup("%s-%s.promisor", packtmp, p);
+	write_promisor_file(promisor_name, NULL, 0);
+	free(promisor_name);
+}
+
 static void repack_promisor_objects(const struct pack_objects_args *args,
 				    struct string_list *names)
 {
@@ -269,7 +279,6 @@  static void repack_promisor_objects(const struct pack_objects_args *args,
 	out = xfdopen(cmd.out, "r");
 	while (strbuf_getline_lf(&line, out) != EOF) {
 		struct string_list_item *item;
-		char *promisor_name;
 
 		if (line.len != the_hash_algo->hexsz)
 			die(_("repack: Expecting full hex object ID lines only from pack-objects."));
@@ -286,13 +295,8 @@  static void repack_promisor_objects(const struct pack_objects_args *args,
 		 * concatenate the contents of all .promisor files instead of
 		 * just creating a new empty file.
 		 */
-		promisor_name = mkpathdup("%s-%s.promisor", packtmp,
-					  line.buf);
-		write_promisor_file(promisor_name, NULL, 0);
-
+		write_promisor_file_1(line.buf);
 		item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
-
-		free(promisor_name);
 	}
 	fclose(out);
 	if (finish_command(&cmd))
@@ -660,6 +664,8 @@  int cmd_repack(int argc, const char **argv, const char *prefix)
 				N_("limits the maximum number of threads")),
 		OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
 				N_("maximum size of each packfile")),
+		OPT_STRING(0, "filter", &po_args.filter, N_("args"),
+				N_("object filtering")),
 		OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
 				N_("repack objects in packs marked with .keep")),
 		OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
@@ -819,6 +825,8 @@  int cmd_repack(int argc, const char **argv, const char *prefix)
 		if (line.len != the_hash_algo->hexsz)
 			die(_("repack: Expecting full hex object ID lines only from pack-objects."));
 		string_list_append(&names, line.buf);
+		if (po_args.filter)
+			write_promisor_file_1(line.buf);
 	}
 	fclose(out);
 	ret = finish_command(&cmd);