diff mbox series

[v3,3/8] maintenance: create auto condition for loose-objects

Message ID d6e382c43effe063fb1137659f616d414ee52682.1598380599.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Maintenance II: prefetch, loose-objects, incremental-repack tasks | expand

Commit Message

Linus Arver via GitGitGadget Aug. 25, 2020, 6:36 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

The loose-objects task deletes loose objects that already exist in a
pack-file, then places the remaining loose objects into a new pack-file.
If this step runs all the time, then we risk creating pack-files with
very few objects with every 'git commit' process. To prevent
overwhelming the packs directory with small pack-files, require a minimum
number of loose objects before the task is run.

The 'maintenance.loose-objects.auto' config option specifies a minimum
number of loose objects to justify the task to run under the '--auto'
option. This defaults to 100 loose objects. Setting the value to zero
will prevent the step from running under '--auto' while a negative value
will force it to run every time.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 Documentation/config/maintenance.txt |  9 +++++++++
 builtin/gc.c                         | 30 ++++++++++++++++++++++++++++
 t/t7900-maintenance.sh               | 25 +++++++++++++++++++++++
 3 files changed, 64 insertions(+)

Comments

Jonathan Tan Sept. 22, 2020, 11:15 p.m. UTC | #1
> +test_expect_success 'maintenance.loose-objects.auto' '
> +	git repack -adk &&
> +	GIT_TRACE2_EVENT="$(pwd)/trace-lo1.txt" \
> +		git -c maintenance.loose-objects.auto=1 maintenance \
> +		run --auto --task=loose-objects 2>/dev/null &&
> +	test_subcommand ! git prune-packed --quiet <trace-lo1.txt &&
> +	for i in 1 2

Any reason why this is run twice?

> +	do
> +		printf data-A-$i | git hash-object -t blob --stdin -w &&
> +		GIT_TRACE2_EVENT="$(pwd)/trace-loA-$i" \
> +			git -c maintenance.loose-objects.auto=2 \
> +			maintenance run --auto --task=loose-objects 2>/dev/null &&
> +		test_subcommand ! git prune-packed --quiet <trace-loA-$i &&

OK - there is only 1 loose object so the loose-objects task doesn't
get run.

> +		printf data-B-$i | git hash-object -t blob --stdin -w &&
> +		GIT_TRACE2_EVENT="$(pwd)/trace-loB-$i" \
> +			git -c maintenance.loose-objects.auto=2 \
> +			maintenance run --auto --task=loose-objects 2>/dev/null &&
> +		test_subcommand git prune-packed --quiet <trace-loB-$i &&

OK - there are 2 loose objects so the loose-objects task gets run. But
we need to remember that the first time it is run, only the packfile
gets created - the loose objects aren't deleted. "prune-packed" here is
to show that the loose-objects task is run, but it has no effect.

> +		GIT_TRACE2_EVENT="$(pwd)/trace-loC-$i" \
> +			git -c maintenance.loose-objects.auto=2 \
> +			maintenance run --auto --task=loose-objects 2>/dev/null &&
> +		test_subcommand git prune-packed --quiet <trace-loC-$i || return 1

OK - the 2 loose objects still exist, so the loose-objects task gets
run. "prune-packed" here shows that the loose-objects task is run.
Derrick Stolee Sept. 24, 2020, 1:51 p.m. UTC | #2
On 9/22/2020 7:15 PM, Jonathan Tan wrote:
>> +test_expect_success 'maintenance.loose-objects.auto' '
>> +	git repack -adk &&
>> +	GIT_TRACE2_EVENT="$(pwd)/trace-lo1.txt" \
>> +		git -c maintenance.loose-objects.auto=1 maintenance \
>> +		run --auto --task=loose-objects 2>/dev/null &&
>> +	test_subcommand ! git prune-packed --quiet <trace-lo1.txt &&
>> +	for i in 1 2
> 
> Any reason why this is run twice?

I think the original reason was to demonstrate how two runs interact,
but then that was done in the middle of the loop body so the loop is
not necessary.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/Documentation/config/maintenance.txt b/Documentation/config/maintenance.txt
index 7cc6700d57..c31613be62 100644
--- a/Documentation/config/maintenance.txt
+++ b/Documentation/config/maintenance.txt
@@ -14,3 +14,12 @@  maintenance.commit-graph.auto::
 	reachable commits that are not in the commit-graph file is at least
 	the value of `maintenance.commit-graph.auto`. The default value is
 	100.
+
+maintenance.loose-objects.auto::
+	This integer config option controls how often the `loose-objects` task
+	should be run as part of `git maintenance run --auto`. If zero, then
+	the `loose-objects` task will not run with the `--auto` option. A
+	negative value will force the task to run every time. Otherwise, a
+	positive value implies the command should run when the number of
+	loose objects is at least the value of `maintenance.loose-objects.auto`.
+	The default value is 100.
diff --git a/builtin/gc.c b/builtin/gc.c
index 248ccde3c3..25245bcc10 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -899,6 +899,35 @@  struct write_loose_object_data {
 	int batch_size;
 };
 
+static int loose_object_auto_limit = 100;
+
+static int loose_object_count(const struct object_id *oid,
+			       const char *path,
+			       void *data)
+{
+	int *count = (int*)data;
+	if (++(*count) >= loose_object_auto_limit)
+		return 1;
+	return 0;
+}
+
+static int loose_object_auto_condition(void)
+{
+	int count = 0;
+
+	git_config_get_int("maintenance.loose-objects.auto",
+			   &loose_object_auto_limit);
+
+	if (!loose_object_auto_limit)
+		return 0;
+	if (loose_object_auto_limit < 0)
+		return 1;
+
+	return for_each_loose_file_in_objdir(the_repository->objects->odb->path,
+					     loose_object_count,
+					     NULL, NULL, &count);
+}
+
 static int bail_on_loose(const struct object_id *oid,
 			 const char *path,
 			 void *data)
@@ -1009,6 +1038,7 @@  static struct maintenance_task tasks[] = {
 	[TASK_LOOSE_OBJECTS] = {
 		"loose-objects",
 		maintenance_task_loose_objects,
+		loose_object_auto_condition,
 	},
 	[TASK_GC] = {
 		"gc",
diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh
index 2e9e369786..efda1cf69b 100755
--- a/t/t7900-maintenance.sh
+++ b/t/t7900-maintenance.sh
@@ -125,4 +125,29 @@  test_expect_success 'loose-objects task' '
 	test_cmp packs-between packs-after
 '
 
+test_expect_success 'maintenance.loose-objects.auto' '
+	git repack -adk &&
+	GIT_TRACE2_EVENT="$(pwd)/trace-lo1.txt" \
+		git -c maintenance.loose-objects.auto=1 maintenance \
+		run --auto --task=loose-objects 2>/dev/null &&
+	test_subcommand ! git prune-packed --quiet <trace-lo1.txt &&
+	for i in 1 2
+	do
+		printf data-A-$i | git hash-object -t blob --stdin -w &&
+		GIT_TRACE2_EVENT="$(pwd)/trace-loA-$i" \
+			git -c maintenance.loose-objects.auto=2 \
+			maintenance run --auto --task=loose-objects 2>/dev/null &&
+		test_subcommand ! git prune-packed --quiet <trace-loA-$i &&
+		printf data-B-$i | git hash-object -t blob --stdin -w &&
+		GIT_TRACE2_EVENT="$(pwd)/trace-loB-$i" \
+			git -c maintenance.loose-objects.auto=2 \
+			maintenance run --auto --task=loose-objects 2>/dev/null &&
+		test_subcommand git prune-packed --quiet <trace-loB-$i &&
+		GIT_TRACE2_EVENT="$(pwd)/trace-loC-$i" \
+			git -c maintenance.loose-objects.auto=2 \
+			maintenance run --auto --task=loose-objects 2>/dev/null &&
+		test_subcommand git prune-packed --quiet <trace-loC-$i || return 1
+	done
+'
+
 test_done