diff mbox series

[08/21] maintenance: initialize task array and hashmap

Message ID 5cdd38afa60cdf768dd194f90ae0b2190123fdea.1594131695.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Maintenance builtin, allowing 'gc --auto' customization | expand

Commit Message

Linus Arver via GitGitGadget July 7, 2020, 2:21 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

In anticipation of implementing multiple maintenance tasks inside the
'maintenance' builtin, use a list and hashmap of structs to describe the
work to be done.

The struct maintenance_task stores the name of the task (as given by a
future command-line argument) along with a function pointer to its
implementation and a boolean for whether the step is enabled.

A list of pointers to these structs are initialized with the full list
of implemented tasks along with a default order. For now, this list only
contains the "gc" task. This task is also the only task enabled by
default.

This list is also inserted into a hashmap. This allows command-line
arguments to quickly find the tasks by name, not sensitive to case. To
ensure this list and hashmap work well together, the list only contains
pointers to the struct information. This will allow a sort on the list
while preserving the hashmap data.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/gc.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 62 insertions(+), 1 deletion(-)

Comments

Jonathan Tan July 9, 2020, 2:25 a.m. UTC | #1
> This list is also inserted into a hashmap. This allows command-line
> arguments to quickly find the tasks by name, not sensitive to case. To
> ensure this list and hashmap work well together, the list only contains
> pointers to the struct information. This will allow a sort on the list
> while preserving the hashmap data.

I think having the hashmap is unnecessarily complicated in this case -
with the small number of tasks, a list would be fine. But I don't feel
strongly about this.
Derrick Stolee July 9, 2020, 1:15 p.m. UTC | #2
On 7/8/2020 10:25 PM, Jonathan Tan wrote:
>> This list is also inserted into a hashmap. This allows command-line
>> arguments to quickly find the tasks by name, not sensitive to case. To
>> ensure this list and hashmap work well together, the list only contains
>> pointers to the struct information. This will allow a sort on the list
>> while preserving the hashmap data.
> 
> I think having the hashmap is unnecessarily complicated in this case -
> with the small number of tasks, a list would be fine. But I don't feel
> strongly about this.

You're probably right that iterating through a list with (hopefully)
at most a dozen entries is fast enough that a hashmap is overkill here.

Now is the real test: can I change this patch in v2 without needing
to mess with any of the others? The intention here was to make adding
tasks as simple as possible, so we shall see. :D

Thanks,
-Stolee
Junio C Hamano July 9, 2020, 1:51 p.m. UTC | #3
Derrick Stolee <stolee@gmail.com> writes:

> On 7/8/2020 10:25 PM, Jonathan Tan wrote:
>>> This list is also inserted into a hashmap. This allows command-line
>>> arguments to quickly find the tasks by name, not sensitive to case. To
>>> ensure this list and hashmap work well together, the list only contains
>>> pointers to the struct information. This will allow a sort on the list
>>> while preserving the hashmap data.
>> 
>> I think having the hashmap is unnecessarily complicated in this case -
>> with the small number of tasks, a list would be fine. But I don't feel
>> strongly about this.
>
> You're probably right that iterating through a list with (hopefully)
> at most a dozen entries is fast enough that a hashmap is overkill here.
>
> Now is the real test: can I change this patch in v2 without needing
> to mess with any of the others? The intention here was to make adding
> tasks as simple as possible, so we shall see. :D

Adding a new element to a list would be simple no matter how the
list is represented.  But I think the real question is what access
pattern we expect.  Do we need to look up by name a single one or
selected few?  Do we need the iteration/enumeration be stable?  etc.
diff mbox series

Patch

diff --git a/builtin/gc.c b/builtin/gc.c
index 3881a99e9d..c143bf50df 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -705,6 +705,8 @@  int cmd_gc(int argc, const char **argv, const char *prefix)
 	return 0;
 }
 
+#define MAX_NUM_TASKS 1
+
 static const char * const builtin_maintenance_usage[] = {
 	N_("git maintenance run [<options>]"),
 	NULL
@@ -734,9 +736,67 @@  static int maintenance_task_gc(struct repository *r)
 	return result;
 }
 
+typedef int maintenance_task_fn(struct repository *r);
+
+struct maintenance_task {
+	struct hashmap_entry ent;
+	const char *name;
+	maintenance_task_fn *fn;
+	unsigned enabled:1;
+};
+
+static int task_entry_cmp(const void *unused_cmp_data,
+			  const struct hashmap_entry *eptr,
+			  const struct hashmap_entry *entry_or_key,
+			  const void *keydata)
+{
+	const struct maintenance_task *e1, *e2;
+	const char *name = keydata;
+
+	e1 = container_of(eptr, const struct maintenance_task, ent);
+	e2 = container_of(entry_or_key, const struct maintenance_task, ent);
+
+	return strcasecmp(e1->name, name ? name : e2->name);
+}
+
+struct maintenance_task *tasks[MAX_NUM_TASKS];
+int num_tasks;
+struct hashmap task_map;
+
 static int maintenance_run(struct repository *r)
 {
-	return maintenance_task_gc(r);
+	int i;
+	int result = 0;
+
+	for (i = 0; !result && i < num_tasks; i++) {
+		if (!tasks[i]->enabled)
+			continue;
+		result = tasks[i]->fn(r);
+	}
+
+	return result;
+}
+
+static void initialize_tasks(void)
+{
+	int i;
+	num_tasks = 0;
+
+	for (i = 0; i < MAX_NUM_TASKS; i++)
+		tasks[i] = xcalloc(1, sizeof(struct maintenance_task));
+
+	tasks[num_tasks]->name = "gc";
+	tasks[num_tasks]->fn = maintenance_task_gc;
+	tasks[num_tasks]->enabled = 1;
+	num_tasks++;
+
+	hashmap_init(&task_map, task_entry_cmp, NULL, MAX_NUM_TASKS);
+
+	for (i = 0; i < num_tasks; i++) {
+		hashmap_entry_init(&tasks[i]->ent,
+				   strihash(tasks[i]->name));
+		hashmap_add(&task_map, &tasks[i]->ent);
+	}
 }
 
 int cmd_maintenance(int argc, const char **argv, const char *prefix)
@@ -758,6 +818,7 @@  int cmd_maintenance(int argc, const char **argv, const char *prefix)
 				   builtin_maintenance_options);
 
 	opts.quiet = !isatty(2);
+	initialize_tasks();
 
 	argc = parse_options(argc, argv, prefix,
 			     builtin_maintenance_options,