diff mbox series

[v3,02/11] Add initial support for many promisor remotes

Message ID 20190312132959.11764-3-chriscool@tuxfamily.org (mailing list archive)
State New, archived
Headers show
Series Many promisor remotes | expand

Commit Message

Christian Couder March 12, 2019, 1:29 p.m. UTC
From: Christian Couder <christian.couder@gmail.com>

The promisor-remote.{c,h} files will contain functions to
manage many promisor remotes.

We expect that there will not be a lot of promisor remotes,
so it is ok to use a simple linked list to manage them.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Makefile          |   1 +
 promisor-remote.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++
 promisor-remote.h |  17 ++++++++
 3 files changed, 118 insertions(+)
 create mode 100644 promisor-remote.c
 create mode 100644 promisor-remote.h

Comments

Junio C Hamano March 13, 2019, 4:09 a.m. UTC | #1
Christian Couder <christian.couder@gmail.com> writes:

> +struct promisor_remote *promisor_remote_new(const char *remote_name)
> +{

Shouldn't this be static?  The config callback that calls this
function is inside this file.

> +	struct promisor_remote *o;
> +
> +	o = xcalloc(1, sizeof(*o));
> +	o->remote_name = xstrdup(remote_name);

A comment on this later...

> +static struct promisor_remote *promisor_remote_look_up(const char *remote_name,
> +						       struct promisor_remote **previous)

In our codebase, this operation is far more often called "lookup",
one word, according to "git grep -e look_up \*.h".

> +{
> +	struct promisor_remote *o, *p;
> +
> +	for (p = NULL, o = promisors; o; p = o, o = o->next)
> +		if (o->remote_name && !strcmp(o->remote_name, remote_name)) {
> +			if (previous)
> +				*previous = p;

I think the "previous" thing is for the callers to learn what
pointer points at the found entry, allowing e.g. an element to be
inserted just before the found element.  If so, would it make more
sense to use the more familiar pattern to use

	*previous = &promisors;

here?  That would remove the need to switch on NULL-ness of previous
in the caller.

> diff --git a/promisor-remote.h b/promisor-remote.h
> new file mode 100644
> index 0000000000..bfbf7c0f21
> --- /dev/null
> +++ b/promisor-remote.h
> @@ -0,0 +1,17 @@
> +#ifndef PROMISOR_REMOTE_H
> +#define PROMISOR_REMOTE_H
> +
> +/*
> + * Promisor remote linked list
> + * Its information come from remote.XXX config entries.
> + */
> +struct promisor_remote {
> +	const char *remote_name;
> +	struct promisor_remote *next;
> +};

Would it make the management of storage easier to make it

	struct promisor_remote {
		struct promisor_remote *next;
		const char name[FLEX_ARRAY];
	};

that will allow allocation with

	struct promisor_remote *r;
	FLEX_ALLOC_STR(r, name, remote_name);

Or if the remote_name field must be a pointer, perhaps use
FLEXPTR_ALLOC_STR().

What is the rule for these promisor names?  If these entries were on
the configuration file, then:

	[remote "origin"]
		url = ...
		promisor = frotz
		promisor = nitfol

	[remote "mirror"]
		url = ...
		promisor = frotz
		promisor = Frotz
		promisor = nit fol

would the two "frotz" for the two remotes refer to the same thing,
or are "promisor" values scoped to each remote?

Can the name of promisor be any string?  If they end up getting used
as part of a path on the filesystem, we'd need to worry about case
sensitivity and UTF-8 normalization issues as well.

In a large enough project where multi-promisor makes sense, what is
the expected number of promisors a repository would define?  10s?
1000s?  Would a linked list still make sense when deployed in the
real world, or would we be forced to move to something like hashmap
later?

You do not have to have the answers to all these questions, and even
the ones with concrete answers, you do not necessarily have to act
on them right now (e.g. you may anticipate the eventual need to move
to hashmap, but prototyping with linked list is perfectly fine;
being aware of the possibility alone would force us to be careful to
make sure that the implementation detail does not leak through too
much and confined within _lookup(), _find(), etc. functions, and
that awareness is good enough at this point).

Thanks.
Junio C Hamano March 13, 2019, 4:34 a.m. UTC | #2
Junio C Hamano <gitster@pobox.com> writes:

> What is the rule for these promisor names?

Disregard this part (and only this part).  The values are not names,
but just "is this thing a promisor" boolean.
Christian Couder April 1, 2019, 4:41 p.m. UTC | #3
On Wed, Mar 13, 2019 at 5:09 AM Junio C Hamano <gitster@pobox.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:
>
> > +struct promisor_remote *promisor_remote_new(const char *remote_name)
> > +{
>
> Shouldn't this be static?  The config callback that calls this
> function is inside this file.

Yeah, I made it static.

> > +     struct promisor_remote *o;
> > +
> > +     o = xcalloc(1, sizeof(*o));
> > +     o->remote_name = xstrdup(remote_name);
>
> A comment on this later...
>
> > +static struct promisor_remote *promisor_remote_look_up(const char *remote_name,
> > +                                                    struct promisor_remote **previous)
>
> In our codebase, this operation is far more often called "lookup",
> one word, according to "git grep -e look_up \*.h".

Ok, I changed it to "lookup".

> > +{
> > +     struct promisor_remote *o, *p;
> > +
> > +     for (p = NULL, o = promisors; o; p = o, o = o->next)
> > +             if (o->remote_name && !strcmp(o->remote_name, remote_name)) {
> > +                     if (previous)
> > +                             *previous = p;
>
> I think the "previous" thing is for the callers to learn what
> pointer points at the found entry, allowing e.g. an element to be
> inserted just before the found element.

Actually it's to make it easy to move the found element.

> If so, would it make more
> sense to use the more familiar pattern to use
>
>         *previous = &promisors;
>
> here?

If I do that I get an "error: assignment from incompatible pointer
type" as "*previous" is of type "struct promisor_remote *" while
"&promisors" is of type "struct promisor_remote **".

Maybe you mean:

         *previous = promisors;

but I fail to see how that would correctly pass the previous element
when the found one is not the first one.

> That would remove the need to switch on NULL-ness of previous
> in the caller.

In the only caller that passes a non-NULL previous, we call
promisor_remote_move_to_tail() which does:

    if (previous)
        previous->next = o->next;
    else
        promisors = o->next ? o->next : o;

So yeah we check the NULL-ness of previous, but if previous has been
set to promisors, then previous->next = o->next will not set promisors
correctly.

I guess this is not a case where the familiar pattern you are
thinking about can be applied. Or is there an example, maybe in the
Git source code, that I could learn from?

Another possibility is to just use hashmap as you suggest below or
list.h. It might be a bit wasteful, but the code simplification might
be worth it.

> > diff --git a/promisor-remote.h b/promisor-remote.h
> > new file mode 100644
> > index 0000000000..bfbf7c0f21
> > --- /dev/null
> > +++ b/promisor-remote.h
> > @@ -0,0 +1,17 @@
> > +#ifndef PROMISOR_REMOTE_H
> > +#define PROMISOR_REMOTE_H
> > +
> > +/*
> > + * Promisor remote linked list
> > + * Its information come from remote.XXX config entries.
> > + */
> > +struct promisor_remote {
> > +     const char *remote_name;
> > +     struct promisor_remote *next;
> > +};
>
> Would it make the management of storage easier to make it
>
>         struct promisor_remote {
>                 struct promisor_remote *next;
>                 const char name[FLEX_ARRAY];
>         };
>
> that will allow allocation with
>
>         struct promisor_remote *r;
>         FLEX_ALLOC_STR(r, name, remote_name);

Ok to use a flex array. If we ever use arrays or hashmaps of promisor
remotes, we might have to go back to not using one.

> Or if the remote_name field must be a pointer, perhaps use
> FLEXPTR_ALLOC_STR().

[...]

> Can the name of promisor be any string?  If they end up getting used
> as part of a path on the filesystem, we'd need to worry about case
> sensitivity and UTF-8 normalization issues as well.

It looks like for regular remotes we only check if they start with /.
So I don't think we need to do more than that for promisor remotes. I
added the check.

> In a large enough project where multi-promisor makes sense, what is
> the expected number of promisors a repository would define?  10s?
> 1000s?  Would a linked list still make sense when deployed in the
> real world, or would we be forced to move to something like hashmap
> later?

I am OK with using a hashmap to make it similar to regular remotes.

For now I don't expect large projects to use more than tens of
promisors though. They are defined in the config file and I don't think
people will be happy if they have to manage more than tens of promisors
in their config file. If people really start to use more than that, they
are likely to ask us for a new mechanism to manage them (and to
automatically have them configured from servers). So maybe we can
change that if/when we have to work on such a mechanism.




> You do not have to have the answers to all these questions, and even
> the ones with concrete answers, you do not necessarily have to act
> on them right now (e.g. you may anticipate the eventual need to move
> to hashmap, but prototyping with linked list is perfectly fine;
> being aware of the possibility alone would force us to be careful to
> make sure that the implementation detail does not leak through too
> much and confined within _lookup(), _find(), etc. functions, and
> that awareness is good enough at this point).
>
> Thanks.
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index 537493822b..4f24ccb3dc 100644
--- a/Makefile
+++ b/Makefile
@@ -972,6 +972,7 @@  LIB_OBJS += preload-index.o
 LIB_OBJS += pretty.o
 LIB_OBJS += prio-queue.o
 LIB_OBJS += progress.o
+LIB_OBJS += promisor-remote.o
 LIB_OBJS += prompt.o
 LIB_OBJS += protocol.o
 LIB_OBJS += quote.o
diff --git a/promisor-remote.c b/promisor-remote.c
new file mode 100644
index 0000000000..d2f574651e
--- /dev/null
+++ b/promisor-remote.c
@@ -0,0 +1,100 @@ 
+#include "cache.h"
+#include "promisor-remote.h"
+#include "config.h"
+
+static struct promisor_remote *promisors;
+static struct promisor_remote **promisors_tail = &promisors;
+
+struct promisor_remote *promisor_remote_new(const char *remote_name)
+{
+	struct promisor_remote *o;
+
+	o = xcalloc(1, sizeof(*o));
+	o->remote_name = xstrdup(remote_name);
+
+	*promisors_tail = o;
+	promisors_tail = &o->next;
+
+	return o;
+}
+
+static struct promisor_remote *promisor_remote_look_up(const char *remote_name,
+						       struct promisor_remote **previous)
+{
+	struct promisor_remote *o, *p;
+
+	for (p = NULL, o = promisors; o; p = o, o = o->next)
+		if (o->remote_name && !strcmp(o->remote_name, remote_name)) {
+			if (previous)
+				*previous = p;
+			return o;
+		}
+
+	return NULL;
+}
+
+static void promisor_remote_move_to_tail(struct promisor_remote *o,
+					 struct promisor_remote *previous)
+{
+	if (previous)
+		previous->next = o->next;
+	else
+		promisors = o->next ? o->next : o;
+	o->next = NULL;
+	*promisors_tail = o;
+	promisors_tail = &o->next;
+}
+
+static int promisor_remote_config(const char *var, const char *value, void *data)
+{
+	struct promisor_remote *o;
+	const char *name;
+	int namelen;
+	const char *subkey;
+
+	if (parse_config_key(var, "remote", &name, &namelen, &subkey) < 0)
+		return 0;
+
+	if (!strcmp(subkey, "promisor")) {
+		char *remote_name;
+
+		if (!git_config_bool(var, value))
+			return 0;
+
+		remote_name = xmemdupz(name, namelen);
+
+		if (!promisor_remote_look_up(remote_name, NULL))
+			promisor_remote_new(remote_name);
+
+		free(remote_name);
+		return 0;
+	}
+
+	return 0;
+}
+
+static void promisor_remote_init(void)
+{
+	static int initialized;
+
+	if (initialized)
+		return;
+	initialized = 1;
+
+	git_config(promisor_remote_config, NULL);
+}
+
+struct promisor_remote *promisor_remote_find(const char *remote_name)
+{
+	promisor_remote_init();
+
+	if (!remote_name)
+		return promisors;
+
+	return promisor_remote_look_up(remote_name, NULL);
+}
+
+int has_promisor_remote(void)
+{
+	return !!promisor_remote_find(NULL);
+}
diff --git a/promisor-remote.h b/promisor-remote.h
new file mode 100644
index 0000000000..bfbf7c0f21
--- /dev/null
+++ b/promisor-remote.h
@@ -0,0 +1,17 @@ 
+#ifndef PROMISOR_REMOTE_H
+#define PROMISOR_REMOTE_H
+
+/*
+ * Promisor remote linked list
+ * Its information come from remote.XXX config entries.
+ */
+struct promisor_remote {
+	const char *remote_name;
+	struct promisor_remote *next;
+};
+
+extern struct promisor_remote *promisor_remote_new(const char *remote_name);
+extern struct promisor_remote *promisor_remote_find(const char *remote_name);
+extern int has_promisor_remote(void);
+
+#endif /* PROMISOR_REMOTE_H */