diff mbox series

[v2,4/5] config: add git_configset_alloc() and git_configset_clear_and_free()

Message ID 908ad0b82fa084fc4e56d7f6dff49e4f255af6ec.1723242556.git.steadmon@google.com (mailing list archive)
State New
Headers show
Series Introduce cgit-rs, a Rust wrapper around libgit.a | expand

Commit Message

Josh Steadmon Aug. 9, 2024, 10:41 p.m. UTC
Add git_configset_alloc() and git_configset_clear_and_free() functions
so that callers can manage config_set structs on the heap. This also
allows non-C external consumers to treat config_sets as opaque structs.

Co-authored-by: Calvin Wan <calvinwan@google.com>
Signed-off-by: Calvin Wan <calvinwan@google.com>
Signed-off-by: Josh Steadmon <steadmon@google.com>
---
 config.c | 11 +++++++++++
 config.h | 10 ++++++++++
 2 files changed, 21 insertions(+)

Comments

Phillip Wood Aug. 12, 2024, 9:10 a.m. UTC | #1
Hi Josh

On 09/08/2024 23:41, Josh Steadmon wrote:
> Add git_configset_alloc() and git_configset_clear_and_free() functions
> so that callers can manage config_set structs on the heap. This also
> allows non-C external consumers to treat config_sets as opaque structs.

Do we really need to add this code to config.c rather than handling it 
in the wrapper layer in the next patch?

Looking ahead I wonder how useful it is to users of the library to 
separate out allocation from initialization. A function that allocates 
and initializes a configset would be more convenient and harder to 
misuse. Calling release functions *_free() rather than 
*_clear_and_free() would be more convenient as well. I also noticed that 
the data types are not namespaced when they are exported. So perhaps we 
could drop this patch and add the following to the next patch.

/*
  * Namespace data types as well as functions and ensure consistent
  * naming of data types and the functions that operate on them.
  */
struct libgit_configset {
	struct config_set set;
};

struct libgit_configset *libgit_configset_new() {
	struct libgit_configset *set = xmalloc(sizeof(*set));

	git_configset_init(&set->set);
	return set;
}

void libgit_configset_free(struct libgit_configset *set) {
	git_configset_clear(&set->set);
	free(set);
}

Best Wishes

Phillip

> Co-authored-by: Calvin Wan <calvinwan@google.com>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> Signed-off-by: Josh Steadmon <steadmon@google.com>
> ---
>   config.c | 11 +++++++++++
>   config.h | 10 ++++++++++
>   2 files changed, 21 insertions(+)
> 
> diff --git a/config.c b/config.c
> index 6421894614..444db8de79 100644
> --- a/config.c
> +++ b/config.c
> @@ -2324,6 +2324,11 @@ static int config_set_element_cmp(const void *cmp_data UNUSED,
>   	return strcmp(e1->key, e2->key);
>   }
>   
> +struct config_set *git_configset_alloc(void)
> +{
> +	return xmalloc(sizeof(struct config_set));
> +}
> +
>   void git_configset_init(struct config_set *set)
>   {
>   	hashmap_init(&set->config_hash, config_set_element_cmp, NULL, 0);
> @@ -2353,6 +2358,12 @@ void git_configset_clear(struct config_set *set)
>   	set->list.items = NULL;
>   }
>   
> +void git_configset_clear_and_free(struct config_set *set)
> +{
> +	git_configset_clear(set);
> +	free(set);
> +}
> +
>   static int config_set_callback(const char *key, const char *value,
>   			       const struct config_context *ctx,
>   			       void *cb)
> diff --git a/config.h b/config.h
> index 54b47dec9e..074c85a788 100644
> --- a/config.h
> +++ b/config.h
> @@ -472,6 +472,11 @@ struct config_set {
>   	struct configset_list list;
>   };
>   
> +/**
> + * Alloc a config_set
> + */
> +struct config_set *git_configset_alloc(void);
> +
>   /**
>    * Initializes the config_set `cs`.
>    */
> @@ -520,6 +525,11 @@ int git_configset_get_string_multi(struct config_set *cs, const char *key,
>    */
>   void git_configset_clear(struct config_set *cs);
>   
> +/**
> + * Clears and frees a heap-allocated `config_set` structure.
> + */
> +void git_configset_clear_and_free(struct config_set *cs);
> +
>   /*
>    * These functions return 1 if not found, and 0 if found, leaving the found
>    * value in the 'dest' pointer.
Josh Steadmon Aug. 12, 2024, 9:39 p.m. UTC | #2
On 2024.08.12 10:10, Phillip Wood wrote:
> Hi Josh
> 
> On 09/08/2024 23:41, Josh Steadmon wrote:
> > Add git_configset_alloc() and git_configset_clear_and_free() functions
> > so that callers can manage config_set structs on the heap. This also
> > allows non-C external consumers to treat config_sets as opaque structs.
> 
> Do we really need to add this code to config.c rather than handling it in
> the wrapper layer in the next patch?
> 
> Looking ahead I wonder how useful it is to users of the library to separate
> out allocation from initialization. A function that allocates and
> initializes a configset would be more convenient and harder to misuse.
> Calling release functions *_free() rather than *_clear_and_free() would be
> more convenient as well. I also noticed that the data types are not
> namespaced when they are exported. So perhaps we could drop this patch and
> add the following to the next patch.
> 
> /*
>  * Namespace data types as well as functions and ensure consistent
>  * naming of data types and the functions that operate on them.
>  */
> struct libgit_configset {
> 	struct config_set set;
> };
> 
> struct libgit_configset *libgit_configset_new() {
> 	struct libgit_configset *set = xmalloc(sizeof(*set));
> 
> 	git_configset_init(&set->set);
> 	return set;
> }
> 
> void libgit_configset_free(struct libgit_configset *set) {
> 	git_configset_clear(&set->set);
> 	free(set);
> }
> 
> Best Wishes
> 
> Phillip

Hmmm I see your point, but I am also hoping to keep the symbol export
shim as small as possible, so that we can try to autogenerate it rather
than add entries by hand. However, if people feel strongly that we don't
want to add helper functions like *_alloc() or *_free() for types that don't
already have them upstream, perhaps we can just put them in a separate
rust-helpers.c file or something.
Kyle Lippincott Aug. 12, 2024, 9:55 p.m. UTC | #3
On Mon, Aug 12, 2024 at 2:39 PM Josh Steadmon <steadmon@google.com> wrote:
>
> On 2024.08.12 10:10, Phillip Wood wrote:
> > Hi Josh
> >
> > On 09/08/2024 23:41, Josh Steadmon wrote:
> > > Add git_configset_alloc() and git_configset_clear_and_free() functions
> > > so that callers can manage config_set structs on the heap. This also
> > > allows non-C external consumers to treat config_sets as opaque structs.
> >
> > Do we really need to add this code to config.c rather than handling it in
> > the wrapper layer in the next patch?
> >
> > Looking ahead I wonder how useful it is to users of the library to separate
> > out allocation from initialization. A function that allocates and
> > initializes a configset would be more convenient and harder to misuse.
> > Calling release functions *_free() rather than *_clear_and_free() would be
> > more convenient as well. I also noticed that the data types are not
> > namespaced when they are exported. So perhaps we could drop this patch and
> > add the following to the next patch.
> >
> > /*
> >  * Namespace data types as well as functions and ensure consistent
> >  * naming of data types and the functions that operate on them.
> >  */
> > struct libgit_configset {
> >       struct config_set set;
> > };
> >
> > struct libgit_configset *libgit_configset_new() {
> >       struct libgit_configset *set = xmalloc(sizeof(*set));
> >
> >       git_configset_init(&set->set);
> >       return set;
> > }
> >
> > void libgit_configset_free(struct libgit_configset *set) {
> >       git_configset_clear(&set->set);
> >       free(set);
> > }
> >
> > Best Wishes
> >
> > Phillip
>
> Hmmm I see your point, but I am also hoping to keep the symbol export
> shim as small as possible, so that we can try to autogenerate it rather
> than add entries by hand. However, if people feel strongly that we don't
> want to add helper functions like *_alloc() or *_free() for types that don't
> already have them upstream, perhaps we can just put them in a separate
> rust-helpers.c file or something.

I'm thinking of this patch series as two closely related but
technically separable things: the creation of a .a/.so that can be
used outside of git, and the rust wrapper around that library. I think
these functions would be needed by all users of the library,
regardless of what language they're implemented in. i.e. they
shouldn't be thought of as 'rust helpers' and instead just the way
that the library is designed. _All_ functions that allocate memory
should have a paired "free" method, and that should be used
exclusively, regardless of host language.

So nit: I wouldn't call it rust-helpers.c ;)
Phillip Wood Aug. 13, 2024, 9:48 a.m. UTC | #4
Hi Josh

On 12/08/2024 22:39, Josh Steadmon wrote:
> On 2024.08.12 10:10, Phillip Wood wrote:
>> Hi Josh
>>
>> Do we really need to add this code to config.c rather than handling it in
>> the wrapper layer in the next patch?
>>
>> Looking ahead I wonder how useful it is to users of the library to separate
>> out allocation from initialization. A function that allocates and
>> initializes a configset would be more convenient and harder to misuse.
>> Calling release functions *_free() rather than *_clear_and_free() would be
>> more convenient as well. I also noticed that the data types are not
>> namespaced when they are exported. So perhaps we could drop this patch and
>> add the following to the next patch.
>>
>> /*
>>   * Namespace data types as well as functions and ensure consistent
>>   * naming of data types and the functions that operate on them.
>>   */
>> struct libgit_configset {
>> 	struct config_set set;
>> };
>>
>> struct libgit_configset *libgit_configset_new() {
>> 	struct libgit_configset *set = xmalloc(sizeof(*set));
>>
>> 	git_configset_init(&set->set);
>> 	return set;
>> }
>>
>> void libgit_configset_free(struct libgit_configset *set) {
>> 	git_configset_clear(&set->set);
>> 	free(set);
>> }
> 
> Hmmm I see your point, but I am also hoping to keep the symbol export
> shim as small as possible, so that we can try to autogenerate it rather
> than add entries by hand.

That's a nice aim - how do you plan to address namespacing data types 
with that approach? The autogenerator would need to know "struct 
config_set" wants to be wrapped as "struct libgit_configset" to ensure 
we end up with consistent naming for the data type and its functions. 
We'd also still want to make sure we end up with an ergonomic api which 
means one function to allocate and initialize each data type, not 
separate functions for allocation and initialization.

> However, if people feel strongly that we don't
> want to add helper functions like *_alloc() or *_free() for types that don't
> already have them upstream, perhaps we can just put them in a separate
> rust-helpers.c file or something.

If we're not using them upstream they're just clutter as far as git is 
concerned. Having said that if you get the autogeneration working well 
so the output is usable without a lot of manual tweaking I can see an 
argument for having these functions upstream.

Best Wishes

Phillip
Phillip Wood Aug. 13, 2024, 9:51 a.m. UTC | #5
Hi Kyle

On 12/08/2024 22:55, Kyle Lippincott wrote:
> On Mon, Aug 12, 2024 at 2:39 PM Josh Steadmon <steadmon@google.com> wrote:
>> Hmmm I see your point, but I am also hoping to keep the symbol export
>> shim as small as possible, so that we can try to autogenerate it rather
>> than add entries by hand. However, if people feel strongly that we don't
>> want to add helper functions like *_alloc() or *_free() for types that don't
>> already have them upstream, perhaps we can just put them in a separate
>> rust-helpers.c file or something.
> 
> I'm thinking of this patch series as two closely related but
> technically separable things: the creation of a .a/.so that can be
> used outside of git, and the rust wrapper around that library. I think
> these functions would be needed by all users of the library,
> regardless of what language they're implemented in. i.e. they
> shouldn't be thought of as 'rust helpers' and instead just the way
> that the library is designed. _All_ functions that allocate memory
> should have a paired "free" method, and that should be used
> exclusively, regardless of host language.

Thanks for writing this, I agree we should be designing the library 
wrapper as a general purpose library, not as an implementation detail of 
the rust code.

Best Wishes

Phillip

> So nit: I wouldn't call it rust-helpers.c ;)
Junio C Hamano Aug. 13, 2024, 3:16 p.m. UTC | #6
phillip.wood123@gmail.com writes:

>> I'm thinking of this patch series as two closely related but
>> technically separable things: the creation of a .a/.so that can be
>> used outside of git, and the rust wrapper around that library. I think
>> these functions would be needed by all users of the library,
>> regardless of what language they're implemented in. i.e. they
>> shouldn't be thought of as 'rust helpers' and instead just the way
>> that the library is designed.
> ...
> Thanks for writing this, I agree we should be designing the library
> wrapper as a general purpose library, not as an implementation detail
> of the rust code.

Excellent.  Yes, make it wrapped for Rust may be a good motivator,
but cleaning up the interface so that it is more pleasant to wrap
for or directly call from any language should be an important goal
of this effort.

Thanks, all.
Patrick Steinhardt Aug. 16, 2024, 11:24 a.m. UTC | #7
On Mon, Aug 12, 2024 at 10:10:48AM +0100, Phillip Wood wrote:
> Hi Josh
> 
> On 09/08/2024 23:41, Josh Steadmon wrote:
> > Add git_configset_alloc() and git_configset_clear_and_free() functions
> > so that callers can manage config_set structs on the heap. This also
> > allows non-C external consumers to treat config_sets as opaque structs.
> 
> Do we really need to add this code to config.c rather than handling it in
> the wrapper layer in the next patch?
> 
> Looking ahead I wonder how useful it is to users of the library to separate
> out allocation from initialization. A function that allocates and
> initializes a configset would be more convenient and harder to misuse.
> Calling release functions *_free() rather than *_clear_and_free() would be
> more convenient as well. I also noticed that the data types are not
> namespaced when they are exported. So perhaps we could drop this patch and
> add the following to the next patch.

Agreed. This is also part of our coding guidelines as of 10f0723c8d
(Documentation: document idiomatic function names, 2024-07-30).

Patrick
Josh Steadmon Oct. 2, 2024, 10:12 p.m. UTC | #8
On 2024.08.13 10:48, phillip.wood123@gmail.com wrote:
> Hi Josh
> 
> On 12/08/2024 22:39, Josh Steadmon wrote:
> > On 2024.08.12 10:10, Phillip Wood wrote:
> > > Hi Josh
> > > 
> > > Do we really need to add this code to config.c rather than handling it in
> > > the wrapper layer in the next patch?
> > > 
> > > Looking ahead I wonder how useful it is to users of the library to separate
> > > out allocation from initialization. A function that allocates and
> > > initializes a configset would be more convenient and harder to misuse.
> > > Calling release functions *_free() rather than *_clear_and_free() would be
> > > more convenient as well. I also noticed that the data types are not
> > > namespaced when they are exported. So perhaps we could drop this patch and
> > > add the following to the next patch.
> > > 
> > > /*
> > >   * Namespace data types as well as functions and ensure consistent
> > >   * naming of data types and the functions that operate on them.
> > >   */
> > > struct libgit_configset {
> > > 	struct config_set set;
> > > };
> > > 
> > > struct libgit_configset *libgit_configset_new() {
> > > 	struct libgit_configset *set = xmalloc(sizeof(*set));
> > > 
> > > 	git_configset_init(&set->set);
> > > 	return set;
> > > }
> > > 
> > > void libgit_configset_free(struct libgit_configset *set) {
> > > 	git_configset_clear(&set->set);
> > > 	free(set);
> > > }
> > 
> > Hmmm I see your point, but I am also hoping to keep the symbol export
> > shim as small as possible, so that we can try to autogenerate it rather
> > than add entries by hand.
> 
> That's a nice aim - how do you plan to address namespacing data types with
> that approach? The autogenerator would need to know "struct config_set"
> wants to be wrapped as "struct libgit_configset" to ensure we end up with
> consistent naming for the data type and its functions. We'd also still want
> to make sure we end up with an ergonomic api which means one function to
> allocate and initialize each data type, not separate functions for
> allocation and initialization.
> 
> > However, if people feel strongly that we don't
> > want to add helper functions like *_alloc() or *_free() for types that don't
> > already have them upstream, perhaps we can just put them in a separate
> > rust-helpers.c file or something.
> 
> If we're not using them upstream they're just clutter as far as git is
> concerned. Having said that if you get the autogeneration working well so
> the output is usable without a lot of manual tweaking I can see an argument
> for having these functions upstream.
> 
> Best Wishes
> 
> Phillip

For V4 I'm going to drop the ambition to autogenerate the shim library,
and as such there's no longer a reason to keep helper functions out of
the shim library. So I've moved the _alloc and _free functions into the
shim.

For namespacing data types: for now I'm just letting the compiler infer
an opaque "struct libgit_config_set" and manually cast back to "struct
config_set" when we cross the boundary from the shim library back to
libgit.a. I'm not sure this is the right approach, but it avoids having
to wrap a single pointer in a separate struct.
Josh Steadmon Oct. 2, 2024, 10:17 p.m. UTC | #9
On 2024.08.16 13:24, Patrick Steinhardt wrote:
> On Mon, Aug 12, 2024 at 10:10:48AM +0100, Phillip Wood wrote:
> > Hi Josh
> > 
> > On 09/08/2024 23:41, Josh Steadmon wrote:
> > > Add git_configset_alloc() and git_configset_clear_and_free() functions
> > > so that callers can manage config_set structs on the heap. This also
> > > allows non-C external consumers to treat config_sets as opaque structs.
> > 
> > Do we really need to add this code to config.c rather than handling it in
> > the wrapper layer in the next patch?
> > 
> > Looking ahead I wonder how useful it is to users of the library to separate
> > out allocation from initialization. A function that allocates and
> > initializes a configset would be more convenient and harder to misuse.
> > Calling release functions *_free() rather than *_clear_and_free() would be
> > more convenient as well. I also noticed that the data types are not
> > namespaced when they are exported. So perhaps we could drop this patch and
> > add the following to the next patch.
> 
> Agreed. This is also part of our coding guidelines as of 10f0723c8d
> (Documentation: document idiomatic function names, 2024-07-30).
> 
> Patrick

Fixed in V4.
diff mbox series

Patch

diff --git a/config.c b/config.c
index 6421894614..444db8de79 100644
--- a/config.c
+++ b/config.c
@@ -2324,6 +2324,11 @@  static int config_set_element_cmp(const void *cmp_data UNUSED,
 	return strcmp(e1->key, e2->key);
 }
 
+struct config_set *git_configset_alloc(void)
+{
+	return xmalloc(sizeof(struct config_set));
+}
+
 void git_configset_init(struct config_set *set)
 {
 	hashmap_init(&set->config_hash, config_set_element_cmp, NULL, 0);
@@ -2353,6 +2358,12 @@  void git_configset_clear(struct config_set *set)
 	set->list.items = NULL;
 }
 
+void git_configset_clear_and_free(struct config_set *set)
+{
+	git_configset_clear(set);
+	free(set);
+}
+
 static int config_set_callback(const char *key, const char *value,
 			       const struct config_context *ctx,
 			       void *cb)
diff --git a/config.h b/config.h
index 54b47dec9e..074c85a788 100644
--- a/config.h
+++ b/config.h
@@ -472,6 +472,11 @@  struct config_set {
 	struct configset_list list;
 };
 
+/**
+ * Alloc a config_set
+ */
+struct config_set *git_configset_alloc(void);
+
 /**
  * Initializes the config_set `cs`.
  */
@@ -520,6 +525,11 @@  int git_configset_get_string_multi(struct config_set *cs, const char *key,
  */
 void git_configset_clear(struct config_set *cs);
 
+/**
+ * Clears and frees a heap-allocated `config_set` structure.
+ */
+void git_configset_clear_and_free(struct config_set *cs);
+
 /*
  * These functions return 1 if not found, and 0 if found, leaving the found
  * value in the 'dest' pointer.