diff mbox series

[v2,2/2] hex: make hash_to_hex_algop() and friends thread-safe

Message ID b47445fa1cef6d4523dd0ca336f7ee22bce89466.1593208411.git.matheus.bernardino@usp.br (mailing list archive)
State New, archived
Headers show
Series Make oid_to_hex() thread-safe | expand

Commit Message

Matheus Tavares June 26, 2020, 9:54 p.m. UTC
hash_to_hex_algop() returns a static buffer, relieving callers from the
responsibility of freeing memory after use. But the current
implementation uses the same static data for all threads and, thus, is
not thread-safe. We could avoid using this function and its wrappers
in threaded code, but they are sometimes too deep in the call stack to
be noticed or even avoided.

grep.c:grep_source_load_oid(), for example, uses the thread-unsafe
oid_to_hex() (on errors) despite being called in threaded code. And
oid_to_hex() -- which calls hash_to_hex_algop() -- is used in many other
places, as well:

$ git grep 'oid_to_hex(' | wc -l
818

Although hash_to_hex_algop() and its wrappers don't seem to be causing
problems out there for now (at least not reported), making them
thread-safe makes the codebase more robust against race conditions. We
can easily do that by replicating the static buffer in each thread's
local storage.

Original-patch-by: Fredrik Kuivinen <frekui@gmail.com>
Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
 hex.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

Comments

Johannes Schindelin June 29, 2020, 3:11 p.m. UTC | #1
Hi Matheus,

I am fine with the Windows changes (although I have to admit that I did
not find time to test things yet).

There is one problem in that I do not necessarily know that the memory is
released correctly when threads end; You will notice that the
`pthread_key_create()` shim in `compat/win32/pthread.h` does not use the
`destructor` parameter at all. The documentation at

	https://docs.microsoft.com/en-us/windows/win32/procthread/using-thread-local-storage

is also not terribly clear _how_ the memory is released that was assigned
via `TlsSetValue()`. I notice that the example uses `LocalAlloc()`, but we
override `malloc()` via the `compat/nedmalloc/` functions, so that should
cause problems unless I am wrong.

Maybe there is an expert reading this who could jump in and help
understand the ramifications?

A couple more things:

On Fri, 26 Jun 2020, Matheus Tavares wrote:

> hash_to_hex_algop() returns a static buffer, relieving callers from the
> responsibility of freeing memory after use. But the current
> implementation uses the same static data for all threads and, thus, is
> not thread-safe. We could avoid using this function and its wrappers
> in threaded code, but they are sometimes too deep in the call stack to
> be noticed or even avoided.
>
> grep.c:grep_source_load_oid(), for example, uses the thread-unsafe
> oid_to_hex() (on errors) despite being called in threaded code. And
> oid_to_hex() -- which calls hash_to_hex_algop() -- is used in many other
> places, as well:
>
> $ git grep 'oid_to_hex(' | wc -l
> 818
>
> Although hash_to_hex_algop() and its wrappers don't seem to be causing
> problems out there for now (at least not reported), making them
> thread-safe makes the codebase more robust against race conditions. We
> can easily do that by replicating the static buffer in each thread's
> local storage.
>
> Original-patch-by: Fredrik Kuivinen <frekui@gmail.com>
> Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
> Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
> ---
>  hex.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 42 insertions(+), 4 deletions(-)
>
> diff --git a/hex.c b/hex.c
> index da51e64929..4f2f163d5e 100644
> --- a/hex.c
> +++ b/hex.c
> @@ -1,4 +1,5 @@
>  #include "cache.h"
> +#include "thread-utils.h"
>
>  const signed char hexval_table[256] = {
>  	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 00-07 */
> @@ -136,12 +137,49 @@ char *oid_to_hex_r(char *buffer, const struct object_id *oid)
>  	return hash_to_hex_algop_r(buffer, oid->hash, the_hash_algo);
>  }
>
> +struct hexbuf_array {
> +	int idx;

Is there a specific reason why you renamed `bufno` to `idx`? If not, I'd
rather keep the old name.

> +	char bufs[4][GIT_MAX_HEXSZ + 1];
> +};
> +
> +#ifdef HAVE_THREADS
> +static pthread_key_t hexbuf_array_key;
> +static pthread_once_t hexbuf_array_once = PTHREAD_ONCE_INIT;
> +
> +static void init_hexbuf_array_key(void)
> +{
> +	if (pthread_key_create(&hexbuf_array_key, free))
> +		die(_("failed to initialize threads' key for hash to hex conversion"));
> +}
> +
> +#else
> +static struct hexbuf_array default_hexbuf_array;
> +#endif
> +
>  char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *algop)
>  {
> -	static int bufno;
> -	static char hexbuffer[4][GIT_MAX_HEXSZ + 1];
> -	bufno = (bufno + 1) % ARRAY_SIZE(hexbuffer);
> -	return hash_to_hex_algop_r(hexbuffer[bufno], hash, algop);
> +	struct hexbuf_array *ha;
> +
> +#ifdef HAVE_THREADS
> +	void *value;
> +
> +	if (pthread_once(&hexbuf_array_once, init_hexbuf_array_key))
> +		die(_("failed to initialize threads' key for hash to hex conversion"));
> +
> +	value = pthread_getspecific(hexbuf_array_key);
> +	if (value) {
> +		ha = (struct hexbuf_array *) value;
> +	} else {
> +		ha = xmalloc(sizeof(*ha));
> +		if (pthread_setspecific(hexbuf_array_key, (void *)ha))
> +			die(_("failed to set thread buffer for hash to hex conversion"));
> +	}
> +#else
> +	ha = &default_hexbuf_array;
> +#endif

This introduces two ugly `#ifdef HAVE_THREADS` constructs which are
problematic because they are the most likely places to introduce compile
errors.

I wonder whether you considered introducing a function (and probably a
macro) that transparently gives you a thread-specific instance of a given
data type? The caller would look something like

	struct hexbuf_array *hex_array;

	GET_THREADSPECIFIC(hex_array);

where the macro would look somewhat like this:

	#define GET_THREADSPECIFIC(var) \
		if (get_thread_specific(&var, sizeof(var)) < 0)
			die(_("Failed to get thread-specific %s"), #var);

and the function would allocate and assign the variable. I guess this
scheme won't work, though, as `pthread_once()` does not take a callback
parameter, right? And we would need that parameter to be able to create
the `pthread_key_t`. Hmm.

Ciao,
Dscho

> +
> +	ha->idx = (ha->idx + 1) % ARRAY_SIZE(ha->bufs);
> +	return hash_to_hex_algop_r(ha->bufs[ha->idx], hash, algop);
>  }
>
>  char *hash_to_hex(const unsigned char *hash)
> --
> 2.26.2
>
>
Matheus Tavares June 30, 2020, 8:37 p.m. UTC | #2
On Tue, Jun 30, 2020 at 12:01 PM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Matheus,
>
> I am fine with the Windows changes (although I have to admit that I did
> not find time to test things yet).
>
> There is one problem in that I do not necessarily know that the memory is
> released correctly when threads end; You will notice that the
> `pthread_key_create()` shim in `compat/win32/pthread.h` does not use the
> `destructor` parameter at all. The documentation at
>
>       https://docs.microsoft.com/en-us/windows/win32/procthread/using-thread-local-storage
>
> is also not terribly clear _how_ the memory is released that was assigned
> via `TlsSetValue()`.

Yes, I wasn't sure about that either... It would be great if someone
familiar with TLS memory on Windows could help us with this.

> A couple more things:
>
> On Fri, 26 Jun 2020, Matheus Tavares wrote:
> >
[...]
> > +struct hexbuf_array {
> > +     int idx;
>
> Is there a specific reason why you renamed `bufno` to `idx`? If not, I'd
> rather keep the old name.

OK, I'll change it back in v3.

> > +     char bufs[4][GIT_MAX_HEXSZ + 1];
> > +};
> > +
> > +#ifdef HAVE_THREADS
> > +static pthread_key_t hexbuf_array_key;
> > +static pthread_once_t hexbuf_array_once = PTHREAD_ONCE_INIT;
> > +
> > +static void init_hexbuf_array_key(void)
> > +{
> > +     if (pthread_key_create(&hexbuf_array_key, free))
> > +             die(_("failed to initialize threads' key for hash to hex conversion"));
> > +}
> > +
> > +#else
> > +static struct hexbuf_array default_hexbuf_array;
> > +#endif
> > +
> >  char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *algop)
> >  {
> > -     static int bufno;
> > -     static char hexbuffer[4][GIT_MAX_HEXSZ + 1];
> > -     bufno = (bufno + 1) % ARRAY_SIZE(hexbuffer);
> > -     return hash_to_hex_algop_r(hexbuffer[bufno], hash, algop);
> > +     struct hexbuf_array *ha;
> > +
> > +#ifdef HAVE_THREADS
> > +     void *value;
> > +
> > +     if (pthread_once(&hexbuf_array_once, init_hexbuf_array_key))
> > +             die(_("failed to initialize threads' key for hash to hex conversion"));
> > +
> > +     value = pthread_getspecific(hexbuf_array_key);
> > +     if (value) {
> > +             ha = (struct hexbuf_array *) value;
> > +     } else {
> > +             ha = xmalloc(sizeof(*ha));
> > +             if (pthread_setspecific(hexbuf_array_key, (void *)ha))
> > +                     die(_("failed to set thread buffer for hash to hex conversion"));
> > +     }
> > +#else
> > +     ha = &default_hexbuf_array;
> > +#endif
>
> This introduces two ugly `#ifdef HAVE_THREADS` constructs which are
> problematic because they are the most likely places to introduce compile
> errors.
>
> I wonder whether you considered introducing a function (and probably a
> macro) that transparently gives you a thread-specific instance of a given
> data type? The caller would look something like
>
>         struct hexbuf_array *hex_array;
>
>         GET_THREADSPECIFIC(hex_array);
>
> where the macro would look somewhat like this:
>
>         #define GET_THREADSPECIFIC(var) \
>                 if (get_thread_specific(&var, sizeof(var)) < 0)
>                         die(_("Failed to get thread-specific %s"), #var);
>
> and the function would allocate and assign the variable.

Hmm, we would need two macros, wouldn't we? GET_THREADSPECIFIC(var)
and a DECLARE_THREADSPECIFIC(var, destructor), to declare the
pthread_once_t and pthread_key_t variables, as well as define a
initialization function for the key (i.e. the callback to
pthread_once()). Or we could provide these declarations ourselves, but
then we would need the "ifdef HAVE_THREADS" guard to avoid calling
pthread_key_create() when there is no multithreading.

I think that would work, yes. Alternatively, I think we could adjust
the dummy pthread_key_* functions in thread-utils.h to emulate the
real ones when HAVE_THREADS == false. Then we could eliminate the
`#ifdef HAVE_THREADS` guards and use the same code for both cases here
(and everywhere else pthread_key is used). I haven't thought about it
carefully enough yet, but I think this *might* be as simple as adding
the following defines in the "#ifdef NO_PTHREADS" section of
thread-utils.h:

#define pthread_key_t void *
/*
 * The destructor is not used in this case as the main thread will only
 * exit when the program terminates.
 */
#define pthread_key_create(key_ptr, unused) return_0((*key_ptr) = NULL)
#define pthread_setspecific(key, value) return_0((key) = (value))
#define pthread_getspecific(key) (key)
#define pthread_key_delete(key) return_0(NULL)

static inline int return_0(void *unused)
{
        return 0;
}

That's the general idea, but we might as well define a `struct
dummy_pthread_key_t` instead of defining the key directly as `void *`
(and have functions instead of macros). This way we could store, e.g.,
an "initialized" flag that could be used to return an error code on
double-initializations.

What do you think?
Johannes Schindelin July 1, 2020, 11:32 a.m. UTC | #3
Hi Matheus,

On Tue, 30 Jun 2020, Matheus Tavares Bernardino wrote:

> On Tue, Jun 30, 2020 at 12:01 PM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> >
> > Hi Matheus,
> >
> > I am fine with the Windows changes (although I have to admit that I did
> > not find time to test things yet).
> >
> > There is one problem in that I do not necessarily know that the memory is
> > released correctly when threads end; You will notice that the
> > `pthread_key_create()` shim in `compat/win32/pthread.h` does not use the
> > `destructor` parameter at all. The documentation at
> >
> >       https://docs.microsoft.com/en-us/windows/win32/procthread/using-thread-local-storage
> >
> > is also not terribly clear _how_ the memory is released that was assigned
> > via `TlsSetValue()`.
>
> Yes, I wasn't sure about that either... It would be great if someone
> familiar with TLS memory on Windows could help us with this.

From reading
https://docs.microsoft.com/en-us/windows/win32/procthread/using-thread-local-storage,
I get the impression that it is the duty of the thread function to take
care of releasing the memory.

Looking at our code base, I do not see any obvious single point where we
could take care of that.

And it does not look like there is a function you can register to be
called just before threads shut down.

Hmm.

A very involved solution would be to override our `pthread_create()`
emulation to override the function being called, passing as function
parameter a pointer to a struct containing the originally-specified thread
function and parameter, and the overriding function would then call that
function, save its return value, take care of releasing the memory, and
return the saved return value.

Likewise, our `pthread_setspecific()` shim in `compat/win32/pthread.h`
would need to learn to append the passed pointer to a list, and we should
probably also edit the `pthread_key_create()` shim to record the function
to call when releasing the memory at the end of a thread.

This strikes me as pretty ugly and complex.

Yet when I read the LGPLv2'ed code of
https://sourceware.org/pthreads-win32/ that is exactly what they use.
Since their source code is still in CVS, and their anonymous CVS viewer no
longer works, I had to find a copy to point to:
https://github.com/jingyu/pthreads-w32/blob/master/pthread_key_create.c

So we could now start to require a non-minimal pthreads emulation on
Windows. I am somewhat opposed to that idea.

Now I wonder just how involved it _really_ would be to convert all of
those `oid_to_hex()` calls into what is effectively an `_r()` variant,
where you _have_ to pass a buffer to store the result. That would make it
a ton easier to keep Git portable.

I do understand that 800+ hits for `oid_to_hex()` make this somewhat of a
pain to implement...

Maybe there is a way to convince Coccinelle to do all that work for us?

> > A couple more things:
> >
> > On Fri, 26 Jun 2020, Matheus Tavares wrote:
> > >
> [...]
> > > +struct hexbuf_array {
> > > +     int idx;
> >
> > Is there a specific reason why you renamed `bufno` to `idx`? If not, I'd
> > rather keep the old name.
>
> OK, I'll change it back in v3.
>
> > > +     char bufs[4][GIT_MAX_HEXSZ + 1];
> > > +};
> > > +
> > > +#ifdef HAVE_THREADS
> > > +static pthread_key_t hexbuf_array_key;
> > > +static pthread_once_t hexbuf_array_once = PTHREAD_ONCE_INIT;
> > > +
> > > +static void init_hexbuf_array_key(void)
> > > +{
> > > +     if (pthread_key_create(&hexbuf_array_key, free))
> > > +             die(_("failed to initialize threads' key for hash to hex conversion"));
> > > +}
> > > +
> > > +#else
> > > +static struct hexbuf_array default_hexbuf_array;
> > > +#endif
> > > +
> > >  char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *algop)
> > >  {
> > > -     static int bufno;
> > > -     static char hexbuffer[4][GIT_MAX_HEXSZ + 1];
> > > -     bufno = (bufno + 1) % ARRAY_SIZE(hexbuffer);
> > > -     return hash_to_hex_algop_r(hexbuffer[bufno], hash, algop);
> > > +     struct hexbuf_array *ha;
> > > +
> > > +#ifdef HAVE_THREADS
> > > +     void *value;
> > > +
> > > +     if (pthread_once(&hexbuf_array_once, init_hexbuf_array_key))
> > > +             die(_("failed to initialize threads' key for hash to hex conversion"));
> > > +
> > > +     value = pthread_getspecific(hexbuf_array_key);
> > > +     if (value) {
> > > +             ha = (struct hexbuf_array *) value;
> > > +     } else {
> > > +             ha = xmalloc(sizeof(*ha));
> > > +             if (pthread_setspecific(hexbuf_array_key, (void *)ha))
> > > +                     die(_("failed to set thread buffer for hash to hex conversion"));
> > > +     }
> > > +#else
> > > +     ha = &default_hexbuf_array;
> > > +#endif
> >
> > This introduces two ugly `#ifdef HAVE_THREADS` constructs which are
> > problematic because they are the most likely places to introduce compile
> > errors.
> >
> > I wonder whether you considered introducing a function (and probably a
> > macro) that transparently gives you a thread-specific instance of a given
> > data type? The caller would look something like
> >
> >         struct hexbuf_array *hex_array;
> >
> >         GET_THREADSPECIFIC(hex_array);
> >
> > where the macro would look somewhat like this:
> >
> >         #define GET_THREADSPECIFIC(var) \
> >                 if (get_thread_specific(&var, sizeof(var)) < 0)
> >                         die(_("Failed to get thread-specific %s"), #var);
> >
> > and the function would allocate and assign the variable.
>
> Hmm, we would need two macros, wouldn't we? GET_THREADSPECIFIC(var)
> and a DECLARE_THREADSPECIFIC(var, destructor), to declare the
> pthread_once_t and pthread_key_t variables, as well as define a
> initialization function for the key (i.e. the callback to
> pthread_once()). Or we could provide these declarations ourselves, but
> then we would need the "ifdef HAVE_THREADS" guard to avoid calling
> pthread_key_create() when there is no multithreading.

Right. I wanted to avoid the need to call `DECLARE_THREADSPECIFIC()`, in
particular because that would have to be _outside_ of any function
(because it has to define a function, and nested functions are not really
supported in C, at least not widely).

Ciao,
Dscho

>
> I think that would work, yes. Alternatively, I think we could adjust
> the dummy pthread_key_* functions in thread-utils.h to emulate the
> real ones when HAVE_THREADS == false. Then we could eliminate the
> `#ifdef HAVE_THREADS` guards and use the same code for both cases here
> (and everywhere else pthread_key is used). I haven't thought about it
> carefully enough yet, but I think this *might* be as simple as adding
> the following defines in the "#ifdef NO_PTHREADS" section of
> thread-utils.h:
>
> #define pthread_key_t void *
> /*
>  * The destructor is not used in this case as the main thread will only
>  * exit when the program terminates.
>  */
> #define pthread_key_create(key_ptr, unused) return_0((*key_ptr) = NULL)
> #define pthread_setspecific(key, value) return_0((key) = (value))
> #define pthread_getspecific(key) (key)
> #define pthread_key_delete(key) return_0(NULL)
>
> static inline int return_0(void *unused)
> {
>         return 0;
> }
>
> That's the general idea, but we might as well define a `struct
> dummy_pthread_key_t` instead of defining the key directly as `void *`
> (and have functions instead of macros). This way we could store, e.g.,
> an "initialized" flag that could be used to return an error code on
> double-initializations.
>
> What do you think?
>
Johannes Schindelin July 16, 2020, 11:29 a.m. UTC | #4
Hi Matheus,

On Fri, 26 Jun 2020, Matheus Tavares wrote:

> @@ -136,12 +137,49 @@ char *oid_to_hex_r(char *buffer, const struct object_id *oid)
>  	return hash_to_hex_algop_r(buffer, oid->hash, the_hash_algo);
>  }
>
> +struct hexbuf_array {
> +	int idx;
> +	char bufs[4][GIT_MAX_HEXSZ + 1];
> +};
> +
> +#ifdef HAVE_THREADS
> +static pthread_key_t hexbuf_array_key;
> +static pthread_once_t hexbuf_array_once = PTHREAD_ONCE_INIT;
> +
> +static void init_hexbuf_array_key(void)
> +{
> +	if (pthread_key_create(&hexbuf_array_key, free))
> +		die(_("failed to initialize threads' key for hash to hex conversion"));
> +}
> +
> +#else
> +static struct hexbuf_array default_hexbuf_array;
> +#endif
> +
>  char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *algop)
>  {
> -	static int bufno;
> -	static char hexbuffer[4][GIT_MAX_HEXSZ + 1];
> -	bufno = (bufno + 1) % ARRAY_SIZE(hexbuffer);
> -	return hash_to_hex_algop_r(hexbuffer[bufno], hash, algop);
> +	struct hexbuf_array *ha;
> +
> +#ifdef HAVE_THREADS
> +	void *value;
> +
> +	if (pthread_once(&hexbuf_array_once, init_hexbuf_array_key))
> +		die(_("failed to initialize threads' key for hash to hex conversion"));
> +
> +	value = pthread_getspecific(hexbuf_array_key);
> +	if (value) {
> +		ha = (struct hexbuf_array *) value;
> +	} else {
> +		ha = xmalloc(sizeof(*ha));

I just realized (while trying to debug something independent) that this
leaves `ha->idx` uninitialized. So you will need at least this patch to
fix a bug that currently haunts `seen`'s CI builds (you can use
`--valgrind`, like I did, to identify the problem):

-- snip --
diff --git a/hex.c b/hex.c
index 4f2f163d5e7..365ba94ab11 100644
--- a/hex.c
+++ b/hex.c
@@ -171,6 +171,7 @@ char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *a
 		ha = (struct hexbuf_array *) value;
 	} else {
 		ha = xmalloc(sizeof(*ha));
+		ha->idx = 0;
 		if (pthread_setspecific(hexbuf_array_key, (void *)ha))
 			die(_("failed to set thread buffer for hash to hex conversion"));
 	}
-- snap --

But as I mentioned before, I would be much more in favor of abandoning
this thread-local idea (because it is _still_ fragile, as the same thread
could try to make use of more than four hex values in the same `printf()`,
for example) and instead using Coccinelle to convert all those
`oid_to_hex()` calls to `oid_to_hex_r()` calls.

Now, I am _far_ from knowing what I'm doing with Coccinelle, but I think
this here semantic patch should get you going:

-- snipsnap --
@@
expression E;
@@
  {
++   char hex[GIT_MAX_HEXSZ + 1];
     ...
-    oid_to_hex(E)
+    oid_to_hex_r(hex, E)
     ...
  }

@@
expression E1, E2;
@@
  {
++   char hex1[GIT_MAX_HEXSZ + 1], hex2[GIT_MAX_HEXSZ + 1];
     ...
-    oid_to_hex(E1)
+    oid_to_hex_r(hex1, E1)
     ...
-    oid_to_hex(E2)
+    oid_to_hex_r(hex2, E2)
     ...
  }
Matheus Tavares July 18, 2020, 3:09 a.m. UTC | #5
Hi, Dscho

On Thu, Jul 16, 2020 at 9:56 AM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> On Fri, 26 Jun 2020, Matheus Tavares wrote:
>
> > +     value = pthread_getspecific(hexbuf_array_key);
> > +     if (value) {
> > +             ha = (struct hexbuf_array *) value;
> > +     } else {
> > +             ha = xmalloc(sizeof(*ha));
>
> I just realized (while trying to debug something independent) that this
> leaves `ha->idx` uninitialized.

Thanks for catching that! I fixed it in my local branch.

> But as I mentioned before, I would be much more in favor of abandoning
> this thread-local idea (because it is _still_ fragile, as the same thread
> could try to make use of more than four hex values in the same `printf()`,
> for example) and instead using Coccinelle to convert all those
> `oid_to_hex()` calls to `oid_to_hex_r()` calls.

Yeah, I agree that removing oid_to_hex() in favor of oid_to_hex_r()
would be great. Unfortunately, I only used Coccinelle for basic
things, such as function renaming. And I won't have the time to study
it further at the moment :( Therefore, I think I'll ask Junio to drop
this series for now, until I or someone else finds some time to work
on the semantic patch.

Alternatively, if using thread-local storage is still an option, I
think I might have solved the problems we had in the previous
iteration with memory leaks on Windows. I changed our
pthread_key_create() emulation to start using the destructor callback
on Windows, through the Fiber Local Storage (FLS) API. As the
documentation says [1] "If no fiber switching occurs, FLS acts exactly
the same as thread local storage". The advantage over TLS is that
FLSAlloc() does take a callback parameter.

I also removed the ugly `#ifdef HAVE_THREADS` guards on the last
patch, as you suggested, and added some tests for our pthread_key
emulation. In case you want to take a look to see if it might be worth
pursuing this route, the patches are here:
https://github.com/matheustavares/git/commits/safe_oid_to_hex_v3

[1]: https://docs.microsoft.com/en-us/windows/win32/procthread/fibers#fiber-local-storage
Matheus Tavares July 18, 2020, 3:52 a.m. UTC | #6
On Thu, Jul 16, 2020 at 9:56 AM Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
>
> Now, I am _far_ from knowing what I'm doing with Coccinelle, but I think
> this here semantic patch should get you going:
>
> -- snipsnap --
> @@
> expression E;
> @@
>   {
> ++   char hex[GIT_MAX_HEXSZ + 1];
>      ...
> -    oid_to_hex(E)
> +    oid_to_hex_r(hex, E)
>      ...
>   }
>
> @@
> expression E1, E2;
> @@
>   {
> ++   char hex1[GIT_MAX_HEXSZ + 1], hex2[GIT_MAX_HEXSZ + 1];
>      ...
> -    oid_to_hex(E1)
> +    oid_to_hex_r(hex1, E1)
>      ...
> -    oid_to_hex(E2)
> +    oid_to_hex_r(hex2, E2)
>      ...
>   }

Thanks for this nice example! This already worked very well in some of
my tests :)

However, with my _very_ limited notion of Coccinelle, I didn't
understand why some code snippets didn't match the above rules. For
example, the structure below:

func(...)
{
	if (cond)
		func2("%s", oid_to_hex(a));
}

I thought it could be because the `if` statement is missing the curly
brackets (and it does work if I add the brackets), but to my surprise,
adding another oid_to_hex() call in an `else` case also made the code
match the rule:

func(...)
{
	if (cond)
		func2("%s", oid_to_hex(a));
	else
		func2("%s", oid_to_hex(a));
}

The following snippet also correctly matches, but spatch introduces only
one `hex` variable:

	if (cond)
		func2("%s, %s", oid_to_hex(a), oid_to_hex(b));
	else
		func2("%s", oid_to_hex(a));

We will probably want our semantic rules to handle an arbitrary number
of `oid_to_hex()` calls in each function, but in scenarios like the
above one, we only really need 2 hex buffers despite having 3 calls...
That might be a little tricky, I guess.

Another thing that might be tricky in this conversion is checking for
name conflicts with the added `hex` variable (but maybe Coccinelle
already has a facilitator mechanism for such cases? IDK).
René Scharfe July 26, 2020, 5:41 p.m. UTC | #7
Am 18.07.20 um 05:52 schrieb Matheus Tavares:
> On Thu, Jul 16, 2020 at 9:56 AM Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
>>
>> Now, I am _far_ from knowing what I'm doing with Coccinelle, but I think
>> this here semantic patch should get you going:
>>
>> -- snipsnap --
>> @@
>> expression E;
>> @@
>>   {
>> ++   char hex[GIT_MAX_HEXSZ + 1];
>>      ...
>> -    oid_to_hex(E)
>> +    oid_to_hex_r(hex, E)
>>      ...
>>   }
>>
>> @@
>> expression E1, E2;
>> @@
>>   {
>> ++   char hex1[GIT_MAX_HEXSZ + 1], hex2[GIT_MAX_HEXSZ + 1];
>>      ...
>> -    oid_to_hex(E1)
>> +    oid_to_hex_r(hex1, E1)
>>      ...
>> -    oid_to_hex(E2)
>> +    oid_to_hex_r(hex2, E2)
>>      ...
>>   }
>
> Thanks for this nice example! This already worked very well in some of
> my tests :)
>
> However, with my _very_ limited notion of Coccinelle, I didn't
> understand why some code snippets didn't match the above rules. For
> example, the structure below:
>
> func(...)
> {
> 	if (cond)
> 		func2("%s", oid_to_hex(a));
> }
>
> I thought it could be because the `if` statement is missing the curly
> brackets (and it does work if I add the brackets), but to my surprise,
> adding another oid_to_hex() call in an `else` case also made the code
> match the rule:
>
> func(...)
> {
> 	if (cond)
> 		func2("%s", oid_to_hex(a));
> 	else
> 		func2("%s", oid_to_hex(a));
> }
>
> The following snippet also correctly matches, but spatch introduces only
> one `hex` variable:
>
> 	if (cond)
> 		func2("%s, %s", oid_to_hex(a), oid_to_hex(b));
> 	else
> 		func2("%s", oid_to_hex(a));
>

The produced diff looks like this:

+	char hex[GIT_MAX_HEXSZ + 1];
 	if (cond)
-		func2("%s, %s", oid_to_hex(a), oid_to_hex(b));
+		func2("%s, %s", oid_to_hex_r(hex, a), oid_to_hex_r(hex, b));
 	else
-		func2("%s", oid_to_hex(a));
+		func2("%s", oid_to_hex_r(hex, a));

So the first rule was applied (the one for a single oid_to_hex call),
but we need the second one (for two oid_to_hex calls).  Using the same
hex buffer for two different values won't work.

> We will probably want our semantic rules to handle an arbitrary number
> of `oid_to_hex()` calls in each function, but in scenarios like the
> above one, we only really need 2 hex buffers despite having 3 calls...

oid_to_hex() has two interesting features, and we need to make sure they
are preserved for callers that use them: It has a ring of four buffers,
so you can use it for four different values, and those buffers are
static, so its results can be passed around arbitrarily.

> That might be a little tricky, I guess.

It may be impossible to cover all cases.  E.g. callers of oid_to_hex()
could return that buffer (like e.g. diff_aligned_abbrev()) or save
them in some global variable (like e.g. string_list_append() with a
non-DUP string_list).  But safe conversion rules can got us surprisingly
far.

> Another thing that might be tricky in this conversion is checking for
> name conflicts with the added `hex` variable (but maybe Coccinelle
> already has a facilitator mechanism for such cases? IDK).

That's easy.  There exists no hex_buffer, yet.  We can just claim that
name for automatic conversions.  It's a bit too long for people to
type out (we have a few variables named hexbuffer, though), but not
crazy long.

So what is safe?  Calls of oid_to_hex() in the argument list of many
functions is.  E.g. puts() and printf() just consume the string that
is passed to them, and they don't store it anywhere.  That means no
static storage is needed for those.

string_list_append() is only safe if it's the duplicating variant.
Since this is a runtime option of the underlying string_list this is
hard to prove in a Coccinelle rule.  The time is better spent
converting them manually, I think.

And function calls with more than four oid_to_hex() calls are broken
already, so we only need to have rules for up to four of them.  Here
is the simplest rule I can come up with for handling up to four
oid_to_hex() calls:

@@
identifier fn =~ "^(.*printf.*|puts)$";
@@
(
  fn(...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer,
      ...
    ), ...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer2,
      ...
    ), ...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer3,
      ...
    ), ...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer4,
      ...
    ), ...
  )
|
  fn(...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer,
      ...
    ), ...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer2,
      ...
    ), ...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer3,
      ...
    ), ...
  )
|
  fn(...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer,
      ...
    ), ...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer2,
      ...
    ), ...
  )
|
  fn(...,
-   oid_to_hex
+   oid_to_hex_r
    (
+     hex_buffer,
      ...
    ), ...
  )
)

So the sub-rules are ordered from matching all four possible
oid_to_hex() calls down to a single one.  Only safe consumers are
matched.  That regex can and should be extended.

Having a list of good consumers has the nice side-effect of speeding up
the diff generation.  It still takes a few minutes on my system, though.

We still need to declare of the local buffers.  We can add them on
demand to each function with rules like this:

@avoid_duplicates@
identifier buf =~ "^hex_buffer[2-4]?$";
@@
- char buf[GIT_MAX_HEXSZ + 1];

@hex_buffer4_on_demand exists@
identifier fn;
@@
  fn(...) {
+   char hex_buffer4[GIT_MAX_HEXSZ + 1];
    <+... hex_buffer4 ...+>
  }

@hex_buffer3_on_demand exists@
identifier fn;
@@
  fn(...) {
+   char hex_buffer3[GIT_MAX_HEXSZ + 1];
    <+... hex_buffer3 ...+>
  }

@hex_buffer2_on_demand exists@
identifier fn;
@@
  fn(...) {
+   char hex_buffer2[GIT_MAX_HEXSZ + 1];
    <+... hex_buffer2 ...+>
  }

@hex_buffer_on_demand exists@
identifier fn;
@@
  fn(...) {
+   char hex_buffer[GIT_MAX_HEXSZ + 1];
    <+... hex_buffer ...+>
  }

Why remove them first?  To avoid duplicates when other convertible
oid_to_hex() calls are added later and the semantic patch applied
again.

Why declare the buffers with function scope?  To avoid shadowing.

Why are the rules reversed?  They add declarations at the top, so
hex_buffer to hex_buffer4 end up being declared in that order in
the resulting file.

Why not use the "fresh identifier" feature of Coccinelle to generate
an unused name each time?  I don't know how to integrate that into
the 4/3/2/1 rule above without having to repeat the list of safe
consumers or declaring unused buffers.  And reusing buffers between
safe consumers is fine.

René
Johannes Schindelin Aug. 10, 2020, 2:15 p.m. UTC | #8
Hi Matheus,

On Sat, 18 Jul 2020, Matheus Tavares Bernardino wrote:

> On Thu, Jul 16, 2020 at 9:56 AM Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
>
> Alternatively, if using thread-local storage is still an option, I
> think I might have solved the problems we had in the previous
> iteration with memory leaks on Windows. I changed our
> pthread_key_create() emulation to start using the destructor callback
> on Windows, through the Fiber Local Storage (FLS) API. As the
> documentation says [1] "If no fiber switching occurs, FLS acts exactly
> the same as thread local storage". The advantage over TLS is that
> FLSAlloc() does take a callback parameter.

Okay, but I am still not so enthusiastic. We can fix this in a much
simpler way, I believe, than introducing the first thread-local storage
user. Let's leave TLS until the time we actually need it?

Ciao,
Dscho
diff mbox series

Patch

diff --git a/hex.c b/hex.c
index da51e64929..4f2f163d5e 100644
--- a/hex.c
+++ b/hex.c
@@ -1,4 +1,5 @@ 
 #include "cache.h"
+#include "thread-utils.h"
 
 const signed char hexval_table[256] = {
 	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 00-07 */
@@ -136,12 +137,49 @@  char *oid_to_hex_r(char *buffer, const struct object_id *oid)
 	return hash_to_hex_algop_r(buffer, oid->hash, the_hash_algo);
 }
 
+struct hexbuf_array {
+	int idx;
+	char bufs[4][GIT_MAX_HEXSZ + 1];
+};
+
+#ifdef HAVE_THREADS
+static pthread_key_t hexbuf_array_key;
+static pthread_once_t hexbuf_array_once = PTHREAD_ONCE_INIT;
+
+static void init_hexbuf_array_key(void)
+{
+	if (pthread_key_create(&hexbuf_array_key, free))
+		die(_("failed to initialize threads' key for hash to hex conversion"));
+}
+
+#else
+static struct hexbuf_array default_hexbuf_array;
+#endif
+
 char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *algop)
 {
-	static int bufno;
-	static char hexbuffer[4][GIT_MAX_HEXSZ + 1];
-	bufno = (bufno + 1) % ARRAY_SIZE(hexbuffer);
-	return hash_to_hex_algop_r(hexbuffer[bufno], hash, algop);
+	struct hexbuf_array *ha;
+
+#ifdef HAVE_THREADS
+	void *value;
+
+	if (pthread_once(&hexbuf_array_once, init_hexbuf_array_key))
+		die(_("failed to initialize threads' key for hash to hex conversion"));
+
+	value = pthread_getspecific(hexbuf_array_key);
+	if (value) {
+		ha = (struct hexbuf_array *) value;
+	} else {
+		ha = xmalloc(sizeof(*ha));
+		if (pthread_setspecific(hexbuf_array_key, (void *)ha))
+			die(_("failed to set thread buffer for hash to hex conversion"));
+	}
+#else
+	ha = &default_hexbuf_array;
+#endif
+
+	ha->idx = (ha->idx + 1) % ARRAY_SIZE(ha->bufs);
+	return hash_to_hex_algop_r(ha->bufs[ha->idx], hash, algop);
 }
 
 char *hash_to_hex(const unsigned char *hash)