diff mbox series

[6/7] xdiff: remove xdl_malloc() wrapper, use malloc(), not xmalloc()

Message ID patch-6.7-33d93f121a9-20220708T140354Z-avarab@gmail.com (mailing list archive)
State New, archived
Headers show
Series xdiff: use standard alloc macros, share them via git-shared-util.h | expand

Commit Message

Ævar Arnfjörð Bjarmason July 8, 2022, 2:20 p.m. UTC
As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
reason we have xdl_malloc() in the first place was to make the xdiff
code safer, it was not handling some allocation failures correctly at
the time.

But as noted in that commit doing this was intended as a stopgap, as
various code in xdiff/* did not correctly handle allocation failures,
and would have segfaulted if malloc() returned NULL.

But since then and after preceding commits we can be confident that
malloc() failures are handled correctly. All of these users of
xdl_malloc() are checking their return values, and we're returning
-1 (or similar ) to the top-level in case of failures.

This also has the big advantage of making the compiler and analysis
tools less confused, and potentially avoiding undefined (compiler)
behavior. I.e. we define our own xmalloc() to call die() on failure,
and that function uses the non-standard "noreturn" attribute on our
most common compiler targets.

But xdl_malloc()'s use conflicted with that, confusing both human
readers of this code, and potentially compilers as well.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 xdiff/xdiff.h  |  1 -
 xdiff/xdiffi.c |  2 +-
 xdiff/xmerge.c | 10 +++++-----
 xdiff/xutils.c |  2 +-
 4 files changed, 7 insertions(+), 8 deletions(-)

Comments

Phillip Wood July 8, 2022, 5:42 p.m. UTC | #1
On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
> As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
> reason we have xdl_malloc() in the first place was to make the xdiff
> code safer, it was not handling some allocation failures correctly at
> the time.

This is untrue, we do not have xdl_malloc to make the xdiff code safer, 
that macro was introduced with the initial import of xdiff. 36c83197249 
  just changed its definition, the entire commit consists of

-#define xdl_malloc(x) malloc(x)
+#define xdl_malloc(x) xmalloc(x)
  #define xdl_free(ptr) free(ptr)
-#define xdl_realloc(ptr,x) realloc(ptr,x)
+#define xdl_realloc(ptr,x) xrealloc(ptr,x)

I can see the argument for reverting that change now that we have fixed 
the error checking but that is not a good reason to remove xdl_malloc().

Best Wishes

Phillip

> But as noted in that commit doing this was intended as a stopgap, as
> various code in xdiff/* did not correctly handle allocation failures,
> and would have segfaulted if malloc() returned NULL.
> 
> But since then and after preceding commits we can be confident that
> malloc() failures are handled correctly. All of these users of
> xdl_malloc() are checking their return values, and we're returning
> -1 (or similar ) to the top-level in case of failures.
> 
> This also has the big advantage of making the compiler and analysis
> tools less confused, and potentially avoiding undefined (compiler)
> behavior. I.e. we define our own xmalloc() to call die() on failure,
> and that function uses the non-standard "noreturn" attribute on our
> most common compiler targets.
> 
> But xdl_malloc()'s use conflicted with that, confusing both human
> readers of this code, and potentially compilers as well.
> 
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
>   xdiff/xdiff.h  |  1 -
>   xdiff/xdiffi.c |  2 +-
>   xdiff/xmerge.c | 10 +++++-----
>   xdiff/xutils.c |  2 +-
>   4 files changed, 7 insertions(+), 8 deletions(-)
> 
> diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
> index 832cf9d977e..df048e0099b 100644
> --- a/xdiff/xdiff.h
> +++ b/xdiff/xdiff.h
> @@ -119,7 +119,6 @@ typedef struct s_bdiffparam {
>   } bdiffparam_t;
>   
>   
> -#define xdl_malloc(x) xmalloc(x)
>   #define xdl_free(ptr) free(ptr)
>   
>   void *xdl_mmfile_first(mmfile_t *mmf, long *size);
> diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
> index 077cc456087..6590811634f 100644
> --- a/xdiff/xdiffi.c
> +++ b/xdiff/xdiffi.c
> @@ -368,7 +368,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
>   static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
>   	xdchange_t *xch;
>   
> -	if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
> +	if (!(xch = (xdchange_t *) malloc(sizeof(xdchange_t))))
>   		return NULL;
>   
>   	xch->next = xscr;
> diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
> index ac0cf52f3be..676ad39020d 100644
> --- a/xdiff/xmerge.c
> +++ b/xdiff/xmerge.c
> @@ -60,7 +60,7 @@ static int xdl_append_merge(xdmerge_t **merge, int mode,
>   		m->chg1 = i1 + chg1 - m->i1;
>   		m->chg2 = i2 + chg2 - m->i2;
>   	} else {
> -		m = xdl_malloc(sizeof(xdmerge_t));
> +		m = malloc(sizeof(xdmerge_t));
>   		if (!m)
>   			return -1;
>   		m->next = NULL;
> @@ -409,7 +409,7 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
>   		m->i2 = xscr->i2 + i2;
>   		m->chg2 = xscr->chg2;
>   		while (xscr->next) {
> -			xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t));
> +			xdmerge_t *m2 = malloc(sizeof(xdmerge_t));
>   			if (!m2) {
>   				xdl_free_env(&xe);
>   				xdl_free_script(x);
> @@ -670,7 +670,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
>   						 ancestor_name,
>   						 favor, changes, NULL, style,
>   						 marker_size);
> -		result->ptr = xdl_malloc(size);
> +		result->ptr = malloc(size);
>   		if (!result->ptr) {
>   			xdl_cleanup_merge(changes);
>   			return -1;
> @@ -718,14 +718,14 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
>   	}
>   	status = 0;
>   	if (!xscr1) {
> -		result->ptr = xdl_malloc(mf2->size);
> +		result->ptr = malloc(mf2->size);
>   		if (!result->ptr)
>   			goto out;
>   		status = 0;
>   		memcpy(result->ptr, mf2->ptr, mf2->size);
>   		result->size = mf2->size;
>   	} else if (!xscr2) {
> -		result->ptr = xdl_malloc(mf1->size);
> +		result->ptr = malloc(mf1->size);
>   		if (!result->ptr)
>   			goto out;
>   		status = 0;
> diff --git a/xdiff/xutils.c b/xdiff/xutils.c
> index c0cd5338c4e..865e08f0e93 100644
> --- a/xdiff/xutils.c
> +++ b/xdiff/xutils.c
> @@ -98,7 +98,7 @@ void *xdl_cha_alloc(chastore_t *cha) {
>   	void *data;
>   
>   	if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
> -		if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
> +		if (!(ancur = (chanode_t *) malloc(sizeof(chanode_t) + cha->nsize))) {
>   
>   			return NULL;
>   		}
Jeff King July 8, 2022, 7:35 p.m. UTC | #2
On Fri, Jul 08, 2022 at 04:20:18PM +0200, Ævar Arnfjörð Bjarmason wrote:

> As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
> reason we have xdl_malloc() in the first place was to make the xdiff
> code safer, it was not handling some allocation failures correctly at
> the time.
> 
> But as noted in that commit doing this was intended as a stopgap, as
> various code in xdiff/* did not correctly handle allocation failures,
> and would have segfaulted if malloc() returned NULL.
> 
> But since then and after preceding commits we can be confident that
> malloc() failures are handled correctly. All of these users of
> xdl_malloc() are checking their return values, and we're returning
> -1 (or similar ) to the top-level in case of failures.

This is also losing the other parts mentioned in 36c83197249:
respecting GIT_ALLOC_LIMIT and any memory reclamation strategies.

I think you'd want an xmalloc_gently() instead of a raw malloc().

For the same reason, I suspect it's better to leave this with a layer of
preprocessor indirection. Even if we chose to use malloc() here, libgit2
might not, and having the macro makes it easier to share the code.

-Peff
Ævar Arnfjörð Bjarmason July 8, 2022, 9:44 p.m. UTC | #3
On Fri, Jul 08 2022, Phillip Wood wrote:

> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote:
>> As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
>> reason we have xdl_malloc() in the first place was to make the xdiff
>> code safer, it was not handling some allocation failures correctly at
>> the time.
>
> This is untrue, we do not have xdl_malloc to make the xdiff code
> safer, that macro was introduced with the initial import of
> xdiff. 36c83197249   just changed its definition, the entire commit
> consists of
>
> -#define xdl_malloc(x) malloc(x)
> +#define xdl_malloc(x) xmalloc(x)
>  #define xdl_free(ptr) free(ptr)
> -#define xdl_realloc(ptr,x) realloc(ptr,x)
> +#define xdl_realloc(ptr,x) xrealloc(ptr,x)

Yes sorry about that, I'm missing a few words there, and meant "the
reason we have this incarnation of xdl_malloc()[...]", or something to
that effect.

> I can see the argument for reverting that change now that we have
> fixed the error checking but that is not a good reason to remove
> xdl_malloc().

Indeed, but hopefully
https://lore.kernel.org/git/220708.86czef9t6y.gmgdl@evledraar.gmail.com/
is that argument.

>> But as noted in that commit doing this was intended as a stopgap, as
>> various code in xdiff/* did not correctly handle allocation failures,
>> and would have segfaulted if malloc() returned NULL.
>> But since then and after preceding commits we can be confident that
>> malloc() failures are handled correctly. All of these users of
>> xdl_malloc() are checking their return values, and we're returning
>> -1 (or similar ) to the top-level in case of failures.
>> This also has the big advantage of making the compiler and analysis
>> tools less confused, and potentially avoiding undefined (compiler)
>> behavior. I.e. we define our own xmalloc() to call die() on failure,
>> and that function uses the non-standard "noreturn" attribute on our
>> most common compiler targets.
>> But xdl_malloc()'s use conflicted with that, confusing both human
>> readers of this code, and potentially compilers as well.
>> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>> ---
>>   xdiff/xdiff.h  |  1 -
>>   xdiff/xdiffi.c |  2 +-
>>   xdiff/xmerge.c | 10 +++++-----
>>   xdiff/xutils.c |  2 +-
>>   4 files changed, 7 insertions(+), 8 deletions(-)
>> diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
>> index 832cf9d977e..df048e0099b 100644
>> --- a/xdiff/xdiff.h
>> +++ b/xdiff/xdiff.h
>> @@ -119,7 +119,6 @@ typedef struct s_bdiffparam {
>>   } bdiffparam_t;
>>     
>> -#define xdl_malloc(x) xmalloc(x)
>>   #define xdl_free(ptr) free(ptr)
>>     void *xdl_mmfile_first(mmfile_t *mmf, long *size);
>> diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
>> index 077cc456087..6590811634f 100644
>> --- a/xdiff/xdiffi.c
>> +++ b/xdiff/xdiffi.c
>> @@ -368,7 +368,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
>>   static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
>>   	xdchange_t *xch;
>>   -	if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
>> +	if (!(xch = (xdchange_t *) malloc(sizeof(xdchange_t))))
>>   		return NULL;
>>     	xch->next = xscr;
>> diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
>> index ac0cf52f3be..676ad39020d 100644
>> --- a/xdiff/xmerge.c
>> +++ b/xdiff/xmerge.c
>> @@ -60,7 +60,7 @@ static int xdl_append_merge(xdmerge_t **merge, int mode,
>>   		m->chg1 = i1 + chg1 - m->i1;
>>   		m->chg2 = i2 + chg2 - m->i2;
>>   	} else {
>> -		m = xdl_malloc(sizeof(xdmerge_t));
>> +		m = malloc(sizeof(xdmerge_t));
>>   		if (!m)
>>   			return -1;
>>   		m->next = NULL;
>> @@ -409,7 +409,7 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
>>   		m->i2 = xscr->i2 + i2;
>>   		m->chg2 = xscr->chg2;
>>   		while (xscr->next) {
>> -			xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t));
>> +			xdmerge_t *m2 = malloc(sizeof(xdmerge_t));
>>   			if (!m2) {
>>   				xdl_free_env(&xe);
>>   				xdl_free_script(x);
>> @@ -670,7 +670,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
>>   						 ancestor_name,
>>   						 favor, changes, NULL, style,
>>   						 marker_size);
>> -		result->ptr = xdl_malloc(size);
>> +		result->ptr = malloc(size);
>>   		if (!result->ptr) {
>>   			xdl_cleanup_merge(changes);
>>   			return -1;
>> @@ -718,14 +718,14 @@ int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
>>   	}
>>   	status = 0;
>>   	if (!xscr1) {
>> -		result->ptr = xdl_malloc(mf2->size);
>> +		result->ptr = malloc(mf2->size);
>>   		if (!result->ptr)
>>   			goto out;
>>   		status = 0;
>>   		memcpy(result->ptr, mf2->ptr, mf2->size);
>>   		result->size = mf2->size;
>>   	} else if (!xscr2) {
>> -		result->ptr = xdl_malloc(mf1->size);
>> +		result->ptr = malloc(mf1->size);
>>   		if (!result->ptr)
>>   			goto out;
>>   		status = 0;
>> diff --git a/xdiff/xutils.c b/xdiff/xutils.c
>> index c0cd5338c4e..865e08f0e93 100644
>> --- a/xdiff/xutils.c
>> +++ b/xdiff/xutils.c
>> @@ -98,7 +98,7 @@ void *xdl_cha_alloc(chastore_t *cha) {
>>   	void *data;
>>     	if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
>> -		if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
>> +		if (!(ancur = (chanode_t *) malloc(sizeof(chanode_t) + cha->nsize))) {
>>     			return NULL;
>>   		}
Ævar Arnfjörð Bjarmason July 8, 2022, 9:47 p.m. UTC | #4
On Fri, Jul 08 2022, Jeff King wrote:

> On Fri, Jul 08, 2022 at 04:20:18PM +0200, Ævar Arnfjörð Bjarmason wrote:
>
>> As noted in 36c83197249 (xdiff: use xmalloc/xrealloc, 2019-04-11) the
>> reason we have xdl_malloc() in the first place was to make the xdiff
>> code safer, it was not handling some allocation failures correctly at
>> the time.
>> 
>> But as noted in that commit doing this was intended as a stopgap, as
>> various code in xdiff/* did not correctly handle allocation failures,
>> and would have segfaulted if malloc() returned NULL.
>> 
>> But since then and after preceding commits we can be confident that
>> malloc() failures are handled correctly. All of these users of
>> xdl_malloc() are checking their return values, and we're returning
>> -1 (or similar ) to the top-level in case of failures.
>
> This is also losing the other parts mentioned in 36c83197249:
> respecting GIT_ALLOC_LIMIT and any memory reclamation strategies.
>
> I think you'd want an xmalloc_gently() instead of a raw malloc().

...

> For the same reason, I suspect it's better to leave this with a layer of
> preprocessor indirection. Even if we chose to use malloc() here, libgit2
> might not, and having the macro makes it easier to share the code.

I think
https://lore.kernel.org/git/220708.86czef9t6y.gmgdl@evledraar.gmail.com/
in the related side-thread shows a workable way to do that.

But I didn't think about the GIT_ALLOC_LIMIT, so if we want that still
we'd need to have this code routed to not just malloc() but an
"xmalloc_gently()" as you suggest.

But we also have a few other uses of malloc() in the codebase. I wonder
if the right thing here isn't to just use malloc(), but to have
git-compat-util.h override malloc() (similar to how we we override
e.g. exit() now...), which would also catch those.

Or we could just say that it's not worth the complexity, and xdiff won't
respect GIT_ALLOC_LIMIT .. :(
Jeff King July 11, 2022, 9:33 a.m. UTC | #5
On Fri, Jul 08, 2022 at 11:47:58PM +0200, Ævar Arnfjörð Bjarmason wrote:

> But we also have a few other uses of malloc() in the codebase. I wonder
> if the right thing here isn't to just use malloc(), but to have
> git-compat-util.h override malloc() (similar to how we we override
> e.g. exit() now...), which would also catch those.

I suspect that introduces new complexities, as some calls really may
want an actual no-frills malloc. I'm thinking stuff in compat/, for
example, where extra actions taken by xmalloc() could cause weird
looping or races. This was probably a lot more likely when we closed
packs via xmalloc (e.g., via git_mmap()'s malloc() call) but I think the
general principle still holds. E.g., gitsetenv() calling getenv() is a
potential questionable area.

It does look like the call in submodule--helper.c is just wrong, though,
and should be changed. You could probably detect these with the
preprocessor, but again, you run into complexities with the cases that
_should_ be vanilla malloc. Given how little of a problem this has been
historically, I'm mostly content to notice these in review and
occasionally grep for fixes.

I suppose a coccinelle rule could help, because it makes it easy to
suppress false positives (like all of compat/), which the preprocessor
doesn't.

-Peff
diff mbox series

Patch

diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 832cf9d977e..df048e0099b 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -119,7 +119,6 @@  typedef struct s_bdiffparam {
 } bdiffparam_t;
 
 
-#define xdl_malloc(x) xmalloc(x)
 #define xdl_free(ptr) free(ptr)
 
 void *xdl_mmfile_first(mmfile_t *mmf, long *size);
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 077cc456087..6590811634f 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -368,7 +368,7 @@  int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
 	xdchange_t *xch;
 
-	if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
+	if (!(xch = (xdchange_t *) malloc(sizeof(xdchange_t))))
 		return NULL;
 
 	xch->next = xscr;
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index ac0cf52f3be..676ad39020d 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -60,7 +60,7 @@  static int xdl_append_merge(xdmerge_t **merge, int mode,
 		m->chg1 = i1 + chg1 - m->i1;
 		m->chg2 = i2 + chg2 - m->i2;
 	} else {
-		m = xdl_malloc(sizeof(xdmerge_t));
+		m = malloc(sizeof(xdmerge_t));
 		if (!m)
 			return -1;
 		m->next = NULL;
@@ -409,7 +409,7 @@  static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
 		m->i2 = xscr->i2 + i2;
 		m->chg2 = xscr->chg2;
 		while (xscr->next) {
-			xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t));
+			xdmerge_t *m2 = malloc(sizeof(xdmerge_t));
 			if (!m2) {
 				xdl_free_env(&xe);
 				xdl_free_script(x);
@@ -670,7 +670,7 @@  static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
 						 ancestor_name,
 						 favor, changes, NULL, style,
 						 marker_size);
-		result->ptr = xdl_malloc(size);
+		result->ptr = malloc(size);
 		if (!result->ptr) {
 			xdl_cleanup_merge(changes);
 			return -1;
@@ -718,14 +718,14 @@  int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
 	}
 	status = 0;
 	if (!xscr1) {
-		result->ptr = xdl_malloc(mf2->size);
+		result->ptr = malloc(mf2->size);
 		if (!result->ptr)
 			goto out;
 		status = 0;
 		memcpy(result->ptr, mf2->ptr, mf2->size);
 		result->size = mf2->size;
 	} else if (!xscr2) {
-		result->ptr = xdl_malloc(mf1->size);
+		result->ptr = malloc(mf1->size);
 		if (!result->ptr)
 			goto out;
 		status = 0;
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index c0cd5338c4e..865e08f0e93 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -98,7 +98,7 @@  void *xdl_cha_alloc(chastore_t *cha) {
 	void *data;
 
 	if (!(ancur = cha->ancur) || ancur->icurr == cha->nsize) {
-		if (!(ancur = (chanode_t *) xdl_malloc(sizeof(chanode_t) + cha->nsize))) {
+		if (!(ancur = (chanode_t *) malloc(sizeof(chanode_t) + cha->nsize))) {
 
 			return NULL;
 		}