diff mbox series

[3/3] xdiff: introduce XDL_ALLOC_GROW()

Message ID da996677f0730ec7a50d399524fb58c44dad468a.1656516334.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series xdiff: introduce memory allocation macros | expand

Commit Message

Phillip Wood June 29, 2022, 3:25 p.m. UTC
From: Phillip Wood <phillip.wood@dunelm.org.uk>

Add a helper to grow an array. This is analogous to ALLOC_GROW() in
the rest of the codebase but returns −1 on allocation failure to
accommodate other users of libxdiff such as libgit2. It will also
return a error if the multiplication overflows while calculating the
new allocation size. Note that this keeps doubling on reallocation
like the code it is replacing rather than increasing the existing size
by half like ALLOC_GROW(). It does however copy ALLOC_GROW()'s trick
of adding a small amount to the new allocation to avoid a lot of
reallocations at small sizes.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xmacros.h  | 10 ++++++++++
 xdiff/xprepare.c | 19 ++++---------------
 xdiff/xutils.c   | 17 +++++++++++++++++
 xdiff/xutils.h   |  3 ++-
 4 files changed, 33 insertions(+), 16 deletions(-)

Comments

Ævar Arnfjörð Bjarmason June 30, 2022, 10:54 a.m. UTC | #1
On Wed, Jun 29 2022, Phillip Wood via GitGitGadget wrote:

> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>
> Add a helper to grow an array. This is analogous to ALLOC_GROW() in
> the rest of the codebase but returns −1 on allocation failure to
> accommodate other users of libxdiff such as libgit2.

Urm, does it? I just skimmed this, so maybe I missed something, but I
don't see where you changed the definition of xdl_malloc(),
xdl_realloc() etc.

And those are defined as:

	#define xdl_malloc(x) xmalloc(x)
	#define xdl_free(ptr) free(ptr)
	#define xdl_realloc(ptr,x) xrealloc(ptr,x)

And for e.g. xmalloc() we do:

        return do_xmalloc(size, 0);

Where that 0=gently, i.e. we'll die() instead of error(), the xrealloc()
then has no "gently" option.

Is the (pretty glaring, if that's the case) unstated assumption here
that this doesn't in fact return -1 on allocation failure, but you're
expected to replace the underlying xmalloc() with an implementation that
does?

If so I'm doubly confused by this, since you're providing alternatives
to e.g.:

	#define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc)))

So if that's the plan why would we need an XDL_ALLOC_ARRAY(), can't you
just check that it returns non-NULL?

That's not possible with our current xmalloc(), but ditto for this
series, and apparently the compiler isn't smart enough to yell at you
about that...

I wondered if we were just missing the returns_nonnull attribute, but
playing around with it I couldn't get GCC at least to warn about
checking xmalloc()'s return value for non-NULL.
Phillip Wood June 30, 2022, 12:03 p.m. UTC | #2
On 30/06/2022 11:54, Ævar Arnfjörð Bjarmason wrote:
> 
> On Wed, Jun 29 2022, Phillip Wood via GitGitGadget wrote:
> 
>> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>>
>> Add a helper to grow an array. This is analogous to ALLOC_GROW() in
>> the rest of the codebase but returns −1 on allocation failure to
>> accommodate other users of libxdiff such as libgit2.
> 
> Urm, does it? I just skimmed this, so maybe I missed something, but I
> don't see where you changed the definition of xdl_malloc(),
> xdl_realloc() etc.

XDL_ALLOC_GROW is defined as

+/*
+ * Ensure array p can accommodate at least nr elements, growing the
+ * array and updating alloc (which is the number of allocated
+ * elements) as necessary. Frees p and returns -1 on failure, returns
+ * 0 on success
+ */
+#define XDL_ALLOC_GROW(p, nr, alloc)	\
+	(-!((nr) <= (alloc) ||		\
+	    ((p) = xdl_alloc_grow_helper((p), (nr), &(alloc), sizeof(*(p))))))
+

so it returns -1 if xdl_alloc_grow_helper() returns NULL, which it does 
if there is an allocation failure or the multiplication overflows.

> And those are defined as:
> 
> 	#define xdl_malloc(x) xmalloc(x)
> 	#define xdl_free(ptr) free(ptr)
> 	#define xdl_realloc(ptr,x) xrealloc(ptr,x)
> 
> And for e.g. xmalloc() we do:
> 
>          return do_xmalloc(size, 0);
> 
> Where that 0=gently, i.e. we'll die() instead of error(), the xrealloc()
> then has no "gently" option.
>
> Is the (pretty glaring, if that's the case) unstated assumption here
> that this doesn't in fact return -1 on allocation failure, but you're
> expected to replace the underlying xmalloc() with an implementation that
> does?

I'm not relying on the return value of xrealloc() in the macro

> If so I'm doubly confused by this, since you're providing alternatives
> to e.g.:
> 
> 	#define ALLOC_ARRAY(x, alloc) (x) = xmalloc(st_mult(sizeof(*(x)), (alloc)))
> 
> So if that's the plan why would we need an XDL_ALLOC_ARRAY(), can't you
> just check that it returns non-NULL?
 >
> That's not possible with our current xmalloc(), but ditto for this
> series, and apparently the compiler isn't smart enough to yell at you
> about that...
> 
> I wondered if we were just missing the returns_nonnull attribute, but
> playing around with it I couldn't get GCC at least to warn about
> checking xmalloc()'s return value for non-NULL.

I'm not quite sure what you're saying in these last three paragraphs

Best Wishes

Phillip
Phillip Wood June 30, 2022, 12:38 p.m. UTC | #3
On 30/06/2022 13:03, Phillip Wood wrote:
> On 30/06/2022 11:54, Ævar Arnfjörð Bjarmason wrote:
>>
>> On Wed, Jun 29 2022, Phillip Wood via GitGitGadget wrote:
>>
>>> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>>>
>>> Add a helper to grow an array. This is analogous to ALLOC_GROW() in
>>> the rest of the codebase but returns −1 on allocation failure to
>>> accommodate other users of libxdiff such as libgit2.
>>
>> Urm, does it? I just skimmed this, so maybe I missed something, but I
>> don't see where you changed the definition of xdl_malloc(),
>> xdl_realloc() etc.

Oh I think I might have misunderstood your question. For git.git it will 
still die() but for other users that arrange for xdl_realloc() to return 
NULL on failure it will return -1. The same applies to the comments in 
the previous two patches about XDL_[CM]ALLOC_ARRAY() returning NULL on 
allocation failure.

Best Wishes

Phillip
Ævar Arnfjörð Bjarmason June 30, 2022, 1:25 p.m. UTC | #4
On Thu, Jun 30 2022, Phillip Wood wrote:

> On 30/06/2022 13:03, Phillip Wood wrote:
>> On 30/06/2022 11:54, Ævar Arnfjörð Bjarmason wrote:
>>>
>>> On Wed, Jun 29 2022, Phillip Wood via GitGitGadget wrote:
>>>
>>>> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>>>>
>>>> Add a helper to grow an array. This is analogous to ALLOC_GROW() in
>>>> the rest of the codebase but returns −1 on allocation failure to
>>>> accommodate other users of libxdiff such as libgit2.
>>>
>>> Urm, does it? I just skimmed this, so maybe I missed something, but I
>>> don't see where you changed the definition of xdl_malloc(),
>>> xdl_realloc() etc.
>
> Oh I think I might have misunderstood your question. For git.git it
> will still die() but for other users that arrange for xdl_realloc() to
> return NULL on failure it will return -1. The same applies to the
> comments in the previous two patches about XDL_[CM]ALLOC_ARRAY()
> returning NULL on allocation failure.

Yes, I meant that the "but returns −1 on allocation failure to
accommodate other users of libxdiff such as libgit2" is really more of
a:

	...but *allows for* dropping in another xmalloc(), xrealloc()
	etc. implementation that doesn't die on failure.

So I think the rest of my upthread question still stands, i.e.:

	"So if that's the plan why would we need an XDL_ALLOC_ARRAY(),
	can't you just check that it [I mean ALLOC_ARRAY()] returns
	non-NULL?"

I.e. if the plan is to replace the underlying x*() functions with
non-fatal variants can't you use ALLOC_ARRAY() instead? I haven't tried
that, but I don't see a reason we couldn't do that in principle...

Anyway, I'm all for the end goal here, but the way to get there seems to
be a bit of an exercise in running with scissors the more I think about
it.

I.e. the reason we're using these x*alloc() wrappers at all is because
we're lazy and want to write stuff like:

	struct stuff *foo = xmalloc(...);
	foo->bar = "baz";

Which the compiler is helpfully not yelling at us about, as opposed to
doing the same with "malloc()", where it would spot the potential null
pointer dereference.

(I'm using "compiler" here to be inclusive of extended gcc/clang options
to detect this sort of thing, & other similar analyzers).

But re "scissors": if we're doing to be maintaining the xdiff code
in-tree to be defined as our usual x*alloc() functions we're going to be
carrying code that can't have test or analysis coverage.

Which I think brings me back to my suggestion of whether we can't just
have non-fatal versions of these helper macros, define our own currently
fatal versions in terms of those, and use the non-fatal versions in the
xdiff/ code.
Junio C Hamano June 30, 2022, 6:32 p.m. UTC | #5
"Phillip Wood via GitGitGadget" <gitgitgadget@gmail.com> writes:

> +/*
> + * Ensure array p can accommodate at least nr elements, growing the
> + * array and updating alloc (which is the number of allocated
> + * elements) as necessary. Frees p and returns -1 on failure, returns
> + * 0 on success
> + */
> +#define XDL_ALLOC_GROW(p, nr, alloc)	\
> +	(-!((nr) <= (alloc) ||		\
> +	    ((p) = xdl_alloc_grow_helper((p), (nr), &(alloc), sizeof(*(p))))))
> + ...
> +
> +void* xdl_alloc_grow_helper(void *p, long nr, long *alloc, size_t size)
> +{
> +	void *tmp = NULL;
> +	size_t n = ((LONG_MAX - 16) / 2 >= *alloc) ? 2 * *alloc + 16 : LONG_MAX;

Not counting in size_t but in long?

I assume that 16 mimics the ALLOW_GROW(), but ALLOW_GROW() grows by
1.5, not by 2, so these two are not exactly compatible.

The computation of 'n' tries to avoid arithmetic in "long"
overflowing, but does it ensure that we actually grow if we truncate
at LONG_MAX?  After *alloc grew to LONG_MAX by calling this helper
once, if we need to grow again and call this helper, 'n' will again
get LONG_MAX and we end up not growing at all, no?

> +	if (nr > n)
> +		n = nr;
> +	if (SIZE_MAX / size >= n)
> +		tmp = xdl_realloc(p, n * size);
> +	if (tmp) {
> +		*alloc = n;
> +	} else {
> +		xdl_free(p);
> +		*alloc = 0;
> +	}
> +	return tmp;
> +}
Phillip Wood July 6, 2022, 1:14 p.m. UTC | #6
Hi Junio

On 30/06/2022 19:32, Junio C Hamano wrote:
> "Phillip Wood via GitGitGadget" <gitgitgadget@gmail.com> writes:
> 
>> +/*
>> + * Ensure array p can accommodate at least nr elements, growing the
>> + * array and updating alloc (which is the number of allocated
>> + * elements) as necessary. Frees p and returns -1 on failure, returns
>> + * 0 on success
>> + */
>> +#define XDL_ALLOC_GROW(p, nr, alloc)	\
>> +	(-!((nr) <= (alloc) ||		\
>> +	    ((p) = xdl_alloc_grow_helper((p), (nr), &(alloc), sizeof(*(p))))))
>> + ...
>> +
>> +void* xdl_alloc_grow_helper(void *p, long nr, long *alloc, size_t size)
>> +{
>> +	void *tmp = NULL;
>> +	size_t n = ((LONG_MAX - 16) / 2 >= *alloc) ? 2 * *alloc + 16 : LONG_MAX;
> 
> Not counting in size_t but in long?

Yes, that's to match the existing code.

> I assume that 16 mimics the ALLOW_GROW(), but ALLOW_GROW() grows by
> 1.5, not by 2, so these two are not exactly compatible.
> 
> The computation of 'n' tries to avoid arithmetic in "long"
> overflowing, but does it ensure that we actually grow if we truncate
> at LONG_MAX?  After *alloc grew to LONG_MAX by calling this helper
> once, if we need to grow again and call this helper, 'n' will again
> get LONG_MAX and we end up not growing at all, no?

Right but the caller can only request up to LONG_MAX items in nr. If we 
were to grow beyond that alloc would be truncated when we returned it to 
the caller.

The code here is written to fit in with xdiff using long where it would 
be better using size_t (changing that would be a fairly major undertaking).

Best Wishes

Phillip

>> +	if (nr > n)
>> +		n = nr;
>> +	if (SIZE_MAX / size >= n)
>> +		tmp = xdl_realloc(p, n * size);
>> +	if (tmp) {
>> +		*alloc = n;
>> +	} else {
>> +		xdl_free(p);
>> +		*alloc = 0;
>> +	}
>> +	return tmp;
>> +}
> 
>
Phillip Wood July 6, 2022, 1:23 p.m. UTC | #7
Hi Ævar

On 30/06/2022 14:25, Ævar Arnfjörð Bjarmason wrote:
> 
> On Thu, Jun 30 2022, Phillip Wood wrote:
> 
>> On 30/06/2022 13:03, Phillip Wood wrote:
>>> On 30/06/2022 11:54, Ævar Arnfjörð Bjarmason wrote:
>>>>
>>>> On Wed, Jun 29 2022, Phillip Wood via GitGitGadget wrote:
>>>>
>>>>> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>>>>>
>>>>> Add a helper to grow an array. This is analogous to ALLOC_GROW() in
>>>>> the rest of the codebase but returns −1 on allocation failure to
>>>>> accommodate other users of libxdiff such as libgit2.
>>>>
>>>> Urm, does it? I just skimmed this, so maybe I missed something, but I
>>>> don't see where you changed the definition of xdl_malloc(),
>>>> xdl_realloc() etc.
>>
>> Oh I think I might have misunderstood your question. For git.git it
>> will still die() but for other users that arrange for xdl_realloc() to
>> return NULL on failure it will return -1. The same applies to the
>> comments in the previous two patches about XDL_[CM]ALLOC_ARRAY()
>> returning NULL on allocation failure.
> 
> Yes, I meant that the "but returns −1 on allocation failure to
> accommodate other users of libxdiff such as libgit2" is really more of
> a:
> 
> 	...but *allows for* dropping in another xmalloc(), xrealloc()
> 	etc. implementation that doesn't die on failure.
> 
> So I think the rest of my upthread question still stands, i.e.:
> 
> 	"So if that's the plan why would we need an XDL_ALLOC_ARRAY(),
> 	can't you just check that it [I mean ALLOC_ARRAY()] returns
> 	non-NULL?"
> 
> I.e. if the plan is to replace the underlying x*() functions with
> non-fatal variants can't you use ALLOC_ARRAY() instead? I haven't tried
> that, but I don't see a reason we couldn't do that in principle...

As the cover letter says, the aim of this series is not to replace 
xmalloc() etc with non-fatal variants, it is just to make the xdiff code 
more readable. (One can already use a non-fatal allocation function for 
xdl_malloc()) I don't think that using ALLOC_ARRAY() in xdiff is helpful 
for other users as they would have to define their own array allocation 
macros, rather than just providing their own allocation functions. I 
would like to reduce the friction others have upstreaming xdiff patches 
to us, not increase it.

Best Wishes

Phillip

> Anyway, I'm all for the end goal here, but the way to get there seems to
> be a bit of an exercise in running with scissors the more I think about
> it.
> 
> I.e. the reason we're using these x*alloc() wrappers at all is because
> we're lazy and want to write stuff like:
> 
> 	struct stuff *foo = xmalloc(...);
> 	foo->bar = "baz";
> 
> Which the compiler is helpfully not yelling at us about, as opposed to
> doing the same with "malloc()", where it would spot the potential null
> pointer dereference.
> 
> (I'm using "compiler" here to be inclusive of extended gcc/clang options
> to detect this sort of thing, & other similar analyzers).
> 
> But re "scissors": if we're doing to be maintaining the xdiff code
> in-tree to be defined as our usual x*alloc() functions we're going to be
> carrying code that can't have test or analysis coverage.
> 
> Which I think brings me back to my suggestion of whether we can't just
> have non-fatal versions of these helper macros, define our own currently
> fatal versions in terms of those, and use the non-fatal versions in the
> xdiff/ code.
> 
>
Ævar Arnfjörð Bjarmason July 7, 2022, 11:17 a.m. UTC | #8
On Wed, Jul 06 2022, Phillip Wood wrote:

> Hi Ævar
>
> On 30/06/2022 14:25, Ævar Arnfjörð Bjarmason wrote:
>> On Thu, Jun 30 2022, Phillip Wood wrote:
>> 
>>> On 30/06/2022 13:03, Phillip Wood wrote:
>>>> On 30/06/2022 11:54, Ævar Arnfjörð Bjarmason wrote:
>>>>>
>>>>> On Wed, Jun 29 2022, Phillip Wood via GitGitGadget wrote:
>>>>>
>>>>>> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>>>>>>
>>>>>> Add a helper to grow an array. This is analogous to ALLOC_GROW() in
>>>>>> the rest of the codebase but returns −1 on allocation failure to
>>>>>> accommodate other users of libxdiff such as libgit2.
>>>>>
>>>>> Urm, does it? I just skimmed this, so maybe I missed something, but I
>>>>> don't see where you changed the definition of xdl_malloc(),
>>>>> xdl_realloc() etc.
>>>
>>> Oh I think I might have misunderstood your question. For git.git it
>>> will still die() but for other users that arrange for xdl_realloc() to
>>> return NULL on failure it will return -1. The same applies to the
>>> comments in the previous two patches about XDL_[CM]ALLOC_ARRAY()
>>> returning NULL on allocation failure.
>> Yes, I meant that the "but returns −1 on allocation failure to
>> accommodate other users of libxdiff such as libgit2" is really more of
>> a:
>> 	...but *allows for* dropping in another xmalloc(), xrealloc()
>> 	etc. implementation that doesn't die on failure.
>> So I think the rest of my upthread question still stands, i.e.:
>> 	"So if that's the plan why would we need an XDL_ALLOC_ARRAY(),
>> 	can't you just check that it [I mean ALLOC_ARRAY()] returns
>> 	non-NULL?"
>> I.e. if the plan is to replace the underlying x*() functions with
>> non-fatal variants can't you use ALLOC_ARRAY() instead? I haven't tried
>> that, but I don't see a reason we couldn't do that in principle...
>
> As the cover letter says, the aim of this series is not to replace
> xmalloc() etc with non-fatal variants, it is just to make the xdiff
> code more readable.

I don't think it's more readable to carry code in-tree that's
unreachable except when combined with code out-of-tree. I.e. this series
leaves us with the equivalent of:

	ptr = xmalloc(...);
        if (!ptr)
		/* unreachable in git.git ... */

I don't think it's more readable to have code that rather trivial
analysis will show goes against the "__attribute__((noreturn))" we're
placing on our die() function.

Which is what I'm pointing out with "running with scissors". I.e. I'm
fully on-board with the end goal, but that can be accomplished in a way
that doesn't confuse humans & analyzers alike.

> (One can already use a non-fatal allocation
> function for xdl_malloc()) [...]

That just seems inviting a segfault or undefined/untested behavior
(whether in the sense of "undefined by C" or "untested by git.git's
codebase logic"). Everything around xmalloc() now assumes "never returns
NULL", and you want to:

 * Make it return NULL when combined with out-of-tree-code
 * Maintain the code in git.git, where it never returns NULL, but in a
   way where we won't have bugs when combined with a new macro that
   behaves differently, in a way we never even test ourselves.

Isn't that correct, or am I missing something?

> I don't think that using ALLOC_ARRAY() in
> xdiff is helpful for other users as they would have to define their
> own array allocation macros, rather than just providing their own
> allocation functions. I would like to reduce the friction others have
> upstreaming xdiff patches to us, not increase it.

Yes, I'm totally on-board with reducing the friction in others using
xdiff, and would like to see more of that sort of out-of-tree use in
general (although for things outside of xdiff GPL v.s. LGPL concerns
come into play).

I'd even like for us to explicitly make that much easier. I.e. if you
want to use xdiff now you search for it, and find the at this point
unmaintained upstream, and if you find that git has a "newer" version
you'll have some trouble extracting it already.

After this series you'll need to start writing & maintaining your own
non-trivial alloc wrapper logic if you're doing that. If you get it
subtly wrong you'll have a buggy xdiff, and most likely users will just
copy/paste the git.git version from our git-compat-util.h & cache.h,
which is rather silly.


Which is why I'm saying we could/should do this in a much easier way,
i.e.:

 * Factor out the now only-fatal-on-NULL ALLOC_GROW() such that we can
   have a non-fatal version (ALLOC_GROW) and a non-fatal one.

   I don't know what we'd call it. we usually have X*() meaning "fatal",
   but these are fatal by default, maybe G* for gently? So
   GALLOC_GROW().  Urgh, anyway, continuing with that ugly name...

 * Have xdiff/ use that GALLOC_GROW() & malloc(), not ALLOC_GROW() &
   xmalloc(), as we really want to have the appropriate code flow
   analysis etc. spot for us that we should handle NULL returns,
   otherwise combining this code with libgit2 will be buggy/broken.

This makes it much easier for libgit2 to use this, as it won't need to
do anything special at all. Since our GALLOC_GROW() will eventualy use
malloc() instead of xmalloc() you don't need to define anything that
re-implements the GALLOC_GROW() or whatever other non-fatal versions of
our only-fatal helpers we have.

This assumes that we'd move these macros out of git-compat-util.h and a
new git-exernal-compat.h, or that instead of *just* copying the xdiff/
directory your import script would need to run some small bit of cc -E
and or perl/sed to one-off extract the smell bits of
git-exernal-compat.h or cache.h that we need.
Phillip Wood July 8, 2022, 9:35 a.m. UTC | #9
On 07/07/2022 12:17, Ævar Arnfjörð Bjarmason wrote:
> 
> On Wed, Jul 06 2022, Phillip Wood wrote:
> 
>> Hi Ævar
>>
>> On 30/06/2022 14:25, Ævar Arnfjörð Bjarmason wrote:
>>> On Thu, Jun 30 2022, Phillip Wood wrote:
>>>
>>>> On 30/06/2022 13:03, Phillip Wood wrote:
>>>>> On 30/06/2022 11:54, Ævar Arnfjörð Bjarmason wrote:
>>>>>>
>>>>>> On Wed, Jun 29 2022, Phillip Wood via GitGitGadget wrote:
>>>>>>
>>>>>>> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>>>>>>>
>>>>>>> Add a helper to grow an array. This is analogous to ALLOC_GROW() in
>>>>>>> the rest of the codebase but returns −1 on allocation failure to
>>>>>>> accommodate other users of libxdiff such as libgit2.
>>>>>>
>>>>>> Urm, does it? I just skimmed this, so maybe I missed something, but I
>>>>>> don't see where you changed the definition of xdl_malloc(),
>>>>>> xdl_realloc() etc.
>>>>
>>>> Oh I think I might have misunderstood your question. For git.git it
>>>> will still die() but for other users that arrange for xdl_realloc() to
>>>> return NULL on failure it will return -1. The same applies to the
>>>> comments in the previous two patches about XDL_[CM]ALLOC_ARRAY()
>>>> returning NULL on allocation failure.
>>> Yes, I meant that the "but returns −1 on allocation failure to
>>> accommodate other users of libxdiff such as libgit2" is really more of
>>> a:
>>> 	...but *allows for* dropping in another xmalloc(), xrealloc()
>>> 	etc. implementation that doesn't die on failure.
>>> So I think the rest of my upthread question still stands, i.e.:
>>> 	"So if that's the plan why would we need an XDL_ALLOC_ARRAY(),
>>> 	can't you just check that it [I mean ALLOC_ARRAY()] returns
>>> 	non-NULL?"
>>> I.e. if the plan is to replace the underlying x*() functions with
>>> non-fatal variants can't you use ALLOC_ARRAY() instead? I haven't tried
>>> that, but I don't see a reason we couldn't do that in principle...
>>
>> As the cover letter says, the aim of this series is not to replace
>> xmalloc() etc with non-fatal variants, it is just to make the xdiff
>> code more readable.
> 
> I don't think it's more readable to carry code in-tree that's
> unreachable except when combined with code out-of-tree. I.e. this series
> leaves us with the equivalent of:
> 
> 	ptr = xmalloc(...);
>          if (!ptr)
> 		/* unreachable in git.git ... */
> 
> I don't think it's more readable to have code that rather trivial
> analysis will show goes against the "__attribute__((noreturn))" we're
> placing on our die() function.

We're already in this situation. The code in xdiff is written to handle 
allocation failures and we use an allocation function that dies instead. 
This patch series does nothing to alter that situation.

> Which is what I'm pointing out with "running with scissors". I.e. I'm
> fully on-board with the end goal, but that can be accomplished in a way
> that doesn't confuse humans & analyzers alike.
> 
>> (One can already use a non-fatal allocation
>> function for xdl_malloc()) [...]
> 
> That just seems inviting a segfault or undefined/untested behavior
> (whether in the sense of "undefined by C" or "untested by git.git's
> codebase logic"). Everything around xmalloc() now assumes "never returns
> NULL", and you want to:
> 
>   * Make it return NULL when combined with out-of-tree-code

No I do not want to alter the behavior of xmalloc() at all, that is why 
this series does not alter the behavior of xmalloc()

>   * Maintain the code in git.git, where it never returns NULL, but in a
>     way where we won't have bugs when combined with a new macro that
>     behaves differently, in a way we never even test ourselves.

That describes the current situation with xdiff, this series does not 
alter that.

> Isn't that correct, or am I missing something?

You should note that libgit2 uses malloc() as it's default allocator, 
seeming without issue.

>> I don't think that using ALLOC_ARRAY() in
>> xdiff is helpful for other users as they would have to define their
>> own array allocation macros, rather than just providing their own
>> allocation functions. I would like to reduce the friction others have
>> upstreaming xdiff patches to us, not increase it.
> 
> Yes, I'm totally on-board with reducing the friction in others using
> xdiff, and would like to see more of that sort of out-of-tree use in
> general (although for things outside of xdiff GPL v.s. LGPL concerns
> come into play).
> 
> I'd even like for us to explicitly make that much easier. I.e. if you
> want to use xdiff now you search for it, and find the at this point
> unmaintained upstream, and if you find that git has a "newer" version
> you'll have some trouble extracting it already.
> 
> After this series you'll need to start writing & maintaining your own
> non-trivial alloc wrapper logic if you're doing that. If you get it
> subtly wrong you'll have a buggy xdiff, and most likely users will just
> copy/paste the git.git version from our git-compat-util.h & cache.h,
> which is rather silly.

This series does not alter what wrappers you need to write whereas your 
suggestion of using ALLOC_ARRAY() would force more work on potential 
xdiff users (though below I think you're suggesting we provide them in a 
separate header so they can be reused more easily).

> Which is why I'm saying we could/should do this in a much easier way,
> i.e.:
> 
>   * Factor out the now only-fatal-on-NULL ALLOC_GROW() such that we can
>     have a non-fatal version (ALLOC_GROW) and a non-fatal one.
> 
>     I don't know what we'd call it. we usually have X*() meaning "fatal",
>     but these are fatal by default, maybe G* for gently? So
>     GALLOC_GROW().  Urgh, anyway, continuing with that ugly name...

Further proof that naming is hard...

>   * Have xdiff/ use that GALLOC_GROW() & malloc(), not ALLOC_GROW() &
>     xmalloc(), as we really want to have the appropriate code flow
>     analysis etc. spot for us that we should handle NULL returns,
>     otherwise combining this code with libgit2 will be buggy/broken.
> 
> This makes it much easier for libgit2 to use this, as it won't need to
> do anything special at all. Since our GALLOC_GROW() will eventualy use
> malloc() instead of xmalloc() you don't need to define anything that
> re-implements the GALLOC_GROW() or whatever other non-fatal versions of
> our only-fatal helpers we have.
> 
> This assumes that we'd move these macros out of git-compat-util.h and a
> new git-exernal-compat.h, or that instead of *just* copying the xdiff/
> directory your import script would need to run some small bit of cc -E
> and or perl/sed to one-off extract the smell bits of
> git-exernal-compat.h or cache.h that we need.

I think there is an argument that we should change our xdiff wrapper to 
use malloc() rather than xmalloc() so we're able to test the error 
handling. That then begs the question as to how we actually get the 
allocation functions to fail when they're being tested. I also think 
that is an orthogonal change that could happen with or without this 
patch series.

Best Wishes

Phillip
diff mbox series

Patch

diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
index 23db8e785d7..d13a6724629 100644
--- a/xdiff/xmacros.h
+++ b/xdiff/xmacros.h
@@ -61,4 +61,14 @@  do { \
 		? memset((p), 0, (nr) * sizeof(*(p)))	\
 		: NULL)
 
+/*
+ * Ensure array p can accommodate at least nr elements, growing the
+ * array and updating alloc (which is the number of allocated
+ * elements) as necessary. Frees p and returns -1 on failure, returns
+ * 0 on success
+ */
+#define XDL_ALLOC_GROW(p, nr, alloc)	\
+	(-!((nr) <= (alloc) ||		\
+	    ((p) = xdl_alloc_grow_helper((p), (nr), &(alloc), sizeof(*(p))))))
+
 #endif /* #if !defined(XMACROS_H) */
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index b016570c488..c84549f6c50 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -111,7 +111,6 @@  static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
 	long hi;
 	char const *line;
 	xdlclass_t *rcrec;
-	xdlclass_t **rcrecs;
 
 	line = rec->ptr;
 	hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
@@ -127,14 +126,8 @@  static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
 			return -1;
 		}
 		rcrec->idx = cf->count++;
-		if (cf->count > cf->alloc) {
-			cf->alloc *= 2;
-			if (!(rcrecs = (xdlclass_t **) xdl_realloc(cf->rcrecs, cf->alloc * sizeof(xdlclass_t *)))) {
-
+		if (XDL_ALLOC_GROW(cf->rcrecs, cf->count, cf->alloc))
 				return -1;
-			}
-			cf->rcrecs = rcrecs;
-		}
 		cf->rcrecs[rcrec->idx] = rcrec;
 		rcrec->line = line;
 		rcrec->size = rec->size;
@@ -163,7 +156,7 @@  static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 	unsigned long hav;
 	char const *blk, *cur, *top, *prev;
 	xrecord_t *crec;
-	xrecord_t **recs, **rrecs;
+	xrecord_t **recs;
 	xrecord_t **rhash;
 	unsigned long *ha;
 	char *rchg;
@@ -190,12 +183,8 @@  static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 		for (top = blk + bsize; cur < top; ) {
 			prev = cur;
 			hav = xdl_hash_record(&cur, top, xpp->flags);
-			if (nrec >= narec) {
-				narec *= 2;
-				if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *))))
-					goto abort;
-				recs = rrecs;
-			}
+			if (XDL_ALLOC_GROW(recs, nrec + 1, narec))
+				goto abort;
 			if (!(crec = xdl_cha_alloc(&xdf->rcha)))
 				goto abort;
 			crec->ptr = prev;
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 115b2b1640b..9e36f24875d 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -432,3 +432,20 @@  int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
 
 	return 0;
 }
+
+void* xdl_alloc_grow_helper(void *p, long nr, long *alloc, size_t size)
+{
+	void *tmp = NULL;
+	size_t n = ((LONG_MAX - 16) / 2 >= *alloc) ? 2 * *alloc + 16 : LONG_MAX;
+	if (nr > n)
+		n = nr;
+	if (SIZE_MAX / size >= n)
+		tmp = xdl_realloc(p, n * size);
+	if (tmp) {
+		*alloc = n;
+	} else {
+		xdl_free(p);
+		*alloc = 0;
+	}
+	return tmp;
+}
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index fba7bae03c7..fd0bba94e8b 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -42,6 +42,7 @@  int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
 int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
 		       int line1, int count1, int line2, int count2);
 
-
+/* Do not call this function, use XDL_ALLOC_GROW instead */
+void* xdl_alloc_grow_helper(void* p, long nr, long* alloc, size_t size);
 
 #endif /* #if !defined(XUTILS_H) */