Message ID | patch-7.7-a1bf9a94f0a-20220708T140354Z-avarab@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | xdiff: use standard alloc macros, share them via git-shared-util.h | expand |
Hi Ævar On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote: > Remove the xdl_free() wrapper macro in favor of using free() > directly. The wrapper macro was brought in with the initial import of > xdiff/ in 3443546f6ef (Use a *real* built-in diff generator, > 2006-03-24). > > As subsequent discussions on the topic[1] have made clear there's no > reason to use this wrapper. That link is to a message where you assert that there is no reason for the wrapper, you seem to be the only person in that thread making that argument. The libgit2 maintainer and others are arguing that it is worth keeping to make it easy to swap the allocator. > Both git itself as well as any external > users such as libgit2 compile the xdiff/* code as part of their own > compilation, and can thus find the right malloc() and free() at > link-time. I'm struggling to see how that is simpler than the current situation with xdl_malloc(). Perhaps you could show how you think libgit2 could easily make xdiff use git__malloc() instead of malloc() without changing the malloc() calls (the message you linked to essentially suggests they do a search and replace). If people cannot swap in another allocator without changing the code then you are imposing a barrier to them contributing patches back to git's xdiff. Best Wishes Phillip > When compiling git we already find a custom malloc() and free() > e.g. if compiled with USE_NED_ALLOCATOR=YesPlease. > > 1. https://lore.kernel.org/git/220415.867d7qbaad.gmgdl@evledraar.gmail.com/ > > Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> > --- > xdiff/xdiff.h | 3 --- > xdiff/xdiffi.c | 4 ++-- > xdiff/xhistogram.c | 6 +++--- > xdiff/xpatience.c | 8 ++++---- > xdiff/xprepare.c | 28 ++++++++++++++-------------- > xdiff/xutils.c | 2 +- > 6 files changed, 24 insertions(+), 27 deletions(-) > > diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h > index df048e0099b..a37d89fcdaf 100644 > --- a/xdiff/xdiff.h > +++ b/xdiff/xdiff.h > @@ -118,9 +118,6 @@ typedef struct s_bdiffparam { > long bsize; > } bdiffparam_t; > > - > -#define xdl_free(ptr) free(ptr) > - > void *xdl_mmfile_first(mmfile_t *mmf, long *size); > long xdl_mmfile_size(mmfile_t *mmf); > > diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c > index 6590811634f..375bb81a8aa 100644 > --- a/xdiff/xdiffi.c > +++ b/xdiff/xdiffi.c > @@ -359,7 +359,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, > res = xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, > kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, > &xenv); > - xdl_free(kvd); > + free(kvd); > > return res; > } > @@ -960,7 +960,7 @@ void xdl_free_script(xdchange_t *xscr) { > > while ((xch = xscr) != NULL) { > xscr = xscr->next; > - xdl_free(xch); > + free(xch); > } > } > > diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c > index f20592bfbdd..be35d9c9697 100644 > --- a/xdiff/xhistogram.c > +++ b/xdiff/xhistogram.c > @@ -240,9 +240,9 @@ static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env, > > static inline void free_index(struct histindex *index) > { > - xdl_free(index->records); > - xdl_free(index->line_map); > - xdl_free(index->next_ptrs); > + free(index->records); > + free(index->line_map); > + free(index->next_ptrs); > xdl_cha_free(&index->rcha); > } > > diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c > index bb328d9f852..8fffd2b8297 100644 > --- a/xdiff/xpatience.c > +++ b/xdiff/xpatience.c > @@ -233,7 +233,7 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res) > /* No common unique lines were found */ > if (!longest) { > *res = NULL; > - xdl_free(sequence); > + free(sequence); > return 0; > } > > @@ -245,7 +245,7 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res) > entry = entry->previous; > } > *res = entry; > - xdl_free(sequence); > + free(sequence); > return 0; > } > > @@ -358,7 +358,7 @@ static int patience_diff(mmfile_t *file1, mmfile_t *file2, > env->xdf1.rchg[line1++ - 1] = 1; > while(count2--) > env->xdf2.rchg[line2++ - 1] = 1; > - xdl_free(map.entries); > + free(map.entries); > return 0; > } > > @@ -372,7 +372,7 @@ static int patience_diff(mmfile_t *file1, mmfile_t *file2, > result = fall_back_to_classic_diff(&map, > line1, count1, line2, count2); > out: > - xdl_free(map.entries); > + free(map.entries); > return result; > } > > diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c > index 4182d9e1c0a..169629761c0 100644 > --- a/xdiff/xprepare.c > +++ b/xdiff/xprepare.c > @@ -89,7 +89,7 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { > > GALLOC_ARRAY(cf->rcrecs, cf->alloc); > if (!cf->rcrecs) { > - xdl_free(cf->rchash); > + free(cf->rchash); > xdl_cha_free(&cf->ncha); > return -1; > } > @@ -102,8 +102,8 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { > > static void xdl_free_classifier(xdlclassifier_t *cf) { > > - xdl_free(cf->rcrecs); > - xdl_free(cf->rchash); > + free(cf->rcrecs); > + free(cf->rchash); > xdl_cha_free(&cf->ncha); > } > > @@ -230,11 +230,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ > return 0; > > abort: > - xdl_free(ha); > - xdl_free(rindex); > - xdl_free(rchg); > - xdl_free(rhash); > - xdl_free(recs); > + free(ha); > + free(rindex); > + free(rchg); > + free(rhash); > + free(recs); > xdl_cha_free(&xdf->rcha); > return -1; > } > @@ -242,11 +242,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ > > static void xdl_free_ctx(xdfile_t *xdf) { > > - xdl_free(xdf->rhash); > - xdl_free(xdf->rindex); > - xdl_free(xdf->rchg - 1); > - xdl_free(xdf->ha); > - xdl_free(xdf->recs); > + free(xdf->rhash); > + free(xdf->rindex); > + free(xdf->rchg - 1); > + free(xdf->ha); > + free(xdf->recs); > xdl_cha_free(&xdf->rcha); > } > > @@ -424,7 +424,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd > } > xdf2->nreff = nreff; > > - xdl_free(dis); > + free(dis); > > return 0; > } > diff --git a/xdiff/xutils.c b/xdiff/xutils.c > index 865e08f0e93..00eeba452a5 100644 > --- a/xdiff/xutils.c > +++ b/xdiff/xutils.c > @@ -88,7 +88,7 @@ void xdl_cha_free(chastore_t *cha) { > > for (cur = cha->head; (tmp = cur) != NULL;) { > cur = cur->next; > - xdl_free(tmp); > + free(tmp); > } > } >
On Fri, Jul 08 2022, Phillip Wood wrote: > Hi Ævar > > On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote: >> Remove the xdl_free() wrapper macro in favor of using free() >> directly. The wrapper macro was brought in with the initial import of >> xdiff/ in 3443546f6ef (Use a *real* built-in diff generator, >> 2006-03-24). >> As subsequent discussions on the topic[1] have made clear there's no >> reason to use this wrapper. > > That link is to a message where you assert that there is no reason for > the wrapper, you seem to be the only person in that thread making that > argument. The libgit2 maintainer and others are arguing that it is > worth keeping to make it easy to swap the allocator. Arguing that it's not needed for a technical reason, but an "aesthetic and ergonomic one", per: https://lore.kernel.org/git/20220217225804.GC7@edef91d97c94/; Perhaps I misread it, but I took this as Junio concurring with what I was pointing out there: https://lore.kernel.org/git/xmqqfsohbdre.fsf@gitster.g/ >> Both git itself as well as any external >> users such as libgit2 compile the xdiff/* code as part of their own >> compilation, and can thus find the right malloc() and free() at >> link-time. > > I'm struggling to see how that is simpler than the current situation > with xdl_malloc(). It's simpler because when maintaining that code in git.git it's less of a special case, e.g. we have coccinelle rules that match free(), now every such rule needs to account for the xdiff special-case. And it's less buggy because if you're relying on us always using a wrapper bugs will creep in, current master has this: $ git -P grep '\bfree\(' -- xdiff xdiff/xdiff.h:#define xdl_free(ptr) free(ptr) xdiff/xmerge.c: free(c); xdiff/xmerge.c: free(next_m); > Perhaps you could show how you think libgit2 could > easily make xdiff use git__malloc() instead of malloc() without > changing the malloc() calls (the message you linked to essentially > suggests they do a search and replace). If people cannot swap in > another allocator without changing the code then you are imposing a > barrier to them contributing patches back to git's xdiff. I was suggesting that if libgit2 wanted to maintain a copy of xdiff that catered to the asthetic desires of the maintainer adjusting whatever import script you use to apply a trivial coccinelle transformation would give you what you wanted. Except without a maintenance burden on git.git, or the bugs you'd get now since you're not catching those two free() cases (or any future such cases). But just having the code use malloc() and free() directly and getting you what you get now is easy, and doesn't require any such search-replacement. Here's how: I imported the xdiff/*.[ch] files at the tip of my branch into current libgit2.git's src/libgit2/xdiff/, and then applied this on top, which is partially re-applying libgit2's own monkeypatches, and partially adjusting for upstream changes (including this one): diff --git a/src/libgit2/xdiff/git-xdiff.h b/src/libgit2/xdiff/git-xdiff.h index b75dba819..2e00764d4 100644 --- a/src/libgit2/xdiff/git-xdiff.h +++ b/src/libgit2/xdiff/git-xdiff.h @@ -14,6 +14,7 @@ #ifndef INCLUDE_git_xdiff_h__ #define INCLUDE_git_xdiff_h__ +#include <regex.h> #include "regexp.h" /* Work around C90-conformance issues */ @@ -27,11 +28,10 @@ # endif #endif -#define xdl_malloc(x) git__malloc(x) -#define xdl_free(ptr) git__free(ptr) -#define xdl_realloc(ptr, x) git__realloc(ptr, x) +#define malloc(x) git__malloc(x) +#define free(ptr) git__free(ptr) -#define XDL_BUG(msg) GIT_ASSERT(msg) +#define BUG(msg) GIT_ASSERT(msg) #define xdl_regex_t git_regexp #define xdl_regmatch_t git_regmatch @@ -50,4 +50,17 @@ GIT_INLINE(int) xdl_regexec_buf( return -1; } +static inline size_t st_mult(size_t a, size_t b) +{ + return a * b; +} + +static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + assert(nmatch > 0 && pmatch); + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = size; + return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND); +} #endif diff --git a/src/libgit2/xdiff/xdiff.h b/src/libgit2/xdiff/xdiff.h index a37d89fcd..5e5b525c2 100644 --- a/src/libgit2/xdiff/xdiff.h +++ b/src/libgit2/xdiff/xdiff.h @@ -23,6 +23,8 @@ #if !defined(XDIFF_H) #define XDIFF_H +#include "git-xdiff.h" + #ifdef __cplusplus extern "C" { #endif /* #ifdef __cplusplus */ diff --git a/src/libgit2/xdiff/xinclude.h b/src/libgit2/xdiff/xinclude.h index a4285ac0e..79812ad8a 100644 --- a/src/libgit2/xdiff/xinclude.h +++ b/src/libgit2/xdiff/xinclude.h @@ -23,7 +23,8 @@ #if !defined(XINCLUDE_H) #define XINCLUDE_H -#include "git-compat-util.h" +#include "git-xdiff.h" +#include "git-shared-util.h" #include "xmacros.h" #include "xdiff.h" #include "xtypes.h" If you then build it and run e.g.: gdb -args ./libgit2_tests -smerge::files You'll get stack traces like this if you break on stdalloc__malloc (which it uses for me in its default configuration): (gdb) bt #0 stdalloc__malloc (len=144, file=0x87478d "/home/avar/g/libgit2/src/libgit2/xdiff/xutils.c", line=101) at /home/avar/g/libgit2/src/util/allocators/stdalloc.c:14 #1 0x00000000006ec15c in xdl_cha_alloc (cha=0x7fffffffcaa8) at /home/avar/g/libgit2/src/libgit2/xdiff/xutils.c:101 #2 0x00000000006eaee9 in xdl_prepare_ctx (pass=1, mf=0x7fffffffcc98, narec=13, xpp=0x7fffffffcca8, cf=0x7fffffffc7d0, xdf=0x7fffffffcaa8) at /home/avar/g/libgit2/src/libgit2/xdiff/xprepare.c:194 IOW this was all seamlessly routed to your git__malloc() without us having to maintain an xdl_{malloc,free}(). Now, I think that's obviously worth adjusting, e.g. the series I've got here could make this easier for libgit2 by including st_mult() at least, I'm not sure what you want to do about regexec_buf(). For the monkeypatching you do now of creating a "git-xdiff.h" I think it would be very good to get a patch like what I managed to get sha1collisiondetection.git to accept for our own use-case, which allows us to use their upstream code as-is from a submodule: https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef
Hi Ævar On 08/07/2022 22:26, Ævar Arnfjörð Bjarmason wrote: > > On Fri, Jul 08 2022, Phillip Wood wrote: > >> Hi Ævar >> >> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote: >>> Remove the xdl_free() wrapper macro in favor of using free() >>> directly. The wrapper macro was brought in with the initial import of >>> xdiff/ in 3443546f6ef (Use a *real* built-in diff generator, >>> 2006-03-24). >>> As subsequent discussions on the topic[1] have made clear there's no >>> reason to use this wrapper. >> >> That link is to a message where you assert that there is no reason for >> the wrapper, you seem to be the only person in that thread making that >> argument. The libgit2 maintainer and others are arguing that it is >> worth keeping to make it easy to swap the allocator. > > Arguing that it's not needed for a technical reason, but an "aesthetic > and ergonomic one", per: > https://lore.kernel.org/git/20220217225804.GC7@edef91d97c94/; > > Perhaps I misread it, but I took this as Junio concurring with what I > was pointing out there: > https://lore.kernel.org/git/xmqqfsohbdre.fsf@gitster.g/ > >>> Both git itself as well as any external >>> users such as libgit2 compile the xdiff/* code as part of their own >>> compilation, and can thus find the right malloc() and free() at >>> link-time. >> >> I'm struggling to see how that is simpler than the current situation >> with xdl_malloc(). > > It's simpler because when maintaining that code in git.git it's less of > a special case, e.g. we have coccinelle rules that match free(), now > every such rule needs to account for the xdiff special-case. > > And it's less buggy because if you're relying on us always using a > wrapper bugs will creep in, current master has this: > > $ git -P grep '\bfree\(' -- xdiff > xdiff/xdiff.h:#define xdl_free(ptr) free(ptr) > xdiff/xmerge.c: free(c); > xdiff/xmerge.c: free(next_m); > >> Perhaps you could show how you think libgit2 could >> easily make xdiff use git__malloc() instead of malloc() without >> changing the malloc() calls (the message you linked to essentially >> suggests they do a search and replace). If people cannot swap in >> another allocator without changing the code then you are imposing a >> barrier to them contributing patches back to git's xdiff. > > I was suggesting that if libgit2 wanted to maintain a copy of xdiff that > catered to the asthetic desires of the maintainer adjusting whatever > import script you use to apply a trivial coccinelle transformation would > give you what you wanted. Except without a maintenance burden on > git.git, or the bugs you'd get now since you're not catching those two > free() cases (or any future such cases). > > But just having the code use malloc() and free() directly and getting > you what you get now is easy, and doesn't require any such > search-replacement. > > Here's how: > > I imported the xdiff/*.[ch] files at the tip of my branch into current > libgit2.git's src/libgit2/xdiff/, and then applied this on top, which is > partially re-applying libgit2's own monkeypatches, and partially > adjusting for upstream changes (including this one): > > diff --git a/src/libgit2/xdiff/git-xdiff.h b/src/libgit2/xdiff/git-xdiff.h > index b75dba819..2e00764d4 100644 > --- a/src/libgit2/xdiff/git-xdiff.h > +++ b/src/libgit2/xdiff/git-xdiff.h > @@ -14,6 +14,7 @@ > #ifndef INCLUDE_git_xdiff_h__ > #define INCLUDE_git_xdiff_h__ > > +#include <regex.h> > #include "regexp.h" > > /* Work around C90-conformance issues */ > @@ -27,11 +28,10 @@ > # endif > #endif > > -#define xdl_malloc(x) git__malloc(x) > -#define xdl_free(ptr) git__free(ptr) > -#define xdl_realloc(ptr, x) git__realloc(ptr, x) > +#define malloc(x) git__malloc(x) > +#define free(ptr) git__free(ptr) > > -#define XDL_BUG(msg) GIT_ASSERT(msg) > +#define BUG(msg) GIT_ASSERT(msg) > > #define xdl_regex_t git_regexp > #define xdl_regmatch_t git_regmatch > @@ -50,4 +50,17 @@ GIT_INLINE(int) xdl_regexec_buf( > return -1; > } > > +static inline size_t st_mult(size_t a, size_t b) > +{ > + return a * b; > +} > + > +static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size, > + size_t nmatch, regmatch_t pmatch[], int eflags) > +{ > + assert(nmatch > 0 && pmatch); > + pmatch[0].rm_so = 0; > + pmatch[0].rm_eo = size; > + return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND); > +} > #endif > diff --git a/src/libgit2/xdiff/xdiff.h b/src/libgit2/xdiff/xdiff.h > index a37d89fcd..5e5b525c2 100644 > --- a/src/libgit2/xdiff/xdiff.h > +++ b/src/libgit2/xdiff/xdiff.h > @@ -23,6 +23,8 @@ > #if !defined(XDIFF_H) > #define XDIFF_H > > +#include "git-xdiff.h" > + > #ifdef __cplusplus > extern "C" { > #endif /* #ifdef __cplusplus */ > diff --git a/src/libgit2/xdiff/xinclude.h b/src/libgit2/xdiff/xinclude.h > index a4285ac0e..79812ad8a 100644 > --- a/src/libgit2/xdiff/xinclude.h > +++ b/src/libgit2/xdiff/xinclude.h > @@ -23,7 +23,8 @@ > #if !defined(XINCLUDE_H) > #define XINCLUDE_H > > -#include "git-compat-util.h" > +#include "git-xdiff.h" > +#include "git-shared-util.h" > #include "xmacros.h" > #include "xdiff.h" > #include "xtypes.h" > > If you then build it and run e.g.: > > gdb -args ./libgit2_tests -smerge::files > > You'll get stack traces like this if you break on stdalloc__malloc > (which it uses for me in its default configuration): > > (gdb) bt > #0 stdalloc__malloc (len=144, file=0x87478d "/home/avar/g/libgit2/src/libgit2/xdiff/xutils.c", line=101) at /home/avar/g/libgit2/src/util/allocators/stdalloc.c:14 > #1 0x00000000006ec15c in xdl_cha_alloc (cha=0x7fffffffcaa8) at /home/avar/g/libgit2/src/libgit2/xdiff/xutils.c:101 > #2 0x00000000006eaee9 in xdl_prepare_ctx (pass=1, mf=0x7fffffffcc98, narec=13, xpp=0x7fffffffcca8, cf=0x7fffffffc7d0, xdf=0x7fffffffcaa8) > at /home/avar/g/libgit2/src/libgit2/xdiff/xprepare.c:194 > > IOW this was all seamlessly routed to your git__malloc() without us > having to maintain an xdl_{malloc,free}(). Thanks for showing this, it is really helpful to see a concrete example. I was especially interested to see how you went about the conversion without redefining standard library functions or introducing non-namespaced identifiers in files that included xdiff.h. Unfortunately you have not done that and so I don't think the approach above is viable for a well behaved library. > Now, I think that's obviously worth adjusting, e.g. the series I've got > here could make this easier for libgit2 by including st_mult() at least, > I'm not sure what you want to do about regexec_buf(). > > For the monkeypatching you do now of creating a "git-xdiff.h" I think it > would be very good to get a patch like what I managed to get > sha1collisiondetection.git to accept for our own use-case, which allows > us to use their upstream code as-is from a submodule: > > https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef Thanks for the link, That's a really good example where all the identifiers are namespaced and the public include file does not pollute the code that is including it. If you come up with something like that where the client code can set up same #defines for malloc, calloc, realloc and free that would be a good way forward. I do not think we should require projects to be defining there own versions of [C]ALLOC_*() and BUG() just to use xdiff. Best Wishes Phillip
On 11/07/2022 10:26, Phillip Wood wrote: >> For the monkeypatching you do now of creating a "git-xdiff.h" I think it >> would be very good to get a patch like what I managed to get >> sha1collisiondetection.git to accept for our own use-case, which allows >> us to use their upstream code as-is from a submodule: >> >> https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef >> > > Thanks for the link, That's a really good example where all the > identifiers are namespaced and the public include file does not pollute > the code that is including it. If you come up with something like that > where the client code can set up same #defines for malloc, calloc, > realloc and free that would be a good way forward. To be clear those should be namespaced, so we'd have -Dxdl_malloc=xmalloc (or xmalloc_gently) and libgit2 would have -D xdl_malloc=git__malloc Best Wishes Phillip > I do not think we > should require projects to be defining there own versions of > [C]ALLOC_*() and BUG() just to use xdiff. > > Best Wishes > > Phillip >
On Mon, Jul 11 2022, Phillip Wood wrote: > Hi Ævar > > On 08/07/2022 22:26, Ævar Arnfjörð Bjarmason wrote: >> On Fri, Jul 08 2022, Phillip Wood wrote: >> >>> Hi Ævar >>> >>> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote: >>>> Remove the xdl_free() wrapper macro in favor of using free() >>>> directly. The wrapper macro was brought in with the initial import of >>>> xdiff/ in 3443546f6ef (Use a *real* built-in diff generator, >>>> 2006-03-24). >>>> As subsequent discussions on the topic[1] have made clear there's no >>>> reason to use this wrapper. >>> >>> That link is to a message where you assert that there is no reason for >>> the wrapper, you seem to be the only person in that thread making that >>> argument. The libgit2 maintainer and others are arguing that it is >>> worth keeping to make it easy to swap the allocator. >> Arguing that it's not needed for a technical reason, but an >> "aesthetic >> and ergonomic one", per: >> https://lore.kernel.org/git/20220217225804.GC7@edef91d97c94/; >> Perhaps I misread it, but I took this as Junio concurring with what >> I >> was pointing out there: >> https://lore.kernel.org/git/xmqqfsohbdre.fsf@gitster.g/ >> >>>> Both git itself as well as any external >>>> users such as libgit2 compile the xdiff/* code as part of their own >>>> compilation, and can thus find the right malloc() and free() at >>>> link-time. >>> >>> I'm struggling to see how that is simpler than the current situation >>> with xdl_malloc(). >> It's simpler because when maintaining that code in git.git it's less >> of >> a special case, e.g. we have coccinelle rules that match free(), now >> every such rule needs to account for the xdiff special-case. >> And it's less buggy because if you're relying on us always using a >> wrapper bugs will creep in, current master has this: >> >> $ git -P grep '\bfree\(' -- xdiff >> xdiff/xdiff.h:#define xdl_free(ptr) free(ptr) >> xdiff/xmerge.c: free(c); >> xdiff/xmerge.c: free(next_m); >> >>> Perhaps you could show how you think libgit2 could >>> easily make xdiff use git__malloc() instead of malloc() without >>> changing the malloc() calls (the message you linked to essentially >>> suggests they do a search and replace). If people cannot swap in >>> another allocator without changing the code then you are imposing a >>> barrier to them contributing patches back to git's xdiff. >> I was suggesting that if libgit2 wanted to maintain a copy of xdiff >> that >> catered to the asthetic desires of the maintainer adjusting whatever >> import script you use to apply a trivial coccinelle transformation would >> give you what you wanted. Except without a maintenance burden on >> git.git, or the bugs you'd get now since you're not catching those two >> free() cases (or any future such cases). >> But just having the code use malloc() and free() directly and >> getting >> you what you get now is easy, and doesn't require any such >> search-replacement. >> Here's how: >> I imported the xdiff/*.[ch] files at the tip of my branch into >> current >> libgit2.git's src/libgit2/xdiff/, and then applied this on top, which is >> partially re-applying libgit2's own monkeypatches, and partially >> adjusting for upstream changes (including this one): >> >> diff --git a/src/libgit2/xdiff/git-xdiff.h b/src/libgit2/xdiff/git-xdiff.h >> index b75dba819..2e00764d4 100644 >> --- a/src/libgit2/xdiff/git-xdiff.h >> +++ b/src/libgit2/xdiff/git-xdiff.h >> @@ -14,6 +14,7 @@ >> #ifndef INCLUDE_git_xdiff_h__ >> #define INCLUDE_git_xdiff_h__ >> >> +#include <regex.h> >> #include "regexp.h" >> >> /* Work around C90-conformance issues */ >> @@ -27,11 +28,10 @@ >> # endif >> #endif >> >> -#define xdl_malloc(x) git__malloc(x) >> -#define xdl_free(ptr) git__free(ptr) >> -#define xdl_realloc(ptr, x) git__realloc(ptr, x) >> +#define malloc(x) git__malloc(x) >> +#define free(ptr) git__free(ptr) >> >> -#define XDL_BUG(msg) GIT_ASSERT(msg) >> +#define BUG(msg) GIT_ASSERT(msg) >> >> #define xdl_regex_t git_regexp >> #define xdl_regmatch_t git_regmatch >> @@ -50,4 +50,17 @@ GIT_INLINE(int) xdl_regexec_buf( >> return -1; >> } >> >> +static inline size_t st_mult(size_t a, size_t b) >> +{ >> + return a * b; >> +} >> + >> +static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size, >> + size_t nmatch, regmatch_t pmatch[], int eflags) >> +{ >> + assert(nmatch > 0 && pmatch); >> + pmatch[0].rm_so = 0; >> + pmatch[0].rm_eo = size; >> + return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND); >> +} >> #endif >> diff --git a/src/libgit2/xdiff/xdiff.h b/src/libgit2/xdiff/xdiff.h >> index a37d89fcd..5e5b525c2 100644 >> --- a/src/libgit2/xdiff/xdiff.h >> +++ b/src/libgit2/xdiff/xdiff.h >> @@ -23,6 +23,8 @@ >> #if !defined(XDIFF_H) >> #define XDIFF_H >> >> +#include "git-xdiff.h" >> + >> #ifdef __cplusplus >> extern "C" { >> #endif /* #ifdef __cplusplus */ >> diff --git a/src/libgit2/xdiff/xinclude.h b/src/libgit2/xdiff/xinclude.h >> index a4285ac0e..79812ad8a 100644 >> --- a/src/libgit2/xdiff/xinclude.h >> +++ b/src/libgit2/xdiff/xinclude.h >> @@ -23,7 +23,8 @@ >> #if !defined(XINCLUDE_H) >> #define XINCLUDE_H >> >> -#include "git-compat-util.h" >> +#include "git-xdiff.h" >> +#include "git-shared-util.h" >> #include "xmacros.h" >> #include "xdiff.h" >> #include "xtypes.h" >> If you then build it and run e.g.: >> gdb -args ./libgit2_tests -smerge::files >> You'll get stack traces like this if you break on stdalloc__malloc >> (which it uses for me in its default configuration): >> >> (gdb) bt >> #0 stdalloc__malloc (len=144, file=0x87478d "/home/avar/g/libgit2/src/libgit2/xdiff/xutils.c", line=101) at /home/avar/g/libgit2/src/util/allocators/stdalloc.c:14 >> #1 0x00000000006ec15c in xdl_cha_alloc (cha=0x7fffffffcaa8) at /home/avar/g/libgit2/src/libgit2/xdiff/xutils.c:101 >> #2 0x00000000006eaee9 in xdl_prepare_ctx (pass=1, mf=0x7fffffffcc98, narec=13, xpp=0x7fffffffcca8, cf=0x7fffffffc7d0, xdf=0x7fffffffcaa8) >> at /home/avar/g/libgit2/src/libgit2/xdiff/xprepare.c:194 >> IOW this was all seamlessly routed to your git__malloc() without us >> having to maintain an xdl_{malloc,free}(). > > Thanks for showing this, it is really helpful to see a concrete > example. I was especially interested to see how you went about the > conversion without redefining standard library functions or > introducing non-namespaced identifiers in files that included > xdiff.h. Unfortunately you have not done that and so I don't think the > approach above is viable for a well behaved library. Why? Because there's some header where doing e.g.: #include "git2/something.h" Will directly include git-xdiff.h by proxy into the user's program? I admit I didn't check that, and assumed that these were only included by other *.c files in libgit2 itself. I.e. it would compile xdiff for its own use, but then expose its own API. Anyway, if it is directly included in some user-exposed *.h file then yes, you don't want to overwrite their "malloc", but that's a small matter of doing an "#undef malloc" at the end (which as the below linked-to patch shows we'd support by having macros like SHA1DC_CUSTOM_TRAILING_INCLUDE_UBC_CHECK_C). Aside from what I'm proposing here doing such "undef at the end" seems like a good idea in any case, because if there is any macro leakage here you're e.g. re-defining "XDL_BUG" and other things that aren't clearly in the libgit2 namespace now, no? >> Now, I think that's obviously worth adjusting, e.g. the series I've got >> here could make this easier for libgit2 by including st_mult() at least, >> I'm not sure what you want to do about regexec_buf(). >> For the monkeypatching you do now of creating a "git-xdiff.h" I >> think it >> would be very good to get a patch like what I managed to get >> sha1collisiondetection.git to accept for our own use-case, which allows >> us to use their upstream code as-is from a submodule: >> https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef > > Thanks for the link, That's a really good example where all the > identifiers are namespaced and the public include file does not > pollute the code that is including it. If you come up with something > like that where the client code can set up same #defines for malloc, > calloc, realloc and free that would be a good way forward. I don't need to come up with that, you've got the linker to do that. I.e. for almost any "normal" use of xdiff you'd simply compile it with its reference to malloc(), and then you either link that library that already links to libc into your program. So if you use a custom malloc the xdiff code might still use libc malloc, or you'd link the two together and link the resulting program with your own custom malloc. As explained in the previous & linked-to threads this is how almost everyone would include & use such a library, and indeed that's how git itself can use malloc() and free() in its sources, but have options to link to libc malloc, nedmalloc etc. But instead of using the linker to resolve "malloc" to git__malloc you'd like to effectively include the upstream *.[ch] files directly, and pretend as though the upstream source used git__malloc() or whatever to begin with. I don't really understand why you can't just do that at the linker level, especially as doing so guards you better against namespace pollution. But whatever, the suggestion(s) above assume you've got a good reason, but show that we don't need to prefix every standard function just to accommodate that. Instead we can just provide a knob to include a header of yours after all others have been included (which the libgit2 monkeypatches to xdiff already include), and have that header define "malloc" as "git__malloc", "BUG" as "GIT_ASSERT" etc. And yes, if you're in turn providing an interface where others will then include your header that's doing that you'll need to apply some namespacing paranoia, namely to "#undef malloc" etc. But none of that requires git to carry prefixed versions of standard functions you'd wish to replace, which the patches here show. > I do not think we should require projects to be defining there own > versions of [C]ALLOC_*() and BUG() just to use xdiff. No, I don't think so either. I.e. the idea with these patches is that we could bundle up xdiff/* and git-shared-util.h into one distributed libgit, which down the road we could even roll independent releases for (as upstream seems to have forever gone away). Whereas the proposals coming out of libgit2[1] for generalizing xdiff/ for general use seem to stop just short of the specific hacks we need to get it running for libgit2, but e.g. leave "xdl_malloc" defined as "xmalloc". That isn't a standard library function, and therefore the first thing you'd need to do when using it as a library is to find out how git.git uses that, and copy/paste it or define your own replacement. Whereas I think it should be the other way around, we should e.g. ship a shimmy BUG() that calls fprintf(stderr, ...) and abort(), but for advanced users such as libgit2 guard those with "ifndef" or whatever, so you can have it call GIT_ASSERT() and the like instead. 1. https://lore.kernel.org/git/20220209013354.GB7@abe733c6e288/
On 11/07/2022 11:02, Ævar Arnfjörð Bjarmason wrote: > > On Mon, Jul 11 2022, Phillip Wood wrote: > >> Hi Ævar >> >> On 08/07/2022 22:26, Ævar Arnfjörð Bjarmason wrote: >>> On Fri, Jul 08 2022, Phillip Wood wrote: >>> >>>> Hi Ævar >>>> >>>> On 08/07/2022 15:20, Ævar Arnfjörð Bjarmason wrote: >>>>> Remove the xdl_free() wrapper macro in favor of using free() >>>>> directly. The wrapper macro was brought in with the initial import of >>>>> xdiff/ in 3443546f6ef (Use a *real* built-in diff generator, >>>>> 2006-03-24). >>>>> As subsequent discussions on the topic[1] have made clear there's no >>>>> reason to use this wrapper. >>>> >>>> That link is to a message where you assert that there is no reason for >>>> the wrapper, you seem to be the only person in that thread making that >>>> argument. The libgit2 maintainer and others are arguing that it is >>>> worth keeping to make it easy to swap the allocator. >>> Arguing that it's not needed for a technical reason, but an >>> "aesthetic >>> and ergonomic one", per: >>> https://lore.kernel.org/git/20220217225804.GC7@edef91d97c94/; >>> Perhaps I misread it, but I took this as Junio concurring with what >>> I >>> was pointing out there: >>> https://lore.kernel.org/git/xmqqfsohbdre.fsf@gitster.g/ >>> >>>>> Both git itself as well as any external >>>>> users such as libgit2 compile the xdiff/* code as part of their own >>>>> compilation, and can thus find the right malloc() and free() at >>>>> link-time. >>>> >>>> I'm struggling to see how that is simpler than the current situation >>>> with xdl_malloc(). >>> It's simpler because when maintaining that code in git.git it's less >>> of >>> a special case, e.g. we have coccinelle rules that match free(), now >>> every such rule needs to account for the xdiff special-case. >>> And it's less buggy because if you're relying on us always using a >>> wrapper bugs will creep in, current master has this: >>> >>> $ git -P grep '\bfree\(' -- xdiff >>> xdiff/xdiff.h:#define xdl_free(ptr) free(ptr) >>> xdiff/xmerge.c: free(c); >>> xdiff/xmerge.c: free(next_m); >>> >>>> Perhaps you could show how you think libgit2 could >>>> easily make xdiff use git__malloc() instead of malloc() without >>>> changing the malloc() calls (the message you linked to essentially >>>> suggests they do a search and replace). If people cannot swap in >>>> another allocator without changing the code then you are imposing a >>>> barrier to them contributing patches back to git's xdiff. >>> I was suggesting that if libgit2 wanted to maintain a copy of xdiff >>> that >>> catered to the asthetic desires of the maintainer adjusting whatever >>> import script you use to apply a trivial coccinelle transformation would >>> give you what you wanted. Except without a maintenance burden on >>> git.git, or the bugs you'd get now since you're not catching those two >>> free() cases (or any future such cases). >>> But just having the code use malloc() and free() directly and >>> getting >>> you what you get now is easy, and doesn't require any such >>> search-replacement. >>> Here's how: >>> I imported the xdiff/*.[ch] files at the tip of my branch into >>> current >>> libgit2.git's src/libgit2/xdiff/, and then applied this on top, which is >>> partially re-applying libgit2's own monkeypatches, and partially >>> adjusting for upstream changes (including this one): >>> >>> diff --git a/src/libgit2/xdiff/git-xdiff.h b/src/libgit2/xdiff/git-xdiff.h >>> index b75dba819..2e00764d4 100644 >>> --- a/src/libgit2/xdiff/git-xdiff.h >>> +++ b/src/libgit2/xdiff/git-xdiff.h >>> @@ -14,6 +14,7 @@ >>> #ifndef INCLUDE_git_xdiff_h__ >>> #define INCLUDE_git_xdiff_h__ >>> >>> +#include <regex.h> >>> #include "regexp.h" >>> >>> /* Work around C90-conformance issues */ >>> @@ -27,11 +28,10 @@ >>> # endif >>> #endif >>> >>> -#define xdl_malloc(x) git__malloc(x) >>> -#define xdl_free(ptr) git__free(ptr) >>> -#define xdl_realloc(ptr, x) git__realloc(ptr, x) >>> +#define malloc(x) git__malloc(x) >>> +#define free(ptr) git__free(ptr) >>> >>> -#define XDL_BUG(msg) GIT_ASSERT(msg) >>> +#define BUG(msg) GIT_ASSERT(msg) >>> >>> #define xdl_regex_t git_regexp >>> #define xdl_regmatch_t git_regmatch >>> @@ -50,4 +50,17 @@ GIT_INLINE(int) xdl_regexec_buf( >>> return -1; >>> } >>> >>> +static inline size_t st_mult(size_t a, size_t b) >>> +{ >>> + return a * b; >>> +} >>> + >>> +static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size, >>> + size_t nmatch, regmatch_t pmatch[], int eflags) >>> +{ >>> + assert(nmatch > 0 && pmatch); >>> + pmatch[0].rm_so = 0; >>> + pmatch[0].rm_eo = size; >>> + return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND); >>> +} >>> #endif >>> diff --git a/src/libgit2/xdiff/xdiff.h b/src/libgit2/xdiff/xdiff.h >>> index a37d89fcd..5e5b525c2 100644 >>> --- a/src/libgit2/xdiff/xdiff.h >>> +++ b/src/libgit2/xdiff/xdiff.h >>> @@ -23,6 +23,8 @@ >>> #if !defined(XDIFF_H) >>> #define XDIFF_H >>> >>> +#include "git-xdiff.h" >>> + >>> #ifdef __cplusplus >>> extern "C" { >>> #endif /* #ifdef __cplusplus */ >>> diff --git a/src/libgit2/xdiff/xinclude.h b/src/libgit2/xdiff/xinclude.h >>> index a4285ac0e..79812ad8a 100644 >>> --- a/src/libgit2/xdiff/xinclude.h >>> +++ b/src/libgit2/xdiff/xinclude.h >>> @@ -23,7 +23,8 @@ >>> #if !defined(XINCLUDE_H) >>> #define XINCLUDE_H >>> >>> -#include "git-compat-util.h" >>> +#include "git-xdiff.h" >>> +#include "git-shared-util.h" >>> #include "xmacros.h" >>> #include "xdiff.h" >>> #include "xtypes.h" >>> If you then build it and run e.g.: >>> gdb -args ./libgit2_tests -smerge::files >>> You'll get stack traces like this if you break on stdalloc__malloc >>> (which it uses for me in its default configuration): >>> >>> (gdb) bt >>> #0 stdalloc__malloc (len=144, file=0x87478d "/home/avar/g/libgit2/src/libgit2/xdiff/xutils.c", line=101) at /home/avar/g/libgit2/src/util/allocators/stdalloc.c:14 >>> #1 0x00000000006ec15c in xdl_cha_alloc (cha=0x7fffffffcaa8) at /home/avar/g/libgit2/src/libgit2/xdiff/xutils.c:101 >>> #2 0x00000000006eaee9 in xdl_prepare_ctx (pass=1, mf=0x7fffffffcc98, narec=13, xpp=0x7fffffffcca8, cf=0x7fffffffc7d0, xdf=0x7fffffffcaa8) >>> at /home/avar/g/libgit2/src/libgit2/xdiff/xprepare.c:194 >>> IOW this was all seamlessly routed to your git__malloc() without us >>> having to maintain an xdl_{malloc,free}(). >> >> Thanks for showing this, it is really helpful to see a concrete >> example. I was especially interested to see how you went about the >> conversion without redefining standard library functions or >> introducing non-namespaced identifiers in files that included >> xdiff.h. Unfortunately you have not done that and so I don't think the >> approach above is viable for a well behaved library. > > Why? Because there's some header where doing e.g.: > > #include "git2/something.h" > > Will directly include git-xdiff.h by proxy into the user's program? No because you're redefining malloc() and introducing ALLOC_GROW() etc in src/libgit2/{Blame_git.c,Diff_xdiff.c,Merge_file.c,Patch_generate.c,Checkout.c} and Test/libgit2/merge/files.c. It is not expected that including xdiff.h will change a bunch of symbols in the file it is included in. Best Wishes Phillip > > I admit I didn't check that, and assumed that these were only included > by other *.c files in libgit2 itself. I.e. it would compile xdiff for > its own use, but then expose its own API. > > Anyway, if it is directly included in some user-exposed *.h file then > yes, you don't want to overwrite their "malloc", but that's a small > matter of doing an "#undef malloc" at the end (which as the below > linked-to patch shows we'd support by having macros like > SHA1DC_CUSTOM_TRAILING_INCLUDE_UBC_CHECK_C). > > Aside from what I'm proposing here doing such "undef at the end" seems > like a good idea in any case, because if there is any macro leakage here > you're e.g. re-defining "XDL_BUG" and other things that aren't clearly > in the libgit2 namespace now, no? > >>> Now, I think that's obviously worth adjusting, e.g. the series I've got >>> here could make this easier for libgit2 by including st_mult() at least, >>> I'm not sure what you want to do about regexec_buf(). >>> For the monkeypatching you do now of creating a "git-xdiff.h" I >>> think it >>> would be very good to get a patch like what I managed to get >>> sha1collisiondetection.git to accept for our own use-case, which allows >>> us to use their upstream code as-is from a submodule: >>> https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef >> >> Thanks for the link, That's a really good example where all the >> identifiers are namespaced and the public include file does not >> pollute the code that is including it. If you come up with something >> like that where the client code can set up same #defines for malloc, >> calloc, realloc and free that would be a good way forward. > > I don't need to come up with that, you've got the linker to do that. > > I.e. for almost any "normal" use of xdiff you'd simply compile it with > its reference to malloc(), and then you either link that library that > already links to libc into your program. > > So if you use a custom malloc the xdiff code might still use libc > malloc, or you'd link the two together and link the resulting program > with your own custom malloc. > > As explained in the previous & linked-to threads this is how almost > everyone would include & use such a library, and indeed that's how git > itself can use malloc() and free() in its sources, but have options to > link to libc malloc, nedmalloc etc. > > But instead of using the linker to resolve "malloc" to git__malloc you'd > like to effectively include the upstream *.[ch] files directly, and > pretend as though the upstream source used git__malloc() or whatever to > begin with. > > I don't really understand why you can't just do that at the linker > level, especially as doing so guards you better against namespace > pollution. But whatever, the suggestion(s) above assume you've got a > good reason, but show that we don't need to prefix every standard > function just to accommodate that. > > Instead we can just provide a knob to include a header of yours after > all others have been included (which the libgit2 monkeypatches to xdiff > already include), and have that header define "malloc" as "git__malloc", > "BUG" as "GIT_ASSERT" etc. > > And yes, if you're in turn providing an interface where others will then > include your header that's doing that you'll need to apply some > namespacing paranoia, namely to "#undef malloc" etc. > > But none of that requires git to carry prefixed versions of standard > functions you'd wish to replace, which the patches here show. > >> I do not think we should require projects to be defining there own >> versions of [C]ALLOC_*() and BUG() just to use xdiff. > > No, I don't think so either. I.e. the idea with these patches is that we > could bundle up xdiff/* and git-shared-util.h into one distributed > libgit, which down the road we could even roll independent releases for > (as upstream seems to have forever gone away). > > Whereas the proposals coming out of libgit2[1] for generalizing xdiff/ > for general use seem to stop just short of the specific hacks we need to > get it running for libgit2, but e.g. leave "xdl_malloc" defined as > "xmalloc". > > That isn't a standard library function, and therefore the first thing > you'd need to do when using it as a library is to find out how git.git > uses that, and copy/paste it or define your own replacement. > > Whereas I think it should be the other way around, we should e.g. ship a > shimmy BUG() that calls fprintf(stderr, ...) and abort(), but for > advanced users such as libgit2 guard those with "ifndef" or whatever, so > you can have it call GIT_ASSERT() and the like instead. > > 1. https://lore.kernel.org/git/20220209013354.GB7@abe733c6e288/
On Wed, Jul 13 2022, Phillip Wood wrote: > On 11/07/2022 11:02, Ævar Arnfjörð Bjarmason wrote: >> On Mon, Jul 11 2022, Phillip Wood wrote: > [...] >>> Thanks for showing this, it is really helpful to see a concrete >>> example. I was especially interested to see how you went about the >>> conversion without redefining standard library functions or >>> introducing non-namespaced identifiers in files that included >>> xdiff.h. Unfortunately you have not done that and so I don't think the >>> approach above is viable for a well behaved library. >> Why? Because there's some header where doing e.g.: >> #include "git2/something.h" >> Will directly include git-xdiff.h by proxy into the user's program? > > No because you're redefining malloc() and introducing ALLOC_GROW() etc > in > src/libgit2/{Blame_git.c,Diff_xdiff.c,Merge_file.c,Patch_generate.c,Checkout.c} > and > Test/libgit2/merge/files.c. It is not expected that including xdiff.h > will change a bunch of symbols in the file it is included in. ...which is why if you read to the sha1collisiondetection commit below you'd follow that up with including a header at the end of xdiff.h which would do: #undef malloc etc., so you wouldn't leak that macro beyond the code that needs it, and you wouldn't leak the xdl_* macros either, which are purely needed internally in that code. So even aside from my suggestion of doing it this way it seems to me the structure has macro hygene issues worth fixing, see e.g. how we have refs-internal.h v.s. refs.h in git.git for similar reasons. >> I admit I didn't check that, and assumed that these were only >> included >> by other *.c files in libgit2 itself. I.e. it would compile xdiff for >> its own use, but then expose its own API. >> Anyway, if it is directly included in some user-exposed *.h file >> then >> yes, you don't want to overwrite their "malloc", but that's a small >> matter of doing an "#undef malloc" at the end (which as the below >> linked-to patch shows we'd support by having macros like >> SHA1DC_CUSTOM_TRAILING_INCLUDE_UBC_CHECK_C). >> Aside from what I'm proposing here doing such "undef at the end" >> seems >> like a good idea in any case, because if there is any macro leakage here >> you're e.g. re-defining "XDL_BUG" and other things that aren't clearly >> in the libgit2 namespace now, no? >> >>>> Now, I think that's obviously worth adjusting, e.g. the series I've got >>>> here could make this easier for libgit2 by including st_mult() at least, >>>> I'm not sure what you want to do about regexec_buf(). >>>> For the monkeypatching you do now of creating a "git-xdiff.h" I >>>> think it >>>> would be very good to get a patch like what I managed to get >>>> sha1collisiondetection.git to accept for our own use-case, which allows >>>> us to use their upstream code as-is from a submodule: >>>> https://github.com/cr-marcstevens/sha1collisiondetection/commit/b45fcef >>> >>> Thanks for the link, That's a really good example where all the >>> identifiers are namespaced and the public include file does not >>> pollute the code that is including it. If you come up with something >>> like that where the client code can set up same #defines for malloc, >>> calloc, realloc and free that would be a good way forward. >> I don't need to come up with that, you've got the linker to do that. >> I.e. for almost any "normal" use of xdiff you'd simply compile it >> with >> its reference to malloc(), and then you either link that library that >> already links to libc into your program. >> So if you use a custom malloc the xdiff code might still use libc >> malloc, or you'd link the two together and link the resulting program >> with your own custom malloc. >> As explained in the previous & linked-to threads this is how almost >> everyone would include & use such a library, and indeed that's how git >> itself can use malloc() and free() in its sources, but have options to >> link to libc malloc, nedmalloc etc. >> But instead of using the linker to resolve "malloc" to git__malloc >> you'd >> like to effectively include the upstream *.[ch] files directly, and >> pretend as though the upstream source used git__malloc() or whatever to >> begin with. >> I don't really understand why you can't just do that at the linker >> level, especially as doing so guards you better against namespace >> pollution. But whatever, the suggestion(s) above assume you've got a >> good reason, but show that we don't need to prefix every standard >> function just to accommodate that. >> Instead we can just provide a knob to include a header of yours >> after >> all others have been included (which the libgit2 monkeypatches to xdiff >> already include), and have that header define "malloc" as "git__malloc", >> "BUG" as "GIT_ASSERT" etc. >> And yes, if you're in turn providing an interface where others will >> then >> include your header that's doing that you'll need to apply some >> namespacing paranoia, namely to "#undef malloc" etc. >> But none of that requires git to carry prefixed versions of standard >> functions you'd wish to replace, which the patches here show. >> >>> I do not think we should require projects to be defining there own >>> versions of [C]ALLOC_*() and BUG() just to use xdiff. >> No, I don't think so either. I.e. the idea with these patches is >> that we >> could bundle up xdiff/* and git-shared-util.h into one distributed >> libgit, which down the road we could even roll independent releases for >> (as upstream seems to have forever gone away). >> Whereas the proposals coming out of libgit2[1] for generalizing >> xdiff/ >> for general use seem to stop just short of the specific hacks we need to >> get it running for libgit2, but e.g. leave "xdl_malloc" defined as >> "xmalloc". >> That isn't a standard library function, and therefore the first >> thing >> you'd need to do when using it as a library is to find out how git.git >> uses that, and copy/paste it or define your own replacement. >> Whereas I think it should be the other way around, we should >> e.g. ship a >> shimmy BUG() that calls fprintf(stderr, ...) and abort(), but for >> advanced users such as libgit2 guard those with "ifndef" or whatever, so >> you can have it call GIT_ASSERT() and the like instead. >> 1. https://lore.kernel.org/git/20220209013354.GB7@abe733c6e288/ ^ I.e. this.
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index df048e0099b..a37d89fcdaf 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -118,9 +118,6 @@ typedef struct s_bdiffparam { long bsize; } bdiffparam_t; - -#define xdl_free(ptr) free(ptr) - void *xdl_mmfile_first(mmfile_t *mmf, long *size); long xdl_mmfile_size(mmfile_t *mmf); diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 6590811634f..375bb81a8aa 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -359,7 +359,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, res = xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv); - xdl_free(kvd); + free(kvd); return res; } @@ -960,7 +960,7 @@ void xdl_free_script(xdchange_t *xscr) { while ((xch = xscr) != NULL) { xscr = xscr->next; - xdl_free(xch); + free(xch); } } diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index f20592bfbdd..be35d9c9697 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -240,9 +240,9 @@ static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env, static inline void free_index(struct histindex *index) { - xdl_free(index->records); - xdl_free(index->line_map); - xdl_free(index->next_ptrs); + free(index->records); + free(index->line_map); + free(index->next_ptrs); xdl_cha_free(&index->rcha); } diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c index bb328d9f852..8fffd2b8297 100644 --- a/xdiff/xpatience.c +++ b/xdiff/xpatience.c @@ -233,7 +233,7 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res) /* No common unique lines were found */ if (!longest) { *res = NULL; - xdl_free(sequence); + free(sequence); return 0; } @@ -245,7 +245,7 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res) entry = entry->previous; } *res = entry; - xdl_free(sequence); + free(sequence); return 0; } @@ -358,7 +358,7 @@ static int patience_diff(mmfile_t *file1, mmfile_t *file2, env->xdf1.rchg[line1++ - 1] = 1; while(count2--) env->xdf2.rchg[line2++ - 1] = 1; - xdl_free(map.entries); + free(map.entries); return 0; } @@ -372,7 +372,7 @@ static int patience_diff(mmfile_t *file1, mmfile_t *file2, result = fall_back_to_classic_diff(&map, line1, count1, line2, count2); out: - xdl_free(map.entries); + free(map.entries); return result; } diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 4182d9e1c0a..169629761c0 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -89,7 +89,7 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { GALLOC_ARRAY(cf->rcrecs, cf->alloc); if (!cf->rcrecs) { - xdl_free(cf->rchash); + free(cf->rchash); xdl_cha_free(&cf->ncha); return -1; } @@ -102,8 +102,8 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { static void xdl_free_classifier(xdlclassifier_t *cf) { - xdl_free(cf->rcrecs); - xdl_free(cf->rchash); + free(cf->rcrecs); + free(cf->rchash); xdl_cha_free(&cf->ncha); } @@ -230,11 +230,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ return 0; abort: - xdl_free(ha); - xdl_free(rindex); - xdl_free(rchg); - xdl_free(rhash); - xdl_free(recs); + free(ha); + free(rindex); + free(rchg); + free(rhash); + free(recs); xdl_cha_free(&xdf->rcha); return -1; } @@ -242,11 +242,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ static void xdl_free_ctx(xdfile_t *xdf) { - xdl_free(xdf->rhash); - xdl_free(xdf->rindex); - xdl_free(xdf->rchg - 1); - xdl_free(xdf->ha); - xdl_free(xdf->recs); + free(xdf->rhash); + free(xdf->rindex); + free(xdf->rchg - 1); + free(xdf->ha); + free(xdf->recs); xdl_cha_free(&xdf->rcha); } @@ -424,7 +424,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd } xdf2->nreff = nreff; - xdl_free(dis); + free(dis); return 0; } diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 865e08f0e93..00eeba452a5 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -88,7 +88,7 @@ void xdl_cha_free(chastore_t *cha) { for (cur = cha->head; (tmp = cur) != NULL;) { cur = cur->next; - xdl_free(tmp); + free(tmp); } }
Remove the xdl_free() wrapper macro in favor of using free() directly. The wrapper macro was brought in with the initial import of xdiff/ in 3443546f6ef (Use a *real* built-in diff generator, 2006-03-24). As subsequent discussions on the topic[1] have made clear there's no reason to use this wrapper. Both git itself as well as any external users such as libgit2 compile the xdiff/* code as part of their own compilation, and can thus find the right malloc() and free() at link-time. When compiling git we already find a custom malloc() and free() e.g. if compiled with USE_NED_ALLOCATOR=YesPlease. 1. https://lore.kernel.org/git/220415.867d7qbaad.gmgdl@evledraar.gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> --- xdiff/xdiff.h | 3 --- xdiff/xdiffi.c | 4 ++-- xdiff/xhistogram.c | 6 +++--- xdiff/xpatience.c | 8 ++++---- xdiff/xprepare.c | 28 ++++++++++++++-------------- xdiff/xutils.c | 2 +- 6 files changed, 24 insertions(+), 27 deletions(-)