Message ID | CAKk8isqpAXLoiXxOP3uAc00M+OM0FaU3Uhnt5R1FnFMD=xGARg@mail.gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [BUGREPORT] git diff-tree --cc SEGFAUTs | expand |
On Fri, Jan 03, 2025 at 11:28:47AM -0800, Wink Saville wrote: > `git diff-tree --cc` SEGFAUTs after adding trace_printf to diff_tree_combined. Hmm, is it really a bug in Git if you had to add new code which contains the bug? :) > @@ -1595,8 +1597,16 @@ void diff_tree_combined(const struct object_id *oid, > } > > /* find out number of surviving paths */ > - for (num_paths = 0, p = paths; p; p = p->next) > + trace_printf("Wink diff_tree_combined: find number of surviving paths num_parent=%d\n", num_parent); > + for (num_paths = 0, p = paths; p; p = p->next) { > + trace_printf("Wink diff_tree_combined: num_paths=%d &p=%p mode=%0x, oid=%s path=%s\n", num_paths, p, p->mode, oid_to_hex(&p->oid), p->path); > + for (i = 0; i < num_parent; i++) { > + trace_printf("Wink diff_tree_combined: &p->parent[%d]=%p status=%c mode=%x oid=%s path.buf=%p contents path.buf=%s\n", > + i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), p->parent[i].path.buf, p->parent[i].path.buf); > + } The parent "path" strbufs are only initialized in intersect_paths() if combined_all_paths is set, and if there was an actual path change (a copy or rename). So you'd probably need something like this: diff --git a/combine-diff.c b/combine-diff.c index 455bc19087..1e58809c4e 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1601,8 +1601,11 @@ void diff_tree_combined(const struct object_id *oid, for (num_paths = 0, p = paths; p; p = p->next) { trace_printf("Wink diff_tree_combined: num_paths=%d &p=%p mode=%0x, oid=%s path=%s\n", num_paths, p, p->mode, oid_to_hex(&p->oid), p->path); for (i = 0; i < num_parent; i++) { + const char *path = rev->combine_all_paths && + filename_changed(p->parent[i].status) ? + p->parent[i].path.buf : NULL; trace_printf("Wink diff_tree_combined: &p->parent[%d]=%p status=%c mode=%x oid=%s path.buf=%p contents path.buf=%s\n", - i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), p->parent[i].path.buf, p->parent[i].path.buf); + i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), path, path); } num_paths++; } -Peff
On Fri, Jan 3, 2025 at 12:46 PM Jeff King <peff@peff.net> wrote: > > On Fri, Jan 03, 2025 at 11:28:47AM -0800, Wink Saville wrote: > > > `git diff-tree --cc` SEGFAUTs after adding trace_printf to diff_tree_combined. > > Hmm, is it really a bug in Git if you had to add new code which contains > the bug? :) > > > @@ -1595,8 +1597,16 @@ void diff_tree_combined(const struct object_id *oid, > > } > > > > /* find out number of surviving paths */ > > - for (num_paths = 0, p = paths; p; p = p->next) > > + trace_printf("Wink diff_tree_combined: find number of surviving paths num_parent=%d\n", num_parent); > > + for (num_paths = 0, p = paths; p; p = p->next) { > > + trace_printf("Wink diff_tree_combined: num_paths=%d &p=%p mode=%0x, oid=%s path=%s\n", num_paths, p, p->mode, oid_to_hex(&p->oid), p->path); > > + for (i = 0; i < num_parent; i++) { > > + trace_printf("Wink diff_tree_combined: &p->parent[%d]=%p status=%c mode=%x oid=%s path.buf=%p contents path.buf=%s\n", > > + i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), p->parent[i].path.buf, p->parent[i].path.buf); > > + } > > The parent "path" strbufs are only initialized in intersect_paths() if > combined_all_paths is set, and if there was an actual path change (a > copy or rename). > > So you'd probably need something like this: > > diff --git a/combine-diff.c b/combine-diff.c > index 455bc19087..1e58809c4e 100644 > --- a/combine-diff.c > +++ b/combine-diff.c > @@ -1601,8 +1601,11 @@ void diff_tree_combined(const struct object_id *oid, > for (num_paths = 0, p = paths; p; p = p->next) { > trace_printf("Wink diff_tree_combined: num_paths=%d &p=%p mode=%0x, oid=%s path=%s\n", num_paths, p, p->mode, oid_to_hex(&p->oid), p->path); > for (i = 0; i < num_parent; i++) { > + const char *path = rev->combine_all_paths && > + filename_changed(p->parent[i].status) ? > + p->parent[i].path.buf : NULL; > trace_printf("Wink diff_tree_combined: &p->parent[%d]=%p status=%c mode=%x oid=%s path.buf=%p contents path.buf=%s\n", > - i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), p->parent[i].path.buf, p->parent[i].path.buf); > + i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), path, path); > } > num_paths++; > } > > -Peff TYVM! That worked but changed the name and fixed a typo in `combined_all_paths`: ``` wink@3900x 25-01-03T23:06:08.344Z:~/data/prgs/forks/git (wink-segfault-with-minimal-changes) $ git diff diff --git a/combine-diff.c b/combine-diff.c index 455bc19087..70394c3350 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1601,8 +1601,9 @@ void diff_tree_combined(const struct object_id *oid, for (num_paths = 0, p = paths; p; p = p->next) { trace_printf("Wink diff_tree_combined: num_paths=%d &p=%p mode=%0x, oid=%s path=%s\n", num_paths, p, p->mode, oid_to_hex(&p->oid), p->path); for (i = 0; i < num_parent; i++) { + const char *parent_path = rev->combined_all_paths && filename_changed(p->parent[i].status) ? p->parent[i].path.buf : NULL; trace_printf("Wink diff_tree_combined: &p->parent[%d]=%p status=%c mode=%x oid=%s path.buf=%p contents path.buf=%s\n", - i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), p->parent[i].path.buf, p->parent[i].path.buf); + i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), parent_path, parent_path); } num_paths++; } ``` But having to protect yourself is unobvious and especially if it isn't necessary when using the `fetch_paths_generic`. In addition, from strbuf.h `buf` is never NULL: " * strbufs have some invariants that are very important to keep in mind: * * - The `buf` member is never NULL, so it can be used in any usual C * string operations safely. strbufs _have_ to be initialized either by * `strbuf_init()` or by `= STRBUF_INIT` before the invariants, though. * " So I'd say this could be considered a bug in git at least in how combine_diff_path is being managed. I assume you agree that neither find_paths_generic or find_paths_multitree are adhering to at least that strbuf invariant and I wonder if the other strbuf invariants are being upheld. So, should this bug be "closed" and a new one "created"? Actually, using the mailing list to identify bugs and initially discuss them, seems fine. But is there a place where there is a list of current bugs and their state? -- wink
On Fri, Jan 03, 2025 at 03:34:58PM -0800, Wink Saville wrote: > But having to protect yourself is unobvious and especially if it isn't necessary > when using the `fetch_paths_generic`. > > In addition, from strbuf.h `buf` is never NULL: > > " > * strbufs have some invariants that are very important to keep in mind: > * > * - The `buf` member is never NULL, so it can be used in any usual C > * string operations safely. strbufs _have_ to be initialized either by > * `strbuf_init()` or by `= STRBUF_INIT` before the invariants, though. > * > " > > So I'd say this could be considered a bug in git at least in how > combine_diff_path > is being managed. I assume you agree that neither find_paths_generic or > find_paths_multitree are adhering to at least that strbuf invariant and I wonder > if the other strbuf invariants are being upheld. The strbuf invariant can only be held on strbufs which have been initialized, and this one has not. I don't think it's wrong to have variables which not (yet) been initialized. It can make for a fragile interface, though, if uninitialized struct members are exposed widely. I'm not sure how wide this case is. It's mostly an internal combine-diff data structure, though it looks like it gets exposed to other code in a few spots (though nobody outside of combine-diff.c currently looks at the parent paths at all). So I wouldn't call it a bug, as the internals of Git are not part of the public interface and there is no user-visible behavior problem without patching. But I doubt anybody would object to a patch making the API less fragile if it can be done cheaply and easily. And strbufs are designed to be cheap to initialize. So something like (completely untested): diff --git a/combine-diff.c b/combine-diff.c index 641bc92dbd..452b5f5beb 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -67,9 +67,9 @@ static struct combine_diff_path *intersect_paths( p->parent[n].mode = q->queue[i]->one->mode; p->parent[n].status = q->queue[i]->status; + strbuf_init(&p->parent[n].path, 0); if (combined_all_paths && filename_changed(p->parent[n].status)) { - strbuf_init(&p->parent[n].path, 0); strbuf_addstr(&p->parent[n].path, q->queue[i]->one->path); } @@ -92,9 +92,7 @@ static struct combine_diff_path *intersect_paths( /* p->path not in q->queue[]; drop it */ *tail = p->next; for (j = 0; j < num_parent; j++) - if (combined_all_paths && - filename_changed(p->parent[j].status)) - strbuf_release(&p->parent[j].path); + strbuf_release(&p->parent[j].path); free(p); continue; } @@ -1645,9 +1643,7 @@ void diff_tree_combined(const struct object_id *oid, struct combine_diff_path *tmp = paths; paths = paths->next; for (i = 0; i < num_parent; i++) - if (rev->combined_all_paths && - filename_changed(tmp->parent[i].status)) - strbuf_release(&tmp->parent[i].path); + strbuf_release(&tmp->parent[i].path); free(tmp); } might help the uninitialized-pointer issue. OTOH it is not really solving the more fundamental problem, which is that p->parent[i].path is only sometimes useful (we do not fill it in if it would just be the same as p->path, so the patch only changes it from uninitialized memory into an empty strbuf). And that is probably not something we want to change, as allocating duplicates of each path may be expensive. Probably we'd be better to encapsulate it in a function which falls back to p->path automatically. But then, AFAICT there are only two sites (both inside combine-diff.c) which look at it, so it would mostly be hypothetical future-proofing. I dunno. > So, should this bug be "closed" and a new one "created"? > > Actually, using the mailing list to identify bugs and initially discuss > them, seems fine. But is there a place where there is a list of current bugs and > their state? No, there's no bug tracker for the project[1]. Discussion may lead to a patch or not, which may be applied or not, but there is no formal classification of "open" or "fixed" or "won't fix". -Peff [1] Some folks seem to be using: https://git.issues.gerritcodereview.com/issues?q=status:open but I don't know how active it is. I never use it and had to dig out the link to https://crbug.com, which now redirects there, from a message from 2017. There's some recent activity, but I wouldn't count on opening something there to get wide attention.
Jeff King <peff@peff.net> writes: > ... OTOH it is not really > solving the more fundamental problem, which is that p->parent[i].path is > only sometimes useful (we do not fill it in if it would just be the same > as p->path, so the patch only changes it from uninitialized memory into > an empty strbuf). > > And that is probably not something we want to change, as allocating > duplicates of each path may be expensive. Nicely said. I reached the same conclusion after looking at the existing code, even though I have to admit that I am not a huge fan of the more recent part of combine-diff.c and its data structures. Thanks.
On Fri, Jan 03, 2025 at 06:55:18PM -0800, Junio C Hamano wrote: > Jeff King <peff@peff.net> writes: > > > ... OTOH it is not really > > solving the more fundamental problem, which is that p->parent[i].path is > > only sometimes useful (we do not fill it in if it would just be the same > > as p->path, so the patch only changes it from uninitialized memory into > > an empty strbuf). > > > > And that is probably not something we want to change, as allocating > > duplicates of each path may be expensive. > > Nicely said. I reached the same conclusion after looking at the > existing code, even though I have to admit that I am not a huge fan > of the more recent part of combine-diff.c and its data structures. I poked at this a little bit more, so here are a few tidbits: - the patch I showed earlier is not sufficient! There are lots of other spots that create combine_diff_path structs but don't bother to put anything in the parent paths at all. It works now because they also don't set a status that triggers filename_changed(). But what I showed earlier was wrong, because it was assuming in the cleanup functions that the strbufs were always initialized. - there's really no need for a strbuf at all here. It is always uninitialized/empty, or contains a direct copy of a path string. So a raw pointer with xstrdup() is plenty. And then we can use NULL to mean "it was not set". Which would Just Work for all those other spots if they bothered to zero the memory they allocated, but they don't. So we have to update them to set it to NULL anyway. That patch is below. - it is not at all clear to me that we need to be allocating at all. We always copy a string from the diff_queue. Do our combine_diff_path structs persist beyond then? I'm not sure. It is probably asking for trouble to just point to them directly without copying, as it creates a dependency (that even if it is not needed now, is a trap for somebody later). But it would drop some allocation/cleanup code, and we could just have p->parent[i].path fall back to p->path naturally. diff --git a/combine-diff.c b/combine-diff.c index 641bc92dbd..0d9d344c4e 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -66,13 +66,9 @@ static struct combine_diff_path *intersect_paths( oidcpy(&p->parent[n].oid, &q->queue[i]->one->oid); p->parent[n].mode = q->queue[i]->one->mode; p->parent[n].status = q->queue[i]->status; - - if (combined_all_paths && - filename_changed(p->parent[n].status)) { - strbuf_init(&p->parent[n].path, 0); - strbuf_addstr(&p->parent[n].path, - q->queue[i]->one->path); - } + p->parent[n].path = combined_all_paths && + filename_changed(p->parent[n].status) ? + xstrdup(q->queue[i]->one->path) : NULL; *tail = p; tail = &p->next; } @@ -92,9 +88,7 @@ static struct combine_diff_path *intersect_paths( /* p->path not in q->queue[]; drop it */ *tail = p->next; for (j = 0; j < num_parent; j++) - if (combined_all_paths && - filename_changed(p->parent[j].status)) - strbuf_release(&p->parent[j].path); + free(p->parent[j].path); free(p); continue; } @@ -108,10 +102,9 @@ static struct combine_diff_path *intersect_paths( oidcpy(&p->parent[n].oid, &q->queue[i]->one->oid); p->parent[n].mode = q->queue[i]->one->mode; p->parent[n].status = q->queue[i]->status; - if (combined_all_paths && - filename_changed(p->parent[n].status)) - strbuf_addstr(&p->parent[n].path, - q->queue[i]->one->path); + p->parent[n].path = combined_all_paths && + filename_changed(p->parent[n].status) ? + xstrdup(q->queue[i]->one->path) : NULL; tail = &p->next; i++; @@ -996,8 +989,9 @@ static void show_combined_header(struct combine_diff_path *elem, if (rev->combined_all_paths) { for (i = 0; i < num_parent; i++) { - char *path = filename_changed(elem->parent[i].status) - ? elem->parent[i].path.buf : elem->path; + const char *path = elem->parent[i].path ? + elem->parent[i].path : + elem->path; if (elem->parent[i].status == DIFF_STATUS_ADDED) dump_quoted_path("--- ", "", "/dev/null", line_prefix, c_meta, c_reset); @@ -1278,12 +1272,10 @@ static void show_raw_diff(struct combine_diff_path *p, int num_parent, struct re for (i = 0; i < num_parent; i++) if (rev->combined_all_paths) { - if (filename_changed(p->parent[i].status)) - write_name_quoted(p->parent[i].path.buf, stdout, - inter_name_termination); - else - write_name_quoted(p->path, stdout, - inter_name_termination); + const char *path = p->parent[i].path ? + p->parent[i].path : + p->path; + write_name_quoted(path, stdout, inter_name_termination); } write_name_quoted(p->path, stdout, line_termination); } @@ -1645,9 +1637,7 @@ void diff_tree_combined(const struct object_id *oid, struct combine_diff_path *tmp = paths; paths = paths->next; for (i = 0; i < num_parent; i++) - if (rev->combined_all_paths && - filename_changed(tmp->parent[i].status)) - strbuf_release(&tmp->parent[i].path); + free(tmp->parent[i].path); free(tmp); } diff --git a/diff-lib.c b/diff-lib.c index c6d3bc4d37..88a5aed736 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -417,9 +417,11 @@ static int show_modified(struct rev_info *revs, memset(p->parent, 0, 2 * sizeof(struct combine_diff_parent)); p->parent[0].status = DIFF_STATUS_MODIFIED; p->parent[0].mode = new_entry->ce_mode; + p->parent[0].path = NULL; oidcpy(&p->parent[0].oid, &new_entry->oid); p->parent[1].status = DIFF_STATUS_MODIFIED; p->parent[1].mode = old_entry->ce_mode; + p->parent[1].path = NULL; oidcpy(&p->parent[1].oid, &old_entry->oid); show_combined_diff(p, 2, revs); free(p); diff --git a/diff.h b/diff.h index 6e6007c17b..3157faeabb 100644 --- a/diff.h +++ b/diff.h @@ -480,7 +480,7 @@ struct combine_diff_path { char status; unsigned int mode; struct object_id oid; - struct strbuf path; + char *path; } parent[FLEX_ARRAY]; }; #define combine_diff_path_size(n, l) \ diff --git a/tree-diff.c b/tree-diff.c index d9237ffd9b..57af377c2b 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -272,6 +272,7 @@ static struct combine_diff_path *emit_path(struct combine_diff_path *p, } p->parent[i].mode = mode_i; + p->parent[i].path = NULL; oidcpy(&p->parent[i].oid, oid_i); }
On Fri, Jan 3, 2025 at 7:32 PM Jeff King <peff@peff.net> wrote: > > On Fri, Jan 03, 2025 at 06:55:18PM -0800, Junio C Hamano wrote: > > > Jeff King <peff@peff.net> writes: > > > > > ... OTOH it is not really > > > solving the more fundamental problem, which is that p->parent[i].path is > > > only sometimes useful (we do not fill it in if it would just be the same > > > as p->path, so the patch only changes it from uninitialized memory into > > > an empty strbuf). > > > > > > And that is probably not something we want to change, as allocating > > > duplicates of each path may be expensive. > > > > Nicely said. I reached the same conclusion after looking at the > > existing code, even though I have to admit that I am not a huge fan > > of the more recent part of combine-diff.c and its data structures. > > I poked at this a little bit more, so here are a few tidbits: > > - the patch I showed earlier is not sufficient! There are lots of > other spots that create combine_diff_path structs but don't bother > to put anything in the parent paths at all. It works now because > they also don't set a status that triggers filename_changed(). But > what I showed earlier was wrong, because it was assuming in the > cleanup functions that the strbufs were always initialized. > > - there's really no need for a strbuf at all here. It is always > uninitialized/empty, or contains a direct copy of a path string. So > a raw pointer with xstrdup() is plenty. And then we can use NULL to > mean "it was not set". > > Which would Just Work for all those other spots if they bothered to > zero the memory they allocated, but they don't. So we have to update > them to set it to NULL anyway. That patch is below. > > - it is not at all clear to me that we need to be allocating at all. > We always copy a string from the diff_queue. Do our > combine_diff_path structs persist beyond then? I'm not sure. It is > probably asking for trouble to just point to them directly without > copying, as it creates a dependency (that even if it is not needed > now, is a trap for somebody later). But it would drop some > allocation/cleanup code, and we could just have p->parent[i].path > fall back to p->path naturally. > > diff --git a/combine-diff.c b/combine-diff.c > index 641bc92dbd..0d9d344c4e 100644 > --- a/combine-diff.c > +++ b/combine-diff.c > @@ -66,13 +66,9 @@ static struct combine_diff_path *intersect_paths( > oidcpy(&p->parent[n].oid, &q->queue[i]->one->oid); > p->parent[n].mode = q->queue[i]->one->mode; > p->parent[n].status = q->queue[i]->status; > - > - if (combined_all_paths && > - filename_changed(p->parent[n].status)) { > - strbuf_init(&p->parent[n].path, 0); > - strbuf_addstr(&p->parent[n].path, > - q->queue[i]->one->path); > - } > + p->parent[n].path = combined_all_paths && > + filename_changed(p->parent[n].status) ? > + xstrdup(q->queue[i]->one->path) : NULL; > *tail = p; > tail = &p->next; > } > @@ -92,9 +88,7 @@ static struct combine_diff_path *intersect_paths( > /* p->path not in q->queue[]; drop it */ > *tail = p->next; > for (j = 0; j < num_parent; j++) > - if (combined_all_paths && > - filename_changed(p->parent[j].status)) > - strbuf_release(&p->parent[j].path); > + free(p->parent[j].path); > free(p); > continue; > } > @@ -108,10 +102,9 @@ static struct combine_diff_path *intersect_paths( > oidcpy(&p->parent[n].oid, &q->queue[i]->one->oid); > p->parent[n].mode = q->queue[i]->one->mode; > p->parent[n].status = q->queue[i]->status; > - if (combined_all_paths && > - filename_changed(p->parent[n].status)) > - strbuf_addstr(&p->parent[n].path, > - q->queue[i]->one->path); > + p->parent[n].path = combined_all_paths && > + filename_changed(p->parent[n].status) ? > + xstrdup(q->queue[i]->one->path) : NULL; > > tail = &p->next; > i++; > @@ -996,8 +989,9 @@ static void show_combined_header(struct combine_diff_path *elem, > > if (rev->combined_all_paths) { > for (i = 0; i < num_parent; i++) { > - char *path = filename_changed(elem->parent[i].status) > - ? elem->parent[i].path.buf : elem->path; > + const char *path = elem->parent[i].path ? > + elem->parent[i].path : > + elem->path; > if (elem->parent[i].status == DIFF_STATUS_ADDED) > dump_quoted_path("--- ", "", "/dev/null", > line_prefix, c_meta, c_reset); > @@ -1278,12 +1272,10 @@ static void show_raw_diff(struct combine_diff_path *p, int num_parent, struct re > > for (i = 0; i < num_parent; i++) > if (rev->combined_all_paths) { > - if (filename_changed(p->parent[i].status)) > - write_name_quoted(p->parent[i].path.buf, stdout, > - inter_name_termination); > - else > - write_name_quoted(p->path, stdout, > - inter_name_termination); > + const char *path = p->parent[i].path ? > + p->parent[i].path : > + p->path; > + write_name_quoted(path, stdout, inter_name_termination); > } > write_name_quoted(p->path, stdout, line_termination); > } > @@ -1645,9 +1637,7 @@ void diff_tree_combined(const struct object_id *oid, > struct combine_diff_path *tmp = paths; > paths = paths->next; > for (i = 0; i < num_parent; i++) > - if (rev->combined_all_paths && > - filename_changed(tmp->parent[i].status)) > - strbuf_release(&tmp->parent[i].path); > + free(tmp->parent[i].path); > free(tmp); > } > > diff --git a/diff-lib.c b/diff-lib.c > index c6d3bc4d37..88a5aed736 100644 > --- a/diff-lib.c > +++ b/diff-lib.c > @@ -417,9 +417,11 @@ static int show_modified(struct rev_info *revs, > memset(p->parent, 0, 2 * sizeof(struct combine_diff_parent)); > p->parent[0].status = DIFF_STATUS_MODIFIED; > p->parent[0].mode = new_entry->ce_mode; > + p->parent[0].path = NULL; > oidcpy(&p->parent[0].oid, &new_entry->oid); > p->parent[1].status = DIFF_STATUS_MODIFIED; > p->parent[1].mode = old_entry->ce_mode; > + p->parent[1].path = NULL; > oidcpy(&p->parent[1].oid, &old_entry->oid); > show_combined_diff(p, 2, revs); > free(p); > diff --git a/diff.h b/diff.h > index 6e6007c17b..3157faeabb 100644 > --- a/diff.h > +++ b/diff.h > @@ -480,7 +480,7 @@ struct combine_diff_path { > char status; > unsigned int mode; > struct object_id oid; > - struct strbuf path; > + char *path; > } parent[FLEX_ARRAY]; > }; > #define combine_diff_path_size(n, l) \ > diff --git a/tree-diff.c b/tree-diff.c > index d9237ffd9b..57af377c2b 100644 > --- a/tree-diff.c > +++ b/tree-diff.c > @@ -272,6 +272,7 @@ static struct combine_diff_path *emit_path(struct combine_diff_path *p, > } > > p->parent[i].mode = mode_i; > + p->parent[i].path = NULL; > oidcpy(&p->parent[i].oid, oid_i); > } The above LGTM and hopefully it can be accepted. With that change I can revert my trace_printfs of combine_diff_path back to something simple: ``` $ git diff HEAD^ diff --git a/combine-diff.c b/combine-diff.c index 5e0b7919bc..4764383f20 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1593,8 +1593,8 @@ void diff_tree_combined(const struct object_id *oid, for (num_paths = 0, p = paths; p; p = p->next) { trace_printf("Wink diff_tree_combined: num_paths=%d &p=%p mode=%0x, oid=%s path=%s\n", num_paths, p, p->mode, oid_to_hex(&p->oid), p->path); for (i = 0; i < num_parent; i++) { - trace_printf("Wink diff_tree_combined: &p->parent[%d]=%p status=%c mode=%x oid=%s path.buf=%p contents path.buf=%s\n", - i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), p->parent[i].path.buf, p->parent[i].path.buf); + trace_printf("Wink diff_tree_combined: &p->parent[%d]=%p status=%c mode=%x oid=%s path=%s\n", + i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), p->parent[i].path); } num_paths++; } ``` And the output doesn't SEGFAULT :) ``` $ GIT_TRACE=1 ./git diff-tree --cc 6f8ae955bda8ad246cc1f5f7a15f1c3b1c04696a 10:06:11.716284 git.c:476 trace: built-in: git diff-tree --cc 6f8ae955bda8ad246cc1f5f7a15f1c3b1c04696a 6f8ae955bda8ad246cc1f5f7a15f1c3b1c04696a 10:06:11.718102 combine-diff.c:1592 Wink diff_tree_combined: find number of surviving paths num_parent=2 10:06:11.718108 combine-diff.c:1594 Wink diff_tree_combined: num_paths=0 &p=0x643ac70f7ef0 mode=81a4, oid=0f41b2fd4a6b679a1cfcaa9a584c382068146212 path=refs.c 10:06:11.718112 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[0]=0x643ac70f7f28 status=M mode=81a4 oid=7dd5e9fa3323111f06303674b213ae24ed2d04b6 path=(null) 10:06:11.718116 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[1]=0x643ac70f7f60 status=M mode=81a4 oid=c55583986940d8ef1e1c839364c03cd92d4f7114 path=(null) 10:06:11.718120 combine-diff.c:1594 Wink diff_tree_combined: num_paths=1 &p=0x643ac70f7fb0 mode=81a4, oid=a0cdd99250e8286b55808b697b0a94afac5d8319 path=refs.h 10:06:11.718123 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[0]=0x643ac70f7fe8 status=M mode=81a4 oid=09be47afbee51e99f4ae49588cd65596ccfcb07e path=(null) 10:06:11.718126 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[1]=0x643ac70f8020 status=M mode=81a4 oid=b0dfc65ed2e59c4b66967840339f81e7746a96d3 path=(null) 10:06:11.718129 combine-diff.c:1594 Wink diff_tree_combined: num_paths=2 &p=0x643ac70f8900 mode=81a4, oid=5cfb8b7ca8678e171b8e8a7ad6daf1af74a81b59 path=refs/files-backend.c 10:06:11.718132 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[0]=0x643ac70f8938 status=M mode=81a4 oid=467fe347fa7e7d82ed7a2836e43ea749bb90ad7d path=(null) 10:06:11.718135 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[1]=0x643ac70f8970 status=M mode=81a4 oid=8953d1c6d37b13b0db701888b3db92fd87a68aaa path=(null) 10:06:11.718138 combine-diff.c:1594 Wink diff_tree_combined: num_paths=3 &p=0x643ac70f89d0 mode=81a4, oid=16550862d3ebe3b357c52254088b143c7ba000d6 path=refs/refs-internal.h 10:06:11.718142 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[0]=0x643ac70f8a08 status=M mode=81a4 oid=66e66e0fc1e812ebebd1d4b0119899c84bf1c0ae path=(null) 10:06:11.718162 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[1]=0x643ac70f8a40 status=M mode=81a4 oid=79b287c5ec5c7d8f759869cf93cda405640186dc path=(null) 10:06:11.718181 combine-diff.c:1594 Wink diff_tree_combined: num_paths=4 &p=0x643ac70f8aa0 mode=81a4, oid=00d95a9a2f42ce74c5cb4a42175b0953287851a6 path=refs/reftable-backend.c 10:06:11.718184 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[0]=0x643ac70f8ad8 status=M mode=81a4 oid=8a2a5b847c3d86332e319da69bfb5c8a56a10e86 path=(null) 10:06:11.718188 combine-diff.c:1596 Wink diff_tree_combined: &p->parent[1]=0x643ac70f8b10 status=M mode=81a4 oid=bec5962debea7b62572d08f6fa8fd38ab4cd8af6 path=(null) 10:06:11.718192 combine-diff.c:1601 Wink diff_tree_combined: found 5 surviving paths diff --cc refs/files-backend.c index 467fe347fa,8953d1c6d3..5cfb8b7ca8 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@@ -2533,9 -2539,15 +2543,15 @@@ static int check_old_oid(struct ref_upd oid_to_hex(oid), oid_to_hex(&update->old_oid)); - return -1; + return ret; } + struct files_transaction_backend_data { + struct ref_transaction *packed_transaction; + int packed_refs_locked; + struct strmap ref_locks; + }; + /* * Prepare for carrying out update: * - Lock the reference referred to by update. ```
I'd like to suggest one minor tweak to Peff's change. Rather than just changing the type of combine_diff_parent::path to `char *` I like to suggest changing the name and adding a comment. My inclination is to use `changed_path` as the field name. The resulting diff against next is attached.
diff --git a/combine-diff.c b/combine-diff.c index 641bc92dbd..455bc19087 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -20,6 +20,8 @@ #include "oid-array.h" #include "revision.h" +#include "trace.h" + static int compare_paths(const struct combine_diff_path *one, const struct diff_filespec *two) { @@ -1595,8 +1597,16 @@ void diff_tree_combined(const struct object_id *oid, } /* find out number of surviving paths */ - for (num_paths = 0, p = paths; p; p = p->next) + trace_printf("Wink diff_tree_combined: find number of surviving paths num_parent=%d\n", num_parent); + for (num_paths = 0, p = paths; p; p = p->next) { + trace_printf("Wink diff_tree_combined: num_paths=%d &p=%p mode=%0x, oid=%s path=%s\n", num_paths, p, p->mode, oid_to_hex(&p->oid), p->path); + for (i = 0; i < num_parent; i++) { + trace_printf("Wink diff_tree_combined: &p->parent[%d]=%p status=%c mode=%x oid=%s path.buf=%p contents path.buf=%s\n", + i, &p->parent[i], p->parent[i].status, p->parent[i].mode, oid_to_hex(&p->parent[i].oid), p->parent[i].path.buf, p->parent[i].path.buf); + } num_paths++; + } + trace_printf("Wink diff_tree_combined: found %d surviving paths\n", num_paths); /* order paths according to diffcore_order */ if (opt->orderfile && num_paths) { wink@fwlaptop 25-01-03T18:43:18.695Z:~/prgs/forks/git (wink-segfault-with-minimal-changes) ``` I made those changes on the `next` branch of git/git repo as of yesterday, 1/2/25, here are the the relavant logs: ``` wink@fwlaptop 25-01-03T18:43:18.695Z:~/prgs/forks/git (wink-segfault-with-minimal-changes) $ git log -2 --oneline edf34e3ab4 (HEAD -> wink-segfault-with-minimal-changes, origin/wink-segfault-with-minimal-changes) bug: SEGFAULT with minimal changes: 6c04ab211c (upstream/next, origin/next, next) Sync with 'master' wink@fwlaptop 25-01-03T18:44:13.976Z:~/prgs/forks/git (wink-segfault-with-minimal-changes) ``` What did you expect to happen? (Expected behavior) When I use `git diff-tree --cc` on a merge commit with changes I expect to see my trace_printf statements print the `struct combined_diff_path` members and also the output diff-tree -cc results with the "combined changes" as can be seen below: ``` wink@fwlaptop 25-01-03T18:46:58.472Z:~/prgs/forks/git (wink-segfault-with-minimal-changes) $ GIT_TRACE=1 ./git diff-tree --cc 6f8ae955bda8ad246cc1f5f7a15f1c3b1c04696a 10:47:04.259504 git.c:476 trace: built-in: git diff-tree --cc 6f8ae955bda8ad246cc1f5f7a15f1c3b1c04696a 6f8ae955bda8ad246cc1f5f7a15f1c3b1c04696a 10:47:04.278220 combine-diff.c:1600 Wink diff_tree_combined: find number of surviving paths num_parent=2 10:47:04.278245 combine-diff.c:1602 Wink diff_tree_combined: num_paths=0 &p=0x5dd590883f60 mode=81a4, oid=0f41b2fd4a6b679a1cfcaa9a584c382068146212 path=refs.c 10:47:04.278253 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[0]=0x5dd590883f98 status=M mode=81a4 oid=7dd5e9fa3323111f06303674b213ae24ed2d04b6 path.buf=(nil) contents path.buf=(null) 10:47:04.278260 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[1]=0x5dd590883fe0 status=M mode=81a4 oid=c55583986940d8ef1e1c839364c03cd92d4f7114 path.buf=(nil) contents path.buf=(null) 10:47:04.278265 combine-diff.c:1602 Wink diff_tree_combined: num_paths=1 &p=0x5dd59088b450 mode=81a4, oid=a0cdd99250e8286b55808b697b0a94afac5d8319 path=refs.h 10:47:04.278270 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[0]=0x5dd59088b488 status=M mode=81a4 oid=09be47afbee51e99f4ae49588cd65596ccfcb07e path.buf=(nil) contents path.buf=(null) 10:47:04.278274 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[1]=0x5dd59088b4d0 status=M mode=81a4 oid=b0dfc65ed2e59c4b66967840339f81e7746a96d3 path.buf=(nil) contents path.buf=(null) 10:47:04.278278 combine-diff.c:1602 Wink diff_tree_combined: num_paths=2 &p=0x5dd59088b530 mode=81a4, oid=5cfb8b7ca8678e171b8e8a7ad6daf1af74a81b59 path=refs/files-backend.c 10:47:04.278283 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[0]=0x5dd59088b568 status=M mode=81a4 oid=467fe347fa7e7d82ed7a2836e43ea749bb90ad7d path.buf=(nil) contents path.buf=(null) 10:47:04.278287 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[1]=0x5dd59088b5b0 status=M mode=81a4 oid=8953d1c6d37b13b0db701888b3db92fd87a68aaa path.buf=(nil) contents path.buf=(null) 10:47:04.278291 combine-diff.c:1602 Wink diff_tree_combined: num_paths=3 &p=0x5dd59088b620 mode=81a4, oid=16550862d3ebe3b357c52254088b143c7ba000d6 path=refs/refs-internal.h 10:47:04.278296 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[0]=0x5dd59088b658 status=M mode=81a4 oid=66e66e0fc1e812ebebd1d4b0119899c84bf1c0ae path.buf=(nil) contents path.buf=(null) 10:47:04.278300 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[1]=0x5dd59088b6a0 status=M mode=81a4 oid=79b287c5ec5c7d8f759869cf93cda405640186dc path.buf=(nil) contents path.buf=(null) 10:47:04.278305 combine-diff.c:1602 Wink diff_tree_combined: num_paths=4 &p=0x5dd59088b710 mode=81a4, oid=00d95a9a2f42ce74c5cb4a42175b0953287851a6 path=refs/reftable-backend.c 10:47:04.278309 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[0]=0x5dd59088b748 status=M mode=81a4 oid=8a2a5b847c3d86332e319da69bfb5c8a56a10e86 path.buf=(nil) contents path.buf=(null) 10:47:04.278314 combine-diff.c:1604 Wink diff_tree_combined: &p->parent[1]=0x5dd59088b790 status=M mode=81a4 oid=bec5962debea7b62572d08f6fa8fd38ab4cd8af6 path.buf=(nil) contents path.buf=(null) 10:47:04.278318 combine-diff.c:1609 Wink diff_tree_combined: found 5 surviving paths diff --cc refs/files-backend.c index 467fe347fa,8953d1c6d3..5cfb8b7ca8 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@@ -2533,9 -2539,15 +2543,15 @@@ static int check_old_oid(struct ref_upd oid_to_hex(oid), oid_to_hex(&update->old_oid)); - return -1; + return ret; } + struct files_transaction_backend_data { + struct ref_transaction *packed_transaction; + int packed_refs_locked; + struct strmap ref_locks; + }; + /* * Prepare for carrying out update: * - Lock the reference referred to by update. wink@fwlaptop 25-01-03T18:47:04.295Z:~/prgs/forks/git (wink-segfault-with-minimal-changes) ``` The above works because it has a hack fix I introduced and thus it works. The hack is to always use `find_paths_generic` rather than `find_paths_multitree`. To do this I added `need_generic_pathscan = true;` on top of the above change to have it run properly: ``` wink@fwlaptop 25-01-03T18:47:04.295Z:~/prgs/forks/git (wink-segfault-with-minimal-changes) $ git diff diff --git a/combine-diff.c b/combine-diff.c index 455bc19087..f03ff6f820 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1563,7 +1563,7 @@ void diff_tree_combined(const struct object_id *oid, (opt->pickaxe_opts & (DIFF_PICKAXE_KINDS_MASK & ~DIFF_PICKAXE_KIND_OBJFIND)) || opt->filter; - + need_generic_pathscan = true; if (need_generic_pathscan) { /* * NOTE generic case also handles --stat, as it computes