diff mbox series

[08/14] tree-diff: pass whole path string to path_appendnew()

Message ID 20250109084907.GH2748836@coredump.intra.peff.net (mailing list archive)
State New
Headers show
Series combine-diff cleanups | expand

Commit Message

Jeff King Jan. 9, 2025, 8:49 a.m. UTC
When diffing trees, we'll have a strbuf "base" containing the
slash-separted names of our parent trees, and a "path" string
representing an entry name from the current tree. We pass these
separately to path_appendnew(), which combines them to form a single
path string in the combine_diff_path struct.

Instead, let's append the path string to our base strbuf ourselves, pass
in the result, and then roll it back with strbuf_setlen(). This lets us
simplify path_appendnew() a bit, enabling further refactoring.

And while it might seem like this causes extra wasted allocations, it
does not in practice. We reuse the same strbuf for each tree entry, so
we only have to allocate it to match the largest name. Plus, in a
recursive diff we'll end up doing this same operation to extend the base
for the next level of recursion. So we're really just incurring a small
memcpy().

Signed-off-by: Jeff King <peff@peff.net>
---
 tree-diff.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

Comments

Patrick Steinhardt Jan. 13, 2025, 3:40 p.m. UTC | #1
On Thu, Jan 09, 2025 at 03:49:07AM -0500, Jeff King wrote:
> diff --git a/tree-diff.c b/tree-diff.c
> index 22fc2d8f8c..d2f8dd14a6 100644
> --- a/tree-diff.c
> +++ b/tree-diff.c
> @@ -129,20 +129,18 @@ static int emit_diff_first_parent_only(struct diff_options *opt, struct combine_
>   * and append it to paths list tail.
>   */
>  static struct combine_diff_path *path_appendnew(struct combine_diff_path *last,
> -	int nparent, const struct strbuf *base, const char *path, int pathlen,
> +	int nparent, const char *path, size_t len,

Sneaky, you also changed the type of `len` :) You might want to point
that out in the commit message.

>  	unsigned mode, const struct object_id *oid)
>  {
>  	struct combine_diff_path *p;
> -	size_t len = st_add(base->len, pathlen);
>  	size_t alloclen = combine_diff_path_size(nparent, len);
>  
>  	p = xmalloc(alloclen);
>  	p->next = NULL;
>  	last->next = p;
>  
>  	p->path = (char *)&(p->parent[nparent]);
> -	memcpy(p->path, base->buf, base->len);
> -	memcpy(p->path + base->len, path, pathlen);
> +	memcpy(p->path, path, len);
>  	p->path[len] = 0;
>  	p->mode = mode;
>  	oidcpy(&p->oid, oid ? oid : null_oid());
> @@ -206,7 +204,10 @@ static struct combine_diff_path *emit_path(struct combine_diff_path *p,
>  	if (emitthis) {
>  		int keep;
>  		struct combine_diff_path *pprev = p;
> -		p = path_appendnew(p, nparent, base, path, pathlen, mode, oid);
> +
> +		strbuf_add(base, path, pathlen);
> +		p = path_appendnew(p, nparent, base->buf, base->len, mode, oid);
> +		strbuf_setlen(base, old_baselen);
>  
>  		for (i = 0; i < nparent; ++i) {
>  			/*

Makes sense. And there is a single caller of `path_appendnew()`, only,
so no further changes should be required.

Patrick
Jeff King Jan. 14, 2025, 9:26 a.m. UTC | #2
On Mon, Jan 13, 2025 at 04:40:00PM +0100, Patrick Steinhardt wrote:

> On Thu, Jan 09, 2025 at 03:49:07AM -0500, Jeff King wrote:
> > diff --git a/tree-diff.c b/tree-diff.c
> > index 22fc2d8f8c..d2f8dd14a6 100644
> > --- a/tree-diff.c
> > +++ b/tree-diff.c
> > @@ -129,20 +129,18 @@ static int emit_diff_first_parent_only(struct diff_options *opt, struct combine_
> >   * and append it to paths list tail.
> >   */
> >  static struct combine_diff_path *path_appendnew(struct combine_diff_path *last,
> > -	int nparent, const struct strbuf *base, const char *path, int pathlen,
> > +	int nparent, const char *path, size_t len,
> 
> Sneaky, you also changed the type of `len` :) You might want to point
> that out in the commit message.

Sort of. The original took a (ptr,size_t) pair in the form of "base",
and then also a (ptr,int) path. That matches what the caller has:
"pathlen" comes from tree_entry(), which returns an int (it should
probably become a size_t in the long run, but it has a lot of ripple
effects if you change it).

Now the caller handles path/pathlen itself here:

> > +		strbuf_add(base, path, pathlen);

So there is nothing left to pass in except a (ptr,size_t) pair. We could
have continued passing those in as a strbuf, but calling it "base"
doesn't make sense any more.

The "int" is still there, but it just stays in the caller. In the
original it becomes a size_t via passing to combine_diff_path_size(). In
the new code, it happens when we feed it to strbuf_add().

-Peff
diff mbox series

Patch

diff --git a/tree-diff.c b/tree-diff.c
index 22fc2d8f8c..d2f8dd14a6 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -129,20 +129,18 @@  static int emit_diff_first_parent_only(struct diff_options *opt, struct combine_
  * and append it to paths list tail.
  */
 static struct combine_diff_path *path_appendnew(struct combine_diff_path *last,
-	int nparent, const struct strbuf *base, const char *path, int pathlen,
+	int nparent, const char *path, size_t len,
 	unsigned mode, const struct object_id *oid)
 {
 	struct combine_diff_path *p;
-	size_t len = st_add(base->len, pathlen);
 	size_t alloclen = combine_diff_path_size(nparent, len);
 
 	p = xmalloc(alloclen);
 	p->next = NULL;
 	last->next = p;
 
 	p->path = (char *)&(p->parent[nparent]);
-	memcpy(p->path, base->buf, base->len);
-	memcpy(p->path + base->len, path, pathlen);
+	memcpy(p->path, path, len);
 	p->path[len] = 0;
 	p->mode = mode;
 	oidcpy(&p->oid, oid ? oid : null_oid());
@@ -206,7 +204,10 @@  static struct combine_diff_path *emit_path(struct combine_diff_path *p,
 	if (emitthis) {
 		int keep;
 		struct combine_diff_path *pprev = p;
-		p = path_appendnew(p, nparent, base, path, pathlen, mode, oid);
+
+		strbuf_add(base, path, pathlen);
+		p = path_appendnew(p, nparent, base->buf, base->len, mode, oid);
+		strbuf_setlen(base, old_baselen);
 
 		for (i = 0; i < nparent; ++i) {
 			/*