diff mbox series

[4/6] apply: refactor code to drop `line_allocated`

Message ID 0427cb72507bba7ed62d13a5523fea351a0cb35f.1726470385.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series apply: fix leaking buffer of `struct image` | expand

Commit Message

Patrick Steinhardt Sept. 16, 2024, 7:10 a.m. UTC
The `struct image` has two members `line` and `line_allocated`. The
former member is the one that should be used throughout the code,
whereas the latter one is used to track whether the lines have been
allocated or not.

In practice, the array of lines is always allocated. The reason why we
have `line_allocated` is that `remove_first_line()` will advance the
array pointer to drop the first entry, and thus it points into the array
instead of to the array header.

Refactor the function to use memmove(3P) instead, which allows us to get
rid of this double bookkeeping. We call this function at most once per
image anyway, so this shouldn't cause any performance regressions.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 apply.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

Comments

Junio C Hamano Sept. 16, 2024, 6:56 p.m. UTC | #1
Patrick Steinhardt <ps@pks.im> writes:

> Refactor the function to use memmove(3P) instead, which allows us to get
> rid of this double bookkeeping. We call this function at most once per
> image anyway, so this shouldn't cause any performance regressions.

Don't we call remove_first_line() as long as leading is larger than
trailing repeatedly?  Is "at most once" accurate?

As to the correctness, I think nobody takes the address of an
element in the line[] array and expects the address to stay valid
across a call to remove_first_line(), so this should be safe.

Thanks.

> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  apply.c | 33 ++++++++++++++-------------------
>  1 file changed, 14 insertions(+), 19 deletions(-)
Junio C Hamano Sept. 16, 2024, 9:40 p.m. UTC | #2
Patrick Steinhardt <ps@pks.im> writes:

> former member is the one that should be used throughougt the code,

"throughout" (I'll amend while queuing).
Patrick Steinhardt Sept. 17, 2024, 9:50 a.m. UTC | #3
On Mon, Sep 16, 2024 at 11:56:16AM -0700, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > Refactor the function to use memmove(3P) instead, which allows us to get
> > rid of this double bookkeeping. We call this function at most once per
> > image anyway, so this shouldn't cause any performance regressions.
> 
> Don't we call remove_first_line() as long as leading is larger than
> trailing repeatedly?  Is "at most once" accurate?
> 
> As to the correctness, I think nobody takes the address of an
> element in the line[] array and expects the address to stay valid
> across a call to remove_first_line(), so this should be safe.

Oh, you're right. I did search for a loop surrounding
`image_remove_first_line()`, but somehow I completely missed the obvious
`for (;;)` loop around it. No idea how I was able to miss it.

I still very much doubt that this will cause performance issues in
practice (even though it's only by gut feeling), but the statement is
obviously incorrect. In case the assumption ever turns out to be wrong
we can likely refactor the loop to only trim contents after we have
found how many lines to remove, at which point we can remove them with a
single call to `strbuf_remove()`.

Patrick
diff mbox series

Patch

diff --git a/apply.c b/apply.c
index 76f7777d4c..985564ac76 100644
--- a/apply.c
+++ b/apply.c
@@ -281,7 +281,6 @@  struct image {
 	size_t len;
 	size_t nr;
 	size_t alloc;
-	struct line *line_allocated;
 	struct line *line;
 };
 #define IMAGE_INIT { 0 }
@@ -295,7 +294,7 @@  static void image_init(struct image *image)
 static void image_clear(struct image *image)
 {
 	free(image->buf);
-	free(image->line_allocated);
+	free(image->line);
 	image_init(image);
 }
 
@@ -313,10 +312,10 @@  static uint32_t hash_line(const char *cp, size_t len)
 
 static void image_add_line(struct image *img, const char *bol, size_t len, unsigned flag)
 {
-	ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc);
-	img->line_allocated[img->nr].len = len;
-	img->line_allocated[img->nr].hash = hash_line(bol, len);
-	img->line_allocated[img->nr].flag = flag;
+	ALLOC_GROW(img->line, img->nr + 1, img->alloc);
+	img->line[img->nr].len = len;
+	img->line[img->nr].hash = hash_line(bol, len);
+	img->line[img->nr].flag = flag;
 	img->nr++;
 }
 
@@ -348,15 +347,15 @@  static void image_prepare(struct image *image, char *buf, size_t len,
 		image_add_line(image, cp, next - cp, 0);
 		cp = next;
 	}
-	image->line = image->line_allocated;
 }
 
 static void image_remove_first_line(struct image *img)
 {
 	img->buf += img->line[0].len;
 	img->len -= img->line[0].len;
-	img->line++;
 	img->nr--;
+	if (img->nr)
+		MOVE_ARRAY(img->line, img->line + 1, img->nr);
 }
 
 static void image_remove_last_line(struct image *img)
@@ -2335,7 +2334,7 @@  static void update_pre_post_images(struct image *preimage,
 	       : fixed_preimage.nr <= preimage->nr);
 	for (i = 0; i < fixed_preimage.nr; i++)
 		fixed_preimage.line[i].flag = preimage->line[i].flag;
-	free(preimage->line_allocated);
+	free(preimage->line);
 	*preimage = fixed_preimage;
 
 	/*
@@ -2879,14 +2878,12 @@  static void update_image(struct apply_state *state,
 
 	/* Adjust the line table */
 	nr = img->nr + postimage->nr - preimage_limit;
-	if (preimage_limit < postimage->nr) {
+	if (preimage_limit < postimage->nr)
 		/*
 		 * NOTE: this knows that we never call image_remove_first_line()
 		 * on anything other than pre/post image.
 		 */
 		REALLOC_ARRAY(img->line, nr);
-		img->line_allocated = img->line;
-	}
 	if (preimage_limit != postimage->nr)
 		MOVE_ARRAY(img->line + applied_pos + postimage->nr,
 			   img->line + applied_pos + preimage_limit,
@@ -3027,8 +3024,8 @@  static int apply_one_fragment(struct apply_state *state,
 	    newlines.len > 0 && newlines.buf[newlines.len - 1] == '\n') {
 		old--;
 		strbuf_setlen(&newlines, newlines.len - 1);
-		preimage.line_allocated[preimage.nr - 1].len--;
-		postimage.line_allocated[postimage.nr - 1].len--;
+		preimage.line[preimage.nr - 1].len--;
+		postimage.line[postimage.nr - 1].len--;
 	}
 
 	leading = frag->leading;
@@ -3062,8 +3059,6 @@  static int apply_one_fragment(struct apply_state *state,
 	preimage.len = old - oldlines;
 	postimage.buf = newlines.buf;
 	postimage.len = newlines.len;
-	preimage.line = preimage.line_allocated;
-	postimage.line = postimage.line_allocated;
 
 	for (;;) {
 
@@ -3151,8 +3146,8 @@  static int apply_one_fragment(struct apply_state *state,
 out:
 	free(oldlines);
 	strbuf_release(&newlines);
-	free(preimage.line_allocated);
-	free(postimage.line_allocated);
+	free(preimage.line);
+	free(postimage.line);
 
 	return (applied_pos < 0);
 }
@@ -3752,7 +3747,7 @@  static int apply_data(struct apply_state *state, struct patch *patch,
 	patch->result = image.buf;
 	patch->resultsize = image.len;
 	add_to_fn_table(state, patch);
-	free(image.line_allocated);
+	free(image.line);
 
 	if (0 < patch->is_delete && patch->resultsize)
 		return error(_("removal patch leaves file contents"));