diff mbox series

[16/20] strvec: add functions to replace and remove strings

Message ID c43c93db3bdc0ea5283a6d5c71d37d6fe949dd8c.1716465556.git.ps@pks.im (mailing list archive)
State Superseded
Headers show
Series Various memory leak fixes | expand

Commit Message

Patrick Steinhardt May 23, 2024, 12:26 p.m. UTC
Add two functions that allow to replace and remove strings contained in
the strvec. This will be used by a subsequent commit that refactors
git-mv(1).

While at it, add a bunch of unit tests that cover both old and new
functionality.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 Makefile                |   1 +
 strvec.c                |  20 ++++
 strvec.h                |  13 ++
 t/unit-tests/t-strvec.c | 259 ++++++++++++++++++++++++++++++++++++++++
 t/unit-tests/test-lib.c |  13 ++
 t/unit-tests/test-lib.h |  13 ++
 6 files changed, 319 insertions(+)
 create mode 100644 t/unit-tests/t-strvec.c

Comments

Eric Sunshine May 23, 2024, 5:09 p.m. UTC | #1
On Thu, May 23, 2024 at 8:27 AM Patrick Steinhardt <ps@pks.im> wrote:
> Add two functions that allow to replace and remove strings contained in
> the strvec. This will be used by a subsequent commit that refactors
> git-mv(1).
> [...]
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> diff --git a/strvec.c b/strvec.c
> @@ -56,6 +56,26 @@ void strvec_pushv(struct strvec *array, const char **items)
> +const char *strvec_replace(struct strvec *array, size_t idx, const char *replacement)
> +{
> +       char *to_free;
> +       if (idx >= array->nr)
> +               BUG("index outside of array boundary");
> +       to_free = (char *) array->v[idx];
> +       array->v[idx] = xstrdup(replacement);
> +       free(to_free);
> +       return array->v[idx];
> +}

The reason you delay calling free() until after xstrdup() is to
protect against the case when `replacement` is a substring of the
string already stored at `v[idx]`, correct?
Patrick Steinhardt May 24, 2024, 6:56 a.m. UTC | #2
On Thu, May 23, 2024 at 01:09:39PM -0400, Eric Sunshine wrote:
> On Thu, May 23, 2024 at 8:27 AM Patrick Steinhardt <ps@pks.im> wrote:
> > Add two functions that allow to replace and remove strings contained in
> > the strvec. This will be used by a subsequent commit that refactors
> > git-mv(1).
> > [...]
> > Signed-off-by: Patrick Steinhardt <ps@pks.im>
> > ---
> > diff --git a/strvec.c b/strvec.c
> > @@ -56,6 +56,26 @@ void strvec_pushv(struct strvec *array, const char **items)
> > +const char *strvec_replace(struct strvec *array, size_t idx, const char *replacement)
> > +{
> > +       char *to_free;
> > +       if (idx >= array->nr)
> > +               BUG("index outside of array boundary");
> > +       to_free = (char *) array->v[idx];
> > +       array->v[idx] = xstrdup(replacement);
> > +       free(to_free);
> > +       return array->v[idx];
> > +}
> 
> The reason you delay calling free() until after xstrdup() is to
> protect against the case when `replacement` is a substring of the
> string already stored at `v[idx]`, correct?

Yup. The patches for the strvec API have been lying around for quite a
while as I have originally implemented them in a different context. And
there I did hit this edge case indeed. It's rather unlikely to happen
overall, but I think it shouldn't hurt to be prepared.

There is no coverage of this scenario in this patch series though. Let
me add a unit test for this.

Patrick
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index cf504963c2..d4000fb1d6 100644
--- a/Makefile
+++ b/Makefile
@@ -1336,6 +1336,7 @@  THIRD_PARTY_SOURCES += sha1dc/%
 
 UNIT_TEST_PROGRAMS += t-mem-pool
 UNIT_TEST_PROGRAMS += t-strbuf
+UNIT_TEST_PROGRAMS += t-strvec
 UNIT_TEST_PROGRAMS += t-ctype
 UNIT_TEST_PROGRAMS += t-prio-queue
 UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS))
diff --git a/strvec.c b/strvec.c
index 178f4f3748..d4073ec9fa 100644
--- a/strvec.c
+++ b/strvec.c
@@ -56,6 +56,26 @@  void strvec_pushv(struct strvec *array, const char **items)
 		strvec_push(array, *items);
 }
 
+const char *strvec_replace(struct strvec *array, size_t idx, const char *replacement)
+{
+	char *to_free;
+	if (idx >= array->nr)
+		BUG("index outside of array boundary");
+	to_free = (char *) array->v[idx];
+	array->v[idx] = xstrdup(replacement);
+	free(to_free);
+	return array->v[idx];
+}
+
+void strvec_remove(struct strvec *array, size_t idx)
+{
+	if (idx >= array->nr)
+		BUG("index outside of array boundary");
+	free((char *)array->v[idx]);
+	memmove(array->v + idx, array->v + idx + 1, (array->nr - idx) * sizeof(char *));
+	array->nr--;
+}
+
 void strvec_pop(struct strvec *array)
 {
 	if (!array->nr)
diff --git a/strvec.h b/strvec.h
index 4715d3e51f..6c7e8b7d50 100644
--- a/strvec.h
+++ b/strvec.h
@@ -64,6 +64,19 @@  void strvec_pushl(struct strvec *, ...);
 /* Push a null-terminated array of strings onto the end of the array. */
 void strvec_pushv(struct strvec *, const char **);
 
+/**
+ * Replace the value at the given index with a new value. The index must be
+ * valid. Returns a pointer to the inserted value.
+ */
+const char *strvec_replace(struct strvec *array, size_t idx, const char *replacement);
+
+/*
+ * Remove the value at the given index. The remainder of the array will be
+ * moved to fill the resulting gap. The provided index must point into the
+ * array.
+ */
+void strvec_remove(struct strvec *array, size_t idx);
+
 /**
  * Remove the final element from the array. If there are no
  * elements in the array, do nothing.
diff --git a/t/unit-tests/t-strvec.c b/t/unit-tests/t-strvec.c
new file mode 100644
index 0000000000..c07ad62c7d
--- /dev/null
+++ b/t/unit-tests/t-strvec.c
@@ -0,0 +1,259 @@ 
+#include "test-lib.h"
+#include "strbuf.h"
+#include "strvec.h"
+
+#define check_strvec(vec, ...) \
+	check_strvec_loc(TEST_LOCATION(), vec, __VA_ARGS__)
+static void check_strvec_loc(const char *loc, struct strvec *vec, ...)
+{
+	va_list ap;
+	size_t nr = 0;
+
+	va_start(ap, vec);
+	while (1) {
+		const char *str = va_arg(ap, const char *);
+		if (!str)
+			break;
+
+		if (!check_uint(vec->nr, >, nr) ||
+		    !check_uint(vec->alloc, >, nr) ||
+		    !check_str(vec->v[nr], str)) {
+			struct strbuf msg = STRBUF_INIT;
+			strbuf_addf(&msg, "strvec index %"PRIuMAX, (uintmax_t) nr);
+			test_assert(loc, msg.buf, 0);
+			strbuf_release(&msg);
+			return;
+		}
+
+		nr++;
+	}
+
+	check_uint(vec->nr, ==, nr);
+	check_uint(vec->alloc, >=, nr);
+	check_pointer_eq(vec->v[nr], NULL);
+}
+
+static void t_static_init(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	check_pointer_eq(vec.v, empty_strvec);
+	check_uint(vec.nr, ==, 0);
+	check_uint(vec.alloc, ==, 0);
+}
+
+static void t_dynamic_init(void)
+{
+	struct strvec vec;
+	strvec_init(&vec);
+	check_pointer_eq(vec.v, empty_strvec);
+	check_uint(vec.nr, ==, 0);
+	check_uint(vec.alloc, ==, 0);
+}
+
+static void t_clear(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_push(&vec, "foo");
+	strvec_clear(&vec);
+	check_pointer_eq(vec.v, empty_strvec);
+	check_uint(vec.nr, ==, 0);
+	check_uint(vec.alloc, ==, 0);
+}
+
+static void t_push(void)
+{
+	struct strvec vec = STRVEC_INIT;
+
+	strvec_push(&vec, "foo");
+	check_strvec(&vec, "foo", NULL);
+
+	strvec_push(&vec, "bar");
+	check_strvec(&vec, "foo", "bar", NULL);
+
+	strvec_clear(&vec);
+}
+
+static void t_pushf(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pushf(&vec, "foo: %d", 1);
+	check_strvec(&vec, "foo: 1", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_pushl(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pushl(&vec, "foo", "bar", "baz", NULL);
+	check_strvec(&vec, "foo", "bar", "baz", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_pushv(void)
+{
+	const char *strings[] = {
+		"foo", "bar", "baz", NULL,
+	};
+	struct strvec vec = STRVEC_INIT;
+
+	strvec_pushv(&vec, strings);
+	check_strvec(&vec, "foo", "bar", "baz", NULL);
+
+	strvec_clear(&vec);
+}
+
+static void t_replace_at_head(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pushl(&vec, "foo", "bar", "baz", NULL);
+	strvec_replace(&vec, 0, "replaced");
+	check_strvec(&vec, "replaced", "bar", "baz", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_replace_at_tail(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pushl(&vec, "foo", "bar", "baz", NULL);
+	strvec_replace(&vec, 2, "replaced");
+	check_strvec(&vec, "foo", "bar", "replaced", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_replace_in_between(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pushl(&vec, "foo", "bar", "baz", NULL);
+	strvec_replace(&vec, 1, "replaced");
+	check_strvec(&vec, "foo", "replaced", "baz", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_remove_at_head(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pushl(&vec, "foo", "bar", "baz", NULL);
+	strvec_remove(&vec, 0);
+	check_strvec(&vec, "bar", "baz", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_remove_at_tail(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pushl(&vec, "foo", "bar", "baz", NULL);
+	strvec_remove(&vec, 2);
+	check_strvec(&vec, "foo", "bar", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_remove_in_between(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pushl(&vec, "foo", "bar", "baz", NULL);
+	strvec_remove(&vec, 1);
+	check_strvec(&vec, "foo", "baz", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_pop_empty_array(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pop(&vec);
+	check_strvec(&vec, NULL);
+	strvec_clear(&vec);
+}
+
+static void t_pop_non_empty_array(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_pushl(&vec, "foo", "bar", "baz", NULL);
+	strvec_pop(&vec);
+	check_strvec(&vec, "foo", "bar", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_split_empty_string(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_split(&vec, "");
+	check_strvec(&vec, NULL);
+	strvec_clear(&vec);
+}
+
+static void t_split_single_item(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_split(&vec, "foo");
+	check_strvec(&vec, "foo", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_split_multiple_items(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_split(&vec, "foo bar baz");
+	check_strvec(&vec, "foo", "bar", "baz", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_split_whitespace_only(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_split(&vec, " \t\n");
+	check_strvec(&vec, NULL);
+	strvec_clear(&vec);
+}
+
+static void t_split_multiple_consecutive_whitespaces(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	strvec_split(&vec, "foo\n\t bar");
+	check_strvec(&vec, "foo", "bar", NULL);
+	strvec_clear(&vec);
+}
+
+static void t_detach(void)
+{
+	struct strvec vec = STRVEC_INIT;
+	const char **detached;
+
+	strvec_push(&vec, "foo");
+
+	detached = strvec_detach(&vec);
+	check_str(detached[0], "foo");
+	check_pointer_eq(detached[1], NULL);
+
+	check_pointer_eq(vec.v, empty_strvec);
+	check_uint(vec.nr, ==, 0);
+	check_uint(vec.alloc, ==, 0);
+
+	free((char *) detached[0]);
+	free(detached);
+}
+
+int cmd_main(int argc, const char **argv)
+{
+	TEST(t_static_init(), "static initialization");
+	TEST(t_dynamic_init(), "dynamic initialization");
+	TEST(t_clear(), "clear");
+	TEST(t_push(), "push");
+	TEST(t_pushf(), "pushf");
+	TEST(t_pushl(), "pushl");
+	TEST(t_pushv(), "pushv");
+	TEST(t_replace_at_head(), "replace at head");
+	TEST(t_replace_in_between(), "replace in between");
+	TEST(t_replace_at_tail(), "replace at tail");
+	TEST(t_remove_at_head(), "remove at head");
+	TEST(t_remove_in_between(), "remove in between");
+	TEST(t_remove_at_tail(), "remove at tail");
+	TEST(t_pop_empty_array(), "pop with empty array");
+	TEST(t_pop_non_empty_array(), "pop with non-empty array");
+	TEST(t_split_empty_string(), "split empty string");
+	TEST(t_split_single_item(), "split single item");
+	TEST(t_split_multiple_items(), "split multiple items");
+	TEST(t_split_whitespace_only(), "split whitespace only");
+	TEST(t_split_multiple_consecutive_whitespaces(), "split multiple consecutive whitespaces");
+	TEST(t_detach(), "detach");
+	return test_done();
+}
diff --git a/t/unit-tests/test-lib.c b/t/unit-tests/test-lib.c
index 66d6980ffb..3c513ce59a 100644
--- a/t/unit-tests/test-lib.c
+++ b/t/unit-tests/test-lib.c
@@ -318,6 +318,19 @@  int check_bool_loc(const char *loc, const char *check, int ok)
 
 union test__tmp test__tmp[2];
 
+int check_pointer_eq_loc(const char *loc, const char *check, int ok,
+			 const void *a, const void *b)
+{
+	int ret = test_assert(loc, check, ok);
+
+	if (!ret) {
+		test_msg("   left: %p", a);
+		test_msg("  right: %p", b);
+	}
+
+	return ret;
+}
+
 int check_int_loc(const char *loc, const char *check, int ok,
 		  intmax_t a, intmax_t b)
 {
diff --git a/t/unit-tests/test-lib.h b/t/unit-tests/test-lib.h
index a8f07ae0b7..2de6d715d5 100644
--- a/t/unit-tests/test-lib.h
+++ b/t/unit-tests/test-lib.h
@@ -75,6 +75,18 @@  int test_assert(const char *location, const char *check, int ok);
 	check_bool_loc(TEST_LOCATION(), #x, x)
 int check_bool_loc(const char *loc, const char *check, int ok);
 
+/*
+ * Compare two integers. Prints a message with the two values if the
+ * comparison fails. NB this is not thread safe.
+ */
+#define check_pointer_eq(a, b)						\
+	(test__tmp[0].p = (a), test__tmp[1].p = (b),			\
+	 check_pointer_eq_loc(TEST_LOCATION(), #a" == "#b,		\
+			      test__tmp[0].p == test__tmp[1].p,		\
+			      test__tmp[0].p, test__tmp[1].p))
+int check_pointer_eq_loc(const char *loc, const char *check, int ok,
+			 const void *a, const void *b);
+
 /*
  * Compare two integers. Prints a message with the two values if the
  * comparison fails. NB this is not thread safe.
@@ -136,6 +148,7 @@  union test__tmp {
 	intmax_t i;
 	uintmax_t u;
 	char c;
+	const void *p;
 };
 
 extern union test__tmp test__tmp[2];