diff mbox series

[GSoC,v2] t: port helper/test-hashmap.c to unit-tests/t-hashmap.c

Message ID 20240708161641.10335-2-shyamthakkar001@gmail.com (mailing list archive)
State Superseded
Headers show
Series [GSoC,v2] t: port helper/test-hashmap.c to unit-tests/t-hashmap.c | expand

Commit Message

Ghanshyam Thakkar July 8, 2024, 4:15 p.m. UTC
helper/test-hashmap.c along with t0011-hashmap.sh test the hashmap.h
library. Migrate them to the unit testing framework for better
debugging, runtime performance and consice code.

Along with the migration, make 'add' tests from the shellscript order
agnostic in unit tests, since they iterate over entries with the same
keys and we do not guarantee the order.

The helper/test-hashmap.c is still not removed because it contains a
performance test meant to be run by the user directly (not used in
t/perf). And it makes sense for such a utility to be a helper.

Mentored-by: Christian Couder <chriscool@tuxfamily.org>
Mentored-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
Signed-off-by: Ghanshyam Thakkar <shyamthakkar001@gmail.com>
---
The changes in v2 are inspired from the review of another similar
test t-oidmap: https://lore.kernel.org/git/16e06a6d-5fd0-4132-9d82-5c6f13b7f9ed@gmail.com/

The v2 also includes some formatting corrections and one of the
testcases, t_add(), was changed to be more similar to the original.

Range-diff against v1:
1:  f095025d1b ! 1:  bbb4f2f23e t: port helper/test-hashmap.c to unit-tests/t-hashmap.c
    @@ t/unit-tests/t-hashmap.c (new)
     +		  int ignore_case)
     +{
     +	struct hashmap map = HASHMAP_INIT(test_entry_cmp, &ignore_case);
    ++
     +	f(&map, ignore_case);
     +	hashmap_clear_and_free(&map, struct test_entry, ent);
     +}
    @@ t/unit-tests/t-hashmap.c (new)
     +{
     +	struct test_entry *entry;
     +	const char *key_val[][2] = { { "key1", "value1" },
    -+				   { "key2", "value2" },
    -+				   { "fooBarFrotz", "value3" } };
    ++				     { "key2", "value2" },
    ++				     { "fooBarFrotz", "value3" } };
     +
     +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
     +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
    @@ t/unit-tests/t-hashmap.c (new)
     +	free(entry);
     +
     +	check_int(map->tablesize, ==, 64);
    -+	check_int(hashmap_get_size(map), ==, ignore_case ? 3 : 4);
    ++	check_int(hashmap_get_size(map), ==,
    ++		  ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1);
     +}
     +
     +static void t_replace(struct hashmap *map, int ignore_case)
    @@ t/unit-tests/t-hashmap.c (new)
     +{
     +	struct test_entry *entry;
     +	const char *key_val[][2] = { { "key1", "value1" },
    -+				   { "key2", "value2" },
    -+				   { "fooBarFrotz", "value3" },
    -+				   { ignore_case ? "key4" : "foobarfrotz", "value4" } };
    ++				     { "key2", "value2" },
    ++				     { "fooBarFrotz", "value3" },
    ++				     { ignore_case ? "key4" : "foobarfrotz", "value4" } };
     +	const char *query[][2] = {
     +		{ ignore_case ? "Key1" : "key1", "value1" },
     +		{ ignore_case ? "keY2" : "key2", "value2" },
    @@ t/unit-tests/t-hashmap.c (new)
     +{
     +	struct test_entry *entry;
     +	const char *key_val[][3] = {
    -+		{ "key1", "value2", "UNUSED" },
    ++		{ "key1", "value1", "UNUSED" },
     +		{ ignore_case ? "Key1" : "key1", "value2", "UNUSED" },
     +		{ "fooBarFrotz", "value3", "UNUSED" },
     +		{ ignore_case ? "Foobarfrotz" : "fooBarFrotz", "value4", "UNUSED" }
     +	};
    ++	const char *queries[] = { "key1",
    ++				  ignore_case ? "Foobarfrotz" : "fooBarFrotz" };
     +
     +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
     +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
     +		hashmap_add(map, &entry->ent);
     +	}
     +
    -+	hashmap_for_each_entry_from(map, entry, ent)
    -+	{
    -+		int ret;
    -+		if (!check_int((ret = key_val_contains(key_val, ARRAY_SIZE(key_val),
    -+						       entry)), ==, 0)) {
    -+			switch (ret) {
    ++	for (size_t i = 0; i < ARRAY_SIZE(queries); i++) {
    ++		int count = 0;
    ++		entry = hashmap_get_entry_from_hash(map,
    ++			ignore_case ? strihash(queries[i]) :
    ++				      strhash(queries[i]),
    ++			queries[i], struct test_entry, ent);
    ++
    ++		hashmap_for_each_entry_from(map, entry, ent)
    ++		{
    ++			int ret;
    ++			if (!check_int((ret = key_val_contains(
    ++						key_val, ARRAY_SIZE(key_val),
    ++						entry)), ==, 0)) {
    ++				switch (ret) {
     +				case 1:
     +					test_msg("found entry was not given in the input\n"
     +						 "    key: %s\n  value: %s",
    @@ t/unit-tests/t-hashmap.c (new)
     +						 "    key: %s\n  value: %s",
     +						 entry->key, get_value(entry));
     +					break;
    ++				}
    ++			} else {
    ++				count++;
     +			}
     +		}
    ++		check_int(count, ==, 2);
     +	}
    -+
    ++	check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
     +	check(get_test_entry(map, ignore_case, "notInMap") == NULL);
     +}
     +
    @@ t/unit-tests/t-hashmap.c (new)
     +{
     +	struct test_entry *entry, *removed;
     +	const char *key_val[][2] = { { "key1", "value1" },
    -+				   { "key2", "value2" },
    -+				   { "fooBarFrotz", "value3" } };
    ++				     { "key2", "value2" },
    ++				     { "fooBarFrotz", "value3" } };
     +	const char *remove[][2] = { { ignore_case ? "Key1" : "key1", "value1" },
     +				    { ignore_case ? "keY2" : "key2", "value2" } };
     +
    @@ t/unit-tests/t-hashmap.c (new)
     +	const char *key_val[][3] = { { "key1", "value1", "UNUSED" },
     +				     { "key2", "value2", "UNUSED" },
     +				     { "fooBarFrotz", "value3", "UNUSED" } };
    ++	int count = 0;
     +
     +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
     +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
    @@ t/unit-tests/t-hashmap.c (new)
     +	{
     +		int ret;
     +		if (!check_int((ret = key_val_contains(key_val, ARRAY_SIZE(key_val),
    -+					      entry)), ==, 0)) {
    ++						       entry)), ==, 0)) {
     +			switch (ret) {
    -+				case 1:
    -+					test_msg("found entry was not given in the input\n"
    -+						 "    key: %s\n  value: %s",
    -+						 entry->key, get_value(entry));
    -+					break;
    -+				case 2:
    -+					test_msg("duplicate entry detected\n"
    -+						 "    key: %s\n  value: %s",
    -+						 entry->key, get_value(entry));
    -+					break;
    ++			case 1:
    ++				test_msg("found entry was not given in the input\n"
    ++					 "    key: %s\n  value: %s",
    ++					 entry->key, get_value(entry));
    ++				break;
    ++			case 2:
    ++				test_msg("duplicate entry detected\n"
    ++					 "    key: %s\n  value: %s",
    ++					 entry->key, get_value(entry));
    ++				break;
     +			}
    ++		} else {
    ++			count++;
     +		}
     +	}
    -+	check_int(hashmap_get_size(map), ==, 3);
    ++	check_int(count, ==, ARRAY_SIZE(key_val));
    ++	check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
     +}
     +
     +static void t_alloc(struct hashmap *map, int ignore_case)

 Makefile                 |   1 +
 t/helper/test-hashmap.c  | 100 +----------
 t/t0011-hashmap.sh       | 260 ----------------------------
 t/unit-tests/t-hashmap.c | 359 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 362 insertions(+), 358 deletions(-)
 delete mode 100755 t/t0011-hashmap.sh
 create mode 100644 t/unit-tests/t-hashmap.c

Comments

Josh Steadmon July 9, 2024, 7:34 p.m. UTC | #1
This looks like a good conversion to me. There are a few small
improvements that could be made, but mostly LGTM. Comments are inline
below.


On 2024.07.08 21:45, Ghanshyam Thakkar wrote:
> helper/test-hashmap.c along with t0011-hashmap.sh test the hashmap.h
> library. Migrate them to the unit testing framework for better
> debugging, runtime performance and consice code.

Typo: s/consice/concise/

> Along with the migration, make 'add' tests from the shellscript order
> agnostic in unit tests, since they iterate over entries with the same
> keys and we do not guarantee the order.
> 
> The helper/test-hashmap.c is still not removed because it contains a
> performance test meant to be run by the user directly (not used in
> t/perf). And it makes sense for such a utility to be a helper.
> 
> Mentored-by: Christian Couder <chriscool@tuxfamily.org>
> Mentored-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
> Signed-off-by: Ghanshyam Thakkar <shyamthakkar001@gmail.com>
> ---
> The changes in v2 are inspired from the review of another similar
> test t-oidmap: https://lore.kernel.org/git/16e06a6d-5fd0-4132-9d82-5c6f13b7f9ed@gmail.com/
> 
> The v2 also includes some formatting corrections and one of the
> testcases, t_add(), was changed to be more similar to the original.
> 
>  Makefile                 |   1 +
>  t/helper/test-hashmap.c  | 100 +----------
>  t/t0011-hashmap.sh       | 260 ----------------------------
>  t/unit-tests/t-hashmap.c | 359 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 362 insertions(+), 358 deletions(-)
>  delete mode 100755 t/t0011-hashmap.sh
>  create mode 100644 t/unit-tests/t-hashmap.c

[snip]

> diff --git a/t/unit-tests/t-hashmap.c b/t/unit-tests/t-hashmap.c
> new file mode 100644
> index 0000000000..1c951fcfd8
> --- /dev/null
> +++ b/t/unit-tests/t-hashmap.c
> @@ -0,0 +1,359 @@
> +#include "test-lib.h"
> +#include "hashmap.h"
> +#include "strbuf.h"
> +
> +struct test_entry {
> +	int padding; /* hashmap entry no longer needs to be the first member */
> +	struct hashmap_entry ent;
> +	/* key and value as two \0-terminated strings */
> +	char key[FLEX_ARRAY];
> +};
> +
> +static int test_entry_cmp(const void *cmp_data,
> +			  const struct hashmap_entry *eptr,
> +			  const struct hashmap_entry *entry_or_key,
> +			  const void *keydata)
> +{
> +	const int ignore_case = cmp_data ? *((int *)cmp_data) : 0;
> +	const struct test_entry *e1, *e2;
> +	const char *key = keydata;
> +
> +	e1 = container_of(eptr, const struct test_entry, ent);
> +	e2 = container_of(entry_or_key, const struct test_entry, ent);
> +
> +	if (ignore_case)
> +		return strcasecmp(e1->key, key ? key : e2->key);
> +	else
> +		return strcmp(e1->key, key ? key : e2->key);
> +}
> +
> +static const char *get_value(const struct test_entry *e)
> +{
> +	return e->key + strlen(e->key) + 1;
> +}
> +
> +static struct test_entry *alloc_test_entry(unsigned int ignore_case,
> +					   const char *key, const char *value)
> +{
> +	size_t klen = strlen(key);
> +	size_t vlen = strlen(value);
> +	unsigned int hash = ignore_case ? strihash(key) : strhash(key);
> +	struct test_entry *entry = xmalloc(st_add4(sizeof(*entry), klen, vlen, 2));
> +
> +	hashmap_entry_init(&entry->ent, hash);
> +	memcpy(entry->key, key, klen + 1);
> +	memcpy(entry->key + klen + 1, value, vlen + 1);
> +	return entry;
> +}

So we're duplicating `struct test_entry`, `test_entry_cmp()`, and
`alloc_test_entry()`, which have (almost) identical definitions in
t/helper/test-hashmap.c. I wonder if it's worth splitting these into a
separate .c file. Maybe it's too much of a pain to add Makefile rules to
share objects across the test helper and the unit tests. Something to
keep in mind I guess, if we find that we want to share more code than
this.


> +static struct test_entry *get_test_entry(struct hashmap *map,
> +					 unsigned int ignore_case, const char *key)
> +{
> +	return hashmap_get_entry_from_hash(
> +		map, ignore_case ? strihash(key) : strhash(key), key,
> +		struct test_entry, ent);
> +}
> +
> +static int key_val_contains(const char *key_val[][3], size_t n,
> +			    struct test_entry *entry)
> +{
> +	for (size_t i = 0; i < n; i++) {
> +		if (!strcmp(entry->key, key_val[i][0]) &&
> +		    !strcmp(get_value(entry), key_val[i][1])) {
> +			if (!strcmp(key_val[i][2], "USED"))
> +				return 2;
> +			key_val[i][2] = "USED";
> +			return 0;
> +		}
> +	}
> +	return 1;
> +}
> +
> +static void setup(void (*f)(struct hashmap *map, int ignore_case),
> +		  int ignore_case)
> +{
> +	struct hashmap map = HASHMAP_INIT(test_entry_cmp, &ignore_case);
> +
> +	f(&map, ignore_case);
> +	hashmap_clear_and_free(&map, struct test_entry, ent);
> +}

As I mentioned in my review [1] on René's TEST_RUN series, I don't think
we get much value out of having a setup + callback approach when the
setup is minimal. Would you consider rewriting a v2 using TEST_RUN once
that is ready in `next`?

[1] https://lore.kernel.org/git/tswyfparvchgi7qxrjxbx4eb7cohypzekjqzbnkbffsesaiazs@vtewtz7o6twi/

> +static void t_put(struct hashmap *map, int ignore_case)
> +{
> +	struct test_entry *entry;
> +	const char *key_val[][2] = { { "key1", "value1" },
> +				     { "key2", "value2" },
> +				     { "fooBarFrotz", "value3" } };
> +
> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> +		check(hashmap_put_entry(map, entry, ent) == NULL);
> +	}
> +
> +	entry = alloc_test_entry(ignore_case, "foobarfrotz", "value4");
> +	entry = hashmap_put_entry(map, entry, ent);
> +	check(ignore_case ? entry != NULL : entry == NULL);
> +	free(entry);
> +
> +	check_int(map->tablesize, ==, 64);
> +	check_int(hashmap_get_size(map), ==,
> +		  ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1);
> +}

Ahhh, so you're using the same function for both case-sensitive and
-insensitive tests. So I guess TEST_RUN isn't useful here after all.
Personally I'd still rather get rid of setup(), but I don't feel super
strongly about it.


> +static void t_replace(struct hashmap *map, int ignore_case)
> +{
> +	struct test_entry *entry;
> +
> +	entry = alloc_test_entry(ignore_case, "key1", "value1");
> +	check(hashmap_put_entry(map, entry, ent) == NULL);
> +
> +	entry = alloc_test_entry(ignore_case, ignore_case ? "Key1" : "key1",
> +				 "value2");
> +	entry = hashmap_put_entry(map, entry, ent);
> +	if (check(entry != NULL))
> +		check_str(get_value(entry), "value1");
> +	free(entry);
> +
> +	entry = alloc_test_entry(ignore_case, "fooBarFrotz", "value3");
> +	check(hashmap_put_entry(map, entry, ent) == NULL);
> +
> +	entry = alloc_test_entry(ignore_case,
> +				 ignore_case ? "foobarfrotz" : "fooBarFrotz",
> +				 "value4");
> +	entry = hashmap_put_entry(map, entry, ent);
> +	if (check(entry != NULL))
> +		check_str(get_value(entry), "value3");
> +	free(entry);
> +}
> +
> +static void t_get(struct hashmap *map, int ignore_case)
> +{
> +	struct test_entry *entry;
> +	const char *key_val[][2] = { { "key1", "value1" },
> +				     { "key2", "value2" },
> +				     { "fooBarFrotz", "value3" },
> +				     { ignore_case ? "key4" : "foobarfrotz", "value4" } };
> +	const char *query[][2] = {
> +		{ ignore_case ? "Key1" : "key1", "value1" },
> +		{ ignore_case ? "keY2" : "key2", "value2" },
> +		{ ignore_case ? "foobarfrotz" : "fooBarFrotz", "value3" }
> +	};
> +
> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> +		check(hashmap_put_entry(map, entry, ent) == NULL);
> +	}
> +
> +	for (size_t i = 0; i < ARRAY_SIZE(query); i++) {
> +		entry = get_test_entry(map, ignore_case, query[i][0]);
> +		if (check(entry != NULL))
> +			check_str(get_value(entry), query[i][1]);
> +	}
> +
> +	check(get_test_entry(map, ignore_case, "notInMap") == NULL);
> +}
> +
> +static void t_add(struct hashmap *map, int ignore_case)
> +{
> +	struct test_entry *entry;
> +	const char *key_val[][3] = {
> +		{ "key1", "value1", "UNUSED" },
> +		{ ignore_case ? "Key1" : "key1", "value2", "UNUSED" },
> +		{ "fooBarFrotz", "value3", "UNUSED" },
> +		{ ignore_case ? "Foobarfrotz" : "fooBarFrotz", "value4", "UNUSED" }
> +	};
> +	const char *queries[] = { "key1",
> +				  ignore_case ? "Foobarfrotz" : "fooBarFrotz" };
> +
> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> +		hashmap_add(map, &entry->ent);
> +	}
> +
> +	for (size_t i = 0; i < ARRAY_SIZE(queries); i++) {

Since we only have one query, can we remove the loop and simplify the
following block of code?

Also (here and elsewhere), it might be less confusing to say "UNSEEN" /
"SEEN" instead of "UNUSED" / "USED". The latter makes it sound to me
like there's some API requirement to have a 3-item array that we don't
actually need, but in this case those fields are actually used in
key_val_contains() to track duplicates.


> +		int count = 0;
> +		entry = hashmap_get_entry_from_hash(map,
> +			ignore_case ? strihash(queries[i]) :
> +				      strhash(queries[i]),
> +			queries[i], struct test_entry, ent);
> +
> +		hashmap_for_each_entry_from(map, entry, ent)
> +		{
> +			int ret;
> +			if (!check_int((ret = key_val_contains(
> +						key_val, ARRAY_SIZE(key_val),
> +						entry)), ==, 0)) {
> +				switch (ret) {
> +				case 1:
> +					test_msg("found entry was not given in the input\n"
> +						 "    key: %s\n  value: %s",
> +						 entry->key, get_value(entry));
> +					break;
> +				case 2:
> +					test_msg("duplicate entry detected\n"
> +						 "    key: %s\n  value: %s",
> +						 entry->key, get_value(entry));
> +					break;
> +				}
> +			} else {
> +				count++;
> +			}
> +		}
> +		check_int(count, ==, 2);
> +	}
> +	check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
> +	check(get_test_entry(map, ignore_case, "notInMap") == NULL);
> +}
> +
> +static void t_remove(struct hashmap *map, int ignore_case)
> +{
> +	struct test_entry *entry, *removed;
> +	const char *key_val[][2] = { { "key1", "value1" },
> +				     { "key2", "value2" },
> +				     { "fooBarFrotz", "value3" } };
> +	const char *remove[][2] = { { ignore_case ? "Key1" : "key1", "value1" },
> +				    { ignore_case ? "keY2" : "key2", "value2" } };
> +
> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> +		check(hashmap_put_entry(map, entry, ent) == NULL);
> +	}
> +
> +	for (size_t i = 0; i < ARRAY_SIZE(remove); i++) {
> +		entry = alloc_test_entry(ignore_case, remove[i][0], "");
> +		removed = hashmap_remove_entry(map, entry, ent, remove[i][0]);
> +		if (check(removed != NULL))
> +			check_str(get_value(removed), remove[i][1]);
> +		free(entry);
> +		free(removed);
> +	}
> +
> +	entry = alloc_test_entry(ignore_case, "notInMap", "");
> +	check(hashmap_remove_entry(map, entry, ent, "notInMap") == NULL);
> +	free(entry);
> +}

Is there a reason why you don't check the table size as the shell test
did? (similar to what you have in t_put())

> +static void t_iterate(struct hashmap *map, int ignore_case)
> +{
> +	struct test_entry *entry;
> +	struct hashmap_iter iter;
> +	const char *key_val[][3] = { { "key1", "value1", "UNUSED" },
> +				     { "key2", "value2", "UNUSED" },
> +				     { "fooBarFrotz", "value3", "UNUSED" } };
> +	int count = 0;
> +
> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> +		check(hashmap_put_entry(map, entry, ent) == NULL);
> +	}
> +
> +	hashmap_for_each_entry(map, &iter, entry, ent /* member name */)
> +	{
> +		int ret;
> +		if (!check_int((ret = key_val_contains(key_val, ARRAY_SIZE(key_val),
> +						       entry)), ==, 0)) {
> +			switch (ret) {
> +			case 1:
> +				test_msg("found entry was not given in the input\n"
> +					 "    key: %s\n  value: %s",
> +					 entry->key, get_value(entry));
> +				break;
> +			case 2:
> +				test_msg("duplicate entry detected\n"
> +					 "    key: %s\n  value: %s",
> +					 entry->key, get_value(entry));
> +				break;
> +			}
> +		} else {
> +			count++;
> +		}
> +	}
> +	check_int(count, ==, ARRAY_SIZE(key_val));
> +	check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
> +}
> +
> +static void t_alloc(struct hashmap *map, int ignore_case)
> +{
> +	struct test_entry *entry, *removed;
> +
> +	for (int i = 1; i <= 51; i++) {
> +		char *key = xstrfmt("key%d", i);
> +		char *value = xstrfmt("value%d", i);
> +		entry = alloc_test_entry(ignore_case, key, value);
> +		check(hashmap_put_entry(map, entry, ent) == NULL);
> +		free(key);
> +		free(value);
> +	}
> +	check_int(map->tablesize, ==, 64);
> +	check_int(hashmap_get_size(map), ==, 51);
> +
> +	entry = alloc_test_entry(ignore_case, "key52", "value52");
> +	check(hashmap_put_entry(map, entry, ent) == NULL);
> +	check_int(map->tablesize, ==, 256);
> +	check_int(hashmap_get_size(map), ==, 52);
> +
> +	for (int i = 1; i <= 12; i++) {
> +		char *key = xstrfmt("key%d", i);
> +		char *value = xstrfmt("value%d", i);
> +
> +		entry = alloc_test_entry(ignore_case, key, "");
> +		removed = hashmap_remove_entry(map, entry, ent, key);
> +		if (check(removed != NULL))
> +			check_str(value, get_value(removed));
> +		free(key);
> +		free(value);
> +		free(entry);
> +		free(removed);
> +	}
> +	check_int(map->tablesize, ==, 256);
> +	check_int(hashmap_get_size(map), ==, 40);
> +
> +	entry = alloc_test_entry(ignore_case, "key40", "");
> +	removed = hashmap_remove_entry(map, entry, ent, "key40");
> +	if (check(removed != NULL))
> +		check_str("value40", get_value(removed));
> +	check_int(map->tablesize, ==, 64);
> +	check_int(hashmap_get_size(map), ==, 39);
> +	free(entry);
> +	free(removed);
> +}
> +
> +static void t_intern(struct hashmap *map, int ignore_case)
> +{
> +	const char *values[] = { "value1", "Value1", "value2", "value2" };
> +
> +	for (size_t i = 0; i < ARRAY_SIZE(values); i++) {
> +		const char *i1 = strintern(values[i]);
> +		const char *i2 = strintern(values[i]);
> +
> +		if (!check(!strcmp(i1, values[i])))

Should we use check_str() here? Or does that add too much extraneous
detail when the test_msg() below is what we really care about?

> +			test_msg("strintern(%s) returns %s\n", values[i], i1);
> +		else if (!check(i1 != values[i]))

Similarly, check_pointer_eq() here?


> +			test_msg("strintern(%s) returns input pointer\n",
> +				 values[i]);
> +		else if (!check(i1 == i2))

And check_pointer_eq() here as well.


> +			test_msg("address('%s') != address('%s'), so strintern('%s') != strintern('%s')",
> +				 i1, i2, values[i], values[i]);
> +		else
> +			check_str(i1, values[i]);
> +	}
> +}
> +
> +int cmd_main(int argc UNUSED, const char **argv UNUSED)
> +{
> +	TEST(setup(t_put, 0), "put works");
> +	TEST(setup(t_put, 1), "put (case insensitive) works");
> +	TEST(setup(t_replace, 0), "replace works");
> +	TEST(setup(t_replace, 1), "replace (case insensitive) works");
> +	TEST(setup(t_get, 0), "get works");
> +	TEST(setup(t_get, 1), "get (case insensitive) works");
> +	TEST(setup(t_add, 0), "add works");
> +	TEST(setup(t_add, 1), "add (case insensitive) works");
> +	TEST(setup(t_remove, 0), "remove works");
> +	TEST(setup(t_remove, 1), "remove (case insensitive) works");
> +	TEST(setup(t_iterate, 0), "iterate works");
> +	TEST(setup(t_iterate, 1), "iterate (case insensitive) works");
> +	TEST(setup(t_alloc, 0), "grow / shrink works");
> +	TEST(setup(t_intern, 0), "string interning works");
> +	return test_done();
> +}
> -- 
> 2.45.2
> 
>
Junio C Hamano July 9, 2024, 9:42 p.m. UTC | #2
Josh Steadmon <steadmon@google.com> writes:

>> +static void t_put(struct hashmap *map, int ignore_case)
>> +{
>> +	struct test_entry *entry;
>> +	const char *key_val[][2] = { { "key1", "value1" },
>> +				     { "key2", "value2" },
>> +				     { "fooBarFrotz", "value3" } };
>> +
>> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
>> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
>> +		check(hashmap_put_entry(map, entry, ent) == NULL);
>> +	}
>> +
>> +	entry = alloc_test_entry(ignore_case, "foobarfrotz", "value4");
>> +	entry = hashmap_put_entry(map, entry, ent);
>> +	check(ignore_case ? entry != NULL : entry == NULL);
>> +	free(entry);
>> +
>> +	check_int(map->tablesize, ==, 64);
>> +	check_int(hashmap_get_size(map), ==,
>> +		  ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1);
>> +}
>
> Ahhh, so you're using the same function for both case-sensitive and
> -insensitive tests. So I guess TEST_RUN isn't useful here after all.
> Personally I'd still rather get rid of setup(), but I don't feel super
> strongly about it.

Consulting the table with "fooBarFrotz" and checking what gets
returned (expect "value3" for !icase, or "value4" for icase) is one
of the things that are missing.  In fact, the values stored are not
even checked with the above test at all.

>> +static void t_replace(struct hashmap *map, int ignore_case)
>> +{
>> +	struct test_entry *entry;
>> +
>> +	entry = alloc_test_entry(ignore_case, "key1", "value1");
>> +	check(hashmap_put_entry(map, entry, ent) == NULL);
>> +
>> +	entry = alloc_test_entry(ignore_case, ignore_case ? "Key1" : "key1",
>> +				 "value2");
>> +	entry = hashmap_put_entry(map, entry, ent);
>> +	if (check(entry != NULL))
>> +		check_str(get_value(entry), "value1");
>> +	free(entry);
>> +
>> +	entry = alloc_test_entry(ignore_case, "fooBarFrotz", "value3");
>> +	check(hashmap_put_entry(map, entry, ent) == NULL);
>> +
>> +	entry = alloc_test_entry(ignore_case,
>> +				 ignore_case ? "foobarfrotz" : "fooBarFrotz",
>> +				 "value4");

Curious.  If the hashmap is set up for icase use, do callers still
need to downcase the key?  Shouldn't the library take care of that?
After all, test_entry_cmp() when the hashmap is being used in icase
mode does strcasecmp() anyway.

>> +	entry = hashmap_put_entry(map, entry, ent);
>> +	if (check(entry != NULL))
>> +		check_str(get_value(entry), "value3");

Here the stored value is checked, which is good.

>> +	free(entry);
>> +}
>> +
>> +static void t_get(struct hashmap *map, int ignore_case)
>> +{
>> +	struct test_entry *entry;
>> +	const char *key_val[][2] = { { "key1", "value1" },
>> +				     { "key2", "value2" },
>> +				     { "fooBarFrotz", "value3" },
>> +				     { ignore_case ? "key4" : "foobarfrotz", "value4" } };
>> +	const char *query[][2] = {
>> +		{ ignore_case ? "Key1" : "key1", "value1" },
>> +		{ ignore_case ? "keY2" : "key2", "value2" },
>> +		{ ignore_case ? "foobarfrotz" : "fooBarFrotz", "value3" }
>> +	};
>> +
>> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
>> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
>> +		check(hashmap_put_entry(map, entry, ent) == NULL);
>> +	}
>> +
>> +	for (size_t i = 0; i < ARRAY_SIZE(query); i++) {
>> +		entry = get_test_entry(map, ignore_case, query[i][0]);
>> +		if (check(entry != NULL))
>> +			check_str(get_value(entry), query[i][1]);
>> +	}
>> +
>> +	check(get_test_entry(map, ignore_case, "notInMap") == NULL);
>> +}

It is getting dubious if it is worth having t_put, when t_get does
so similar things and more.

Same comment applies wrt icase, and not limited to t_get() but all
the remaining test functions (elided).

Thanks.
Ghanshyam Thakkar July 9, 2024, 11:27 p.m. UTC | #3
Josh Steadmon <steadmon@google.com> wrote:
>
> This looks like a good conversion to me. There are a few small
> improvements that could be made, but mostly LGTM. Comments are inline
> below.
>
>
> On 2024.07.08 21:45, Ghanshyam Thakkar wrote:
> > helper/test-hashmap.c along with t0011-hashmap.sh test the hashmap.h
> > library. Migrate them to the unit testing framework for better
> > debugging, runtime performance and consice code.
>
> Typo: s/consice/concise/

Will update.

>
> > Along with the migration, make 'add' tests from the shellscript order
> > agnostic in unit tests, since they iterate over entries with the same
> > keys and we do not guarantee the order.
> > 
> > The helper/test-hashmap.c is still not removed because it contains a
> > performance test meant to be run by the user directly (not used in
> > t/perf). And it makes sense for such a utility to be a helper.
> > 
> > Mentored-by: Christian Couder <chriscool@tuxfamily.org>
> > Mentored-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
> > Signed-off-by: Ghanshyam Thakkar <shyamthakkar001@gmail.com>
> > ---
> > The changes in v2 are inspired from the review of another similar
> > test t-oidmap: https://lore.kernel.org/git/16e06a6d-5fd0-4132-9d82-5c6f13b7f9ed@gmail.com/
> > 
> > The v2 also includes some formatting corrections and one of the
> > testcases, t_add(), was changed to be more similar to the original.
> > 
> >  Makefile                 |   1 +
> >  t/helper/test-hashmap.c  | 100 +----------
> >  t/t0011-hashmap.sh       | 260 ----------------------------
> >  t/unit-tests/t-hashmap.c | 359 +++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 362 insertions(+), 358 deletions(-)
> >  delete mode 100755 t/t0011-hashmap.sh
> >  create mode 100644 t/unit-tests/t-hashmap.c
>
> [snip]
>
> > diff --git a/t/unit-tests/t-hashmap.c b/t/unit-tests/t-hashmap.c
> > new file mode 100644
> > index 0000000000..1c951fcfd8
> > --- /dev/null
> > +++ b/t/unit-tests/t-hashmap.c
> > @@ -0,0 +1,359 @@
> > +#include "test-lib.h"
> > +#include "hashmap.h"
> > +#include "strbuf.h"
> > +
> > +struct test_entry {
> > +	int padding; /* hashmap entry no longer needs to be the first member */
> > +	struct hashmap_entry ent;
> > +	/* key and value as two \0-terminated strings */
> > +	char key[FLEX_ARRAY];
> > +};
> > +
> > +static int test_entry_cmp(const void *cmp_data,
> > +			  const struct hashmap_entry *eptr,
> > +			  const struct hashmap_entry *entry_or_key,
> > +			  const void *keydata)
> > +{
> > +	const int ignore_case = cmp_data ? *((int *)cmp_data) : 0;
> > +	const struct test_entry *e1, *e2;
> > +	const char *key = keydata;
> > +
> > +	e1 = container_of(eptr, const struct test_entry, ent);
> > +	e2 = container_of(entry_or_key, const struct test_entry, ent);
> > +
> > +	if (ignore_case)
> > +		return strcasecmp(e1->key, key ? key : e2->key);
> > +	else
> > +		return strcmp(e1->key, key ? key : e2->key);
> > +}
> > +
> > +static const char *get_value(const struct test_entry *e)
> > +{
> > +	return e->key + strlen(e->key) + 1;
> > +}
> > +
> > +static struct test_entry *alloc_test_entry(unsigned int ignore_case,
> > +					   const char *key, const char *value)
> > +{
> > +	size_t klen = strlen(key);
> > +	size_t vlen = strlen(value);
> > +	unsigned int hash = ignore_case ? strihash(key) : strhash(key);
> > +	struct test_entry *entry = xmalloc(st_add4(sizeof(*entry), klen, vlen, 2));
> > +
> > +	hashmap_entry_init(&entry->ent, hash);
> > +	memcpy(entry->key, key, klen + 1);
> > +	memcpy(entry->key + klen + 1, value, vlen + 1);
> > +	return entry;
> > +}
>
> So we're duplicating `struct test_entry`, `test_entry_cmp()`, and
> `alloc_test_entry()`, which have (almost) identical definitions in
> t/helper/test-hashmap.c. I wonder if it's worth splitting these into a
> separate .c file. Maybe it's too much of a pain to add Makefile rules to
> share objects across the test helper and the unit tests. Something to
> keep in mind I guess, if we find that we want to share more code than
> this.
>
>
> > +static struct test_entry *get_test_entry(struct hashmap *map,
> > +					 unsigned int ignore_case, const char *key)
> > +{
> > +	return hashmap_get_entry_from_hash(
> > +		map, ignore_case ? strihash(key) : strhash(key), key,
> > +		struct test_entry, ent);
> > +}
> > +
> > +static int key_val_contains(const char *key_val[][3], size_t n,
> > +			    struct test_entry *entry)
> > +{
> > +	for (size_t i = 0; i < n; i++) {
> > +		if (!strcmp(entry->key, key_val[i][0]) &&
> > +		    !strcmp(get_value(entry), key_val[i][1])) {
> > +			if (!strcmp(key_val[i][2], "USED"))
> > +				return 2;
> > +			key_val[i][2] = "USED";
> > +			return 0;
> > +		}
> > +	}
> > +	return 1;
> > +}
> > +
> > +static void setup(void (*f)(struct hashmap *map, int ignore_case),
> > +		  int ignore_case)
> > +{
> > +	struct hashmap map = HASHMAP_INIT(test_entry_cmp, &ignore_case);
> > +
> > +	f(&map, ignore_case);
> > +	hashmap_clear_and_free(&map, struct test_entry, ent);
> > +}
>
> As I mentioned in my review [1] on René's TEST_RUN series, I don't
> think
> we get much value out of having a setup + callback approach when the
> setup is minimal. Would you consider rewriting a v2 using TEST_RUN once
> that is ready in `next`?
>
> [1]
> https://lore.kernel.org/git/tswyfparvchgi7qxrjxbx4eb7cohypzekjqzbnkbffsesaiazs@vtewtz7o6twi/
>
> > +static void t_put(struct hashmap *map, int ignore_case)
> > +{
> > +	struct test_entry *entry;
> > +	const char *key_val[][2] = { { "key1", "value1" },
> > +				     { "key2", "value2" },
> > +				     { "fooBarFrotz", "value3" } };
> > +
> > +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> > +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> > +		check(hashmap_put_entry(map, entry, ent) == NULL);
> > +	}
> > +
> > +	entry = alloc_test_entry(ignore_case, "foobarfrotz", "value4");
> > +	entry = hashmap_put_entry(map, entry, ent);
> > +	check(ignore_case ? entry != NULL : entry == NULL);
> > +	free(entry);
> > +
> > +	check_int(map->tablesize, ==, 64);
> > +	check_int(hashmap_get_size(map), ==,
> > +		  ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1);
> > +}
>
> Ahhh, so you're using the same function for both case-sensitive and
> -insensitive tests. So I guess TEST_RUN isn't useful here after all.
> Personally I'd still rather get rid of setup(), but I don't feel super
> strongly about it.

Sure, I'll get rid of setup().

>
> > +static void t_replace(struct hashmap *map, int ignore_case)
> > +{
> > +	struct test_entry *entry;
> > +
> > +	entry = alloc_test_entry(ignore_case, "key1", "value1");
> > +	check(hashmap_put_entry(map, entry, ent) == NULL);
> > +
> > +	entry = alloc_test_entry(ignore_case, ignore_case ? "Key1" : "key1",
> > +				 "value2");
> > +	entry = hashmap_put_entry(map, entry, ent);
> > +	if (check(entry != NULL))
> > +		check_str(get_value(entry), "value1");
> > +	free(entry);
> > +
> > +	entry = alloc_test_entry(ignore_case, "fooBarFrotz", "value3");
> > +	check(hashmap_put_entry(map, entry, ent) == NULL);
> > +
> > +	entry = alloc_test_entry(ignore_case,
> > +				 ignore_case ? "foobarfrotz" : "fooBarFrotz",
> > +				 "value4");
> > +	entry = hashmap_put_entry(map, entry, ent);
> > +	if (check(entry != NULL))
> > +		check_str(get_value(entry), "value3");
> > +	free(entry);
> > +}
> > +
> > +static void t_get(struct hashmap *map, int ignore_case)
> > +{
> > +	struct test_entry *entry;
> > +	const char *key_val[][2] = { { "key1", "value1" },
> > +				     { "key2", "value2" },
> > +				     { "fooBarFrotz", "value3" },
> > +				     { ignore_case ? "key4" : "foobarfrotz", "value4" } };
> > +	const char *query[][2] = {
> > +		{ ignore_case ? "Key1" : "key1", "value1" },
> > +		{ ignore_case ? "keY2" : "key2", "value2" },
> > +		{ ignore_case ? "foobarfrotz" : "fooBarFrotz", "value3" }
> > +	};
> > +
> > +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> > +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> > +		check(hashmap_put_entry(map, entry, ent) == NULL);
> > +	}
> > +
> > +	for (size_t i = 0; i < ARRAY_SIZE(query); i++) {
> > +		entry = get_test_entry(map, ignore_case, query[i][0]);
> > +		if (check(entry != NULL))
> > +			check_str(get_value(entry), query[i][1]);
> > +	}
> > +
> > +	check(get_test_entry(map, ignore_case, "notInMap") == NULL);
> > +}
> > +
> > +static void t_add(struct hashmap *map, int ignore_case)
> > +{
> > +	struct test_entry *entry;
> > +	const char *key_val[][3] = {
> > +		{ "key1", "value1", "UNUSED" },
> > +		{ ignore_case ? "Key1" : "key1", "value2", "UNUSED" },
> > +		{ "fooBarFrotz", "value3", "UNUSED" },
> > +		{ ignore_case ? "Foobarfrotz" : "fooBarFrotz", "value4", "UNUSED" }
> > +	};
> > +	const char *queries[] = { "key1",
> > +				  ignore_case ? "Foobarfrotz" : "fooBarFrotz" };
> > +
> > +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> > +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> > +		hashmap_add(map, &entry->ent);
> > +	}
> > +
> > +	for (size_t i = 0; i < ARRAY_SIZE(queries); i++) {
>
> Since we only have one query, can we remove the loop and simplify the
> following block of code?

We have two queries as can be seen above. One is "key1" and the other is
"Foobarfrotz"/"fooBarFrotz".

> Also (here and elsewhere), it might be less confusing to say "UNSEEN" /
> "SEEN" instead of "UNUSED" / "USED". The latter makes it sound to me
> like there's some API requirement to have a 3-item array that we don't
> actually need, but in this case those fields are actually used in
> key_val_contains() to track duplicates.

Yeah, "UNSEEN" is definitely less confusing. Will change.

>
> > +		int count = 0;
> > +		entry = hashmap_get_entry_from_hash(map,
> > +			ignore_case ? strihash(queries[i]) :
> > +				      strhash(queries[i]),
> > +			queries[i], struct test_entry, ent);
> > +
> > +		hashmap_for_each_entry_from(map, entry, ent)
> > +		{
> > +			int ret;
> > +			if (!check_int((ret = key_val_contains(
> > +						key_val, ARRAY_SIZE(key_val),
> > +						entry)), ==, 0)) {
> > +				switch (ret) {
> > +				case 1:
> > +					test_msg("found entry was not given in the input\n"
> > +						 "    key: %s\n  value: %s",
> > +						 entry->key, get_value(entry));
> > +					break;
> > +				case 2:
> > +					test_msg("duplicate entry detected\n"
> > +						 "    key: %s\n  value: %s",
> > +						 entry->key, get_value(entry));
> > +					break;
> > +				}
> > +			} else {
> > +				count++;
> > +			}
> > +		}
> > +		check_int(count, ==, 2);
> > +	}
> > +	check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
> > +	check(get_test_entry(map, ignore_case, "notInMap") == NULL);
> > +}
> > +
> > +static void t_remove(struct hashmap *map, int ignore_case)
> > +{
> > +	struct test_entry *entry, *removed;
> > +	const char *key_val[][2] = { { "key1", "value1" },
> > +				     { "key2", "value2" },
> > +				     { "fooBarFrotz", "value3" } };
> > +	const char *remove[][2] = { { ignore_case ? "Key1" : "key1", "value1" },
> > +				    { ignore_case ? "keY2" : "key2", "value2" } };
> > +
> > +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> > +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> > +		check(hashmap_put_entry(map, entry, ent) == NULL);
> > +	}
> > +
> > +	for (size_t i = 0; i < ARRAY_SIZE(remove); i++) {
> > +		entry = alloc_test_entry(ignore_case, remove[i][0], "");
> > +		removed = hashmap_remove_entry(map, entry, ent, remove[i][0]);
> > +		if (check(removed != NULL))
> > +			check_str(get_value(removed), remove[i][1]);
> > +		free(entry);
> > +		free(removed);
> > +	}
> > +
> > +	entry = alloc_test_entry(ignore_case, "notInMap", "");
> > +	check(hashmap_remove_entry(map, entry, ent, "notInMap") == NULL);
> > +	free(entry);
> > +}
>
> Is there a reason why you don't check the table size as the shell test
> did? (similar to what you have in t_put())

Huh, I seem to have forgotten to put this snippet here:
  
	check_int(map->tablesize, ==, 64);
	check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val) - ARRAY_SIZE(remove));
	
I'll add it in v3.

>
> > +static void t_iterate(struct hashmap *map, int ignore_case)
> > +{
> > +	struct test_entry *entry;
> > +	struct hashmap_iter iter;
> > +	const char *key_val[][3] = { { "key1", "value1", "UNUSED" },
> > +				     { "key2", "value2", "UNUSED" },
> > +				     { "fooBarFrotz", "value3", "UNUSED" } };
> > +	int count = 0;
> > +
> > +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> > +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> > +		check(hashmap_put_entry(map, entry, ent) == NULL);
> > +	}
> > +
> > +	hashmap_for_each_entry(map, &iter, entry, ent /* member name */)
> > +	{
> > +		int ret;
> > +		if (!check_int((ret = key_val_contains(key_val, ARRAY_SIZE(key_val),
> > +						       entry)), ==, 0)) {
> > +			switch (ret) {
> > +			case 1:
> > +				test_msg("found entry was not given in the input\n"
> > +					 "    key: %s\n  value: %s",
> > +					 entry->key, get_value(entry));
> > +				break;
> > +			case 2:
> > +				test_msg("duplicate entry detected\n"
> > +					 "    key: %s\n  value: %s",
> > +					 entry->key, get_value(entry));
> > +				break;
> > +			}
> > +		} else {
> > +			count++;
> > +		}
> > +	}
> > +	check_int(count, ==, ARRAY_SIZE(key_val));
> > +	check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
> > +}
> > +
> > +static void t_alloc(struct hashmap *map, int ignore_case)
> > +{
> > +	struct test_entry *entry, *removed;
> > +
> > +	for (int i = 1; i <= 51; i++) {
> > +		char *key = xstrfmt("key%d", i);
> > +		char *value = xstrfmt("value%d", i);
> > +		entry = alloc_test_entry(ignore_case, key, value);
> > +		check(hashmap_put_entry(map, entry, ent) == NULL);
> > +		free(key);
> > +		free(value);
> > +	}
> > +	check_int(map->tablesize, ==, 64);
> > +	check_int(hashmap_get_size(map), ==, 51);
> > +
> > +	entry = alloc_test_entry(ignore_case, "key52", "value52");
> > +	check(hashmap_put_entry(map, entry, ent) == NULL);
> > +	check_int(map->tablesize, ==, 256);
> > +	check_int(hashmap_get_size(map), ==, 52);
> > +
> > +	for (int i = 1; i <= 12; i++) {
> > +		char *key = xstrfmt("key%d", i);
> > +		char *value = xstrfmt("value%d", i);
> > +
> > +		entry = alloc_test_entry(ignore_case, key, "");
> > +		removed = hashmap_remove_entry(map, entry, ent, key);
> > +		if (check(removed != NULL))
> > +			check_str(value, get_value(removed));
> > +		free(key);
> > +		free(value);
> > +		free(entry);
> > +		free(removed);
> > +	}
> > +	check_int(map->tablesize, ==, 256);
> > +	check_int(hashmap_get_size(map), ==, 40);
> > +
> > +	entry = alloc_test_entry(ignore_case, "key40", "");
> > +	removed = hashmap_remove_entry(map, entry, ent, "key40");
> > +	if (check(removed != NULL))
> > +		check_str("value40", get_value(removed));
> > +	check_int(map->tablesize, ==, 64);
> > +	check_int(hashmap_get_size(map), ==, 39);
> > +	free(entry);
> > +	free(removed);
> > +}
> > +
> > +static void t_intern(struct hashmap *map, int ignore_case)
> > +{
> > +	const char *values[] = { "value1", "Value1", "value2", "value2" };
> > +
> > +	for (size_t i = 0; i < ARRAY_SIZE(values); i++) {
> > +		const char *i1 = strintern(values[i]);
> > +		const char *i2 = strintern(values[i]);
> > +
> > +		if (!check(!strcmp(i1, values[i])))
>
> Should we use check_str() here? Or does that add too much extraneous
> detail when the test_msg() below is what we really care about?

It would repetitive as both print the same thing but test_msg() has more
context.

> > +			test_msg("strintern(%s) returns %s\n", values[i], i1);
> > +		else if (!check(i1 != values[i]))
>
> Similarly, check_pointer_eq() here?

How would we use check_pointer_eq() here as we are checking to make
sure they are not equal?

>
> > +			test_msg("strintern(%s) returns input pointer\n",
> > +				 values[i]);
> > +		else if (!check(i1 == i2))
>
> And check_pointer_eq() here as well.

Will update here.

Thanks for the review.
Phillip Wood July 10, 2024, 10:19 a.m. UTC | #4
On 09/07/2024 20:34, Josh Steadmon wrote:
> On 2024.07.08 21:45, Ghanshyam Thakkar wrote:

>> +static void t_put(struct hashmap *map, int ignore_case)
>> +{
>> +	struct test_entry *entry;
>> +	const char *key_val[][2] = { { "key1", "value1" },
>> +				     { "key2", "value2" },
>> +				     { "fooBarFrotz", "value3" } };
>> +
>> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
>> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
>> +		check(hashmap_put_entry(map, entry, ent) == NULL);
>> +	}
>> +
>> +	entry = alloc_test_entry(ignore_case, "foobarfrotz", "value4");
>> +	entry = hashmap_put_entry(map, entry, ent);
>> +	check(ignore_case ? entry != NULL : entry == NULL);
>> +	free(entry);
>> +
>> +	check_int(map->tablesize, ==, 64);
>> +	check_int(hashmap_get_size(map), ==,
>> +		  ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1);
>> +}
> 
> Ahhh, so you're using the same function for both case-sensitive and
> -insensitive tests. So I guess TEST_RUN isn't useful here after all.
> Personally I'd still rather get rid of setup(), but I don't feel super
> strongly about it.

I'm not sure - we have to pass ignore_case to HASHMAP_INIT and the test 
function so using setup() means we cannot pass different values to the 
two different functions.

For parameterized tests where we calling the same function with 
different inputs using a setup function allows us to
  - write more concise test code
  - easily change the setup of all the tests if the api changes in the
    future.
  - consistently free resources at the end of a test making it easier to
    write leak-free tests
  - assert pre- and post- conditions on all tests

Using TEST_RUN is useful to declare the different inputs for each test. 
For example in the oid-array tests it allows us to write

     TEST_RUN("ordered enumeration") {
         const char *input[] = { "88", "44", "aa", "55" };
         const char *expected[] = { "44", "55", "88", "aa" };

         TEST_ENUMERATION(input, expected)
     }

rather than declaring a bunch of variables up front a long way from 
where they are used.

>> +static void t_add(struct hashmap *map, int ignore_case)
>> +{
>> +	struct test_entry *entry;
>> +	const char *key_val[][3] = {
>> +		{ "key1", "value1", "UNUSED" },
>> +		{ ignore_case ? "Key1" : "key1", "value2", "UNUSED" },
>> +		{ "fooBarFrotz", "value3", "UNUSED" },
>> +		{ ignore_case ? "Foobarfrotz" : "fooBarFrotz", "value4", "UNUSED" }
>> +	};
>> +	const char *queries[] = { "key1",
>> +				  ignore_case ? "Foobarfrotz" : "fooBarFrotz" };
>> +
>> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
>> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
>> +		hashmap_add(map, &entry->ent);
>> +	}
>> +
>> +	for (size_t i = 0; i < ARRAY_SIZE(queries); i++) {
> 
> Since we only have one query, can we remove the loop and simplify the
> following block of code?
> 
> Also (here and elsewhere), it might be less confusing to say "UNSEEN" /
> "SEEN" instead of "UNUSED" / "USED". The latter makes it sound to me
> like there's some API requirement to have a 3-item array that we don't
> actually need, but in this case those fields are actually used in
> key_val_contains() to track duplicates.

The third element just needs to be a boolean flag so it might be better 
to use a struct

	const struct {
		char *key;
		char *val;
		char seen;
	} key_val[] = {
		{ .key = "key1", .val = "value1" },
		{ .key = ignore_case ? "Key1" : "key1" .val = "value2" },		{ .key = 
"fooBarFrotz" .val = "value3" },
		{ .key = ignore_case ? "Foobarfrotz" : "fooBarFrotz", .value = "value4" }
	};

Best Wishes

Phillip
Ghanshyam Thakkar July 10, 2024, 7:41 p.m. UTC | #5
Junio C Hamano <gitster@pobox.com> wrote:
> Josh Steadmon <steadmon@google.com> writes:
>
> >> +static void t_put(struct hashmap *map, int ignore_case)
> >> +{
> >> +	struct test_entry *entry;
> >> +	const char *key_val[][2] = { { "key1", "value1" },
> >> +				     { "key2", "value2" },
> >> +				     { "fooBarFrotz", "value3" } };
> >> +
> >> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> >> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> >> +		check(hashmap_put_entry(map, entry, ent) == NULL);
> >> +	}
> >> +
> >> +	entry = alloc_test_entry(ignore_case, "foobarfrotz", "value4");
> >> +	entry = hashmap_put_entry(map, entry, ent);
> >> +	check(ignore_case ? entry != NULL : entry == NULL);
> >> +	free(entry);
> >> +
> >> +	check_int(map->tablesize, ==, 64);
> >> +	check_int(hashmap_get_size(map), ==,
> >> +		  ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1);
> >> +}
> >
> > Ahhh, so you're using the same function for both case-sensitive and
> > -insensitive tests. So I guess TEST_RUN isn't useful here after all.
> > Personally I'd still rather get rid of setup(), but I don't feel super
> > strongly about it.
>
> Consulting the table with "fooBarFrotz" and checking what gets
> returned (expect "value3" for !icase, or "value4" for icase) is one
> of the things that are missing. In fact, the values stored are not
> even checked with the above test at all.

Yeah, I tried to replicate the 'put' tests from the shellscript.
However, t_get() does the same thing as you mentioned below, so I'll
just remove it and rename t_get() to t_put_get(), and add some
missing checks from t_put() to t_put_get().

>
> >> +static void t_replace(struct hashmap *map, int ignore_case)
> >> +{
> >> +	struct test_entry *entry;
> >> +
> >> +	entry = alloc_test_entry(ignore_case, "key1", "value1");
> >> +	check(hashmap_put_entry(map, entry, ent) == NULL);
> >> +
> >> +	entry = alloc_test_entry(ignore_case, ignore_case ? "Key1" : "key1",
> >> +				 "value2");
> >> +	entry = hashmap_put_entry(map, entry, ent);
> >> +	if (check(entry != NULL))
> >> +		check_str(get_value(entry), "value1");
> >> +	free(entry);
> >> +
> >> +	entry = alloc_test_entry(ignore_case, "fooBarFrotz", "value3");
> >> +	check(hashmap_put_entry(map, entry, ent) == NULL);
> >> +
> >> +	entry = alloc_test_entry(ignore_case,
> >> +				 ignore_case ? "foobarfrotz" : "fooBarFrotz",
> >> +				 "value4");
>
> Curious. If the hashmap is set up for icase use, do callers still
> need to downcase the key? Shouldn't the library take care of that?
> After all, test_entry_cmp() when the hashmap is being used in icase
> mode does strcasecmp() anyway.

Of course we don't need to downcase. But the idea is to insert
"fooBarFrotz" then insert "foobarfrotz" to check if it is considered the
same or not. In a way, it is also testing test_entry_cmp() and
alloc_test_entry(). If we pass "fooBarFrotz" both the times
we can't be sure if it downcases or not, as both are same, we'd
pass even if it didn't downcase. After all, we are testing the
library rather than using it.

Thanks.

>
> >> +	entry = hashmap_put_entry(map, entry, ent);
> >> +	if (check(entry != NULL))
> >> +		check_str(get_value(entry), "value3");
>
> Here the stored value is checked, which is good.
>
> >> +	free(entry);
> >> +}
> >> +
> >> +static void t_get(struct hashmap *map, int ignore_case)
> >> +{
> >> +	struct test_entry *entry;
> >> +	const char *key_val[][2] = { { "key1", "value1" },
> >> +				     { "key2", "value2" },
> >> +				     { "fooBarFrotz", "value3" },
> >> +				     { ignore_case ? "key4" : "foobarfrotz", "value4" } };
> >> +	const char *query[][2] = {
> >> +		{ ignore_case ? "Key1" : "key1", "value1" },
> >> +		{ ignore_case ? "keY2" : "key2", "value2" },
> >> +		{ ignore_case ? "foobarfrotz" : "fooBarFrotz", "value3" }
> >> +	};
> >> +
> >> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> >> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> >> +		check(hashmap_put_entry(map, entry, ent) == NULL);
> >> +	}
> >> +
> >> +	for (size_t i = 0; i < ARRAY_SIZE(query); i++) {
> >> +		entry = get_test_entry(map, ignore_case, query[i][0]);
> >> +		if (check(entry != NULL))
> >> +			check_str(get_value(entry), query[i][1]);
> >> +	}
> >> +
> >> +	check(get_test_entry(map, ignore_case, "notInMap") == NULL);
> >> +}
>
> It is getting dubious if it is worth having t_put, when t_get does
> so similar things and more.
>
> Same comment applies wrt icase, and not limited to t_get() but all
> the remaining test functions (elided).
>
> Thanks.
Ghanshyam Thakkar July 11, 2024, 6:54 p.m. UTC | #6
Phillip Wood <phillip.wood123@gmail.com> wrote:
> On 09/07/2024 20:34, Josh Steadmon wrote:
> > On 2024.07.08 21:45, Ghanshyam Thakkar wrote:
>
> >> +static void t_put(struct hashmap *map, int ignore_case)
> >> +{
> >> +	struct test_entry *entry;
> >> +	const char *key_val[][2] = { { "key1", "value1" },
> >> +				     { "key2", "value2" },
> >> +				     { "fooBarFrotz", "value3" } };
> >> +
> >> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> >> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> >> +		check(hashmap_put_entry(map, entry, ent) == NULL);
> >> +	}
> >> +
> >> +	entry = alloc_test_entry(ignore_case, "foobarfrotz", "value4");
> >> +	entry = hashmap_put_entry(map, entry, ent);
> >> +	check(ignore_case ? entry != NULL : entry == NULL);
> >> +	free(entry);
> >> +
> >> +	check_int(map->tablesize, ==, 64);
> >> +	check_int(hashmap_get_size(map), ==,
> >> +		  ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1);
> >> +}
> > 
> > Ahhh, so you're using the same function for both case-sensitive and
> > -insensitive tests. So I guess TEST_RUN isn't useful here after all.
> > Personally I'd still rather get rid of setup(), but I don't feel super
> > strongly about it.
>
> I'm not sure - we have to pass ignore_case to HASHMAP_INIT and the test
> function so using setup() means we cannot pass different values to the
> two different functions.
>
> For parameterized tests where we calling the same function with
> different inputs using a setup function allows us to
> - write more concise test code
> - easily change the setup of all the tests if the api changes in the
> future.
> - consistently free resources at the end of a test making it easier to
> write leak-free tests
> - assert pre- and post- conditions on all tests
>
> Using TEST_RUN is useful to declare the different inputs for each test.
> For example in the oid-array tests it allows us to write
>
> TEST_RUN("ordered enumeration") {
> const char *input[] = { "88", "44", "aa", "55" };
> const char *expected[] = { "44", "55", "88", "aa" };
>
> TEST_ENUMERATION(input, expected)
> }
>
> rather than declaring a bunch of variables up front a long way from
> where they are used.

Yeah, I changed my mind from removing setup(). I'll keep it as it does
not have anything complex to make manual verifying harder. Also, since
this test does not have any variable inputs, we can just use TEST(),
although TEST_RUN() with variable inputs is very useful in other tests.

> >> +static void t_add(struct hashmap *map, int ignore_case)
> >> +{
> >> +	struct test_entry *entry;
> >> +	const char *key_val[][3] = {
> >> +		{ "key1", "value1", "UNUSED" },
> >> +		{ ignore_case ? "Key1" : "key1", "value2", "UNUSED" },
> >> +		{ "fooBarFrotz", "value3", "UNUSED" },
> >> +		{ ignore_case ? "Foobarfrotz" : "fooBarFrotz", "value4", "UNUSED" }
> >> +	};
> >> +	const char *queries[] = { "key1",
> >> +				  ignore_case ? "Foobarfrotz" : "fooBarFrotz" };
> >> +
> >> +	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> >> +		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> >> +		hashmap_add(map, &entry->ent);
> >> +	}
> >> +
> >> +	for (size_t i = 0; i < ARRAY_SIZE(queries); i++) {
> > 
> > Since we only have one query, can we remove the loop and simplify the
> > following block of code?
> > 
> > Also (here and elsewhere), it might be less confusing to say "UNSEEN" /
> > "SEEN" instead of "UNUSED" / "USED". The latter makes it sound to me
> > like there's some API requirement to have a 3-item array that we don't
> > actually need, but in this case those fields are actually used in
> > key_val_contains() to track duplicates.
>
> The third element just needs to be a boolean flag so it might be better
> to use a struct
>
> const struct {
> char *key;
> char *val;
> char seen;
> } key_val[] = {
> { .key = "key1", .val = "value1" },
> { .key = ignore_case ? "Key1" : "key1" .val = "value2" }, { .key =
> "fooBarFrotz" .val = "value3" },
> { .key = ignore_case ? "Foobarfrotz" : "fooBarFrotz", .value = "value4"
> }
> };

I think we can just use an extra 'char seen[]' for markings and modify
key_val_contains() to have this parameter, instead of having it as a
struct.

Thanks.
diff mbox series

Patch

diff --git a/Makefile b/Makefile
index 3eab701b10..74bb026610 100644
--- a/Makefile
+++ b/Makefile
@@ -1336,6 +1336,7 @@  THIRD_PARTY_SOURCES += sha1dc/%
 UNIT_TEST_PROGRAMS += t-ctype
 UNIT_TEST_PROGRAMS += t-example-decorate
 UNIT_TEST_PROGRAMS += t-hash
+UNIT_TEST_PROGRAMS += t-hashmap
 UNIT_TEST_PROGRAMS += t-mem-pool
 UNIT_TEST_PROGRAMS += t-oidtree
 UNIT_TEST_PROGRAMS += t-prio-queue
diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c
index 2912899558..7b854a7030 100644
--- a/t/helper/test-hashmap.c
+++ b/t/helper/test-hashmap.c
@@ -12,11 +12,6 @@  struct test_entry
 	char key[FLEX_ARRAY];
 };
 
-static const char *get_value(const struct test_entry *e)
-{
-	return e->key + strlen(e->key) + 1;
-}
-
 static int test_entry_cmp(const void *cmp_data,
 			  const struct hashmap_entry *eptr,
 			  const struct hashmap_entry *entry_or_key,
@@ -141,30 +136,16 @@  static void perf_hashmap(unsigned int method, unsigned int rounds)
 /*
  * Read stdin line by line and print result of commands to stdout:
  *
- * hash key -> strhash(key) memhash(key) strihash(key) memihash(key)
- * put key value -> NULL / old value
- * get key -> NULL / value
- * remove key -> NULL / old value
- * iterate -> key1 value1\nkey2 value2\n...
- * size -> tablesize numentries
- *
  * perfhashmap method rounds -> test hashmap.[ch] performance
  */
 int cmd__hashmap(int argc, const char **argv)
 {
 	struct string_list parts = STRING_LIST_INIT_NODUP;
 	struct strbuf line = STRBUF_INIT;
-	int icase;
-	struct hashmap map = HASHMAP_INIT(test_entry_cmp, &icase);
-
-	/* init hash map */
-	icase = argc > 1 && !strcmp("ignorecase", argv[1]);
 
 	/* process commands from stdin */
 	while (strbuf_getline(&line, stdin) != EOF) {
 		char *cmd, *p1, *p2;
-		unsigned int hash = 0;
-		struct test_entry *entry;
 
 		/* break line into command and up to two parameters */
 		string_list_setlen(&parts, 0);
@@ -180,84 +161,8 @@  int cmd__hashmap(int argc, const char **argv)
 		cmd = parts.items[0].string;
 		p1 = parts.nr >= 1 ? parts.items[1].string : NULL;
 		p2 = parts.nr >= 2 ? parts.items[2].string : NULL;
-		if (p1)
-			hash = icase ? strihash(p1) : strhash(p1);
-
-		if (!strcmp("add", cmd) && p1 && p2) {
-
-			/* create entry with key = p1, value = p2 */
-			entry = alloc_test_entry(hash, p1, p2);
-
-			/* add to hashmap */
-			hashmap_add(&map, &entry->ent);
-
-		} else if (!strcmp("put", cmd) && p1 && p2) {
-
-			/* create entry with key = p1, value = p2 */
-			entry = alloc_test_entry(hash, p1, p2);
-
-			/* add / replace entry */
-			entry = hashmap_put_entry(&map, entry, ent);
-
-			/* print and free replaced entry, if any */
-			puts(entry ? get_value(entry) : "NULL");
-			free(entry);
-
-		} else if (!strcmp("get", cmd) && p1) {
-			/* lookup entry in hashmap */
-			entry = hashmap_get_entry_from_hash(&map, hash, p1,
-							struct test_entry, ent);
-
-			/* print result */
-			if (!entry)
-				puts("NULL");
-			hashmap_for_each_entry_from(&map, entry, ent)
-				puts(get_value(entry));
-
-		} else if (!strcmp("remove", cmd) && p1) {
-
-			/* setup static key */
-			struct hashmap_entry key;
-			struct hashmap_entry *rm;
-			hashmap_entry_init(&key, hash);
-
-			/* remove entry from hashmap */
-			rm = hashmap_remove(&map, &key, p1);
-			entry = rm ? container_of(rm, struct test_entry, ent)
-					: NULL;
-
-			/* print result and free entry*/
-			puts(entry ? get_value(entry) : "NULL");
-			free(entry);
-
-		} else if (!strcmp("iterate", cmd)) {
-			struct hashmap_iter iter;
-
-			hashmap_for_each_entry(&map, &iter, entry,
-						ent /* member name */)
-				printf("%s %s\n", entry->key, get_value(entry));
-
-		} else if (!strcmp("size", cmd)) {
-
-			/* print table sizes */
-			printf("%u %u\n", map.tablesize,
-			       hashmap_get_size(&map));
-
-		} else if (!strcmp("intern", cmd) && p1) {
-
-			/* test that strintern works */
-			const char *i1 = strintern(p1);
-			const char *i2 = strintern(p1);
-			if (strcmp(i1, p1))
-				printf("strintern(%s) returns %s\n", p1, i1);
-			else if (i1 == p1)
-				printf("strintern(%s) returns input pointer\n", p1);
-			else if (i1 != i2)
-				printf("strintern(%s) != strintern(%s)", i1, i2);
-			else
-				printf("%s\n", i1);
-
-		} else if (!strcmp("perfhashmap", cmd) && p1 && p2) {
+	
+		if (!strcmp("perfhashmap", cmd) && p1 && p2) {
 
 			perf_hashmap(atoi(p1), atoi(p2));
 
@@ -270,6 +175,5 @@  int cmd__hashmap(int argc, const char **argv)
 
 	string_list_clear(&parts, 0);
 	strbuf_release(&line);
-	hashmap_clear_and_free(&map, struct test_entry, ent);
 	return 0;
 }
diff --git a/t/t0011-hashmap.sh b/t/t0011-hashmap.sh
deleted file mode 100755
index 46e74ad107..0000000000
--- a/t/t0011-hashmap.sh
+++ /dev/null
@@ -1,260 +0,0 @@ 
-#!/bin/sh
-
-test_description='test hashmap and string hash functions'
-
-TEST_PASSES_SANITIZE_LEAK=true
-. ./test-lib.sh
-
-test_hashmap() {
-	echo "$1" | test-tool hashmap $3 > actual &&
-	echo "$2" > expect &&
-	test_cmp expect actual
-}
-
-test_expect_success 'put' '
-
-test_hashmap "put key1 value1
-put key2 value2
-put fooBarFrotz value3
-put foobarfrotz value4
-size" "NULL
-NULL
-NULL
-NULL
-64 4"
-
-'
-
-test_expect_success 'put (case insensitive)' '
-
-test_hashmap "put key1 value1
-put key2 value2
-put fooBarFrotz value3
-size" "NULL
-NULL
-NULL
-64 3" ignorecase
-
-'
-
-test_expect_success 'replace' '
-
-test_hashmap "put key1 value1
-put key1 value2
-put fooBarFrotz value3
-put fooBarFrotz value4
-size" "NULL
-value1
-NULL
-value3
-64 2"
-
-'
-
-test_expect_success 'replace (case insensitive)' '
-
-test_hashmap "put key1 value1
-put Key1 value2
-put fooBarFrotz value3
-put foobarfrotz value4
-size" "NULL
-value1
-NULL
-value3
-64 2" ignorecase
-
-'
-
-test_expect_success 'get' '
-
-test_hashmap "put key1 value1
-put key2 value2
-put fooBarFrotz value3
-put foobarfrotz value4
-get key1
-get key2
-get fooBarFrotz
-get notInMap" "NULL
-NULL
-NULL
-NULL
-value1
-value2
-value3
-NULL"
-
-'
-
-test_expect_success 'get (case insensitive)' '
-
-test_hashmap "put key1 value1
-put key2 value2
-put fooBarFrotz value3
-get Key1
-get keY2
-get foobarfrotz
-get notInMap" "NULL
-NULL
-NULL
-value1
-value2
-value3
-NULL" ignorecase
-
-'
-
-test_expect_success 'add' '
-
-test_hashmap "add key1 value1
-add key1 value2
-add fooBarFrotz value3
-add fooBarFrotz value4
-get key1
-get fooBarFrotz
-get notInMap" "value2
-value1
-value4
-value3
-NULL"
-
-'
-
-test_expect_success 'add (case insensitive)' '
-
-test_hashmap "add key1 value1
-add Key1 value2
-add fooBarFrotz value3
-add foobarfrotz value4
-get key1
-get Foobarfrotz
-get notInMap" "value2
-value1
-value4
-value3
-NULL" ignorecase
-
-'
-
-test_expect_success 'remove' '
-
-test_hashmap "put key1 value1
-put key2 value2
-put fooBarFrotz value3
-remove key1
-remove key2
-remove notInMap
-size" "NULL
-NULL
-NULL
-value1
-value2
-NULL
-64 1"
-
-'
-
-test_expect_success 'remove (case insensitive)' '
-
-test_hashmap "put key1 value1
-put key2 value2
-put fooBarFrotz value3
-remove Key1
-remove keY2
-remove notInMap
-size" "NULL
-NULL
-NULL
-value1
-value2
-NULL
-64 1" ignorecase
-
-'
-
-test_expect_success 'iterate' '
-	test-tool hashmap >actual.raw <<-\EOF &&
-	put key1 value1
-	put key2 value2
-	put fooBarFrotz value3
-	iterate
-	EOF
-
-	cat >expect <<-\EOF &&
-	NULL
-	NULL
-	NULL
-	fooBarFrotz value3
-	key1 value1
-	key2 value2
-	EOF
-
-	sort <actual.raw >actual &&
-	test_cmp expect actual
-'
-
-test_expect_success 'iterate (case insensitive)' '
-	test-tool hashmap ignorecase >actual.raw <<-\EOF &&
-	put key1 value1
-	put key2 value2
-	put fooBarFrotz value3
-	iterate
-	EOF
-
-	cat >expect <<-\EOF &&
-	NULL
-	NULL
-	NULL
-	fooBarFrotz value3
-	key1 value1
-	key2 value2
-	EOF
-
-	sort <actual.raw >actual &&
-	test_cmp expect actual
-'
-
-test_expect_success 'grow / shrink' '
-
-	rm -f in &&
-	rm -f expect &&
-	for n in $(test_seq 51)
-	do
-		echo put key$n value$n >> in &&
-		echo NULL >> expect || return 1
-	done &&
-	echo size >> in &&
-	echo 64 51 >> expect &&
-	echo put key52 value52 >> in &&
-	echo NULL >> expect &&
-	echo size >> in &&
-	echo 256 52 >> expect &&
-	for n in $(test_seq 12)
-	do
-		echo remove key$n >> in &&
-		echo value$n >> expect || return 1
-	done &&
-	echo size >> in &&
-	echo 256 40 >> expect &&
-	echo remove key40 >> in &&
-	echo value40 >> expect &&
-	echo size >> in &&
-	echo 64 39 >> expect &&
-	test-tool hashmap <in >out &&
-	test_cmp expect out
-
-'
-
-test_expect_success 'string interning' '
-
-test_hashmap "intern value1
-intern Value1
-intern value2
-intern value2
-" "value1
-Value1
-value2
-value2"
-
-'
-
-test_done
diff --git a/t/unit-tests/t-hashmap.c b/t/unit-tests/t-hashmap.c
new file mode 100644
index 0000000000..1c951fcfd8
--- /dev/null
+++ b/t/unit-tests/t-hashmap.c
@@ -0,0 +1,359 @@ 
+#include "test-lib.h"
+#include "hashmap.h"
+#include "strbuf.h"
+
+struct test_entry {
+	int padding; /* hashmap entry no longer needs to be the first member */
+	struct hashmap_entry ent;
+	/* key and value as two \0-terminated strings */
+	char key[FLEX_ARRAY];
+};
+
+static int test_entry_cmp(const void *cmp_data,
+			  const struct hashmap_entry *eptr,
+			  const struct hashmap_entry *entry_or_key,
+			  const void *keydata)
+{
+	const int ignore_case = cmp_data ? *((int *)cmp_data) : 0;
+	const struct test_entry *e1, *e2;
+	const char *key = keydata;
+
+	e1 = container_of(eptr, const struct test_entry, ent);
+	e2 = container_of(entry_or_key, const struct test_entry, ent);
+
+	if (ignore_case)
+		return strcasecmp(e1->key, key ? key : e2->key);
+	else
+		return strcmp(e1->key, key ? key : e2->key);
+}
+
+static const char *get_value(const struct test_entry *e)
+{
+	return e->key + strlen(e->key) + 1;
+}
+
+static struct test_entry *alloc_test_entry(unsigned int ignore_case,
+					   const char *key, const char *value)
+{
+	size_t klen = strlen(key);
+	size_t vlen = strlen(value);
+	unsigned int hash = ignore_case ? strihash(key) : strhash(key);
+	struct test_entry *entry = xmalloc(st_add4(sizeof(*entry), klen, vlen, 2));
+
+	hashmap_entry_init(&entry->ent, hash);
+	memcpy(entry->key, key, klen + 1);
+	memcpy(entry->key + klen + 1, value, vlen + 1);
+	return entry;
+}
+
+static struct test_entry *get_test_entry(struct hashmap *map,
+					 unsigned int ignore_case, const char *key)
+{
+	return hashmap_get_entry_from_hash(
+		map, ignore_case ? strihash(key) : strhash(key), key,
+		struct test_entry, ent);
+}
+
+static int key_val_contains(const char *key_val[][3], size_t n,
+			    struct test_entry *entry)
+{
+	for (size_t i = 0; i < n; i++) {
+		if (!strcmp(entry->key, key_val[i][0]) &&
+		    !strcmp(get_value(entry), key_val[i][1])) {
+			if (!strcmp(key_val[i][2], "USED"))
+				return 2;
+			key_val[i][2] = "USED";
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static void setup(void (*f)(struct hashmap *map, int ignore_case),
+		  int ignore_case)
+{
+	struct hashmap map = HASHMAP_INIT(test_entry_cmp, &ignore_case);
+
+	f(&map, ignore_case);
+	hashmap_clear_and_free(&map, struct test_entry, ent);
+}
+
+static void t_put(struct hashmap *map, int ignore_case)
+{
+	struct test_entry *entry;
+	const char *key_val[][2] = { { "key1", "value1" },
+				     { "key2", "value2" },
+				     { "fooBarFrotz", "value3" } };
+
+	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
+		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
+		check(hashmap_put_entry(map, entry, ent) == NULL);
+	}
+
+	entry = alloc_test_entry(ignore_case, "foobarfrotz", "value4");
+	entry = hashmap_put_entry(map, entry, ent);
+	check(ignore_case ? entry != NULL : entry == NULL);
+	free(entry);
+
+	check_int(map->tablesize, ==, 64);
+	check_int(hashmap_get_size(map), ==,
+		  ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1);
+}
+
+static void t_replace(struct hashmap *map, int ignore_case)
+{
+	struct test_entry *entry;
+
+	entry = alloc_test_entry(ignore_case, "key1", "value1");
+	check(hashmap_put_entry(map, entry, ent) == NULL);
+
+	entry = alloc_test_entry(ignore_case, ignore_case ? "Key1" : "key1",
+				 "value2");
+	entry = hashmap_put_entry(map, entry, ent);
+	if (check(entry != NULL))
+		check_str(get_value(entry), "value1");
+	free(entry);
+
+	entry = alloc_test_entry(ignore_case, "fooBarFrotz", "value3");
+	check(hashmap_put_entry(map, entry, ent) == NULL);
+
+	entry = alloc_test_entry(ignore_case,
+				 ignore_case ? "foobarfrotz" : "fooBarFrotz",
+				 "value4");
+	entry = hashmap_put_entry(map, entry, ent);
+	if (check(entry != NULL))
+		check_str(get_value(entry), "value3");
+	free(entry);
+}
+
+static void t_get(struct hashmap *map, int ignore_case)
+{
+	struct test_entry *entry;
+	const char *key_val[][2] = { { "key1", "value1" },
+				     { "key2", "value2" },
+				     { "fooBarFrotz", "value3" },
+				     { ignore_case ? "key4" : "foobarfrotz", "value4" } };
+	const char *query[][2] = {
+		{ ignore_case ? "Key1" : "key1", "value1" },
+		{ ignore_case ? "keY2" : "key2", "value2" },
+		{ ignore_case ? "foobarfrotz" : "fooBarFrotz", "value3" }
+	};
+
+	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
+		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
+		check(hashmap_put_entry(map, entry, ent) == NULL);
+	}
+
+	for (size_t i = 0; i < ARRAY_SIZE(query); i++) {
+		entry = get_test_entry(map, ignore_case, query[i][0]);
+		if (check(entry != NULL))
+			check_str(get_value(entry), query[i][1]);
+	}
+
+	check(get_test_entry(map, ignore_case, "notInMap") == NULL);
+}
+
+static void t_add(struct hashmap *map, int ignore_case)
+{
+	struct test_entry *entry;
+	const char *key_val[][3] = {
+		{ "key1", "value1", "UNUSED" },
+		{ ignore_case ? "Key1" : "key1", "value2", "UNUSED" },
+		{ "fooBarFrotz", "value3", "UNUSED" },
+		{ ignore_case ? "Foobarfrotz" : "fooBarFrotz", "value4", "UNUSED" }
+	};
+	const char *queries[] = { "key1",
+				  ignore_case ? "Foobarfrotz" : "fooBarFrotz" };
+
+	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
+		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
+		hashmap_add(map, &entry->ent);
+	}
+
+	for (size_t i = 0; i < ARRAY_SIZE(queries); i++) {
+		int count = 0;
+		entry = hashmap_get_entry_from_hash(map,
+			ignore_case ? strihash(queries[i]) :
+				      strhash(queries[i]),
+			queries[i], struct test_entry, ent);
+
+		hashmap_for_each_entry_from(map, entry, ent)
+		{
+			int ret;
+			if (!check_int((ret = key_val_contains(
+						key_val, ARRAY_SIZE(key_val),
+						entry)), ==, 0)) {
+				switch (ret) {
+				case 1:
+					test_msg("found entry was not given in the input\n"
+						 "    key: %s\n  value: %s",
+						 entry->key, get_value(entry));
+					break;
+				case 2:
+					test_msg("duplicate entry detected\n"
+						 "    key: %s\n  value: %s",
+						 entry->key, get_value(entry));
+					break;
+				}
+			} else {
+				count++;
+			}
+		}
+		check_int(count, ==, 2);
+	}
+	check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
+	check(get_test_entry(map, ignore_case, "notInMap") == NULL);
+}
+
+static void t_remove(struct hashmap *map, int ignore_case)
+{
+	struct test_entry *entry, *removed;
+	const char *key_val[][2] = { { "key1", "value1" },
+				     { "key2", "value2" },
+				     { "fooBarFrotz", "value3" } };
+	const char *remove[][2] = { { ignore_case ? "Key1" : "key1", "value1" },
+				    { ignore_case ? "keY2" : "key2", "value2" } };
+
+	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
+		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
+		check(hashmap_put_entry(map, entry, ent) == NULL);
+	}
+
+	for (size_t i = 0; i < ARRAY_SIZE(remove); i++) {
+		entry = alloc_test_entry(ignore_case, remove[i][0], "");
+		removed = hashmap_remove_entry(map, entry, ent, remove[i][0]);
+		if (check(removed != NULL))
+			check_str(get_value(removed), remove[i][1]);
+		free(entry);
+		free(removed);
+	}
+
+	entry = alloc_test_entry(ignore_case, "notInMap", "");
+	check(hashmap_remove_entry(map, entry, ent, "notInMap") == NULL);
+	free(entry);
+}
+
+static void t_iterate(struct hashmap *map, int ignore_case)
+{
+	struct test_entry *entry;
+	struct hashmap_iter iter;
+	const char *key_val[][3] = { { "key1", "value1", "UNUSED" },
+				     { "key2", "value2", "UNUSED" },
+				     { "fooBarFrotz", "value3", "UNUSED" } };
+	int count = 0;
+
+	for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
+		entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
+		check(hashmap_put_entry(map, entry, ent) == NULL);
+	}
+
+	hashmap_for_each_entry(map, &iter, entry, ent /* member name */)
+	{
+		int ret;
+		if (!check_int((ret = key_val_contains(key_val, ARRAY_SIZE(key_val),
+						       entry)), ==, 0)) {
+			switch (ret) {
+			case 1:
+				test_msg("found entry was not given in the input\n"
+					 "    key: %s\n  value: %s",
+					 entry->key, get_value(entry));
+				break;
+			case 2:
+				test_msg("duplicate entry detected\n"
+					 "    key: %s\n  value: %s",
+					 entry->key, get_value(entry));
+				break;
+			}
+		} else {
+			count++;
+		}
+	}
+	check_int(count, ==, ARRAY_SIZE(key_val));
+	check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
+}
+
+static void t_alloc(struct hashmap *map, int ignore_case)
+{
+	struct test_entry *entry, *removed;
+
+	for (int i = 1; i <= 51; i++) {
+		char *key = xstrfmt("key%d", i);
+		char *value = xstrfmt("value%d", i);
+		entry = alloc_test_entry(ignore_case, key, value);
+		check(hashmap_put_entry(map, entry, ent) == NULL);
+		free(key);
+		free(value);
+	}
+	check_int(map->tablesize, ==, 64);
+	check_int(hashmap_get_size(map), ==, 51);
+
+	entry = alloc_test_entry(ignore_case, "key52", "value52");
+	check(hashmap_put_entry(map, entry, ent) == NULL);
+	check_int(map->tablesize, ==, 256);
+	check_int(hashmap_get_size(map), ==, 52);
+
+	for (int i = 1; i <= 12; i++) {
+		char *key = xstrfmt("key%d", i);
+		char *value = xstrfmt("value%d", i);
+
+		entry = alloc_test_entry(ignore_case, key, "");
+		removed = hashmap_remove_entry(map, entry, ent, key);
+		if (check(removed != NULL))
+			check_str(value, get_value(removed));
+		free(key);
+		free(value);
+		free(entry);
+		free(removed);
+	}
+	check_int(map->tablesize, ==, 256);
+	check_int(hashmap_get_size(map), ==, 40);
+
+	entry = alloc_test_entry(ignore_case, "key40", "");
+	removed = hashmap_remove_entry(map, entry, ent, "key40");
+	if (check(removed != NULL))
+		check_str("value40", get_value(removed));
+	check_int(map->tablesize, ==, 64);
+	check_int(hashmap_get_size(map), ==, 39);
+	free(entry);
+	free(removed);
+}
+
+static void t_intern(struct hashmap *map, int ignore_case)
+{
+	const char *values[] = { "value1", "Value1", "value2", "value2" };
+
+	for (size_t i = 0; i < ARRAY_SIZE(values); i++) {
+		const char *i1 = strintern(values[i]);
+		const char *i2 = strintern(values[i]);
+
+		if (!check(!strcmp(i1, values[i])))
+			test_msg("strintern(%s) returns %s\n", values[i], i1);
+		else if (!check(i1 != values[i]))
+			test_msg("strintern(%s) returns input pointer\n",
+				 values[i]);
+		else if (!check(i1 == i2))
+			test_msg("address('%s') != address('%s'), so strintern('%s') != strintern('%s')",
+				 i1, i2, values[i], values[i]);
+		else
+			check_str(i1, values[i]);
+	}
+}
+
+int cmd_main(int argc UNUSED, const char **argv UNUSED)
+{
+	TEST(setup(t_put, 0), "put works");
+	TEST(setup(t_put, 1), "put (case insensitive) works");
+	TEST(setup(t_replace, 0), "replace works");
+	TEST(setup(t_replace, 1), "replace (case insensitive) works");
+	TEST(setup(t_get, 0), "get works");
+	TEST(setup(t_get, 1), "get (case insensitive) works");
+	TEST(setup(t_add, 0), "add works");
+	TEST(setup(t_add, 1), "add (case insensitive) works");
+	TEST(setup(t_remove, 0), "remove works");
+	TEST(setup(t_remove, 1), "remove (case insensitive) works");
+	TEST(setup(t_iterate, 0), "iterate works");
+	TEST(setup(t_iterate, 1), "iterate (case insensitive) works");
+	TEST(setup(t_alloc, 0), "grow / shrink works");
+	TEST(setup(t_intern, 0), "string interning works");
+	return test_done();
+}