diff mbox series

[v2,3/6] t/helper/test-hashmap.c: avoid using `strtok()`

Message ID 0ae07dec3663d7cbb0f8662c47485c0667a879b9.1681845518.git.me@ttaylorr.com (mailing list archive)
State New, archived
Headers show
Series banned: mark `strtok()` as banned | expand

Commit Message

Taylor Blau April 18, 2023, 7:18 p.m. UTC
Avoid using the non-reentrant `strtok()` to separate the parts of each
incoming command. Rather than replacing it with `strtok_r()`, let's
use the friendlier `string_list_split_in_place_multi()`.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 t/helper/test-hashmap.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

Comments

Jeff King April 22, 2023, 11:16 a.m. UTC | #1
On Tue, Apr 18, 2023 at 03:18:49PM -0400, Taylor Blau wrote:

> @@ -159,21 +161,34 @@ int cmd__hashmap(int argc, const char **argv)
>  
>  	/* process commands from stdin */
>  	while (strbuf_getline(&line, stdin) != EOF) {
> -		char *cmd, *p1 = NULL, *p2 = NULL;
> +		char *cmd, *p1, *p2;
>  		unsigned int hash = 0;
>  		struct test_entry *entry;
>  
> +		/*
> +		 * Because we memdup() the arguments out of the
> +		 * string_list before inserting them into the hashmap,
> +		 * it's OK to set its length back to zero to avoid
> +		 * re-allocating the items array once per line.
> +		 *
> +		 * By doing so, we'll instead overwrite the existing
> +		 * entries and avoid re-allocating.
> +		 */
> +		string_list_setlen(&parts, 0);
>  		/* break line into command and up to two parameters */
> -		cmd = strtok(line.buf, DELIM);
> +		string_list_split_in_place_multi(&parts, line.buf, DELIM, 2);
> +

I'd argue we can drop this comment now. Having string_list_setlen()
makes it a blessed pattern, and I don't think there's anything special
about this caller that makes it more or less so. Obviously yes, the
string list items won't be valid as we enter a new loop iteration. But
that is always true of split_in_place(), not to mention strtok(),
because we are overwriting the buffer in each loop.

Ditto for the later commits which have similar (if shorter) comments.

-Peff
Taylor Blau April 24, 2023, 9:19 p.m. UTC | #2
On Sat, Apr 22, 2023 at 07:16:57AM -0400, Jeff King wrote:
> On Tue, Apr 18, 2023 at 03:18:49PM -0400, Taylor Blau wrote:
>
> > @@ -159,21 +161,34 @@ int cmd__hashmap(int argc, const char **argv)
> >
> >  	/* process commands from stdin */
> >  	while (strbuf_getline(&line, stdin) != EOF) {
> > -		char *cmd, *p1 = NULL, *p2 = NULL;
> > +		char *cmd, *p1, *p2;
> >  		unsigned int hash = 0;
> >  		struct test_entry *entry;
> >
> > +		/*
> > +		 * Because we memdup() the arguments out of the
> > +		 * string_list before inserting them into the hashmap,
> > +		 * it's OK to set its length back to zero to avoid
> > +		 * re-allocating the items array once per line.
> > +		 *
> > +		 * By doing so, we'll instead overwrite the existing
> > +		 * entries and avoid re-allocating.
> > +		 */
> > +		string_list_setlen(&parts, 0);
> >  		/* break line into command and up to two parameters */
> > -		cmd = strtok(line.buf, DELIM);
> > +		string_list_split_in_place_multi(&parts, line.buf, DELIM, 2);
> > +
>
> I'd argue we can drop this comment now. Having string_list_setlen()
> makes it a blessed pattern, and I don't think there's anything special
> about this caller that makes it more or less so. Obviously yes, the
> string list items won't be valid as we enter a new loop iteration. But
> that is always true of split_in_place(), not to mention strtok(),
> because we are overwriting the buffer in each loop.

Agreed, I think that part of the point of string_list_setlen() is that
this is a blessed pattern, so shouldn't need a comment.

Thanks,
Taylor
diff mbox series

Patch

diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c
index 36ff07bd4b..5a3e74a3e5 100644
--- a/t/helper/test-hashmap.c
+++ b/t/helper/test-hashmap.c
@@ -2,6 +2,7 @@ 
 #include "git-compat-util.h"
 #include "hashmap.h"
 #include "strbuf.h"
+#include "string-list.h"
 
 struct test_entry
 {
@@ -150,6 +151,7 @@  static void perf_hashmap(unsigned int method, unsigned int rounds)
  */
 int cmd__hashmap(int argc, const char **argv)
 {
+	struct string_list parts = STRING_LIST_INIT_NODUP;
 	struct strbuf line = STRBUF_INIT;
 	int icase;
 	struct hashmap map = HASHMAP_INIT(test_entry_cmp, &icase);
@@ -159,21 +161,34 @@  int cmd__hashmap(int argc, const char **argv)
 
 	/* process commands from stdin */
 	while (strbuf_getline(&line, stdin) != EOF) {
-		char *cmd, *p1 = NULL, *p2 = NULL;
+		char *cmd, *p1, *p2;
 		unsigned int hash = 0;
 		struct test_entry *entry;
 
+		/*
+		 * Because we memdup() the arguments out of the
+		 * string_list before inserting them into the hashmap,
+		 * it's OK to set its length back to zero to avoid
+		 * re-allocating the items array once per line.
+		 *
+		 * By doing so, we'll instead overwrite the existing
+		 * entries and avoid re-allocating.
+		 */
+		string_list_setlen(&parts, 0);
 		/* break line into command and up to two parameters */
-		cmd = strtok(line.buf, DELIM);
+		string_list_split_in_place_multi(&parts, line.buf, DELIM, 2);
+
 		/* ignore empty lines */
-		if (!cmd || *cmd == '#')
+		if (!parts.nr)
+			continue;
+		if (!*parts.items[0].string || *parts.items[0].string == '#')
 			continue;
 
-		p1 = strtok(NULL, DELIM);
-		if (p1) {
+		cmd = parts.items[0].string;
+		p1 = parts.nr >= 1 ? parts.items[1].string : NULL;
+		p2 = parts.nr >= 2 ? parts.items[2].string : NULL;
+		if (p1)
 			hash = icase ? strihash(p1) : strhash(p1);
-			p2 = strtok(NULL, DELIM);
-		}
 
 		if (!strcmp("add", cmd) && p1 && p2) {
 
@@ -260,6 +275,7 @@  int cmd__hashmap(int argc, const char **argv)
 		}
 	}
 
+	string_list_clear(&parts, 0);
 	strbuf_release(&line);
 	hashmap_clear_and_free(&map, struct test_entry, ent);
 	return 0;