diff mbox series

[4/5] wildmatch: use char instead of uchar

Message ID 20230210075939.44949-5-masahiroy@kernel.org (mailing list archive)
State Superseded
Headers show
Series Clean up wildmatch.c | expand

Commit Message

Masahiro Yamada Feb. 10, 2023, 7:59 a.m. UTC
dowild() casts (char *) and (uchar *) back-and-forth, which is
ugly.

This file was imported from rsync, which started to use (unsigned char)
since the following commit:

 | commit e11c42511903adc6d27cf1671cc76fa711ea37e5
 | Author: Wayne Davison <wayned@samba.org>
 | Date:   Sun Jul 6 04:33:54 2003 +0000
 |
 |     - Added [:class:] handling to the character-class code.
 |     - Use explicit unsigned characters for proper set checks.
 |     - Made the character-class code honor backslash escapes.
 |     - Accept '^' as a class-negation character in addition to '!'.

Perhaps, it was needed because rsync relies on is*() from <ctypes.h>.

GIT has its own implementations, so the behavior is clear.

In fact, commit 4546738b58a0 ("Unlocalized isspace and friends")
says one of the motivations is "we want the right signed behaviour".

sane_istest() casts the given character to (unsigned char) anyway
before sane_ctype[] table lookup, so dowild() can use 'char'.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---

 wildmatch.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

Comments

Ævar Arnfjörð Bjarmason Feb. 10, 2023, 1:09 p.m. UTC | #1
On Fri, Feb 10 2023, Masahiro Yamada wrote:

> dowild() casts (char *) and (uchar *) back-and-forth, which is
> ugly.
>
> This file was imported from rsync, which started to use (unsigned char)
> since the following commit:
>
>  | commit e11c42511903adc6d27cf1671cc76fa711ea37e5
>  | Author: Wayne Davison <wayned@samba.org>
>  | Date:   Sun Jul 6 04:33:54 2003 +0000
>  |
>  |     - Added [:class:] handling to the character-class code.
>  |     - Use explicit unsigned characters for proper set checks.
>  |     - Made the character-class code honor backslash escapes.
>  |     - Accept '^' as a class-negation character in addition to '!'.
>
> Perhaps, it was needed because rsync relies on is*() from <ctypes.h>.
>
> GIT has its own implementations, so the behavior is clear.
>
> In fact, commit 4546738b58a0 ("Unlocalized isspace and friends")
> says one of the motivations is "we want the right signed behaviour".
>
> sane_istest() casts the given character to (unsigned char) anyway
> before sane_ctype[] table lookup, so dowild() can use 'char'.
>
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> ---
>
>  wildmatch.c | 24 +++++++++++-------------
>  1 file changed, 11 insertions(+), 13 deletions(-)
>
> diff --git a/wildmatch.c b/wildmatch.c
> index 93800b8eac..7dffd783cb 100644
> --- a/wildmatch.c
> +++ b/wildmatch.c
> @@ -12,21 +12,19 @@
>  #include "cache.h"
>  #include "wildmatch.h"
>  
> -typedef unsigned char uchar;
> -
>  #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
>  				    && *(class) == *(litmatch) \
> -				    && strncmp((char*)class, litmatch, len) == 0)
> +				    && strncmp(class, litmatch, len) == 0)
>  
>  /* Match pattern "p" against "text" */
> -static int dowild(const uchar *p, const uchar *text, unsigned int flags)
> +static int dowild(const char *p, const char *text, unsigned int flags)
>  {
> -	uchar p_ch;
> -	const uchar *pattern = p;
> +	char p_ch;
> +	const char *pattern = p;
>  
>  	for ( ; (p_ch = *p) != '\0'; text++, p++) {
>  		int matched, match_slash, negated;
> -		uchar t_ch, prev_ch;
> +		char t_ch, prev_ch;
>  		if ((t_ch = *text) == '\0' && p_ch != '*')
>  			return WM_ABORT_ALL;
>  		if ((flags & WM_CASEFOLD) && isupper(t_ch))
> @@ -50,7 +48,7 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags)
>  			continue;
>  		case '*':
>  			if (*++p == '*') {
> -				const uchar *prev_p = p - 2;
> +				const char *prev_p = p - 2;
>  				while (*++p == '*') {}
>  				if (!(flags & WM_PATHNAME))
>  					/* without WM_PATHNAME, '*' == '**' */
> @@ -90,10 +88,10 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags)
>  				 * with WM_PATHNAME matches the next
>  				 * directory
>  				 */
> -				const char *slash = strchr((char*)text, '/');
> +				const char *slash = strchr(text, '/');
>  				if (!slash)
>  					return WM_NOMATCH;
> -				text = (const uchar*)slash;
> +				text = slash;
>  				/* the slash is consumed by the top-level for loop */
>  				break;
>  			}
> @@ -160,13 +158,13 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags)
>  					if (t_ch <= p_ch && t_ch >= prev_ch)
>  						matched = 1;
>  					else if ((flags & WM_CASEFOLD) && islower(t_ch)) {
> -						uchar t_ch_upper = toupper(t_ch);
> +						char t_ch_upper = toupper(t_ch);
>  						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
>  							matched = 1;
>  					}
>  					p_ch = 0; /* This makes "prev_ch" get set to 0. */
>  				} else if (p_ch == '[' && p[1] == ':') {
> -					const uchar *s;
> +					const char *s;
>  					int i;
>  					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
>  					if (!p_ch)
> @@ -237,5 +235,5 @@ static int dowild(const uchar *p, const uchar *text, unsigned int flags)
>  /* Match the "pattern" against the "text" string. */
>  int wildmatch(const char *pattern, const char *text, unsigned int flags)
>  {
> -	return dowild((const uchar*)pattern, (const uchar*)text, flags);
> +	return dowild(pattern, text, flags);
>  }

This looks good to me. I independently wrote much the same a while ago
for another reason, in: https://github.com/avar/git/commit/079f555375a

I.e. this happens to be the only bit in-tree that's stopping us from
running the xlc compiler in the c99 mode.

My solution was different, but I like yours better. I had not done your
analysis to discover that we didn't need this to be unsigned in the
first place, I merly converted the "uchar" to an "unsigned char".
diff mbox series

Patch

diff --git a/wildmatch.c b/wildmatch.c
index 93800b8eac..7dffd783cb 100644
--- a/wildmatch.c
+++ b/wildmatch.c
@@ -12,21 +12,19 @@ 
 #include "cache.h"
 #include "wildmatch.h"
 
-typedef unsigned char uchar;
-
 #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
 				    && *(class) == *(litmatch) \
-				    && strncmp((char*)class, litmatch, len) == 0)
+				    && strncmp(class, litmatch, len) == 0)
 
 /* Match pattern "p" against "text" */
-static int dowild(const uchar *p, const uchar *text, unsigned int flags)
+static int dowild(const char *p, const char *text, unsigned int flags)
 {
-	uchar p_ch;
-	const uchar *pattern = p;
+	char p_ch;
+	const char *pattern = p;
 
 	for ( ; (p_ch = *p) != '\0'; text++, p++) {
 		int matched, match_slash, negated;
-		uchar t_ch, prev_ch;
+		char t_ch, prev_ch;
 		if ((t_ch = *text) == '\0' && p_ch != '*')
 			return WM_ABORT_ALL;
 		if ((flags & WM_CASEFOLD) && isupper(t_ch))
@@ -50,7 +48,7 @@  static int dowild(const uchar *p, const uchar *text, unsigned int flags)
 			continue;
 		case '*':
 			if (*++p == '*') {
-				const uchar *prev_p = p - 2;
+				const char *prev_p = p - 2;
 				while (*++p == '*') {}
 				if (!(flags & WM_PATHNAME))
 					/* without WM_PATHNAME, '*' == '**' */
@@ -90,10 +88,10 @@  static int dowild(const uchar *p, const uchar *text, unsigned int flags)
 				 * with WM_PATHNAME matches the next
 				 * directory
 				 */
-				const char *slash = strchr((char*)text, '/');
+				const char *slash = strchr(text, '/');
 				if (!slash)
 					return WM_NOMATCH;
-				text = (const uchar*)slash;
+				text = slash;
 				/* the slash is consumed by the top-level for loop */
 				break;
 			}
@@ -160,13 +158,13 @@  static int dowild(const uchar *p, const uchar *text, unsigned int flags)
 					if (t_ch <= p_ch && t_ch >= prev_ch)
 						matched = 1;
 					else if ((flags & WM_CASEFOLD) && islower(t_ch)) {
-						uchar t_ch_upper = toupper(t_ch);
+						char t_ch_upper = toupper(t_ch);
 						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
 							matched = 1;
 					}
 					p_ch = 0; /* This makes "prev_ch" get set to 0. */
 				} else if (p_ch == '[' && p[1] == ':') {
-					const uchar *s;
+					const char *s;
 					int i;
 					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
 					if (!p_ch)
@@ -237,5 +235,5 @@  static int dowild(const uchar *p, const uchar *text, unsigned int flags)
 /* Match the "pattern" against the "text" string. */
 int wildmatch(const char *pattern, const char *text, unsigned int flags)
 {
-	return dowild((const uchar*)pattern, (const uchar*)text, flags);
+	return dowild(pattern, text, flags);
 }