diff mbox series

[RFC,1/9] lib/string: Add function to trim duplicate WS

Message ID 20191223225558.19242-2-tasleson@redhat.com (mailing list archive)
State New, archived
Headers show
Series Add persistent durable identifier to storage log messages | expand

Commit Message

Tony Asleson Dec. 23, 2019, 10:55 p.m. UTC
Adds function strim_dupe which trims leading & trailing whitespace and
duplicate adjacent.  Initial use case is to shorten T10 storage IDs.

Signed-off-by: Tony Asleson <tasleson@redhat.com>
---
 include/linux/string.h |  2 ++
 lib/string.c           | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

Comments

Matthew Wilcox Dec. 23, 2019, 11:28 p.m. UTC | #1
On Mon, Dec 23, 2019 at 04:55:50PM -0600, Tony Asleson wrote:
> +/**
> + * Removes leading and trailing whitespace and removes duplicate
> + * adjacent whitespace in a string, modifies string in place.
> + * @s The %NUL-terminated string to have spaces removed
> + * Returns the new length
> + */

This isn't good kernel-doc.  See Documentation/doc-guide/kernel-doc.rst
Compile with W=1 to get the format checked.

> +size_t strim_dupe(char *s)
> +{
> +	size_t ret = 0;
> +	char *w = s;
> +	char *p;
> +
> +	/*
> +	 * This will remove all leading and duplicate adjacent, but leave
> +	 * 1 space at the end if one or more are present.
> +	 */
> +	for (p = s; *p != '\0'; ++p) {
> +		if (!isspace(*p) || (p != s && !isspace(*(p - 1)))) {
> +			*w = *p;
> +			++w;
> +			ret += 1;
> +		}
> +	}

I'd be tempted to do ...

	size_t ret = 0;
	char *w = s;
	bool last_space = false;

	do {
		bool this_space = isspace(*s);

		if (!this_space || !last_space) {
			*w++ = *s;
			ret++;
		}
		s++;
		last_space = this_space;
	} while (s[-1] != '\0');

> +	/* Take off the last character if it's a space too */
> +	if (ret && isspace(*(w - 1))) {
> +		ret--;
> +		*(w - 1) = '\0';
> +	}
> +	return ret;
> +}
> +EXPORT_SYMBOL(strim_dupe);
> +
>  #ifndef __HAVE_ARCH_STRLEN
>  /**
>   * strlen - Find the length of a string
> -- 
> 2.21.0
>
Tony Asleson Jan. 2, 2020, 10:52 p.m. UTC | #2
On 12/23/19 5:28 PM, Matthew Wilcox wrote:
> On Mon, Dec 23, 2019 at 04:55:50PM -0600, Tony Asleson wrote:
>> +/**
>> + * Removes leading and trailing whitespace and removes duplicate
>> + * adjacent whitespace in a string, modifies string in place.
>> + * @s The %NUL-terminated string to have spaces removed
>> + * Returns the new length
>> + */
> 
> This isn't good kernel-doc.  See Documentation/doc-guide/kernel-doc.rst
> Compile with W=1 to get the format checked.

Indeed, I'll correct it.

>> +size_t strim_dupe(char *s)
>> +{
>> +	size_t ret = 0;
>> +	char *w = s;
>> +	char *p;
>> +
>> +	/*
>> +	 * This will remove all leading and duplicate adjacent, but leave
>> +	 * 1 space at the end if one or more are present.
>> +	 */
>> +	for (p = s; *p != '\0'; ++p) {
>> +		if (!isspace(*p) || (p != s && !isspace(*(p - 1)))) {
>> +			*w = *p;
>> +			++w;
>> +			ret += 1;
>> +		}
>> +	}
> 
> I'd be tempted to do ...
> 
> 	size_t ret = 0;
> 	char *w = s;
> 	bool last_space = false;
> 
> 	do {
> 		bool this_space = isspace(*s);
> 
> 		if (!this_space || !last_space) {
> 			*w++ = *s;
> 			ret++;
> 		}
> 		s++;
> 		last_space = this_space;
> 	} while (s[-1] != '\0');

That leaves a starting and trailing WS, how about something like this?

size_t strim_dupe(char *s)
{
	size_t ret = 0;
	char *w = s;
	bool last_space = false;

	do {
		bool this_space = isspace(*s);
		if (!this_space || (!last_space && ret)) {
			*w++ = *s;
			ret++;
		}
		s++;
		last_space = this_space;
	} while (s[-1] != '\0');

	if (ret > 1 && isspace(w[-2])) {
		w[-2] = '\0';
		ret--;
	}

	ret--;
	return ret;
}

Thanks
-Tony
Tony Asleson Jan. 3, 2020, 2:30 p.m. UTC | #3
On 1/2/20 4:52 PM, Tony Asleson wrote:
> On 12/23/19 5:28 PM, Matthew Wilcox wrote:
>> On Mon, Dec 23, 2019 at 04:55:50PM -0600, Tony Asleson wrote:
>>> +/**
>>> + * Removes leading and trailing whitespace and removes duplicate
>>> + * adjacent whitespace in a string, modifies string in place.
>>> + * @s The %NUL-terminated string to have spaces removed
>>> + * Returns the new length
>>> + */
>>
>> This isn't good kernel-doc.  See Documentation/doc-guide/kernel-doc.rst
>> Compile with W=1 to get the format checked.
> 
> Indeed, I'll correct it.
> 
>>> +size_t strim_dupe(char *s)
>>> +{
>>> +	size_t ret = 0;
>>> +	char *w = s;
>>> +	char *p;
>>> +
>>> +	/*
>>> +	 * This will remove all leading and duplicate adjacent, but leave
>>> +	 * 1 space at the end if one or more are present.
>>> +	 */
>>> +	for (p = s; *p != '\0'; ++p) {
>>> +		if (!isspace(*p) || (p != s && !isspace(*(p - 1)))) {
>>> +			*w = *p;
>>> +			++w;
>>> +			ret += 1;
>>> +		}
>>> +	}
>>
>> I'd be tempted to do ...
>>
>> 	size_t ret = 0;
>> 	char *w = s;
>> 	bool last_space = false;
>>
>> 	do {
>> 		bool this_space = isspace(*s);
>>
>> 		if (!this_space || !last_space) {
>> 			*w++ = *s;
>> 			ret++;
>> 		}
>> 		s++;
>> 		last_space = this_space;
>> 	} while (s[-1] != '\0');
> 
> That leaves a starting and trailing WS, how about something like this?
> 
> size_t strim_dupe(char *s)
> {
> 	size_t ret = 0;
> 	char *w = s;
> 	bool last_space = false;
> 
> 	do {
> 		bool this_space = isspace(*s);
> 		if (!this_space || (!last_space && ret)) {Mollie Fitzgerald
> 			*w++ = *s;
> 			ret++;
> 		}
> 		s++;
> 		last_space = this_space;
> 	} while (s[-1] != '\0');
> 
> 	if (ret > 1 && isspace(w[-2])) {
> 		w[-2] = '\0';
> 		ret--;
> 	}
> 
> 	ret--;
> 	return ret;
> }

This function was added so I could strip out extra spaces in the vpd
0x83 string representation, to shorten them before they get added to the
structured syslog message.  I'm starting to think this is a bad idea as
anyone that might want to write some code to use the kernel sysfs entry
for a device and search for it in the syslog would have to perturb the
id string the same way.

I think this change should just be removed from the patch series and
leave the IDs as they are.

If we really wanted a shorter ID, a better approach would be use a hash
of the ID string, but that introduces another level of complexity that
isn't helpful to end users.

-Tony
diff mbox series

Patch

diff --git a/include/linux/string.h b/include/linux/string.h
index b6ccdc2c7f02..bcca6bfab6ab 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -72,6 +72,8 @@  extern char * __must_check skip_spaces(const char *);
 
 extern char *strim(char *);
 
+extern size_t strim_dupe(char *s);
+
 static inline __must_check char *strstrip(char *str)
 {
 	return strim(str);
diff --git a/lib/string.c b/lib/string.c
index 08ec58cc673b..1186cce1f66b 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -515,6 +515,41 @@  char *strim(char *s)
 }
 EXPORT_SYMBOL(strim);
 
+/**
+ * Removes leading and trailing whitespace and removes duplicate
+ * adjacent whitespace in a string, modifies string in place.
+ * @s The %NUL-terminated string to have spaces removed
+ * Returns the new length
+ */
+size_t strim_dupe(char *s)
+{
+	size_t ret = 0;
+	char *w = s;
+	char *p;
+
+	/*
+	 * This will remove all leading and duplicate adjacent, but leave
+	 * 1 space at the end if one or more are present.
+	 */
+	for (p = s; *p != '\0'; ++p) {
+		if (!isspace(*p) || (p != s && !isspace(*(p - 1)))) {
+			*w = *p;
+			++w;
+			ret += 1;
+		}
+	}
+
+	*w = '\0';
+
+	/* Take off the last character if it's a space too */
+	if (ret && isspace(*(w - 1))) {
+		ret--;
+		*(w - 1) = '\0';
+	}
+	return ret;
+}
+EXPORT_SYMBOL(strim_dupe);
+
 #ifndef __HAVE_ARCH_STRLEN
 /**
  * strlen - Find the length of a string