diff mbox series

[v1,5/7] RISC-V: rework comments in ISA string parser

Message ID 20230504-never-childlike-75e2ce7e50d8@spud (mailing list archive)
State Superseded
Delegated to: Palmer Dabbelt
Headers show
Series ISA string parser cleanups++ | expand

Checks

Context Check Description
conchuod/tree_selection fail Failed to apply to next/pending-fixes or riscv/for-next

Commit Message

Conor Dooley May 4, 2023, 6:14 p.m. UTC
From: Conor Dooley <conor.dooley@microchip.com>

I have found these comments to not be at all helpful whenever I look at
the parser. Further, the comments in the default case (single letter
parser) are not quite right either.
Group the comments into a single larger comment at the start of each
case that attempts to explain things at a higher level.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
---
 arch/riscv/kernel/cpufeature.c | 71 ++++++++++++++++++++++++++++------
 1 file changed, 60 insertions(+), 11 deletions(-)

Comments

Andrew Jones May 5, 2023, 9:12 a.m. UTC | #1
On Thu, May 04, 2023 at 07:14:24PM +0100, Conor Dooley wrote:
> From: Conor Dooley <conor.dooley@microchip.com>
> 
> I have found these comments to not be at all helpful whenever I look at
> the parser. Further, the comments in the default case (single letter
> parser) are not quite right either.
> Group the comments into a larger one at the start of each case, that
> attempts to explain things at a higher level.
> 
> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> ---
>  arch/riscv/kernel/cpufeature.c | 71 ++++++++++++++++++++++++++++------
>  1 file changed, 60 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index a79c5c52a174..2fc72f092057 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -146,7 +146,7 @@ void __init riscv_fill_hwcap(void)
>  
>  			switch (*ext) {
>  			case 's':
> -				/**
> +				/*
>  				 * Workaround for invalid single-letter 's' & 'u'(QEMU).
>  				 * No need to set the bit in riscv_isa as 's' & 'u' are
>  				 * not valid ISA extensions. It works until multi-letter
> @@ -163,53 +163,102 @@ void __init riscv_fill_hwcap(void)
>  			case 'X':
>  			case 'z':
>  			case 'Z':
> +				/*
> +				 * Before attempting to parse the extension itself, we find its end.
> +				 * As multi-letter extensions must be split from other multi-letter
> +				 * extensions with an "_", the end of a multi-letter extension will
> +				 * either be the null character as of_property_read_string() returns
> +				 * null-terminated strings,

The ACPI table also requires the ISA string be null-terminated. I'd maybe
drop the reference to the DT function and just state the string will be
null-terminated to avoid any ACPI concern.

> or the "_" at the start of the next
> +				 * multi-letter extension.
> +				 *
> +				 * Next, as the extensions version is currently ignored, we
> +				 * eliminate that portion. This is done by parsing backwards from
> +				 * the end of the extension, removing any numbers. This may be a
> +				 * major or minor number however, so the process is repeated if a
> +				 * minor number was found.
> +				 *
> +				 * ext_end is intended to represent the first character *after* the
> +				 * name portion of an extension, but will be decremented to the last
> +				 * character itself while eliminating the extensions version number.
> +				 * A simple re-increment solves this problem.
> +				 */
>  				ext_long = true;
> -				/* Multi-letter extension must be delimited */
>  				for (; *isa && *isa != '_'; ++isa)
>  					if (unlikely(!isalnum(*isa)))
>  						ext_err = true;
> -				/* Parse backwards */
> +
>  				ext_end = isa;
>  				if (unlikely(ext_err))
>  					break;
> +
>  				if (!isdigit(ext_end[-1]))
>  					break;
> -				/* Skip the minor version */
> +
>  				while (isdigit(*--ext_end))
>  					;
> -				if (tolower(ext_end[0]) != 'p'
> -				    || !isdigit(ext_end[-1])) {
> -					/* Advance it to offset the pre-decrement */
> +
> +				if (tolower(ext_end[0]) != 'p' || !isdigit(ext_end[-1])) {
>  					++ext_end;
>  					break;
>  				}
> -				/* Skip the major version */
> +
>  				while (isdigit(*--ext_end))
>  					;
> +
>  				++ext_end;
>  				break;
>  			default:
> +				/*
> +				 * Things are a little easier for single-letter extensions, as they
> +				 * are parsed forwards.
> +				 *
> +				 * After checking that our starting position is valid, we need to
> +				 * ensure that, when isa was incremented at the start of the loop,
> +				 * that it arrived at the start of the next extension.
> +				 *
> +				 * If we are already on a non-digit, there is nothing to do. Either
> +				 * we have a multi-letter extension's _, or the start of an
> +				 * extension.
> +				 *
> +				 * Otherwise we have found the current extension's major version
> +				 * number. Parse past it, and a subsequent p/minor version number
> +				 * if present. The `p` extension must not appear immediately after
> +				 * a number, so there is no fear of missing it.
> +				 *
> +				 */
>  				if (unlikely(!isalpha(*ext))) {
>  					ext_err = true;
>  					break;
>  				}
> -				/* Find next extension */
> +
>  				if (!isdigit(*isa))
>  					break;
> -				/* Skip the minor version */
> +
>  				while (isdigit(*++isa))
>  					;
> +
>  				if (tolower(*isa) != 'p')
>  					break;
> +
>  				if (!isdigit(*++isa)) {
>  					--isa;
>  					break;
>  				}
> -				/* Skip the major version */
> +
>  				while (isdigit(*++isa))
>  					;
> +
>  				break;
>  			}
> +
> +			/*
> +			 * The parser expects that at the start of an iteration isa points to the
> +			 * character before the start of the next extension. This will not be the
> +			 * case if we have just parsed a single-letter extension and the next
> +			 * extension is not a multi-letter extension prefixed with an "_". It is
> +			 * also not the case at the end of the string, where it will point to the
> +			 * terminating null character.
> +			 */
>  			if (*isa != '_')
>  				--isa;
>  
> -- 
> 2.39.2
>

Reviewed-by: Andrew Jones <ajones@ventanamicro.com>

Thanks,
drew
diff mbox series

Patch

diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index a79c5c52a174..2fc72f092057 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -146,7 +146,7 @@  void __init riscv_fill_hwcap(void)
 
 			switch (*ext) {
 			case 's':
-				/**
+				/*
 				 * Workaround for invalid single-letter 's' & 'u'(QEMU).
 				 * No need to set the bit in riscv_isa as 's' & 'u' are
 				 * not valid ISA extensions. It works until multi-letter
@@ -163,53 +163,102 @@  void __init riscv_fill_hwcap(void)
 			case 'X':
 			case 'z':
 			case 'Z':
+				/*
+				 * Before attempting to parse the extension itself, we find its end.
+				 * As multi-letter extensions must be split from other multi-letter
+				 * extensions with an "_", the end of a multi-letter extension will
+				 * either be the null character, as the ISA string is always
+				 * null-terminated, or the "_" at the start of the next
+				 * multi-letter extension.
+				 *
+				 * Next, as the extension's version is currently ignored, we
+				 * eliminate that portion. This is done by parsing backwards from
+				 * the end of the extension, removing any numbers. This may be a
+				 * major or minor number however, so the process is repeated if a
+				 * minor number was found.
+				 *
+				 * ext_end is intended to represent the first character *after* the
+				 * name portion of an extension, but will be decremented to the last
+				 * character itself while eliminating the extension's version
+				 * number. A simple re-increment solves this problem.
+				 */
 				ext_long = true;
-				/* Multi-letter extension must be delimited */
 				for (; *isa && *isa != '_'; ++isa)
 					if (unlikely(!isalnum(*isa)))
 						ext_err = true;
-				/* Parse backwards */
+
 				ext_end = isa;
 				if (unlikely(ext_err))
 					break;
+
 				if (!isdigit(ext_end[-1]))
 					break;
-				/* Skip the minor version */
+
 				while (isdigit(*--ext_end))
 					;
-				if (tolower(ext_end[0]) != 'p'
-				    || !isdigit(ext_end[-1])) {
-					/* Advance it to offset the pre-decrement */
+
+				if (tolower(ext_end[0]) != 'p' || !isdigit(ext_end[-1])) {
 					++ext_end;
 					break;
 				}
-				/* Skip the major version */
+
 				while (isdigit(*--ext_end))
 					;
+
 				++ext_end;
 				break;
 			default:
+				/*
+				 * Things are a little easier for single-letter extensions, as they
+				 * are parsed forwards.
+				 *
+				 * After checking that our starting position is valid, we need to
+				 * ensure that, when isa was incremented at the start of the loop,
+				 * it arrived at the start of the next extension.
+				 *
+				 * If we are already on a non-digit, there is nothing to do. Either
+				 * we have a multi-letter extension's _, or the start of an
+				 * extension.
+				 *
+				 * Otherwise we have found the current extension's major version
+				 * number. Parse past it, and a subsequent p/minor version number
+				 * if present. The `p` extension must not appear immediately after
+				 * a number, so there is no fear of missing it.
+				 *
+				 */
 				if (unlikely(!isalpha(*ext))) {
 					ext_err = true;
 					break;
 				}
-				/* Find next extension */
+
 				if (!isdigit(*isa))
 					break;
-				/* Skip the minor version */
+
 				while (isdigit(*++isa))
 					;
+
 				if (tolower(*isa) != 'p')
 					break;
+
 				if (!isdigit(*++isa)) {
 					--isa;
 					break;
 				}
-				/* Skip the major version */
+
 				while (isdigit(*++isa))
 					;
+
 				break;
 			}
+
+			/*
+			 * The parser expects that at the start of an iteration isa points to the
+			 * character before the start of the next extension. This will not be the
+			 * case if we have just parsed a single-letter extension and the next
+			 * extension is not a multi-letter extension prefixed with an "_". It is
+			 * also not the case at the end of the string, where it will point to the
+			 * terminating null character.
+			 */
 			if (*isa != '_')
 				--isa;