diff mbox series

[4/4] arm64: Dereference CPU operations indirectly

Message ID 20200202232437.7626-5-gshan@redhat.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Gavin Shan Feb. 2, 2020, 11:24 p.m. UTC
One CPU operations is maintained through array @cpu_ops[NR_CPUS]. 2KB
memory is consumed when CONFIG_NR_CPUS is set to 256. It seems too
much memory has been used for this.

This introduces another array (@cpu_ops_index[NR_CPUS]), of which the
index to CPU operations array is stored. With this, we just one byte
for each CPU, 256 bytes for 256 CPUs, to dereference the CPU operations
indirectly.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 arch/arm64/kernel/cpu_ops.c | 44 +++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 19 deletions(-)

Comments

Robin Murphy Feb. 3, 2020, 5:18 p.m. UTC | #1
On 02/02/2020 11:24 pm, Gavin Shan wrote:
> One CPU operations is maintained through array @cpu_ops[NR_CPUS]. 2KB
> memory is consumed when CONFIG_NR_CPUS is set to 256. It seems too
> much memory has been used for this.
> 
> This introduces another array (@cpu_ops_index[NR_CPUS]), of which the
> index to CPU operations array is stored. With this, we just one byte
> for each CPU, 256 bytes for 256 CPUs, to dereference the CPU operations
> indirectly.

By extension of the same argument, that's still four times as big as it 
*needs* to be ;)

How important is the memory saving vs. the runtime overhead of more 
indirection?

> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>   arch/arm64/kernel/cpu_ops.c | 44 +++++++++++++++++++++----------------
>   1 file changed, 25 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
> index e133011f64b5..2a58222a2f24 100644
> --- a/arch/arm64/kernel/cpu_ops.c
> +++ b/arch/arm64/kernel/cpu_ops.c
> @@ -20,39 +20,33 @@ extern const struct cpu_operations acpi_parking_protocol_ops;
>   #endif
>   extern const struct cpu_operations cpu_psci_ops;
>   
> -static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
> -
> -static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
> +static const struct cpu_operations *const cpu_ops[] = {
>   	&smp_spin_table_ops,
> -	&cpu_psci_ops,
> -	NULL,
> -};
> -
> -static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
>   #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
>   	&acpi_parking_protocol_ops,
>   #endif
>   	&cpu_psci_ops,
>   	NULL,
>   };
> +static unsigned char cpu_ops_indexes[NR_CPUS] __ro_after_init;
>   
> -static const struct cpu_operations * __init cpu_get_ops(const char *name)
> +static unsigned char __init get_cpu_ops_index(const char *name)
>   {
> -	const struct cpu_operations *const *ops;
> -
> -	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
> +	const struct cpu_operations *const *ops = cpu_ops;
> +	unsigned char index = 0;
>   
>   	while (*ops) {

For a statically-initialised array which isn't exported to other 
compilation units you don't really need the null-terminator dance; a simple

	for (index = 0; index < ARRAY_SIZE(cpu_ops); index++)

should suffice.

>   		if (!strcmp(name, (*ops)->name))
> -			return *ops;
> +			return (index + 1);

Also this magic +1 is a bit horrible - I don't see any need for the 
interface to expose the underlying storage format, so it could just as 
easily return the actual index as an int with a negative error case.

Robin.

>   
>   		ops++;
> +		index++;
>   	}
>   
> -	return NULL;
> +	return 0;
>   }
>   
> -static const char *__init cpu_read_enable_method(int cpu)
> +static const char *__init get_cpu_method(int cpu)
>   {
>   	const char *enable_method;
>   
> @@ -98,21 +92,33 @@ static const char *__init cpu_read_enable_method(int cpu)
>    */
>   int __init init_cpu_ops(int cpu)
>   {
> -	const char *enable_method = cpu_read_enable_method(cpu);
> +	const char *enable_method = get_cpu_method(cpu);
> +	unsigned char index;
>   
>   	if (!enable_method)
>   		return -ENODEV;
>   
> -	cpu_ops[cpu] = cpu_get_ops(enable_method);
> -	if (!cpu_ops[cpu]) {
> +	index = get_cpu_ops_index(enable_method);
> +	if (!index) {
>   		pr_warn("Unsupported enable-method: %s\n", enable_method);
>   		return -EOPNOTSUPP;
>   	}
>   
> +	cpu_ops_indexes[cpu] = index;
> +
>   	return 0;
>   }
>   
>   const struct cpu_operations *get_cpu_ops(int cpu)
>   {
> -	return cpu_ops[cpu];
> +	unsigned char index = cpu_ops_indexes[cpu];
> +
> +	/*
> +	 * The corresponding CPU operation isn't set when the
> +	 * index is equal to zero.
> +	 */
> +	if (!index)
> +		return NULL;
> +
> +	return cpu_ops[index - 1];
>   }
>
Gavin Shan Feb. 3, 2020, 11:12 p.m. UTC | #2
On 2/4/20 4:18 AM, Robin Murphy wrote:
> On 02/02/2020 11:24 pm, Gavin Shan wrote:
>> One CPU operations is maintained through array @cpu_ops[NR_CPUS]. 2KB
>> memory is consumed when CONFIG_NR_CPUS is set to 256. It seems too
>> much memory has been used for this.
>>
>> This introduces another array (@cpu_ops_index[NR_CPUS]), of which the
>> index to CPU operations array is stored. With this, we just one byte
>> for each CPU, 256 bytes for 256 CPUs, to dereference the CPU operations
>> indirectly.
> 
> By extension of the same argument, that's still four times as big as it *needs* to be ;)
> 
> How important is the memory saving vs. the runtime overhead of more indirection?
> 

Yeah, I'll pack 4 CPUs' indexes into one byte  in v2. With this, the size is
decreased from 2KB to 64 bytes. Thanks for the nice reminder :)

It's hard to elaborate the benefits. The kernel image size will be decreased,
less traffic will be needed when it's copied over network. Less time will be
needed when it's compressed and so on. The optimization has the assumption -
these CPU operations aren't used in hot path.

>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>>   arch/arm64/kernel/cpu_ops.c | 44 +++++++++++++++++++++----------------
>>   1 file changed, 25 insertions(+), 19 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
>> index e133011f64b5..2a58222a2f24 100644
>> --- a/arch/arm64/kernel/cpu_ops.c
>> +++ b/arch/arm64/kernel/cpu_ops.c
>> @@ -20,39 +20,33 @@ extern const struct cpu_operations acpi_parking_protocol_ops;
>>   #endif
>>   extern const struct cpu_operations cpu_psci_ops;
>> -static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
>> -
>> -static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
>> +static const struct cpu_operations *const cpu_ops[] = {
>>       &smp_spin_table_ops,
>> -    &cpu_psci_ops,
>> -    NULL,
>> -};
>> -
>> -static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
>>   #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
>>       &acpi_parking_protocol_ops,
>>   #endif
>>       &cpu_psci_ops,
>>       NULL,
>>   };
>> +static unsigned char cpu_ops_indexes[NR_CPUS] __ro_after_init;
>> -static const struct cpu_operations * __init cpu_get_ops(const char *name)
>> +static unsigned char __init get_cpu_ops_index(const char *name)
>>   {
>> -    const struct cpu_operations *const *ops;
>> -
>> -    ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
>> +    const struct cpu_operations *const *ops = cpu_ops;
>> +    unsigned char index = 0;
>>       while (*ops) {
> 
> For a statically-initialised array which isn't exported to other compilation units you don't really need the null-terminator dance; a simple
> 
>      for (index = 0; index < ARRAY_SIZE(cpu_ops); index++)
> 
> should suffice.
> 

Yeah, will be changed in v2. The snippet come from the original implementation.


>>           if (!strcmp(name, (*ops)->name))
>> -            return *ops;
>> +            return (index + 1);
> 
> Also this magic +1 is a bit horrible - I don't see any need for the interface to expose the underlying storage format, so it could just as easily return the actual index as an int with a negative error case.
> 

@cpu_ops_indexes[] is cleared out. For one particular CPU, it's not correct
to fetch its operations if that wasn't initialized yet. So index#0 is reserved
as invalid. It seems we don't have this case after rechecking the code. So it's
safe to use index-0 and the suggested changes will be included in v2.

> Robin.
> 
>>           ops++;
>> +        index++;
>>       }
>> -    return NULL;
>> +    return 0;
>>   }
>> -static const char *__init cpu_read_enable_method(int cpu)
>> +static const char *__init get_cpu_method(int cpu)
>>   {
>>       const char *enable_method;
>> @@ -98,21 +92,33 @@ static const char *__init cpu_read_enable_method(int cpu)
>>    */
>>   int __init init_cpu_ops(int cpu)
>>   {Rechecked the code, it seems we don't have this case
>> -    const char *enable_method = cpu_read_enable_method(cpu);
>> +    const char *enable_method = get_cpu_method(cpu);
>> +    unsigned char index;
>>       if (!enable_method)
>>           return -ENODEV;
>> -    cpu_ops[cpu] = cpu_get_ops(enable_method);
>> -    if (!cpu_ops[cpu]) {
>> +    index = get_cpu_ops_index(enable_method);
>> +    if (!index) {
>>           pr_warn("Unsupported enable-method: %s\n", enable_method);
>>           return -EOPNOTSUPP;
>>       }
>> +    cpu_ops_indexes[cpu] = index;
>> +
>>       return 0;
>>   }
>>   const struct cpu_operations *get_cpu_ops(int cpu)
>>   {
>> -    return cpu_ops[cpu];
>> +    unsigned char index = cpu_ops_indexes[cpu];
>> +
>> +    /*
>> +     * The corresponding CPU operation isn't set when the
>> +     * index is equal to zero.
>> +     */
>> +    if (!index)
>> +        return NULL;
>> +
>> +    return cpu_ops[index - 1];
>>   }
>>

Thanks,
Gavin
diff mbox series

Patch

diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index e133011f64b5..2a58222a2f24 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -20,39 +20,33 @@  extern const struct cpu_operations acpi_parking_protocol_ops;
 #endif
 extern const struct cpu_operations cpu_psci_ops;
 
-static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-
-static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const cpu_ops[] = {
 	&smp_spin_table_ops,
-	&cpu_psci_ops,
-	NULL,
-};
-
-static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
 #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
 	&acpi_parking_protocol_ops,
 #endif
 	&cpu_psci_ops,
 	NULL,
 };
+static unsigned char cpu_ops_indexes[NR_CPUS] __ro_after_init;
 
-static const struct cpu_operations * __init cpu_get_ops(const char *name)
+static unsigned char __init get_cpu_ops_index(const char *name)
 {
-	const struct cpu_operations *const *ops;
-
-	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
+	const struct cpu_operations *const *ops = cpu_ops;
+	unsigned char index = 0;
 
 	while (*ops) {
 		if (!strcmp(name, (*ops)->name))
-			return *ops;
+			return (index + 1);
 
 		ops++;
+		index++;
 	}
 
-	return NULL;
+	return 0;
 }
 
-static const char *__init cpu_read_enable_method(int cpu)
+static const char *__init get_cpu_method(int cpu)
 {
 	const char *enable_method;
 
@@ -98,21 +92,33 @@  static const char *__init cpu_read_enable_method(int cpu)
  */
 int __init init_cpu_ops(int cpu)
 {
-	const char *enable_method = cpu_read_enable_method(cpu);
+	const char *enable_method = get_cpu_method(cpu);
+	unsigned char index;
 
 	if (!enable_method)
 		return -ENODEV;
 
-	cpu_ops[cpu] = cpu_get_ops(enable_method);
-	if (!cpu_ops[cpu]) {
+	index = get_cpu_ops_index(enable_method);
+	if (!index) {
 		pr_warn("Unsupported enable-method: %s\n", enable_method);
 		return -EOPNOTSUPP;
 	}
 
+	cpu_ops_indexes[cpu] = index;
+
 	return 0;
 }
 
 const struct cpu_operations *get_cpu_ops(int cpu)
 {
-	return cpu_ops[cpu];
+	unsigned char index = cpu_ops_indexes[cpu];
+
+	/*
+	 * The corresponding CPU operation isn't set when the
+	 * index is equal to zero.
+	 */
+	if (!index)
+		return NULL;
+
+	return cpu_ops[index - 1];
 }