[v2,4/4] arm64: Dereference CPU operations indirectly

Message ID 20200203235107.190609-5-gshan@redhat.com (mailing list archive)
State New, archived
Series: arm64: Dereference CPU operations indirectly

Commit Message

Gavin Shan Feb. 3, 2020, 11:51 p.m. UTC
The CPU operations are maintained through the array @cpu_ops[NR_CPUS].
2KB of memory is consumed when CONFIG_NR_CPUS is set to 256, which
seems like too much memory for this purpose.

This introduces another array (@cpu_ops_indexes[], one byte per 4 CPUs)
that stores indexes into the CPU operations array. With this, one byte
is shared by 4 CPUs (64 bytes for 256 CPUs) and the CPU operations are
dereferenced indirectly. Note this optimization assumes that the CPU
operations aren't dereferenced in a hot path.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 arch/arm64/kernel/cpu_ops.c | 49 ++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 25 deletions(-)
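
The 2-bits-per-CPU packing used in init_cpu_ops()/get_cpu_ops() boils
down to the following standalone sketch (user-space only, with
hypothetical demo_* names; only the arithmetic mirrors the patch):

	#include <stdio.h>

	#define DEMO_NR_CPUS 8

	/* One byte holds the 2-bit ops index for 4 CPUs. */
	static unsigned char demo_indexes[(DEMO_NR_CPUS + 3) / 4];

	static void demo_set_index(int cpu, int index)
	{
		unsigned char *p = &demo_indexes[cpu / 4];

		*p &= ~(0x3 << (2 * (cpu % 4)));	/* clear the CPU's 2 bits */
		*p |= (index & 0x3) << (2 * (cpu % 4));	/* store the new index  */
	}

	static int demo_get_index(int cpu)
	{
		return (demo_indexes[cpu / 4] >> (2 * (cpu % 4))) & 0x3;
	}

	int main(void)
	{
		int cpu;

		for (cpu = 0; cpu < DEMO_NR_CPUS; cpu++)
			demo_set_index(cpu, cpu % 3);	/* pretend there are 3 ops entries */

		for (cpu = 0; cpu < DEMO_NR_CPUS; cpu++)
			printf("cpu%d -> ops index %d\n", cpu, demo_get_index(cpu));

		return 0;
	}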

Comments

Lorenzo Pieralisi Feb. 11, 2020, 11:46 a.m. UTC | #1
On Tue, Feb 04, 2020 at 10:51:07AM +1100, Gavin Shan wrote:
> The CPU operations are maintained through the array @cpu_ops[NR_CPUS].
> 2KB of memory is consumed when CONFIG_NR_CPUS is set to 256, which
> seems like too much memory for this purpose.
>
> This introduces another array (@cpu_ops_indexes[], one byte per 4 CPUs)
> that stores indexes into the CPU operations array. With this, one byte
> is shared by 4 CPUs (64 bytes for 256 CPUs) and the CPU operations are
> dereferenced indirectly. Note this optimization assumes that the CPU
> operations aren't dereferenced in a hot path.

Actually, the enable method must be the same across CPUs, which brings
your optimization down to 1 byte for any number of CPUs (i.e. you only
need an index to the one and only CPU ops entry).

If a CPU has an enable method different from the first one detected,
we should let the CPU ops read fail: that index must not and cannot
differ between CPUs. If it does, the firmware is broken and it is
probably better to avoid booting that CPU rather than trying. I can
hardly see how we could introduce a regression by adding this logic,
to be confirmed.
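
Something along these lines, i.e. a single shared index plus a sanity
check on secondary CPUs (a rough, untested sketch only; helper names
taken from this patch):

	/* Single index into cpu_ops[], shared by all CPUs. */
	static int cpu_ops_index __ro_after_init = -1;

	int __init init_cpu_ops(int cpu)
	{
		const char *enable_method = get_cpu_method(cpu);
		int index;

		if (!enable_method)
			return -ENODEV;

		index = get_cpu_ops_index(enable_method);
		if (index < 0) {
			pr_warn("Unsupported enable-method: %s\n", enable_method);
			return -EOPNOTSUPP;
		}

		/* The enable method must be the same on all CPUs. */
		if (cpu_ops_index >= 0 && cpu_ops_index != index) {
			pr_warn("CPU%d: enable-method differs from boot CPU\n", cpu);
			return -EOPNOTSUPP;
		}

		cpu_ops_index = index;

		return 0;
	}

	const struct cpu_operations *get_cpu_ops(int cpu)
	{
		return cpu_ops_index >= 0 ? cpu_ops[cpu_ops_index] : NULL;
	}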

Please let me know if anyone spots something I have missed.

Thanks,
Lorenzo

> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>  arch/arm64/kernel/cpu_ops.c | 49 ++++++++++++++++++-------------------
>  1 file changed, 24 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
> index e133011f64b5..d9103d5c9c6f 100644
> --- a/arch/arm64/kernel/cpu_ops.c
> +++ b/arch/arm64/kernel/cpu_ops.c
> @@ -4,7 +4,6 @@
>   *
>   * Copyright (C) 2013 ARM Ltd.
>   */
> -
>  #include <linux/acpi.h>
>  #include <linux/cache.h>
>  #include <linux/errno.h>
> @@ -20,39 +19,32 @@ extern const struct cpu_operations acpi_parking_protocol_ops;
>  #endif
>  extern const struct cpu_operations cpu_psci_ops;
>  
> -static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
> -
> -static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
> +/*
> + * Each element of the index array is shared by 4 CPUs. It means each
> + * CPU index uses 2 bits.
> + */
> +static const struct cpu_operations *const cpu_ops[] = {
>  	&smp_spin_table_ops,
> -	&cpu_psci_ops,
> -	NULL,
> -};
> -
> -static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
>  #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
>  	&acpi_parking_protocol_ops,
>  #endif
>  	&cpu_psci_ops,
> -	NULL,
>  };
> +static unsigned char cpu_ops_indexes[DIV_ROUND_UP(NR_CPUS, 4)] __ro_after_init;
>  
> -static const struct cpu_operations * __init cpu_get_ops(const char *name)
> +static int __init get_cpu_ops_index(const char *name)
>  {
> -	const struct cpu_operations *const *ops;
> -
> -	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
> -
> -	while (*ops) {
> -		if (!strcmp(name, (*ops)->name))
> -			return *ops;
> +	int index;
>  
> -		ops++;
> +	for (index = 0; index < ARRAY_SIZE(cpu_ops); index++) {
> +		if (!strcmp(cpu_ops[index]->name, name))
> +			return index;
>  	}
>  
> -	return NULL;
> +	return -ERANGE;
>  }
>  
> -static const char *__init cpu_read_enable_method(int cpu)
> +static const char *__init get_cpu_method(int cpu)
>  {
>  	const char *enable_method;
>  
> @@ -98,21 +90,28 @@ static const char *__init cpu_read_enable_method(int cpu)
>   */
>  int __init init_cpu_ops(int cpu)
>  {
> -	const char *enable_method = cpu_read_enable_method(cpu);
> +	const char *enable_method = get_cpu_method(cpu);
> +	unsigned char *pindex = &cpu_ops_indexes[cpu / 4];
> +	int index;
>  
>  	if (!enable_method)
>  		return -ENODEV;
>  
> -	cpu_ops[cpu] = cpu_get_ops(enable_method);
> -	if (!cpu_ops[cpu]) {
> +	index = get_cpu_ops_index(enable_method);
> +	if (index < 0) {
>  		pr_warn("Unsupported enable-method: %s\n", enable_method);
>  		return -EOPNOTSUPP;
>  	}
>  
> +	*pindex &= ~(0x3 << (2 * (cpu % 4)));
> +	*pindex |= ((index & 0x3) << (2 * (cpu % 4)));
> +
>  	return 0;
>  }
>  
>  const struct cpu_operations *get_cpu_ops(int cpu)
>  {
> -	return cpu_ops[cpu];
> +	int index = ((cpu_ops_indexes[cpu / 4] >> (2 * (cpu % 4))) & 0x3);
> +
> +	return cpu_ops[index];
>  }
> -- 
> 2.23.0
>
Gavin Shan Feb. 11, 2020, 11:29 p.m. UTC | #2
On 2/11/20 10:46 PM, Lorenzo Pieralisi wrote:
> On Tue, Feb 04, 2020 at 10:51:07AM +1100, Gavin Shan wrote:
>> The CPU operations are maintained through the array @cpu_ops[NR_CPUS].
>> 2KB of memory is consumed when CONFIG_NR_CPUS is set to 256, which
>> seems like too much memory for this purpose.
>>
>> This introduces another array (@cpu_ops_indexes[], one byte per 4 CPUs)
>> that stores indexes into the CPU operations array. With this, one byte
>> is shared by 4 CPUs (64 bytes for 256 CPUs) and the CPU operations are
>> dereferenced indirectly. Note this optimization assumes that the CPU
>> operations aren't dereferenced in a hot path.
> 
> Actually, the enable method must be the same across CPUs, which brings
> your optimization down to 1 byte for any number of CPUs (i.e. you only
> need an index to the one and only CPU ops entry).
>
> If a CPU has an enable method different from the first one detected,
> we should let the CPU ops read fail: that index must not and cannot
> differ between CPUs. If it does, the firmware is broken and it is
> probably better to avoid booting that CPU rather than trying. I can
> hardly see how we could introduce a regression by adding this logic,
> to be confirmed.
> 
> Please let me know if anyone spots something I have missed.
> 

Lorenzo, thank you for the comments. Yes, we only need one byte if
there is just one valid set of CPU operations. It seems fine for
cpu_read_ops() to return an error if the selected CPU operation isn't
the one that was previously chosen. In the error path, the CPU won't
be marked in the possible cpumask and no attempt will be made to bring
it up.

I'll change the code in v3 based on your comments.

Thanks,
Gavin


> 
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>>   arch/arm64/kernel/cpu_ops.c | 49 ++++++++++++++++++-------------------
>>   1 file changed, 24 insertions(+), 25 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
>> index e133011f64b5..d9103d5c9c6f 100644
>> --- a/arch/arm64/kernel/cpu_ops.c
>> +++ b/arch/arm64/kernel/cpu_ops.c
>> @@ -4,7 +4,6 @@
>>    *
>>    * Copyright (C) 2013 ARM Ltd.
>>    */
>> -
>>   #include <linux/acpi.h>
>>   #include <linux/cache.h>
>>   #include <linux/errno.h>
>> @@ -20,39 +19,32 @@ extern const struct cpu_operations acpi_parking_protocol_ops;
>>   #endif
>>   extern const struct cpu_operations cpu_psci_ops;
>>   
>> -static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
>> -
>> -static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
>> +/*
>> + * Each element of the index array is shared by 4 CPUs. It means each
>> + * CPU index uses 2 bits.
>> + */
>> +static const struct cpu_operations *const cpu_ops[] = {
>>   	&smp_spin_table_ops,
>> -	&cpu_psci_ops,
>> -	NULL,
>> -};
>> -
>> -static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
>>   #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
>>   	&acpi_parking_protocol_ops,
>>   #endif
>>   	&cpu_psci_ops,
>> -	NULL,
>>   };
>> +static unsigned char cpu_ops_indexes[DIV_ROUND_UP(NR_CPUS, 4)] __ro_after_init;
>>   
>> -static const struct cpu_operations * __init cpu_get_ops(const char *name)
>> +static int __init get_cpu_ops_index(const char *name)
>>   {
>> -	const struct cpu_operations *const *ops;
>> -
>> -	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
>> -
>> -	while (*ops) {
>> -		if (!strcmp(name, (*ops)->name))
>> -			return *ops;
>> +	int index;
>>   
>> -		ops++;
>> +	for (index = 0; index < ARRAY_SIZE(cpu_ops); index++) {
>> +		if (!strcmp(cpu_ops[index]->name, name))
>> +			return index;
>>   	}
>>   
>> -	return NULL;
>> +	return -ERANGE;
>>   }
>>   
>> -static const char *__init cpu_read_enable_method(int cpu)
>> +static const char *__init get_cpu_method(int cpu)
>>   {
>>   	const char *enable_method;
>>   
>> @@ -98,21 +90,28 @@ static const char *__init cpu_read_enable_method(int cpu)
>>    */
>>   int __init init_cpu_ops(int cpu)
>>   {
>> -	const char *enable_method = cpu_read_enable_method(cpu);
>> +	const char *enable_method = get_cpu_method(cpu);
>> +	unsigned char *pindex = &cpu_ops_indexes[cpu / 4];
>> +	int index;
>>   
>>   	if (!enable_method)
>>   		return -ENODEV;
>>   
>> -	cpu_ops[cpu] = cpu_get_ops(enable_method);
>> -	if (!cpu_ops[cpu]) {
>> +	index = get_cpu_ops_index(enable_method);
>> +	if (index < 0) {
>>   		pr_warn("Unsupported enable-method: %s\n", enable_method);
>>   		return -EOPNOTSUPP;
>>   	}
>>   
>> +	*pindex &= ~(0x3 << (2 * (cpu % 4)));
>> +	*pindex |= ((index & 0x3) << (2 * (cpu % 4)));
>> +
>>   	return 0;
>>   }
>>   
>>   const struct cpu_operations *get_cpu_ops(int cpu)
>>   {
>> -	return cpu_ops[cpu];
>> +	int index = ((cpu_ops_indexes[cpu / 4] >> (2 * (cpu % 4))) & 0x3);
>> +
>> +	return cpu_ops[index];
>>   }
>> -- 
>> 2.23.0
>>
>

Patch

diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index e133011f64b5..d9103d5c9c6f 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -4,7 +4,6 @@ 
  *
  * Copyright (C) 2013 ARM Ltd.
  */
-
 #include <linux/acpi.h>
 #include <linux/cache.h>
 #include <linux/errno.h>
@@ -20,39 +19,32 @@  extern const struct cpu_operations acpi_parking_protocol_ops;
 #endif
 extern const struct cpu_operations cpu_psci_ops;
 
-static const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-
-static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
+/*
+ * Each element of the index array is shared by 4 CPUs. It means each
+ * CPU index uses 2 bits.
+ */
+static const struct cpu_operations *const cpu_ops[] = {
 	&smp_spin_table_ops,
-	&cpu_psci_ops,
-	NULL,
-};
-
-static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
 #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
 	&acpi_parking_protocol_ops,
 #endif
 	&cpu_psci_ops,
-	NULL,
 };
+static unsigned char cpu_ops_indexes[DIV_ROUND_UP(NR_CPUS, 4)] __ro_after_init;
 
-static const struct cpu_operations * __init cpu_get_ops(const char *name)
+static int __init get_cpu_ops_index(const char *name)
 {
-	const struct cpu_operations *const *ops;
-
-	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
-
-	while (*ops) {
-		if (!strcmp(name, (*ops)->name))
-			return *ops;
+	int index;
 
-		ops++;
+	for (index = 0; index < ARRAY_SIZE(cpu_ops); index++) {
+		if (!strcmp(cpu_ops[index]->name, name))
+			return index;
 	}
 
-	return NULL;
+	return -ERANGE;
 }
 
-static const char *__init cpu_read_enable_method(int cpu)
+static const char *__init get_cpu_method(int cpu)
 {
 	const char *enable_method;
 
@@ -98,21 +90,28 @@  static const char *__init cpu_read_enable_method(int cpu)
  */
 int __init init_cpu_ops(int cpu)
 {
-	const char *enable_method = cpu_read_enable_method(cpu);
+	const char *enable_method = get_cpu_method(cpu);
+	unsigned char *pindex = &cpu_ops_indexes[cpu / 4];
+	int index;
 
 	if (!enable_method)
 		return -ENODEV;
 
-	cpu_ops[cpu] = cpu_get_ops(enable_method);
-	if (!cpu_ops[cpu]) {
+	index = get_cpu_ops_index(enable_method);
+	if (index < 0) {
 		pr_warn("Unsupported enable-method: %s\n", enable_method);
 		return -EOPNOTSUPP;
 	}
 
+	*pindex &= ~(0x3 << (2 * (cpu % 4)));
+	*pindex |= ((index & 0x3) << (2 * (cpu % 4)));
+
 	return 0;
 }
 
 const struct cpu_operations *get_cpu_ops(int cpu)
 {
-	return cpu_ops[cpu];
+	int index = ((cpu_ops_indexes[cpu / 4] >> (2 * (cpu % 4))) & 0x3);
+
+	return cpu_ops[index];
 }