diff mbox series

[RFC,v2,22/35] ACPI: Check _STA present bit before making CPUs not present

Message ID 20230913163823.7880-23-james.morse@arm.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series ACPI/arm64: add support for virtual cpuhotplug | expand

Commit Message

James Morse Sept. 13, 2023, 4:38 p.m. UTC
When called acpi_processor_post_eject() unconditionally make a CPU
not-present and unregisters it.

To add support for AML events where the CPU has become disabled, but
remains present, the _STA method should be checked before calling
acpi_processor_remove().

Rename acpi_processor_post_eject() acpi_processor_remove_possible(), and
check the _STA before calling.

Adding the function prototype for arch_unregister_cpu() allows the
preprocessor guards to be removed.

After this change CPUs will remain registered and visible to
user-space as offline if buggy firmware triggers an eject-request,
but doesn't clear the corresponding _STA bits after _EJ0 has been
called.

Signed-off-by: James Morse <james.morse@arm.com>
---
 drivers/acpi/acpi_processor.c | 31 +++++++++++++++++++++++++------
 include/linux/cpu.h           |  1 +
 2 files changed, 26 insertions(+), 6 deletions(-)

Comments

Jonathan Cameron Sept. 14, 2023, 2:31 p.m. UTC | #1
On Wed, 13 Sep 2023 16:38:10 +0000
James Morse <james.morse@arm.com> wrote:

> When called acpi_processor_post_eject() unconditionally make a CPU
> not-present and unregisters it.
> 
> To add support for AML events where the CPU has become disabled, but
> remains present, the _STA method should be checked before calling
> acpi_processor_remove().
> 
> Rename acpi_processor_post_eject() acpi_processor_remove_possible(), and
> check the _STA before calling.
> 
> Adding the function prototype for arch_unregister_cpu() allows the
> preprocessor guards to be removed.
> 
> After this change CPUs will remain registered and visible to
> user-space as offline if buggy firmware triggers an eject-request,
> but doesn't clear the corresponding _STA bits after _EJ0 has been
> called.
Will be fun to see how many such buggy firmwares are out there.

> 
> Signed-off-by: James Morse <james.morse@arm.com>

Comment inline but not directly related to this patch so with or
without that change
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  drivers/acpi/acpi_processor.c | 31 +++++++++++++++++++++++++------
>  include/linux/cpu.h           |  1 +
>  2 files changed, 26 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
> index 00dcc23d49a8..2cafea1edc24 100644
> --- a/drivers/acpi/acpi_processor.c
> +++ b/drivers/acpi/acpi_processor.c
> @@ -457,13 +457,12 @@ static int acpi_processor_add(struct acpi_device *device,
>  	return result;
>  }
>  
> -#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU
>  /* Removal */
> -static void acpi_processor_post_eject(struct acpi_device *device)
> +static void acpi_processor_make_not_present(struct acpi_device *device)
>  {
>  	struct acpi_processor *pr;
>  
> -	if (!device || !acpi_driver_data(device))
> +	if (!IS_ENABLED(CONFIG_ACPI_HOTPLUG_PRESENT_CPU))

Would it be possible to do all the ifdef to IS_ENABLED changes in a separate
patch?  I haven't figure out if any of them have dependencies on the other
changes, but they do create a bunch of noise I'd rather not see in the more
complex corners of this.

>  		return;
>  
>  	pr = acpi_driver_data(device);
> @@ -501,7 +500,29 @@ static void acpi_processor_post_eject(struct acpi_device *device)
>  	free_cpumask_var(pr->throttling.shared_cpu_map);
>  	kfree(pr);
>  }
> -#endif /* CONFIG_ACPI_HOTPLUG_PRESENT_CPU */
> +
> +static void acpi_processor_post_eject(struct acpi_device *device)
> +{
> +	struct acpi_processor *pr;
> +	unsigned long long sta;
> +	acpi_status status;
> +
> +	if (!device)
> +		return;
> +
> +	pr = acpi_driver_data(device);
> +	if (!pr || pr->id >= nr_cpu_ids || invalid_phys_cpuid(pr->phys_id))
> +		return;
> +
> +	status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta);
> +	if (ACPI_FAILURE(status))
> +		return;
> +
> +	if (cpu_present(pr->id) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
> +		acpi_processor_make_not_present(device);
> +		return;
> +	}
> +}
>  
>  #ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
>  bool __init processor_physically_present(acpi_handle handle)
> @@ -626,9 +647,7 @@ static const struct acpi_device_id processor_device_ids[] = {
>  static struct acpi_scan_handler processor_handler = {
>  	.ids = processor_device_ids,
>  	.attach = acpi_processor_add,
> -#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU
>  	.post_eject = acpi_processor_post_eject,
> -#endif
>  	.hotplug = {
>  		.enabled = true,
>  	},
> diff --git a/include/linux/cpu.h b/include/linux/cpu.h
> index a71691d7c2ca..e117c06e0c6b 100644
> --- a/include/linux/cpu.h
> +++ b/include/linux/cpu.h
> @@ -81,6 +81,7 @@ struct device *cpu_device_create(struct device *parent, void *drvdata,
>  				 const struct attribute_group **groups,
>  				 const char *fmt, ...);
>  extern int arch_register_cpu(int cpu);
> +extern void arch_unregister_cpu(int cpu);
>  #ifdef CONFIG_HOTPLUG_CPU
>  extern void unregister_cpu(struct cpu *cpu);
>  extern ssize_t arch_cpu_probe(const char *, size_t);
Gavin Shan Sept. 19, 2023, 12:45 a.m. UTC | #2
On 9/14/23 02:38, James Morse wrote:
> When called acpi_processor_post_eject() unconditionally make a CPU
> not-present and unregisters it.
> 
> To add support for AML events where the CPU has become disabled, but
> remains present, the _STA method should be checked before calling
> acpi_processor_remove().
> 
> Rename acpi_processor_post_eject() acpi_processor_remove_possible(), and
> check the _STA before calling.
> 
> Adding the function prototype for arch_unregister_cpu() allows the
> preprocessor guards to be removed.
> 
> After this change CPUs will remain registered and visible to
> user-space as offline if buggy firmware triggers an eject-request,
> but doesn't clear the corresponding _STA bits after _EJ0 has been
> called.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
>   drivers/acpi/acpi_processor.c | 31 +++++++++++++++++++++++++------
>   include/linux/cpu.h           |  1 +
>   2 files changed, 26 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
> index 00dcc23d49a8..2cafea1edc24 100644
> --- a/drivers/acpi/acpi_processor.c
> +++ b/drivers/acpi/acpi_processor.c
> @@ -457,13 +457,12 @@ static int acpi_processor_add(struct acpi_device *device,
>   	return result;
>   }
>   
> -#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU
>   /* Removal */
> -static void acpi_processor_post_eject(struct acpi_device *device)
> +static void acpi_processor_make_not_present(struct acpi_device *device)
>   {
>   	struct acpi_processor *pr;
>   
> -	if (!device || !acpi_driver_data(device))
> +	if (!IS_ENABLED(CONFIG_ACPI_HOTPLUG_PRESENT_CPU))
>   		return;
>   

In order to use IS_ENABLED(),

>   	pr = acpi_driver_data(device);
> @@ -501,7 +500,29 @@ static void acpi_processor_post_eject(struct acpi_device *device)
>   	free_cpumask_var(pr->throttling.shared_cpu_map);
>   	kfree(pr);
>   }
> -#endif /* CONFIG_ACPI_HOTPLUG_PRESENT_CPU */
> +
> +static void acpi_processor_post_eject(struct acpi_device *device)
> +{
> +	struct acpi_processor *pr;
> +	unsigned long long sta;
> +	acpi_status status;
> +
> +	if (!device)
> +		return;
> +
> +	pr = acpi_driver_data(device);
> +	if (!pr || pr->id >= nr_cpu_ids || invalid_phys_cpuid(pr->phys_id))
> +		return;
> +

Do we really need to validate the logic and hardware CPU IDs here? I think
the ACPI processor device can't be added successfully if one of them is
invalid.

> +	status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta);
> +	if (ACPI_FAILURE(status))
> +		return;
> +
> +	if (cpu_present(pr->id) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
> +		acpi_processor_make_not_present(device);
> +		return;
> +	}
> +}
>   
>   #ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
>   bool __init processor_physically_present(acpi_handle handle)
> @@ -626,9 +647,7 @@ static const struct acpi_device_id processor_device_ids[] = {
>   static struct acpi_scan_handler processor_handler = {
>   	.ids = processor_device_ids,
>   	.attach = acpi_processor_add,
> -#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU
>   	.post_eject = acpi_processor_post_eject,
> -#endif
>   	.hotplug = {
>   		.enabled = true,
>   	},
> diff --git a/include/linux/cpu.h b/include/linux/cpu.h
> index a71691d7c2ca..e117c06e0c6b 100644
> --- a/include/linux/cpu.h
> +++ b/include/linux/cpu.h
> @@ -81,6 +81,7 @@ struct device *cpu_device_create(struct device *parent, void *drvdata,
>   				 const struct attribute_group **groups,
>   				 const char *fmt, ...);
>   extern int arch_register_cpu(int cpu);
> +extern void arch_unregister_cpu(int cpu);

arch_unregister_cpu() is protected by CONFIG_HOTPLUG_CPU in the individual architectures,
for example arch/ia64/kernel/topology.c

>   #ifdef CONFIG_HOTPLUG_CPU
>   extern void unregister_cpu(struct cpu *cpu);
>   extern ssize_t arch_cpu_probe(const char *, size_t);

Thanks,
Gavin
Russell King (Oracle) Nov. 3, 2023, 2:09 p.m. UTC | #3
On Thu, Sep 14, 2023 at 03:31:10PM +0100, Jonathan Cameron wrote:
> On Wed, 13 Sep 2023 16:38:10 +0000
> James Morse <james.morse@arm.com> wrote:
> > -#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU
> >  /* Removal */
> > -static void acpi_processor_post_eject(struct acpi_device *device)
> > +static void acpi_processor_make_not_present(struct acpi_device *device)
> >  {
> >  	struct acpi_processor *pr;
> >  
> > -	if (!device || !acpi_driver_data(device))
> > +	if (!IS_ENABLED(CONFIG_ACPI_HOTPLUG_PRESENT_CPU))
> 
> Would it be possible to do all the ifdef to IS_ENABLED changes in a separate
> patch?  I haven't figure out if any of them have dependencies on the other
> changes, but they do create a bunch of noise I'd rather not see in the more
> complex corners of this.

I'm also wondering why we want to do this check here, rather than...

> > +static void acpi_processor_post_eject(struct acpi_device *device)
> > +{
> > +	struct acpi_processor *pr;
> > +	unsigned long long sta;
> > +	acpi_status status;

... here, because none of the code below has any effect if
acpi_processor_make_not_present() merely returns. So the below seems
like a waste of code space when CONFIG_ACPI_HOTPLUG_PRESENT_CPU is
disabled.

> > +
> > +	if (!device)
> > +		return;
> > +
> > +	pr = acpi_driver_data(device);
> > +	if (!pr || pr->id >= nr_cpu_ids || invalid_phys_cpuid(pr->phys_id))
> > +		return;
> > +
> > +	status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta);
> > +	if (ACPI_FAILURE(status))
> > +		return;
> > +
> > +	if (cpu_present(pr->id) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
> > +		acpi_processor_make_not_present(device);
> > +		return;
> > +	}
> > +}
Russell King (Oracle) Nov. 3, 2023, 2:37 p.m. UTC | #4
On Tue, Sep 19, 2023 at 10:45:39AM +1000, Gavin Shan wrote:
> On 9/14/23 02:38, James Morse wrote:
> > When called acpi_processor_post_eject() unconditionally make a CPU
> > not-present and unregisters it.
> > 
> > To add support for AML events where the CPU has become disabled, but
> > remains present, the _STA method should be checked before calling
> > acpi_processor_remove().
> > 
> > Rename acpi_processor_post_eject() acpi_processor_remove_possible(), and
> > check the _STA before calling.
> > 
> > Adding the function prototype for arch_unregister_cpu() allows the
> > preprocessor guards to be removed.
> > 
> > After this change CPUs will remain registered and visible to
> > user-space as offline if buggy firmware triggers an eject-request,
> > but doesn't clear the corresponding _STA bits after _EJ0 has been
> > called.
> > 
> > Signed-off-by: James Morse <james.morse@arm.com>
> > ---
> >   drivers/acpi/acpi_processor.c | 31 +++++++++++++++++++++++++------
> >   include/linux/cpu.h           |  1 +
> >   2 files changed, 26 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
> > index 00dcc23d49a8..2cafea1edc24 100644
> > --- a/drivers/acpi/acpi_processor.c
> > +++ b/drivers/acpi/acpi_processor.c
> > @@ -457,13 +457,12 @@ static int acpi_processor_add(struct acpi_device *device,
> >   	return result;
> >   }
> > -#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU
> >   /* Removal */
> > -static void acpi_processor_post_eject(struct acpi_device *device)
> > +static void acpi_processor_make_not_present(struct acpi_device *device)
> >   {
> >   	struct acpi_processor *pr;
> > -	if (!device || !acpi_driver_data(device))
> > +	if (!IS_ENABLED(CONFIG_ACPI_HOTPLUG_PRESENT_CPU))
> >   		return;
> 
> In order to use IS_ENABLED(),

And the rest of this statement is where?

> >   	pr = acpi_driver_data(device);
> > @@ -501,7 +500,29 @@ static void acpi_processor_post_eject(struct acpi_device *device)
> >   	free_cpumask_var(pr->throttling.shared_cpu_map);
> >   	kfree(pr);
> >   }
> > -#endif /* CONFIG_ACPI_HOTPLUG_PRESENT_CPU */
> > +
> > +static void acpi_processor_post_eject(struct acpi_device *device)
> > +{
> > +	struct acpi_processor *pr;
> > +	unsigned long long sta;
> > +	acpi_status status;
> > +
> > +	if (!device)
> > +		return;
> > +
> > +	pr = acpi_driver_data(device);
> > +	if (!pr || pr->id >= nr_cpu_ids || invalid_phys_cpuid(pr->phys_id))
> > +		return;
> > +
> 
> Do we really need to validate the logic and hardware CPU IDs here? I think
> the ACPI processor device can't be added successfully if one of them is
> invalid.
> 
> > +	status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta);
> > +	if (ACPI_FAILURE(status))
> > +		return;
> > +
> > +	if (cpu_present(pr->id) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
> > +		acpi_processor_make_not_present(device);
> > +		return;
> > +	}
> > +}
> >   #ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
> >   bool __init processor_physically_present(acpi_handle handle)
> > @@ -626,9 +647,7 @@ static const struct acpi_device_id processor_device_ids[] = {
> >   static struct acpi_scan_handler processor_handler = {
> >   	.ids = processor_device_ids,
> >   	.attach = acpi_processor_add,
> > -#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU
> >   	.post_eject = acpi_processor_post_eject,
> > -#endif
> >   	.hotplug = {
> >   		.enabled = true,
> >   	},
> > diff --git a/include/linux/cpu.h b/include/linux/cpu.h
> > index a71691d7c2ca..e117c06e0c6b 100644
> > --- a/include/linux/cpu.h
> > +++ b/include/linux/cpu.h
> > @@ -81,6 +81,7 @@ struct device *cpu_device_create(struct device *parent, void *drvdata,
> >   				 const struct attribute_group **groups,
> >   				 const char *fmt, ...);
> >   extern int arch_register_cpu(int cpu);
> > +extern void arch_unregister_cpu(int cpu);
> 
> arch_unregister_cpu() is protected by CONFIG_HOTPLUG_CPU in the individual architectures,
> for example arch/ia64/kernel/topology.c

Yes, I agree, there _may_ be a reference to arch_unregister_cpu() if
the compiler doesn't optimise the "if(0) return".

As things stand in my "head" tree (which I'll be posting once 6.7-rc1
is out) at the point that this patch exists in the series, there are
no architectures which provide arch_unregister_cpu(), and the only
implementation of it is the __weak one in drivers/base/cpu.c

That implementation is also ifdef'd with CONFIG_HOTPLUG_CPU and also
CONFIG_GENERIC_CPU_DEVICES.

Meanwhile, acpi_processor.c is always built with ACPI, and while we
have IS_ENABLED() clauses with James' patches for both of these
symbols, if the compiler doesn't optimise the code away, we will end
up with a reference and a link-time error. That being said, the 0-day
bot has not reported anything as yet (and it builds my tree.)

So, is this a problem that needs to be solved or not?
diff mbox series

Patch

diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 00dcc23d49a8..2cafea1edc24 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -457,13 +457,12 @@  static int acpi_processor_add(struct acpi_device *device,
 	return result;
 }
 
-#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU
 /* Removal */
-static void acpi_processor_post_eject(struct acpi_device *device)
+static void acpi_processor_make_not_present(struct acpi_device *device)
 {
 	struct acpi_processor *pr;
 
-	if (!device || !acpi_driver_data(device))
+	if (!IS_ENABLED(CONFIG_ACPI_HOTPLUG_PRESENT_CPU))
 		return;
 
 	pr = acpi_driver_data(device);
@@ -501,7 +500,29 @@  static void acpi_processor_post_eject(struct acpi_device *device)
 	free_cpumask_var(pr->throttling.shared_cpu_map);
 	kfree(pr);
 }
-#endif /* CONFIG_ACPI_HOTPLUG_PRESENT_CPU */
+
+static void acpi_processor_post_eject(struct acpi_device *device)
+{
+	struct acpi_processor *pr;
+	unsigned long long sta;
+	acpi_status status;
+
+	if (!device)
+		return;
+
+	pr = acpi_driver_data(device);
+	if (!pr || pr->id >= nr_cpu_ids || invalid_phys_cpuid(pr->phys_id))
+		return;
+
+	status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta);
+	if (ACPI_FAILURE(status))
+		return;
+
+	if (cpu_present(pr->id) && !(sta & ACPI_STA_DEVICE_PRESENT)) {
+		acpi_processor_make_not_present(device);
+		return;
+	}
+}
 
 #ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
 bool __init processor_physically_present(acpi_handle handle)
@@ -626,9 +647,7 @@  static const struct acpi_device_id processor_device_ids[] = {
 static struct acpi_scan_handler processor_handler = {
 	.ids = processor_device_ids,
 	.attach = acpi_processor_add,
-#ifdef CONFIG_ACPI_HOTPLUG_PRESENT_CPU
 	.post_eject = acpi_processor_post_eject,
-#endif
 	.hotplug = {
 		.enabled = true,
 	},
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a71691d7c2ca..e117c06e0c6b 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -81,6 +81,7 @@  struct device *cpu_device_create(struct device *parent, void *drvdata,
 				 const struct attribute_group **groups,
 				 const char *fmt, ...);
 extern int arch_register_cpu(int cpu);
+extern void arch_unregister_cpu(int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
 extern void unregister_cpu(struct cpu *cpu);
 extern ssize_t arch_cpu_probe(const char *, size_t);