diff mbox series

[v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode

Message ID 20191023074945.17016-1-fengwei.yin@intel.com (mailing list archive)
State Superseded, archived
Headers show
Series [v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode | expand

Commit Message

Yin, Fengwei Oct. 23, 2019, 7:49 a.m. UTC
In acpi_idle_do_entry(), an I/O port read is used as a dummy wait to
guarantee hardware behavior. However, it can trigger an unnecessary
VM exit when the kernel is running as a guest in a virtualized environment.

When running in a virtualized environment, the operation that enters the
deeper C-state (inb()) already traps to the hypervisor, so the dummy wait
after the inb() call is not needed. Remove the dummy I/O port access in
that case to avoid an unnecessary VM exit.

The dummy I/O port access is kept in the native environment to maintain timing.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
ChangeLog:
v2 -> v3:
 - Remove dummy io port access totally for virtualization env.

v1 -> v2:
 - Use ndelay instead of dead loop for dummy delay.

 drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

Comments

David Laight Oct. 23, 2019, 8:45 a.m. UTC | #1
From: Yin Fengwei
> Sent: 23 October 2019 08:50


> In function acpi_idle_do_entry(), an ioport access is used for dummy
> wait to guarantee hardware behavior. But it could trigger unnecessary
> vmexit if kernel is running as guest in virtualization environment.
> 
> If it's in virtualization environment, the deeper C state enter
> operation (inb()) will trap to hypervisor. It's not needed to do
> dummy wait after the inb() call. So we remove the dummy io port
> access to avoid unnecessary VMexit.
> 
> We keep dummy io port access to maintain timing for native environment.
> 
> Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
> ---
> ChangeLog:
> v2 -> v3:
>  - Remove dummy io port access totally for virtualization env.
> 
> v1 -> v2:
>  - Use ndelay instead of dead loop for dummy delay.
> 
>  drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
>  1 file changed, 33 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
> index ed56c6d20b08..0c4a97dd6917 100644
> --- a/drivers/acpi/processor_idle.c
> +++ b/drivers/acpi/processor_idle.c
> @@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
>  static
>  DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
> 
> +static void (*dummy_wait)(u64 address);
> +
> +static void default_dummy_wait(u64 address)
> +{
> +	inl(address);
> +}
> +
> +static void default_noop_wait(u64 address)
> +{
> +}
> +

Overengineered...
Just add:

/*
 * wait_for_freeze - dummy wait issued after the P_LVL2 read.
 *
 * On CONFIG_X86 builds, returns immediately when the boot CPU reports
 * X86_FEATURE_HYPERVISOR (the kernel is running as a guest). Otherwise it
 * performs a throwaway inl() of the FADT xpm_timer_block address; the value
 * read is discarded. Proposed as a drop-in replacement for the bare inl()
 * call that follows the C-state entry inb().
 */
static void wait_for_freeze(void)
{
#ifdef	CONFIG_X86
	/* No delay is needed if we are a guest */
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;
#endif
	/* Dummy wait op - must do something useless after P_LVL2 read
	   because chipsets cannot guarantee that STPCLK# signal
	   gets asserted in time to freeze execution properly. */
	inl(acpi_gbl_FADT.xpm_timer_block.address);
}

and use it to replace the inl().

...
> +#ifdef	CONFIG_X86
> +	/* For x86, if we are running in guest, we don't need extra
> +	 * access ioport as dummy wait.
> +	 */
> +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> +		pr_err("We are in virtual env");
> +		dummy_wait = default_noop_wait;
> +	} else {
> +		pr_err("We are not in virtual env");
> +	}
> +#endif

WTF are the pr_err() for???

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Rafael J. Wysocki Oct. 23, 2019, 9:03 a.m. UTC | #2
On Wed, Oct 23, 2019 at 10:45 AM David Laight <David.Laight@aculab.com> wrote:
>
> From: Yin Fengwei
> > Sent: 23 October 2019 08:50
>
>
> > In function acpi_idle_do_entry(), an ioport access is used for dummy
> > wait to guarantee hardware behavior. But it could trigger unnecessary
> > vmexit if kernel is running as guest in virtualization environment.
> >
> > If it's in virtualization environment, the deeper C state enter
> > operation (inb()) will trap to hypervisor. It's not needed to do
> > dummy wait after the inb() call. So we remove the dummy io port
> > access to avoid unnecessary VMexit.
> >
> > We keep dummy io port access to maintain timing for native environment.
> >
> > Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
> > ---
> > ChangeLog:
> > v2 -> v3:
> >  - Remove dummy io port access totally for virtualization env.
> >
> > v1 -> v2:
> >  - Use ndelay instead of dead loop for dummy delay.
> >
> >  drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
> >  1 file changed, 33 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
> > index ed56c6d20b08..0c4a97dd6917 100644
> > --- a/drivers/acpi/processor_idle.c
> > +++ b/drivers/acpi/processor_idle.c
> > @@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
> >  static
> >  DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
> >
> > +static void (*dummy_wait)(u64 address);
> > +
> > +static void default_dummy_wait(u64 address)
> > +{
> > +     inl(address);
> > +}
> > +
> > +static void default_noop_wait(u64 address)
> > +{
> > +}
> > +
>
> Overengineered...
> Just add:
>
> static void wait_for_freeze(void)
> {
> #ifdef  CONFIG_X86
>         /* No delay is needed if we are a guest */
>         if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
>                 return;
> #endif
>
>         /* Dummy wait op - must do something useless after P_LVL2 read
>            because chipsets cannot guarantee that STPCLK# signal
>            gets asserted in time to freeze execution properly. */
>         inl(acpi_gbl_FADT.xpm_timer_block.address);
> }
>
> and use it to replace the inl().

I was about to make a similar comment.
Yin, Fengwei Oct. 24, 2019, 1:22 a.m. UTC | #3
On 2019/10/23 下午4:45, David Laight wrote:
> From: Yin Fengwei
>> Sent: 23 October 2019 08:50
> 
> 
>> In function acpi_idle_do_entry(), an ioport access is used for dummy
>> wait to guarantee hardware behavior. But it could trigger unnecessary
>> vmexit if kernel is running as guest in virtualization environment.
>>
>> If it's in virtualization environment, the deeper C state enter
>> operation (inb()) will trap to hypervisor. It's not needed to do
>> dummy wait after the inb() call. So we remove the dummy io port
>> access to avoid unnecessary VMexit.
>>
>> We keep dummy io port access to maintain timing for native environment.
>>
>> Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
>> ---
>> ChangeLog:
>> v2 -> v3:
>>   - Remove dummy io port access totally for virtualization env.
>>
>> v1 -> v2:
>>   - Use ndelay instead of dead loop for dummy delay.
>>
>>   drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
>>   1 file changed, 33 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
>> index ed56c6d20b08..0c4a97dd6917 100644
>> --- a/drivers/acpi/processor_idle.c
>> +++ b/drivers/acpi/processor_idle.c
>> @@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
>>   static
>>   DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
>>
>> +static void (*dummy_wait)(u64 address);
>> +
>> +static void default_dummy_wait(u64 address)
>> +{
>> +	inl(address);
>> +}
>> +
>> +static void default_noop_wait(u64 address)
>> +{
>> +}
>> +
> 
> Overengineered...
> Just add:
> 
> static void wait_for_freeze(void)
> {
> #ifdef	CONFIG_X86
> 	/* No delay is needed if we are a guest */
> 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
> 		return;
> #endif
> 	/* Dummy wait op - must do something useless after P_LVL2 read
> 	   because chipsets cannot guarantee that STPCLK# signal
> 	   gets asserted in time to freeze execution properly. */
> 	inl(acpi_gbl_FADT.xpm_timer_block.address);
> }
> 
> and use it to replace the inl().
OK. I was trying to avoid any impact to native case.

> 
> ...
>> +#ifdef	CONFIG_X86
>> +	/* For x86, if we are running in guest, we don't need extra
>> +	 * access ioport as dummy wait.
>> +	 */
>> +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
>> +		pr_err("We are in virtual env");
>> +		dummy_wait = default_noop_wait;
>> +	} else {
>> +		pr_err("We are not in virtual env");
>> +	}
>> +#endif
> 
> WTF are the pr_err() for???
Sorry. Didn't remove my debug code...

Regards
Yin, Fengwei

> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
>
Yin, Fengwei Oct. 24, 2019, 1:22 a.m. UTC | #4
On 2019/10/23 下午5:03, Rafael J. Wysocki wrote:
> On Wed, Oct 23, 2019 at 10:45 AM David Laight <David.Laight@aculab.com> wrote:
>>
>> From: Yin Fengwei
>>> Sent: 23 October 2019 08:50
>>
>>
>>> In function acpi_idle_do_entry(), an ioport access is used for dummy
>>> wait to guarantee hardware behavior. But it could trigger unnecessary
>>> vmexit if kernel is running as guest in virtualization environment.
>>>
>>> If it's in virtualization environment, the deeper C state enter
>>> operation (inb()) will trap to hypervisor. It's not needed to do
>>> dummy wait after the inb() call. So we remove the dummy io port
>>> access to avoid unnecessary VMexit.
>>>
>>> We keep dummy io port access to maintain timing for native environment.
>>>
>>> Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
>>> ---
>>> ChangeLog:
>>> v2 -> v3:
>>>   - Remove dummy io port access totally for virtualization env.
>>>
>>> v1 -> v2:
>>>   - Use ndelay instead of dead loop for dummy delay.
>>>
>>>   drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
>>>   1 file changed, 33 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
>>> index ed56c6d20b08..0c4a97dd6917 100644
>>> --- a/drivers/acpi/processor_idle.c
>>> +++ b/drivers/acpi/processor_idle.c
>>> @@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
>>>   static
>>>   DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
>>>
>>> +static void (*dummy_wait)(u64 address);
>>> +
>>> +static void default_dummy_wait(u64 address)
>>> +{
>>> +     inl(address);
>>> +}
>>> +
>>> +static void default_noop_wait(u64 address)
>>> +{
>>> +}
>>> +
>>
>> Overengineered...
>> Just add:
>>
>> static void wait_for_freeze(void)
>> {
>> #ifdef  CONFIG_X86
>>          /* No delay is needed if we are a guest */
>>          if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
>>                  return;
>> #endif
>>
>>          /* Dummy wait op - must do something useless after P_LVL2 read
>>             because chipsets cannot guarantee that STPCLK# signal
>>             gets asserted in time to freeze execution properly. */
>>          inl(acpi_gbl_FADT.xpm_timer_block.address);
>> }
>>
>> and use it to replace the inl().
> 
> I was about to make a similar comment.
OK. Will send v4 soon.

Regards
Yin, Fengwei

>
diff mbox series

Patch

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ed56c6d20b08..0c4a97dd6917 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -58,6 +58,17 @@  struct cpuidle_driver acpi_idle_driver = {
 static
 DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
 
+static void (*dummy_wait)(u64 address);
+
+static void default_dummy_wait(u64 address)
+{
+	inl(address);
+}
+
+static void default_noop_wait(u64 address)
+{
+}
+
 static int disabled_by_idle_boot_param(void)
 {
 	return boot_option_idle_override == IDLE_POLL ||
@@ -660,8 +671,13 @@  static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		inb(cx->address);
 		/* Dummy wait op - must do something useless after P_LVL2 read
 		   because chipsets cannot guarantee that STPCLK# signal
-		   gets asserted in time to freeze execution properly. */
-		inl(acpi_gbl_FADT.xpm_timer_block.address);
+		   gets asserted in time to freeze execution properly.
+
+		   This dummy wait is only needed for native env. If we are running
+		   as guest of a hypervisor, we don't need wait op here. We have
+		   different implementation for dummy_wait on native/virtual env. */
+
+		dummy_wait(acpi_gbl_FADT.xpm_timer_block.address);
 	}
 }
 
@@ -683,7 +699,7 @@  static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 		else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
 			inb(cx->address);
 			/* See comment in acpi_idle_do_entry() */
-			inl(acpi_gbl_FADT.xpm_timer_block.address);
+			dummy_wait(acpi_gbl_FADT.xpm_timer_block.address);
 		} else
 			return -ENODEV;
 	}
@@ -912,6 +928,20 @@  static inline void acpi_processor_cstate_first_run_checks(void)
 			  max_cstate);
 	first_run++;
 
+	dummy_wait = default_dummy_wait;
+
+#ifdef	CONFIG_X86
+	/* For x86, if we are running in guest, we don't need extra
+	 * access ioport as dummy wait.
+	 */
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+		pr_err("We are in virtual env");
+		dummy_wait = default_noop_wait;
+	} else {
+		pr_err("We are not in virtual env");
+	}
+#endif
+
 	if (acpi_gbl_FADT.cst_control && !nocst) {
 		status = acpi_os_write_port(acpi_gbl_FADT.smi_command,
 					    acpi_gbl_FADT.cst_control, 8);