diff mbox series

[v4] ACPI/processor_idle: Remove dummy wait if kernel is in guest

Message ID 20191024070420.4512-1-fengwei.yin@intel.com (mailing list archive)
State Mainlined, archived
Headers show
Series [v4] ACPI/processor_idle: Remove dummy wait if kernel is in guest | expand

Commit Message

Yin, Fengwei Oct. 24, 2019, 7:04 a.m. UTC
In acpi_idle_do_entry(), an I/O port access is used as a dummy
wait to guarantee hardware behavior. However, it can trigger an
unnecessary VM exit if the kernel is running as a guest in a
virtualization environment.

If the kernel is running as a guest, the operation that enters the
deeper C-state (inb()) already traps to the hypervisor, so no dummy
wait is needed after the inb() call. In that case we can simply skip
the dummy I/O port access to avoid the unnecessary VM exit.

The dummy I/O port access is kept in the native environment to
maintain timing.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
ChangeLog:
v3 -> v4:
 - Drop the overengineered function pointer and check whether
   we are running as a guest before the dummy inl() call.

v2 -> v3:
 - Remove the dummy I/O port access entirely for virtualization environments.

v1 -> v2:
 - Use ndelay() instead of a busy loop for the dummy delay.

 drivers/acpi/processor_idle.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

Comments

Rafael J. Wysocki Oct. 25, 2019, 9:06 a.m. UTC | #1
On Thu, Oct 24, 2019 at 9:04 AM Yin Fengwei <fengwei.yin@intel.com> wrote:
>
> In function acpi_idle_do_entry(), an ioport access is used for
> dummy wait to guarantee hardware behavior. But it could trigger
> unnecessary VMexit if kernel is running as guest in virtualization
> environment.
>
> If it's in virtualization environment, the deeper C state enter
> operation (inb()) will trap to hypervisor. It's not needed to do
> dummy wait after the inb() call. So we could just remove the
> dummy io port access to avoid unnecessary VMexit.
>
> And keep dummy io port access to maintain timing for native
> environment.
>
> Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
> ---
> ChangeLog:
> v3 -> v4:
>  - Drop overengineered function pointer and do check whether
>    we are in guest before dummy inl call.
>
> v2 -> v3:
>  - Remove dummy io port access totally for virtualization env.
>
> v1 -> v2:
>  - Use ndelay instead of dead loop for dummy delay.
>
>  drivers/acpi/processor_idle.c | 21 +++++++++++++++------
>  1 file changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
> index ed56c6d20b08..2ae95df2e74f 100644
> --- a/drivers/acpi/processor_idle.c
> +++ b/drivers/acpi/processor_idle.c
> @@ -642,6 +642,19 @@ static int acpi_idle_bm_check(void)
>         return bm_status;
>  }
>
> +static void wait_for_freeze(void)
> +{
> +#ifdef CONFIG_X86
> +       /* No delay is needed if we are in guest */
> +       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
> +               return;
> +#endif
> +       /* Dummy wait op - must do something useless after P_LVL2 read
> +          because chipsets cannot guarantee that STPCLK# signal
> +          gets asserted in time to freeze execution properly. */
> +       inl(acpi_gbl_FADT.xpm_timer_block.address);
> +}
> +
>  /**
>   * acpi_idle_do_entry - enter idle state using the appropriate method
>   * @cx: cstate data
> @@ -658,10 +671,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
>         } else {
>                 /* IO port based C-state */
>                 inb(cx->address);
> -               /* Dummy wait op - must do something useless after P_LVL2 read
> -                  because chipsets cannot guarantee that STPCLK# signal
> -                  gets asserted in time to freeze execution properly. */
> -               inl(acpi_gbl_FADT.xpm_timer_block.address);
> +               wait_for_freeze();
>         }
>  }
>
> @@ -682,8 +692,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
>                         safe_halt();
>                 else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
>                         inb(cx->address);
> -                       /* See comment in acpi_idle_do_entry() */
> -                       inl(acpi_gbl_FADT.xpm_timer_block.address);
> +                       wait_for_freeze();
>                 } else
>                         return -ENODEV;
>         }
> --

Applying as 5.5 material, thanks!
Yin, Fengwei Oct. 25, 2019, 9:59 a.m. UTC | #2
On 10/25/2019 5:06 PM, Rafael J. Wysocki wrote:
> On Thu, Oct 24, 2019 at 9:04 AM Yin Fengwei <fengwei.yin@intel.com> wrote:
>>
>> In function acpi_idle_do_entry(), an ioport access is used for
>> dummy wait to guarantee hardware behavior. But it could trigger
>> unnecessary VMexit if kernel is running as guest in virtualization
>> environment.
>>
>> If it's in virtualization environment, the deeper C state enter
>> operation (inb()) will trap to hypervisor. It's not needed to do
>> dummy wait after the inb() call. So we could just remove the
>> dummy io port access to avoid unnecessary VMexit.
>>
>> And keep dummy io port access to maintain timing for native
>> environment.
>>
>> Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
>> ---
>> ChangeLog:
>> v3 -> v4:
>>   - Drop overengineered function pointer and do check whether
>>     we are in guest before dummy inl call.
>>
>> v2 -> v3:
>>   - Remove dummy io port access totally for virtualization env.
>>
>> v1 -> v2:
>>   - Use ndelay instead of dead loop for dummy delay.
>>
>>   drivers/acpi/processor_idle.c | 21 +++++++++++++++------
>>   1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
>> index ed56c6d20b08..2ae95df2e74f 100644
>> --- a/drivers/acpi/processor_idle.c
>> +++ b/drivers/acpi/processor_idle.c
>> @@ -642,6 +642,19 @@ static int acpi_idle_bm_check(void)
>>          return bm_status;
>>   }
>>
>> +static void wait_for_freeze(void)
>> +{
>> +#ifdef CONFIG_X86
>> +       /* No delay is needed if we are in guest */
>> +       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
>> +               return;
>> +#endif
>> +       /* Dummy wait op - must do something useless after P_LVL2 read
>> +          because chipsets cannot guarantee that STPCLK# signal
>> +          gets asserted in time to freeze execution properly. */
>> +       inl(acpi_gbl_FADT.xpm_timer_block.address);
>> +}
>> +
>>   /**
>>    * acpi_idle_do_entry - enter idle state using the appropriate method
>>    * @cx: cstate data
>> @@ -658,10 +671,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
>>          } else {
>>                  /* IO port based C-state */
>>                  inb(cx->address);
>> -               /* Dummy wait op - must do something useless after P_LVL2 read
>> -                  because chipsets cannot guarantee that STPCLK# signal
>> -                  gets asserted in time to freeze execution properly. */
>> -               inl(acpi_gbl_FADT.xpm_timer_block.address);
>> +               wait_for_freeze();
>>          }
>>   }
>>
>> @@ -682,8 +692,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
>>                          safe_halt();
>>                  else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
>>                          inb(cx->address);
>> -                       /* See comment in acpi_idle_do_entry() */
>> -                       inl(acpi_gbl_FADT.xpm_timer_block.address);
>> +                       wait_for_freeze();
>>                  } else
>>                          return -ENODEV;
>>          }
>> --
> 
> Applying as 5.5 material, thanks!
Thanks a lot.

Regards
Yin, Fengwei

>
diff mbox series

Patch

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ed56c6d20b08..2ae95df2e74f 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -642,6 +642,19 @@  static int acpi_idle_bm_check(void)
 	return bm_status;
 }
 
+static void wait_for_freeze(void)
+{
+#ifdef	CONFIG_X86
+	/* No delay is needed in a guest; the read would only cause a VM exit */
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+		return;
+#endif
+	/* Dummy wait op - read the FADT xpm_timer_block port after the P_LVL2
+	   read, because chipsets cannot guarantee that the STPCLK# signal
+	   gets asserted in time to freeze execution properly. */
+	inl(acpi_gbl_FADT.xpm_timer_block.address);
+}
+
 /**
  * acpi_idle_do_entry - enter idle state using the appropriate method
  * @cx: cstate data
@@ -658,10 +671,7 @@  static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
 	} else {
 		/* IO port based C-state */
 		inb(cx->address);
-		/* Dummy wait op - must do something useless after P_LVL2 read
-		   because chipsets cannot guarantee that STPCLK# signal
-		   gets asserted in time to freeze execution properly. */
-		inl(acpi_gbl_FADT.xpm_timer_block.address);
+		wait_for_freeze();
 	}
 }
 
@@ -682,8 +692,7 @@  static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 			safe_halt();
 		else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
 			inb(cx->address);
-			/* See comment in acpi_idle_do_entry() */
-			inl(acpi_gbl_FADT.xpm_timer_block.address);
+			wait_for_freeze();
 		} else
 			return -ENODEV;
 	}