diff mbox series

[v2,3/3] intel_idle: add C0.2 state for Sapphire Rapids Xeon

Message ID 20230310122110.895093-4-dedekind1@gmail.com (mailing list archive)
State Changes Requested, archived
Series Sapphire Rapids C0.x idle states support

Commit Message

Artem Bityutskiy March 10, 2023, 12:21 p.m. UTC
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>

Add Sapphire Rapids Xeon C0.2 state support. This state has a lower exit
latency compared to C1, and saves energy compared to POLL (in the range of
5-20%).

This patch also improves performance (e.g., as measured by 'hackbench'),
because idle CPU power savings in C0.2 increase the busy CPU's power budget
and therefore improve its turbo boost.

Suggested-by: Len Brown <len.brown@intel.com>
Suggested-by: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/idle/intel_idle.c | 58 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

Comments

Peter Zijlstra March 20, 2023, 2:50 p.m. UTC | #1
On Fri, Mar 10, 2023 at 02:21:10PM +0200, Artem Bityutskiy wrote:
> +/**
> + * umwait_limit_init - initialize time limit value for 'umwait'.
> + *
> + * C0.1 and C0.2 (later C0.x) idle states are requested via the 'umwait'
> + * instruction. The 'umwait' instruction requires the "deadline" - the TSC
> + * counter value to break out of C0.x (unless it broke out because of an
> + * interrupt or some other event).
> + *
> + * The deadline is specified as an absolute TSC value, and it is calculated as
> + * current TSC value + 'umwait_limit'. This function initializes the
> + * 'umwait_limit' variable to the count of TSC cycles per tick. Motivation:
> + *   * the tick is not disabled for shallow states like C0.x, so idle will
> + *     not last longer than a tick anyway
> + *   * limit idle time to give cpuidle a chance to re-evaluate its C-state
> + *     selection decision and possibly select a deeper C-state.
> + */
> +static void __init umwait_limit_init(void)
> +{
> +	umwait_limit = (u64)TICK_NSEC * tsc_khz;
> +	do_div(umwait_limit, MICRO);
> +}

Would it not make sense to put this limit in the MSR instead? By
randomly increasing the MSR limit you also change userspace behaviour vs
NOHZ_FULL.

That was part of the reason why Andy insisted on having the MSR low.
Andy Lutomirski March 20, 2023, 6:27 p.m. UTC | #2
On Mon, Mar 20, 2023, at 7:50 AM, Peter Zijlstra wrote:
> On Fri, Mar 10, 2023 at 02:21:10PM +0200, Artem Bityutskiy wrote:
>> +/**
>> + * umwait_limit_init - initialize time limit value for 'umwait'.
>> + *
>> + * C0.1 and C0.2 (later C0.x) idle states are requested via the 'umwait'
>> + * instruction. The 'umwait' instruction requires the "deadline" - the TSC
>> + * counter value to break out of C0.x (unless it broke out because of an
>> + * interrupt or some other event).
>> + *
>> + * The deadline is specified as an absolute TSC value, and it is calculated as
>> + * current TSC value + 'umwait_limit'. This function initializes the
>> + * 'umwait_limit' variable to the count of TSC cycles per tick. Motivation:
>> + *   * the tick is not disabled for shallow states like C0.x, so idle will
>> + *     not last longer than a tick anyway
>> + *   * limit idle time to give cpuidle a chance to re-evaluate its C-state
>> + *     selection decision and possibly select a deeper C-state.
>> + */
>> +static void __init umwait_limit_init(void)
>> +{
>> +	umwait_limit = (u64)TICK_NSEC * tsc_khz;
>> +	do_div(umwait_limit, MICRO);
>> +}
>
> Would it not make sense to put this limit in the MSR instead? By
> randomly increasing the MSR limit you also change userspace behaviour vs
> NOHZ_FULL.
>
> That was part of the reason why Andy insisted on having the MSR low.

This is all busted.

UMWAIT has a U for *user mode*.  We have the MSR set to a small value because USER waits are a big can of worms, and long user waits don't actually behave in any particularly intelligent manner unless that core is dedicated to just one user task, and no virt is involved, and the user code involved is extremely careful.

But now UMWAIT got extended in a way to make it useful for the kernel, but it's controlled by the same MSR.  And this is busted.  What we want is for CPL0 UMWAIT to ignore the MSR or use a different MSR (for virt, sigh, except that this whole mechanism is presumably still useless on virt).  Or for a different instruction to be used from the kernel, maybe spelled MWAIT.

Can we please get some hardware folks to stop randomly adding features and start thinking about the fact that real users involve a kernel, in virt and bare metal, and user code, generally running in a preemptive kernel, sometimes under virt, and to FIGURE OUT WHAT THESE FEATURES SHOULD DO IN THESE CONTEXTS!

In the mean time, I assume that this stuff is baked into a CPU coming soon to real users, and there is no correct way to program it.  But we could set a small limit and eat a small power penalty if C0.2 transitions are really that fast.

Also, this series needs to be tested on virt.  Because UMWAIT, if it works at all on virt, is going to have all manner of odd consequences due to the fact that the hypervisor hasn't the faintest clue what's going on because there's no feedback.  For all that UIPI is nasty and half-baked, at least it tries to notify the next privilege level up as to what's going on.  Explicit wakeups virtualize much better than cacheline monitors.
Andy Lutomirski March 20, 2023, 8:29 p.m. UTC | #3
On Mon, Mar 20, 2023, at 11:27 AM, Andy Lutomirski wrote:
> On Mon, Mar 20, 2023, at 7:50 AM, Peter Zijlstra wrote:
>> On Fri, Mar 10, 2023 at 02:21:10PM +0200, Artem Bityutskiy wrote:
>>> +/**
>>> + * umwait_limit_init - initialize time limit value for 'umwait'.
>>> + *
>>> + * C0.1 and C0.2 (later C0.x) idle states are requested via the 'umwait'
>>> + * instruction. The 'umwait' instruction requires the "deadline" - the TSC
>>> + * counter value to break out of C0.x (unless it broke out because of an
>>> + * interrupt or some other event).
>>> + *
>>> + * The deadline is specified as an absolute TSC value, and it is calculated as
>>> + * current TSC value + 'umwait_limit'. This function initializes the
>>> + * 'umwait_limit' variable to the count of TSC cycles per tick. Motivation:
>>> + *   * the tick is not disabled for shallow states like C0.x, so idle will
>>> + *     not last longer than a tick anyway
>>> + *   * limit idle time to give cpuidle a chance to re-evaluate its C-state
>>> + *     selection decision and possibly select a deeper C-state.
>>> + */
>>> +static void __init umwait_limit_init(void)
>>> +{
>>> +	umwait_limit = (u64)TICK_NSEC * tsc_khz;
>>> +	do_div(umwait_limit, MICRO);
>>> +}
>>
>> Would it not make sense to put this limit in the MSR instead? By
>> randomly increasing the MSR limit you also change userspace behaviour vs
>> NOHZ_FULL.
>>
>> That was part of the reason why Andy insisted on having the MSR low.
>
> This is all busted.
>
> UMWAIT has a U for *user mode*.  We have the MSR set to a small value 
> because USER waits are a big can of worms, and long user waits don't 
> actually behave in any particularly intelligent manner unless that core 
> is dedicated to just one user task, and no virt is involved, and the 
> user code involved is extremely careful.
>
> But now UMWAIT got extended in a way to make it useful for the kernel, 
> but it's controlled by the same MSR.  And this is busted.  What we want 
> is for CPL0 UMWAIT to ignore the MSR or use a different MSR (for virt, 
> sigh, except that this whole mechanism is presumably still useless on 
> virt).  Or for a different instruction to be used from the kernel, 
> maybe spelled MWAIT.
>
> Can we please get some hardware folks to stop randomly adding features 
> and start thinking about the fact that real users involve a kernel, in 
> virt and bare metal, and user code, generally running in a preemptive 
> kernel, sometimes under virt, and to FIGURE OUT WHAT THESE FEATURES 
> SHOULD DO IN THESE CONTEXTS!
>
> In the mean time, I assume that this stuff is baked into a CPU coming 
> soon to real users, and there is no correct way to program it.  But we 
> could set a small limit and eat a small power penalty if C0.2 
> transitions are really that fast.
>
> Also, this series needs to be tested on virt.  Because UMWAIT, if it 
> works at all on virt, is going to have all manner of odd consequences 
> due to the fact that the hypervisor hasn't the faintest clue what's 
> going on because there's no feedback.  For all that UIPI is nasty and 
> half-baked, at least it tries to notify the next privilege level up as 
> to what's going on.  Explicit wakeups virtualize much better than 
> cacheline monitors.

At the very least, we need to know whether increasing the UMWAIT limit has a real benefit.  Because UMWAIT is really just a fancy busy wait, and it will still work with the low limit.  What happens if we just drop that part of this patch?
Andy Lutomirski March 20, 2023, 8:32 p.m. UTC | #4
On Mon, Mar 20, 2023, at 11:27 AM, Andy Lutomirski wrote:

> Also, this series needs to be tested on virt.  Because UMWAIT, if it 
> works at all on virt, is going to have all manner of odd consequences 
> due to the fact that the hypervisor hasn't the faintest clue what's 
> going on because there's no feedback.  For all that UIPI is nasty and 
> half-baked, at least it tries to notify the next privilege level up as 
> to what's going on.  Explicit wakeups virtualize much better than 
> cacheline monitors.

Sorry to keep replying to myself.  -ETOOLITTLESLEEP.

This needs more than testing on virt.  It needs explicit documentation and handling of virt, so we don't end up using UMWAIT on virt and doing something utterly daft, like busy-waiting instead of properly going to sleep, and not noticing because few people are actually testing on virt on a CPU that has this ability right now.

(Also, there's a surprising ability to thoroughly break idle without anyone reporting it for an impressively long time.  The system still serves cute cat photos, so it doesn't end up on the dashboard!)
Peter Zijlstra March 22, 2023, 10:18 a.m. UTC | #5
On Mon, Mar 20, 2023 at 11:27:54AM -0700, Andy Lutomirski wrote:

> This is all busted.

Well, yes.

> UMWAIT has a U for *user mode*.  We have the MSR set to a small value
> because USER waits are a big can of worms, and long user waits don't
> actually behave in any particularly intelligent manner unless that
> core is dedicated to just one user task, and no virt is involved, and
> the user code involved is extremely careful.

Idem for virt. [U]MWAIT only really works for virt when the CPU is
dedicated to the one (vcpu) task.

> But now UMWAIT got extended in a way to make it useful for the kernel,
> but it's controlled by the same MSR.  And this is busted.  What we
> want is for CPL0 UMWAIT to ignore the MSR or use a different MSR (for
> virt, sigh, except that this whole mechanism is presumably still
> useless on virt).  Or for a different instruction to be used from the
> kernel, maybe spelled MWAIT.

Yes, CPL0 usage should not be subject to the same limit. I'm not sure if
there's a good argument to have a different limit on virt vs random
other userspace.

> Also, this series needs to be tested on virt.  Because UMWAIT, if it
> works at all on virt, is going to have all manner of odd consequences
> due to the fact that the hypervisor hasn't the faintest clue what's
> going on because there's no feedback.  For all that UIPI is nasty and
> half-baked, at least it tries to notify the next privilege level up as
> to what's going on.  Explicit wakeups virtualize much better than
> cacheline monitors.

Virt is supposedly a big trumpeted use case for UMWAIT because VMMs
(rightfully) restrict MWAIT. At the same time, you *REALLY* do not want
your vcpu task doing long UMWAITs when there's other vcpu threads
waiting to do real work, so the MSR value *SHOULD* be really low.

Increasing this value randomly is bad.. increasing it beyond 1 tick is
abysmal.

Unless there are dedicated vcpu:cpu mappings, in which case KVM should
already be exposing MWAIT anyway.
Artem Bityutskiy March 29, 2023, 7:32 a.m. UTC | #6
There is a lot of feedback. Let me summarize a bit.

1. The C0.x time limit is controlled by MSR 0xE1 (IA32_UMWAIT_CONTROL[31:2]). This
limit applies to both CPL0 and CPL3. Your feedback is that this MSR should be
ignored in CPL0, or there should be a different MSR for CPL3.

Interesting point. I am discussing this with HW folks internally now and trying
to figure it out.


2. Peter Zijlstra asked earlier why the C0.x states are not available via 'mwait'.

Also a good point. Similarly, I am now discussing this with HW engineers and
trying to figure it out.


3. What happens if you do not increase the global limit in
IA32_UMWAIT_CONTROL[31:2]? Maybe just drop that part of the patch.

I am taking this approach now, and am measuring and benchmarking the system.


4. Test this in a virtualized environment.

Will do.


5. Then there were several references to virtualization, and this is the part of
your feedback I did not understand. I admit I do not know much about
virtualization.

Below are a few questions. I apologize in advance if they are naive;
please bear with me.


Question #1.

Userspace applications can do many strange things, for example, just busy-looping
on a variable waiting for it to change.

This is not social behavior. It may be a good idea for special apps with a
dedicated CPU, as you pointed out, e.g., DPDK or other latency-sensitive apps,
but it is a bad idea for a general app.

However, we can't control apps in general. If they want to busy-loop, they will.
If someone buys a VM, they may decide that they paid for it and can do whatever
they want.

Now take this sort of anti-social app and replace the busy-loop with umwait or
tpause. You get the same result, but it saves energy. So it is an optimization,
a good thing.

What am I missing?

Maybe you implied that umwait should be designed in a way that the hypervisor
could take over the core when the app or guest kernel uses it. Then the
hypervisor could do something else meanwhile.

But I think it would increase the C0.x latency observed by umwait users. That
would make umwait not useful for those few apps that do have a good reason for
using umwait (like DPDK).


Question #2.

Why can't we just set this global IA32_UMWAIT_CONTROL[31:2] limit to 0, which
means "forever", "no limit"?

Any user of tpause or umwait must have a loop checking for the "end of wait"
condition. Inside this loop, there is a umwait or tpause (as an optimization
over busy-looping).
busy-looping).

Both tpause and umwait break out on interrupts. The scheduler will preempt the
user task when its time slice is over or there is something more important to
run. When the task starts again, it will continue waiting in its loop, doing
tpause or umwait inside the loop.

What is the problem I am missing?

Question #3:

You wrote:

> Also, this series needs to be tested on virt.  Because UMWAIT, if it works at
> all on virt, is going to have all manner of odd consequences due to the fact
> that the hypervisor hasn't the faintest clue what's going on because there's
> no feedback.

What feedback would the hypervisor need? And what would it do with it?

Is your expectation that the hypervisor will do something else on the CPU, like
run another VM, while the original VM is umwait'ing? If so, the umwait latency
will be way longer than sub-1us...

Thanks in advance!
Andy Lutomirski April 10, 2023, 5:24 p.m. UTC | #7
On Wed, Mar 29, 2023, at 12:32 AM, Artem Bityutskiy wrote:
> There is a lot of feedback. Let me summarize a bit.
>
> 1. C0.x time limit is controlled by MSR 0xE1 - IA32_UMWAIT_CONTROL[31:2]. This
> limit applies to both CPL0 and CPL3. Your feedback is that this MSR should be
> ignored in CPL0, or there should be a different MSR for CPL3.
>
> Interesting point. I am discussing this with HW folks internally now and trying
> to figure it out.
>
>
> 2. Peter Zijlstra said earlier: why C0.x states are not available via 'mwait'.
>
> Also good point. Similarly, I am now discussing this with HW engineers and
> trying to figure it out.
>
>
> 3. What happens if you do not increase the global limit in
> IA32_UMWAIT_CONTROL[31:2]? Maybe just drop that patch.
>
> I am taking this approach now. Measuring and benchmarking the system now.
>
>
> 4. Test this in virtual environment.
>
> Will do.
>
>
> 5. Then there were several references to virtualization, and this is the part of
> your feedback I did not understand. I admit I do not know much about
> virtualization.
>
> Below are few questions. I apologize in advance because if they are naive,
> please, bear with me.
>
>
> Question #1.
>
> Userspace applications can do many strange things. For example, just busy-loop
> on a variable waiting for it to change.
>
> Not social behavior. Maybe a good idea for special apps, having a dedicated CPU, as
> you pointed. E.g., DPDK or other latency-sensitive apps. Bad idea for a general
> app.
>
> However, we can't control apps, in general. If they want to busy-loop they will.
> If someone buys a VM, they may decide that they paid for it and do whatever
> they want.
>
> Now take this sort of anti-social app. Replace the busy-loop with umwait or
> tpause. You get the same result, but it saves energy. So it is an optimization,
> good thing.
>
> What am I missing?

You're not missing anything extremely critical, but there are some less critical issues.

Dredging up an old email:

https://lore.kernel.org/all/CALCETrVJsCAWYSnUE+Ju_VmZfZBUBwUq-uFjV9=Vy+wddtJVCw@mail.gmail.com/

First, UMWAIT doesn't have a wakeup mechanism that can notify a supervisor (or hypervisor) of wakeups.  So if a task is UMWAITing and gets interrupted, the scheduler cannot tell when it should schedule it back in.  UIPI can do this (poorly), but UMWAIT doesn't even try.

So, with that caveat, UMWAIT is, as you are noting, a busy-loop that happens to be somewhat more power efficient than a plain loop or a bunch of REP NOPs.  And this is just fine for the top of the stack of supervisor things -- a kernel on bare metal, a hypervisor that is not itself nested, etc.  It's *also* fine for a well-designed assisted polling loop, paravirt style -- if a thread knows that the thread (vCPU, etc) that will wake it is running, then polling briefly may be a very good idea.  But it should be brief, and it should coordinate with a real wait/wake mechanism.

On top of all this, UMWAIT has the somewhat unfortunate effect that it sets CF according to whether the MSR deadline elapsed.  So if I do:

UMONITOR
UMWAIT (deadline = far future)

and the MSR is set to a large value, I can do this many, many times (possibly infinite) without ever seeing CF=1.  But if the MSR is set to a low value, I'll get CF=1 fairly often.

So setting the MSR to a low value prevents anyone from deluding themselves that UMWAIT is anything other than a busy-wait that can be interrupted based on a TSC timeout or an interrupt or whatever on a very regular basis.

--Andy

Patch

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 938c17f25d94..0d0e45de610e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -51,11 +51,13 @@ 
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/moduleparam.h>
+#include <linux/units.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include <asm/nospec-branch.h>
 #include <asm/mwait.h>
 #include <asm/msr.h>
+#include <asm/tsc.h>
 #include <asm/fpu/api.h>
 
 #define INTEL_IDLE_VERSION "0.5.1"
@@ -73,6 +75,8 @@  static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
 
 static unsigned long auto_demotion_disable_flags;
 
+static u64 umwait_limit;
+
 static enum {
 	C1E_PROMOTION_PRESERVE,
 	C1E_PROMOTION_ENABLE,
@@ -225,6 +229,27 @@  static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
 	return 0;
 }
 
+/**
+ * intel_idle_umwait_irq - Request C0.x using the 'umwait' instruction.
+ * @dev: cpuidle device of the target CPU.
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
+ * @index: Target idle state index.
+ *
+ * Request C0.1 or C0.2 using 'umwait' instruction with interrupts enabled.
+ */
+static __cpuidle int intel_idle_umwait_irq(struct cpuidle_device *dev,
+					   struct cpuidle_driver *drv,
+					   int index)
+{
+	u32 state = flg2MWAIT(drv->states[index].flags);
+
+	raw_local_irq_enable();
+	umwait_idle(rdtsc() + umwait_limit, state);
+	raw_local_irq_disable();
+
+	return index;
+}
+
 /*
  * States are indexed by the cstate number,
  * which is also the index into the MWAIT hint array.
@@ -968,6 +993,13 @@  static struct cpuidle_state adl_n_cstates[] __initdata = {
 };
 
 static struct cpuidle_state spr_cstates[] __initdata = {
+	{
+		.name = "C0.2",
+		.desc = "UMWAIT C0.2",
+		.flags = MWAIT2flg(TPAUSE_C02_STATE) | CPUIDLE_FLAG_IRQ_ENABLE,
+		.exit_latency_ns = 100,
+		.target_residency_ns = 100,
+		.enter = &intel_idle_umwait_irq, },
 	{
 		.name = "C1",
 		.desc = "MWAIT 0x00",
@@ -1894,7 +1926,8 @@  static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 		/* Structure copy. */
 		drv->states[drv->state_count] = cpuidle_state_table[cstate];
 
-		if ((cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) || force_irq_on) {
+		if (cpuidle_state_table[cstate].enter == intel_idle &&
+		    ((cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) || force_irq_on)) {
 			printk("intel_idle: forced intel_idle_irq for state %d\n", cstate);
 			drv->states[drv->state_count].enter = intel_idle_irq;
 		}
@@ -1926,6 +1959,28 @@  static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 	}
 }
 
+/**
+ * umwait_limit_init - initialize time limit value for 'umwait'.
+ *
+ * C0.1 and C0.2 (later C0.x) idle states are requested via the 'umwait'
+ * instruction. The 'umwait' instruction requires the "deadline" - the TSC
+ * counter value to break out of C0.x (unless it broke out because of an
+ * interrupt or some other event).
+ *
+ * The deadline is specified as an absolute TSC value, and it is calculated as
+ * current TSC value + 'umwait_limit'. This function initializes the
+ * 'umwait_limit' variable to the count of TSC cycles per tick. Motivation:
+ *   * the tick is not disabled for shallow states like C0.x, so idle will
+ *     not last longer than a tick anyway
+ *   * limit idle time to give cpuidle a chance to re-evaluate its C-state
+ *     selection decision and possibly select a deeper C-state.
+ */
+static void __init umwait_limit_init(void)
+{
+	umwait_limit = (u64)TICK_NSEC * tsc_khz;
+	do_div(umwait_limit, MICRO);
+}
+
 /**
  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
  * @drv: cpuidle driver structure to initialize.
@@ -1933,6 +1988,7 @@  static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
 {
 	cpuidle_poll_state_init(drv);
+	umwait_limit_init();
 
 	if (disabled_states_mask & BIT(0))
 		drv->states[0].flags |= CPUIDLE_FLAG_OFF;