
[2/4] KVM: x86/pmu: Clear "reprogram" bit if counter is disabled or disallowed

Message ID: 20220923001355.3741194-3-seanjc@google.com (mailing list archive)
State: New, archived
Series: KVM: x86/pmu: Counter reprogramming fixes

Commit Message

Sean Christopherson Sept. 23, 2022, 12:13 a.m. UTC
When reprogramming a counter, clear the counter's "reprogram pending" bit
if the counter is disabled (by the guest) or is disallowed (by the
userspace filter).  In both cases, there's no need to re-attempt
programming on the next coincident KVM_REQ_PMU as enabling the counter by
either method will trigger reprogramming.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/pmu.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)
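
For context, the "reprogram pending" bits are consumed by kvm_pmu_handle_event(),
which runs whenever KVM_REQ_PMU is serviced; a bit that is left set is simply
revisited the next time the request fires.  A condensed sketch of that loop,
based on arch/x86/kvm/pmu.c of this era (exact checks vary between versions):

void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int bit;

	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);

		if (unlikely(!pmc)) {
			clear_bit(bit, pmu->reprogram_pmi);
			continue;
		}

		/*
		 * With this patch, reprogram_counter() clears the bit itself
		 * on success or when the counter is disabled/disallowed, and
		 * leaves it set when perf_event creation fails.
		 */
		reprogram_counter(pmc);
	}
}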

Comments

Like Xu Oct. 14, 2022, 7:14 a.m. UTC | #1
Regarding the subject line: the "reprogram" bit is _only_ used to keep track of
pmc->perf_event, not of whether the counter is disabled.
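
For reference, the bitmap in question is declared in struct kvm_pmu in
arch/x86/include/asm/kvm_host.h:

	/* Bit N set => counter N needs (re)programming on the next KVM_REQ_PMU. */
	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);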

On 23/9/2022 8:13 am, Sean Christopherson wrote:
> When reprogramming a counter, clear the counter's "reprogram pending" bit
> if the counter is disabled (by the guest) or is disallowed (by the
> userspace filter).  In both cases, there's no need to re-attempt
> programming on the next coincident KVM_REQ_PMU as enabling the counter by
> either method will trigger reprogramming.

Perhaps we could move check_pmu_event_filter() towards the top of the call
stack.

> 
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/kvm/pmu.c | 38 ++++++++++++++++++++++++--------------
>   1 file changed, 24 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 4504987cbbe2..4cd99320019b 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -150,9 +150,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
>   	__kvm_perf_overflow(pmc, true);
>   }
>   
> -static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
> -				  u64 config, bool exclude_user,
> -				  bool exclude_kernel, bool intr)
> +static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
> +				 bool exclude_user, bool exclude_kernel,
> +				 bool intr)
>   {
>   	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
>   	struct perf_event *event;
> @@ -204,14 +204,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
>   	if (IS_ERR(event)) {
>   		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
>   			    PTR_ERR(event), pmc->idx);
> -		return;
> +		return PTR_ERR(event);
>   	}
>   
>   	pmc->perf_event = event;
>   	pmc_to_pmu(pmc)->event_count++;
> -	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
>   	pmc->is_paused = false;
>   	pmc->intr = intr || pebs;
> +	return 0;
>   }
>   
>   static void pmc_pause_counter(struct kvm_pmc *pmc)
> @@ -245,7 +245,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
>   	perf_event_enable(pmc->perf_event);
>   	pmc->is_paused = false;
>   
> -	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);

This change is very suspicious.

>   	return true;
>   }
>   
> @@ -303,10 +302,10 @@ void reprogram_counter(struct kvm_pmc *pmc)
>   	pmc_pause_counter(pmc);
>   
>   	if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
> -		return;
> +		goto reprogram_complete;
>   
>   	if (!check_pmu_event_filter(pmc))
> -		return;
> +		goto reprogram_complete;
>   
>   	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
>   		printk_once("kvm pmu: pin control bit is ignored\n");
> @@ -324,16 +323,27 @@ void reprogram_counter(struct kvm_pmc *pmc)
>   	}
>   
>   	if (pmc->current_config == new_config && pmc_resume_counter(pmc))
> -		return;
> +		goto reprogram_complete;
>   
>   	pmc_release_perf_event(pmc);
>   
>   	pmc->current_config = new_config;
> -	pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
> -			      (eventsel & pmu->raw_event_mask),
> -			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
> -			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
> -			      eventsel & ARCH_PERFMON_EVENTSEL_INT);
> +
> +	/*
> +	 * If reprogramming fails, e.g. due to contention, leave the counter's
> +	 * reprogram bit set, i.e. opportunistically try again on the next PMU

This is what we need: in the upstream case we need to keep retrying reprogramming
to try to occupy the hardware.

> +	 * refresh.  Don't make a new request as doing so can stall the guest
> +	 * if reprogramming repeatedly fails.

This does not happen; the guest still enters w/o perf_event backend support
and the vPMU is broken until the next vm-exit.

There is no need to endlessly call kvm_pmu_handle_event() when reprogramming fails.

> +	 */
> +	if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
> +				  (eventsel & pmu->raw_event_mask),
> +				  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
> +				  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
> +				  eventsel & ARCH_PERFMON_EVENTSEL_INT))
> +		return;
> +
> +reprogram_complete:
> +	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
>   }
>   EXPORT_SYMBOL_GPL(reprogram_counter);
>
Sean Christopherson Oct. 14, 2022, 4:26 p.m. UTC | #2
On Fri, Oct 14, 2022, Like Xu wrote:
> Regarding the subject line: the "reprogram" bit is _only_ used to keep track of
> pmc->perf_event, not of whether the counter is disabled.
> 
> On 23/9/2022 8:13 am, Sean Christopherson wrote:
> > When reprogramming a counter, clear the counter's "reprogram pending" bit
> > if the counter is disabled (by the guest) or is disallowed (by the
> > userspace filter).  In both cases, there's no need to re-attempt
> > programming on the next coincident KVM_REQ_PMU as enabling the counter by
> > either method will trigger reprogramming.
> 
> Perhaps we could move check_pmu_event_filter() towards the top of the call
> stack.

Top of what call stack exactly?  reprogram_counter() has multiple callers, and
the filter check is already near the top of reprogram_counter().

> > @@ -245,7 +245,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
> >   	perf_event_enable(pmc->perf_event);
> >   	pmc->is_paused = false;
> > -	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
> 
> This change is very suspicious.

In the current code, pmc_resume_counter() clears the bit iff it returns true.
With this patch, reprogram_counter() is guaranteed to clear the bit if
pmc_resume_counter() returns true.

	if (pmc->current_config == new_config && pmc_resume_counter(pmc))
		goto reprogram_complete;

	pmc_release_perf_event(pmc);

	pmc->current_config = new_config;

	/*
	 * If reprogramming fails, e.g. due to contention, leave the counter's
	 * reprogram bit set, i.e. opportunistically try again on the next PMU
	 * refresh.  Don't make a new request as doing so can stall the guest
	 * if reprogramming repeatedly fails.
	 */
	if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
				  (eventsel & pmu->raw_event_mask),
				  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
				  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
				  eventsel & ARCH_PERFMON_EVENTSEL_INT))
		return;

reprogram_complete:
	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
	pmc->prev_counter = 0;

> > @@ -324,16 +323,27 @@ void reprogram_counter(struct kvm_pmc *pmc)
> >   	}
> >   	if (pmc->current_config == new_config && pmc_resume_counter(pmc))
> > -		return;
> > +		goto reprogram_complete;
> >   	pmc_release_perf_event(pmc);
> >   	pmc->current_config = new_config;
> > -	pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
> > -			      (eventsel & pmu->raw_event_mask),
> > -			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
> > -			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
> > -			      eventsel & ARCH_PERFMON_EVENTSEL_INT);
> > +
> > +	/*
> > +	 * If reprogramming fails, e.g. due to contention, leave the counter's
> > +	 * reprogram bit set, i.e. opportunistically try again on the next PMU
> 
> This is what we need: in the upstream case we need to keep retrying reprogramming
> to try to occupy the hardware.

Maybe in an ideal world, but in reality KVM can't guarantee that programming will
ever succeed.  Making a new KVM_REQ_PMU will prevent entering the guest, i.e. will
effectively hang the vCPU.  Breaking the vPMU isn't great, but hanging the guest
is worse.
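
For reference, KVM_REQ_PMU is serviced in vcpu_enter_guest() (arch/x86/kvm/x86.c)
before every entry into the guest, roughly:

	if (kvm_request_pending(vcpu)) {
		/* ...many other requests elided... */
		if (kvm_check_request(KVM_REQ_PMU, vcpu))
			kvm_pmu_handle_event(vcpu);
	}

A handler that re-raised its own request on every failure would keep this loop
from ever reaching VM-entry.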

> > +	 * refresh.  Don't make a new request as doing so can stall the guest
> > +	 * if reprogramming repeatedly fails.
> 
> This does not happen; the guest still enters w/o perf_event backend support
> and the vPMU is broken until the next vm-exit.
> 
> There is no need to endlessly call kvm_pmu_handle_event() when reprogramming fails.

Yes, that's what the above comment is calling out, or at least trying to call out.

Patch

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 4504987cbbe2..4cd99320019b 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -150,9 +150,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 	__kvm_perf_overflow(pmc, true);
 }
 
-static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
-				  u64 config, bool exclude_user,
-				  bool exclude_kernel, bool intr)
+static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
+				 bool exclude_user, bool exclude_kernel,
+				 bool intr)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	struct perf_event *event;
@@ -204,14 +204,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 	if (IS_ERR(event)) {
 		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
 			    PTR_ERR(event), pmc->idx);
-		return;
+		return PTR_ERR(event);
 	}
 
 	pmc->perf_event = event;
 	pmc_to_pmu(pmc)->event_count++;
-	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
 	pmc->is_paused = false;
 	pmc->intr = intr || pebs;
+	return 0;
 }
 
 static void pmc_pause_counter(struct kvm_pmc *pmc)
@@ -245,7 +245,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 	perf_event_enable(pmc->perf_event);
 	pmc->is_paused = false;
 
-	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
 	return true;
 }
 
@@ -303,10 +302,10 @@ void reprogram_counter(struct kvm_pmc *pmc)
 	pmc_pause_counter(pmc);
 
 	if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
-		return;
+		goto reprogram_complete;
 
 	if (!check_pmu_event_filter(pmc))
-		return;
+		goto reprogram_complete;
 
 	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
 		printk_once("kvm pmu: pin control bit is ignored\n");
@@ -324,16 +323,27 @@ void reprogram_counter(struct kvm_pmc *pmc)
 	}
 
 	if (pmc->current_config == new_config && pmc_resume_counter(pmc))
-		return;
+		goto reprogram_complete;
 
 	pmc_release_perf_event(pmc);
 
 	pmc->current_config = new_config;
-	pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
-			      (eventsel & pmu->raw_event_mask),
-			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
-			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
-			      eventsel & ARCH_PERFMON_EVENTSEL_INT);
+
+	/*
+	 * If reprogramming fails, e.g. due to contention, leave the counter's
+	 * reprogram bit set, i.e. opportunistically try again on the next PMU
+	 * refresh.  Don't make a new request as doing so can stall the guest
+	 * if reprogramming repeatedly fails.
+	 */
+	if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+				  (eventsel & pmu->raw_event_mask),
+				  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+				  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+				  eventsel & ARCH_PERFMON_EVENTSEL_INT))
+		return;
+
+reprogram_complete:
+	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
 }
 EXPORT_SYMBOL_GPL(reprogram_counter);