
[1/4] KVM: x86/pmu: Force reprogramming of all counters on PMU filter change

Message ID 20220923001355.3741194-2-seanjc@google.com (mailing list archive)
State New, archived
Series KVM: x86/pmu: Counter reprogramming fixes

Commit Message

Sean Christopherson Sept. 23, 2022, 12:13 a.m. UTC
Force vCPUs to reprogram all counters on a PMU filter change to provide
a sane ABI for userspace.  Use the existing KVM_REQ_PMU to do the
programming, and take advantage of the fact that the reprogram_pmi bitmap
fits in a u64 to set all bits in a single atomic update.  Note, setting
the bitmap and making the request needs to be done _after_ the SRCU
synchronization to ensure that vCPUs will reprogram using the new filter.

KVM's current "lazy" approach is confusing and non-deterministic.  It's
confusing because, from a developer perspective, the code is buggy as it
makes zero sense to let userspace modify the filter but then not actually
enforce the new filter.  The lazy approach is non-deterministic because
KVM enforces the filter whenever a counter is reprogrammed, not just on
guest WRMSRs, i.e. a guest might gain/lose access to an event at random
times depending on what is going on in the host.

Note, the resulting behavior is still non-deterministic while the filter
is in flux.  If userspace wants to guarantee deterministic behavior, all
vCPUs should be paused during the filter update.

Fixes: 66bb8a065f5a ("KVM: x86: PMU Event Filter")
Cc: Aaron Lewis <aaronlewis@google.com>
Jim Mattson <jmattson@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/kvm_host.h | 11 ++++++++++-
 arch/x86/kvm/pmu.c              | 15 +++++++++++++--
 2 files changed, 23 insertions(+), 3 deletions(-)
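
For context, a minimal userspace sketch of the ABI this change hardens: a VMM
installs a new filter with the VM-scoped KVM_SET_PMU_EVENT_FILTER ioctl.  The
vm_fd and the event list are placeholders for illustration; pausing and
resuming vCPUs around the call, if fully deterministic behavior is wanted, is
left to the VMM.

#include <linux/kvm.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

static int set_pmu_filter(int vm_fd, const __u64 *events, __u32 nevents)
{
	struct kvm_pmu_event_filter *filter;
	size_t size = sizeof(*filter) + nevents * sizeof(events[0]);
	int r;

	filter = calloc(1, size);
	if (!filter)
		return -1;

	/* Allow only the listed raw events; everything else is filtered out. */
	filter->action = KVM_PMU_EVENT_ALLOW;
	filter->nevents = nevents;
	memcpy(filter->events, events, nevents * sizeof(events[0]));

	/*
	 * With this patch, once the ioctl returns, every vCPU has been asked
	 * (via KVM_REQ_PMU) to reprogram its counters against the new filter.
	 */
	r = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, filter);
	free(filter);
	return r;
}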

Comments

Like Xu Oct. 13, 2022, 12:01 p.m. UTC | #1
Firstly, thanks for all the comments you've posted around the vPMU code.

On 23/9/2022 8:13 am, Sean Christopherson wrote:
> Force vCPUs to reprogram all counters on a PMU filter change to provide
> a sane ABI for userspace.  Use the existing KVM_REQ_PMU to do the
> programming, and take advantage of the fact that the reprogram_pmi bitmap
> fits in a u64 to set all bits in a single atomic update.  Note, setting
> the bitmap and making the request needs to be done _after_ the SRCU
> synchronization to ensure that vCPUs will reprogram using the new filter.
> 
> KVM's current "lazy" approach is confusing and non-deterministic.  It's

The deferred ("lazy") reprogramming approach is only introduced in patch 03, right after this change.

> confusing because, from a developer perspective, the code is buggy as it
> makes zero sense to let userspace modify the filter but then not actually
> enforce the new filter.  The lazy approach is non-deterministic because
> KVM enforces the filter whenever a counter is reprogrammed, not just on
> guest WRMSRs, i.e. a guest might gain/lose access to an event at random
> times depending on what is going on in the host.
> 
> Note, the resulting behavior is still non-deterministic while the filter
> is in flux.  If userspace wants to guarantee deterministic behavior, all
> vCPUs should be paused during the filter update.
> 
> Fixes: 66bb8a065f5a ("KVM: x86: PMU Event Filter")
> Cc: Aaron Lewis <aaronlewis@google.com>
> Jim Mattson <jmattson@google.com>

Missing a "Cc:" here?

> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/include/asm/kvm_host.h | 11 ++++++++++-
>   arch/x86/kvm/pmu.c              | 15 +++++++++++++--
>   2 files changed, 23 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b3ce723efb43..462f041ede9f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -519,7 +519,16 @@ struct kvm_pmu {
>   	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
>   	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
>   	struct irq_work irq_work;
> -	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
> +
> +	/*
> +	 * Overlay the bitmap with a 64-bit atomic so that all bits can be
> +	 * set in a single access, e.g. to reprogram all counters when the PMU
> +	 * filter changes.
> +	 */
> +	union {
> +		DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
> +		atomic64_t __reprogram_pmi;
> +	};
>   	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
>   	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
>   
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index d9b9a0f0db17..4504987cbbe2 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -577,6 +577,8 @@ EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
>   int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
>   {
>   	struct kvm_pmu_event_filter tmp, *filter;
> +	struct kvm_vcpu *vcpu;
> +	unsigned long i;
>   	size_t size;
>   	int r;
>   
> @@ -613,9 +615,18 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
>   	mutex_lock(&kvm->lock);
>   	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
>   				     mutex_is_locked(&kvm->lock));
> -	mutex_unlock(&kvm->lock);
> -
>   	synchronize_srcu_expedited(&kvm->srcu);

The relative order of mutex_unlock() and synchronize_srcu_expedited() has
been reversed here, extending the kvm->lock critical section. The
motivation for this is also not explicitly stated in the commit message.

> +
> +	BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) >
> +		     sizeof(((struct kvm_pmu *)0)->__reprogram_pmi));
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm)
> +		atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull);

How about:
	bitmap_copy(pmu->reprogram_pmi, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
to avoid further cycles on calls of 
"static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit)" ?

> +
> +	kvm_make_all_cpus_request(kvm, KVM_REQ_PMU);
> +
> +	mutex_unlock(&kvm->lock);
> +
>   	r = 0;
>   cleanup:
>   	kfree(filter);
Sean Christopherson Oct. 13, 2022, 8:53 p.m. UTC | #2
On Thu, Oct 13, 2022, Like Xu wrote:
> Firstly, thanks for all the comments you've posted around the vPMU code.
> 
> On 23/9/2022 8:13 am, Sean Christopherson wrote:
> > Force vCPUs to reprogram all counters on a PMU filter change to provide
> > a sane ABI for userspace.  Use the existing KVM_REQ_PMU to do the
> > programming, and take advantage of the fact that the reprogram_pmi bitmap
> > fits in a u64 to set all bits in a single atomic update.  Note, setting
> > the bitmap and making the request needs to be done _after_ the SRCU
> > synchronization to ensure that vCPUs will reprogram using the new filter.
> > 
> > KVM's current "lazy" approach is confusing and non-deterministic.  It's
> 
> The deferred ("lazy") reprogramming approach is only introduced in patch 03, right after this change.

This is referring to the lazy recognition of the filter, not the deferred
reprogramming of the counters.  Regardless of whether reprogramming is handled
via request or in-line, KVM is still lazily recognizing the new filter as vCPUs
won't pick up the new filter until the _guest_ triggers a refresh.

> > @@ -613,9 +615,18 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
> >   	mutex_lock(&kvm->lock);
> >   	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
> >   				     mutex_is_locked(&kvm->lock));
> > -	mutex_unlock(&kvm->lock);
> > -
> >   	synchronize_srcu_expedited(&kvm->srcu);
> 
> The relative order of mutex_unlock() and synchronize_srcu_expedited() has
> been reversed here, extending the kvm->lock critical section. The
> motivation for this is also not explicitly stated in the commit message.

I'll add a blurb, after I re-convince myself that the sync+request needs to be
done under kvm->lock.

> > +	BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) >
> > +		     sizeof(((struct kvm_pmu *)0)->__reprogram_pmi));
> > +
> > +	kvm_for_each_vcpu(i, vcpu, kvm)
> > +		atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull);
> 
> How about:
> 	bitmap_copy(pmu->reprogram_pmi, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
> to avoid further cycles on calls of
> "static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit)" ?

bitmap_copy() was my first choice too, but unfortunately it doesn't guarantee
atomicity and could lead to data corruption if the target vCPU is concurrently
modifying the bitmap.
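
To illustrate the atomicity point outside the kernel, here is a standalone C11
sketch; the struct and helpers are made up for this example and only mirror
the union added to struct kvm_pmu.  A single 64-bit atomic store publishes all
bits at once, whereas a multi-word or byte-wise copy such as bitmap_copy()
gives no such guarantee against a vCPU doing atomic bit ops on the same storage.

#include <stdatomic.h>
#include <stdint.h>

struct fake_pmu {
	/* Mirrors the kvm_pmu union: a bitmap overlaid on a 64-bit atomic. */
	union {
		uint64_t reprogram_pmi;		/* bitmap storage (<= 64 bits here) */
		_Atomic uint64_t __reprogram_pmi;
	};
};

/* Filter-update side: mark every counter for reprogramming in one shot. */
static void mark_all_for_reprogram(struct fake_pmu *pmu)
{
	atomic_store(&pmu->__reprogram_pmi, ~0ULL);
}

/* vCPU side: atomically test-and-clear one pending bit (read-modify-write). */
static int claim_pending_bit(struct fake_pmu *pmu, unsigned int bit)
{
	uint64_t mask = 1ULL << bit;

	return !!(atomic_fetch_and(&pmu->__reprogram_pmi, ~mask) & mask);
}

/*
 * Replacing mark_all_for_reprogram() with a plain copy is an ordinary store:
 * the compiler may split it into smaller accesses, and a multi-word bitmap
 * spans several stores, so the copy can interleave with claim_pending_bit()
 * and the vCPU's concurrent updates can be lost or seen half-applied.
 */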
Like Xu Oct. 14, 2022, 6:41 a.m. UTC | #3
On 14/10/2022 4:53 am, Sean Christopherson wrote:
> On Thu, Oct 13, 2022, Like Xu wrote:
>> Firstly, thanks for all the comments you've posted around the vPMU code.
>>
>> On 23/9/2022 8:13 am, Sean Christopherson wrote:
>>> Force vCPUs to reprogram all counters on a PMU filter change to provide
>>> a sane ABI for userspace.  Use the existing KVM_REQ_PMU to do the
>>> programming, and take advantage of the fact that the reprogram_pmi bitmap
>>> fits in a u64 to set all bits in a single atomic update.  Note, setting
>>> the bitmap and making the request needs to be done _after_ the SRCU
>>> synchronization to ensure that vCPUs will reprogram using the new filter.
>>>
>>> KVM's current "lazy" approach is confusing and non-deterministic.  It's
>>
>> The deferred ("lazy") reprogramming approach is only introduced in patch 03, right after this change.
> 
> This is referring to the lazy recognition of the filter, not the deferred
> reprogramming of the counters.  Regardless of whether reprogramming is handled
> via request or in-line, KVM is still lazily recognizing the new filter as vCPUs
> won't pick up the new filter until the _guest_ triggers a refresh.

It may still be too late for the PMU filter to take effect. To eliminate this
non-determinism, should we kick all vPMU-enabled vCPUs out of the guest right
after making the KVM_REQ_PMU requests?

> 
>>> @@ -613,9 +615,18 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
>>>    	mutex_lock(&kvm->lock);
>>>    	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
>>>    				     mutex_is_locked(&kvm->lock));
>>> -	mutex_unlock(&kvm->lock);
>>> -
>>>    	synchronize_srcu_expedited(&kvm->srcu);
>>
>> The relative order of mutex_unlock() and synchronize_srcu_expedited() has
>> been reversed here, extending the kvm->lock critical section. The
>> motivation for this is also not explicitly stated in the commit message.
> 
> I'll add a blurb, after I re-convince myself that the sync+request needs to be
> done under kvm->lock.
> 
>>> +	BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) >
>>> +		     sizeof(((struct kvm_pmu *)0)->__reprogram_pmi));
>>> +
>>> +	kvm_for_each_vcpu(i, vcpu, kvm)
>>> +		atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull);
>>
>> How about:
>> 	bitmap_copy(pmu->reprogram_pmi, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
>> to avoid further cycles on calls of
>> "static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit)" ?
> 
> bitmap_copy() was my first choice too, but unfortunately it doesn't guarantee
> atomicity and could lead to data corruption if the target vCPU is concurrently
> modifying the bitmap.

Indeed, it may help to reuse "pmu->global_ctrl_mask" instead of "-1ull":

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 4504987cbbe2..8e279f816e27 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -621,7 +621,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
  		     sizeof(((struct kvm_pmu *)0)->__reprogram_pmi));

  	kvm_for_each_vcpu(i, vcpu, kvm)
-		atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull);
+		atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi,
+			     pmu->global_ctrl_mask);

  	kvm_make_all_cpus_request(kvm, KVM_REQ_PMU);

diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index b68956299fa8..a946c1c57e1d 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -185,6 +185,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
  	pmu->nr_arch_fixed_counters = 0;
  	pmu->global_status = 0;
  	bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
+	pmu->global_ctrl_mask = ~((1ull << pmu->nr_arch_gp_counters) - 1);
  }

  static void amd_pmu_init(struct kvm_vcpu *vcpu)

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b3ce723efb43..462f041ede9f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -519,7 +519,16 @@  struct kvm_pmu {
 	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
 	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
 	struct irq_work irq_work;
-	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+
+	/*
+	 * Overlay the bitmap with a 64-bit atomic so that all bits can be
+	 * set in a single access, e.g. to reprogram all counters when the PMU
+	 * filter changes.
+	 */
+	union {
+		DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+		atomic64_t __reprogram_pmi;
+	};
 	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index d9b9a0f0db17..4504987cbbe2 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -577,6 +577,8 @@  EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
 int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 {
 	struct kvm_pmu_event_filter tmp, *filter;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
 	size_t size;
 	int r;
 
@@ -613,9 +615,18 @@  int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 	mutex_lock(&kvm->lock);
 	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
 				     mutex_is_locked(&kvm->lock));
-	mutex_unlock(&kvm->lock);
-
 	synchronize_srcu_expedited(&kvm->srcu);
+
+	BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) >
+		     sizeof(((struct kvm_pmu *)0)->__reprogram_pmi));
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull);
+
+	kvm_make_all_cpus_request(kvm, KVM_REQ_PMU);
+
+	mutex_unlock(&kvm->lock);
+
 	r = 0;
 cleanup:
 	kfree(filter);