diff mbox

kvm: rename HINTS_DEDICATED to KVM_HINTS_REALTIME

Message ID 1526568841-35372-1-git-send-email-mst@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michael S. Tsirkin May 17, 2018, 2:54 p.m. UTC
HINTS_DEDICATED seems to be somewhat confusing:

Guest doesn't really care whether it's the only task running on a host
CPU as long as it's not preempted.

And there are more reasons for Guest to be preempted than host CPU
sharing, for example, with memory overcommit it can get preempted on a
memory access, post copy migration can cause preemption, etc.

Let's call it KVM_HINTS_REALTIME which seems to better
match what guests expect.

Also, the flag must be set on all vCPUs - current guests assume this.
Note so in the documentation.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Documentation/virtual/kvm/cpuid.txt  | 6 +++---
 arch/x86/include/uapi/asm/kvm_para.h | 2 +-
 arch/x86/kernel/kvm.c                | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

Comments

Paolo Bonzini May 17, 2018, 4:05 p.m. UTC | #1
On 17/05/2018 16:54, Michael S. Tsirkin wrote:
> HINTS_DEDICATED seems to be somewhat confusing:
> 
> Guest doesn't really care whether it's the only task running on a host
> CPU as long as it's not preempted.
> 
> And there are more reasons for Guest to be preempted than host CPU
> sharing, for example, with memory overcommit it can get preempted on a
> memory access, post copy migration can cause preemption, etc.
> 
> Let's call it KVM_HINTS_REALTIME which seems to better
> match what guests expect.
> 
> Also, the flag must be set on all vCPUs - current guests assume this.
> Note so in the documentation.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  Documentation/virtual/kvm/cpuid.txt  | 6 +++---
>  arch/x86/include/uapi/asm/kvm_para.h | 2 +-
>  arch/x86/kernel/kvm.c                | 8 ++++----
>  3 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
> index d4f33eb8..ab022dc 100644
> --- a/Documentation/virtual/kvm/cpuid.txt
> +++ b/Documentation/virtual/kvm/cpuid.txt
> @@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
>  
>  flag                               || value || meaning
>  ==================================================================================
> -KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
> -                                   ||       || determine if there is vCPU pinning
> -                                   ||       || and there is no vCPU over-commitment,
> +KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
> +                                   ||       || determine that vCPUs are never
> +                                   ||       || preempted for an unlimited time,
>                                     ||       || allowing optimizations
>  ----------------------------------------------------------------------------------
> diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
> index 4c851eb..0ede697 100644
> --- a/arch/x86/include/uapi/asm/kvm_para.h
> +++ b/arch/x86/include/uapi/asm/kvm_para.h
> @@ -29,7 +29,7 @@
>  #define KVM_FEATURE_PV_TLB_FLUSH	9
>  #define KVM_FEATURE_ASYNC_PF_VMEXIT	10
>  
> -#define KVM_HINTS_DEDICATED      0
> +#define KVM_HINTS_REALTIME      0
>  
>  /* The last 8 bits are used to indicate how to interpret the flags field
>   * in pvclock structure. If no bits are set, all flags are ignored.
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 7867417..5b2300b 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void)
>  static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
>  {
>  	native_smp_prepare_cpus(max_cpus);
> -	if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
> +	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
>  		static_branch_disable(&virt_spin_lock_key);
>  }
>  
> @@ -553,7 +553,7 @@ static void __init kvm_guest_init(void)
>  	}
>  
>  	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
> -	    !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
> +	    !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
>  	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
>  		pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
>  
> @@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void)
>  	int cpu;
>  
>  	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
> -	    !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
> +	    !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
>  	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
>  		for_each_possible_cpu(cpu) {
>  			zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
> @@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void)
>  	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
>  		return;
>  
> -	if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
> +	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
>  		return;
>  
>  	__pv_init_lock_hash();
> 

Queued, thanks.

Paolo
Eduardo Habkost May 17, 2018, 6:46 p.m. UTC | #2
On Thu, May 17, 2018 at 05:54:24PM +0300, Michael S. Tsirkin wrote:
> HINTS_DEDICATED seems to be somewhat confusing:
> 
> Guest doesn't really care whether it's the only task running on a host
> CPU as long as it's not preempted.
> 
> And there are more reasons for Guest to be preempted than host CPU
> sharing, for example, with memory overcommit it can get preempted on a
> memory access, post copy migration can cause preemption, etc.
> 
> Let's call it KVM_HINTS_REALTIME which seems to better
> match what guests expect.
> 
> Also, the flag must be set on all vCPUs - current guests assume this.
> Note so in the documentation.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  Documentation/virtual/kvm/cpuid.txt  | 6 +++---
>  arch/x86/include/uapi/asm/kvm_para.h | 2 +-
>  arch/x86/kernel/kvm.c                | 8 ++++----
>  3 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
> index d4f33eb8..ab022dc 100644
> --- a/Documentation/virtual/kvm/cpuid.txt
> +++ b/Documentation/virtual/kvm/cpuid.txt
> @@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
>  
>  flag                               || value || meaning
>  ==================================================================================
> -KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
> -                                   ||       || determine if there is vCPU pinning
> -                                   ||       || and there is no vCPU over-commitment,
> +KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
> +                                   ||       || determine that vCPUs are never
> +                                   ||       || preempted for an unlimited time,
>                                     ||       || allowing optimizations

My understanding of the original patch is that the intention is
to tell the guest that it is very unlikely to be preempted, so it
can choose a more appropriate spinlock implementation.  This
description implies that the guest will never be preempted, which
is a much stronger guarantee.

Isn't this new description incompatible with existing usage of
the hint, which might include people who just use vCPU pinning
but no mlock?
Michael S. Tsirkin May 17, 2018, 7:55 p.m. UTC | #3
On Thu, May 17, 2018 at 03:46:58PM -0300, Eduardo Habkost wrote:
> On Thu, May 17, 2018 at 05:54:24PM +0300, Michael S. Tsirkin wrote:
> > HINTS_DEDICATED seems to be somewhat confusing:
> > 
> > Guest doesn't really care whether it's the only task running on a host
> > CPU as long as it's not preempted.
> > 
> > And there are more reasons for Guest to be preempted than host CPU
> > sharing, for example, with memory overcommit it can get preempted on a
> > memory access, post copy migration can cause preemption, etc.
> > 
> > Let's call it KVM_HINTS_REALTIME which seems to better
> > match what guests expect.
> > 
> > > Also, the flag must be set on all vCPUs - current guests assume this.
> > Note so in the documentation.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  Documentation/virtual/kvm/cpuid.txt  | 6 +++---
> >  arch/x86/include/uapi/asm/kvm_para.h | 2 +-
> >  arch/x86/kernel/kvm.c                | 8 ++++----
> >  3 files changed, 8 insertions(+), 8 deletions(-)
> > 
> > diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
> > index d4f33eb8..ab022dc 100644
> > --- a/Documentation/virtual/kvm/cpuid.txt
> > +++ b/Documentation/virtual/kvm/cpuid.txt
> > @@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
> >  
> >  flag                               || value || meaning
> >  ==================================================================================
> > -KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
> > -                                   ||       || determine if there is vCPU pinning
> > -                                   ||       || and there is no vCPU over-commitment,
> > +KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
> > +                                   ||       || determine that vCPUs are never
> > +                                   ||       || preempted for an unlimited time,
> >                                     ||       || allowing optimizations
> 
> My understanding of the original patch is that the intention is
> > to tell the guest that it is very unlikely to be preempted, so it
> > can choose a more appropriate spinlock implementation.  This
> description implies that the guest will never be preempted, which
> is much stronger guarantee.

Note:

...  for an unlimited time.

> 
> Isn't this new description incompatible with existing usage of
> the hint, which might include people who just use vCPU pinning
> but no mlock?

Without mlock you should always use pv spinlocks.

Otherwise you risk blocking on a lock taken by
a VCPU that is in turn blocked on IO, where the IO
is not completing because CPU is being used up
spinning.

> -- 
> Eduardo
Paolo Bonzini May 18, 2018, 9:41 a.m. UTC | #4
On 17/05/2018 20:46, Eduardo Habkost wrote:
> My understanding of the original patch is that the intention is
> to tell the guest that it is very unlikely to be preempted, so it
> can choose a more appropriate spinlock implementation.  This
> description implies that the guest will never be preempted, which
> is much stronger guarantee.
> 
> Isn't this new description incompatible with existing usage of
> the hint, which might include people who just use vCPU pinning
> but no mlock?

If you use hugetlbfs and vhost-user you don't really need mlock for the
QEMU process, do you?  The QEMU process is not doing much in that case
and hugetlbfs gives you pinned memory automatically.

Paolo
Michael S. Tsirkin May 18, 2018, 12:47 p.m. UTC | #5
On Fri, May 18, 2018 at 11:41:23AM +0200, Paolo Bonzini wrote:
> On 17/05/2018 20:46, Eduardo Habkost wrote:
> > My understanding of the original patch is that the intention is
> > to tell the guest that it is very unlikely to be preempted, so it
> > can choose a more appropriate spinlock implementation.  This
> > description implies that the guest will never be preempted, which
> > is much stronger guarantee.
> > 
> > Isn't this new description incompatible with existing usage of
> > the hint, which might include people who just use vCPU pinning
> > but no mlock?
> 
> If you use hugetlbfs and vhost-user you don't really need mlock for the
> QEMU process, do you?  The QEMU process is not doing much in that case
> and hugetlbfs gives you pinned memory automatically.
> 
> Paolo

Same with PCI device passthrough: VFIO pins all guest memory right now.
Eduardo Habkost May 18, 2018, 4:04 p.m. UTC | #6
CCing qemu-devel, as I'm now discussing userspace.

On Thu, May 17, 2018 at 10:55:33PM +0300, Michael S. Tsirkin wrote:
> On Thu, May 17, 2018 at 03:46:58PM -0300, Eduardo Habkost wrote:
> > On Thu, May 17, 2018 at 05:54:24PM +0300, Michael S. Tsirkin wrote:
> > > HINTS_DEDICATED seems to be somewhat confusing:
> > > 
> > > Guest doesn't really care whether it's the only task running on a host
> > > CPU as long as it's not preempted.
> > > 
> > > And there are more reasons for Guest to be preempted than host CPU
> > > sharing, for example, with memory overcommit it can get preempted on a
> > > memory access, post copy migration can cause preemption, etc.
> > > 
> > > Let's call it KVM_HINTS_REALTIME which seems to better
> > > match what guests expect.
> > > 
> > > > Also, the flag must be set on all vCPUs - current guests assume this.
> > > Note so in the documentation.
> > > 
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >  Documentation/virtual/kvm/cpuid.txt  | 6 +++---
> > >  arch/x86/include/uapi/asm/kvm_para.h | 2 +-
> > >  arch/x86/kernel/kvm.c                | 8 ++++----
> > >  3 files changed, 8 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
> > > index d4f33eb8..ab022dc 100644
> > > --- a/Documentation/virtual/kvm/cpuid.txt
> > > +++ b/Documentation/virtual/kvm/cpuid.txt
> > > @@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
> > >  
> > >  flag                               || value || meaning
> > >  ==================================================================================
> > > -KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
> > > -                                   ||       || determine if there is vCPU pinning
> > > -                                   ||       || and there is no vCPU over-commitment,
> > > +KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
> > > +                                   ||       || determine that vCPUs are never
> > > +                                   ||       || preempted for an unlimited time,
> > >                                     ||       || allowing optimizations
> > 
> > My understanding of the original patch is that the intention is
> > to tell the guest that it is very unlikely to be preempted,
> > so it
> > can choose a more appropriate spinlock implementation.  This
> > description implies that the guest will never be preempted, which
> > is much stronger guarantee.
> 
> Note:
> 
> ...  for an unlimited time.

Which still sounds like a stronger guarantee than the original
description.  But:

> 
> > 
> > Isn't this new description incompatible with existing usage of
> > the hint, which might include people who just use vCPU pinning
> > but no mlock?
> 
> Without mlock you should always use pv spinlocks.
> 
> Otherwise you risk blocking on a lock taken by
> a VCPU that is in turn blocked on IO, where the IO
> is not completing because CPU is being used up
> spinning.

So the stronger guarantee seems necessary.

Now what should host userspace do if the user is trying to run an
existing configuration where the CPUID hint was set but memory is
not pinned?
Michael S. Tsirkin May 18, 2018, 5:01 p.m. UTC | #7
On Fri, May 18, 2018 at 01:04:31PM -0300, Eduardo Habkost wrote:
> CCing qemu-devel, as I'm now discussing userspace.
> 
> On Thu, May 17, 2018 at 10:55:33PM +0300, Michael S. Tsirkin wrote:
> > On Thu, May 17, 2018 at 03:46:58PM -0300, Eduardo Habkost wrote:
> > > On Thu, May 17, 2018 at 05:54:24PM +0300, Michael S. Tsirkin wrote:
> > > > HINTS_DEDICATED seems to be somewhat confusing:
> > > > 
> > > > Guest doesn't really care whether it's the only task running on a host
> > > > CPU as long as it's not preempted.
> > > > 
> > > > And there are more reasons for Guest to be preempted than host CPU
> > > > sharing, for example, with memory overcommit it can get preempted on a
> > > > memory access, post copy migration can cause preemption, etc.
> > > > 
> > > > Let's call it KVM_HINTS_REALTIME which seems to better
> > > > match what guests expect.
> > > > 
> > > > > Also, the flag must be set on all vCPUs - current guests assume this.
> > > > Note so in the documentation.
> > > > 
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > >  Documentation/virtual/kvm/cpuid.txt  | 6 +++---
> > > >  arch/x86/include/uapi/asm/kvm_para.h | 2 +-
> > > >  arch/x86/kernel/kvm.c                | 8 ++++----
> > > >  3 files changed, 8 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
> > > > index d4f33eb8..ab022dc 100644
> > > > --- a/Documentation/virtual/kvm/cpuid.txt
> > > > +++ b/Documentation/virtual/kvm/cpuid.txt
> > > > @@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
> > > >  
> > > >  flag                               || value || meaning
> > > >  ==================================================================================
> > > > -KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
> > > > -                                   ||       || determine if there is vCPU pinning
> > > > -                                   ||       || and there is no vCPU over-commitment,
> > > > +KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
> > > > +                                   ||       || determine that vCPUs are never
> > > > +                                   ||       || preempted for an unlimited time,
> > > >                                     ||       || allowing optimizations
> > > 
> > > My understanding of the original patch is that the intention is
> > > to tell the guest that it is very unlikely to be preempted,
> > > so it
> > > can choose a more appropriate spinlock implementation.  This
> > > description implies that the guest will never be preempted, which
> > > is much stronger guarantee.
> > 
> > Note:
> > 
> > ...  for an unlimited time.
> 
> Which still sounds like a stronger guarantee than the original
> description.  But:
> 
> > 
> > > 
> > > Isn't this new description incompatible with existing usage of
> > > the hint, which might include people who just use vCPU pinning
> > > but no mlock?
> > 
> > Without mlock you should always use pv spinlocks.
> > 
> > Otherwise you risk blocking on a lock taken by
> > a VCPU that is in turn blocked on IO, where the IO
> > is not completing because CPU is being used up
> > spinning.
> 
> So the stronger guarantee seems necessary.
> 
> Now what should host userspace do if the user is trying to run an
> existing configuration where the CPUID hint was set but memory is
> not pinned?
> 
> -- 
> Eduardo

As much as we'd like to be helpful and validate input, you need a real
time host too. I'm not sure how we'd find out - I suggest we do not
bother for now.
Eduardo Habkost May 18, 2018, 5:13 p.m. UTC | #8
On Fri, May 18, 2018 at 08:01:49PM +0300, Michael S. Tsirkin wrote:
> On Fri, May 18, 2018 at 01:04:31PM -0300, Eduardo Habkost wrote:
> > CCing qemu-devel, as I'm now discussing userspace.
> > 
> > On Thu, May 17, 2018 at 10:55:33PM +0300, Michael S. Tsirkin wrote:
> > > On Thu, May 17, 2018 at 03:46:58PM -0300, Eduardo Habkost wrote:
> > > > On Thu, May 17, 2018 at 05:54:24PM +0300, Michael S. Tsirkin wrote:
> > > > > HINTS_DEDICATED seems to be somewhat confusing:
> > > > > 
> > > > > Guest doesn't really care whether it's the only task running on a host
> > > > > CPU as long as it's not preempted.
> > > > > 
> > > > > And there are more reasons for Guest to be preempted than host CPU
> > > > > sharing, for example, with memory overcommit it can get preempted on a
> > > > > memory access, post copy migration can cause preemption, etc.
> > > > > 
> > > > > Let's call it KVM_HINTS_REALTIME which seems to better
> > > > > match what guests expect.
> > > > > 
> > > > > > Also, the flag must be set on all vCPUs - current guests assume this.
> > > > > Note so in the documentation.
> > > > > 
> > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > > ---
> > > > >  Documentation/virtual/kvm/cpuid.txt  | 6 +++---
> > > > >  arch/x86/include/uapi/asm/kvm_para.h | 2 +-
> > > > >  arch/x86/kernel/kvm.c                | 8 ++++----
> > > > >  3 files changed, 8 insertions(+), 8 deletions(-)
> > > > > 
> > > > > diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
> > > > > index d4f33eb8..ab022dc 100644
> > > > > --- a/Documentation/virtual/kvm/cpuid.txt
> > > > > +++ b/Documentation/virtual/kvm/cpuid.txt
> > > > > @@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
> > > > >  
> > > > >  flag                               || value || meaning
> > > > >  ==================================================================================
> > > > > -KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
> > > > > -                                   ||       || determine if there is vCPU pinning
> > > > > -                                   ||       || and there is no vCPU over-commitment,
> > > > > +KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
> > > > > +                                   ||       || determine that vCPUs are never
> > > > > +                                   ||       || preempted for an unlimited time,
> > > > >                                     ||       || allowing optimizations
> > > > 
> > > > My understanding of the original patch is that the intention is
> > > > to tell the guest that it is very unlikely to be preempted,
> > > > so it
> > > > can choose a more appropriate spinlock implementation.  This
> > > > description implies that the guest will never be preempted, which
> > > > is much stronger guarantee.
> > > 
> > > Note:
> > > 
> > > ...  for an unlimited time.
> > 
> > Which still sounds like a stronger guarantee than the original
> > description.  But:
> > 
> > > 
> > > > 
> > > > Isn't this new description incompatible with existing usage of
> > > > the hint, which might include people who just use vCPU pinning
> > > > but no mlock?
> > > 
> > > Without mlock you should always use pv spinlocks.
> > > 
> > > Otherwise you risk blocking on a lock taken by
> > > a VCPU that is in turn blocked on IO, where the IO
> > > is not completing because CPU is being used up
> > > spinning.
> > 
> > So the stronger guarantee seems necessary.
> > 
> > Now what should host userspace do if the user is trying to run an
> > existing configuration where the CPUID hint was set but memory is
> > not pinned?
> 
> As much as we'd like to be helpful and validate input, you need a real
> time host too. I'm not sure how we'd find out - I suggest we do not
> bother for now.

I'm worried that people will start enabling the flag in all kinds
of scenarios where the guarantees can't be kept, and make the
meaning of the flag in practice completely different from its
documented meaning.

So I'd like to either detect cases where it's obviously wrong to
enable the flag, or document the requirements very clearly on
QEMU documentation.
Paolo Bonzini May 18, 2018, 5:17 p.m. UTC | #9
On 18/05/2018 18:04, Eduardo Habkost wrote:
>> Without mlock you should always use pv spinlocks.
>>
>> Otherwise you risk blocking on a lock taken by
>> a VCPU that is in turn blocked on IO, where the IO
>> is not completing because CPU is being used up
>> spinning.
>
> So the stronger guarantee seems necessary.
> 
> Now what should host userspace do if the user is trying to run an
> existing configuration where the CPUID hint was set but memory is
> not pinned?

As mentioned elsewhere in the thread, there are many ways to pin memory,
and mlock is not always necessary.  However, I agree with Michael in
making the hint provide a stronger guarantee.

Paolo
Paolo Bonzini May 18, 2018, 5:18 p.m. UTC | #10
On 18/05/2018 19:13, Eduardo Habkost wrote:
>> As much as we'd like to be helpful and validate input, you need a real
>> time host too. I'm not sure how we'd find out - I suggest we do not
>> bother for now.
> I'm worried that people will start enabling the flag in all kinds
> of scenarios where the guarantees can't be kept, and make the
> meaning of the flag in practice completely different from its
> documented meaning.

I don't think we should try to detect anything.  As far as QEMU is
concerned, it's mostly garbage in, garbage out when it comes to invalid
configurations.  It's just a bit, and using it in invalid configurations
is okay if you're doing it (for example) for debugging.

Paolo
Eduardo Habkost May 18, 2018, 5:54 p.m. UTC | #11
On Fri, May 18, 2018 at 07:18:57PM +0200, Paolo Bonzini wrote:
> On 18/05/2018 19:13, Eduardo Habkost wrote:
> >> As much as we'd like to be helpful and validate input, you need a real
> >> time host too. I'm not sure how we'd find out - I suggest we do not
> >> bother for now.
> > I'm worried that people will start enabling the flag in all kinds
> > of scenarios where the guarantees can't be kept, and make the
> > meaning of the flag in practice completely different from its
> > documented meaning.
> 
> I don't think we should try to detect anything.  As far as QEMU is
> concerned, it's mostly garbage in, garbage out when it comes to invalid
> configurations.  It's just a bit, and using it in invalid configurations
> is okay if you're doing it (for example) for debugging.

In this case, I'd like the requirements and recommendations to be
included in QEMU documentation.  Especially to point out the most
obvious and more likely mistakes (like not ensuring memory is
pinned at all, or letting the vCPU threads be interrupted).

So, is there a known list of steps required to configure a host
to enable kvm-hints-realtime safely, already?  I'd like the
documentation to be better than "you should fiddle with the CPU
affinity on your system and also ensure memory will be pinned;
good luck".
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index d4f33eb8..ab022dc 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -72,8 +72,8 @@  KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
 
 flag                               || value || meaning
 ==================================================================================
-KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
-                                   ||       || determine if there is vCPU pinning
-                                   ||       || and there is no vCPU over-commitment,
+KVM_HINTS_REALTIME                 ||     0 || guest checks this feature bit to
+                                   ||       || determine that vCPUs are never
+                                   ||       || preempted for an unlimited time,
                                    ||       || allowing optimizations
 ----------------------------------------------------------------------------------
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 4c851eb..0ede697 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -29,7 +29,7 @@ 
 #define KVM_FEATURE_PV_TLB_FLUSH	9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT	10
 
-#define KVM_HINTS_DEDICATED      0
+#define KVM_HINTS_REALTIME      0
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7867417..5b2300b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -457,7 +457,7 @@  static void __init sev_map_percpu_data(void)
 static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
 {
 	native_smp_prepare_cpus(max_cpus);
-	if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
 		static_branch_disable(&virt_spin_lock_key);
 }
 
@@ -553,7 +553,7 @@  static void __init kvm_guest_init(void)
 	}
 
 	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-	    !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+	    !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
 	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
 		pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
 
@@ -649,7 +649,7 @@  static __init int kvm_setup_pv_tlb_flush(void)
 	int cpu;
 
 	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
-	    !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+	    !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
 	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
 		for_each_possible_cpu(cpu) {
 			zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
@@ -745,7 +745,7 @@  void __init kvm_spinlock_init(void)
 	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
 		return;
 
-	if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
 		return;
 
 	__pv_init_lock_hash();