Message ID | 20170905132444.7163-4-jgross@suse.com (mailing list archive)
---|---
State | New, archived
On Tue, Sep 05, 2017 at 03:24:43PM +0200, Juergen Gross wrote: > diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h > index 48a706f641f2..fbd98896385c 100644 > --- a/arch/x86/include/asm/qspinlock.h > +++ b/arch/x86/include/asm/qspinlock.h > @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) > smp_store_release((u8 *)lock, 0); > } > Should this not have: #ifdef CONFIG_PARAVIRT ? > +static inline bool native_virt_spin_lock(struct qspinlock *lock) > +{ > + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) > + return false; > + > + /* > + * On hypervisors without PARAVIRT_SPINLOCKS support we fall > + * back to a Test-and-Set spinlock, because fair locks have > + * horrible lock 'holder' preemption issues. > + */ > + > + do { > + while (atomic_read(&lock->val) != 0) > + cpu_relax(); > + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); > + > + return true; > +} #endif > + > #ifdef CONFIG_PARAVIRT_SPINLOCKS > extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); > extern void __pv_init_lock_hash(void); > #ifdef CONFIG_PARAVIRT > #define virt_spin_lock virt_spin_lock > +#ifdef CONFIG_PARAVIRT_SPINLOCKS > static inline bool virt_spin_lock(struct qspinlock *lock) > { > + return pv_virt_spin_lock(lock); > +} > +#else > +static inline bool virt_spin_lock(struct qspinlock *lock) > +{ > + return native_virt_spin_lock(lock); > } > +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ > #endif /* CONFIG_PARAVIRT */ Because I think the above only ever uses native_virt_spin_lock() when PARAVIRT. > @@ -1381,6 +1382,7 @@ void __init native_smp_prepare_boot_cpu(void) > /* already set me in cpu_online_mask in boot_cpu_init() */ > cpumask_set_cpu(me, cpu_callout_mask); > cpu_set_state_online(me); > + native_pv_lock_init(); > } Aah, this is where that goes.. OK that works too.
On 05/09/17 15:55, Peter Zijlstra wrote: > On Tue, Sep 05, 2017 at 03:24:43PM +0200, Juergen Gross wrote: >> diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h >> index 48a706f641f2..fbd98896385c 100644 >> --- a/arch/x86/include/asm/qspinlock.h >> +++ b/arch/x86/include/asm/qspinlock.h >> @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) >> smp_store_release((u8 *)lock, 0); >> } >> > > > Should this not have: > > #ifdef CONFIG_PARAVIRT > > ? I can add it if you want. Juergen
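For reference, the guard Peter is asking about would be a small change; a minimal sketch, assuming the helper is only ever reached from the CONFIG_PARAVIRT virt_spin_lock() wrapper (and from the CONFIG_PARAVIRT_SPINLOCKS thunk, which also depends on PARAVIRT):

```c
/* Sketch only: compile the test-and-set fallback solely when the
 * CONFIG_PARAVIRT code below can actually reach it.
 */
#ifdef CONFIG_PARAVIRT
static inline bool native_virt_spin_lock(struct qspinlock *lock)
{
	if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
		return false;

	/* Unfair TAS fallback for hypervisors without pv spinlock support. */
	do {
		while (atomic_read(&lock->val) != 0)
			cpu_relax();
	} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);

	return true;
}
#endif /* CONFIG_PARAVIRT */
```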
On 09/05/2017 09:24 AM, Juergen Gross wrote: > There are cases where a guest tries to switch spinlocks to bare metal > behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this > has the downside of falling back to unfair test and set scheme for > qspinlocks due to virt_spin_lock() detecting the virtualized > environment. > > Make virt_spin_lock() a paravirt operation in order to enable users > to select an explicit behavior like bare metal. > > Signed-off-by: Juergen Gross <jgross@suse.com> > --- > arch/x86/include/asm/paravirt.h | 5 ++++ > arch/x86/include/asm/paravirt_types.h | 1 + > arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- > arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ > arch/x86/kernel/smpboot.c | 2 ++ > 5 files changed, 55 insertions(+), 15 deletions(-) > > diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h > index c25dd22f7c70..d9e954fb37df 100644 > --- a/arch/x86/include/asm/paravirt.h > +++ b/arch/x86/include/asm/paravirt.h > @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) > return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); > } > > +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) > +{ > + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); > +} > + > #endif /* SMP && PARAVIRT_SPINLOCKS */ > > #ifdef CONFIG_X86_32 > diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h > index 19efefc0e27e..928f5e7953a7 100644 > --- a/arch/x86/include/asm/paravirt_types.h > +++ b/arch/x86/include/asm/paravirt_types.h > @@ -319,6 +319,7 @@ struct pv_lock_ops { > void (*kick)(int cpu); > > struct paravirt_callee_save vcpu_is_preempted; > + struct paravirt_callee_save virt_spin_lock; > } __no_randomize_layout; > > /* This contains all the paravirt structures: we get a convenient > diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h > index 48a706f641f2..fbd98896385c 100644 > --- a/arch/x86/include/asm/qspinlock.h > +++ b/arch/x86/include/asm/qspinlock.h > @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) > smp_store_release((u8 *)lock, 0); > } > > +static inline bool native_virt_spin_lock(struct qspinlock *lock) > +{ > + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) > + return false; > + I think you can take the above if statement out as you has done test in native_pv_lock_init(). So the test will also be false here. As this patch series is x86 specific, you should probably add "x86/" in front of paravirt in the patch titles. Cheers, Longman
On Tue, Sep 05, 2017 at 10:02:57AM -0400, Waiman Long wrote: > On 09/05/2017 09:24 AM, Juergen Gross wrote: > > +static inline bool native_virt_spin_lock(struct qspinlock *lock) > > +{ > > + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) > > + return false; > > + > > I think you can take the above if statement out as you has done test in > native_pv_lock_init(). So the test will also be false here. That does mean we'll run a test-and-set spinlock until paravirt patching happens, though. I prefer not to do that. One important point: we must not be holding any locks when we switch over between the two locks. Back then I spent some time making sure that didn't happen with the X86 feature flag muck.
On 09/05/2017 09:24 AM, Juergen Gross wrote: > There are cases where a guest tries to switch spinlocks to bare metal > behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this > has the downside of falling back to unfair test and set scheme for > qspinlocks due to virt_spin_lock() detecting the virtualized > environment. > > Make virt_spin_lock() a paravirt operation in order to enable users > to select an explicit behavior like bare metal. > > Signed-off-by: Juergen Gross <jgross@suse.com> > --- > arch/x86/include/asm/paravirt.h | 5 ++++ > arch/x86/include/asm/paravirt_types.h | 1 + > arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- > arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ > arch/x86/kernel/smpboot.c | 2 ++ > 5 files changed, 55 insertions(+), 15 deletions(-) > > diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h > index c25dd22f7c70..d9e954fb37df 100644 > --- a/arch/x86/include/asm/paravirt.h > +++ b/arch/x86/include/asm/paravirt.h > @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) > return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); > } > > +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) > +{ > + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); > +} > + > #endif /* SMP && PARAVIRT_SPINLOCKS */ > > #ifdef CONFIG_X86_32 > diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h > index 19efefc0e27e..928f5e7953a7 100644 > --- a/arch/x86/include/asm/paravirt_types.h > +++ b/arch/x86/include/asm/paravirt_types.h > @@ -319,6 +319,7 @@ struct pv_lock_ops { > void (*kick)(int cpu); > > struct paravirt_callee_save vcpu_is_preempted; > + struct paravirt_callee_save virt_spin_lock; > } __no_randomize_layout; > > /* This contains all the paravirt structures: we get a convenient > diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h > index 48a706f641f2..fbd98896385c 100644 > --- a/arch/x86/include/asm/qspinlock.h > +++ b/arch/x86/include/asm/qspinlock.h > @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) > smp_store_release((u8 *)lock, 0); > } > > +static inline bool native_virt_spin_lock(struct qspinlock *lock) > +{ > + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) > + return false; > + > + /* > + * On hypervisors without PARAVIRT_SPINLOCKS support we fall > + * back to a Test-and-Set spinlock, because fair locks have > + * horrible lock 'holder' preemption issues. 
> + */ > + > + do { > + while (atomic_read(&lock->val) != 0) > + cpu_relax(); > + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); > + > + return true; > +} > + > #ifdef CONFIG_PARAVIRT_SPINLOCKS > extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); > extern void __pv_init_lock_hash(void); > @@ -38,33 +57,32 @@ static inline bool vcpu_is_preempted(long cpu) > { > return pv_vcpu_is_preempted(cpu); > } > + > +void native_pv_lock_init(void) __init; > #else > static inline void queued_spin_unlock(struct qspinlock *lock) > { > native_queued_spin_unlock(lock); > } > + > +static inline void native_pv_lock_init(void) > +{ > +} > #endif > > #ifdef CONFIG_PARAVIRT > #define virt_spin_lock virt_spin_lock > +#ifdef CONFIG_PARAVIRT_SPINLOCKS > static inline bool virt_spin_lock(struct qspinlock *lock) > { > - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) > - return false; Have you consider just add one more jump label here to skip virt_spin_lock when KVM or Xen want to do so? > - > - /* > - * On hypervisors without PARAVIRT_SPINLOCKS support we fall > - * back to a Test-and-Set spinlock, because fair locks have > - * horrible lock 'holder' preemption issues. > - */ > - > - do { > - while (atomic_read(&lock->val) != 0) > - cpu_relax(); > - } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); > - > - return true; > + return pv_virt_spin_lock(lock); > +} > +#else > +static inline bool virt_spin_lock(struct qspinlock *lock) > +{ > + return native_virt_spin_lock(lock); > } > +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ > #endif /* CONFIG_PARAVIRT */ > > #include <asm-generic/qspinlock.h> > diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c > index 26e4bd92f309..1be187ef8a38 100644 > --- a/arch/x86/kernel/paravirt-spinlocks.c > +++ b/arch/x86/kernel/paravirt-spinlocks.c > @@ -20,6 +20,12 @@ bool pv_is_native_spin_unlock(void) > __raw_callee_save___native_queued_spin_unlock; > } > > +__visible bool __native_virt_spin_lock(struct qspinlock *lock) > +{ > + return native_virt_spin_lock(lock); > +} > +PV_CALLEE_SAVE_REGS_THUNK(__native_virt_spin_lock); I have some concern about the overhead of register saving/restoring have on spin lock performance in case the kernel is under a non-KVM/Xen hypervisor. Cheers, Longman
On 05/09/17 16:10, Waiman Long wrote: > On 09/05/2017 09:24 AM, Juergen Gross wrote: >> There are cases where a guest tries to switch spinlocks to bare metal >> behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this >> has the downside of falling back to unfair test and set scheme for >> qspinlocks due to virt_spin_lock() detecting the virtualized >> environment. >> >> Make virt_spin_lock() a paravirt operation in order to enable users >> to select an explicit behavior like bare metal. >> >> Signed-off-by: Juergen Gross <jgross@suse.com> >> --- >> arch/x86/include/asm/paravirt.h | 5 ++++ >> arch/x86/include/asm/paravirt_types.h | 1 + >> arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- >> arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ >> arch/x86/kernel/smpboot.c | 2 ++ >> 5 files changed, 55 insertions(+), 15 deletions(-) >> >> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h >> index c25dd22f7c70..d9e954fb37df 100644 >> --- a/arch/x86/include/asm/paravirt.h >> +++ b/arch/x86/include/asm/paravirt.h >> @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) >> return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); >> } >> >> +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) >> +{ >> + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); >> +} >> + >> #endif /* SMP && PARAVIRT_SPINLOCKS */ >> >> #ifdef CONFIG_X86_32 >> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h >> index 19efefc0e27e..928f5e7953a7 100644 >> --- a/arch/x86/include/asm/paravirt_types.h >> +++ b/arch/x86/include/asm/paravirt_types.h >> @@ -319,6 +319,7 @@ struct pv_lock_ops { >> void (*kick)(int cpu); >> >> struct paravirt_callee_save vcpu_is_preempted; >> + struct paravirt_callee_save virt_spin_lock; >> } __no_randomize_layout; >> >> /* This contains all the paravirt structures: we get a convenient >> diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h >> index 48a706f641f2..fbd98896385c 100644 >> --- a/arch/x86/include/asm/qspinlock.h >> +++ b/arch/x86/include/asm/qspinlock.h >> @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) >> smp_store_release((u8 *)lock, 0); >> } >> >> +static inline bool native_virt_spin_lock(struct qspinlock *lock) >> +{ >> + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) >> + return false; >> + >> + /* >> + * On hypervisors without PARAVIRT_SPINLOCKS support we fall >> + * back to a Test-and-Set spinlock, because fair locks have >> + * horrible lock 'holder' preemption issues. 
>> + */ >> + >> + do { >> + while (atomic_read(&lock->val) != 0) >> + cpu_relax(); >> + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); >> + >> + return true; >> +} >> + >> #ifdef CONFIG_PARAVIRT_SPINLOCKS >> extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); >> extern void __pv_init_lock_hash(void); >> @@ -38,33 +57,32 @@ static inline bool vcpu_is_preempted(long cpu) >> { >> return pv_vcpu_is_preempted(cpu); >> } >> + >> +void native_pv_lock_init(void) __init; >> #else >> static inline void queued_spin_unlock(struct qspinlock *lock) >> { >> native_queued_spin_unlock(lock); >> } >> + >> +static inline void native_pv_lock_init(void) >> +{ >> +} >> #endif >> >> #ifdef CONFIG_PARAVIRT >> #define virt_spin_lock virt_spin_lock >> +#ifdef CONFIG_PARAVIRT_SPINLOCKS >> static inline bool virt_spin_lock(struct qspinlock *lock) >> { >> - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) >> - return false; > > Have you consider just add one more jump label here to skip > virt_spin_lock when KVM or Xen want to do so? Why? Did you look at patch 4? This is the way to do it... > >> - >> - /* >> - * On hypervisors without PARAVIRT_SPINLOCKS support we fall >> - * back to a Test-and-Set spinlock, because fair locks have >> - * horrible lock 'holder' preemption issues. >> - */ >> - >> - do { >> - while (atomic_read(&lock->val) != 0) >> - cpu_relax(); >> - } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); >> - >> - return true; >> + return pv_virt_spin_lock(lock); >> +} >> +#else >> +static inline bool virt_spin_lock(struct qspinlock *lock) >> +{ >> + return native_virt_spin_lock(lock); >> } >> +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ >> #endif /* CONFIG_PARAVIRT */ >> >> #include <asm-generic/qspinlock.h> >> diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c >> index 26e4bd92f309..1be187ef8a38 100644 >> --- a/arch/x86/kernel/paravirt-spinlocks.c >> +++ b/arch/x86/kernel/paravirt-spinlocks.c >> @@ -20,6 +20,12 @@ bool pv_is_native_spin_unlock(void) >> __raw_callee_save___native_queued_spin_unlock; >> } >> >> +__visible bool __native_virt_spin_lock(struct qspinlock *lock) >> +{ >> + return native_virt_spin_lock(lock); >> +} >> +PV_CALLEE_SAVE_REGS_THUNK(__native_virt_spin_lock); > > I have some concern about the overhead of register saving/restoring have > on spin lock performance in case the kernel is under a non-KVM/Xen > hypervisor. We are on the slow path already. Juergen
On 09/05/2017 10:08 AM, Peter Zijlstra wrote: > On Tue, Sep 05, 2017 at 10:02:57AM -0400, Waiman Long wrote: >> On 09/05/2017 09:24 AM, Juergen Gross wrote: >>> +static inline bool native_virt_spin_lock(struct qspinlock *lock) >>> +{ >>> + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) >>> + return false; >>> + >> I think you can take the above if statement out as you has done test in >> native_pv_lock_init(). So the test will also be false here. > That does mean we'll run a test-and-set spinlock until paravirt patching > happens though. I prefer to not do that. > > One important point.. we must not be holding any locks when we switch > over between the two locks. Back then I spend some time making sure that > didn't happen with the X86 feature flag muck. AFAICT, native_pv_lock_init() is called before SMP init. So it shouldn't matter. Cheers, Longman
On 09/05/2017 10:18 AM, Juergen Gross wrote: > On 05/09/17 16:10, Waiman Long wrote: >> On 09/05/2017 09:24 AM, Juergen Gross wrote: >>> There are cases where a guest tries to switch spinlocks to bare metal >>> behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this >>> has the downside of falling back to unfair test and set scheme for >>> qspinlocks due to virt_spin_lock() detecting the virtualized >>> environment. >>> >>> Make virt_spin_lock() a paravirt operation in order to enable users >>> to select an explicit behavior like bare metal. >>> >>> Signed-off-by: Juergen Gross <jgross@suse.com> >>> --- >>> arch/x86/include/asm/paravirt.h | 5 ++++ >>> arch/x86/include/asm/paravirt_types.h | 1 + >>> arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- >>> arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ >>> arch/x86/kernel/smpboot.c | 2 ++ >>> 5 files changed, 55 insertions(+), 15 deletions(-) >>> >>> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h >>> index c25dd22f7c70..d9e954fb37df 100644 >>> --- a/arch/x86/include/asm/paravirt.h >>> +++ b/arch/x86/include/asm/paravirt.h >>> @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) >>> return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); >>> } >>> >>> +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) >>> +{ >>> + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); >>> +} >>> + >>> #endif /* SMP && PARAVIRT_SPINLOCKS */ >>> >>> #ifdef CONFIG_X86_32 >>> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h >>> index 19efefc0e27e..928f5e7953a7 100644 >>> --- a/arch/x86/include/asm/paravirt_types.h >>> +++ b/arch/x86/include/asm/paravirt_types.h >>> @@ -319,6 +319,7 @@ struct pv_lock_ops { >>> void (*kick)(int cpu); >>> >>> struct paravirt_callee_save vcpu_is_preempted; >>> + struct paravirt_callee_save virt_spin_lock; >>> } __no_randomize_layout; >>> >>> /* This contains all the paravirt structures: we get a convenient >>> diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h >>> index 48a706f641f2..fbd98896385c 100644 >>> --- a/arch/x86/include/asm/qspinlock.h >>> +++ b/arch/x86/include/asm/qspinlock.h >>> @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) >>> smp_store_release((u8 *)lock, 0); >>> } >>> >>> +static inline bool native_virt_spin_lock(struct qspinlock *lock) >>> +{ >>> + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) >>> + return false; >>> + >>> + /* >>> + * On hypervisors without PARAVIRT_SPINLOCKS support we fall >>> + * back to a Test-and-Set spinlock, because fair locks have >>> + * horrible lock 'holder' preemption issues. 
>>> + */ >>> + >>> + do { >>> + while (atomic_read(&lock->val) != 0) >>> + cpu_relax(); >>> + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); >>> + >>> + return true; >>> +} >>> + >>> #ifdef CONFIG_PARAVIRT_SPINLOCKS >>> extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); >>> extern void __pv_init_lock_hash(void); >>> @@ -38,33 +57,32 @@ static inline bool vcpu_is_preempted(long cpu) >>> { >>> return pv_vcpu_is_preempted(cpu); >>> } >>> + >>> +void native_pv_lock_init(void) __init; >>> #else >>> static inline void queued_spin_unlock(struct qspinlock *lock) >>> { >>> native_queued_spin_unlock(lock); >>> } >>> + >>> +static inline void native_pv_lock_init(void) >>> +{ >>> +} >>> #endif >>> >>> #ifdef CONFIG_PARAVIRT >>> #define virt_spin_lock virt_spin_lock >>> +#ifdef CONFIG_PARAVIRT_SPINLOCKS >>> static inline bool virt_spin_lock(struct qspinlock *lock) >>> { >>> - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) >>> - return false; >> Have you consider just add one more jump label here to skip >> virt_spin_lock when KVM or Xen want to do so? > Why? Did you look at patch 4? This is the way to do it... I asked this because of my performance concern as stated later in the email. >>> - >>> - /* >>> - * On hypervisors without PARAVIRT_SPINLOCKS support we fall >>> - * back to a Test-and-Set spinlock, because fair locks have >>> - * horrible lock 'holder' preemption issues. >>> - */ >>> - >>> - do { >>> - while (atomic_read(&lock->val) != 0) >>> - cpu_relax(); >>> - } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); >>> - >>> - return true; >>> + return pv_virt_spin_lock(lock); >>> +} >>> +#else >>> +static inline bool virt_spin_lock(struct qspinlock *lock) >>> +{ >>> + return native_virt_spin_lock(lock); >>> } >>> +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ >>> #endif /* CONFIG_PARAVIRT */ >>> >>> #include <asm-generic/qspinlock.h> >>> diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c >>> index 26e4bd92f309..1be187ef8a38 100644 >>> --- a/arch/x86/kernel/paravirt-spinlocks.c >>> +++ b/arch/x86/kernel/paravirt-spinlocks.c >>> @@ -20,6 +20,12 @@ bool pv_is_native_spin_unlock(void) >>> __raw_callee_save___native_queued_spin_unlock; >>> } >>> >>> +__visible bool __native_virt_spin_lock(struct qspinlock *lock) >>> +{ >>> + return native_virt_spin_lock(lock); >>> +} >>> +PV_CALLEE_SAVE_REGS_THUNK(__native_virt_spin_lock); >> I have some concern about the overhead of register saving/restoring have >> on spin lock performance in case the kernel is under a non-KVM/Xen >> hypervisor. > We are on the slow path already. That is true, but I still still believe there will be performance impact on lock contention behavior where the slowpath will be used. Cheers, Longman
On 09/05/2017 10:24 AM, Waiman Long wrote: > On 09/05/2017 10:18 AM, Juergen Gross wrote: >> On 05/09/17 16:10, Waiman Long wrote: >>> On 09/05/2017 09:24 AM, Juergen Gross wrote: >>>> There are cases where a guest tries to switch spinlocks to bare metal >>>> behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this >>>> has the downside of falling back to unfair test and set scheme for >>>> qspinlocks due to virt_spin_lock() detecting the virtualized >>>> environment. >>>> >>>> Make virt_spin_lock() a paravirt operation in order to enable users >>>> to select an explicit behavior like bare metal. >>>> >>>> Signed-off-by: Juergen Gross <jgross@suse.com> >>>> --- >>>> arch/x86/include/asm/paravirt.h | 5 ++++ >>>> arch/x86/include/asm/paravirt_types.h | 1 + >>>> arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- >>>> arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ >>>> arch/x86/kernel/smpboot.c | 2 ++ >>>> 5 files changed, 55 insertions(+), 15 deletions(-) >>>> >>>> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h >>>> index c25dd22f7c70..d9e954fb37df 100644 >>>> --- a/arch/x86/include/asm/paravirt.h >>>> +++ b/arch/x86/include/asm/paravirt.h >>>> @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) >>>> return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); >>>> } >>>> >>>> +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) >>>> +{ >>>> + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); >>>> +} >>>> + >>>> #endif /* SMP && PARAVIRT_SPINLOCKS */ >>>> >>>> #ifdef CONFIG_X86_32 >>>> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h >>>> index 19efefc0e27e..928f5e7953a7 100644 >>>> --- a/arch/x86/include/asm/paravirt_types.h >>>> +++ b/arch/x86/include/asm/paravirt_types.h >>>> @@ -319,6 +319,7 @@ struct pv_lock_ops { >>>> void (*kick)(int cpu); >>>> >>>> struct paravirt_callee_save vcpu_is_preempted; >>>> + struct paravirt_callee_save virt_spin_lock; >>>> } __no_randomize_layout; >>>> >>>> /* This contains all the paravirt structures: we get a convenient >>>> diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h >>>> index 48a706f641f2..fbd98896385c 100644 >>>> --- a/arch/x86/include/asm/qspinlock.h >>>> +++ b/arch/x86/include/asm/qspinlock.h >>>> @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) >>>> smp_store_release((u8 *)lock, 0); >>>> } >>>> >>>> +static inline bool native_virt_spin_lock(struct qspinlock *lock) >>>> +{ >>>> + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) >>>> + return false; >>>> + >>>> + /* >>>> + * On hypervisors without PARAVIRT_SPINLOCKS support we fall >>>> + * back to a Test-and-Set spinlock, because fair locks have >>>> + * horrible lock 'holder' preemption issues. 
>>>> + */ >>>> + >>>> + do { >>>> + while (atomic_read(&lock->val) != 0) >>>> + cpu_relax(); >>>> + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); >>>> + >>>> + return true; >>>> +} >>>> + >>>> #ifdef CONFIG_PARAVIRT_SPINLOCKS >>>> extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); >>>> extern void __pv_init_lock_hash(void); >>>> @@ -38,33 +57,32 @@ static inline bool vcpu_is_preempted(long cpu) >>>> { >>>> return pv_vcpu_is_preempted(cpu); >>>> } >>>> + >>>> +void native_pv_lock_init(void) __init; >>>> #else >>>> static inline void queued_spin_unlock(struct qspinlock *lock) >>>> { >>>> native_queued_spin_unlock(lock); >>>> } >>>> + >>>> +static inline void native_pv_lock_init(void) >>>> +{ >>>> +} >>>> #endif >>>> >>>> #ifdef CONFIG_PARAVIRT >>>> #define virt_spin_lock virt_spin_lock >>>> +#ifdef CONFIG_PARAVIRT_SPINLOCKS >>>> static inline bool virt_spin_lock(struct qspinlock *lock) >>>> { >>>> - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) >>>> - return false; >>> Have you consider just add one more jump label here to skip >>> virt_spin_lock when KVM or Xen want to do so? >> Why? Did you look at patch 4? This is the way to do it... > I asked this because of my performance concern as stated later in the email. For clarification, I was actually asking if you consider just adding one more jump label to skip it for Xen/KVM instead of making virt_spin_lock() a pv-op. Cheers, Longman
On 05/09/17 16:31, Waiman Long wrote: > On 09/05/2017 10:24 AM, Waiman Long wrote: >> On 09/05/2017 10:18 AM, Juergen Gross wrote: >>> On 05/09/17 16:10, Waiman Long wrote: >>>> On 09/05/2017 09:24 AM, Juergen Gross wrote: >>>>> There are cases where a guest tries to switch spinlocks to bare metal >>>>> behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this >>>>> has the downside of falling back to unfair test and set scheme for >>>>> qspinlocks due to virt_spin_lock() detecting the virtualized >>>>> environment. >>>>> >>>>> Make virt_spin_lock() a paravirt operation in order to enable users >>>>> to select an explicit behavior like bare metal. >>>>> >>>>> Signed-off-by: Juergen Gross <jgross@suse.com> >>>>> --- >>>>> arch/x86/include/asm/paravirt.h | 5 ++++ >>>>> arch/x86/include/asm/paravirt_types.h | 1 + >>>>> arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- >>>>> arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ >>>>> arch/x86/kernel/smpboot.c | 2 ++ >>>>> 5 files changed, 55 insertions(+), 15 deletions(-) >>>>> >>>>> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h >>>>> index c25dd22f7c70..d9e954fb37df 100644 >>>>> --- a/arch/x86/include/asm/paravirt.h >>>>> +++ b/arch/x86/include/asm/paravirt.h >>>>> @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) >>>>> return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); >>>>> } >>>>> >>>>> +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) >>>>> +{ >>>>> + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); >>>>> +} >>>>> + >>>>> #endif /* SMP && PARAVIRT_SPINLOCKS */ >>>>> >>>>> #ifdef CONFIG_X86_32 >>>>> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h >>>>> index 19efefc0e27e..928f5e7953a7 100644 >>>>> --- a/arch/x86/include/asm/paravirt_types.h >>>>> +++ b/arch/x86/include/asm/paravirt_types.h >>>>> @@ -319,6 +319,7 @@ struct pv_lock_ops { >>>>> void (*kick)(int cpu); >>>>> >>>>> struct paravirt_callee_save vcpu_is_preempted; >>>>> + struct paravirt_callee_save virt_spin_lock; >>>>> } __no_randomize_layout; >>>>> >>>>> /* This contains all the paravirt structures: we get a convenient >>>>> diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h >>>>> index 48a706f641f2..fbd98896385c 100644 >>>>> --- a/arch/x86/include/asm/qspinlock.h >>>>> +++ b/arch/x86/include/asm/qspinlock.h >>>>> @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) >>>>> smp_store_release((u8 *)lock, 0); >>>>> } >>>>> >>>>> +static inline bool native_virt_spin_lock(struct qspinlock *lock) >>>>> +{ >>>>> + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) >>>>> + return false; >>>>> + >>>>> + /* >>>>> + * On hypervisors without PARAVIRT_SPINLOCKS support we fall >>>>> + * back to a Test-and-Set spinlock, because fair locks have >>>>> + * horrible lock 'holder' preemption issues. 
>>>>> + */ >>>>> + >>>>> + do { >>>>> + while (atomic_read(&lock->val) != 0) >>>>> + cpu_relax(); >>>>> + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); >>>>> + >>>>> + return true; >>>>> +} >>>>> + >>>>> #ifdef CONFIG_PARAVIRT_SPINLOCKS >>>>> extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); >>>>> extern void __pv_init_lock_hash(void); >>>>> @@ -38,33 +57,32 @@ static inline bool vcpu_is_preempted(long cpu) >>>>> { >>>>> return pv_vcpu_is_preempted(cpu); >>>>> } >>>>> + >>>>> +void native_pv_lock_init(void) __init; >>>>> #else >>>>> static inline void queued_spin_unlock(struct qspinlock *lock) >>>>> { >>>>> native_queued_spin_unlock(lock); >>>>> } >>>>> + >>>>> +static inline void native_pv_lock_init(void) >>>>> +{ >>>>> +} >>>>> #endif >>>>> >>>>> #ifdef CONFIG_PARAVIRT >>>>> #define virt_spin_lock virt_spin_lock >>>>> +#ifdef CONFIG_PARAVIRT_SPINLOCKS >>>>> static inline bool virt_spin_lock(struct qspinlock *lock) >>>>> { >>>>> - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) >>>>> - return false; >>>> Have you consider just add one more jump label here to skip >>>> virt_spin_lock when KVM or Xen want to do so? >>> Why? Did you look at patch 4? This is the way to do it... >> I asked this because of my performance concern as stated later in the email. > > For clarification, I was actually asking if you consider just adding one > more jump label to skip it for Xen/KVM instead of making > virt_spin_lock() a pv-op. Aah, okay. Bare metal could make use of this jump label, too. I'll have a try how it looks like. Juergen
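For reference, a rough sketch of the jump-label variant being discussed might look like the following; the key name and its default are illustrative assumptions, not taken from the posted series:

```c
/* Illustrative only, not part of the posted series: a static key that
 * defaults to "use the TAS fallback when running virtualized" and that
 * Xen/KVM (or bare-metal init code) could simply switch off to keep the
 * fair qspinlock, instead of going through a pv-op.
 */
DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);

static inline bool virt_spin_lock(struct qspinlock *lock)
{
	if (!static_branch_likely(&virt_spin_lock_key))
		return false;

	/* Same test-and-set fallback as in the patch above. */
	do {
		while (atomic_read(&lock->val) != 0)
			cpu_relax();
	} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);

	return true;
}
```

A guest (or bare metal) wanting the fair qspinlock would then call static_branch_disable(&virt_spin_lock_key) early during boot, before any contended locking can happen.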
Guys, please trim email. On Tue, Sep 05, 2017 at 10:31:46AM -0400, Waiman Long wrote: > For clarification, I was actually asking if you consider just adding one > more jump label to skip it for Xen/KVM instead of making > virt_spin_lock() a pv-op. I don't understand. What performance are you worried about? Native will now do: "xor rax,rax; jnz some_cold_label"; that's fairly trivial code.
On 09/06/2017 03:08 AM, Peter Zijlstra wrote: > Guys, please trim email. > > On Tue, Sep 05, 2017 at 10:31:46AM -0400, Waiman Long wrote: >> For clarification, I was actually asking if you consider just adding one >> more jump label to skip it for Xen/KVM instead of making >> virt_spin_lock() a pv-op. > I don't understand. What performance are you worried about. Native will > now do: "xor rax,rax; jnz some_cold_label" that's fairly trival code. It is not native that I am talking about. I am worried about a VM with a non-Xen/KVM hypervisor where virt_spin_lock() will actually be called. Now that function will become a callee-saved function call instead of being inlined into the native slowpath function. Cheers, Longman
On Wed, Sep 06, 2017 at 08:44:09AM -0400, Waiman Long wrote: > On 09/06/2017 03:08 AM, Peter Zijlstra wrote: > > Guys, please trim email. > > > > On Tue, Sep 05, 2017 at 10:31:46AM -0400, Waiman Long wrote: > >> For clarification, I was actually asking if you consider just adding one > >> more jump label to skip it for Xen/KVM instead of making > >> virt_spin_lock() a pv-op. > > I don't understand. What performance are you worried about. Native will > > now do: "xor rax,rax; jnz some_cold_label" that's fairly trival code. > > It is not native that I am talking about. I am worry about VM with > non-Xen/KVM hypervisor where virt_spin_lock() will actually be called. > Now that function will become a callee-saved function call instead of > being inlined into the native slowpath function. But only if we actually end up using the test-and-set thing, because if you have paravirt we end up using that. And the test-and-set thing sucks anyway. But yes, you're right, that case gets worse.
On 09/06/2017 09:06 AM, Peter Zijlstra wrote: > On Wed, Sep 06, 2017 at 08:44:09AM -0400, Waiman Long wrote: >> On 09/06/2017 03:08 AM, Peter Zijlstra wrote: >>> Guys, please trim email. >>> >>> On Tue, Sep 05, 2017 at 10:31:46AM -0400, Waiman Long wrote: >>>> For clarification, I was actually asking if you consider just adding one >>>> more jump label to skip it for Xen/KVM instead of making >>>> virt_spin_lock() a pv-op. >>> I don't understand. What performance are you worried about. Native will >>> now do: "xor rax,rax; jnz some_cold_label" that's fairly trival code. >> It is not native that I am talking about. I am worry about VM with >> non-Xen/KVM hypervisor where virt_spin_lock() will actually be called. >> Now that function will become a callee-saved function call instead of >> being inlined into the native slowpath function. > But only if we actually end up using the test-and-set thing, because if > you have paravirt we end up using that. I am talking about scenario like a kernel with paravirt spinlock running in a VM managed by VMware, for example. We may not want to penalize them while there are alternatives with less penalty. Cheers, Longman
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c25dd22f7c70..d9e954fb37df 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -725,6 +725,11 @@ static __always_inline bool pv_vcpu_is_preempted(long cpu) return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); } +static __always_inline bool pv_virt_spin_lock(struct qspinlock *lock) +{ + return PVOP_CALLEE1(bool, pv_lock_ops.virt_spin_lock, lock); +} + #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 19efefc0e27e..928f5e7953a7 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -319,6 +319,7 @@ struct pv_lock_ops { void (*kick)(int cpu); struct paravirt_callee_save vcpu_is_preempted; + struct paravirt_callee_save virt_spin_lock; } __no_randomize_layout; /* This contains all the paravirt structures: we get a convenient diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 48a706f641f2..fbd98896385c 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -17,6 +17,25 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock) smp_store_release((u8 *)lock, 0); } +static inline bool native_virt_spin_lock(struct qspinlock *lock) +{ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + return false; + + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. + */ + + do { + while (atomic_read(&lock->val) != 0) + cpu_relax(); + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); + + return true; +} + #ifdef CONFIG_PARAVIRT_SPINLOCKS extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); extern void __pv_init_lock_hash(void); @@ -38,33 +57,32 @@ static inline bool vcpu_is_preempted(long cpu) { return pv_vcpu_is_preempted(cpu); } + +void native_pv_lock_init(void) __init; #else static inline void queued_spin_unlock(struct qspinlock *lock) { native_queued_spin_unlock(lock); } + +static inline void native_pv_lock_init(void) +{ +} #endif #ifdef CONFIG_PARAVIRT #define virt_spin_lock virt_spin_lock +#ifdef CONFIG_PARAVIRT_SPINLOCKS static inline bool virt_spin_lock(struct qspinlock *lock) { - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) - return false; - - /* - * On hypervisors without PARAVIRT_SPINLOCKS support we fall - * back to a Test-and-Set spinlock, because fair locks have - * horrible lock 'holder' preemption issues. 
- */ - - do { - while (atomic_read(&lock->val) != 0) - cpu_relax(); - } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); - - return true; + return pv_virt_spin_lock(lock); +} +#else +static inline bool virt_spin_lock(struct qspinlock *lock) +{ + return native_virt_spin_lock(lock); } +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ #endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 26e4bd92f309..1be187ef8a38 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -20,6 +20,12 @@ bool pv_is_native_spin_unlock(void) __raw_callee_save___native_queued_spin_unlock; } +__visible bool __native_virt_spin_lock(struct qspinlock *lock) +{ + return native_virt_spin_lock(lock); +} +PV_CALLEE_SAVE_REGS_THUNK(__native_virt_spin_lock); + struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, @@ -27,6 +33,14 @@ struct pv_lock_ops pv_lock_ops = { .wait = paravirt_nop, .kick = paravirt_nop, .vcpu_is_preempted = __PV_IS_CALLEE_SAVE(_paravirt_false), + .virt_spin_lock = PV_CALLEE_SAVE(__native_virt_spin_lock), #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); + +void __init native_pv_lock_init(void) +{ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + pv_lock_ops.virt_spin_lock = + __PV_IS_CALLEE_SAVE(_paravirt_false); +} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 54b9e89d4d6b..21500d3ba359 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,6 +77,7 @@ #include <asm/i8259.h> #include <asm/realmode.h> #include <asm/misc.h> +#include <asm/qspinlock.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -1381,6 +1382,7 @@ void __init native_smp_prepare_boot_cpu(void) /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); cpu_set_state_online(me); + native_pv_lock_init(); } void __init native_smp_cpus_done(unsigned int max_cpus)
There are cases where a guest tries to switch spinlocks to bare metal behavior (e.g. by setting "xen_nopvspin" boot parameter). Today this has the downside of falling back to unfair test and set scheme for qspinlocks due to virt_spin_lock() detecting the virtualized environment. Make virt_spin_lock() a paravirt operation in order to enable users to select an explicit behavior like bare metal. Signed-off-by: Juergen Gross <jgross@suse.com> --- arch/x86/include/asm/paravirt.h | 5 ++++ arch/x86/include/asm/paravirt_types.h | 1 + arch/x86/include/asm/qspinlock.h | 48 ++++++++++++++++++++++++----------- arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++ arch/x86/kernel/smpboot.c | 2 ++ 5 files changed, 55 insertions(+), 15 deletions(-)
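For context, a hypothetical sketch of how a guest could use the new pv-op to get bare-metal behavior; the real hook lives in a later patch of the series (not shown in this archive), and the xen_nopvspin handling below is only illustrative:

```c
/* Hypothetical sketch -- the actual wiring is in a later patch of this
 * series.  A Xen guest booted with "xen_nopvspin" could make
 * virt_spin_lock() always return false, so the fair qspinlock slowpath
 * is used instead of the test-and-set fallback.
 */
static void __init xen_init_spinlocks_sketch(void)
{
	if (xen_nopvspin)
		pv_lock_ops.virt_spin_lock =
			__PV_IS_CALLEE_SAVE(_paravirt_false);
}
```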