
KVM: arm64: Make vcpu flag updates non-preemptible

Message ID 20230417093629.1440039-1-maz@kernel.org (mailing list archive)
State New, archived
Series KVM: arm64: Make vcpu flag updates non-preemptible

Commit Message

Marc Zyngier April 17, 2023, 9:36 a.m. UTC
Per-vcpu flags are updated using a non-atomic RMW operation,
which means it is possible to get preempted between the read and
write operations.

Another interesting thing to note is that preemption also updates
flags, as we have some flag manipulation in both the load and put
operations.

It is thus possible to lose information communicated by either
load or put, as the preempted flag update will overwrite the flags
when the thread is resumed. This is especially critical if either
load or put has stored information which depends on the physical
CPU the vcpu runs on.
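
The window looks roughly like this (an illustrative interleaving only;
"some_flag" stands for whichever flag is being set):

  vcpu thread                          preemption (put/load)
  -----------                          ---------------------
  tmp = v->arch.sflags;                /* read */
      ...preempted...
                                       put: updates sflags
                                       load: updates sflags
      ...resumed...
  tmp |= some_flag;
  v->arch.sflags = tmp;                /* write: clobbers the put/load updates */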

This results in really elusive bugs, and kudos must be given to
Mostafa for the long hours of debugging and for finally spotting
the problem.

Fixes: e87abb73e594 ("KVM: arm64: Add helpers to manipulate vcpu flags among a set")
Reported-by: Mostafa Saleh <smostafa@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org
---
 arch/arm64/include/asm/kvm_host.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

Comments

Will Deacon April 17, 2023, 11:40 a.m. UTC | #1
On Mon, Apr 17, 2023 at 10:36:29AM +0100, Marc Zyngier wrote:
> Per-vcpu flags are updated using a non-atomic RMW operation,
> which means it is possible to get preempted between the read and
> write operations.
> 
> Another interesting thing to note is that preemption also updates
> flags, as we have some flag manipulation in both the load and put
> operations.
> 
> It is thus possible to lose information communicated by either
> load or put, as the preempted flag update will overwrite the flags
> when the thread is resumed. This is especially critical if either
> load or put has stored information which depends on the physical
> CPU the vcpu runs on.
> 
> This results in really elusive bugs, and kudos must be given to
> Mostafa for the long hours of debugging and for finally spotting
> the problem.
> 
> Fixes: e87abb73e594 ("KVM: arm64: Add helpers to manipulate vcpu flags among a set")
> Reported-by: Mostafa Saleh <smostafa@google.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> Cc: stable@vger.kernel.org
> ---
>  arch/arm64/include/asm/kvm_host.h | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index bcd774d74f34..d716cfd806e8 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -579,6 +579,19 @@ struct kvm_vcpu_arch {
>  		v->arch.flagset & (m);				\
>  	})
>  
> +/*
> + * Note that the set/clear accessors must be preempt-safe in order to
> + * avoid nesting them with load/put which also manipulate flags...
> + */
> +#ifdef __KVM_NVHE_HYPERVISOR__
> +/* the nVHE hypervisor is always non-preemptible */
> +#define __vcpu_flags_preempt_disable()
> +#define __vcpu_flags_preempt_enable()
> +#else
> +#define __vcpu_flags_preempt_disable()	preempt_disable()
> +#define __vcpu_flags_preempt_enable()	preempt_enable()
> +#endif

If it makes things cleaner, we could define local (empty) copies of these
preempt_*() macros at EL2 to save you having to wrap them here. Up to you.
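
Something like this, say (a sketch only, untested, and assuming nothing
at EL2 already defines these):

	/* EL2 is never preemptible, stub out the generic helpers */
	#define preempt_disable()	do { } while (0)
	#define preempt_enable()	do { } while (0)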

>  #define __vcpu_set_flag(v, flagset, f, m)			\
>  	do {							\
>  		typeof(v->arch.flagset) *fset;			\
> @@ -586,9 +599,11 @@ struct kvm_vcpu_arch {
>  		__build_check_flag(v, flagset, f, m);		\
>  								\
>  		fset = &v->arch.flagset;			\
> +		__vcpu_flags_preempt_disable();			\
>  		if (HWEIGHT(m) > 1)				\
>  			*fset &= ~(m);				\
>  		*fset |= (f);					\
> +		__vcpu_flags_preempt_enable();			\
>  	} while (0)
>  
>  #define __vcpu_clear_flag(v, flagset, f, m)			\
> @@ -598,7 +613,9 @@ struct kvm_vcpu_arch {
>  		__build_check_flag(v, flagset, f, m);		\
>  								\
>  		fset = &v->arch.flagset;			\
> +		__vcpu_flags_preempt_disable();			\
>  		*fset &= ~(m);					\
> +		__vcpu_flags_preempt_enable();			\
>  	} while (0)
>  
>  #define vcpu_get_flag(v, ...)	__vcpu_get_flag((v), __VA_ARGS__)

Given that __vcpu_get_flag() is still preemptible, we should probably
add a READ_ONCE() in there when we access the relevant flags field. In
practice, they're all single-byte fields so it should be ok, but I think
the READ_ONCE() is still worthwhile.
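
Something along these lines, maybe (sketch, untested):

	#define __vcpu_get_flag(v, flagset, f, m)		\
		({						\
			__build_check_flag(v, flagset, f, m);	\
								\
			READ_ONCE(v->arch.flagset) & (m);	\
		})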

Will
Marc Zyngier April 17, 2023, 12:35 p.m. UTC | #2
On Mon, 17 Apr 2023 12:40:26 +0100,
Will Deacon <will@kernel.org> wrote:
> 
> On Mon, Apr 17, 2023 at 10:36:29AM +0100, Marc Zyngier wrote:
> > Per-vcpu flags are updated using a non-atomic RMW operation,
> > which means it is possible to get preempted between the read and
> > write operations.
> > 
> > Another interesting thing to note is that preemption also updates
> > flags, as we have some flag manipulation in both the load and put
> > operations.
> > 
> > It is thus possible to lose information communicated by either
> > load or put, as the preempted flag update will overwrite the flags
> > when the thread is resumed. This is especially critical if either
> > load or put has stored information which depends on the physical
> > CPU the vcpu runs on.
> > 
> > This results in really elusive bugs, and kudos must be given to
> > Mostafa for the long hours of debugging and for finally spotting
> > the problem.
> > 
> > Fixes: e87abb73e594 ("KVM: arm64: Add helpers to manipulate vcpu flags among a set")
> > Reported-by: Mostafa Saleh <smostafa@google.com>
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > Cc: stable@vger.kernel.org
> > ---
> >  arch/arm64/include/asm/kvm_host.h | 17 +++++++++++++++++
> >  1 file changed, 17 insertions(+)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index bcd774d74f34..d716cfd806e8 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -579,6 +579,19 @@ struct kvm_vcpu_arch {
> >  		v->arch.flagset & (m);				\
> >  	})
> >  
> > +/*
> > + * Note that the set/clear accessors must be preempt-safe in order to
> > + * avoid nesting them with load/put which also manipulate flags...
> > + */
> > +#ifdef __KVM_NVHE_HYPERVISOR__
> > +/* the nVHE hypervisor is always non-preemptible */
> > +#define __vcpu_flags_preempt_disable()
> > +#define __vcpu_flags_preempt_enable()
> > +#else
> > +#define __vcpu_flags_preempt_disable()	preempt_disable()
> > +#define __vcpu_flags_preempt_enable()	preempt_enable()
> > +#endif
> 
> If it makes things cleaner, we could define local (empty) copies of these
> preempt_*() macros at EL2 to save you having to wrap them here. Up to you.

Nah, that's fine. This is subtle enough stuff that I'm happy to see it
all exposed in the same location.

> >  #define __vcpu_set_flag(v, flagset, f, m)			\
> >  	do {							\
> >  		typeof(v->arch.flagset) *fset;			\
> > @@ -586,9 +599,11 @@ struct kvm_vcpu_arch {
> >  		__build_check_flag(v, flagset, f, m);		\
> >  								\
> >  		fset = &v->arch.flagset;			\
> > +		__vcpu_flags_preempt_disable();			\
> >  		if (HWEIGHT(m) > 1)				\
> >  			*fset &= ~(m);				\
> >  		*fset |= (f);					\
> > +		__vcpu_flags_preempt_enable();			\
> >  	} while (0)
> >  
> >  #define __vcpu_clear_flag(v, flagset, f, m)			\
> > @@ -598,7 +613,9 @@ struct kvm_vcpu_arch {
> >  		__build_check_flag(v, flagset, f, m);		\
> >  								\
> >  		fset = &v->arch.flagset;			\
> > +		__vcpu_flags_preempt_disable();			\
> >  		*fset &= ~(m);					\
> > +		__vcpu_flags_preempt_enable();			\
> >  	} while (0)
> >  
> >  #define vcpu_get_flag(v, ...)	__vcpu_get_flag((v), __VA_ARGS__)
> 
> Given that __vcpu_get_flag() is still preemptible, we should probably
> add a READ_ONCE() in there when we access the relevant flags field. In
> practice, they're all single-byte fields so it should be ok, but I think
> the READ_ONCE() is still worthwhile.

Yup, good point. People are already talking about expanding some of
the fields for $REASON, so they may become larger than a single byte.
And READ_ONCE() makes it clear that there is some level of atomicity
required here as well.

I'll respin this shortly.

Thanks,

	M.

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index bcd774d74f34..d716cfd806e8 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -579,6 +579,19 @@ struct kvm_vcpu_arch {
 		v->arch.flagset & (m);				\
 	})
 
+/*
+ * Note that the set/clear accessors must be preempt-safe in order to
+ * avoid nesting them with load/put which also manipulate flags...
+ */
+#ifdef __KVM_NVHE_HYPERVISOR__
+/* the nVHE hypervisor is always non-preemptible */
+#define __vcpu_flags_preempt_disable()
+#define __vcpu_flags_preempt_enable()
+#else
+#define __vcpu_flags_preempt_disable()	preempt_disable()
+#define __vcpu_flags_preempt_enable()	preempt_enable()
+#endif
+
 #define __vcpu_set_flag(v, flagset, f, m)			\
 	do {							\
 		typeof(v->arch.flagset) *fset;			\
@@ -586,9 +599,11 @@ struct kvm_vcpu_arch {
 		__build_check_flag(v, flagset, f, m);		\
 								\
 		fset = &v->arch.flagset;			\
+		__vcpu_flags_preempt_disable();			\
 		if (HWEIGHT(m) > 1)				\
 			*fset &= ~(m);				\
 		*fset |= (f);					\
+		__vcpu_flags_preempt_enable();			\
 	} while (0)
 
 #define __vcpu_clear_flag(v, flagset, f, m)			\
@@ -598,7 +613,9 @@ struct kvm_vcpu_arch {
 		__build_check_flag(v, flagset, f, m);		\
 								\
 		fset = &v->arch.flagset;			\
+		__vcpu_flags_preempt_disable();			\
 		*fset &= ~(m);					\
+		__vcpu_flags_preempt_enable();			\
 	} while (0)
 
 #define vcpu_get_flag(v, ...)	__vcpu_get_flag((v), __VA_ARGS__)
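
For reference, a minimal usage sketch of the vcpu_*_flag() wrappers
built on the accessors above (DEMO_FLAG is a made-up flag for
illustration; __vcpu_single_flag() and BIT() are the existing helpers):

	#define DEMO_FLAG	__vcpu_single_flag(sflags, BIT(0))

	static void demo(struct kvm_vcpu *vcpu)
	{
		/* the RMW sequence now runs with preemption disabled */
		vcpu_set_flag(vcpu, DEMO_FLAG);
		if (vcpu_get_flag(vcpu, DEMO_FLAG))
			vcpu_clear_flag(vcpu, DEMO_FLAG);
	}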