[v10,05/18] KVM: arm64: Convert lazy FPSIMD context switch trap to C

Message ID 1527005119-6842-6-git-send-email-Dave.Martin@arm.com (mailing list archive)
State New, archived

Commit Message

Dave Martin May 22, 2018, 4:05 p.m. UTC
To make the lazy FPSIMD context switch trap code easier to hack on,
this patch converts it to C.

This is not amazingly efficient, but the trap should typically only
be taken once per host context switch.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/hyp/entry.S  | 57 +++++++++++++++++----------------------------
 arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++
 2 files changed, 46 insertions(+), 35 deletions(-)

Comments

Alex Bennée May 23, 2018, 7:35 p.m. UTC | #1
Dave Martin <Dave.Martin@arm.com> writes:

> To make the lazy FPSIMD context switch trap code easier to hack on,
> this patch converts it to C.
>
> This is not amazingly efficient, but the trap should typically only
> be taken once per host context switch.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm64/kvm/hyp/entry.S  | 57 +++++++++++++++++----------------------------
>  arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++
>  2 files changed, 46 insertions(+), 35 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> index e41a161..40f349b 100644
> --- a/arch/arm64/kvm/hyp/entry.S
> +++ b/arch/arm64/kvm/hyp/entry.S
> @@ -172,40 +172,27 @@ ENTRY(__fpsimd_guest_restore)
>  	// x1: vcpu
>  	// x2-x29,lr: vcpu regs
>  	// vcpu x0-x1 on the stack
> -	stp	x2, x3, [sp, #-16]!
> -	stp	x4, lr, [sp, #-16]!
> -
> -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> -	mrs	x2, cptr_el2
> -	bic	x2, x2, #CPTR_EL2_TFP
> -	msr	cptr_el2, x2
> -alternative_else
> -	mrs	x2, cpacr_el1
> -	orr	x2, x2, #CPACR_EL1_FPEN
> -	msr	cpacr_el1, x2
> -alternative_endif
> -	isb
> -
> -	mov	x3, x1
> -
> -	ldr	x0, [x3, #VCPU_HOST_CONTEXT]
> -	kern_hyp_va x0
> -	add	x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
> -	bl	__fpsimd_save_state
> -
> -	add	x2, x3, #VCPU_CONTEXT
> -	add	x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
> -	bl	__fpsimd_restore_state
> -
> -	// Skip restoring fpexc32 for AArch64 guests
> -	mrs	x1, hcr_el2
> -	tbnz	x1, #HCR_RW_SHIFT, 1f
> -	ldr	x4, [x3, #VCPU_FPEXC32_EL2]
> -	msr	fpexc32_el2, x4
> -1:
> -	ldp	x4, lr, [sp], #16
> -	ldp	x2, x3, [sp], #16
> -	ldp	x0, x1, [sp], #16
> -
> +	stp	x2, x3, [sp, #-144]!
> +	stp	x4, x5, [sp, #16]
> +	stp	x6, x7, [sp, #32]
> +	stp	x8, x9, [sp, #48]
> +	stp	x10, x11, [sp, #64]
> +	stp	x12, x13, [sp, #80]
> +	stp	x14, x15, [sp, #96]
> +	stp	x16, x17, [sp, #112]
> +	stp	x18, lr, [sp, #128]
> +
> +	bl	__hyp_switch_fpsimd
> +
> +	ldp	x4, x5, [sp, #16]
> +	ldp	x6, x7, [sp, #32]
> +	ldp	x8, x9, [sp, #48]
> +	ldp	x10, x11, [sp, #64]
> +	ldp	x12, x13, [sp, #80]
> +	ldp	x14, x15, [sp, #96]
> +	ldp	x16, x17, [sp, #112]
> +	ldp	x18, lr, [sp, #128]
> +	ldp	x0, x1, [sp, #144]
> +	ldp	x2, x3, [sp], #160
>  	eret
>  ENDPROC(__fpsimd_guest_restore)
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index d964523..c0796c4 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
>  	}
>  }
>
> +void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
> +				    struct kvm_vcpu *vcpu)
> +{
> +	kvm_cpu_context_t *host_ctxt;
> +
> +	if (has_vhe())
> +		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
> +			     cpacr_el1);
> +	else
> +		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
> +			     cptr_el2);

Is there no way to do alternative() in C or does it always come down to
different inline asms?

Anyway:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>


> +
> +	isb();
> +
> +	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
> +	__fpsimd_save_state(&host_ctxt->gp_regs.fp_regs);
> +	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
> +
> +	/* Skip restoring fpexc32 for AArch64 guests */
> +	if (!(read_sysreg(hcr_el2) & HCR_RW))
> +		write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
> +			     fpexc32_el2);
> +}
> +
>  /*
>   * Return true when we were able to fixup the guest exit and should return to
>   * the guest, false when we should restore the host state and return to the


--
Alex Bennée
Christoffer Dall May 24, 2018, 8:12 a.m. UTC | #2
On Wed, May 23, 2018 at 08:35:13PM +0100, Alex Bennée wrote:
> 
> Dave Martin <Dave.Martin@arm.com> writes:
> 
> > To make the lazy FPSIMD context switch trap code easier to hack on,
> > this patch converts it to C.
> >
> > This is not amazingly efficient, but the trap should typically only
> > be taken once per host context switch.
> >
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
> > ---
> >  arch/arm64/kvm/hyp/entry.S  | 57 +++++++++++++++++----------------------------
> >  arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++
> >  2 files changed, 46 insertions(+), 35 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> > index e41a161..40f349b 100644
> > --- a/arch/arm64/kvm/hyp/entry.S
> > +++ b/arch/arm64/kvm/hyp/entry.S
> > @@ -172,40 +172,27 @@ ENTRY(__fpsimd_guest_restore)
> >  	// x1: vcpu
> >  	// x2-x29,lr: vcpu regs
> >  	// vcpu x0-x1 on the stack
> > -	stp	x2, x3, [sp, #-16]!
> > -	stp	x4, lr, [sp, #-16]!
> > -
> > -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> > -	mrs	x2, cptr_el2
> > -	bic	x2, x2, #CPTR_EL2_TFP
> > -	msr	cptr_el2, x2
> > -alternative_else
> > -	mrs	x2, cpacr_el1
> > -	orr	x2, x2, #CPACR_EL1_FPEN
> > -	msr	cpacr_el1, x2
> > -alternative_endif
> > -	isb
> > -
> > -	mov	x3, x1
> > -
> > -	ldr	x0, [x3, #VCPU_HOST_CONTEXT]
> > -	kern_hyp_va x0
> > -	add	x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
> > -	bl	__fpsimd_save_state
> > -
> > -	add	x2, x3, #VCPU_CONTEXT
> > -	add	x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
> > -	bl	__fpsimd_restore_state
> > -
> > -	// Skip restoring fpexc32 for AArch64 guests
> > -	mrs	x1, hcr_el2
> > -	tbnz	x1, #HCR_RW_SHIFT, 1f
> > -	ldr	x4, [x3, #VCPU_FPEXC32_EL2]
> > -	msr	fpexc32_el2, x4
> > -1:
> > -	ldp	x4, lr, [sp], #16
> > -	ldp	x2, x3, [sp], #16
> > -	ldp	x0, x1, [sp], #16
> > -
> > +	stp	x2, x3, [sp, #-144]!
> > +	stp	x4, x5, [sp, #16]
> > +	stp	x6, x7, [sp, #32]
> > +	stp	x8, x9, [sp, #48]
> > +	stp	x10, x11, [sp, #64]
> > +	stp	x12, x13, [sp, #80]
> > +	stp	x14, x15, [sp, #96]
> > +	stp	x16, x17, [sp, #112]
> > +	stp	x18, lr, [sp, #128]
> > +
> > +	bl	__hyp_switch_fpsimd
> > +
> > +	ldp	x4, x5, [sp, #16]
> > +	ldp	x6, x7, [sp, #32]
> > +	ldp	x8, x9, [sp, #48]
> > +	ldp	x10, x11, [sp, #64]
> > +	ldp	x12, x13, [sp, #80]
> > +	ldp	x14, x15, [sp, #96]
> > +	ldp	x16, x17, [sp, #112]
> > +	ldp	x18, lr, [sp, #128]
> > +	ldp	x0, x1, [sp, #144]
> > +	ldp	x2, x3, [sp], #160
> >  	eret
> >  ENDPROC(__fpsimd_guest_restore)
> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index d964523..c0796c4 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c
> > @@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
> >  	}
> >  }
> >
> > +void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
> > +				    struct kvm_vcpu *vcpu)
> > +{
> > +	kvm_cpu_context_t *host_ctxt;
> > +
> > +	if (has_vhe())
> > +		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
> > +			     cpacr_el1);
> > +	else
> > +		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
> > +			     cptr_el2);
> 
> Is there no way to do alternative() in C or does it always come down to
> different inline asms?
> 

has_vhe() should resolve to a static key, and I prefer this over the
previous alternative construct we had for selecting function calls in C,
as that resulted in having to follow too many levels of indirection.
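
(For illustration only — a rough sketch, not quoted from this series:
around this kernel version has_vhe() is approximately the wrapper
below, with cpus_have_const_cap() compiling down to a statically
patched branch, i.e. a static key.  The real definition lives in
arch/arm64/include/asm/virt.h and may differ in detail.)

	static inline bool has_vhe(void)
	{
		/* Resolves to a patched branch once the CPU capability
		 * bits are finalised: no load, no indirect call. */
		if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
			return true;

		return false;
	}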

Thanks,
-Christoffer
Dave Martin May 24, 2018, 8:54 a.m. UTC | #3
On Thu, May 24, 2018 at 10:12:20AM +0200, Christoffer Dall wrote:
> On Wed, May 23, 2018 at 08:35:13PM +0100, Alex Bennée wrote:
> > 
> > Dave Martin <Dave.Martin@arm.com> writes:
> > 
> > > To make the lazy FPSIMD context switch trap code easier to hack on,
> > > this patch converts it to C.
> > >
> > > This is not amazingly efficient, but the trap should typically only
> > > be taken once per host context switch.
> > >
> > > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > > Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
> > > ---
> > >  arch/arm64/kvm/hyp/entry.S  | 57 +++++++++++++++++----------------------------
> > >  arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++
> > >  2 files changed, 46 insertions(+), 35 deletions(-)

[...]

> > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > > index d964523..c0796c4 100644
> > > --- a/arch/arm64/kvm/hyp/switch.c
> > > +++ b/arch/arm64/kvm/hyp/switch.c
> > > @@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
> > >  	}
> > >  }
> > >
> > > +void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
> > > +				    struct kvm_vcpu *vcpu)
> > > +{
> > > +	kvm_cpu_context_t *host_ctxt;
> > > +
> > > +	if (has_vhe())
> > > +		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
> > > +			     cpacr_el1);
> > > +	else
> > > +		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
> > > +			     cptr_el2);
> > 
> > Is there no way to do alternative() in C or does it always come down to
> > different inline asms?
> > 
> 
> has_vhe() should resolve to a static key, and I prefer this over the
> previous alternative construct we had for selecting function calls in C,
>> as that resulted in having to follow too many levels of indirection.

I'll defer to Christoffer on that -- I was just following precedent :)

The if (has_vhe()) approach has the benefit of being much more
readable, and the static branch predictor in many CPUs will succeed in
folding a short-range unconditional branch out entirely.  There will be
a small increase in I-cache pressure due to the larger inline code
size, but probably not much beyond that.
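
For comparison, a rough sketch of the older function-selection
construct mentioned above (modelled on the hyp_alternate_select()
helper in asm/kvm_hyp.h; the callee names here are made up purely for
illustration):

	static void __hyp_text __fpsimd_enable_nvhe(void)
	{
		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
			     cptr_el2);
	}

	static void __hyp_text __fpsimd_enable_vhe(void)
	{
		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
			     cpacr_el1);
	}

	/* Alternative-patched selector; the caller invokes it as
	 * __fpsimd_enable()(), i.e. call the selector, then call
	 * whichever function it returned. */
	static hyp_alternate_select(__fpsimd_enable,
				    __fpsimd_enable_nvhe, __fpsimd_enable_vhe,
				    ARM64_HAS_VIRT_HOST_EXTN);

Each callee is trivial, but the reader has to chase the macro and both
callees to see what actually runs — the indirection that the plain
if (has_vhe()) branch avoids.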

Cheers
---Dave
Alex Bennée May 24, 2018, 9:14 a.m. UTC | #4
Dave Martin <Dave.Martin@arm.com> writes:

> On Thu, May 24, 2018 at 10:12:20AM +0200, Christoffer Dall wrote:
>> On Wed, May 23, 2018 at 08:35:13PM +0100, Alex Bennée wrote:
>> >
>> > Dave Martin <Dave.Martin@arm.com> writes:
>> >
>> > > To make the lazy FPSIMD context switch trap code easier to hack on,
>> > > this patch converts it to C.
>> > >
>> > > This is not amazingly efficient, but the trap should typically only
>> > > be taken once per host context switch.
>> > >
>> > > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
>> > > Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
>> > > ---
>> > >  arch/arm64/kvm/hyp/entry.S  | 57 +++++++++++++++++----------------------------
>> > >  arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++
>> > >  2 files changed, 46 insertions(+), 35 deletions(-)
>
> [...]
>
>> > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
>> > > index d964523..c0796c4 100644
>> > > --- a/arch/arm64/kvm/hyp/switch.c
>> > > +++ b/arch/arm64/kvm/hyp/switch.c
>> > > @@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
>> > >  	}
>> > >  }
>> > >
>> > > +void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
>> > > +				    struct kvm_vcpu *vcpu)
>> > > +{
>> > > +	kvm_cpu_context_t *host_ctxt;
>> > > +
>> > > +	if (has_vhe())
>> > > +		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
>> > > +			     cpacr_el1);
>> > > +	else
>> > > +		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
>> > > +			     cptr_el2);
>> >
>> > Is there no way to do alternative() in C or does it always come down to
>> > different inline asms?
>> >
>>
>> has_vhe() should resolve to a static key, and I prefer this over the
>> previous alternative construct we had for selecting function calls in C,
>> > as that resulted in having to follow too many levels of indirection.
>
> I'll defer to Christoffer on that -- I was just following precedent :)
>
> The if (has_vhe()) approach has the benefit of being much more
> readable, and the static branch predictor in many CPUs will succeed in
> folding a short-range unconditional branch out entirely.  There will be
> a small increase in I-cache pressure due to the larger inline code
> size, but probably not much beyond that.

Fair enough - it was mostly a curiosity. It seems most uses of
alternative() are at the individual-instruction level anyway.
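
An instruction-level use from C looks roughly like the sketch below
(the ALTERNATIVE() asm-string form follows asm/alternative.h; the
helper name and the capability/instruction pairing are made up purely
to show the shape):

	static inline u64 sketch_mask_va(u64 v)
	{
		/* The default path executes the AND; when the capability
		 * is detected the instruction is patched to a NOP, so no
		 * runtime branch is involved either way. */
		asm volatile(ALTERNATIVE("and %0, %0, #0x0000ffffffffffff",
					 "nop",
					 ARM64_HAS_VIRT_HOST_EXTN)
			     : "+r" (v));
		return v;
	}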

--
Alex Bennée

Patch

diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e41a161..40f349b 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -172,40 +172,27 @@  ENTRY(__fpsimd_guest_restore)
 	// x1: vcpu
 	// x2-x29,lr: vcpu regs
 	// vcpu x0-x1 on the stack
-	stp	x2, x3, [sp, #-16]!
-	stp	x4, lr, [sp, #-16]!
-
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	mrs	x2, cptr_el2
-	bic	x2, x2, #CPTR_EL2_TFP
-	msr	cptr_el2, x2
-alternative_else
-	mrs	x2, cpacr_el1
-	orr	x2, x2, #CPACR_EL1_FPEN
-	msr	cpacr_el1, x2
-alternative_endif
-	isb
-
-	mov	x3, x1
-
-	ldr	x0, [x3, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x0
-	add	x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
-	bl	__fpsimd_save_state
-
-	add	x2, x3, #VCPU_CONTEXT
-	add	x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
-	bl	__fpsimd_restore_state
-
-	// Skip restoring fpexc32 for AArch64 guests
-	mrs	x1, hcr_el2
-	tbnz	x1, #HCR_RW_SHIFT, 1f
-	ldr	x4, [x3, #VCPU_FPEXC32_EL2]
-	msr	fpexc32_el2, x4
-1:
-	ldp	x4, lr, [sp], #16
-	ldp	x2, x3, [sp], #16
-	ldp	x0, x1, [sp], #16
-
+	stp	x2, x3, [sp, #-144]!
+	stp	x4, x5, [sp, #16]
+	stp	x6, x7, [sp, #32]
+	stp	x8, x9, [sp, #48]
+	stp	x10, x11, [sp, #64]
+	stp	x12, x13, [sp, #80]
+	stp	x14, x15, [sp, #96]
+	stp	x16, x17, [sp, #112]
+	stp	x18, lr, [sp, #128]
+
+	bl	__hyp_switch_fpsimd
+
+	ldp	x4, x5, [sp, #16]
+	ldp	x6, x7, [sp, #32]
+	ldp	x8, x9, [sp, #48]
+	ldp	x10, x11, [sp, #64]
+	ldp	x12, x13, [sp, #80]
+	ldp	x14, x15, [sp, #96]
+	ldp	x16, x17, [sp, #112]
+	ldp	x18, lr, [sp, #128]
+	ldp	x0, x1, [sp, #144]
+	ldp	x2, x3, [sp], #160
 	eret
 ENDPROC(__fpsimd_guest_restore)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index d964523..c0796c4 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -318,6 +318,30 @@  static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 	}
 }
 
+void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused,
+				    struct kvm_vcpu *vcpu)
+{
+	kvm_cpu_context_t *host_ctxt;
+
+	if (has_vhe())
+		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
+			     cpacr_el1);
+	else
+		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+			     cptr_el2);
+
+	isb();
+
+	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+	__fpsimd_save_state(&host_ctxt->gp_regs.fp_regs);
+	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+
+	/* Skip restoring fpexc32 for AArch64 guests */
+	if (!(read_sysreg(hcr_el2) & HCR_RW))
+		write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
+			     fpexc32_el2);
+}
+
 /*
  * Return true when we were able to fixup the guest exit and should return to
  * the guest, false when we should restore the host state and return to the