Message ID | 1527005119-6842-6-git-send-email-Dave.Martin@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Dave Martin <Dave.Martin@arm.com> writes: > To make the lazy FPSIMD context switch trap code easier to hack on, > this patch converts it to C. > > This is not amazingly efficient, but the trap should typically only > be taken once per host context switch. > > Signed-off-by: Dave Martin <Dave.Martin@arm.com> > Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> > --- > arch/arm64/kvm/hyp/entry.S | 57 +++++++++++++++++---------------------------- > arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++ > 2 files changed, 46 insertions(+), 35 deletions(-) > > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S > index e41a161..40f349b 100644 > --- a/arch/arm64/kvm/hyp/entry.S > +++ b/arch/arm64/kvm/hyp/entry.S > @@ -172,40 +172,27 @@ ENTRY(__fpsimd_guest_restore) > // x1: vcpu > // x2-x29,lr: vcpu regs > // vcpu x0-x1 on the stack > - stp x2, x3, [sp, #-16]! > - stp x4, lr, [sp, #-16]! > - > -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN > - mrs x2, cptr_el2 > - bic x2, x2, #CPTR_EL2_TFP > - msr cptr_el2, x2 > -alternative_else > - mrs x2, cpacr_el1 > - orr x2, x2, #CPACR_EL1_FPEN > - msr cpacr_el1, x2 > -alternative_endif > - isb > - > - mov x3, x1 > - > - ldr x0, [x3, #VCPU_HOST_CONTEXT] > - kern_hyp_va x0 > - add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) > - bl __fpsimd_save_state > - > - add x2, x3, #VCPU_CONTEXT > - add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) > - bl __fpsimd_restore_state > - > - // Skip restoring fpexc32 for AArch64 guests > - mrs x1, hcr_el2 > - tbnz x1, #HCR_RW_SHIFT, 1f > - ldr x4, [x3, #VCPU_FPEXC32_EL2] > - msr fpexc32_el2, x4 > -1: > - ldp x4, lr, [sp], #16 > - ldp x2, x3, [sp], #16 > - ldp x0, x1, [sp], #16 > - > + stp x2, x3, [sp, #-144]! > + stp x4, x5, [sp, #16] > + stp x6, x7, [sp, #32] > + stp x8, x9, [sp, #48] > + stp x10, x11, [sp, #64] > + stp x12, x13, [sp, #80] > + stp x14, x15, [sp, #96] > + stp x16, x17, [sp, #112] > + stp x18, lr, [sp, #128] > + > + bl __hyp_switch_fpsimd > + > + ldp x4, x5, [sp, #16] > + ldp x6, x7, [sp, #32] > + ldp x8, x9, [sp, #48] > + ldp x10, x11, [sp, #64] > + ldp x12, x13, [sp, #80] > + ldp x14, x15, [sp, #96] > + ldp x16, x17, [sp, #112] > + ldp x18, lr, [sp, #128] > + ldp x0, x1, [sp, #144] > + ldp x2, x3, [sp], #160 > eret > ENDPROC(__fpsimd_guest_restore) > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c > index d964523..c0796c4 100644 > --- a/arch/arm64/kvm/hyp/switch.c > +++ b/arch/arm64/kvm/hyp/switch.c > @@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) > } > } > > +void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused, > + struct kvm_vcpu *vcpu) > +{ > + kvm_cpu_context_t *host_ctxt; > + > + if (has_vhe()) > + write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN, > + cpacr_el1); > + else > + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, > + cptr_el2); Is there no way to do alternative() in C or does it always come down to different inline asms? Anyway: Reviewed-by: Alex Bennée <alex.bennee@linaro.org> > + > + isb(); > + > + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); > + __fpsimd_save_state(&host_ctxt->gp_regs.fp_regs); > + __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); > + > + /* Skip restoring fpexc32 for AArch64 guests */ > + if (!(read_sysreg(hcr_el2) & HCR_RW)) > + write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], > + fpexc32_el2); > +} > + > /* > * Return true when we were able to fixup the guest exit and should return to > * the guest, false when we should restore the host state and return to the -- Alex Bennée
On Wed, May 23, 2018 at 08:35:13PM +0100, Alex Bennée wrote: > > Dave Martin <Dave.Martin@arm.com> writes: > > > To make the lazy FPSIMD context switch trap code easier to hack on, > > this patch converts it to C. > > > > This is not amazingly efficient, but the trap should typically only > > be taken once per host context switch. > > > > Signed-off-by: Dave Martin <Dave.Martin@arm.com> > > Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> > > --- > > arch/arm64/kvm/hyp/entry.S | 57 +++++++++++++++++---------------------------- > > arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++ > > 2 files changed, 46 insertions(+), 35 deletions(-) > > > > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S > > index e41a161..40f349b 100644 > > --- a/arch/arm64/kvm/hyp/entry.S > > +++ b/arch/arm64/kvm/hyp/entry.S > > @@ -172,40 +172,27 @@ ENTRY(__fpsimd_guest_restore) > > // x1: vcpu > > // x2-x29,lr: vcpu regs > > // vcpu x0-x1 on the stack > > - stp x2, x3, [sp, #-16]! > > - stp x4, lr, [sp, #-16]! > > - > > -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN > > - mrs x2, cptr_el2 > > - bic x2, x2, #CPTR_EL2_TFP > > - msr cptr_el2, x2 > > -alternative_else > > - mrs x2, cpacr_el1 > > - orr x2, x2, #CPACR_EL1_FPEN > > - msr cpacr_el1, x2 > > -alternative_endif > > - isb > > - > > - mov x3, x1 > > - > > - ldr x0, [x3, #VCPU_HOST_CONTEXT] > > - kern_hyp_va x0 > > - add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) > > - bl __fpsimd_save_state > > - > > - add x2, x3, #VCPU_CONTEXT > > - add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) > > - bl __fpsimd_restore_state > > - > > - // Skip restoring fpexc32 for AArch64 guests > > - mrs x1, hcr_el2 > > - tbnz x1, #HCR_RW_SHIFT, 1f > > - ldr x4, [x3, #VCPU_FPEXC32_EL2] > > - msr fpexc32_el2, x4 > > -1: > > - ldp x4, lr, [sp], #16 > > - ldp x2, x3, [sp], #16 > > - ldp x0, x1, [sp], #16 > > - > > + stp x2, x3, [sp, #-144]! > > + stp x4, x5, [sp, #16] > > + stp x6, x7, [sp, #32] > > + stp x8, x9, [sp, #48] > > + stp x10, x11, [sp, #64] > > + stp x12, x13, [sp, #80] > > + stp x14, x15, [sp, #96] > > + stp x16, x17, [sp, #112] > > + stp x18, lr, [sp, #128] > > + > > + bl __hyp_switch_fpsimd > > + > > + ldp x4, x5, [sp, #16] > > + ldp x6, x7, [sp, #32] > > + ldp x8, x9, [sp, #48] > > + ldp x10, x11, [sp, #64] > > + ldp x12, x13, [sp, #80] > > + ldp x14, x15, [sp, #96] > > + ldp x16, x17, [sp, #112] > > + ldp x18, lr, [sp, #128] > > + ldp x0, x1, [sp, #144] > > + ldp x2, x3, [sp], #160 > > eret > > ENDPROC(__fpsimd_guest_restore) > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c > > index d964523..c0796c4 100644 > > --- a/arch/arm64/kvm/hyp/switch.c > > +++ b/arch/arm64/kvm/hyp/switch.c > > @@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) > > } > > } > > > > +void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused, > > + struct kvm_vcpu *vcpu) > > +{ > > + kvm_cpu_context_t *host_ctxt; > > + > > + if (has_vhe()) > > + write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN, > > + cpacr_el1); > > + else > > + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, > > + cptr_el2); > > Is there no way to do alternative() in C or does it always come down to > different inline asms? > has_vhe() should resolve to a static key, and I prefer this over the previous alternative construct we had for selecting function calls in C, as that resultet in having to follow too many levels of indirection. Thanks, -Christoffer
On Thu, May 24, 2018 at 10:12:20AM +0200, Christoffer Dall wrote: > On Wed, May 23, 2018 at 08:35:13PM +0100, Alex Bennée wrote: > > > > Dave Martin <Dave.Martin@arm.com> writes: > > > > > To make the lazy FPSIMD context switch trap code easier to hack on, > > > this patch converts it to C. > > > > > > This is not amazingly efficient, but the trap should typically only > > > be taken once per host context switch. > > > > > > Signed-off-by: Dave Martin <Dave.Martin@arm.com> > > > Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> > > > --- > > > arch/arm64/kvm/hyp/entry.S | 57 +++++++++++++++++---------------------------- > > > arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++ > > > 2 files changed, 46 insertions(+), 35 deletions(-) [...] > > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c > > > index d964523..c0796c4 100644 > > > --- a/arch/arm64/kvm/hyp/switch.c > > > +++ b/arch/arm64/kvm/hyp/switch.c > > > @@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) > > > } > > > } > > > > > > +void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused, > > > + struct kvm_vcpu *vcpu) > > > +{ > > > + kvm_cpu_context_t *host_ctxt; > > > + > > > + if (has_vhe()) > > > + write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN, > > > + cpacr_el1); > > > + else > > > + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, > > > + cptr_el2); > > > > Is there no way to do alternative() in C or does it always come down to > > different inline asms? > > > > has_vhe() should resolve to a static key, and I prefer this over the > previous alternative construct we had for selecting function calls in C, > as that resultet in having to follow too many levels of indirection. I'll defer to Christoffer on that -- I was just following precedent :) The if (has_vhe()) approach has the benefit of being much more readable, and the static branch predictor in many CPUs will succeed in folding a short-range unconditional branch out entirely. There will be a small increase in I-cache pressure due to the larger inline code size, but probably not much beyond that. Cheers ---Dave
Dave Martin <Dave.Martin@arm.com> writes: > On Thu, May 24, 2018 at 10:12:20AM +0200, Christoffer Dall wrote: >> On Wed, May 23, 2018 at 08:35:13PM +0100, Alex Bennée wrote: >> > >> > Dave Martin <Dave.Martin@arm.com> writes: >> > >> > > To make the lazy FPSIMD context switch trap code easier to hack on, >> > > this patch converts it to C. >> > > >> > > This is not amazingly efficient, but the trap should typically only >> > > be taken once per host context switch. >> > > >> > > Signed-off-by: Dave Martin <Dave.Martin@arm.com> >> > > Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> >> > > --- >> > > arch/arm64/kvm/hyp/entry.S | 57 +++++++++++++++++---------------------------- >> > > arch/arm64/kvm/hyp/switch.c | 24 +++++++++++++++++++ >> > > 2 files changed, 46 insertions(+), 35 deletions(-) > > [...] > >> > > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c >> > > index d964523..c0796c4 100644 >> > > --- a/arch/arm64/kvm/hyp/switch.c >> > > +++ b/arch/arm64/kvm/hyp/switch.c >> > > @@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) >> > > } >> > > } >> > > >> > > +void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused, >> > > + struct kvm_vcpu *vcpu) >> > > +{ >> > > + kvm_cpu_context_t *host_ctxt; >> > > + >> > > + if (has_vhe()) >> > > + write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN, >> > > + cpacr_el1); >> > > + else >> > > + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, >> > > + cptr_el2); >> > >> > Is there no way to do alternative() in C or does it always come down to >> > different inline asms? >> > >> >> has_vhe() should resolve to a static key, and I prefer this over the >> previous alternative construct we had for selecting function calls in C, >> as that resultet in having to follow too many levels of indirection. > > I'll defer to Christoffer on that -- I was just following precedent :) > > The if (has_vhe()) approach has the benefit of being much more > readable, and the static branch predictor in many CPUs will succeed in > folding a short-range unconditional branch out entirely. There will be > a small increase in I-cache pressure due to the larger inline code > size, but probably not much beyond that. Fair enough - it was mostly a curiosity. It seems most of the use of alternative() are mostly at the low level instruction level anyway. -- Alex Bennée
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index e41a161..40f349b 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -172,40 +172,27 @@ ENTRY(__fpsimd_guest_restore) // x1: vcpu // x2-x29,lr: vcpu regs // vcpu x0-x1 on the stack - stp x2, x3, [sp, #-16]! - stp x4, lr, [sp, #-16]! - -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x2, cptr_el2 - bic x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 -alternative_else - mrs x2, cpacr_el1 - orr x2, x2, #CPACR_EL1_FPEN - msr cpacr_el1, x2 -alternative_endif - isb - - mov x3, x1 - - ldr x0, [x3, #VCPU_HOST_CONTEXT] - kern_hyp_va x0 - add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - bl __fpsimd_save_state - - add x2, x3, #VCPU_CONTEXT - add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - bl __fpsimd_restore_state - - // Skip restoring fpexc32 for AArch64 guests - mrs x1, hcr_el2 - tbnz x1, #HCR_RW_SHIFT, 1f - ldr x4, [x3, #VCPU_FPEXC32_EL2] - msr fpexc32_el2, x4 -1: - ldp x4, lr, [sp], #16 - ldp x2, x3, [sp], #16 - ldp x0, x1, [sp], #16 - + stp x2, x3, [sp, #-144]! + stp x4, x5, [sp, #16] + stp x6, x7, [sp, #32] + stp x8, x9, [sp, #48] + stp x10, x11, [sp, #64] + stp x12, x13, [sp, #80] + stp x14, x15, [sp, #96] + stp x16, x17, [sp, #112] + stp x18, lr, [sp, #128] + + bl __hyp_switch_fpsimd + + ldp x4, x5, [sp, #16] + ldp x6, x7, [sp, #32] + ldp x8, x9, [sp, #48] + ldp x10, x11, [sp, #64] + ldp x12, x13, [sp, #80] + ldp x14, x15, [sp, #96] + ldp x16, x17, [sp, #112] + ldp x18, lr, [sp, #128] + ldp x0, x1, [sp, #144] + ldp x2, x3, [sp], #160 eret ENDPROC(__fpsimd_guest_restore) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index d964523..c0796c4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -318,6 +318,30 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) } } +void __hyp_text __hyp_switch_fpsimd(u64 esr __always_unused, + struct kvm_vcpu *vcpu) +{ + kvm_cpu_context_t *host_ctxt; + + if (has_vhe()) + write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN, + cpacr_el1); + else + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, + cptr_el2); + + isb(); + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __fpsimd_save_state(&host_ctxt->gp_regs.fp_regs); + __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); + + /* Skip restoring fpexc32 for AArch64 guests */ + if (!(read_sysreg(hcr_el2) & HCR_RW)) + write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], + fpexc32_el2); +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the