Message ID | 1537524123-9578-24-git-send-email-paulus@ozlabs.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: PPC: Book3S HV: Nested HV virtualization |
On Fri, Sep 21, 2018 at 08:01:54PM +1000, Paul Mackerras wrote: > From: Suraj Jitindar Singh <sjitindarsingh@gmail.com> > > A HEAI (hypervisor emulation assistance interrupt) occurs when a > hypervisor resource or instruction is used in a privileged but > non-hypervisor state and the LPCR_EVIRT bit is set in LPCR. When > this occurs bit 45 is set in HSRR1. Detect the occurrence of this, > and if userspace has enabled the nested virtualization capability > on the VM, then call the code to handle it accordingly. > > With LPCR[EVIRT] set, we also get HEAI (without bit 45 set) for > mfspr or mtspr to unimplemented SPR numbers. For these accesses, > we emulate the EVIRT=0 behaviour, which is to make the access > a no-op for privileged software unless it is accessing SPR 0, > 4, 5 or 6. Problem-state accesses and accesses to SPR 0, 4, 5 > or 6 generate an illegal-instruction type program interrupt. > > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com> > Signed-off-by: Paul Mackerras <paulus@ozlabs.org> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Question, though: with the new mostly-paravirt approach to nested virt, what HV instructions do you still need to emulate? 
> --- > arch/powerpc/include/asm/kvm_book3s.h | 2 + > arch/powerpc/kvm/book3s_hv.c | 87 ++++++++++++++++++++++------------- > arch/powerpc/kvm/book3s_hv_nested.c | 55 ++++++++++++++++++++++ > 3 files changed, 112 insertions(+), 32 deletions(-) > > diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h > index 125bc5b..a22a501 100644 > --- a/arch/powerpc/include/asm/kvm_book3s.h > +++ b/arch/powerpc/include/asm/kvm_book3s.h > @@ -287,6 +287,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); > void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, > struct hv_guest_state *hr); > long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu); > +int kvmhv_emulate_priv(struct kvm_run *run, struct kvm_vcpu *vcpu, > + unsigned int instr); > > void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); > > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 61de6ac..3b78d97 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -1024,30 +1024,6 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) > return kvmppc_hcall_impl_hv_realmode(cmd); > } > > -static int kvmppc_emulate_debug_inst(struct kvm_run *run, > - struct kvm_vcpu *vcpu) > -{ > - u32 last_inst; > - > - if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != > - EMULATE_DONE) { > - /* > - * Fetch failed, so return to guest and > - * try executing it again. 
> - */ > - return RESUME_GUEST; > - } > - > - if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { > - run->exit_reason = KVM_EXIT_DEBUG; > - run->debug.arch.address = kvmppc_get_pc(vcpu); > - return RESUME_HOST; > - } else { > - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); > - return RESUME_GUEST; > - } > -} > - > static void do_nothing(void *x) > { > } > @@ -1141,6 +1117,23 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) > return RESUME_GUEST; > } > > +static int kvmhv_emulate_unknown_spr(struct kvm_vcpu *vcpu, u32 instr) > +{ > + u32 spr = get_sprn(instr); > + > + /* > + * In privileged state, access to unimplemented SPRs is a no-op > + * except for SPR 0, 4, 5 and 6. All other accesses get turned > + * into illegal-instruction program interrupts. > + */ > + if ((vcpu->arch.shregs.msr & MSR_PR) || > + spr == 0 || (4 <= spr && spr <= 6)) > + return EMULATE_FAIL; > + > + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); > + return RESUME_GUEST; > +} > + > static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, > struct task_struct *tsk) > { > @@ -1257,19 +1250,49 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, > * to the guest. If guest debug is enabled, we need to check > * whether the instruction is a software breakpoint instruction. > * Accordingly return to Guest or Host. > + * With LPCR[EVIRT] set, we also get these for accesses to > + * unknown SPRs and for guests executing hypervisor privileged > + * instructions. > */ > case BOOK3S_INTERRUPT_H_EMUL_ASSIST: > - if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED) > - vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ? > - swab32(vcpu->arch.emul_inst) : > - vcpu->arch.emul_inst; > - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { > - r = kvmppc_emulate_debug_inst(run, vcpu); > + { > + u32 instr = vcpu->arch.emul_inst; > + unsigned long srr1_bit = SRR1_PROGILL; > + > + vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ? 
> + swab32(instr) : instr; > + > + r = EMULATE_FAIL; > + if (vcpu->arch.shregs.msr & SRR1_PROGPRIV) { > + /* > + * Tried to execute hypervisor privileged instruction > + * or mtspr/mfspr on a hypervisor privileged SPR while > + * MSR(HV | PR) == 0b00 -> Privileged but !HV state > + */ > + srr1_bit = SRR1_PROGPRIV; > + if (vcpu->kvm->arch.nested_enable) > + r = kvmhv_emulate_priv(run, vcpu, instr); > } else { > - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); > + /* Illegal instruction or unknown SPR access */ > + if (instr == KVMPPC_INST_SW_BREAKPOINT && > + (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)) { > + run->exit_reason = KVM_EXIT_DEBUG; > + run->debug.arch.address = kvmppc_get_pc(vcpu); > + r = RESUME_HOST; > + break; > + } > + if ((instr & 0xfc0006fe) == PPC_INST_MFSPR) > + /* mfspr or mtspr to unknown SPR, may be noop */ > + r = kvmhv_emulate_unknown_spr(vcpu, instr); > + } > + if (r == EMULATE_FAIL) { > + pr_debug("KVM: Couldn't emulate instruction 0x%.8x\n", > + instr); > + kvmppc_core_queue_program(vcpu, srr1_bit); > r = RESUME_GUEST; > } > break; > + } > /* > * This occurs if the guest (kernel or userspace), does something that > * is prohibited by HFSCR. 
> @@ -4597,7 +4620,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) > */ > if (cpu_has_feature(CPU_FTR_ARCH_300)) { > lpcr &= ~LPCR_VPM0; > - lpcr |= LPCR_HVICE | LPCR_HEIC; > + lpcr |= LPCR_HVICE | LPCR_HEIC | LPCR_EVIRT; > > /* > * If xive is enabled, we route 0x500 interrupts directly > diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c > index a7f3da9..93ecf3b 100644 > --- a/arch/powerpc/kvm/book3s_hv_nested.c > +++ b/arch/powerpc/kvm/book3s_hv_nested.c > @@ -15,6 +15,7 @@ > #include <asm/mmu.h> > #include <asm/pgtable.h> > #include <asm/pgalloc.h> > +#include <asm/disassemble.h> > > static struct patb_entry *pseries_partition_tb; > > @@ -514,3 +515,57 @@ long kvmhv_nested_page_fault(struct kvm_vcpu *vcpu) > { > return RESUME_HOST; > } > + > +static int kvmhv_emulate_priv_mtspr(struct kvm_run *run, struct kvm_vcpu *vcpu, > + unsigned int instr) > +{ > + return EMULATE_FAIL; > +} > + > +static int kvmhv_emulate_priv_mfspr(struct kvm_run *run, struct kvm_vcpu *vcpu, > + unsigned int instr) > +{ > + return EMULATE_FAIL; > +} > + > +static int kvmhv_emulate_priv_op_31(struct kvm_run *run, struct kvm_vcpu *vcpu, > + unsigned int instr) > +{ > + return EMULATE_FAIL; > +} > + > +static int kvmhv_emulate_priv_op(struct kvm_run *run, struct kvm_vcpu *vcpu, > + unsigned int instr) > +{ > + return EMULATE_FAIL; > +} > + > +int kvmhv_emulate_priv(struct kvm_run *run, struct kvm_vcpu *vcpu, > + unsigned int instr) > +{ > + int rc = EMULATE_FAIL; > + > + switch (get_op(instr)) { > + case 31: > + switch (get_xop(instr)) { > + case OP_31_XOP_MTSPR: > + rc = kvmhv_emulate_priv_mtspr(run, vcpu, instr); > + break; > + case OP_31_XOP_MFSPR: > + rc = kvmhv_emulate_priv_mfspr(run, vcpu, instr); > + break; > + default: > + rc = kvmhv_emulate_priv_op_31(run, vcpu, instr); > + break; > + } > + > + if (rc == EMULATE_DONE) > + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); > + break; > + default: > + rc = kvmhv_emulate_priv_op(run, vcpu, instr); 
> + break; > + } > + > + return rc; > +}
On Thu, Sep 27, 2018 at 11:08:26AM +1000, David Gibson wrote: > On Fri, Sep 21, 2018 at 08:01:54PM +1000, Paul Mackerras wrote: > > From: Suraj Jitindar Singh <sjitindarsingh@gmail.com> > > > > A HEAI (hypervisor emulation assistance interrupt) occurs when a > > hypervisor resource or instruction is used in a privileged but > > non-hypervisor state and the LPCR_EVIRT bit is set in LPCR. When > > this occurs bit 45 is set in HSRR1. Detect the occurrence of this, > > and if userspace has enabled the nested virtualization capability > > on the VM, then call the code to handle it accordingly. > > > > With LPCR[EVIRT] set, we also get HEAI (without bit 45 set) for > > mfspr or mtspr to unimplemented SPR numbers. For these accesses, > > we emulate the EVIRT=0 behaviour, which is to make the access > > a no-op for privileged software unless it is accessing SPR 0, > > 4, 5 or 6. Problem-state accesses and accesses to SPR 0, 4, 5 > > or 6 generate an illegal-instruction type program interrupt. > > > > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com> > > Signed-off-by: Paul Mackerras <paulus@ozlabs.org> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > Question, though: with the new mostly-paravirt approach to nested > virt, what HV instructions do you still need to emulate? Currently just tlbie. If we add an H_TLBIE hypercall, then not even that. Paul.
On Thu, Sep 27, 2018 at 11:27:57AM +1000, Paul Mackerras wrote: > On Thu, Sep 27, 2018 at 11:08:26AM +1000, David Gibson wrote: > > On Fri, Sep 21, 2018 at 08:01:54PM +1000, Paul Mackerras wrote: > > > From: Suraj Jitindar Singh <sjitindarsingh@gmail.com> > > > > > > A HEAI (hypervisor emulation assistance interrupt) occurs when a > > > hypervisor resource or instruction is used in a privileged but > > > non-hypervisor state and the LPCR_EVIRT bit is set in LPCR. When > > > this occurs bit 45 is set in HSRR1. Detect the occurrence of this, > > > and if userspace has enabled the nested virtualization capability > > > on the VM, then call the code to handle it accordingly. > > > > > > With LPCR[EVIRT] set, we also get HEAI (without bit 45 set) for > > > mfspr or mtspr to unimplemented SPR numbers. For these accesses, > > > we emulate the EVIRT=0 behaviour, which is to make the access > > > a no-op for privileged software unless it is accessing SPR 0, > > > 4, 5 or 6. Problem-state accesses and accesses to SPR 0, 4, 5 > > > or 6 generate an illegal-instruction type program interrupt. > > > > > > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com> > > > Signed-off-by: Paul Mackerras <paulus@ozlabs.org> > > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > > > Question, though: with the new mostly-paravirt approach to nested > > virt, what HV instructions do you still need to emulate? > > Currently just tlbie. If we add a H_TLBIE then not even that. Ok. It does seem like paravirting the tlbie might be a good idea, if it lets us drop the whole extra instruction emulation infrastructure.
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 125bc5b..a22a501 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -287,6 +287,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu); +int kvmhv_emulate_priv(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int instr); void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 61de6ac..3b78d97 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1024,30 +1024,6 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) return kvmppc_hcall_impl_hv_realmode(cmd); } -static int kvmppc_emulate_debug_inst(struct kvm_run *run, - struct kvm_vcpu *vcpu) -{ - u32 last_inst; - - if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != - EMULATE_DONE) { - /* - * Fetch failed, so return to guest and - * try executing it again. - */ - return RESUME_GUEST; - } - - if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { - run->exit_reason = KVM_EXIT_DEBUG; - run->debug.arch.address = kvmppc_get_pc(vcpu); - return RESUME_HOST; - } else { - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; - } -} - static void do_nothing(void *x) { } @@ -1141,6 +1117,23 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvmhv_emulate_unknown_spr(struct kvm_vcpu *vcpu, u32 instr) +{ + u32 spr = get_sprn(instr); + + /* + * In privileged state, access to unimplemented SPRs is a no-op + * except for SPR 0, 4, 5 and 6. All other accesses get turned + * into illegal-instruction program interrupts. 
+ */ + if ((vcpu->arch.shregs.msr & MSR_PR) || + spr == 0 || (4 <= spr && spr <= 6)) + return EMULATE_FAIL; + + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + return RESUME_GUEST; +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -1257,19 +1250,49 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, * to the guest. If guest debug is enabled, we need to check * whether the instruction is a software breakpoint instruction. * Accordingly return to Guest or Host. + * With LPCR[EVIRT] set, we also get these for accesses to + * unknown SPRs and for guests executing hypervisor privileged + * instructions. */ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED) - vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ? - swab32(vcpu->arch.emul_inst) : - vcpu->arch.emul_inst; - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { - r = kvmppc_emulate_debug_inst(run, vcpu); + { + u32 instr = vcpu->arch.emul_inst; + unsigned long srr1_bit = SRR1_PROGILL; + + vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ? 
+ swab32(instr) : instr; + + r = EMULATE_FAIL; + if (vcpu->arch.shregs.msr & SRR1_PROGPRIV) { + /* + * Tried to execute hypervisor privileged instruction + * or mtspr/mfspr on a hypervisor privileged SPR while + * MSR(HV | PR) == 0b00 -> Privileged but !HV state + */ + srr1_bit = SRR1_PROGPRIV; + if (vcpu->kvm->arch.nested_enable) + r = kvmhv_emulate_priv(run, vcpu, instr); } else { - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + /* Illegal instruction or unknown SPR access */ + if (instr == KVMPPC_INST_SW_BREAKPOINT && + (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + r = RESUME_HOST; + break; + } + if ((instr & 0xfc0006fe) == PPC_INST_MFSPR) + /* mfspr or mtspr to unknown SPR, may be noop */ + r = kvmhv_emulate_unknown_spr(vcpu, instr); + } + if (r == EMULATE_FAIL) { + pr_debug("KVM: Couldn't emulate instruction 0x%.8x\n", + instr); + kvmppc_core_queue_program(vcpu, srr1_bit); r = RESUME_GUEST; } break; + } /* * This occurs if the guest (kernel or userspace), does something that * is prohibited by HFSCR. 
@@ -4597,7 +4620,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { lpcr &= ~LPCR_VPM0; - lpcr |= LPCR_HVICE | LPCR_HEIC; + lpcr |= LPCR_HVICE | LPCR_HEIC | LPCR_EVIRT; /* * If xive is enabled, we route 0x500 interrupts directly diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index a7f3da9..93ecf3b 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -15,6 +15,7 @@ #include <asm/mmu.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/disassemble.h> static struct patb_entry *pseries_partition_tb; @@ -514,3 +515,57 @@ long kvmhv_nested_page_fault(struct kvm_vcpu *vcpu) { return RESUME_HOST; } + +static int kvmhv_emulate_priv_mtspr(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int instr) +{ + return EMULATE_FAIL; +} + +static int kvmhv_emulate_priv_mfspr(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int instr) +{ + return EMULATE_FAIL; +} + +static int kvmhv_emulate_priv_op_31(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int instr) +{ + return EMULATE_FAIL; +} + +static int kvmhv_emulate_priv_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int instr) +{ + return EMULATE_FAIL; +} + +int kvmhv_emulate_priv(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int instr) +{ + int rc = EMULATE_FAIL; + + switch (get_op(instr)) { + case 31: + switch (get_xop(instr)) { + case OP_31_XOP_MTSPR: + rc = kvmhv_emulate_priv_mtspr(run, vcpu, instr); + break; + case OP_31_XOP_MFSPR: + rc = kvmhv_emulate_priv_mfspr(run, vcpu, instr); + break; + default: + rc = kvmhv_emulate_priv_op_31(run, vcpu, instr); + break; + } + + if (rc == EMULATE_DONE) + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + break; + default: + rc = kvmhv_emulate_priv_op(run, vcpu, instr); + break; + } + + return rc; +}