Message ID | 1365437854-30214-25-git-send-email-marc.zyngier@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote: > Allow access to the 32bit register file through the usual API. > > Reviewed-by: Christopher Covington <cov@codeaurora.org> > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > --- > arch/arm64/include/asm/kvm_emulate.h | 17 +++- > arch/arm64/kvm/Makefile | 2 +- > arch/arm64/kvm/regmap.c | 168 +++++++++++++++++++++++++++++++++++ > 3 files changed, 184 insertions(+), 3 deletions(-) > create mode 100644 arch/arm64/kvm/regmap.c > > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h > index 2dcfa74..37a6567 100644 > --- a/arch/arm64/include/asm/kvm_emulate.h > +++ b/arch/arm64/include/asm/kvm_emulate.h > @@ -28,6 +28,9 @@ > #include <asm/kvm_mmio.h> > #include <asm/ptrace.h> > > +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); > +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); > + > void kvm_inject_undefined(struct kvm_vcpu *vcpu); > void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); > void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); > @@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) > > static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) > { > - return false; /* 32bit? Bahhh... 
*/ > + return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); nit: you don't need the '!!': it's a bool > } > > static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) > @@ -64,28 +67,38 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) > > static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) > { > + *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; > } > > static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) > { > + if (vcpu_mode_is_32bit(vcpu)) > + return vcpu_reg32(vcpu, reg_num); > + > return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; > } > > /* Get vcpu SPSR for current mode */ > static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) > { > + if (vcpu_mode_is_32bit(vcpu)) > + return vcpu_spsr32(vcpu); > + > return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; > } > > static inline bool kvm_vcpu_reg_is_pc(const struct kvm_vcpu *vcpu, int reg) > { > - return false; > + return (vcpu_mode_is_32bit(vcpu)) && reg == 15; > } > > static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) > { > u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; > > + if (vcpu_mode_is_32bit(vcpu)) > + return mode > COMPAT_PSR_MODE_USR; > + > return mode != PSR_MODE_EL0t; > } > > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile > index be9eb3833..1668448 100644 > --- a/arch/arm64/kvm/Makefile > +++ b/arch/arm64/kvm/Makefile > @@ -11,7 +11,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += kvm.o > kvm-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) > kvm-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../arch/arm/kvm/, arm.o mmu.o mmio.o psci.o perf.o) > > -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o > +kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o > kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o > kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o > > diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c > new 
file mode 100644 > index 0000000..bbc6ae3 > --- /dev/null > +++ b/arch/arm64/kvm/regmap.c > @@ -0,0 +1,168 @@ > +/* > + * Copyright (C) 2012,2013 - ARM Ltd > + * Author: Marc Zyngier <marc.zyngier@arm.com> > + * > + * Derived from arch/arm/kvm/emulate.c: > + * Copyright (C) 2012 - Virtual Open Systems and Columbia University > + * Author: Christoffer Dall <c.dall@virtualopensystems.com> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
> + */ > + > +#include <linux/mm.h> > +#include <linux/kvm_host.h> > +#include <asm/kvm_emulate.h> > +#include <asm/ptrace.h> > + > +#define VCPU_NR_MODES 6 > +#define REG_OFFSET(_reg) \ > + (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) > + > +#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) > + > +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { > + /* USR Registers */ > + { > + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), > + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), > + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), > + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), > + USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), > + REG_OFFSET(pc) > + }, > + > + /* FIQ Registers */ > + { > + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), > + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), > + USR_REG_OFFSET(6), USR_REG_OFFSET(7), > + REG_OFFSET(compat_r8_fiq), /* r8 */ > + REG_OFFSET(compat_r9_fiq), /* r9 */ > + REG_OFFSET(compat_r10_fiq), /* r10 */ > + REG_OFFSET(compat_r11_fiq), /* r11 */ > + REG_OFFSET(compat_r12_fiq), /* r12 */ > + REG_OFFSET(compat_sp_fiq), /* r13 */ > + REG_OFFSET(compat_lr_fiq), /* r14 */ > + REG_OFFSET(pc) > + }, > + > + /* IRQ Registers */ > + { > + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), > + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), > + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), > + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), > + USR_REG_OFFSET(12), > + REG_OFFSET(compat_sp_irq), /* r13 */ > + REG_OFFSET(compat_lr_irq), /* r14 */ > + REG_OFFSET(pc) > + }, > + > + /* SVC Registers */ > + { > + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), > + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), > + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), > + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), > + USR_REG_OFFSET(12), > + 
REG_OFFSET(compat_sp_svc), /* r13 */ > + REG_OFFSET(compat_lr_svc), /* r14 */ > + REG_OFFSET(pc) > + }, > + > + /* ABT Registers */ > + { > + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), > + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), > + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), > + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), > + USR_REG_OFFSET(12), > + REG_OFFSET(compat_sp_abt), /* r13 */ > + REG_OFFSET(compat_lr_abt), /* r14 */ > + REG_OFFSET(pc) > + }, > + > + /* UND Registers */ > + { > + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), > + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), > + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), > + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), > + USR_REG_OFFSET(12), > + REG_OFFSET(compat_sp_und), /* r13 */ > + REG_OFFSET(compat_lr_und), /* r14 */ > + REG_OFFSET(pc) > + }, > +}; > + > +/* > + * Return a pointer to the register number valid in the current mode of > + * the virtual CPU. > + */ > +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) > +{ > + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; > + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; > + > + switch (mode) { > + case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC: > + mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ > + break; > + > + case COMPAT_PSR_MODE_ABT: > + mode = 4; > + break; > + > + case COMPAT_PSR_MODE_UND: > + mode = 5; > + break; > + > + case COMPAT_PSR_MODE_SYS: > + mode = 0; /* SYS maps to USR */ > + break; > + > + default: > + BUG(); > + } > + > + return reg_array + vcpu_reg_offsets[mode][reg_num]; > +} > + > +/* > + * Return the SPSR for the current mode of the virtual CPU. 
> + */ > +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu) > +{ > + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; > + switch (mode) { > + case COMPAT_PSR_MODE_SVC: > + mode = KVM_SPSR_SVC; > + break; > + case COMPAT_PSR_MODE_ABT: > + mode = KVM_SPSR_ABT; > + break; > + case COMPAT_PSR_MODE_UND: > + mode = KVM_SPSR_UND; > + break; > + case COMPAT_PSR_MODE_IRQ: > + mode = KVM_SPSR_IRQ; > + break; > + case COMPAT_PSR_MODE_FIQ: > + mode = KVM_SPSR_FIQ; > + break; > + default: > + BUG(); > + } > + > + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode]; > +} > -- > 1.8.1.4 > > > > _______________________________________________ > kvmarm mailing list > kvmarm@lists.cs.columbia.edu > https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 24/04/13 00:00, Christoffer Dall wrote: > On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote: >> Allow access to the 32bit register file through the usual API. >> >> Reviewed-by: Christopher Covington <cov@codeaurora.org> >> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> >> --- >> arch/arm64/include/asm/kvm_emulate.h | 17 +++- >> arch/arm64/kvm/Makefile | 2 +- >> arch/arm64/kvm/regmap.c | 168 +++++++++++++++++++++++++++++++++++ >> 3 files changed, 184 insertions(+), 3 deletions(-) >> create mode 100644 arch/arm64/kvm/regmap.c >> >> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h >> index 2dcfa74..37a6567 100644 >> --- a/arch/arm64/include/asm/kvm_emulate.h >> +++ b/arch/arm64/include/asm/kvm_emulate.h >> @@ -28,6 +28,9 @@ >> #include <asm/kvm_mmio.h> >> #include <asm/ptrace.h> >> >> +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); >> +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); >> + >> void kvm_inject_undefined(struct kvm_vcpu *vcpu); >> void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); >> void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); >> @@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) >> >> static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) >> { >> - return false; /* 32bit? Bahhh... */ >> + return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); > > nit: you don't need the '!!': it's a bool No it is not. It is a bitwise and, turned into a bool. Thanks, M.
On Wed, Apr 24, 2013 at 6:06 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: > On 24/04/13 00:00, Christoffer Dall wrote: >> On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote: >>> Allow access to the 32bit register file through the usual API. >>> >>> Reviewed-by: Christopher Covington <cov@codeaurora.org> >>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> >>> --- >>> arch/arm64/include/asm/kvm_emulate.h | 17 +++- >>> arch/arm64/kvm/Makefile | 2 +- >>> arch/arm64/kvm/regmap.c | 168 +++++++++++++++++++++++++++++++++++ >>> 3 files changed, 184 insertions(+), 3 deletions(-) >>> create mode 100644 arch/arm64/kvm/regmap.c >>> >>> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h >>> index 2dcfa74..37a6567 100644 >>> --- a/arch/arm64/include/asm/kvm_emulate.h >>> +++ b/arch/arm64/include/asm/kvm_emulate.h >>> @@ -28,6 +28,9 @@ >>> #include <asm/kvm_mmio.h> >>> #include <asm/ptrace.h> >>> >>> +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); >>> +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); >>> + >>> void kvm_inject_undefined(struct kvm_vcpu *vcpu); >>> void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); >>> void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); >>> @@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) >>> >>> static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) >>> { >>> - return false; /* 32bit? Bahhh... */ >>> + return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); >> >> nit: you don't need the '!!': it's a bool > > No it is not. It is a bitwise and, turned into a bool. > yeah and the result of the bitwise and will be cast to a bool as per the function's return value without the use of '!!' or did I read the C spec wrong? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote: > static inline bool kvm_vcpu_reg_is_pc(const struct kvm_vcpu *vcpu, int reg) > { > - return false; > + return (vcpu_mode_is_32bit(vcpu)) && reg == 15; > } On AArch64, would ESR_EL2 have SRT == 15 when the source/destination register is PC? The mapping between AArch32 and AArch64 registers suggests R13_hyp. Maybe 15 is correct but it's not clear to me from the spec. BTW, on arch/arm it looks like this is used when you get a data abort with PC as the destination register and you inject a prefetch abort in this case. Why isn't this a normal data abort? Once you get the information, you load it into PC but first you need to sort out the data abort (unless I don't understand how the kvm_inject_pabt works).
On 02/05/13 17:09, Catalin Marinas wrote: > On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote: >> static inline bool kvm_vcpu_reg_is_pc(const struct kvm_vcpu *vcpu, int reg) >> { >> - return false; >> + return (vcpu_mode_is_32bit(vcpu)) && reg == 15; >> } > > On AArch64, would ESR_EL2 have SRT == 15 when the source/destination > register is PC? The mapping between AArch32 and AArch64 registers > suggests R13_hyp. Maybe 15 is correct but it's not clear to me from the > spec. The register reported by ESR_EL2 is indeed r15 when EL1 is in AArch32. That's because we don't have PC as a GPR on AArch64. > BTW, on arch/arm it looks like this is used when you get a data abort > with PC as the destination register and you inject a prefetch abort in > this case. Why isn't this a normal data abort? Once you get the > information, you load it into PC but first you need to sort out the data > abort (unless I don't understand how the kvm_inject_pabt works). Indeed, it should be a data abort, as we correctly fetched the instruction. Now, I wonder why we even bother trying to catch this case. Fetching PC from MMIO looks quite silly, but I don't think anything really forbids it in the architecture. M.
On Tue, May 07, 2013 at 05:28:00PM +0100, Marc Zyngier wrote: > On 02/05/13 17:09, Catalin Marinas wrote: > > BTW, on arch/arm it looks like this is used when you get a data abort > > with PC as the destination register and you inject a prefetch abort in > > this case. Why isn't this a normal data abort? Once you get the > > information, you load it into PC but first you need to sort out the data > > abort (unless I don't understand how the kvm_inject_pabt works). > > Indeed, it should be a data abort, as we correctly fetched the > instruction. Now, I wonder why we even bother trying to catch this case. > Fetching PC from MMIO looks quite silly, but I don't think anything > really forbids it in the architecture. It's not forbidden and you should just treat it as any other data abort, no need to check whether the register is PC. If you do the PC adjustment further down in that function it will be overridden by the instruction emulation anyway. There is no optimisation in checking for PC since such fault is very unlikely in sane code anyway.
On Tue, May 07, 2013 at 05:33:03PM +0100, Catalin Marinas wrote: > On Tue, May 07, 2013 at 05:28:00PM +0100, Marc Zyngier wrote: > > On 02/05/13 17:09, Catalin Marinas wrote: > > > BTW, on arch/arm it looks like this is used when you get a data abort > > > with PC as the destination register and you inject a prefetch abort in > > > this case. Why isn't this a normal data abort? Once you get the > > > information, you load it into PC but first you need to sort out the data > > > abort (unless I don't understand how the kvm_inject_pabt works). > > > > Indeed, it should be a data abort, as we correctly fetched the > > instruction. Now, I wonder why we even bother trying to catch this case. > > Fetching PC from MMIO looks quite silly, but I don't think anything > > really forbids it in the architecture. > > It's not forbidden and you should just treat it as any other data abort, > no need to check whether the register is PC. If you do the PC adjustment > further down in that function it will be overridden by the instruction > emulation anyway. There is no optimisation in checking for PC since such > fault is very unlikely in sane code anyway. > The reason is simply that it is most likely because of a bug that this happens, and we would rather catch such an error in a meaningful way than let this go crazy and have users track it down for a long time. Especially, this was relevant when we did io instruction decoding and we wanted to catch potential bugs in that when it was development code. That all being said, we can remove the check. I don't think, however, that it being an unlikely thing is a good argument: if we remove the check we need to make sure that the VM does whatever the architecture dictates, which I assume is to not skip the MMIO instruction and support setting the PC to the value returned from an I/O emulation device in QEMU. 
I think the scenario sounds crazy and is more than anything a sign of a bug somewhere, and whether it should be a PABT or a DABT is a judgement call, but I chose a PABT because the thing that's weird is jumping to an I/O address, it's not that the memory address for the load is problematic. -Christoffer -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 11 May 2013 01:36, Christoffer Dall <cdall@cs.columbia.edu> wrote: > That all being said, we can remove the check. I don't think, however, > that it being an unlikely thing is a good argument: if we remove the > check we need to make sure that the VM does whatever the architecture > dictates, which I assume is to not skip the MMIO instruction and support > setting the PC to the value returned from an I/O emulation device in > QEMU. > > I think the scenario sounds crazy and is more than anything a sign of a > bug somewhere, and whether it should be a PABT or a DABT is a judgement > call, but I chose a PABT because the thing that's weird is jumping to an > I/O address, it's not that the memory address for the load is > problematic. I'm confused -- in your first paragraph you talk about loading PC from an MMIO region but in the second you talk about jumping to an MMIO region. This check is catching the former, right? Loading PC from a device isn't totally unheard of: the VIC (common on ARM7) has a register which returns "address of the next interrupt to take", with the intention that the IRQ vector can just have a single instruction directly loading PC from the VIC register: http://infocenter.arm.com/help/topic/com.arm.doc.ddi0273a/I1006455.html Handling "executing code out of an MMIO region" is (a) rather harder and (b) not necessary IMHO. QEMU's TCG emulation doesn't support it and we haven't had huge "can't run this guest" issues as a result. thanks -- PMM -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, May 11, 2013 at 01:36:30AM +0100, Christoffer Dall wrote: > On Tue, May 07, 2013 at 05:33:03PM +0100, Catalin Marinas wrote: > > On Tue, May 07, 2013 at 05:28:00PM +0100, Marc Zyngier wrote: > > > On 02/05/13 17:09, Catalin Marinas wrote: > > > > BTW, on arch/arm it looks like this is used when you get a data abort > > > > with PC as the destination register and you inject a prefetch abort in > > > > this case. Why isn't this a normal data abort? Once you get the > > > > information, you load it into PC but first you need to sort out the data > > > > abort (unless I don't understand how the kvm_inject_pabt works). > > > > > > Indeed, it should be a data abort, as we correctly fetched the > > > instruction. Now, I wonder why we even bother trying to catch this case. > > > Fetching PC from MMIO looks quite silly, but I don't think anything > > > really forbids it in the architecture. > > > > It's not forbidden and you should just treat it as any other data abort, > > no need to check whether the register is PC. If you do the PC adjustment > > further down in that function it will be overridden by the instruction > > emulation anyway. There is no optimisation in checking for PC since such > > fault is very unlikely in sane code anyway. > > > The reason is simply that it is most likely because of a bug that this > happens, and we would rather catch such an error in a meaningful way > than let this go crazy and have users track it down for a long time. It is probably a bug but it is a valid code sequence (and Peter even gave an example). > Especially, this was relevant when we did io instruction decoding and we > wanted to catch potential bugs in that when it was development code. > > That all being said, we can remove the check. 
I don't think, however, > that it being an unlikely thing is a good argument: if we remove the > check we need to make sure that the VM does whatever the architecture > dictates, which I assume is to not skip the MMIO instruction and support > setting the PC to the value returned from an I/O emulation device in > QEMU. > > I think the scenario sounds crazy and is more than anything a sign of a > bug somewhere, and whether it should be a PABT or a DABT is a judgement > call, but I chose a PABT because the thing that's weird is jumping to an > I/O address, it's not that the memory address for the load is > problematic. I think that's where things get confused. You are reading from an I/O location with the destination being the PC and it is trapped by KVM. This has nothing to do with PABT, it's purely a DABT on the I/O access. You should emulate it and store the result in the PC. If the value loaded in PC is wrong, you will get a subsequent PABT in the guest but you must not translate the DABT on I/O memory (which would be generated even if the destination is not PC) into a PABT which confuses the guest further. By doing this you try to convince the guest that it really branched to the I/O address (since you raise the PABT with the DFAR value) when it simply tried to load an address from I/O and branch to this new address. IOW, these two are equivalent regarding PC: ldr pc, [r0] @ r0 is an I/O address and ldr r1, [r0] @ r0 is an I/O address mov pc, r1 In the second scenario, you don't raise a PABT on the first instruction. You may raise one on the second if PC is invalid but that's different and most likely it will be a guest-only thing. So for DABT emulation, checking whether the destination register is PC is simply a minimal optimisation (if at all) of that case to avoid storing the PC twice. Injecting PABT when you get a DABT is a bug. You already catch PABT on I/O address in kvm_handle_guest_abort() and correctly inject PABT into guest.
On Sat, May 11, 2013 at 10:43:37AM +0100, Catalin Marinas wrote: > On Sat, May 11, 2013 at 01:36:30AM +0100, Christoffer Dall wrote: > > On Tue, May 07, 2013 at 05:33:03PM +0100, Catalin Marinas wrote: > > > On Tue, May 07, 2013 at 05:28:00PM +0100, Marc Zyngier wrote: > > > > On 02/05/13 17:09, Catalin Marinas wrote: > > > > > BTW, on arch/arm it looks like this is used when you get a data abort > > > > > with PC as the destination register and you inject a prefetch abort in > > > > > this case. Why isn't this a normal data abort? Once you get the > > > > > information, you load it into PC but first you need to sort out the data > > > > > abort (unless I don't understand how the kvm_inject_pabt works). > > > > > > > > Indeed, it should be a data abort, as we correctly fetched the > > > > instruction. Now, I wonder why we even bother trying to catch this case. > > > > Fetching PC from MMIO looks quite silly, but I don't think anything > > > > really forbids it in the architecture. > > > > > > It's not forbidden and you should just treat it as any other data abort, > > > no need to check whether the register is PC. If you do the PC adjustment > > > further down in that function it will be overridden by the instruction > > > emulation anyway. There is no optimisation in checking for PC since such > > > fault is very unlikely in sane code anyway. > > > > > The reason is simply that it is most likely because of a bug that this > > happens, and we would rather catch such an error in a meaningful way > > than let this go crazy and have users track it down for a long time. > > It is probably a bug but it is a valid code sequence (and Peter even > gave an example). > > > Especially, this was relevant when we did io instruction decoding and we > > wanted to catch potential bugs in that when it was development code. > > > > That all being said, we can remove the check. 
I don't think, however, > > that it being an unlikely thing is a good argument: if we remove the > > check we need to make sure that the VM does whatever the architecture > > dictates, which I assume is to not skip the MMIO instruction and support > > setting the PC to the value returned from an I/O emulation device in > > QEMU. > > > > I think the scenario sounds crazy and is more than anything a sign of a > > bug somewhere, and whether it should be a PABT or a DABT is a judgement > > call, but I chose a PABT because the thing that's weird is jumping to an > > I/O address, it's not that the memory address for the load is > > problematic. > > I think that's where things get confused. You are reading from an I/O > location with the destination being the PC and it is trapped by KVM. > This has nothing to do with PABT, it's purely a DABT on the I/O access. > You should emulate it and store the result in the PC. If the value > loaded in PC is wrong, you will get a subsequent PABT in the guest but > you must not translate the DABT on I/O memory (which would be generated > even if the destination is not PC) into a PABT which confuses the guest > further. By doing this you try to convince the guest that it really > branched to the I/O address (since you raise the PABT with the DFAR > value) when it simply tried to load an address from I/O and branch to > this new address. > > IOW, these two are equivalent regarding PC: > > ldr pc, [r0] @ r0 is an I/O address > > and > > ldr r1, [r0] @ r0 is an I/O address > mov pc, r1 > > In the second scenario, you don't raise a PABT on the first instruction. > You may raise one on the second if PC is invalid but that's different > and most likely it will be a guest-only thing. > > So for DABT emulation, checking whether the destination register is PC > is simply a minimal optimisation (if at all) of that case to avoid > storing the PC twice. Injecting PABT when you get a DABT is a bug. 
You > already catch PABT on I/O address in kvm_handle_guest_abort() and > correctly inject PABT into guest. > Yeah, I got a little confused on the jump to an I/O address and load the PC from an I/O address distinction, thanks for waking me up. I will send out a patch that should address this issue by correctly loading the value into the PC (and not inject a DABT). Note that I don't plan on doing any explicit checking for a case where the VM does something like: ldrb pc, [r0] @ r0 is an I/O address Because I assume that this is either an undefined instruction or the behavior is unpredictable as per the architecture anyway, and we should never get there unless we have a bug in our decoding implementation, which I doubt at this point. -Christoffer -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 2dcfa74..37a6567 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -28,6 +28,9 @@ #include <asm/kvm_mmio.h> #include <asm/ptrace.h> +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); + void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); @@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) { - return false; /* 32bit? Bahhh... */ + return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); } static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) @@ -64,28 +67,38 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) { + *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; } static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_reg32(vcpu, reg_num); + return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; } /* Get vcpu SPSR for current mode */ static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) { + if (vcpu_mode_is_32bit(vcpu)) + return vcpu_spsr32(vcpu); + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; } static inline bool kvm_vcpu_reg_is_pc(const struct kvm_vcpu *vcpu, int reg) { - return false; + return (vcpu_mode_is_32bit(vcpu)) && reg == 15; } static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; + if (vcpu_mode_is_32bit(vcpu)) + return mode > COMPAT_PSR_MODE_USR; + return mode != PSR_MODE_EL0t; } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index be9eb3833..1668448 100644 
--- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += kvm.o kvm-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) kvm-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../arch/arm/kvm/, arm.o mmu.o mmio.o psci.o perf.o) -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o +kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c new file mode 100644 index 0000000..bbc6ae3 --- /dev/null +++ b/arch/arm64/kvm/regmap.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Derived from arch/arm/kvm/emulate.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/mm.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/ptrace.h> + +#define VCPU_NR_MODES 6 +#define REG_OFFSET(_reg) \ + (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) + +#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) + +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { + /* USR Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), + REG_OFFSET(pc) + }, + + /* FIQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), + REG_OFFSET(compat_r8_fiq), /* r8 */ + REG_OFFSET(compat_r9_fiq), /* r9 */ + REG_OFFSET(compat_r10_fiq), /* r10 */ + REG_OFFSET(compat_r11_fiq), /* r11 */ + REG_OFFSET(compat_r12_fiq), /* r12 */ + REG_OFFSET(compat_sp_fiq), /* r13 */ + REG_OFFSET(compat_lr_fiq), /* r14 */ + REG_OFFSET(pc) + }, + + /* IRQ Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_irq), /* r13 */ + REG_OFFSET(compat_lr_irq), /* r14 */ + REG_OFFSET(pc) + }, + + /* SVC Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_svc), /* r13 */ + REG_OFFSET(compat_lr_svc), /* r14 */ + REG_OFFSET(pc) + }, + + /* ABT Registers */ + { + 
USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_abt), /* r13 */ + REG_OFFSET(compat_lr_abt), /* r14 */ + REG_OFFSET(pc) + }, + + /* UND Registers */ + { + USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), + USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), + USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), + USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), + USR_REG_OFFSET(12), + REG_OFFSET(compat_sp_und), /* r13 */ + REG_OFFSET(compat_lr_und), /* r14 */ + REG_OFFSET(pc) + }, +}; + +/* + * Return a pointer to the register number valid in the current mode of + * the virtual CPU. + */ +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) +{ + unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs; + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + + switch (mode) { + case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC: + mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */ + break; + + case COMPAT_PSR_MODE_ABT: + mode = 4; + break; + + case COMPAT_PSR_MODE_UND: + mode = 5; + break; + + case COMPAT_PSR_MODE_SYS: + mode = 0; /* SYS maps to USR */ + break; + + default: + BUG(); + } + + return reg_array + vcpu_reg_offsets[mode][reg_num]; +} + +/* + * Return the SPSR for the current mode of the virtual CPU. 
+ */ +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu) +{ + unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; + switch (mode) { + case COMPAT_PSR_MODE_SVC: + mode = KVM_SPSR_SVC; + break; + case COMPAT_PSR_MODE_ABT: + mode = KVM_SPSR_ABT; + break; + case COMPAT_PSR_MODE_UND: + mode = KVM_SPSR_UND; + break; + case COMPAT_PSR_MODE_IRQ: + mode = KVM_SPSR_IRQ; + break; + case COMPAT_PSR_MODE_FIQ: + mode = KVM_SPSR_FIQ; + break; + default: + BUG(); + } + + return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode]; +}