
[v3,24/32] arm64: KVM: 32bit GP register access

Message ID 1365437854-30214-25-git-send-email-marc.zyngier@arm.com (mailing list archive)
State New, archived

Commit Message

Marc Zyngier April 8, 2013, 4:17 p.m. UTC
Allow access to the 32bit register file through the usual API.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/kvm_emulate.h |  17 +++-
 arch/arm64/kvm/Makefile              |   2 +-
 arch/arm64/kvm/regmap.c              | 168 +++++++++++++++++++++++++++++++++++
 3 files changed, 184 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm64/kvm/regmap.c

Comments

Christoffer Dall April 23, 2013, 11 p.m. UTC | #1
On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote:
> Allow access to the 32bit register file through the usual API.
> 
> Reviewed-by: Christopher Covington <cov@codeaurora.org>
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm64/include/asm/kvm_emulate.h |  17 +++-
>  arch/arm64/kvm/Makefile              |   2 +-
>  arch/arm64/kvm/regmap.c              | 168 +++++++++++++++++++++++++++++++++++
>  3 files changed, 184 insertions(+), 3 deletions(-)
>  create mode 100644 arch/arm64/kvm/regmap.c
> 
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index 2dcfa74..37a6567 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -28,6 +28,9 @@
>  #include <asm/kvm_mmio.h>
>  #include <asm/ptrace.h>
>  
> +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
> +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
> +
>  void kvm_inject_undefined(struct kvm_vcpu *vcpu);
>  void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
>  void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
> @@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
>  
>  static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
>  {
> -	return false;	/* 32bit? Bahhh... */
> +	return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);

nit: you don't need the '!!': it's a bool

>  }
>  
>  static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
> @@ -64,28 +67,38 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
>  
>  static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
>  {
> +	*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
>  }
>  
>  static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
>  {
> +	if (vcpu_mode_is_32bit(vcpu))
> +		return vcpu_reg32(vcpu, reg_num);
> +
>  	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
>  }
>  
>  /* Get vcpu SPSR for current mode */
>  static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
>  {
> +	if (vcpu_mode_is_32bit(vcpu))
> +		return vcpu_spsr32(vcpu);
> +
>  	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
>  }
>  
>  static inline bool kvm_vcpu_reg_is_pc(const struct kvm_vcpu *vcpu, int reg)
>  {
> -	return false;
> +	return (vcpu_mode_is_32bit(vcpu)) && reg == 15;
>  }
>  
>  static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
>  {
>  	u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
>  
> +	if (vcpu_mode_is_32bit(vcpu))
> +		return mode > COMPAT_PSR_MODE_USR;
> +
>  	return mode != PSR_MODE_EL0t;
>  }
>  
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index be9eb3833..1668448 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -11,7 +11,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../arch/arm/kvm/, arm.o mmu.o mmio.o psci.o perf.o)
>  
> -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o
> +kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
>  
> diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
> new file mode 100644
> index 0000000..bbc6ae3
> --- /dev/null
> +++ b/arch/arm64/kvm/regmap.c
> @@ -0,0 +1,168 @@
> +/*
> + * Copyright (C) 2012,2013 - ARM Ltd
> + * Author: Marc Zyngier <marc.zyngier@arm.com>
> + *
> + * Derived from arch/arm/kvm/emulate.c:
> + * Copyright (C) 2012 - Virtual Open Systems and Columbia University
> + * Author: Christoffer Dall <c.dall@virtualopensystems.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/kvm_host.h>
> +#include <asm/kvm_emulate.h>
> +#include <asm/ptrace.h>
> +
> +#define VCPU_NR_MODES 6
> +#define REG_OFFSET(_reg) \
> +	(offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
> +
> +#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
> +
> +static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
> +	/* USR Registers */
> +	{
> +		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
> +		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
> +		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
> +		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
> +		USR_REG_OFFSET(12), USR_REG_OFFSET(13),	USR_REG_OFFSET(14),
> +		REG_OFFSET(pc)
> +	},
> +
> +	/* FIQ Registers */
> +	{
> +		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
> +		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
> +		USR_REG_OFFSET(6), USR_REG_OFFSET(7),
> +		REG_OFFSET(compat_r8_fiq),  /* r8 */
> +		REG_OFFSET(compat_r9_fiq),  /* r9 */
> +		REG_OFFSET(compat_r10_fiq), /* r10 */
> +		REG_OFFSET(compat_r11_fiq), /* r11 */
> +		REG_OFFSET(compat_r12_fiq), /* r12 */
> +		REG_OFFSET(compat_sp_fiq),  /* r13 */
> +		REG_OFFSET(compat_lr_fiq),  /* r14 */
> +		REG_OFFSET(pc)
> +	},
> +
> +	/* IRQ Registers */
> +	{
> +		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
> +		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
> +		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
> +		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
> +		USR_REG_OFFSET(12),
> +		REG_OFFSET(compat_sp_irq), /* r13 */
> +		REG_OFFSET(compat_lr_irq), /* r14 */
> +		REG_OFFSET(pc)
> +	},
> +
> +	/* SVC Registers */
> +	{
> +		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
> +		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
> +		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
> +		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
> +		USR_REG_OFFSET(12),
> +		REG_OFFSET(compat_sp_svc), /* r13 */
> +		REG_OFFSET(compat_lr_svc), /* r14 */
> +		REG_OFFSET(pc)
> +	},
> +
> +	/* ABT Registers */
> +	{
> +		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
> +		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
> +		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
> +		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
> +		USR_REG_OFFSET(12),
> +		REG_OFFSET(compat_sp_abt), /* r13 */
> +		REG_OFFSET(compat_lr_abt), /* r14 */
> +		REG_OFFSET(pc)
> +	},
> +
> +	/* UND Registers */
> +	{
> +		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
> +		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
> +		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
> +		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
> +		USR_REG_OFFSET(12),
> +		REG_OFFSET(compat_sp_und), /* r13 */
> +		REG_OFFSET(compat_lr_und), /* r14 */
> +		REG_OFFSET(pc)
> +	},
> +};
> +
> +/*
> + * Return a pointer to the register number valid in the current mode of
> + * the virtual CPU.
> + */
> +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
> +{
> +	unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
> +	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
> +
> +	switch (mode) {
> +	case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC:
> +		mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
> +		break;
> +
> +	case COMPAT_PSR_MODE_ABT:
> +		mode = 4;
> +		break;
> +
> +	case COMPAT_PSR_MODE_UND:
> +		mode = 5;
> +		break;
> +
> +	case COMPAT_PSR_MODE_SYS:
> +		mode = 0;	/* SYS maps to USR */
> +		break;
> +
> +	default:
> +		BUG();
> +	}
> +
> +	return reg_array + vcpu_reg_offsets[mode][reg_num];
> +}
> +
> +/*
> + * Return the SPSR for the current mode of the virtual CPU.
> + */
> +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
> +{
> +	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
> +	switch (mode) {
> +	case COMPAT_PSR_MODE_SVC:
> +		mode = KVM_SPSR_SVC;
> +		break;
> +	case COMPAT_PSR_MODE_ABT:
> +		mode = KVM_SPSR_ABT;
> +		break;
> +	case COMPAT_PSR_MODE_UND:
> +		mode = KVM_SPSR_UND;
> +		break;
> +	case COMPAT_PSR_MODE_IRQ:
> +		mode = KVM_SPSR_IRQ;
> +		break;
> +	case COMPAT_PSR_MODE_FIQ:
> +		mode = KVM_SPSR_FIQ;
> +		break;
> +	default:
> +		BUG();
> +	}
> +
> +	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
> +}
> -- 
> 1.8.1.4
Marc Zyngier April 24, 2013, 1:06 p.m. UTC | #2
On 24/04/13 00:00, Christoffer Dall wrote:
> On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote:
>> Allow access to the 32bit register file through the usual API.
>>
>> Reviewed-by: Christopher Covington <cov@codeaurora.org>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  arch/arm64/include/asm/kvm_emulate.h |  17 +++-
>>  arch/arm64/kvm/Makefile              |   2 +-
>>  arch/arm64/kvm/regmap.c              | 168 +++++++++++++++++++++++++++++++++++
>>  3 files changed, 184 insertions(+), 3 deletions(-)
>>  create mode 100644 arch/arm64/kvm/regmap.c
>>
>> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
>> index 2dcfa74..37a6567 100644
>> --- a/arch/arm64/include/asm/kvm_emulate.h
>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>> @@ -28,6 +28,9 @@
>>  #include <asm/kvm_mmio.h>
>>  #include <asm/ptrace.h>
>>  
>> +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
>> +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
>> +
>>  void kvm_inject_undefined(struct kvm_vcpu *vcpu);
>>  void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
>>  void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
>> @@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
>>  
>>  static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
>>  {
>> -	return false;	/* 32bit? Bahhh... */
>> +	return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
> 
> nit: you don't need the '!!': it's a bool

No it is not. It is a bitwise and, turned into a bool.

Thanks,

	M.
Christoffer Dall April 24, 2013, 5:09 p.m. UTC | #3
On Wed, Apr 24, 2013 at 6:06 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 24/04/13 00:00, Christoffer Dall wrote:
>> On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote:
>>> Allow access to the 32bit register file through the usual API.
>>>
>>> Reviewed-by: Christopher Covington <cov@codeaurora.org>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>> ---
>>>  arch/arm64/include/asm/kvm_emulate.h |  17 +++-
>>>  arch/arm64/kvm/Makefile              |   2 +-
>>>  arch/arm64/kvm/regmap.c              | 168 +++++++++++++++++++++++++++++++++++
>>>  3 files changed, 184 insertions(+), 3 deletions(-)
>>>  create mode 100644 arch/arm64/kvm/regmap.c
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
>>> index 2dcfa74..37a6567 100644
>>> --- a/arch/arm64/include/asm/kvm_emulate.h
>>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>>> @@ -28,6 +28,9 @@
>>>  #include <asm/kvm_mmio.h>
>>>  #include <asm/ptrace.h>
>>>
>>> +unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
>>> +unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
>>> +
>>>  void kvm_inject_undefined(struct kvm_vcpu *vcpu);
>>>  void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
>>>  void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
>>> @@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
>>>
>>>  static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
>>>  {
>>> -    return false;   /* 32bit? Bahhh... */
>>> +    return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
>>
>> nit: you don't need the '!!': it's a bool
>
> No it is not. It is a bitwise and, turned into a bool.
>
Yeah, and the result of the bitwise AND will be converted to bool as per
the function's return type without the use of '!!'. Or did I read the
C spec wrong?
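
A minimal standalone sketch of the conversion rule in question (illustrative
only, not code from this series; PSR_MODE32_BIT is defined locally so the
snippet compiles on its own). Returning the bitwise AND from a function with
a bool return type already yields true for any non-zero value:

	#include <stdbool.h>
	#include <stdio.h>

	#define PSR_MODE32_BIT	0x10

	static bool mode_is_32bit(unsigned long cpsr)
	{
		return cpsr & PSR_MODE32_BIT;	/* non-zero converts to true */
	}

	int main(void)
	{
		/* 0x1d3 is a 32bit SVC-mode CPSR, 0x3c5 an AArch64 EL1h one */
		printf("%d %d\n", mode_is_32bit(0x1d3), mode_is_32bit(0x3c5));
		return 0;	/* prints "1 0" */
	}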
Catalin Marinas May 2, 2013, 4:09 p.m. UTC | #4
On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote:
>  static inline bool kvm_vcpu_reg_is_pc(const struct kvm_vcpu *vcpu, int reg)
>  {
> -	return false;
> +	return (vcpu_mode_is_32bit(vcpu)) && reg == 15;
>  }

On AArch64, would ESR_EL2 have SRT == 15 when the source/destination
register is PC? The mapping between AArch32 and AArch64 registers
suggests R13_hyp. Maybe 15 is correct but it's not clear to me from the
spec.

BTW, on arch/arm it looks like this is used when you get a data abort
with PC as the destination register and you inject a prefetch abort in
this case. Why isn't this a normal data abort? Once you get the
information, you load it into PC but first you need to sort out the data
abort (unless I don't understand how the kvm_inject_pabt works).
Marc Zyngier May 7, 2013, 4:28 p.m. UTC | #5
On 02/05/13 17:09, Catalin Marinas wrote:
> On Mon, Apr 08, 2013 at 05:17:26PM +0100, Marc Zyngier wrote:
>>  static inline bool kvm_vcpu_reg_is_pc(const struct kvm_vcpu *vcpu, int reg)
>>  {
>> -	return false;
>> +	return (vcpu_mode_is_32bit(vcpu)) && reg == 15;
>>  }
> 
> On AArch64, would ESR_EL2 have SRT == 15 when the source/destination
> register is PC? The mapping between AArch32 and AArch64 registers
> suggests R13_hyp. Maybe 15 is correct but it's not clear to me from the
> spec.

The register reported by ESR_EL2 is indeed r15 when EL1 is in AArch32.
That's because we don't have PC as a GPR on AArch64.
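
For reference, a hedged sketch of where that register number comes from: for
a data abort taken to EL2, the ISS field of ESR_EL2 reports the transfer
register in SRT, bits [20:16], and for an AArch32 guest that field carries
the AArch32 register number, so a load into the PC shows up as SRT == 15.
The macro and helper names below are illustrative, not the ones used in this
series:

	#define ESR_EL2_SRT_SHIFT	16
	#define ESR_EL2_SRT_MASK	(0x1fUL << ESR_EL2_SRT_SHIFT)

	/* extract the source/destination register of a trapped load/store */
	static inline int dabt_transfer_reg(unsigned long esr)
	{
		return (esr & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT;
	}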

> BTW, on arch/arm it looks like this is used when you get a data abort
> with PC as the destination register and you inject a prefetch abort in
> this case. Why isn't this a normal data abort? Once you get the
> information, you load it into PC but first you need to sort out the data
> abort (unless I don't understand how the kvm_inject_pabt works).

Indeed, it should be a data abort, as we correctly fetched the
instruction. Now, I wonder why we even bother trying to catch this case.
Fetching PC from MMIO looks quite silly, but I don't think anything
really forbids it in the architecture.

	M.
Catalin Marinas May 7, 2013, 4:33 p.m. UTC | #6
On Tue, May 07, 2013 at 05:28:00PM +0100, Marc Zyngier wrote:
> On 02/05/13 17:09, Catalin Marinas wrote:
> > BTW, on arch/arm it looks like this is used when you get a data abort
> > with PC as the destination register and you inject a prefetch abort in
> > this case. Why isn't this a normal data abort? Once you get the
> > information, you load it into PC but first you need to sort out the data
> > abort (unless I don't understand how the kvm_inject_pabt works).
> 
> Indeed, it should be a data abort, as we correctly fetched the
> instruction. Now, I wonder why we even bother trying to catch this case.
> Fetching PC from MMIO looks quite silly, but I don't think anything
> really forbids it in the architecture.

It's not forbidden and you should just treat it as any other data abort,
no need to check whether the register is PC. If you do the PC adjustment
further down in that function it will be overridden by the instruction
emulation anyway. There is no optimisation in checking for PC since such a
fault is very unlikely in sane code anyway.
Christoffer Dall May 11, 2013, 12:36 a.m. UTC | #7
On Tue, May 07, 2013 at 05:33:03PM +0100, Catalin Marinas wrote:
> On Tue, May 07, 2013 at 05:28:00PM +0100, Marc Zyngier wrote:
> > On 02/05/13 17:09, Catalin Marinas wrote:
> > > BTW, on arch/arm it looks like this is used when you get a data abort
> > > with PC as the destination register and you inject a prefetch abort in
> > > this case. Why isn't this a normal data abort? Once you get the
> > > information, you load it into PC but first you need to sort out the data
> > > abort (unless I don't understand how the kvm_inject_pabt works).
> > 
> > Indeed, it should be a data abort, as we correctly fetched the
> > instruction. Now, I wonder why we even bother trying to catch this case.
> > Fetching PC from MMIO looks quite silly, but I don't think anything
> > really forbids it in the architecture.
> 
> It's not forbidden and you should just treat it as any other data abort,
> no need to check whether the register is PC. If you do the PC adjustment
> further down in that function it will be overridden by the instruction
> emulation anyway. There is no optimisation in checking for PC since such a
> fault is very unlikely in sane code anyway.
> 
The reason is simply that it is most likely because of a bug that this
happens, and we would rather catch such an error in a meaningful way
than let this go crazy and have users track it down for a long time.

Especially, this was relevant when we did io instruction decoding and we
wanted to catch potential bugs in that when it was development code.

That all being said, we can remove the check.  I don't think, however,
that it being an unlikely thing is a good argument: if we remove the
check we need to make sure that the VM does whatever the architecture
dictates, which I assume is to not skip the MMIO instruction and support
setting the PC to the value returned from an I/O emulation device in
QEMU.

I think the scenario sounds crazy and is more than anything a sign of a
bug somewhere, and whether it should be a PABT or a DABT is a judgement
call, but I chose a PABT because the thing that's weird is jumping to an
I/O address, it's not that the memory address for the load is
problematic.

-Christoffer
Peter Maydell May 11, 2013, 7:51 a.m. UTC | #8
On 11 May 2013 01:36, Christoffer Dall <cdall@cs.columbia.edu> wrote:
> That all being said, we can remove the check.  I don't think, however,
> that it being an unlikely thing is a good argument: if we remove the
> check we need to make sure that the VM does whatever the architecture
> dictates, which I assume is to not skip the MMIO instruction and support
> setting the PC to the value returned from an I/O emulation device in
> QEMU.
>
> I think the scenario sounds crazy and is more than anything a sign of a
> bug somewhere, and whether it should be a PABT or a DABT is a judgement
> call, but I chose a PABT because the thing that's weird is jumping to an
> I/O address, it's not that the memory address for the load is
> problematic.

I'm confused -- in your first paragraph you talk about loading
PC from an MMIO region but in the second you talk about jumping
to an MMIO region. This check is catching the former, right?

Loading PC from a device isn't totally unheard of: the VIC (common
on ARM7) has a register which returns "address of the next interrupt
to take", with the intention that the IRQ vector can just have a
single instruction directly loading PC from the VIC register:
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0273a/I1006455.html
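
Peter's example, spelled out in C rather than the single-instruction asm form
(a hedged sketch: the VICVECTADDR offset follows the PL190 TRM he links, but
the base address and names here are assumptions made up for illustration):

	#define VIC_BASE	0x10140000UL	/* assumed PL190 base address */
	#define VICVECTADDR	0x030		/* current vector address register */

	typedef void (*irq_handler_t)(void);

	static void irq_dispatch(void)
	{
		/* the read returns the handler address programmed into the VIC */
		irq_handler_t handler =
			*(volatile irq_handler_t *)(VIC_BASE + VICVECTADDR);
		handler();	/* the "load PC from a device" idiom, in C */
	}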

Handling "executing code out of an MMIO region" is (a) rather
harder and (b) not necessary IMHO. QEMU's TCG emulation doesn't
support it and we haven't had huge "can't run this guest"
issues as a result.

thanks
-- PMM
Catalin Marinas May 11, 2013, 9:43 a.m. UTC | #9
On Sat, May 11, 2013 at 01:36:30AM +0100, Christoffer Dall wrote:
> On Tue, May 07, 2013 at 05:33:03PM +0100, Catalin Marinas wrote:
> > On Tue, May 07, 2013 at 05:28:00PM +0100, Marc Zyngier wrote:
> > > On 02/05/13 17:09, Catalin Marinas wrote:
> > > > BTW, on arch/arm it looks like this is used when you get a data abort
> > > > with PC as the destination register and you inject a prefetch abort in
> > > > this case. Why isn't this a normal data abort? Once you get the
> > > > information, you load it into PC but first you need to sort out the data
> > > > abort (unless I don't understand how the kvm_inject_pabt works).
> > > 
> > > Indeed, it should be a data abort, as we correctly fetched the
> > > instruction. Now, I wonder why we even bother trying to catch this case.
> > > Fetching PC from MMIO looks quite silly, but I don't think anything
> > > really forbids it in the architecture.
> > 
> > It's not forbidden and you should just treat it as any other data abort,
> > no need to check whether the register is PC. If you do the PC adjustment
> > further down in that function it will be overridden by the instruction
> > emulation anyway. There is no optimisation in checking for PC since such a
> > fault is very unlikely in sane code anyway.
> > 
> The reason is simply that it is most likely because of a bug that this
> happens, and we would rather catch such an error in a meaningful way
> than let this go crazy and have users track it down for a long time.

It is probably a bug but it is a valid code sequence (and Peter even
gave an example).

> Especially, this was relevant when we did io instruction decoding and we
> wanted to catch potential bugs in that when it was development code.
> 
> That all being said, we can remove the check.  I don't think, however,
> that it being an unlikely thing is a good argument: if we remove the
> check we need to make sure that the VM does whatever the architecture
> dictates, which I assume is to not skip the MMIO instruction and support
> setting the PC to the value returned from an I/O emulation device in
> QEMU.
> 
> I think the scenario sounds crazy and is more than anything a sign of a
> bug somewhere, and whether it should be a PABT or a DABT is a judgement
> call, but I chose a PABT because the thing that's weird is jumping to an
> I/O address, it's not that the memory address for the load is
> problematic.

I think that's where things get confused. You are reading from an I/O
location with the destination being the PC and it is trapped by KVM.
This has nothing to do with PABT, it's purely a DABT on the I/O access.
You should emulate it and store the result in the PC. If the value
loaded in PC is wrong, you will get a subsequent PABT in the guest but
you must not translate the DABT on I/O memory (which would be generated
even if the destination is not PC) into a PABT which confuses the guest
further. By doing this you try to convince the guest that it really
branched to the I/O address (since you raise the PABT with the DFAR
value) when it simply tried to load an address from I/O and branch to
this new address.

IOW, these two are equivalent regarding PC:

	ldr	pc, [r0]	@ r0 is an I/O address

and 

	ldr	r1, [r0]	@ r0 is an I/O address
	mov	pc, r1

In the second scenario, you don't raise a PABT on the first instruction.
You may raise one on the second if PC is invalid but that's different
and most likely it will be a guest-only thing.

So for DABT emulation, checking whether the destination register is PC
is simply a minimal optimisation (if at all) of that case to avoid
storing the PC twice. Injecting PABT when you get a DABT is a bug. You
already catch PABT on I/O address in kvm_handle_guest_abort() and
correctly inject PABT into guest.
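
A minimal sketch of the handling described above, assuming an MMIO-read
completion hook of roughly this shape (the function name and signature are
made up for illustration; vcpu_reg(), kvm_vcpu_reg_is_pc() and
kvm_skip_instr() are the helpers from this series). The loaded value goes
into whatever register the access named, which via vcpu_reg32() is the
AArch32 PC when rt == 15, and the instruction is only skipped when the load
did not target the PC:

	static void mmio_read_complete(struct kvm_vcpu *vcpu, int rt,
				       unsigned long data, bool is_wide_instr)
	{
		/* rt == 15 resolves to the AArch32 PC via vcpu_reg32() */
		*vcpu_reg(vcpu, rt) = data;

		/* a load into the PC is already the branch; don't step past it */
		if (!kvm_vcpu_reg_is_pc(vcpu, rt))
			kvm_skip_instr(vcpu, is_wide_instr);
	}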
Christoffer Dall May 12, 2013, 6:51 p.m. UTC | #10
On Sat, May 11, 2013 at 10:43:37AM +0100, Catalin Marinas wrote:
> On Sat, May 11, 2013 at 01:36:30AM +0100, Christoffer Dall wrote:
> > On Tue, May 07, 2013 at 05:33:03PM +0100, Catalin Marinas wrote:
> > > On Tue, May 07, 2013 at 05:28:00PM +0100, Marc Zyngier wrote:
> > > > On 02/05/13 17:09, Catalin Marinas wrote:
> > > > > BTW, on arch/arm it looks like this is used when you get a data abort
> > > > > with PC as the destination register and you inject a prefetch abort in
> > > > > this case. Why isn't this a normal data abort? Once you get the
> > > > > information, you load it into PC but first you need to sort out the data
> > > > > abort (unless I don't understand how the kvm_inject_pabt works).
> > > > 
> > > > Indeed, it should be a data abort, as we correctly fetched the
> > > > instruction. Now, I wonder why we even bother trying to catch this case.
> > > > Fetching PC from MMIO looks quite silly, but I don't think anything
> > > > really forbids it in the architecture.
> > > 
> > > It's not forbidden and you should just treat it as any other data abort,
> > > no need to check whether the register is PC. If you do the PC adjustment
> > > further down in that function it will be overridden by the instruction
> > > emulation anyway. There is no optimisation in checking for PC since such a
> > > fault is very unlikely in sane code anyway.
> > > 
> > The reason is simply that it is most likely because of a bug that this
> > happens, and we would rather catch such an error in a meaningful way
> > than let this go crazy and have users track it down for a long time.
> 
> It is probably a bug but it is a valid code sequence (and Peter even
> gave an example).
> 
> > Especially, this was relevant when we did io instruction decoding and we
> > wanted to catch potential bugs in that when it was development code.
> > 
> > That all being said, we can remove the check.  I don't think, however,
> > that it being an unlikely thing is a good argument: if we remove the
> > check we need to make sure that the VM does whatever the architecture
> > dictates, which I assume is to not skip the MMIO instruction and support
> > setting the PC to the value returned from an I/O emulation device in
> > QEMU.
> > 
> > I think the scenario sounds crazy and is more than anything a sign of a
> > bug somewhere, and whether it should be a PABT or a DABT is a judgement
> > call, but I chose a PABT because the thing that's weird is jumping to an
> > I/O address, it's not that the memory address for the load is
> > problematic.
> 
> I think that's where things get confused. You are reading from an I/O
> location with the destination being the PC and it is trapped by KVM.
> This has nothing to do with PABT, it's purely a DABT on the I/O access.
> You should emulate it and store the result in the PC. If the value
> loaded in PC is wrong, you will get a subsequent PABT in the guest but
> you must not translate the DABT on I/O memory (which would be generated
> even if the destination is not PC) into a PABT which confuses the guest
> further. By doing this you try to convince the guest that it really
> branched to the I/O address (since you raise the PABT with the DFAR
> value) when it simply tried to load an address from I/O and branch to
> this new address.
> 
> IOW, these two are equivalent regarding PC:
> 
> 	ldr	pc, [r0]	@ r0 is an I/O address
> 
> and 
> 
> 	ldr	r1, [r0]	@ r0 is an I/O address
> 	mov	pc, r1
> 
> In the second scenario, you don't raise a PABT on the first instruction.
> You may raise one on the second if PC is invalid but that's different
> and most likely it will be a guest-only thing.
> 
> So for DABT emulation, checking whether the destination register is PC
> is simply a minimal optimisation (if at all) of that case to avoid
> storing the PC twice. Injecting PABT when you get a DABT is a bug. You
> already catch PABT on I/O address in kvm_handle_guest_abort() and
> correctly inject PABT into guest.
> 
Yeah, I got a little confused about the distinction between jumping to an I/O
address and loading the PC from an I/O address; thanks for waking me up.

I will send out a patch that should address this issue by correctly
loading the value into the PC (and not inject a DABT).  Note that I
don't plan on doing any explicit checking for a case where the VM does
something like:

	ldrb	pc, [r0]	@ r0 is an I/O address

Because I assume that this is either an undefined instruction or the
behavior is unpredictable as per the architecture anyway, and we should
never get there unless we have a bug in our decoding implementation,
which I doubt at this point.

-Christoffer

Patch

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 2dcfa74..37a6567 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -28,6 +28,9 @@ 
 #include <asm/kvm_mmio.h>
 #include <asm/ptrace.h>
 
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
@@ -49,7 +52,7 @@  static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
 
 static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
 {
-	return false;	/* 32bit? Bahhh... */
+	return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
 }
 
 static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
@@ -64,28 +67,38 @@  static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
 
 static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
 {
+	*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
 }
 
 static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
 {
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_reg32(vcpu, reg_num);
+
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
 }
 
 /* Get vcpu SPSR for current mode */
 static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
 {
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_spsr32(vcpu);
+
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
 }
 
 static inline bool kvm_vcpu_reg_is_pc(const struct kvm_vcpu *vcpu, int reg)
 {
-	return false;
+	return (vcpu_mode_is_32bit(vcpu)) && reg == 15;
 }
 
 static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
 {
 	u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
 
+	if (vcpu_mode_is_32bit(vcpu))
+		return mode > COMPAT_PSR_MODE_USR;
+
 	return mode != PSR_MODE_EL0t;
 }
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index be9eb3833..1668448 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -11,7 +11,7 @@  obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 kvm-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../arch/arm/kvm/, arm.o mmu.o mmio.o psci.o perf.o)
 
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
new file mode 100644
index 0000000..bbc6ae3
--- /dev/null
+++ b/arch/arm64/kvm/regmap.c
@@ -0,0 +1,168 @@ 
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/emulate.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/ptrace.h>
+
+#define VCPU_NR_MODES 6
+#define REG_OFFSET(_reg) \
+	(offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
+
+#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
+
+static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
+	/* USR Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12), USR_REG_OFFSET(13),	USR_REG_OFFSET(14),
+		REG_OFFSET(pc)
+	},
+
+	/* FIQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+		REG_OFFSET(compat_r8_fiq),  /* r8 */
+		REG_OFFSET(compat_r9_fiq),  /* r9 */
+		REG_OFFSET(compat_r10_fiq), /* r10 */
+		REG_OFFSET(compat_r11_fiq), /* r11 */
+		REG_OFFSET(compat_r12_fiq), /* r12 */
+		REG_OFFSET(compat_sp_fiq),  /* r13 */
+		REG_OFFSET(compat_lr_fiq),  /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* IRQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_irq), /* r13 */
+		REG_OFFSET(compat_lr_irq), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* SVC Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_svc), /* r13 */
+		REG_OFFSET(compat_lr_svc), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* ABT Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_abt), /* r13 */
+		REG_OFFSET(compat_lr_abt), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* UND Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_und), /* r13 */
+		REG_OFFSET(compat_lr_und), /* r14 */
+		REG_OFFSET(pc)
+	},
+};
+
+/*
+ * Return a pointer to the register number valid in the current mode of
+ * the virtual CPU.
+ */
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+	unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+
+	switch (mode) {
+	case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC:
+		mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
+		break;
+
+	case COMPAT_PSR_MODE_ABT:
+		mode = 4;
+		break;
+
+	case COMPAT_PSR_MODE_UND:
+		mode = 5;
+		break;
+
+	case COMPAT_PSR_MODE_SYS:
+		mode = 0;	/* SYS maps to USR */
+		break;
+
+	default:
+		BUG();
+	}
+
+	return reg_array + vcpu_reg_offsets[mode][reg_num];
+}
+
+/*
+ * Return the SPSR for the current mode of the virtual CPU.
+ */
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+{
+	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+	switch (mode) {
+	case COMPAT_PSR_MODE_SVC:
+		mode = KVM_SPSR_SVC;
+		break;
+	case COMPAT_PSR_MODE_ABT:
+		mode = KVM_SPSR_ABT;
+		break;
+	case COMPAT_PSR_MODE_UND:
+		mode = KVM_SPSR_UND;
+		break;
+	case COMPAT_PSR_MODE_IRQ:
+		mode = KVM_SPSR_IRQ;
+		break;
+	case COMPAT_PSR_MODE_FIQ:
+		mode = KVM_SPSR_FIQ;
+		break;
+	default:
+		BUG();
+	}
+
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+}