diff mbox

[v3,9/9] kvmtool: virtio: enable arm/arm64 support for bi-endianness

Message ID 1398363443-3764-10-git-send-email-marc.zyngier@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Marc Zyngier April 24, 2014, 6:17 p.m. UTC
Implement the kcm_cpu__get_endianness call for both AArch32 and
AArch64, and advertise the bi-endianness support.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 tools/kvm/arm/aarch32/kvm-cpu.c                  | 14 +++++++++++++
 tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h |  2 ++
 tools/kvm/arm/aarch64/kvm-cpu.c                  | 25 ++++++++++++++++++++++++
 tools/kvm/arm/include/arm-common/kvm-arch.h      |  2 ++
 4 files changed, 43 insertions(+)

Comments

Will Deacon May 6, 2014, 2:28 p.m. UTC | #1
Hi Marc,

On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
> Implement the kcm_cpu__get_endianness call for both AArch32 and

s/kcm/kvm/

> AArch64, and advertise the bi-endianness support.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  tools/kvm/arm/aarch32/kvm-cpu.c                  | 14 +++++++++++++
>  tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h |  2 ++
>  tools/kvm/arm/aarch64/kvm-cpu.c                  | 25 ++++++++++++++++++++++++
>  tools/kvm/arm/include/arm-common/kvm-arch.h      |  2 ++
>  4 files changed, 43 insertions(+)
> 
> diff --git a/tools/kvm/arm/aarch32/kvm-cpu.c b/tools/kvm/arm/aarch32/kvm-cpu.c
> index bd71037..464b473 100644
> --- a/tools/kvm/arm/aarch32/kvm-cpu.c
> +++ b/tools/kvm/arm/aarch32/kvm-cpu.c
> @@ -1,5 +1,6 @@
>  #include "kvm/kvm-cpu.h"
>  #include "kvm/kvm.h"
> +#include "kvm/virtio.h"
>  
>  #include <asm/ptrace.h>
>  
> @@ -76,6 +77,19 @@ void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
>  		die_perror("KVM_SET_ONE_REG failed (pc)");
>  }
>  
> +int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
> +{
> +	struct kvm_one_reg reg;
> +	u32 data;
> +
> +	reg.id = ARM_CORE_REG(usr_regs.ARM_cpsr);
> +	reg.addr = (u64)(unsigned long)&data;
> +	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
> +		die("KVM_GET_ONE_REG failed (cpsr)");
> +
> +	return (data & PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
> +}
> +
>  void kvm_cpu__show_code(struct kvm_cpu *vcpu)
>  {
>  	struct kvm_one_reg reg;
> diff --git a/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h b/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h
> index 7d70c3b..ed7da45 100644
> --- a/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h
> +++ b/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h
> @@ -13,5 +13,7 @@
>  #define ARM_MPIDR_HWID_BITMASK	0xFF00FFFFFFUL
>  #define ARM_CPU_ID		3, 0, 0, 0
>  #define ARM_CPU_ID_MPIDR	5
> +#define ARM_CPU_CTRL		3, 0, 1, 0
> +#define ARM_CPU_CTRL_SCTLR	0
>  
>  #endif /* KVM__KVM_CPU_ARCH_H */
> diff --git a/tools/kvm/arm/aarch64/kvm-cpu.c b/tools/kvm/arm/aarch64/kvm-cpu.c
> index 059e42c..b3ce2c8 100644
> --- a/tools/kvm/arm/aarch64/kvm-cpu.c
> +++ b/tools/kvm/arm/aarch64/kvm-cpu.c
> @@ -1,12 +1,16 @@
>  #include "kvm/kvm-cpu.h"
>  #include "kvm/kvm.h"
> +#include "kvm/virtio.h"
>  
>  #include <asm/ptrace.h>
>  
>  #define COMPAT_PSR_F_BIT	0x00000040
>  #define COMPAT_PSR_I_BIT	0x00000080
> +#define COMPAT_PSR_E_BIT	0x00000200
>  #define COMPAT_PSR_MODE_SVC	0x00000013
>  
> +#define SCTLR_EL1_EE_MASK	(1 << 25)
> +
>  #define ARM64_CORE_REG(x)	(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
>  				 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
>  
> @@ -133,6 +137,27 @@ void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
>  		return reset_vcpu_aarch64(vcpu);
>  }
>  
> +int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
> +{
> +	struct kvm_one_reg reg;
> +	u64 data;
> +
> +	reg.id = ARM64_CORE_REG(regs.pstate);
> +	reg.addr = (u64)&data;
> +	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
> +		die("KVM_GET_ONE_REG failed (spsr[EL1])");

This bit hurt for a while ;) Can you add a comment mentioning SETEND for
AArch32 guests please?

> +	if (data & PSR_MODE32_BIT)
> +		return (data & COMPAT_PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
> +
> +	reg.id = ARM64_SYS_REG(ARM_CPU_CTRL, ARM_CPU_CTRL_SCTLR); /* SCTLR_EL1 */

We should probably just rename ARM_CPU_CTRL_SCTLR that to include the _EL1
suffix.

> +	reg.addr = (u64)&data;
> +	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
> +		die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
> +
> +	return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;

This rules out guests where userspace and kernelspace can run with different
endinness. Whilst Linux doesn't currently do this, can we support it here?
It all gets a bit hairy if the guest is using a stage-1 SMMU to let
userspace play with a virtio device...

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier May 6, 2014, 5:25 p.m. UTC | #2
Hi Will,

On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
> Hi Marc,
>
> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>> Implement the kcm_cpu__get_endianness call for both AArch32 and
>
> s/kcm/kvm/

Are you saying that I hace fat fingers? ;-)

>> AArch64, and advertise the bi-endianness support.
>> 
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  tools/kvm/arm/aarch32/kvm-cpu.c                  | 14 +++++++++++++
>>  tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h |  2 ++
>>  tools/kvm/arm/aarch64/kvm-cpu.c                  | 25 ++++++++++++++++++++++++
>>  tools/kvm/arm/include/arm-common/kvm-arch.h      |  2 ++
>>  4 files changed, 43 insertions(+)
>> 
>> diff --git a/tools/kvm/arm/aarch32/kvm-cpu.c b/tools/kvm/arm/aarch32/kvm-cpu.c
>> index bd71037..464b473 100644
>> --- a/tools/kvm/arm/aarch32/kvm-cpu.c
>> +++ b/tools/kvm/arm/aarch32/kvm-cpu.c
>> @@ -1,5 +1,6 @@
>>  #include "kvm/kvm-cpu.h"
>>  #include "kvm/kvm.h"
>> +#include "kvm/virtio.h"
>>  
>>  #include <asm/ptrace.h>
>>  
>> @@ -76,6 +77,19 @@ void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
>>  		die_perror("KVM_SET_ONE_REG failed (pc)");
>>  }
>>  
>> +int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
>> +{
>> +	struct kvm_one_reg reg;
>> +	u32 data;
>> +
>> +	reg.id = ARM_CORE_REG(usr_regs.ARM_cpsr);
>> +	reg.addr = (u64)(unsigned long)&data;
>> +	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>> +		die("KVM_GET_ONE_REG failed (cpsr)");
>> +
>> +	return (data & PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>> +}
>> +
>>  void kvm_cpu__show_code(struct kvm_cpu *vcpu)
>>  {
>>  	struct kvm_one_reg reg;
>> diff --git a/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h b/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h
>> index 7d70c3b..ed7da45 100644
>> --- a/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h
>> +++ b/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h
>> @@ -13,5 +13,7 @@
>>  #define ARM_MPIDR_HWID_BITMASK	0xFF00FFFFFFUL
>>  #define ARM_CPU_ID		3, 0, 0, 0
>>  #define ARM_CPU_ID_MPIDR	5
>> +#define ARM_CPU_CTRL		3, 0, 1, 0
>> +#define ARM_CPU_CTRL_SCTLR	0
>>  
>>  #endif /* KVM__KVM_CPU_ARCH_H */
>> diff --git a/tools/kvm/arm/aarch64/kvm-cpu.c b/tools/kvm/arm/aarch64/kvm-cpu.c
>> index 059e42c..b3ce2c8 100644
>> --- a/tools/kvm/arm/aarch64/kvm-cpu.c
>> +++ b/tools/kvm/arm/aarch64/kvm-cpu.c
>> @@ -1,12 +1,16 @@
>>  #include "kvm/kvm-cpu.h"
>>  #include "kvm/kvm.h"
>> +#include "kvm/virtio.h"
>>  
>>  #include <asm/ptrace.h>
>>  
>>  #define COMPAT_PSR_F_BIT	0x00000040
>>  #define COMPAT_PSR_I_BIT	0x00000080
>> +#define COMPAT_PSR_E_BIT	0x00000200
>>  #define COMPAT_PSR_MODE_SVC	0x00000013
>>  
>> +#define SCTLR_EL1_EE_MASK	(1 << 25)
>> +
>>  #define ARM64_CORE_REG(x)	(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
>>  				 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
>>  
>> @@ -133,6 +137,27 @@ void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
>>  		return reset_vcpu_aarch64(vcpu);
>>  }
>>  
>> +int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
>> +{
>> +	struct kvm_one_reg reg;
>> +	u64 data;
>> +
>> +	reg.id = ARM64_CORE_REG(regs.pstate);
>> +	reg.addr = (u64)&data;
>> +	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>> +		die("KVM_GET_ONE_REG failed (spsr[EL1])");
>
> This bit hurt for a while ;) Can you add a comment mentioning SETEND for
> AArch32 guests please?

Sure.

>> +	if (data & PSR_MODE32_BIT)
>> +		return (data & COMPAT_PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>> +
>> +	reg.id = ARM64_SYS_REG(ARM_CPU_CTRL, ARM_CPU_CTRL_SCTLR); /* SCTLR_EL1 */
>
> We should probably just rename ARM_CPU_CTRL_SCTLR that to include the _EL1
> suffix.

Fair enough.

>> +	reg.addr = (u64)&data;
>> +	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>> +		die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>> +
>> +	return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>
> This rules out guests where userspace and kernelspace can run with different
> endinness. Whilst Linux doesn't currently do this, can we support it here?
> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
> userspace play with a virtio device...

Yeah, I suppose we could check either EE or E0 depending on the mode
when the access was made. We already have all the information, just need
to handle the case. I'll respin the series.

Thanks,

	M.
Peter Maydell May 6, 2014, 6:38 p.m. UTC | #3
On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>>> +    reg.addr = (u64)&data;
>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>>> +
>>> +    return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>>
>> This rules out guests where userspace and kernelspace can run with different
>> endinness. Whilst Linux doesn't currently do this, can we support it here?
>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
>> userspace play with a virtio device...
>
> Yeah, I suppose we could check either EE or E0 depending on the mode
> when the access was made. We already have all the information, just need
> to handle the case. I'll respin the series.

Hi Marc :-)

How virtio implementations should determine their endianness is
a spec question, I think; at any rate QEMU and kvmtool ought to
agree on how it's done. I think the most recent suggestion on the
QEMU mailing list (for PPC) is that we should care about the
guest kernel endianness, but I don't know if anybody thought of
the pass-through-to-userspace usecase...

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Maydell May 7, 2014, 9:34 a.m. UTC | #4
On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
>>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>>>> +    reg.addr = (u64)&data;
>>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>>>> +
>>>> +    return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>>>
>>> This rules out guests where userspace and kernelspace can run with different
>>> endinness. Whilst Linux doesn't currently do this, can we support it here?
>>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
>>> userspace play with a virtio device...
>>
>> Yeah, I suppose we could check either EE or E0 depending on the mode
>> when the access was made. We already have all the information, just need
>> to handle the case. I'll respin the series.

> How virtio implementations should determine their endianness is
> a spec question, I think; at any rate QEMU and kvmtool ought to
> agree on how it's done. I think the most recent suggestion on the
> QEMU mailing list (for PPC) is that we should care about the
> guest kernel endianness, but I don't know if anybody thought of
> the pass-through-to-userspace usecase...

Current opinion on the qemu-devel thread seems to be that we
should just define that the endianness of the virtio device is
the endianness of the guest kernel at the point where the guest
triggers a reset of the virtio device by writing zero the QueuePFN
or Status registers.

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf May 7, 2014, 9:42 a.m. UTC | #5
> Am 07.05.2014 um 11:34 schrieb Peter Maydell <peter.maydell@linaro.org>:
> 
>> On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>>> On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
>>>>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>>>>> +    reg.addr = (u64)&data;
>>>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>>>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>>>>> +
>>>>> +    return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>>>> 
>>>> This rules out guests where userspace and kernelspace can run with different
>>>> endinness. Whilst Linux doesn't currently do this, can we support it here?
>>>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
>>>> userspace play with a virtio device...
>>> 
>>> Yeah, I suppose we could check either EE or E0 depending on the mode
>>> when the access was made. We already have all the information, just need
>>> to handle the case. I'll respin the series.
> 
>> How virtio implementations should determine their endianness is
>> a spec question, I think; at any rate QEMU and kvmtool ought to
>> agree on how it's done. I think the most recent suggestion on the
>> QEMU mailing list (for PPC) is that we should care about the
>> guest kernel endianness, but I don't know if anybody thought of
>> the pass-through-to-userspace usecase...
> 
> Current opinion on the qemu-devel thread seems to be that we
> should just define that the endianness of the virtio device is
> the endianness of the guest kernel at the point where the guest
> triggers a reset of the virtio device by writing zero the QueuePFN
> or Status registers.

Virtio by design has full access to guest physical memory. It doesn't route DMA via PCI. So user space drivers simply don't make sense here.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier May 7, 2014, 9:52 a.m. UTC | #6
On Wed, May 07 2014 at 10:34:30 am BST, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>> On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
>>>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>>>>> +    reg.addr = (u64)&data;
>>>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>>>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>>>>> +
>>>>> +    return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>>>>
>>>> This rules out guests where userspace and kernelspace can run with different
>>>> endinness. Whilst Linux doesn't currently do this, can we support it here?
>>>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
>>>> userspace play with a virtio device...
>>>
>>> Yeah, I suppose we could check either EE or E0 depending on the mode
>>> when the access was made. We already have all the information, just need
>>> to handle the case. I'll respin the series.
>
>> How virtio implementations should determine their endianness is
>> a spec question, I think; at any rate QEMU and kvmtool ought to
>> agree on how it's done. I think the most recent suggestion on the
>> QEMU mailing list (for PPC) is that we should care about the
>> guest kernel endianness, but I don't know if anybody thought of
>> the pass-through-to-userspace usecase...
>
> Current opinion on the qemu-devel thread seems to be that we
> should just define that the endianness of the virtio device is
> the endianness of the guest kernel at the point where the guest
> triggers a reset of the virtio device by writing zero the QueuePFN
> or Status registers.

On AArch32, we only have the CPSR.E bit to select the endiannes. Are we
going to simply explode if the access comes from userspace?

On AArch64, we can either select the kernel endianness, or userspace
endianness. Are we going to go a different route just for the sake of
enforcing kernel access?

I'm inclined to think of userspace access as a valid use case.

	M.
Alexander Graf May 7, 2014, 9:55 a.m. UTC | #7
> Am 07.05.2014 um 11:52 schrieb Marc Zyngier <marc.zyngier@arm.com>:
> 
>> On Wed, May 07 2014 at 10:34:30 am BST, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
>>>> On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>>>> On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
>>>>>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>>>>>> +    reg.addr = (u64)&data;
>>>>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>>>>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>>>>>> +
>>>>>> +    return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>>>>> 
>>>>> This rules out guests where userspace and kernelspace can run with different
>>>>> endinness. Whilst Linux doesn't currently do this, can we support it here?
>>>>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
>>>>> userspace play with a virtio device...
>>>> 
>>>> Yeah, I suppose we could check either EE or E0 depending on the mode
>>>> when the access was made. We already have all the information, just need
>>>> to handle the case. I'll respin the series.
>> 
>>> How virtio implementations should determine their endianness is
>>> a spec question, I think; at any rate QEMU and kvmtool ought to
>>> agree on how it's done. I think the most recent suggestion on the
>>> QEMU mailing list (for PPC) is that we should care about the
>>> guest kernel endianness, but I don't know if anybody thought of
>>> the pass-through-to-userspace usecase...
>> 
>> Current opinion on the qemu-devel thread seems to be that we
>> should just define that the endianness of the virtio device is
>> the endianness of the guest kernel at the point where the guest
>> triggers a reset of the virtio device by writing zero the QueuePFN
>> or Status registers.
> 
> On AArch32, we only have the CPSR.E bit to select the endiannes. Are we
> going to simply explode if the access comes from userspace?
> 
> On AArch64, we can either select the kernel endianness, or userspace
> endianness. Are we going to go a different route just for the sake of
> enforcing kernel access?
> 
> I'm inclined to think of userspace access as a valid use case.

It's not for virtio-legacy. It'll be much more productive to influence virtio-1.0 to not redo the same mistakes than enabling even more hackery with the legacy one.

Alex

> 
>    M.
> -- 
> Jazz is not dead. It just smells funny.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier May 7, 2014, 9:57 a.m. UTC | #8
On Wed, May 07 2014 at 10:42:54 am BST, Alexander Graf <agraf@suse.de> wrote:
>> Am 07.05.2014 um 11:34 schrieb Peter Maydell <peter.maydell@linaro.org>:
>> 
>>> On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
>>>> On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>>>> On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
>>>>>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>>>>>> +    reg.addr = (u64)&data;
>>>>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>>>>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>>>>>> +
>>>>>> +    return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>>>>> 
>>>>> This rules out guests where userspace and kernelspace can run with different
>>>>> endinness. Whilst Linux doesn't currently do this, can we support it here?
>>>>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
>>>>> userspace play with a virtio device...
>>>> 
>>>> Yeah, I suppose we could check either EE or E0 depending on the mode
>>>> when the access was made. We already have all the information, just need
>>>> to handle the case. I'll respin the series.
>> 
>>> How virtio implementations should determine their endianness is
>>> a spec question, I think; at any rate QEMU and kvmtool ought to
>>> agree on how it's done. I think the most recent suggestion on the
>>> QEMU mailing list (for PPC) is that we should care about the
>>> guest kernel endianness, but I don't know if anybody thought of
>>> the pass-through-to-userspace usecase...
>> 
>> Current opinion on the qemu-devel thread seems to be that we
>> should just define that the endianness of the virtio device is
>> the endianness of the guest kernel at the point where the guest
>> triggers a reset of the virtio device by writing zero the QueuePFN
>> or Status registers.
>
> Virtio by design has full access to guest physical memory. It doesn't
> route DMA via PCI. So user space drivers simply don't make sense here.

Huh? What if my guest has usespace using an idmap, with Stage-1 MMU for
isolation only (much like an MPU)? R-class guests anyone?

Agreed, this is not the general use case, but that doesn't seem to be
completely unrealistic either.

	M.
Peter Maydell May 7, 2014, 10:10 a.m. UTC | #9
On 7 May 2014 10:52, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On Wed, May 07 2014 at 10:34:30 am BST, Peter Maydell <peter.maydell@linaro.org> wrote:
>> Current opinion on the qemu-devel thread seems to be that we
>> should just define that the endianness of the virtio device is
>> the endianness of the guest kernel at the point where the guest
>> triggers a reset of the virtio device by writing zero the QueuePFN
>> or Status registers.
>
> On AArch32, we only have the CPSR.E bit to select the endiannes. Are we
> going to simply explode if the access comes from userspace?

There's SCTLR.EE in AArch32, right?

> On AArch64, we can either select the kernel endianness, or userspace
> endianness. Are we going to go a different route just for the sake of
> enforcing kernel access?
>
> I'm inclined to think of userspace access as a valid use case.

I don't actually care much about the details of what we decide;
I just want us to be consistent between QEMU and kvmtool and
(to the extent that architectural differences permit) consistent
between PPC and ARM. At the moment we seem to be heading
in gratuitously different directions.

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf May 7, 2014, 10:11 a.m. UTC | #10
On 05/07/2014 11:57 AM, Marc Zyngier wrote:
> On Wed, May 07 2014 at 10:42:54 am BST, Alexander Graf <agraf@suse.de> wrote:
>>> Am 07.05.2014 um 11:34 schrieb Peter Maydell <peter.maydell@linaro.org>:
>>>
>>>> On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
>>>>> On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>>>>> On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
>>>>>>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>>>>>>> +    reg.addr = (u64)&data;
>>>>>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>>>>>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>>>>>>> +
>>>>>>> +    return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
>>>>>> This rules out guests where userspace and kernelspace can run with different
>>>>>> endinness. Whilst Linux doesn't currently do this, can we support it here?
>>>>>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
>>>>>> userspace play with a virtio device...
>>>>> Yeah, I suppose we could check either EE or E0 depending on the mode
>>>>> when the access was made. We already have all the information, just need
>>>>> to handle the case. I'll respin the series.
>>>> How virtio implementations should determine their endianness is
>>>> a spec question, I think; at any rate QEMU and kvmtool ought to
>>>> agree on how it's done. I think the most recent suggestion on the
>>>> QEMU mailing list (for PPC) is that we should care about the
>>>> guest kernel endianness, but I don't know if anybody thought of
>>>> the pass-through-to-userspace usecase...
>>> Current opinion on the qemu-devel thread seems to be that we
>>> should just define that the endianness of the virtio device is
>>> the endianness of the guest kernel at the point where the guest
>>> triggers a reset of the virtio device by writing zero the QueuePFN
>>> or Status registers.
>> Virtio by design has full access to guest physical memory. It doesn't
>> route DMA via PCI. So user space drivers simply don't make sense here.
> Huh? What if my guest has usespace using an idmap, with Stage-1 MMU for
> isolation only (much like an MPU)? R-class guests anyone?
>
> Agreed, this is not the general use case, but that doesn't seem to be
> completely unrealistic either.

Yes, and once that user tries the same without idmap virtio ends up 
overwriting random memory. It's just not a good idea and I'd much rather 
see us solve this properly with virtio 1.0 really.

Of course alternatively we can continue bikeshedding about this until 
everything becomes moot because we switched to virtio 1.0 ;).

Rusty / Michael, virtio 1.0 does go via normal DMA channels, right?


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier May 7, 2014, 10:19 a.m. UTC | #11
On Wed, May 07 2014 at 10:55:45 am BST, Alexander Graf <agraf@suse.de> wrote:
>> Am 07.05.2014 um 11:52 schrieb Marc Zyngier <marc.zyngier@arm.com>:
>> 
>>> On Wed, May 07 2014 at 10:34:30 am BST, Peter Maydell
>>> <peter.maydell@linaro.org> wrote:
>>>> On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
>>>>> On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>>>>> On Tue, May 06 2014 at 3:28:07 pm BST, Will Deacon
>>>>>> <will.deacon@arm.com> wrote:
>>>>>>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>>>>>>> +    reg.addr = (u64)&data;
>>>>>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>>>>>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>>>>>>> +
>>>>>>> + return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE :
>>>>>>> VIRTIO_ENDIAN_LE;
>>>>>> 
>>>>>> This rules out guests where userspace and kernelspace can run
>>>>>> with different
>>>>>> endinness. Whilst Linux doesn't currently do this, can we support it here?
>>>>>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
>>>>>> userspace play with a virtio device...
>>>>> 
>>>>> Yeah, I suppose we could check either EE or E0 depending on the mode
>>>>> when the access was made. We already have all the information, just need
>>>>> to handle the case. I'll respin the series.
>>> 
>>>> How virtio implementations should determine their endianness is
>>>> a spec question, I think; at any rate QEMU and kvmtool ought to
>>>> agree on how it's done. I think the most recent suggestion on the
>>>> QEMU mailing list (for PPC) is that we should care about the
>>>> guest kernel endianness, but I don't know if anybody thought of
>>>> the pass-through-to-userspace usecase...
>>> 
>>> Current opinion on the qemu-devel thread seems to be that we
>>> should just define that the endianness of the virtio device is
>>> the endianness of the guest kernel at the point where the guest
>>> triggers a reset of the virtio device by writing zero the QueuePFN
>>> or Status registers.
>> 
>> On AArch32, we only have the CPSR.E bit to select the endiannes. Are we
>> going to simply explode if the access comes from userspace?
>> 
>> On AArch64, we can either select the kernel endianness, or userspace
>> endianness. Are we going to go a different route just for the sake of
>> enforcing kernel access?
>> 
>> I'm inclined to think of userspace access as a valid use case.
>
> It's not for virtio-legacy. It'll be much more productive to influence
> virtio-1.0 to not redo the same mistakes than enabling even more
> hackery with the legacy one.

Are you saying I shouldn't improve an existing code base and implement a
useful feature, and should instead work on some new fancy stuff for
which there is no platform support, no kernel support, and not an
official spec either? Watch me.

	M.
Michael S. Tsirkin May 7, 2014, 10:30 a.m. UTC | #12
On Wed, May 07, 2014 at 12:11:13PM +0200, Alexander Graf wrote:
> On 05/07/2014 11:57 AM, Marc Zyngier wrote:
> >On Wed, May 07 2014 at 10:42:54 am BST, Alexander Graf <agraf@suse.de> wrote:
> >>>Am 07.05.2014 um 11:34 schrieb Peter Maydell <peter.maydell@linaro.org>:
> >>>
> >>>>On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
> >>>>>On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
> >>>>>>On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
> >>>>>>>On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
> >>>>>>>+    reg.addr = (u64)&data;
> >>>>>>>+    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
> >>>>>>>+            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
> >>>>>>>+
> >>>>>>>+    return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
> >>>>>>This rules out guests where userspace and kernelspace can run with different
> >>>>>>endinness. Whilst Linux doesn't currently do this, can we support it here?
> >>>>>>It all gets a bit hairy if the guest is using a stage-1 SMMU to let
> >>>>>>userspace play with a virtio device...
> >>>>>Yeah, I suppose we could check either EE or E0 depending on the mode
> >>>>>when the access was made. We already have all the information, just need
> >>>>>to handle the case. I'll respin the series.
> >>>>How virtio implementations should determine their endianness is
> >>>>a spec question, I think; at any rate QEMU and kvmtool ought to
> >>>>agree on how it's done. I think the most recent suggestion on the
> >>>>QEMU mailing list (for PPC) is that we should care about the
> >>>>guest kernel endianness, but I don't know if anybody thought of
> >>>>the pass-through-to-userspace usecase...
> >>>Current opinion on the qemu-devel thread seems to be that we
> >>>should just define that the endianness of the virtio device is
> >>>the endianness of the guest kernel at the point where the guest
> >>>triggers a reset of the virtio device by writing zero the QueuePFN
> >>>or Status registers.
> >>Virtio by design has full access to guest physical memory. It doesn't
> >>route DMA via PCI. So user space drivers simply don't make sense here.
> >Huh? What if my guest has usespace using an idmap, with Stage-1 MMU for
> >isolation only (much like an MPU)? R-class guests anyone?
> >
> >Agreed, this is not the general use case, but that doesn't seem to be
> >completely unrealistic either.
> 
> Yes, and once that user tries the same without idmap virtio ends up
> overwriting random memory. It's just not a good idea and I'd much
> rather see us solve this properly with virtio 1.0 really.
> 
> Of course alternatively we can continue bikeshedding about this
> until everything becomes moot because we switched to virtio 1.0 ;).
> 
> Rusty / Michael, virtio 1.0 does go via normal DMA channels, right?
> 
> 
> Alex

By default it doesn't at the moment, in particular IOMMUs really
seem to hurt performance when enabled, and many guests seem to be
dumb and enable it everywhere if present.

By design IOMMUs can protect you from malicious devices, which
is relevant if you assign a device but of course isn't
relevant for qemu as virtio is part of qemu atm.


In virtio 1.0 it's possible for a device to have "required features"
which drivers must ack.
So we'll be able to have hypervisor tell guest that it requires DMA for
some VFs.
You would then be able to do mix fast virtio PF bypassing IOMMU and handle
that in kernel, and slow virtio VF going through an IOMMU and handle that
in userspace.
Marc Zyngier May 7, 2014, 10:39 a.m. UTC | #13
On Wed, May 07 2014 at 11:11:13 am BST, Alexander Graf <agraf@suse.de> wrote:
> On 05/07/2014 11:57 AM, Marc Zyngier wrote:

>> Huh? What if my guest has usespace using an idmap, with Stage-1 MMU for
>> isolation only (much like an MPU)? R-class guests anyone?
>>
>> Agreed, this is not the general use case, but that doesn't seem to be
>> completely unrealistic either.
>
> Yes, and once that user tries the same without idmap virtio ends up
> overwriting random memory.

And how different is that from the kernel suddenly deciding to use VAs
instead of PAs? Just as broken. Are we going to prevent the kernel from
using virtio?

> It's just not a good idea and I'd much rather see us solve this
> properly with virtio 1.0 really.

Again, what is virtio 1.0 doing here?

       M.
Greg Kurz May 7, 2014, 10:40 a.m. UTC | #14
On Wed, 07 May 2014 10:52:01 +0100
Marc Zyngier <marc.zyngier@arm.com> wrote:

> On Wed, May 07 2014 at 10:34:30 am BST, Peter Maydell <peter.maydell@linaro.org> wrote:
> > On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
> >> On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
> >>> On Tue, May 06 2014 at  3:28:07 pm BST, Will Deacon <will.deacon@arm.com> wrote:
> >>>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
> >>>>> +    reg.addr = (u64)&data;
> >>>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
> >>>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
> >>>>> +
> >>>>> +    return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
> >>>>
> >>>> This rules out guests where userspace and kernelspace can run with different
> >>>> endinness. Whilst Linux doesn't currently do this, can we support it here?
> >>>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
> >>>> userspace play with a virtio device...
> >>>
> >>> Yeah, I suppose we could check either EE or E0 depending on the mode
> >>> when the access was made. We already have all the information, just need
> >>> to handle the case. I'll respin the series.
> >
> >> How virtio implementations should determine their endianness is
> >> a spec question, I think; at any rate QEMU and kvmtool ought to
> >> agree on how it's done. I think the most recent suggestion on the
> >> QEMU mailing list (for PPC) is that we should care about the
> >> guest kernel endianness, but I don't know if anybody thought of
> >> the pass-through-to-userspace usecase...
> >
> > Current opinion on the qemu-devel thread seems to be that we
> > should just define that the endianness of the virtio device is
> > the endianness of the guest kernel at the point where the guest
> > triggers a reset of the virtio device by writing zero the QueuePFN
> > or Status registers.
> 
> On AArch32, we only have the CPSR.E bit to select the endiannes. Are we
> going to simply explode if the access comes from userspace?
> 
> On AArch64, we can either select the kernel endianness, or userspace
> endianness. Are we going to go a different route just for the sake of
> enforcing kernel access?
> 
> I'm inclined to think of userspace access as a valid use case.
> 
> 	M.

All the fuzz is not really about enforcing kernel access... PPC also
has a current endianness selector (MSR_LE) but it only makes sense
if you are in the cpu context. Initial versions of the virtio biendian
support for QEMU PPC64 used an arbitrary cpu: in this case, the
only sensible thing to look at to support kernel based virtio is the 
interrupt endianness selector (LPCR_ILE), because if gives a safe
hint of the kernel endianness.

The patch set has evolved and now uses current_cpu at device reset time.
As a consequence, we are not necessarily tied to the kernel LPCR_ILE
selector I guess.

Cheers.
Marc Zyngier May 7, 2014, 10:46 a.m. UTC | #15
On Wed, May 07 2014 at 11:10:56 am BST, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 7 May 2014 10:52, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> On Wed, May 07 2014 at 10:34:30 am BST, Peter Maydell
>> <peter.maydell@linaro.org> wrote:
>>> Current opinion on the qemu-devel thread seems to be that we
>>> should just define that the endianness of the virtio device is
>>> the endianness of the guest kernel at the point where the guest
>>> triggers a reset of the virtio device by writing zero the QueuePFN
>>> or Status registers.
>>
>> On AArch32, we only have the CPSR.E bit to select the endiannes. Are we
>> going to simply explode if the access comes from userspace?
>
> There's SCTLR.EE in AArch32, right?

Indeed, good point.

>> On AArch64, we can either select the kernel endianness, or userspace
>> endianness. Are we going to go a different route just for the sake of
>> enforcing kernel access?
>>
>> I'm inclined to think of userspace access as a valid use case.
>
> I don't actually care much about the details of what we decide; I just
> want us to be consistent between QEMU and kvmtool and (to the extent
> that architectural differences permit) consistent between PPC and
> ARM. At the moment we seem to be heading in gratuitously different
> directions.

My point is: is there any good technical reason for deciding not to
support guest user space access, other than religious matters about the
latest incarnation of The Holy Virtio Spec?

Thanks,

	M.
Marc Zyngier May 7, 2014, 11:04 a.m. UTC | #16
On Wed, May 07 2014 at 11:40:54 am BST, Greg Kurz <gkurz@linux.vnet.ibm.com> wrote:
> On Wed, 07 May 2014 10:52:01 +0100
> Marc Zyngier <marc.zyngier@arm.com> wrote:
>
>> On Wed, May 07 2014 at 10:34:30 am BST, Peter Maydell
>> <peter.maydell@linaro.org> wrote:
>> > On 6 May 2014 19:38, Peter Maydell <peter.maydell@linaro.org> wrote:
>> >> On 6 May 2014 18:25, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> >>> On Tue, May 06 2014 at 3:28:07 pm BST, Will Deacon
>> >>> <will.deacon@arm.com> wrote:
>> >>>> On Thu, Apr 24, 2014 at 07:17:23PM +0100, Marc Zyngier wrote:
>> >>>>> +    reg.addr = (u64)&data;
>> >>>>> +    if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
>> >>>>> +            die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
>> >>>>> +
>> >>>>> + return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE :
>> >>>>> VIRTIO_ENDIAN_LE;
>> >>>>
>> >>>> This rules out guests where userspace and kernelspace can run
>> >>>> with different
>> >>>> endinness. Whilst Linux doesn't currently do this, can we support it here?
>> >>>> It all gets a bit hairy if the guest is using a stage-1 SMMU to let
>> >>>> userspace play with a virtio device...
>> >>>
>> >>> Yeah, I suppose we could check either EE or E0 depending on the mode
>> >>> when the access was made. We already have all the information, just need
>> >>> to handle the case. I'll respin the series.
>> >
>> >> How virtio implementations should determine their endianness is
>> >> a spec question, I think; at any rate QEMU and kvmtool ought to
>> >> agree on how it's done. I think the most recent suggestion on the
>> >> QEMU mailing list (for PPC) is that we should care about the
>> >> guest kernel endianness, but I don't know if anybody thought of
>> >> the pass-through-to-userspace usecase...
>> >
>> > Current opinion on the qemu-devel thread seems to be that we
>> > should just define that the endianness of the virtio device is
>> > the endianness of the guest kernel at the point where the guest
>> > triggers a reset of the virtio device by writing zero the QueuePFN
>> > or Status registers.
>> 
>> On AArch32, we only have the CPSR.E bit to select the endiannes. Are we
>> going to simply explode if the access comes from userspace?
>> 
>> On AArch64, we can either select the kernel endianness, or userspace
>> endianness. Are we going to go a different route just for the sake of
>> enforcing kernel access?
>> 
>> I'm inclined to think of userspace access as a valid use case.
>> 
>> 	M.

Hi Gregory,

> All the fuzz is not really about enforcing kernel access... PPC also
> has a current endianness selector (MSR_LE) but it only makes sense
> if you are in the cpu context. Initial versions of the virtio biendian
> support for QEMU PPC64 used an arbitrary cpu: in this case, the
> only sensible thing to look at to support kernel based virtio is the 
> interrupt endianness selector (LPCR_ILE), because if gives a safe
> hint of the kernel endianness.
>
> The patch set has evolved and now uses current_cpu at device reset time.
> As a consequence, we are not necessarily tied to the kernel LPCR_ILE
> selector I guess.

That makes a lot of sense, thanks for explaining that. You're basically
doing the exact same thing we do with kvmtool on ARM. So if we have
similar architectural features on both sides, why don't we support both
kernel and userspace access?

Cheers,

	M.
Alexander Graf May 7, 2014, 11:49 a.m. UTC | #17
On 05/07/2014 12:46 PM, Marc Zyngier wrote:
> On Wed, May 07 2014 at 11:10:56 am BST, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 7 May 2014 10:52, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>> On Wed, May 07 2014 at 10:34:30 am BST, Peter Maydell
>>> <peter.maydell@linaro.org> wrote:
>>>> Current opinion on the qemu-devel thread seems to be that we
>>>> should just define that the endianness of the virtio device is
>>>> the endianness of the guest kernel at the point where the guest
>>>> triggers a reset of the virtio device by writing zero the QueuePFN
>>>> or Status registers.
>>> On AArch32, we only have the CPSR.E bit to select the endiannes. Are we
>>> going to simply explode if the access comes from userspace?
>> There's SCTLR.EE in AArch32, right?
> Indeed, good point.
>
>>> On AArch64, we can either select the kernel endianness, or userspace
>>> endianness. Are we going to go a different route just for the sake of
>>> enforcing kernel access?
>>>
>>> I'm inclined to think of userspace access as a valid use case.
>> I don't actually care much about the details of what we decide; I just
>> want us to be consistent between QEMU and kvmtool and (to the extent
>> that architectural differences permit) consistent between PPC and
>> ARM. At the moment we seem to be heading in gratuitously different
>> directions.
> My point is: is there any good technical reason for deciding not to
> support guest user space access, other than religious matters about the
> latest incarnation of The Holy Virtio Spec?

Yes, because it can't be isolated as per the current spec. User space 
has no business in physical addresses. And since so far I haven't heard 
of a single case where people on ARM are either

   a) nesting virtualization or
   b) running different endian user space

I don't think this point is valid. Virtio 1.0 is defined to be little 
endian only, so we don't need all that messy magic logic anymore. By the 
time people will do nesting or different endian user space we will most 
likely be in virtio 1.0 land. Shoehorning in anything in between is just 
a waste of time.

If you like to see a constructed case where your logic falls apart, I 
can easily give you one too (because the whole thing is just insanely 
fragile). Imagine you have nesting. Your L1 guest passes its virtio 
device into the L2 guest with idmap. The L1 guest wants to trace MMIO 
accesses, so it traps on every access and delivers it on its own. L2 is 
LE, L1 is BE. Virtio gets initialized BE even through the guest that 
really wants to access it is LE.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier May 7, 2014, 12:16 p.m. UTC | #18
On 07/05/14 12:49, Alexander Graf wrote:
> On 05/07/2014 12:46 PM, Marc Zyngier wrote:
>> On Wed, May 07 2014 at 11:10:56 am BST, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On 7 May 2014 10:52, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>>> On Wed, May 07 2014 at 10:34:30 am BST, Peter Maydell
>>>> <peter.maydell@linaro.org> wrote:
>>>>> Current opinion on the qemu-devel thread seems to be that we
>>>>> should just define that the endianness of the virtio device is
>>>>> the endianness of the guest kernel at the point where the guest
>>>>> triggers a reset of the virtio device by writing zero the QueuePFN
>>>>> or Status registers.
>>>> On AArch32, we only have the CPSR.E bit to select the endiannes. Are we
>>>> going to simply explode if the access comes from userspace?
>>> There's SCTLR.EE in AArch32, right?
>> Indeed, good point.
>>
>>>> On AArch64, we can either select the kernel endianness, or userspace
>>>> endianness. Are we going to go a different route just for the sake of
>>>> enforcing kernel access?
>>>>
>>>> I'm inclined to think of userspace access as a valid use case.
>>> I don't actually care much about the details of what we decide; I just
>>> want us to be consistent between QEMU and kvmtool and (to the extent
>>> that architectural differences permit) consistent between PPC and
>>> ARM. At the moment we seem to be heading in gratuitously different
>>> directions.
>> My point is: is there any good technical reason for deciding not to
>> support guest user space access, other than religious matters about the
>> latest incarnation of The Holy Virtio Spec?
> 
> Yes, because it can't be isolated as per the current spec. User space 
> has no business in physical addresses. And since so far I haven't heard 
> of a single case where people on ARM are either
> 
>    a) nesting virtualization or
>    b) running different endian user space
> 
> I don't think this point is valid. Virtio 1.0 is defined to be little 
> endian only, so we don't need all that messy magic logic anymore. By the 

Alex, please read my lips: at the moment, I don't care about virtio-1.0.
At all. Doesn't register. And hammering it on and on won't change a
thing (yes, I've rewritten this sentence at least five times to remove
all the fscking swear words).

> time people will do nesting or different endian user space we will most 
> likely be in virtio 1.0 land. Shoehorning in anything in between is just 
> a waste of time.

If you don't want to support it on your pet platform/environment, fine.

> If you like to see a constructed case where your logic falls apart, I 
> can easily give you one too (because the whole thing is just insanely 
> fragile). Imagine you have nesting. Your L1 guest passes its virtio 
> device into the L2 guest with idmap. The L1 guest wants to trace MMIO 
> accesses, so it traps on every access and delivers it on its own. L2 is 
> LE, L1 is BE. Virtio gets initialized BE even through the guest that 
> really wants to access it is LE.

Then it is a bug in your L1 that doesn't properly emulate accesses it
traps. Not that I care, really.

That being said, I'm going to stop replying to this thread, and instead
go back writing code, posting it, and getting on with my life in
virtio-legacy land.

Thanks,

	M.
Peter Maydell May 7, 2014, 12:17 p.m. UTC | #19
On 7 May 2014 12:04, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On Wed, May 07 2014 at 11:40:54 am BST, Greg Kurz <gkurz@linux.vnet.ibm.com> wrote:
>> All the fuzz is not really about enforcing kernel access... PPC also
>> has a current endianness selector (MSR_LE) but it only makes sense
>> if you are in the cpu context. Initial versions of the virtio biendian
>> support for QEMU PPC64 used an arbitrary cpu: in this case, the
>> only sensible thing to look at to support kernel based virtio is the
>> interrupt endianness selector (LPCR_ILE), because if gives a safe
>> hint of the kernel endianness.
>>
>> The patch set has evolved and now uses current_cpu at device reset time.
>> As a consequence, we are not necessarily tied to the kernel LPCR_ILE
>> selector I guess.

Ah yes, I'd forgotten the history behind why we ended up looking
at interrupt endianness.

> That makes a lot of sense, thanks for explaining that. You're basically
> doing the exact same thing we do with kvmtool on ARM. So if we have
> similar architectural features on both sides, why don't we support both
> kernel and userspace access?

I don't think that we really need to get into whether userspace
access is or is not a good idea -- "endianness of the CPU which
does the virtio reset at the point when it does that reset" is a
nice simple rule that should generalise across architectures,
so why make it more complicated than that?

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Maydell May 7, 2014, 12:18 p.m. UTC | #20
On 7 May 2014 13:16, Marc Zyngier <marc.zyngier@arm.com> wrote:
> That being said, I'm going to stop replying to this thread, and instead
> go back writing code, posting it, and getting on with my life in
> virtio-legacy land.

Some of us are trying to have a conversation in this thread
about virtio-legacy behaviour :-)

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier May 7, 2014, 12:25 p.m. UTC | #21
On 07/05/14 13:17, Peter Maydell wrote:
> On 7 May 2014 12:04, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> On Wed, May 07 2014 at 11:40:54 am BST, Greg Kurz <gkurz@linux.vnet.ibm.com> wrote:
>>> All the fuzz is not really about enforcing kernel access... PPC also
>>> has a current endianness selector (MSR_LE) but it only makes sense
>>> if you are in the cpu context. Initial versions of the virtio biendian
>>> support for QEMU PPC64 used an arbitrary cpu: in this case, the
>>> only sensible thing to look at to support kernel based virtio is the
>>> interrupt endianness selector (LPCR_ILE), because if gives a safe
>>> hint of the kernel endianness.
>>>
>>> The patch set has evolved and now uses current_cpu at device reset time.
>>> As a consequence, we are not necessarily tied to the kernel LPCR_ILE
>>> selector I guess.
> 
> Ah yes, I'd forgotten the history behind why we ended up looking
> at interrupt endianness.
> 
>> That makes a lot of sense, thanks for explaining that. You're basically
>> doing the exact same thing we do with kvmtool on ARM. So if we have
>> similar architectural features on both sides, why don't we support both
>> kernel and userspace access?
> 
> I don't think that we really need to get into whether userspace
> access is or is not a good idea -- "endianness of the CPU which
> does the virtio reset at the point when it does that reset" is a
> nice simple rule that should generalise across architectures,
> so why make it more complicated than that?

This definition looks pretty good to me. Simple and to the point.

Thanks,

	M.
Greg Kurz May 7, 2014, 12:27 p.m. UTC | #22
On Wed, 7 May 2014 13:17:51 +0100
Peter Maydell <peter.maydell@linaro.org> wrote:

> On 7 May 2014 12:04, Marc Zyngier <marc.zyngier@arm.com> wrote:
> > On Wed, May 07 2014 at 11:40:54 am BST, Greg Kurz <gkurz@linux.vnet.ibm.com> wrote:
> >> All the fuzz is not really about enforcing kernel access... PPC also
> >> has a current endianness selector (MSR_LE) but it only makes sense
> >> if you are in the cpu context. Initial versions of the virtio biendian
> >> support for QEMU PPC64 used an arbitrary cpu: in this case, the
> >> only sensible thing to look at to support kernel based virtio is the
> >> interrupt endianness selector (LPCR_ILE), because if gives a safe
> >> hint of the kernel endianness.
> >>
> >> The patch set has evolved and now uses current_cpu at device reset time.
> >> As a consequence, we are not necessarily tied to the kernel LPCR_ILE
> >> selector I guess.
> 
> Ah yes, I'd forgotten the history behind why we ended up looking
> at interrupt endianness.
> 
> > That makes a lot of sense, thanks for explaining that. You're basically
> > doing the exact same thing we do with kvmtool on ARM. So if we have
> > similar architectural features on both sides, why don't we support both
> > kernel and userspace access?
> 
> I don't think that we really need to get into whether userspace
> access is or is not a good idea -- "endianness of the CPU which
> does the virtio reset at the point when it does that reset" is a
> nice simple rule that should generalise across architectures,
> so why make it more complicated than that?
> 
> thanks
> -- PMM
> 

I am convinced... and feeling a bit guilty for all the noise ;)
I'll come with a new virtio patch set for QEMU that does just what
you say.
Rusty Russell May 8, 2014, 1:52 a.m. UTC | #23
Alexander Graf <agraf@suse.de> writes:
> On 05/07/2014 11:57 AM, Marc Zyngier wrote:
>>>>> How virtio implementations should determine their endianness is
>>>>> a spec question, I think; at any rate QEMU and kvmtool ought to
>>>>> agree on how it's done. I think the most recent suggestion on the
>>>>> QEMU mailing list (for PPC) is that we should care about the
>>>>> guest kernel endianness, but I don't know if anybody thought of
>>>>> the pass-through-to-userspace usecase...
>>>> Current opinion on the qemu-devel thread seems to be that we
>>>> should just define that the endianness of the virtio device is
>>>> the endianness of the guest kernel at the point where the guest
>>>> triggers a reset of the virtio device by writing zero the QueuePFN
>>>> or Status registers.
>>> Virtio by design has full access to guest physical memory. It doesn't
>>> route DMA via PCI. So user space drivers simply don't make sense here.
>> Huh? What if my guest has usespace using an idmap, with Stage-1 MMU for
>> isolation only (much like an MPU)? R-class guests anyone?
>>
>> Agreed, this is not the general use case, but that doesn't seem to be
>> completely unrealistic either.
>
> Yes, and once that user tries the same without idmap virtio ends up 
> overwriting random memory. It's just not a good idea and I'd much rather 
> see us solve this properly with virtio 1.0 really.

Slightly orthogonal: virtio 1.0 is LE, so endianness is solved.

> Of course alternatively we can continue bikeshedding about this until 
> everything becomes moot because we switched to virtio 1.0 ;).

Transition will be long...

> Rusty / Michael, virtio 1.0 does go via normal DMA channels, right?

No.  We argued about this; it's more PCI-like to do, but there's a
performance cost and it's really unclear that passing through a virtio
PCI device to a (sub)guest is a scenario worth supporting.

Maybe someone will come up with a convincing reason, and we'll add
a feature bit in a future revision...

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/tools/kvm/arm/aarch32/kvm-cpu.c b/tools/kvm/arm/aarch32/kvm-cpu.c
index bd71037..464b473 100644
--- a/tools/kvm/arm/aarch32/kvm-cpu.c
+++ b/tools/kvm/arm/aarch32/kvm-cpu.c
@@ -1,5 +1,6 @@ 
 #include "kvm/kvm-cpu.h"
 #include "kvm/kvm.h"
+#include "kvm/virtio.h"
 
 #include <asm/ptrace.h>
 
@@ -76,6 +77,19 @@  void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
 		die_perror("KVM_SET_ONE_REG failed (pc)");
 }
 
+int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
+{
+	struct kvm_one_reg reg;
+	u32 data;
+
+	reg.id = ARM_CORE_REG(usr_regs.ARM_cpsr);
+	reg.addr = (u64)(unsigned long)&data;
+	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
+		die("KVM_GET_ONE_REG failed (cpsr)");
+
+	return (data & PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
+}
+
 void kvm_cpu__show_code(struct kvm_cpu *vcpu)
 {
 	struct kvm_one_reg reg;
diff --git a/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h b/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h
index 7d70c3b..ed7da45 100644
--- a/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h
+++ b/tools/kvm/arm/aarch64/include/kvm/kvm-cpu-arch.h
@@ -13,5 +13,7 @@ 
 #define ARM_MPIDR_HWID_BITMASK	0xFF00FFFFFFUL
 #define ARM_CPU_ID		3, 0, 0, 0
 #define ARM_CPU_ID_MPIDR	5
+#define ARM_CPU_CTRL		3, 0, 1, 0
+#define ARM_CPU_CTRL_SCTLR	0
 
 #endif /* KVM__KVM_CPU_ARCH_H */
diff --git a/tools/kvm/arm/aarch64/kvm-cpu.c b/tools/kvm/arm/aarch64/kvm-cpu.c
index 059e42c..b3ce2c8 100644
--- a/tools/kvm/arm/aarch64/kvm-cpu.c
+++ b/tools/kvm/arm/aarch64/kvm-cpu.c
@@ -1,12 +1,16 @@ 
 #include "kvm/kvm-cpu.h"
 #include "kvm/kvm.h"
+#include "kvm/virtio.h"
 
 #include <asm/ptrace.h>
 
 #define COMPAT_PSR_F_BIT	0x00000040
 #define COMPAT_PSR_I_BIT	0x00000080
+#define COMPAT_PSR_E_BIT	0x00000200
 #define COMPAT_PSR_MODE_SVC	0x00000013
 
+#define SCTLR_EL1_EE_MASK	(1 << 25)
+
 #define ARM64_CORE_REG(x)	(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
 				 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 
@@ -133,6 +137,27 @@  void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu)
 		return reset_vcpu_aarch64(vcpu);
 }
 
+int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
+{
+	struct kvm_one_reg reg;
+	u64 data;
+
+	reg.id = ARM64_CORE_REG(regs.pstate);
+	reg.addr = (u64)&data;
+	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
+		die("KVM_GET_ONE_REG failed (spsr[EL1])");
+
+	if (data & PSR_MODE32_BIT)
+		return (data & COMPAT_PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
+
+	reg.id = ARM64_SYS_REG(ARM_CPU_CTRL, ARM_CPU_CTRL_SCTLR); /* SCTLR_EL1 */
+	reg.addr = (u64)&data;
+	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
+		die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
+
+	return (data & SCTLR_EL1_EE_MASK) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
+}
+
 void kvm_cpu__show_code(struct kvm_cpu *vcpu)
 {
 	struct kvm_one_reg reg;
diff --git a/tools/kvm/arm/include/arm-common/kvm-arch.h b/tools/kvm/arm/include/arm-common/kvm-arch.h
index b6c4bf8..5d2fab2 100644
--- a/tools/kvm/arm/include/arm-common/kvm-arch.h
+++ b/tools/kvm/arm/include/arm-common/kvm-arch.h
@@ -35,6 +35,8 @@ 
 #define VIRTIO_DEFAULT_TRANS(kvm)	\
 	((kvm)->cfg.arch.virtio_trans_pci ? VIRTIO_PCI : VIRTIO_MMIO)
 
+#define VIRTIO_RING_ENDIAN	(VIRTIO_ENDIAN_LE | VIRTIO_ENDIAN_BE)
+
 static inline bool arm_addr_in_ioport_region(u64 phys_addr)
 {
 	u64 limit = KVM_IOPORT_AREA + ARM_IOPORT_SIZE;