diff mbox

[v1,15/16] kvm: arm64: Allow configuring physical address space size

Message ID 20180109190414.4017-16-suzuki.poulose@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Suzuki K Poulose Jan. 9, 2018, 7:04 p.m. UTC
Allow the guests to choose a larger physical address space size.
The default and minimum size is 40bits. A guest can change this
right after the VM creation, but before the stage2 entry page
tables are allocated (i.e, before it registers a memory range
or maps a device address). The size is restricted to the maximum
supported by the host. Also, the guest can only increase the PA size,
from the existing value, as reducing it could break the devices which
may have verified their physical address for validity and may do a
lazy mapping(e.g, VGIC).

Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoffer Dall <cdall@linaro.org>
Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
---
 Documentation/virtual/kvm/api.txt | 27 ++++++++++++++++++++++++++
 arch/arm/include/asm/kvm_host.h   |  7 +++++++
 arch/arm64/include/asm/kvm_host.h |  1 +
 arch/arm64/include/asm/kvm_mmu.h  | 41 ++++++++++++++++++++++++++++++---------
 arch/arm64/kvm/reset.c            | 28 ++++++++++++++++++++++++++
 include/uapi/linux/kvm.h          |  4 ++++
 virt/kvm/arm/arm.c                |  2 +-
 7 files changed, 100 insertions(+), 10 deletions(-)

Comments

Christoffer Dall Feb. 8, 2018, 11:14 a.m. UTC | #1
On Tue, Jan 09, 2018 at 07:04:10PM +0000, Suzuki K Poulose wrote:
> Allow the guests to choose a larger physical address space size.
> The default and minimum size is 40bits. A guest can change this
> right after the VM creation, but before the stage2 entry page
> tables are allocated (i.e, before it registers a memory range
> or maps a device address). The size is restricted to the maximum
> supported by the host. Also, the guest can only increase the PA size,
> from the existing value, as reducing it could break the devices which
> may have verified their physical address for validity and may do a
> lazy mapping(e.g, VGIC).
> 
> Cc: Marc Zyngier <marc.zyngier@arm.com>
> Cc: Christoffer Dall <cdall@linaro.org>
> Cc: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> ---
>  Documentation/virtual/kvm/api.txt | 27 ++++++++++++++++++++++++++
>  arch/arm/include/asm/kvm_host.h   |  7 +++++++
>  arch/arm64/include/asm/kvm_host.h |  1 +
>  arch/arm64/include/asm/kvm_mmu.h  | 41 ++++++++++++++++++++++++++++++---------
>  arch/arm64/kvm/reset.c            | 28 ++++++++++++++++++++++++++
>  include/uapi/linux/kvm.h          |  4 ++++
>  virt/kvm/arm/arm.c                |  2 +-
>  7 files changed, 100 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 57d3ee9e4bde..a203faf768c4 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3403,6 +3403,33 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
>  or if no page table is present for the addresses (e.g. when using
>  hugepages).
>  
> +4.109 KVM_ARM_GET_PHYS_SHIFT
> +
> +Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
> +Architectures: arm64
> +Type: vm ioctl
> +Parameters: __u32 (out)
> +Returns: 0 on success, a negative value on error
> +
> +This ioctl is used to get the current maximum physical address size for
> +the VM. The value is Log2(Maximum_Physical_Address). This is neither the
> + amount of physical memory assigned to the VM nor the maximum physical address
> +that a real CPU on the host can handle. Rather, this is the upper limit of the
> +guest physical address that can be used by the VM.

What is the point of this?  Presumably if userspace has set the size, it
already knows the size?

> +
> +4.109 KVM_ARM_SET_PHYS_SHIFT
> +
> +Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
> +Architectures: arm64
> +Type: vm ioctl
> +Parameters: __u32 (in)
> +Returns: 0 on success, a negative value on error
> +
> +This ioctl is used to set the maximum physical address size for
> +the VM. The value is Log2(Maximum_Physical_Address). The value can only
> +be increased from the existing setting. The value cannot be changed
> +after the stage-2 page tables are allocated and will return an error.
> +

Is there a way for userspace to discover what the underlying hardware
can actually support, beyond trial-and-error on this ioctl?

>  5. The kvm_run structure
>  ------------------------
>  
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index a9f7d3f47134..fa8e68a4f692 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -268,6 +268,13 @@ static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>  	return 0;
>  }
>  
> +static inline long kvm_arch_dev_vm_ioctl(struct kvm *kvm,
> +					 unsigned int ioctl,
> +					 unsigned long arg)
> +{
> +	return -EINVAL;
> +}
> +
>  int kvm_perf_init(void);
>  int kvm_perf_teardown(void);
>  
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 1e66e5ab3dde..2895c2cda8fc 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -50,6 +50,7 @@
>  int __attribute_const__ kvm_target_cpu(void);
>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>  int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
> +long kvm_arch_dev_vm_ioctl(struct kvm *kvm, unsigned int ioctl, unsigned long arg);
>  void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
>  
>  struct kvm_arch {
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index ab6a8b905065..ab7f50f20bcd 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -347,21 +347,44 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
>  	return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
>  }
>  
> +static inline int kvm_reconfig_stage2(struct kvm *kvm, u32 phys_shift)
> +{
> +	int rc = 0;
> +	unsigned int pa_max, parange;
> +
> +	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
> +	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
> +	/* Raise it to 40bits for backward compatibility */
> +	pa_max = (pa_max < 40) ? 40 : pa_max;
> +	/* Make sure the size is supported/available */
> +	if (phys_shift > PHYS_MASK_SHIFT || phys_shift > pa_max)
> +		return -EINVAL;
> +	/*
> +	 * The stage2 PGD is dependent on the settings we initialise here
> +	 * and should be allocated only after this step. We cannot allow
> +	 * down sizing the IPA size as there could be devices or memory
> +	 * regions, that depend on the previous size.
> +	 */
> +	mutex_lock(&kvm->slots_lock);
> +	if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift) {
> +		rc = -EPERM;
> +	} else if (phys_shift > kvm->arch.phys_shift) {
> +		kvm->arch.phys_shift = phys_shift;
> +		kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
> +		kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
> +					 TCR_T0SZ(kvm->arch.phys_shift);
> +	}

I think you can rework the above to make it more obvious what's going on
in this way:

	rc = -EPERM;
	if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift)
		goto out_unlock;

	rc = 0;
	if (phys_shift == kvm->arch.phys_shift)
		goto out_unlock;

	kvm->arch.phys_shift = phys_shift;
	kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
	kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
				 TCR_T0SZ(kvm->arch.phys_shift);

out_unlock:

> +	mutex_unlock(&kvm->slots_lock);
> +	return rc;
> +}
> +
>  /*
>   * kvm_init_stage2_config: Initialise the VM specific stage2 page table
>   * details to default IPA size.
>   */
>  static inline void kvm_init_stage2_config(struct kvm *kvm)
>  {
> -	/*
> -	 * The stage2 PGD is dependent on the settings we initialise here
> -	 * and should be allocated only after this step.
> -	 */
> -	VM_BUG_ON(kvm->arch.pgd != NULL);
> -	kvm->arch.phys_shift = KVM_PHYS_SHIFT_DEFAULT;
> -	kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
> -	kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
> -				 TCR_T0SZ(kvm->arch.phys_shift);
> +	kvm_reconfig_stage2(kvm, KVM_PHYS_SHIFT_DEFAULT);
>  }
>  
>  #endif /* __ASSEMBLY__ */
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 3256b9228e75..90ceca823aca 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -23,6 +23,7 @@
>  #include <linux/kvm_host.h>
>  #include <linux/kvm.h>
>  #include <linux/hw_breakpoint.h>
> +#include <linux/uaccess.h>
>  
>  #include <kvm/arm_arch_timer.h>
>  
> @@ -81,6 +82,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_VCPU_ATTRIBUTES:
>  		r = 1;
>  		break;
> +	case KVM_CAP_ARM_CONFIG_PHYS_SHIFT:
> +		r = 1;
> +		break;
>  	default:
>  		r = 0;
>  	}
> @@ -88,6 +92,30 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
>  	return r;
>  }
>  
> +long kvm_arch_dev_vm_ioctl(struct kvm *kvm,
> +			  unsigned int ioctl, unsigned long arg)
> +{
> +	void __user *argp = (void __user *)arg;
> +	u32 phys_shift;
> +	long r = -EFAULT;
> +
> +	switch (ioctl) {
> +	case KVM_ARM_GET_PHYS_SHIFT:
> +		phys_shift = kvm_phys_shift(kvm);
> +		if (!put_user(phys_shift, (u32 __user *)argp))
> +			r = 0;
> +		break;
> +        case KVM_ARM_SET_PHYS_SHIFT:
> +		if (!get_user(phys_shift, (u32 __user*)argp))
> +			r = kvm_reconfig_stage2(kvm, phys_shift);
> +		break;
> +	default:
> +		r = -EINVAL;
> +	}
> +	return r;
> +}
> +
> +
>  /**
>   * kvm_reset_vcpu - sets core registers and sys_regs to reset value
>   * @vcpu: The VCPU pointer
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 496e59a2738b..66bfbe19b434 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_HYPERV_SYNIC2 148
>  #define KVM_CAP_HYPERV_VP_INDEX 149
>  #define KVM_CAP_S390_AIS_MIGRATION 150
> +#define KVM_CAP_ARM_CONFIG_PHYS_SHIFT 151
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> @@ -1261,6 +1262,9 @@ struct kvm_s390_ucas_mapping {
>  #define KVM_PPC_CONFIGURE_V3_MMU  _IOW(KVMIO,  0xaf, struct kvm_ppc_mmuv3_cfg)
>  /* Available with KVM_CAP_PPC_RADIX_MMU */
>  #define KVM_PPC_GET_RMMU_INFO	  _IOW(KVMIO,  0xb0, struct kvm_ppc_rmmu_info)
> +/* Available with KVM_CAP_ARM_CONFIG_PHYS_SHIFT */
> +#define KVM_ARM_GET_PHYS_SHIFT	  _IOR(KVMIO, 0xb1, __u32)
> +#define KVM_ARM_SET_PHYS_SHIFT	  _IOW(KVMIO, 0xb2, __u32)
>  
>  /* ioctl for vm fd */
>  #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index e0bf8d19fcfe..05fc49304722 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -1136,7 +1136,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		return 0;
>  	}
>  	default:
> -		return -EINVAL;
> +		return kvm_arch_dev_vm_ioctl(kvm, ioctl, arg);
>  	}
>  }
>  
> -- 
> 2.13.6
> 

Have you considered making this capability a generic capability and
encoding this in the 'type' argument to KVM_CREATE_VM?  This would
significantly simplify all the above and would allow you to drop patch 8
and 9 I think.

Thanks,
-Christoffer
Suzuki K Poulose Feb. 8, 2018, 5:53 p.m. UTC | #2
On 08/02/18 11:14, Christoffer Dall wrote:
> On Tue, Jan 09, 2018 at 07:04:10PM +0000, Suzuki K Poulose wrote:
>> Allow the guests to choose a larger physical address space size.
>> The default and minimum size is 40bits. A guest can change this
>> right after the VM creation, but before the stage2 entry page
>> tables are allocated (i.e, before it registers a memory range
>> or maps a device address). The size is restricted to the maximum
>> supported by the host. Also, the guest can only increase the PA size,
>> from the existing value, as reducing it could break the devices which
>> may have verified their physical address for validity and may do a
>> lazy mapping(e.g, VGIC).
>>
>> Cc: Marc Zyngier <marc.zyngier@arm.com>
>> Cc: Christoffer Dall <cdall@linaro.org>
>> Cc: Peter Maydell <peter.maydell@linaro.org>
>> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
>> ---
>>   Documentation/virtual/kvm/api.txt | 27 ++++++++++++++++++++++++++
>>   arch/arm/include/asm/kvm_host.h   |  7 +++++++
>>   arch/arm64/include/asm/kvm_host.h |  1 +
>>   arch/arm64/include/asm/kvm_mmu.h  | 41 ++++++++++++++++++++++++++++++---------
>>   arch/arm64/kvm/reset.c            | 28 ++++++++++++++++++++++++++
>>   include/uapi/linux/kvm.h          |  4 ++++
>>   virt/kvm/arm/arm.c                |  2 +-
>>   7 files changed, 100 insertions(+), 10 deletions(-)
>>
>> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
>> index 57d3ee9e4bde..a203faf768c4 100644
>> --- a/Documentation/virtual/kvm/api.txt
>> +++ b/Documentation/virtual/kvm/api.txt
>> @@ -3403,6 +3403,33 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
>>   or if no page table is present for the addresses (e.g. when using
>>   hugepages).
>>   
>> +4.109 KVM_ARM_GET_PHYS_SHIFT
>> +
>> +Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
>> +Architectures: arm64
>> +Type: vm ioctl
>> +Parameters: __u32 (out)
>> +Returns: 0 on success, a negative value on error
>> +
>> +This ioctl is used to get the current maximum physical address size for
>> +the VM. The value is Log2(Maximum_Physical_Address). This is neither the
>> + amount of physical memory assigned to the VM nor the maximum physical address
>> +that a real CPU on the host can handle. Rather, this is the upper limit of the
>> +guest physical address that can be used by the VM.
> 
> What is the point of this?  Presumably if userspace has set the size, it
> already knows the size?

This can help the userspace know, what the "default" limit is. As such I am
not particular about keeping this around.

> 
>> +
>> +4.109 KVM_ARM_SET_PHYS_SHIFT
>> +
>> +Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
>> +Architectures: arm64
>> +Type: vm ioctl
>> +Parameters: __u32 (in)
>> +Returns: 0 on success, a negative value on error
>> +
>> +This ioctl is used to set the maximum physical address size for
>> +the VM. The value is Log2(Maximum_Physical_Address). The value can only
>> +be increased from the existing setting. The value cannot be changed
>> +after the stage-2 page tables are allocated and will return an error.
>> +
> 
> Is there a way for userspace to discover what the underlying hardware
> can actually support, beyond trial-and-error on this ioctl?

Unfortunately, there is none. We don't expose ID_AA64MMFR0 via mrs emulation.

>> +static inline int kvm_reconfig_stage2(struct kvm *kvm, u32 phys_shift)
>> +{
>> +	int rc = 0;
>> +	unsigned int pa_max, parange;
>> +
>> +	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
>> +	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
>> +	/* Raise it to 40bits for backward compatibility */
>> +	pa_max = (pa_max < 40) ? 40 : pa_max;
>> +	/* Make sure the size is supported/available */
>> +	if (phys_shift > PHYS_MASK_SHIFT || phys_shift > pa_max)
>> +		return -EINVAL;
>> +	/*
>> +	 * The stage2 PGD is dependent on the settings we initialise here
>> +	 * and should be allocated only after this step. We cannot allow
>> +	 * down sizing the IPA size as there could be devices or memory
>> +	 * regions, that depend on the previous size.
>> +	 */
>> +	mutex_lock(&kvm->slots_lock);
>> +	if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift) {
>> +		rc = -EPERM;
>> +	} else if (phys_shift > kvm->arch.phys_shift) {
>> +		kvm->arch.phys_shift = phys_shift;
>> +		kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
>> +		kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
>> +					 TCR_T0SZ(kvm->arch.phys_shift);
>> +	}
> 
> I think you can rework the above to make it more obvious what's going on
> in this way:
> 
> 	rc = -EPERM;
> 	if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift)
> 		goto out_unlock;
> 
> 	rc = 0;
> 	if (phys_shift == kvm->arch.phys_shift)
> 		goto out_unlock;
> 
> 	kvm->arch.phys_shift = phys_shift;
> 	kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
> 	kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
> 				 TCR_T0SZ(kvm->arch.phys_shift);
> 
> out_unlock:
> 

Sure.



>> --- a/virt/kvm/arm/arm.c
>> +++ b/virt/kvm/arm/arm.c
>> @@ -1136,7 +1136,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>   		return 0;
>>   	}
>>   	default:
>> -		return -EINVAL;
>> +		return kvm_arch_dev_vm_ioctl(kvm, ioctl, arg);
>>   	}
>>   }
>>   
>> -- 
>> 2.13.6
>>
> 
> Have you considered making this capability a generic capability and
> encoding this in the 'type' argument to KVM_CREATE_VM?  This would
> significantly simplify all the above and would allow you to drop patch 8
> and 9 I think.

No. I will take a look. Btw, there were patches flying around to support
"userspace" requesting specific values for ID feature registers. But even that
doesn't help with the detection part. May be that is another way to configure
the size, but not sure about the current status of that work.

Cheers
Suzuki
Christoffer Dall Feb. 9, 2018, 8:16 a.m. UTC | #3
On Thu, Feb 08, 2018 at 05:53:17PM +0000, Suzuki K Poulose wrote:
> On 08/02/18 11:14, Christoffer Dall wrote:
> >On Tue, Jan 09, 2018 at 07:04:10PM +0000, Suzuki K Poulose wrote:
> >>Allow the guests to choose a larger physical address space size.
> >>The default and minimum size is 40bits. A guest can change this
> >>right after the VM creation, but before the stage2 entry page
> >>tables are allocated (i.e, before it registers a memory range
> >>or maps a device address). The size is restricted to the maximum
> >>supported by the host. Also, the guest can only increase the PA size,
> >>from the existing value, as reducing it could break the devices which
> >>may have verified their physical address for validity and may do a
> >>lazy mapping(e.g, VGIC).
> >>
> >>Cc: Marc Zyngier <marc.zyngier@arm.com>
> >>Cc: Christoffer Dall <cdall@linaro.org>
> >>Cc: Peter Maydell <peter.maydell@linaro.org>
> >>Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> >>---
> >>  Documentation/virtual/kvm/api.txt | 27 ++++++++++++++++++++++++++
> >>  arch/arm/include/asm/kvm_host.h   |  7 +++++++
> >>  arch/arm64/include/asm/kvm_host.h |  1 +
> >>  arch/arm64/include/asm/kvm_mmu.h  | 41 ++++++++++++++++++++++++++++++---------
> >>  arch/arm64/kvm/reset.c            | 28 ++++++++++++++++++++++++++
> >>  include/uapi/linux/kvm.h          |  4 ++++
> >>  virt/kvm/arm/arm.c                |  2 +-
> >>  7 files changed, 100 insertions(+), 10 deletions(-)
> >>
> >>diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> >>index 57d3ee9e4bde..a203faf768c4 100644
> >>--- a/Documentation/virtual/kvm/api.txt
> >>+++ b/Documentation/virtual/kvm/api.txt
> >>@@ -3403,6 +3403,33 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
> >>  or if no page table is present for the addresses (e.g. when using
> >>  hugepages).
> >>+4.109 KVM_ARM_GET_PHYS_SHIFT
> >>+
> >>+Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
> >>+Architectures: arm64
> >>+Type: vm ioctl
> >>+Parameters: __u32 (out)
> >>+Returns: 0 on success, a negative value on error
> >>+
> >>+This ioctl is used to get the current maximum physical address size for
> >>+the VM. The value is Log2(Maximum_Physical_Address). This is neither the
> >>+ amount of physical memory assigned to the VM nor the maximum physical address
> >>+that a real CPU on the host can handle. Rather, this is the upper limit of the
> >>+guest physical address that can be used by the VM.
> >
> >What is the point of this?  Presumably if userspace has set the size, it
> >already knows the size?
> 
> This can help the userspace know, what the "default" limit is. As such I am
> not particular about keeping this around.
> 

Userspace has to already know, since otherwise things don't work today,
so I think we can omit this.

> >
> >>+
> >>+4.109 KVM_ARM_SET_PHYS_SHIFT
> >>+
> >>+Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
> >>+Architectures: arm64
> >>+Type: vm ioctl
> >>+Parameters: __u32 (in)
> >>+Returns: 0 on success, a negative value on error
> >>+
> >>+This ioctl is used to set the maximum physical address size for
> >>+the VM. The value is Log2(Maximum_Physical_Address). The value can only
> >>+be increased from the existing setting. The value cannot be changed
> >>+after the stage-2 page tables are allocated and will return an error.
> >>+
> >
> >Is there a way for userspace to discover what the underlying hardware
> >can actually support, beyond trial-and-error on this ioctl?
> 
> Unfortunately, there is none. We don't expose ID_AA64MMFR0 via mrs emulation.
> 

We should probably think about that.  Perhaps it could be tied to the
return value of KVM_CAP_ARM_CONFIG_PHYS_SHIFT ?

> >>+static inline int kvm_reconfig_stage2(struct kvm *kvm, u32 phys_shift)
> >>+{
> >>+	int rc = 0;
> >>+	unsigned int pa_max, parange;
> >>+
> >>+	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
> >>+	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
> >>+	/* Raise it to 40bits for backward compatibility */
> >>+	pa_max = (pa_max < 40) ? 40 : pa_max;
> >>+	/* Make sure the size is supported/available */
> >>+	if (phys_shift > PHYS_MASK_SHIFT || phys_shift > pa_max)
> >>+		return -EINVAL;
> >>+	/*
> >>+	 * The stage2 PGD is dependent on the settings we initialise here
> >>+	 * and should be allocated only after this step. We cannot allow
> >>+	 * down sizing the IPA size as there could be devices or memory
> >>+	 * regions, that depend on the previous size.
> >>+	 */
> >>+	mutex_lock(&kvm->slots_lock);
> >>+	if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift) {
> >>+		rc = -EPERM;
> >>+	} else if (phys_shift > kvm->arch.phys_shift) {
> >>+		kvm->arch.phys_shift = phys_shift;
> >>+		kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
> >>+		kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
> >>+					 TCR_T0SZ(kvm->arch.phys_shift);
> >>+	}
> >
> >I think you can rework the above to make it more obvious what's going on
> >in this way:
> >
> >	rc = -EPERM;
> >	if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift)
> >		goto out_unlock;
> >
> >	rc = 0;
> >	if (phys_shift == kvm->arch.phys_shift)
> >		goto out_unlock;
> >
> >	kvm->arch.phys_shift = phys_shift;
> >	kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
> >	kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
> >				 TCR_T0SZ(kvm->arch.phys_shift);
> >
> >out_unlock:
> >
> 
> Sure.
> 
> 
> 
> >>--- a/virt/kvm/arm/arm.c
> >>+++ b/virt/kvm/arm/arm.c
> >>@@ -1136,7 +1136,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >>  		return 0;
> >>  	}
> >>  	default:
> >>-		return -EINVAL;
> >>+		return kvm_arch_dev_vm_ioctl(kvm, ioctl, arg);
> >>  	}
> >>  }
> >>-- 
> >>2.13.6
> >>
> >
> >Have you considered making this capability a generic capability and
> >encoding this in the 'type' argument to KVM_CREATE_VM?  This would
> >significantly simplify all the above and would allow you to drop patch 8
> >and 9 I think.
> 
> No. I will take a look. Btw, there were patches flying around to support
> "userspace" requesting specific values for ID feature registers. But even that
> doesn't help with the detection part. May be that is another way to configure
> the size, but not sure about the current status of that work.
> 

It's a bit stranded.  Drew was driving this work (on cc).  But the ID
register exposed to the guest should represent the actual limits
of the VM, so I don't think we need userspace to configure this, but we
can implement this in KVM based on the PA range configured for the VM.

Thanks,
-Christoffer
Andrew Jones Feb. 9, 2018, 9:27 a.m. UTC | #4
On Fri, Feb 09, 2018 at 09:16:06AM +0100, Christoffer Dall wrote:
> On Thu, Feb 08, 2018 at 05:53:17PM +0000, Suzuki K Poulose wrote:
> > On 08/02/18 11:14, Christoffer Dall wrote:
> > >On Tue, Jan 09, 2018 at 07:04:10PM +0000, Suzuki K Poulose wrote:
> > >>Allow the guests to choose a larger physical address space size.
> > >>The default and minimum size is 40bits. A guest can change this
> > >>right after the VM creation, but before the stage2 entry page
> > >>tables are allocated (i.e, before it registers a memory range
> > >>or maps a device address). The size is restricted to the maximum
> > >>supported by the host. Also, the guest can only increase the PA size,
> > >>from the existing value, as reducing it could break the devices which
> > >>may have verified their physical address for validity and may do a
> > >>lazy mapping(e.g, VGIC).
> > >>
> > >>Cc: Marc Zyngier <marc.zyngier@arm.com>
> > >>Cc: Christoffer Dall <cdall@linaro.org>
> > >>Cc: Peter Maydell <peter.maydell@linaro.org>
> > >>Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> > >>---
> > >>  Documentation/virtual/kvm/api.txt | 27 ++++++++++++++++++++++++++
> > >>  arch/arm/include/asm/kvm_host.h   |  7 +++++++
> > >>  arch/arm64/include/asm/kvm_host.h |  1 +
> > >>  arch/arm64/include/asm/kvm_mmu.h  | 41 ++++++++++++++++++++++++++++++---------
> > >>  arch/arm64/kvm/reset.c            | 28 ++++++++++++++++++++++++++
> > >>  include/uapi/linux/kvm.h          |  4 ++++
> > >>  virt/kvm/arm/arm.c                |  2 +-
> > >>  7 files changed, 100 insertions(+), 10 deletions(-)
> > >>
> > >>diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> > >>index 57d3ee9e4bde..a203faf768c4 100644
> > >>--- a/Documentation/virtual/kvm/api.txt
> > >>+++ b/Documentation/virtual/kvm/api.txt
> > >>@@ -3403,6 +3403,33 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
> > >>  or if no page table is present for the addresses (e.g. when using
> > >>  hugepages).
> > >>+4.109 KVM_ARM_GET_PHYS_SHIFT
> > >>+
> > >>+Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
> > >>+Architectures: arm64
> > >>+Type: vm ioctl
> > >>+Parameters: __u32 (out)
> > >>+Returns: 0 on success, a negative value on error
> > >>+
> > >>+This ioctl is used to get the current maximum physical address size for
> > >>+the VM. The value is Log2(Maximum_Physical_Address). This is neither the
> > >>+ amount of physical memory assigned to the VM nor the maximum physical address
> > >>+that a real CPU on the host can handle. Rather, this is the upper limit of the
> > >>+guest physical address that can be used by the VM.
> > >
> > >What is the point of this?  Presumably if userspace has set the size, it
> > >already knows the size?
> > 
> > This can help the userspace know, what the "default" limit is. As such I am
> > not particular about keeping this around.
> > 
> 
> Userspace has to already know, since otherwise things don't work today,
> so I think we can omit this.
> 
> > >
> > >>+
> > >>+4.109 KVM_ARM_SET_PHYS_SHIFT
> > >>+
> > >>+Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
> > >>+Architectures: arm64
> > >>+Type: vm ioctl
> > >>+Parameters: __u32 (in)
> > >>+Returns: 0 on success, a negative value on error
> > >>+
> > >>+This ioctl is used to set the maximum physical address size for
> > >>+the VM. The value is Log2(Maximum_Physical_Address). The value can only
> > >>+be increased from the existing setting. The value cannot be changed
> > >>+after the stage-2 page tables are allocated and will return an error.
> > >>+
> > >
> > >Is there a way for userspace to discover what the underlying hardware
> > >can actually support, beyond trial-and-error on this ioctl?
> > 
> > Unfortunately, there is none. We don't expose ID_AA64MMFR0 via mrs emulation.
> > 
> 
> We should probably think about that.  Perhaps it could be tied to the
> return value of KVM_CAP_ARM_CONFIG_PHYS_SHIFT ?

FWIW, that sounds good to me.

> 
> > >>+static inline int kvm_reconfig_stage2(struct kvm *kvm, u32 phys_shift)
> > >>+{
> > >>+	int rc = 0;
> > >>+	unsigned int pa_max, parange;
> > >>+
> > >>+	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
> > >>+	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
> > >>+	/* Raise it to 40bits for backward compatibility */
> > >>+	pa_max = (pa_max < 40) ? 40 : pa_max;
> > >>+	/* Make sure the size is supported/available */
> > >>+	if (phys_shift > PHYS_MASK_SHIFT || phys_shift > pa_max)
> > >>+		return -EINVAL;
> > >>+	/*
> > >>+	 * The stage2 PGD is dependent on the settings we initialise here
> > >>+	 * and should be allocated only after this step. We cannot allow
> > >>+	 * down sizing the IPA size as there could be devices or memory
> > >>+	 * regions, that depend on the previous size.
> > >>+	 */
> > >>+	mutex_lock(&kvm->slots_lock);
> > >>+	if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift) {
> > >>+		rc = -EPERM;
> > >>+	} else if (phys_shift > kvm->arch.phys_shift) {
> > >>+		kvm->arch.phys_shift = phys_shift;
> > >>+		kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
> > >>+		kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
> > >>+					 TCR_T0SZ(kvm->arch.phys_shift);
> > >>+	}
> > >
> > >I think you can rework the above to make it more obvious what's going on
> > >in this way:
> > >
> > >	rc = -EPERM;
> > >	if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift)
> > >		goto out_unlock;
> > >
> > >	rc = 0;
> > >	if (phys_shift == kvm->arch.phys_shift)
> > >		goto out_unlock;
> > >
> > >	kvm->arch.phys_shift = phys_shift;
> > >	kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
> > >	kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
> > >				 TCR_T0SZ(kvm->arch.phys_shift);
> > >
> > >out_unlock:
> > >
> > 
> > Sure.
> > 
> > 
> > 
> > >>--- a/virt/kvm/arm/arm.c
> > >>+++ b/virt/kvm/arm/arm.c
> > >>@@ -1136,7 +1136,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
> > >>  		return 0;
> > >>  	}
> > >>  	default:
> > >>-		return -EINVAL;
> > >>+		return kvm_arch_dev_vm_ioctl(kvm, ioctl, arg);
> > >>  	}
> > >>  }
> > >>-- 
> > >>2.13.6
> > >>
> > >
> > >Have you considered making this capability a generic capability and
> > >encoding this in the 'type' argument to KVM_CREATE_VM?  This would
> > >significantly simplify all the above and would allow you to drop patch 8
> > >and 9 I think.
> > 
> > No. I will take a look. Btw, there were patches flying around to support
> > "userspace" requesting specific values for ID feature registers. But even that
> > doesn't help with the detection part. May be that is another way to configure
> > the size, but not sure about the current status of that work.
> > 
> 
> It's a bit stranded.  Drew was driving this work (on cc).  But the ID
> register exposed to the guest should represent the actual limits
> of the VM, so I don't think we need userspace to configure this, but we
> can implement this in KVM based on the PA range configured for the VM.
>

I heard there were some patches being worked by someone at Arm, which
haven't been posted yet (obviously), but maybe that was just a rumor?
I was about to revisit this topic myself, at least to some degree, to
attempt to address PMU hiding. We really need to put some thought into
how best to generally give userspace control of the VM's ID registers,
within the constraints of the host. Anyway, I guess that should be done
in a separate thread, so I won't start brainstorming now, here.

Thanks,
drew
Suzuki K Poulose March 15, 2018, 11:06 a.m. UTC | #5
On 09/02/18 08:16, Christoffer Dall wrote:
> On Thu, Feb 08, 2018 at 05:53:17PM +0000, Suzuki K Poulose wrote:
>> On 08/02/18 11:14, Christoffer Dall wrote:
>>> On Tue, Jan 09, 2018 at 07:04:10PM +0000, Suzuki K Poulose wrote:
>>>> Allow the guests to choose a larger physical address space size.
>>>> The default and minimum size is 40bits. A guest can change this
>>>> right after the VM creation, but before the stage2 entry page
>>>> tables are allocated (i.e, before it registers a memory range
>>>> or maps a device address). The size is restricted to the maximum
>>>> supported by the host. Also, the guest can only increase the PA size,
>>> >from the existing value, as reducing it could break the devices which
>>>> may have verified their physical address for validity and may do a
>>>> lazy mapping(e.g, VGIC).
>>>>
>>>> Cc: Marc Zyngier <marc.zyngier@arm.com>
>>>> Cc: Christoffer Dall <cdall@linaro.org>
>>>> Cc: Peter Maydell <peter.maydell@linaro.org>
>>>> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>

>>>> +
>>>> +4.109 KVM_ARM_SET_PHYS_SHIFT
>>>> +
>>>> +Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
>>>> +Architectures: arm64
>>>> +Type: vm ioctl
>>>> +Parameters: __u32 (in)
>>>> +Returns: 0 on success, a negative value on error
>>>> +
>>>> +This ioctl is used to set the maximum physical address size for
>>>> +the VM. The value is Log2(Maximum_Physical_Address). The value can only
>>>> +be increased from the existing setting. The value cannot be changed
>>>> +after the stage-2 page tables are allocated and will return an error.
>>>> +
>>>
>>> Is there a way for userspace to discover what the underlying hardware
>>> can actually support, beyond trial-and-error on this ioctl?
>>
>> Unfortunately, there is none. We don't expose ID_AA64MMFR0 via mrs emulation.
>>
> 
> We should probably think about that.  Perhaps it could be tied to the
> return value of KVM_CAP_ARM_CONFIG_PHYS_SHIFT ?

See below.

>>>
>>> Have you considered making this capability a generic capability and
>>> encoding this in the 'type' argument to KVM_CREATE_VM?  This would
>>> significantly simplify all the above and would allow you to drop patch 8
>>> and 9 I think.
>>>> No. I will take a look.

We could add a KVM dev capability hooked to the kvm_arch_dev_ioctl() for ARM
to give out the maximum supported physical shift. And then the user could request
the physical shift via the type argument (of course, encoded to allow future uses)
to KVM_CREATE_VM.

Cheers
Suzuki
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 57d3ee9e4bde..a203faf768c4 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3403,6 +3403,33 @@  invalid, if invalid pages are written to (e.g. after the end of memory)
 or if no page table is present for the addresses (e.g. when using
 hugepages).
 
+4.109 KVM_ARM_GET_PHYS_SHIFT
+
+Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
+Architectures: arm64
+Type: vm ioctl
+Parameters: __u32 (out)
+Returns: 0 on success, a negative value on error
+
+This ioctl is used to get the current maximum physical address size for
+the VM. The value is Log2(Maximum_Physical_Address). This is neither the
+ amount of physical memory assigned to the VM nor the maximum physical address
+that a real CPU on the host can handle. Rather, this is the upper limit of the
+guest physical address that can be used by the VM.
+
+4.109 KVM_ARM_SET_PHYS_SHIFT
+
+Capability: KVM_CAP_ARM_CONFIG_PHYS_SHIFT
+Architectures: arm64
+Type: vm ioctl
+Parameters: __u32 (in)
+Returns: 0 on success, a negative value on error
+
+This ioctl is used to set the maximum physical address size for
+the VM. The value is Log2(Maximum_Physical_Address). The value can only
+be increased from the existing setting. The value cannot be changed
+after the stage-2 page tables are allocated and will return an error.
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index a9f7d3f47134..fa8e68a4f692 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -268,6 +268,13 @@  static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 	return 0;
 }
 
+static inline long kvm_arch_dev_vm_ioctl(struct kvm *kvm,
+					 unsigned int ioctl,
+					 unsigned long arg)
+{
+	return -EINVAL;
+}
+
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 1e66e5ab3dde..2895c2cda8fc 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -50,6 +50,7 @@ 
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
+long kvm_arch_dev_vm_ioctl(struct kvm *kvm, unsigned int ioctl, unsigned long arg);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
 struct kvm_arch {
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index ab6a8b905065..ab7f50f20bcd 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -347,21 +347,44 @@  static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
 	return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
 }
 
+static inline int kvm_reconfig_stage2(struct kvm *kvm, u32 phys_shift)
+{
+	int rc = 0;
+	unsigned int pa_max, parange;
+
+	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
+	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
+	/* Raise it to 40bits for backward compatibility */
+	pa_max = (pa_max < 40) ? 40 : pa_max;
+	/* Make sure the size is supported/available */
+	if (phys_shift > PHYS_MASK_SHIFT || phys_shift > pa_max)
+		return -EINVAL;
+	/*
+	 * The stage2 PGD is dependent on the settings we initialise here
+	 * and should be allocated only after this step. We cannot allow
+	 * down sizing the IPA size as there could be devices or memory
+	 * regions, that depend on the previous size.
+	 */
+	mutex_lock(&kvm->slots_lock);
+	if (kvm->arch.pgd || phys_shift < kvm->arch.phys_shift) {
+		rc = -EPERM;
+	} else if (phys_shift > kvm->arch.phys_shift) {
+		kvm->arch.phys_shift = phys_shift;
+		kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
+		kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
+					 TCR_T0SZ(kvm->arch.phys_shift);
+	}
+	mutex_unlock(&kvm->slots_lock);
+	return rc;
+}
+
 /*
  * kvm_init_stage2_config: Initialise the VM specific stage2 page table
  * details to default IPA size.
  */
 static inline void kvm_init_stage2_config(struct kvm *kvm)
 {
-	/*
-	 * The stage2 PGD is dependent on the settings we initialise here
-	 * and should be allocated only after this step.
-	 */
-	VM_BUG_ON(kvm->arch.pgd != NULL);
-	kvm->arch.phys_shift = KVM_PHYS_SHIFT_DEFAULT;
-	kvm->arch.s2_levels = stage2_pt_levels(kvm->arch.phys_shift);
-	kvm->arch.vtcr_private = VTCR_EL2_SL0(kvm->arch.s2_levels) |
-				 TCR_T0SZ(kvm->arch.phys_shift);
+	kvm_reconfig_stage2(kvm, KVM_PHYS_SHIFT_DEFAULT);
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 3256b9228e75..90ceca823aca 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -23,6 +23,7 @@ 
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/uaccess.h>
 
 #include <kvm/arm_arch_timer.h>
 
@@ -81,6 +82,9 @@  int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_VCPU_ATTRIBUTES:
 		r = 1;
 		break;
+	case KVM_CAP_ARM_CONFIG_PHYS_SHIFT:
+		r = 1;
+		break;
 	default:
 		r = 0;
 	}
@@ -88,6 +92,30 @@  int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 	return r;
 }
 
+long kvm_arch_dev_vm_ioctl(struct kvm *kvm,
+			  unsigned int ioctl, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	u32 phys_shift;
+	long r = -EFAULT;
+
+	switch (ioctl) {
+	case KVM_ARM_GET_PHYS_SHIFT:
+		phys_shift = kvm_phys_shift(kvm);
+		if (!put_user(phys_shift, (u32 __user *)argp))
+			r = 0;
+		break;
+        case KVM_ARM_SET_PHYS_SHIFT:
+		if (!get_user(phys_shift, (u32 __user*)argp))
+			r = kvm_reconfig_stage2(kvm, phys_shift);
+		break;
+	default:
+		r = -EINVAL;
+	}
+	return r;
+}
+
+
 /**
  * kvm_reset_vcpu - sets core registers and sys_regs to reset value
  * @vcpu: The VCPU pointer
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 496e59a2738b..66bfbe19b434 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -932,6 +932,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_SYNIC2 148
 #define KVM_CAP_HYPERV_VP_INDEX 149
 #define KVM_CAP_S390_AIS_MIGRATION 150
+#define KVM_CAP_ARM_CONFIG_PHYS_SHIFT 151
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1261,6 +1262,9 @@  struct kvm_s390_ucas_mapping {
 #define KVM_PPC_CONFIGURE_V3_MMU  _IOW(KVMIO,  0xaf, struct kvm_ppc_mmuv3_cfg)
 /* Available with KVM_CAP_PPC_RADIX_MMU */
 #define KVM_PPC_GET_RMMU_INFO	  _IOW(KVMIO,  0xb0, struct kvm_ppc_rmmu_info)
+/* Available with KVM_CAP_ARM_CONFIG_PHYS_SHIFT */
+#define KVM_ARM_GET_PHYS_SHIFT	  _IOR(KVMIO, 0xb1, __u32)
+#define KVM_ARM_SET_PHYS_SHIFT	  _IOW(KVMIO, 0xb2, __u32)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index e0bf8d19fcfe..05fc49304722 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1136,7 +1136,7 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		return 0;
 	}
 	default:
-		return -EINVAL;
+		return kvm_arch_dev_vm_ioctl(kvm, ioctl, arg);
 	}
 }