
[RFC,2/2] arm64: kvm: Introduce MTE VCPU feature

Message ID 20200617123844.29960-3-steven.price@arm.com (mailing list archive)
State New, archived
Series MTE support for KVM guest

Commit Message

Steven Price June 17, 2020, 12:38 p.m. UTC
Add a new VCPU feature 'KVM_ARM_VCPU_MTE' which enables memory tagging
on a VCPU. When enabled on any VCPU in the virtual machine, this causes
all pages that are faulted into the VM to have the PG_mte_tagged flag
set (and the tag storage cleared if this is the first use).
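
For illustration, a VMM would request the feature via KVM_ARM_VCPU_INIT
roughly as follows (sketch only: vm_fd/vcpu_fd are assumed to come from
the usual KVM_CREATE_VM/KVM_CREATE_VCPU setup, and error handling is
omitted):

	struct kvm_vcpu_init init;

	/* Start from the host's preferred target, as usual. */
	ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init);

	/* Request memory tagging for this VCPU. */
	init.features[0] |= 1 << KVM_ARM_VCPU_MTE;

	ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);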

Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/arm64/include/asm/kvm_emulate.h |  3 +++
 arch/arm64/include/asm/kvm_host.h    |  2 +-
 arch/arm64/include/uapi/asm/kvm.h    |  1 +
 arch/arm64/kvm/reset.c               |  8 ++++++++
 arch/arm64/kvm/sys_regs.c            |  3 ++-
 virt/kvm/arm/mmu.c                   | 11 +++++++++++
 6 files changed, 26 insertions(+), 2 deletions(-)

Comments

Catalin Marinas June 17, 2020, 2:38 p.m. UTC | #1
On Wed, Jun 17, 2020 at 01:38:44PM +0100, Steven Price wrote:
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index e3b9ee268823..040a7fffaa93 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -1783,6 +1783,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  			vma_pagesize = PMD_SIZE;
>  	}
>  
> +	if (system_supports_mte() && kvm->arch.vcpu_has_mte) {
> +		/*
> +		 * VM will be able to see the page's tags, so we must ensure
> +		 * they have been initialised.
> +		 */
> +		struct page *page = pfn_to_page(pfn);
> +
> +		if (!test_and_set_bit(PG_mte_tagged, &page->flags))
> +			mte_clear_page_tags(page_address(page), page_size(page));
> +	}

Are all the guest pages always mapped via a Stage 2 fault? It may be
better if we did that via kvm_set_spte_hva().
Steven Price June 17, 2020, 3:34 p.m. UTC | #2
On 17/06/2020 15:38, Catalin Marinas wrote:
> On Wed, Jun 17, 2020 at 01:38:44PM +0100, Steven Price wrote:
>> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
>> index e3b9ee268823..040a7fffaa93 100644
>> --- a/virt/kvm/arm/mmu.c
>> +++ b/virt/kvm/arm/mmu.c
>> @@ -1783,6 +1783,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>   			vma_pagesize = PMD_SIZE;
>>   	}
>>   
>> +	if (system_supports_mte() && kvm->arch.vcpu_has_mte) {
>> +		/*
>> +		 * VM will be able to see the page's tags, so we must ensure
>> +		 * they have been initialised.
>> +		 */
>> +		struct page *page = pfn_to_page(pfn);
>> +
>> +		if (!test_and_set_bit(PG_mte_tagged, &page->flags))
>> +			mte_clear_page_tags(page_address(page), page_size(page));
>> +	}
> 
> Are all the guest pages always mapped via a Stage 2 fault? It may be
> better if we did that via kvm_set_spte_hva().
> 

I was under the impression that pages are always faulted into stage 2, 
but I have to admit I'm not 100% sure about that.

kvm_set_spte_hva() may be more appropriate, although on first look I 
don't understand how that function deals with huge pages. Is it actually 
called for normal mappings or only for changes due to the likes of KSM?

Steve
James Morse June 26, 2020, 4:40 p.m. UTC | #3
Hi Steve,

On 17/06/2020 16:34, Steven Price wrote:
> On 17/06/2020 15:38, Catalin Marinas wrote:
>> On Wed, Jun 17, 2020 at 01:38:44PM +0100, Steven Price wrote:
>>> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
>>> index e3b9ee268823..040a7fffaa93 100644
>>> --- a/virt/kvm/arm/mmu.c
>>> +++ b/virt/kvm/arm/mmu.c
>>> @@ -1783,6 +1783,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>>               vma_pagesize = PMD_SIZE;
>>>       }
>>>   +    if (system_supports_mte() && kvm->arch.vcpu_has_mte) {
>>> +        /*
>>> +         * VM will be able to see the page's tags, so we must ensure
>>> +         * they have been initialised.
>>> +         */
>>> +        struct page *page = pfn_to_page(pfn);
>>> +
>>> +        if (!test_and_set_bit(PG_mte_tagged, &page->flags))
>>> +            mte_clear_page_tags(page_address(page), page_size(page));
>>> +    }
>>
>> Are all the guest pages always mapped via a Stage 2 fault? It may be
>> better if we did that via kvm_set_spte_hva().

> I was under the impression that pages are always faulted into the stage 2, but I have to
> admit I'm not 100% sure about that.

I think there is only one other case: VMAs with VM_PFNMAP set will get pre-populated during
kvm_arch_prepare_memory_region(), but they are always mapped as device memory at stage 2, so
MTE isn't a concern there.


> kvm_set_spte_hva() may be more appropriate, although on first look I don't understand how
> that function deals with huge pages. Is it actually called for normal mappings or only for
> changes due to the likes of KSM?

It looks like it's only called through set_pte_at_notify(), which is used by things like
KSM/COW that change a mapping, and really don't want to fault it a second time. I guess
those are only PAGE_SIZE mappings.

Other mapping sizes would get faulted in by user_mem_abort().


I think this should happen in the same places as we clean new pages to PoC, as that is
also an additional piece of maintenance KVM has to do that the host's stage 1 doesn't need.

You may be able to rename clean_dcache_guest_page() to encompass the maintenance that we
need when a page is made accessible to a different EL1.
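
Something along these lines, perhaps (completely untested; sanitise_guest_page() is just a
name I made up, and vcpu_has_mte is the flag this series adds):

static void sanitise_guest_page(struct kvm *kvm, kvm_pfn_t pfn,
				unsigned long size)
{
	/* Existing maintenance: clean the new page to PoC for the guest. */
	__clean_dcache_guest_page(pfn, size);

	if (system_supports_mte() && kvm->arch.vcpu_has_mte) {
		struct page *page = pfn_to_page(pfn);

		/* Initialise the tags once, before the guest can see them. */
		if (!test_and_set_bit(PG_mte_tagged, &page->flags))
			mte_clear_page_tags(page_address(page),
					    page_size(page));
	}
}

That way the paths that clean guest pages to PoC today (user_mem_abort(), and
kvm_set_spte_hva() for the KSM/COW case) would pick up the tag initialisation as well, at
the cost of passing the kvm pointer down.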


Thanks,

James

Patch

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index a30b4eec7cb4..b118f466a40b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -79,6 +79,9 @@  static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 	if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
 	    vcpu_el1_is_32bit(vcpu))
 		vcpu->arch.hcr_el2 |= HCR_TID2;
+
+	if (test_bit(KVM_ARM_VCPU_MTE, vcpu->arch.features))
+		vcpu->arch.hcr_el2 |= HCR_ATA;
 }
 
 static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 1f10e9dee2e0..3461639bb08a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -37,7 +37,7 @@ 
 
 #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
 
-#define KVM_VCPU_MAX_FEATURES 7
+#define KVM_VCPU_MAX_FEATURES 8
 
 #define KVM_REQ_SLEEP \
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ba85bb23f060..2677e1ab8c16 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -106,6 +106,7 @@  struct kvm_regs {
 #define KVM_ARM_VCPU_SVE		4 /* enable SVE for this CPU */
 #define KVM_ARM_VCPU_PTRAUTH_ADDRESS	5 /* VCPU uses address authentication */
 #define KVM_ARM_VCPU_PTRAUTH_GENERIC	6 /* VCPU uses generic authentication */
+#define KVM_ARM_VCPU_MTE		7 /* VCPU supports Memory Tagging */
 
 struct kvm_vcpu_init {
 	__u32 target;
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index ab76728e2742..f87a434c0849 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -287,6 +287,14 @@  int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	if (test_bit(KVM_ARM_VCPU_MTE, vcpu->arch.features)) {
+		if (!system_supports_mte()) {
+			ret = -EINVAL;
+			goto out;
+		}
+		vcpu->kvm->arch.vcpu_has_mte = true;
+	}
+
 	switch (vcpu->arch.target) {
 	default:
 		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 3ae008a9b0bd..a6a9552d1233 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1096,7 +1096,8 @@  static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 			val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
 		val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT);
 	} else if (id == SYS_ID_AA64PFR1_EL1) {
-		val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT);
+		if (!test_bit(KVM_ARM_VCPU_MTE, vcpu->arch.features))
+			val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT);
 	} else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) {
 		val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) |
 			 (0xfUL << ID_AA64ISAR1_API_SHIFT) |
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index e3b9ee268823..040a7fffaa93 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1783,6 +1783,17 @@  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			vma_pagesize = PMD_SIZE;
 	}
 
+	if (system_supports_mte() && kvm->arch.vcpu_has_mte) {
+		/*
+		 * VM will be able to see the page's tags, so we must ensure
+		 * they have been initialised.
+		 */
+		struct page *page = pfn_to_page(pfn);
+
+		if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+			mte_clear_page_tags(page_address(page), page_size(page));
+	}
+
 	if (writable)
 		kvm_set_pfn_dirty(pfn);