diff mbox series

[v6,05/16] KVM: TDX: Pass size to reclaim_page()

Message ID 31552f714f2fd8178f9467e9afaaf28ba3de3c7b.1699368363.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM TDX: TDP MMU: large page support | expand

Commit Message

Isaku Yamahata Nov. 7, 2023, 3 p.m. UTC
From: Xiaoyao Li <xiaoyao.li@intel.com>

A 2MB large page can be added to a TD directly with tdh_mem_page_aug(). In
this case, the page must also be reclaimed and cleared as a 2MB page.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/vmx/tdx.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

Comments

Binbin Wu Nov. 19, 2023, 6:42 a.m. UTC | #1
On 11/7/2023 11:00 PM, isaku.yamahata@intel.com wrote:
> From: Xiaoyao Li <xiaoyao.li@intel.com>
>
> A 2MB large page can be tdh_mem_page_aug()'ed to TD directly. In this case,
> it needs to reclaim and clear the page as 2MB size.
>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>   arch/x86/kvm/vmx/tdx.c | 27 +++++++++++++++------------
>   1 file changed, 15 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index a728175c4a6d..0fca863faeee 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -200,12 +200,13 @@ static void tdx_disassociate_vp_on_cpu(struct kvm_vcpu *vcpu)
>   	smp_call_function_single(cpu, tdx_disassociate_vp_arg, vcpu, 1);
>   }
>   
> -static void tdx_clear_page(unsigned long page_pa)
> +static void tdx_clear_page(unsigned long page_pa, int size)
The size parameter should be "unsigned long" rather than "int" to avoid an
implicit type conversion when it is compared with the unsigned long loop
counter i.

>   {
>   	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
>   	void *page = __va(page_pa);
>   	unsigned long i;
>   
> +	WARN_ON_ONCE(size % PAGE_SIZE);
>   	/*
>   	 * When re-assign one page from old keyid to a new keyid, MOVDIR64B is
>   	 * required to clear/write the page with new keyid to prevent integrity
> @@ -214,7 +215,7 @@ static void tdx_clear_page(unsigned long page_pa)
>   	 * clflush doesn't flush cache with HKID set.  The cache line could be
>   	 * poisoned (even without MKTME-i), clear the poison bit.
>   	 */
> -	for (i = 0; i < PAGE_SIZE; i += 64)
> +	for (i = 0; i < size; i += 64)
>   		movdir64b(page + i, zero_page);
>   	/*
>   	 * MOVDIR64B store uses WC buffer.  Prevent following memory reads
> @@ -223,7 +224,7 @@ static void tdx_clear_page(unsigned long page_pa)
>   	__mb();
>   }
>   
> -static int __tdx_reclaim_page(hpa_t pa)
> +static int __tdx_reclaim_page(hpa_t pa, enum pg_level level)
>   {
>   	struct tdx_module_args out;
>   	u64 err;
> @@ -241,17 +242,19 @@ static int __tdx_reclaim_page(hpa_t pa)
>   		pr_tdx_error(TDH_PHYMEM_PAGE_RECLAIM, err, &out);
>   		return -EIO;
>   	}
> +	/* out.r8 == tdx sept page level */
> +	WARN_ON_ONCE(out.r8 != pg_level_to_tdx_sept_level(level));
>   
>   	return 0;
>   }
>   
> -static int tdx_reclaim_page(hpa_t pa)
> +static int tdx_reclaim_page(hpa_t pa, enum pg_level level)
>   {
>   	int r;
>   
> -	r = __tdx_reclaim_page(pa);
> +	r = __tdx_reclaim_page(pa, level);
>   	if (!r)
> -		tdx_clear_page(pa);
> +		tdx_clear_page(pa, KVM_HPAGE_SIZE(level));
>   	return r;
>   }
>   
> @@ -265,7 +268,7 @@ static void tdx_reclaim_td_page(unsigned long td_page_pa)
>   	 * was already flushed by TDH.PHYMEM.CACHE.WB before here, So
>   	 * cache doesn't need to be flushed again.
>   	 */
> -	if (tdx_reclaim_page(td_page_pa))
> +	if (tdx_reclaim_page(td_page_pa, PG_LEVEL_4K))
>   		/*
>   		 * Leak the page on failure:
>   		 * tdx_reclaim_page() returns an error if and only if there's an
> @@ -497,7 +500,7 @@ void tdx_vm_free(struct kvm *kvm)
>   
>   	if (!kvm_tdx->tdr_pa)
>   		return;
> -	if (__tdx_reclaim_page(kvm_tdx->tdr_pa))
> +	if (__tdx_reclaim_page(kvm_tdx->tdr_pa, PG_LEVEL_4K))
>   		return;
>   	/*
>   	 * TDX module maps TDR with TDX global HKID.  TDX module may access TDR
> @@ -510,7 +513,7 @@ void tdx_vm_free(struct kvm *kvm)
>   		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
>   		return;
>   	}
> -	tdx_clear_page(kvm_tdx->tdr_pa);
> +	tdx_clear_page(kvm_tdx->tdr_pa, PAGE_SIZE);
>   
>   	free_page((unsigned long)__va(kvm_tdx->tdr_pa));
>   	kvm_tdx->tdr_pa = 0;
> @@ -1597,7 +1600,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
>   		 * The HKID assigned to this TD was already freed and cache
>   		 * was already flushed. We don't have to flush again.
>   		 */
> -		err = tdx_reclaim_page(hpa);
> +		err = tdx_reclaim_page(hpa, level);
>   		if (KVM_BUG_ON(err, kvm))
>   			return -EIO;
>   		tdx_unpin(kvm, pfn);
> @@ -1630,7 +1633,7 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
>   		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
>   		return -EIO;
>   	}
> -	tdx_clear_page(hpa);
> +	tdx_clear_page(hpa, PAGE_SIZE);
Should this be KVM_HPAGE_SIZE(level) instead of PAGE_SIZE?

>   	tdx_unpin(kvm, pfn);
>   	return 0;
>   }
> @@ -1742,7 +1745,7 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
>   	 * already flushed. We don't have to flush again.
>   	 */
>   	if (!is_hkid_assigned(kvm_tdx))
> -		return tdx_reclaim_page(__pa(private_spt));
> +		return tdx_reclaim_page(__pa(private_spt), PG_LEVEL_4K);
>   
>   	/*
>   	 * free_private_spt() is (obviously) called when a shadow page is being
Binbin Wu Nov. 19, 2023, 6:58 a.m. UTC | #2
On 11/19/2023 2:42 PM, Binbin Wu wrote:
>
>
> On 11/7/2023 11:00 PM, isaku.yamahata@intel.com wrote:
>> @@ -1597,7 +1600,7 @@ static int tdx_sept_drop_private_spte(struct 
>> kvm *kvm, gfn_t gfn,
>>            * The HKID assigned to this TD was already freed and cache
>>            * was already flushed. We don't have to flush again.
>>            */
>> -        err = tdx_reclaim_page(hpa);
>> +        err = tdx_reclaim_page(hpa, level);
>>           if (KVM_BUG_ON(err, kvm))
>>               return -EIO;
>>           tdx_unpin(kvm, pfn);
>> @@ -1630,7 +1633,7 @@ static int tdx_sept_drop_private_spte(struct 
>> kvm *kvm, gfn_t gfn,
>>           pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
>>           return -EIO;
>>       }
>> -    tdx_clear_page(hpa);
>> +    tdx_clear_page(hpa, PAGE_SIZE);
> Should here be KVM_HPAGE_SIZE(level) instead of  PAGE_SIZE?

OK, please ignore this comment; I see this is handled by the following
patch.

>
>>       tdx_unpin(kvm, pfn);
>>       return 0;
>>   }
>> @@ -1742,7 +1745,7 @@ static int tdx_sept_free_private_spt(struct kvm 
>> *kvm, gfn_t gfn,
>>        * already flushed. We don't have to flush again.
>>        */
>>       if (!is_hkid_assigned(kvm_tdx))
>> -        return tdx_reclaim_page(__pa(private_spt));
>> +        return tdx_reclaim_page(__pa(private_spt), PG_LEVEL_4K);
>>         /*
>>        * free_private_spt() is (obviously) called when a shadow page 
>> is being
>
>
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index a728175c4a6d..0fca863faeee 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -200,12 +200,13 @@  static void tdx_disassociate_vp_on_cpu(struct kvm_vcpu *vcpu)
 	smp_call_function_single(cpu, tdx_disassociate_vp_arg, vcpu, 1);
 }
 
-static void tdx_clear_page(unsigned long page_pa)
+static void tdx_clear_page(unsigned long page_pa, int size)
 {
 	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
 	void *page = __va(page_pa);
 	unsigned long i;
 
+	WARN_ON_ONCE(size % PAGE_SIZE);
 	/*
 	 * When re-assign one page from old keyid to a new keyid, MOVDIR64B is
 	 * required to clear/write the page with new keyid to prevent integrity
@@ -214,7 +215,7 @@  static void tdx_clear_page(unsigned long page_pa)
 	 * clflush doesn't flush cache with HKID set.  The cache line could be
 	 * poisoned (even without MKTME-i), clear the poison bit.
 	 */
-	for (i = 0; i < PAGE_SIZE; i += 64)
+	for (i = 0; i < size; i += 64)
 		movdir64b(page + i, zero_page);
 	/*
 	 * MOVDIR64B store uses WC buffer.  Prevent following memory reads
@@ -223,7 +224,7 @@  static void tdx_clear_page(unsigned long page_pa)
 	__mb();
 }
 
-static int __tdx_reclaim_page(hpa_t pa)
+static int __tdx_reclaim_page(hpa_t pa, enum pg_level level)
 {
 	struct tdx_module_args out;
 	u64 err;
@@ -241,17 +242,19 @@  static int __tdx_reclaim_page(hpa_t pa)
 		pr_tdx_error(TDH_PHYMEM_PAGE_RECLAIM, err, &out);
 		return -EIO;
 	}
+	/* out.r8 == tdx sept page level */
+	WARN_ON_ONCE(out.r8 != pg_level_to_tdx_sept_level(level));
 
 	return 0;
 }
 
-static int tdx_reclaim_page(hpa_t pa)
+static int tdx_reclaim_page(hpa_t pa, enum pg_level level)
 {
 	int r;
 
-	r = __tdx_reclaim_page(pa);
+	r = __tdx_reclaim_page(pa, level);
 	if (!r)
-		tdx_clear_page(pa);
+		tdx_clear_page(pa, KVM_HPAGE_SIZE(level));
 	return r;
 }
 
@@ -265,7 +268,7 @@  static void tdx_reclaim_td_page(unsigned long td_page_pa)
 	 * was already flushed by TDH.PHYMEM.CACHE.WB before here, So
 	 * cache doesn't need to be flushed again.
 	 */
-	if (tdx_reclaim_page(td_page_pa))
+	if (tdx_reclaim_page(td_page_pa, PG_LEVEL_4K))
 		/*
 		 * Leak the page on failure:
 		 * tdx_reclaim_page() returns an error if and only if there's an
@@ -497,7 +500,7 @@  void tdx_vm_free(struct kvm *kvm)
 
 	if (!kvm_tdx->tdr_pa)
 		return;
-	if (__tdx_reclaim_page(kvm_tdx->tdr_pa))
+	if (__tdx_reclaim_page(kvm_tdx->tdr_pa, PG_LEVEL_4K))
 		return;
 	/*
 	 * TDX module maps TDR with TDX global HKID.  TDX module may access TDR
@@ -510,7 +513,7 @@  void tdx_vm_free(struct kvm *kvm)
 		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
 		return;
 	}
-	tdx_clear_page(kvm_tdx->tdr_pa);
+	tdx_clear_page(kvm_tdx->tdr_pa, PAGE_SIZE);
 
 	free_page((unsigned long)__va(kvm_tdx->tdr_pa));
 	kvm_tdx->tdr_pa = 0;
@@ -1597,7 +1600,7 @@  static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
 		 * The HKID assigned to this TD was already freed and cache
 		 * was already flushed. We don't have to flush again.
 		 */
-		err = tdx_reclaim_page(hpa);
+		err = tdx_reclaim_page(hpa, level);
 		if (KVM_BUG_ON(err, kvm))
 			return -EIO;
 		tdx_unpin(kvm, pfn);
@@ -1630,7 +1633,7 @@  static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
 		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL);
 		return -EIO;
 	}
-	tdx_clear_page(hpa);
+	tdx_clear_page(hpa, PAGE_SIZE);
 	tdx_unpin(kvm, pfn);
 	return 0;
 }
@@ -1742,7 +1745,7 @@  static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
 	 * already flushed. We don't have to flush again.
 	 */
 	if (!is_hkid_assigned(kvm_tdx))
-		return tdx_reclaim_page(__pa(private_spt));
+		return tdx_reclaim_page(__pa(private_spt), PG_LEVEL_4K);
 
 	/*
 	 * free_private_spt() is (obviously) called when a shadow page is being