diff mbox series

[v13,09/12] mm: x86: Invoke hypercall when page encryption status is changed

Message ID f2340642c5b8d597a099285194fca8d05c9843bd.1618498113.git.ashish.kalra@amd.com (mailing list archive)
State New, archived
Headers show
Series Add AMD SEV guest live migration support | expand

Commit Message

Kalra, Ashish April 15, 2021, 3:57 p.m. UTC
From: Brijesh Singh <brijesh.singh@amd.com>

Invoke a hypercall when a memory region is changed from encrypted ->
decrypted and vice versa. Hypervisor needs to know the page encryption
status during the guest migration.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: x86@kernel.org
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Steve Rutherford <srutherford@google.com>
Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 arch/x86/include/asm/paravirt.h       | 10 +++++
 arch/x86/include/asm/paravirt_types.h |  2 +
 arch/x86/kernel/paravirt.c            |  1 +
 arch/x86/mm/mem_encrypt.c             | 57 ++++++++++++++++++++++++++-
 arch/x86/mm/pat/set_memory.c          |  7 ++++
 5 files changed, 76 insertions(+), 1 deletion(-)

Comments

Paolo Bonzini April 20, 2021, 9:39 a.m. UTC | #1
On 15/04/21 17:57, Ashish Kalra wrote:
> From: Brijesh Singh <brijesh.singh@amd.com>
> 
> Invoke a hypercall when a memory region is changed from encrypted ->
> decrypted and vice versa. Hypervisor needs to know the page encryption
> status during the guest migration.

Boris, can you ack this patch?

Paolo

> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: x86@kernel.org
> Cc: kvm@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Reviewed-by: Steve Rutherford <srutherford@google.com>
> Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>   arch/x86/include/asm/paravirt.h       | 10 +++++
>   arch/x86/include/asm/paravirt_types.h |  2 +
>   arch/x86/kernel/paravirt.c            |  1 +
>   arch/x86/mm/mem_encrypt.c             | 57 ++++++++++++++++++++++++++-
>   arch/x86/mm/pat/set_memory.c          |  7 ++++
>   5 files changed, 76 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index 4abf110e2243..efaa3e628967 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -84,6 +84,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
>   	PVOP_VCALL1(mmu.exit_mmap, mm);
>   }
>   
> +static inline void page_encryption_changed(unsigned long vaddr, int npages,
> +						bool enc)
> +{
> +	PVOP_VCALL3(mmu.page_encryption_changed, vaddr, npages, enc);
> +}
> +
>   #ifdef CONFIG_PARAVIRT_XXL
>   static inline void load_sp0(unsigned long sp0)
>   {
> @@ -799,6 +805,10 @@ static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
>   static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
>   {
>   }
> +
> +static inline void page_encryption_changed(unsigned long vaddr, int npages, bool enc)
> +{
> +}
>   #endif
>   #endif /* __ASSEMBLY__ */
>   #endif /* _ASM_X86_PARAVIRT_H */
> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> index de87087d3bde..69ef9c207b38 100644
> --- a/arch/x86/include/asm/paravirt_types.h
> +++ b/arch/x86/include/asm/paravirt_types.h
> @@ -195,6 +195,8 @@ struct pv_mmu_ops {
>   
>   	/* Hook for intercepting the destruction of an mm_struct. */
>   	void (*exit_mmap)(struct mm_struct *mm);
> +	void (*page_encryption_changed)(unsigned long vaddr, int npages,
> +					bool enc);
>   
>   #ifdef CONFIG_PARAVIRT_XXL
>   	struct paravirt_callee_save read_cr2;
> diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
> index c60222ab8ab9..9f206e192f6b 100644
> --- a/arch/x86/kernel/paravirt.c
> +++ b/arch/x86/kernel/paravirt.c
> @@ -335,6 +335,7 @@ struct paravirt_patch_template pv_ops = {
>   			(void (*)(struct mmu_gather *, void *))tlb_remove_page,
>   
>   	.mmu.exit_mmap		= paravirt_nop,
> +	.mmu.page_encryption_changed	= paravirt_nop,
>   
>   #ifdef CONFIG_PARAVIRT_XXL
>   	.mmu.read_cr2		= __PV_IS_CALLEE_SAVE(native_read_cr2),
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index ae78cef79980..fae9ccbd0da7 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -19,6 +19,7 @@
>   #include <linux/kernel.h>
>   #include <linux/bitops.h>
>   #include <linux/dma-mapping.h>
> +#include <linux/kvm_para.h>
>   
>   #include <asm/tlbflush.h>
>   #include <asm/fixmap.h>
> @@ -29,6 +30,7 @@
>   #include <asm/processor-flags.h>
>   #include <asm/msr.h>
>   #include <asm/cmdline.h>
> +#include <asm/kvm_para.h>
>   
>   #include "mm_internal.h"
>   
> @@ -229,6 +231,47 @@ void __init sev_setup_arch(void)
>   	swiotlb_adjust_size(size);
>   }
>   
> +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages,
> +					bool enc)
> +{
> +	unsigned long sz = npages << PAGE_SHIFT;
> +	unsigned long vaddr_end, vaddr_next;
> +
> +	vaddr_end = vaddr + sz;
> +
> +	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
> +		int psize, pmask, level;
> +		unsigned long pfn;
> +		pte_t *kpte;
> +
> +		kpte = lookup_address(vaddr, &level);
> +		if (!kpte || pte_none(*kpte))
> +			return;
> +
> +		switch (level) {
> +		case PG_LEVEL_4K:
> +			pfn = pte_pfn(*kpte);
> +			break;
> +		case PG_LEVEL_2M:
> +			pfn = pmd_pfn(*(pmd_t *)kpte);
> +			break;
> +		case PG_LEVEL_1G:
> +			pfn = pud_pfn(*(pud_t *)kpte);
> +			break;
> +		default:
> +			return;
> +		}
> +
> +		psize = page_level_size(level);
> +		pmask = page_level_mask(level);
> +
> +		kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS,
> +				   pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc);
> +
> +		vaddr_next = (vaddr & pmask) + psize;
> +	}
> +}
> +
>   static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
>   {
>   	pgprot_t old_prot, new_prot;
> @@ -286,12 +329,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
>   static int __init early_set_memory_enc_dec(unsigned long vaddr,
>   					   unsigned long size, bool enc)
>   {
> -	unsigned long vaddr_end, vaddr_next;
> +	unsigned long vaddr_end, vaddr_next, start;
>   	unsigned long psize, pmask;
>   	int split_page_size_mask;
>   	int level, ret;
>   	pte_t *kpte;
>   
> +	start = vaddr;
>   	vaddr_next = vaddr;
>   	vaddr_end = vaddr + size;
>   
> @@ -346,6 +390,8 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
>   
>   	ret = 0;
>   
> +	set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT,
> +					enc);
>   out:
>   	__flush_tlb_all();
>   	return ret;
> @@ -481,6 +527,15 @@ void __init mem_encrypt_init(void)
>   	if (sev_active() && !sev_es_active())
>   		static_branch_enable(&sev_enable_key);
>   
> +#ifdef CONFIG_PARAVIRT
> +	/*
> +	 * With SEV, we need to make a hypercall when page encryption state is
> +	 * changed.
> +	 */
> +	if (sev_active())
> +		pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
> +#endif
> +
>   	print_mem_encrypt_feature_info();
>   }
>   
> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> index 16f878c26667..3576b583ac65 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -27,6 +27,7 @@
>   #include <asm/proto.h>
>   #include <asm/memtype.h>
>   #include <asm/set_memory.h>
> +#include <asm/paravirt.h>
>   
>   #include "../mm_internal.h"
>   
> @@ -2012,6 +2013,12 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
>   	 */
>   	cpa_flush(&cpa, 0);
>   
> +	/* Notify hypervisor that a given memory range is mapped encrypted
> +	 * or decrypted. The hypervisor will use this information during the
> +	 * VM migration.
> +	 */
> +	page_encryption_changed(addr, numpages, enc);
> +
>   	return ret;
>   }
>   
>
Borislav Petkov April 21, 2021, 10:05 a.m. UTC | #2
On Thu, Apr 15, 2021 at 03:57:26PM +0000, Ashish Kalra wrote:
> +static inline void page_encryption_changed(unsigned long vaddr, int npages,
> +						bool enc)

When you see a function name "page_encryption_changed", what does that
tell you about what that function does?

Dunno but it doesn't tell me a whole lot.

Now look at the other function names in struct pv_mmu_ops.

See the difference?

> +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages,

If I had to guess what that function does just by reading its name, it
sets a memory encryption/decryption hypercall.

Am I close?

> +					bool enc)
> +{
> +	unsigned long sz = npages << PAGE_SHIFT;
> +	unsigned long vaddr_end, vaddr_next;
> +
> +	vaddr_end = vaddr + sz;
> +
> +	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
> +		int psize, pmask, level;
> +		unsigned long pfn;
> +		pte_t *kpte;
> +
> +		kpte = lookup_address(vaddr, &level);
> +		if (!kpte || pte_none(*kpte))
> +			return;
> +
> +		switch (level) {
> +		case PG_LEVEL_4K:
> +			pfn = pte_pfn(*kpte);
> +			break;
> +		case PG_LEVEL_2M:
> +			pfn = pmd_pfn(*(pmd_t *)kpte);
> +			break;
> +		case PG_LEVEL_1G:
> +			pfn = pud_pfn(*(pud_t *)kpte);
> +			break;
> +		default:
> +			return;
> +		}

Pretty much that same thing is in __set_clr_pte_enc(). Make a helper
function pls.

> +
> +		psize = page_level_size(level);
> +		pmask = page_level_mask(level);
> +
> +		kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS,
> +				   pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc);
> +
> +		vaddr_next = (vaddr & pmask) + psize;
> +	}

As with other patches from Brijesh, that should be a while loop. :)

> +}
> +
>  static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
>  {
>  	pgprot_t old_prot, new_prot;
> @@ -286,12 +329,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
>  static int __init early_set_memory_enc_dec(unsigned long vaddr,
>  					   unsigned long size, bool enc)
>  {
> -	unsigned long vaddr_end, vaddr_next;
> +	unsigned long vaddr_end, vaddr_next, start;
>  	unsigned long psize, pmask;
>  	int split_page_size_mask;
>  	int level, ret;
>  	pte_t *kpte;
>  
> +	start = vaddr;
>  	vaddr_next = vaddr;
>  	vaddr_end = vaddr + size;
>  
> @@ -346,6 +390,8 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
>  
>  	ret = 0;
>  
> +	set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT,
> +					enc);
>  out:
>  	__flush_tlb_all();
>  	return ret;
> @@ -481,6 +527,15 @@ void __init mem_encrypt_init(void)
>  	if (sev_active() && !sev_es_active())
>  		static_branch_enable(&sev_enable_key);
>  
> +#ifdef CONFIG_PARAVIRT
> +	/*
> +	 * With SEV, we need to make a hypercall when page encryption state is
> +	 * changed.
> +	 */
> +	if (sev_active())
> +		pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
> +#endif

There's already a sev_active() check above it. Merge the two pls.

> +
>  	print_mem_encrypt_feature_info();
>  }
>  
> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> index 16f878c26667..3576b583ac65 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -27,6 +27,7 @@
>  #include <asm/proto.h>
>  #include <asm/memtype.h>
>  #include <asm/set_memory.h>
> +#include <asm/paravirt.h>
>  
>  #include "../mm_internal.h"
>  
> @@ -2012,6 +2013,12 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
>  	 */
>  	cpa_flush(&cpa, 0);
>  
> +	/* Notify hypervisor that a given memory range is mapped encrypted
> +	 * or decrypted. The hypervisor will use this information during the
> +	 * VM migration.
> +	 */

Kernel comments style is:

	/*
	 * A sentence ending with a full-stop.
	 * Another sentence. ...
	 * More sentences. ...
	 */
Paolo Bonzini April 21, 2021, noon UTC | #3
On 21/04/21 12:05, Borislav Petkov wrote:
> On Thu, Apr 15, 2021 at 03:57:26PM +0000, Ashish Kalra wrote:
>> +static inline void page_encryption_changed(unsigned long vaddr, int npages,
>> +						bool enc)
> 
> When you see a function name "page_encryption_changed", what does that
> tell you about what that function does?
> 
> Dunno but it doesn't tell me a whole lot.
> 
> Now look at the other function names in struct pv_mmu_ops.
> 
> See the difference?
> 
>> +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages,
> 
> If I had to guess what that function does just by reading its name, it
> sets a memory encryption/decryption hypercall.
> 
> Am I close?

The words are right but the order is wrong (more like "hypercall to set 
some memory's encrypted/decrypted state").  Perhaps? 
kvm_hypercall_set_page_enc_status.

page_encryption_changed does not sound bad to me though, it's a 
notification-like function name.  Maybe notify_page_enc_status_changed?

Paolo

>> +					bool enc)
>> +{
>> +	unsigned long sz = npages << PAGE_SHIFT;
>> +	unsigned long vaddr_end, vaddr_next;
>> +
>> +	vaddr_end = vaddr + sz;
>> +
>> +	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
>> +		int psize, pmask, level;
>> +		unsigned long pfn;
>> +		pte_t *kpte;
>> +
>> +		kpte = lookup_address(vaddr, &level);
>> +		if (!kpte || pte_none(*kpte))
>> +			return;
>> +
>> +		switch (level) {
>> +		case PG_LEVEL_4K:
>> +			pfn = pte_pfn(*kpte);
>> +			break;
>> +		case PG_LEVEL_2M:
>> +			pfn = pmd_pfn(*(pmd_t *)kpte);
>> +			break;
>> +		case PG_LEVEL_1G:
>> +			pfn = pud_pfn(*(pud_t *)kpte);
>> +			break;
>> +		default:
>> +			return;
>> +		}
> 
> Pretty much that same thing is in __set_clr_pte_enc(). Make a helper
> function pls.
> 
>> +
>> +		psize = page_level_size(level);
>> +		pmask = page_level_mask(level);
>> +
>> +		kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS,
>> +				   pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc);
>> +
>> +		vaddr_next = (vaddr & pmask) + psize;
>> +	}
> 
> As with other patches from Brijesh, that should be a while loop. :)
> 
>> +}
>> +
>>   static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
>>   {
>>   	pgprot_t old_prot, new_prot;
>> @@ -286,12 +329,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
>>   static int __init early_set_memory_enc_dec(unsigned long vaddr,
>>   					   unsigned long size, bool enc)
>>   {
>> -	unsigned long vaddr_end, vaddr_next;
>> +	unsigned long vaddr_end, vaddr_next, start;
>>   	unsigned long psize, pmask;
>>   	int split_page_size_mask;
>>   	int level, ret;
>>   	pte_t *kpte;
>>   
>> +	start = vaddr;
>>   	vaddr_next = vaddr;
>>   	vaddr_end = vaddr + size;
>>   
>> @@ -346,6 +390,8 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
>>   
>>   	ret = 0;
>>   
>> +	set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT,
>> +					enc);
>>   out:
>>   	__flush_tlb_all();
>>   	return ret;
>> @@ -481,6 +527,15 @@ void __init mem_encrypt_init(void)
>>   	if (sev_active() && !sev_es_active())
>>   		static_branch_enable(&sev_enable_key);
>>   
>> +#ifdef CONFIG_PARAVIRT
>> +	/*
>> +	 * With SEV, we need to make a hypercall when page encryption state is
>> +	 * changed.
>> +	 */
>> +	if (sev_active())
>> +		pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
>> +#endif
> 
> There's already a sev_active() check above it. Merge the two pls.
> 
>> +
>>   	print_mem_encrypt_feature_info();
>>   }
>>   
>> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
>> index 16f878c26667..3576b583ac65 100644
>> --- a/arch/x86/mm/pat/set_memory.c
>> +++ b/arch/x86/mm/pat/set_memory.c
>> @@ -27,6 +27,7 @@
>>   #include <asm/proto.h>
>>   #include <asm/memtype.h>
>>   #include <asm/set_memory.h>
>> +#include <asm/paravirt.h>
>>   
>>   #include "../mm_internal.h"
>>   
>> @@ -2012,6 +2013,12 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
>>   	 */
>>   	cpa_flush(&cpa, 0);
>>   
>> +	/* Notify hypervisor that a given memory range is mapped encrypted
>> +	 * or decrypted. The hypervisor will use this information during the
>> +	 * VM migration.
>> +	 */
> 
> Kernel comments style is:
> 
> 	/*
> 	 * A sentence ending with a full-stop.
> 	 * Another sentence. ...
> 	 * More sentences. ...
> 	 */
>
Kalra, Ashish April 21, 2021, 12:12 p.m. UTC | #4
On Wed, Apr 21, 2021 at 12:05:08PM +0200, Borislav Petkov wrote:
> On Thu, Apr 15, 2021 at 03:57:26PM +0000, Ashish Kalra wrote:
> > +static inline void page_encryption_changed(unsigned long vaddr, int npages,
> > +						bool enc)
> 
> When you see a function name "page_encryption_changed", what does that
> tell you about what that function does?
> 
> Dunno but it doesn't tell me a whole lot.
> 
> Now look at the other function names in struct pv_mmu_ops.
> 
> See the difference?
> 
> > +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages,
> 
> If I had to guess what that function does just by reading its name, it
> sets a memory encryption/decryption hypercall.
> 
> Am I close?
> 
> > +					bool enc)
> > +{
> > +	unsigned long sz = npages << PAGE_SHIFT;
> > +	unsigned long vaddr_end, vaddr_next;
> > +
> > +	vaddr_end = vaddr + sz;
> > +
> > +	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
> > +		int psize, pmask, level;
> > +		unsigned long pfn;
> > +		pte_t *kpte;
> > +
> > +		kpte = lookup_address(vaddr, &level);
> > +		if (!kpte || pte_none(*kpte))
> > +			return;
> > +
> > +		switch (level) {
> > +		case PG_LEVEL_4K:
> > +			pfn = pte_pfn(*kpte);
> > +			break;
> > +		case PG_LEVEL_2M:
> > +			pfn = pmd_pfn(*(pmd_t *)kpte);
> > +			break;
> > +		case PG_LEVEL_1G:
> > +			pfn = pud_pfn(*(pud_t *)kpte);
> > +			break;
> > +		default:
> > +			return;
> > +		}
> 
> Pretty much that same thing is in __set_clr_pte_enc(). Make a helper
> function pls.
> 

Yes, both have some common code, but it is only this page level/size
check, and they pretty much do different things with page size
evaluation, i think it will be cleaner to keep the code separately for
both these functions.

> > +
> > +		psize = page_level_size(level);
> > +		pmask = page_level_mask(level);
> > +
> > +		kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS,
> > +				   pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc);
> > +
> > +		vaddr_next = (vaddr & pmask) + psize;
> > +	}
> 
> As with other patches from Brijesh, that should be a while loop. :)
>

I see that early_set_memory_enc_dec() is also using a for loop, so which
patches are you referring to ?

> > +}
> > +
> >  static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
> >  {
> >  	pgprot_t old_prot, new_prot;
> > @@ -286,12 +329,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
> >  static int __init early_set_memory_enc_dec(unsigned long vaddr,
> >  					   unsigned long size, bool enc)
> >  {
> > -	unsigned long vaddr_end, vaddr_next;
> > +	unsigned long vaddr_end, vaddr_next, start;
> >  	unsigned long psize, pmask;
> >  	int split_page_size_mask;
> >  	int level, ret;
> >  	pte_t *kpte;
> >  
> > +	start = vaddr;
> >  	vaddr_next = vaddr;
> >  	vaddr_end = vaddr + size;
> >  
> > @@ -346,6 +390,8 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
> >  
> >  	ret = 0;
> >  
> > +	set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT,
> > +					enc);
> >  out:
> >  	__flush_tlb_all();
> >  	return ret;
> > @@ -481,6 +527,15 @@ void __init mem_encrypt_init(void)
> >  	if (sev_active() && !sev_es_active())
> >  		static_branch_enable(&sev_enable_key);
> >  
> > +#ifdef CONFIG_PARAVIRT
> > +	/*
> > +	 * With SEV, we need to make a hypercall when page encryption state is
> > +	 * changed.
> > +	 */
> > +	if (sev_active())
> > +		pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
> > +#endif
> 
> There's already a sev_active() check above it. Merge the two pls.
>

Ok. 

> > +
> >  	print_mem_encrypt_feature_info();
> >  }
> >  
> > diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> > index 16f878c26667..3576b583ac65 100644
> > --- a/arch/x86/mm/pat/set_memory.c
> > +++ b/arch/x86/mm/pat/set_memory.c
> > @@ -27,6 +27,7 @@
> >  #include <asm/proto.h>
> >  #include <asm/memtype.h>
> >  #include <asm/set_memory.h>
> > +#include <asm/paravirt.h>
> >  
> >  #include "../mm_internal.h"
> >  
> > @@ -2012,6 +2013,12 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
> >  	 */
> >  	cpa_flush(&cpa, 0);
> >  
> > +	/* Notify hypervisor that a given memory range is mapped encrypted
> > +	 * or decrypted. The hypervisor will use this information during the
> > +	 * VM migration.
> > +	 */
> 
> Kernel comments style is:
> 
> 	/*
> 	 * A sentence ending with a full-stop.
> 	 * Another sentence. ...
> 	 * More sentences. ...
> 	 */

Ok.

> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpeople.kernel.org%2Ftglx%2Fnotes-about-netiquette&amp;data=04%7C01%7CAshish.Kalra%40amd.com%7Cf953299226ec42b5077308d904ad427c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637545964477197662%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=XS9tvx%2BlDPCKGFgsv7jruSrF6kUzAMIqUhBke7rtO5k%3D&amp;reserved=0
Brijesh Singh April 21, 2021, 1:50 p.m. UTC | #5
On 4/21/21 7:12 AM, Ashish Kalra wrote:
>>> +
>>> +		psize = page_level_size(level);
>>> +		pmask = page_level_mask(level);
>>> +
>>> +		kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS,
>>> +				   pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc);
>>> +
>>> +		vaddr_next = (vaddr & pmask) + psize;
>>> +	}
>> As with other patches from Brijesh, that should be a while loop. :)
>>
> I see that early_set_memory_enc_dec() is also using a for loop, so which
> patches are you referring to ?
>
I guess Boris is referring to my SNP patches. Please go ahead and use
the while loop as recommended by Boris.

-Brijesh
Borislav Petkov April 21, 2021, 1:52 p.m. UTC | #6
On Wed, Apr 21, 2021 at 12:12:13PM +0000, Ashish Kalra wrote:
> Yes, both have some common code, but it is only this page level/size
> ...

See below for what I mean. Diff ontop of yours.

> I see that early_set_memory_enc_dec() is also using a for loop, so which
> patches are you referring to ?

The SNP guest set has this pattern:

https://lkml.kernel.org/r/20210408114049.GI10192@zn.tnic

---

diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b1d59d2b3bf6..e823645101ee 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -232,6 +232,37 @@ void __init sev_setup_arch(void)
 	swiotlb_adjust_size(size);
 }
 
+static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
+{
+	unsigned long pfn = 0;
+	pgprot_t prot;
+
+	switch (level) {
+	case PG_LEVEL_4K:
+		pfn = pte_pfn(*kpte);
+		prot = pte_pgprot(*kpte);
+		break;
+
+	case PG_LEVEL_2M:
+		pfn = pmd_pfn(*(pmd_t *)kpte);
+		prot = pmd_pgprot(*(pmd_t *)kpte);
+		break;
+
+	case PG_LEVEL_1G:
+		pfn = pud_pfn(*(pud_t *)kpte);
+		prot = pud_pgprot(*(pud_t *)kpte);
+		break;
+
+	default:
+		return 0;
+	}
+
+	if (ret_prot)
+		*ret_prot = prot;
+
+	return pfn;
+}
+
 static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages,
 					bool enc)
 {
@@ -249,19 +280,9 @@ static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages,
 		if (!kpte || pte_none(*kpte))
 			return;
 
-		switch (level) {
-		case PG_LEVEL_4K:
-			pfn = pte_pfn(*kpte);
-			break;
-		case PG_LEVEL_2M:
-			pfn = pmd_pfn(*(pmd_t *)kpte);
-			break;
-		case PG_LEVEL_1G:
-			pfn = pud_pfn(*(pud_t *)kpte);
-			break;
-		default:
-			return;
-		}
+		pfn = pg_level_to_pfn(level, kpte, NULL);
+		if (!pfn)
+			continue;
 
 		psize = page_level_size(level);
 		pmask = page_level_mask(level);
@@ -279,22 +300,9 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 	unsigned long pfn, pa, size;
 	pte_t new_pte;
 
-	switch (level) {
-	case PG_LEVEL_4K:
-		pfn = pte_pfn(*kpte);
-		old_prot = pte_pgprot(*kpte);
-		break;
-	case PG_LEVEL_2M:
-		pfn = pmd_pfn(*(pmd_t *)kpte);
-		old_prot = pmd_pgprot(*(pmd_t *)kpte);
-		break;
-	case PG_LEVEL_1G:
-		pfn = pud_pfn(*(pud_t *)kpte);
-		old_prot = pud_pgprot(*(pud_t *)kpte);
-		break;
-	default:
+	pfn = pg_level_to_pfn(level, kpte, &old_prot);
+	if (!pfn)
 		return;
-	}
 
 	new_prot = old_prot;
 	if (enc)
Borislav Petkov April 21, 2021, 2:09 p.m. UTC | #7
On Wed, Apr 21, 2021 at 02:00:42PM +0200, Paolo Bonzini wrote:
> The words are right but the order is wrong (more like "hypercall to set some
> memory's encrypted/decrypted state").  Perhaps?
> kvm_hypercall_set_page_enc_status.

Yap.

> page_encryption_changed does not sound bad to me though, it's a
> notification-like function name. Maybe notify_page_enc_status_changed?

Yap again. Those are better.

Thx.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 4abf110e2243..efaa3e628967 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -84,6 +84,12 @@  static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
 	PVOP_VCALL1(mmu.exit_mmap, mm);
 }
 
+static inline void page_encryption_changed(unsigned long vaddr, int npages,
+						bool enc)
+{
+	PVOP_VCALL3(mmu.page_encryption_changed, vaddr, npages, enc);
+}
+
 #ifdef CONFIG_PARAVIRT_XXL
 static inline void load_sp0(unsigned long sp0)
 {
@@ -799,6 +805,10 @@  static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm,
 static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
 {
 }
+
+static inline void page_encryption_changed(unsigned long vaddr, int npages, bool enc)
+{
+}
 #endif
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index de87087d3bde..69ef9c207b38 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -195,6 +195,8 @@  struct pv_mmu_ops {
 
 	/* Hook for intercepting the destruction of an mm_struct. */
 	void (*exit_mmap)(struct mm_struct *mm);
+	void (*page_encryption_changed)(unsigned long vaddr, int npages,
+					bool enc);
 
 #ifdef CONFIG_PARAVIRT_XXL
 	struct paravirt_callee_save read_cr2;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c60222ab8ab9..9f206e192f6b 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -335,6 +335,7 @@  struct paravirt_patch_template pv_ops = {
 			(void (*)(struct mmu_gather *, void *))tlb_remove_page,
 
 	.mmu.exit_mmap		= paravirt_nop,
+	.mmu.page_encryption_changed	= paravirt_nop,
 
 #ifdef CONFIG_PARAVIRT_XXL
 	.mmu.read_cr2		= __PV_IS_CALLEE_SAVE(native_read_cr2),
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index ae78cef79980..fae9ccbd0da7 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -19,6 +19,7 @@ 
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/dma-mapping.h>
+#include <linux/kvm_para.h>
 
 #include <asm/tlbflush.h>
 #include <asm/fixmap.h>
@@ -29,6 +30,7 @@ 
 #include <asm/processor-flags.h>
 #include <asm/msr.h>
 #include <asm/cmdline.h>
+#include <asm/kvm_para.h>
 
 #include "mm_internal.h"
 
@@ -229,6 +231,47 @@  void __init sev_setup_arch(void)
 	swiotlb_adjust_size(size);
 }
 
+static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages,
+					bool enc)
+{
+	unsigned long sz = npages << PAGE_SHIFT;
+	unsigned long vaddr_end, vaddr_next;
+
+	vaddr_end = vaddr + sz;
+
+	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+		int psize, pmask, level;
+		unsigned long pfn;
+		pte_t *kpte;
+
+		kpte = lookup_address(vaddr, &level);
+		if (!kpte || pte_none(*kpte))
+			return;
+
+		switch (level) {
+		case PG_LEVEL_4K:
+			pfn = pte_pfn(*kpte);
+			break;
+		case PG_LEVEL_2M:
+			pfn = pmd_pfn(*(pmd_t *)kpte);
+			break;
+		case PG_LEVEL_1G:
+			pfn = pud_pfn(*(pud_t *)kpte);
+			break;
+		default:
+			return;
+		}
+
+		psize = page_level_size(level);
+		pmask = page_level_mask(level);
+
+		kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS,
+				   pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc);
+
+		vaddr_next = (vaddr & pmask) + psize;
+	}
+}
+
 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 {
 	pgprot_t old_prot, new_prot;
@@ -286,12 +329,13 @@  static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 static int __init early_set_memory_enc_dec(unsigned long vaddr,
 					   unsigned long size, bool enc)
 {
-	unsigned long vaddr_end, vaddr_next;
+	unsigned long vaddr_end, vaddr_next, start;
 	unsigned long psize, pmask;
 	int split_page_size_mask;
 	int level, ret;
 	pte_t *kpte;
 
+	start = vaddr;
 	vaddr_next = vaddr;
 	vaddr_end = vaddr + size;
 
@@ -346,6 +390,8 @@  static int __init early_set_memory_enc_dec(unsigned long vaddr,
 
 	ret = 0;
 
+	set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT,
+					enc);
 out:
 	__flush_tlb_all();
 	return ret;
@@ -481,6 +527,15 @@  void __init mem_encrypt_init(void)
 	if (sev_active() && !sev_es_active())
 		static_branch_enable(&sev_enable_key);
 
+#ifdef CONFIG_PARAVIRT
+	/*
+	 * With SEV, we need to make a hypercall when page encryption state is
+	 * changed.
+	 */
+	if (sev_active())
+		pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall;
+#endif
+
 	print_mem_encrypt_feature_info();
 }
 
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 16f878c26667..3576b583ac65 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -27,6 +27,7 @@ 
 #include <asm/proto.h>
 #include <asm/memtype.h>
 #include <asm/set_memory.h>
+#include <asm/paravirt.h>
 
 #include "../mm_internal.h"
 
@@ -2012,6 +2013,12 @@  static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 	 */
 	cpa_flush(&cpa, 0);
 
+	/* Notify hypervisor that a given memory range is mapped encrypted
+	 * or decrypted. The hypervisor will use this information during the
+	 * VM migration.
+	 */
+	page_encryption_changed(addr, numpages, enc);
+
 	return ret;
 }