Message ID | f2340642c5b8d597a099285194fca8d05c9843bd.1618498113.git.ashish.kalra@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add AMD SEV guest live migration support | expand |
On 15/04/21 17:57, Ashish Kalra wrote: > From: Brijesh Singh <brijesh.singh@amd.com> > > Invoke a hypercall when a memory region is changed from encrypted -> > decrypted and vice versa. Hypervisor needs to know the page encryption > status during the guest migration. Boris, can you ack this patch? Paolo > Cc: Thomas Gleixner <tglx@linutronix.de> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: "H. Peter Anvin" <hpa@zytor.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Joerg Roedel <joro@8bytes.org> > Cc: Borislav Petkov <bp@suse.de> > Cc: Tom Lendacky <thomas.lendacky@amd.com> > Cc: x86@kernel.org > Cc: kvm@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > Reviewed-by: Steve Rutherford <srutherford@google.com> > Reviewed-by: Venu Busireddy <venu.busireddy@oracle.com> > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > --- > arch/x86/include/asm/paravirt.h | 10 +++++ > arch/x86/include/asm/paravirt_types.h | 2 + > arch/x86/kernel/paravirt.c | 1 + > arch/x86/mm/mem_encrypt.c | 57 ++++++++++++++++++++++++++- > arch/x86/mm/pat/set_memory.c | 7 ++++ > 5 files changed, 76 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h > index 4abf110e2243..efaa3e628967 100644 > --- a/arch/x86/include/asm/paravirt.h > +++ b/arch/x86/include/asm/paravirt.h > @@ -84,6 +84,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) > PVOP_VCALL1(mmu.exit_mmap, mm); > } > > +static inline void page_encryption_changed(unsigned long vaddr, int npages, > + bool enc) > +{ > + PVOP_VCALL3(mmu.page_encryption_changed, vaddr, npages, enc); > +} > + > #ifdef CONFIG_PARAVIRT_XXL > static inline void load_sp0(unsigned long sp0) > { > @@ -799,6 +805,10 @@ static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, > static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) > { > } > + > +static inline void page_encryption_changed(unsigned long vaddr, int npages, bool enc) > +{ > +} > #endif > #endif /* __ASSEMBLY__ */ > #endif /* _ASM_X86_PARAVIRT_H */ > diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h > index de87087d3bde..69ef9c207b38 100644 > --- a/arch/x86/include/asm/paravirt_types.h > +++ b/arch/x86/include/asm/paravirt_types.h > @@ -195,6 +195,8 @@ struct pv_mmu_ops { > > /* Hook for intercepting the destruction of an mm_struct. */ > void (*exit_mmap)(struct mm_struct *mm); > + void (*page_encryption_changed)(unsigned long vaddr, int npages, > + bool enc); > > #ifdef CONFIG_PARAVIRT_XXL > struct paravirt_callee_save read_cr2; > diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c > index c60222ab8ab9..9f206e192f6b 100644 > --- a/arch/x86/kernel/paravirt.c > +++ b/arch/x86/kernel/paravirt.c > @@ -335,6 +335,7 @@ struct paravirt_patch_template pv_ops = { > (void (*)(struct mmu_gather *, void *))tlb_remove_page, > > .mmu.exit_mmap = paravirt_nop, > + .mmu.page_encryption_changed = paravirt_nop, > > #ifdef CONFIG_PARAVIRT_XXL > .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2), > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c > index ae78cef79980..fae9ccbd0da7 100644 > --- a/arch/x86/mm/mem_encrypt.c > +++ b/arch/x86/mm/mem_encrypt.c > @@ -19,6 +19,7 @@ > #include <linux/kernel.h> > #include <linux/bitops.h> > #include <linux/dma-mapping.h> > +#include <linux/kvm_para.h> > > #include <asm/tlbflush.h> > #include <asm/fixmap.h> > @@ -29,6 +30,7 @@ > #include <asm/processor-flags.h> > #include <asm/msr.h> > #include <asm/cmdline.h> > +#include <asm/kvm_para.h> > > #include "mm_internal.h" > > @@ -229,6 +231,47 @@ void __init sev_setup_arch(void) > swiotlb_adjust_size(size); > } > > +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages, > + bool enc) > +{ > + unsigned long sz = npages << PAGE_SHIFT; > + unsigned long vaddr_end, vaddr_next; > + > + vaddr_end = vaddr + sz; > + > + for (; vaddr < vaddr_end; vaddr = vaddr_next) { > + int psize, pmask, level; > + unsigned long pfn; > + pte_t *kpte; > + > + kpte = lookup_address(vaddr, &level); > + if (!kpte || pte_none(*kpte)) > + return; > + > + switch (level) { > + case PG_LEVEL_4K: > + pfn = pte_pfn(*kpte); > + break; > + case PG_LEVEL_2M: > + pfn = pmd_pfn(*(pmd_t *)kpte); > + break; > + case PG_LEVEL_1G: > + pfn = pud_pfn(*(pud_t *)kpte); > + break; > + default: > + return; > + } > + > + psize = page_level_size(level); > + pmask = page_level_mask(level); > + > + kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS, > + pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc); > + > + vaddr_next = (vaddr & pmask) + psize; > + } > +} > + > static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) > { > pgprot_t old_prot, new_prot; > @@ -286,12 +329,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) > static int __init early_set_memory_enc_dec(unsigned long vaddr, > unsigned long size, bool enc) > { > - unsigned long vaddr_end, vaddr_next; > + unsigned long vaddr_end, vaddr_next, start; > unsigned long psize, pmask; > int split_page_size_mask; > int level, ret; > pte_t *kpte; > > + start = vaddr; > vaddr_next = vaddr; > vaddr_end = vaddr + size; > > @@ -346,6 +390,8 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, > > ret = 0; > > + set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, > + enc); > out: > __flush_tlb_all(); > return ret; > @@ -481,6 +527,15 @@ void __init mem_encrypt_init(void) > if (sev_active() && !sev_es_active()) > static_branch_enable(&sev_enable_key); > > +#ifdef CONFIG_PARAVIRT > + /* > + * With SEV, we need to make a hypercall when page encryption state is > + * changed. > + */ > + if (sev_active()) > + pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall; > +#endif > + > print_mem_encrypt_feature_info(); > } > > diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c > index 16f878c26667..3576b583ac65 100644 > --- a/arch/x86/mm/pat/set_memory.c > +++ b/arch/x86/mm/pat/set_memory.c > @@ -27,6 +27,7 @@ > #include <asm/proto.h> > #include <asm/memtype.h> > #include <asm/set_memory.h> > +#include <asm/paravirt.h> > > #include "../mm_internal.h" > > @@ -2012,6 +2013,12 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) > */ > cpa_flush(&cpa, 0); > > + /* Notify hypervisor that a given memory range is mapped encrypted > + * or decrypted. The hypervisor will use this information during the > + * VM migration. > + */ > + page_encryption_changed(addr, numpages, enc); > + > return ret; > } > >
On Thu, Apr 15, 2021 at 03:57:26PM +0000, Ashish Kalra wrote: > +static inline void page_encryption_changed(unsigned long vaddr, int npages, > + bool enc) When you see a function name "page_encryption_changed", what does that tell you about what that function does? Dunno but it doesn't tell me a whole lot. Now look at the other function names in struct pv_mmu_ops. See the difference? > +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages, If I had to guess what that function does just by reading its name, it sets a memory encryption/decryption hypercall. Am I close? > + bool enc) > +{ > + unsigned long sz = npages << PAGE_SHIFT; > + unsigned long vaddr_end, vaddr_next; > + > + vaddr_end = vaddr + sz; > + > + for (; vaddr < vaddr_end; vaddr = vaddr_next) { > + int psize, pmask, level; > + unsigned long pfn; > + pte_t *kpte; > + > + kpte = lookup_address(vaddr, &level); > + if (!kpte || pte_none(*kpte)) > + return; > + > + switch (level) { > + case PG_LEVEL_4K: > + pfn = pte_pfn(*kpte); > + break; > + case PG_LEVEL_2M: > + pfn = pmd_pfn(*(pmd_t *)kpte); > + break; > + case PG_LEVEL_1G: > + pfn = pud_pfn(*(pud_t *)kpte); > + break; > + default: > + return; > + } Pretty much that same thing is in __set_clr_pte_enc(). Make a helper function pls. > + > + psize = page_level_size(level); > + pmask = page_level_mask(level); > + > + kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS, > + pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc); > + > + vaddr_next = (vaddr & pmask) + psize; > + } As with other patches from Brijesh, that should be a while loop. :) > +} > + > static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) > { > pgprot_t old_prot, new_prot; > @@ -286,12 +329,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) > static int __init early_set_memory_enc_dec(unsigned long vaddr, > unsigned long size, bool enc) > { > - unsigned long vaddr_end, vaddr_next; > + unsigned long vaddr_end, vaddr_next, start; > unsigned long psize, pmask; > int split_page_size_mask; > int level, ret; > pte_t *kpte; > > + start = vaddr; > vaddr_next = vaddr; > vaddr_end = vaddr + size; > > @@ -346,6 +390,8 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, > > ret = 0; > > + set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, > + enc); > out: > __flush_tlb_all(); > return ret; > @@ -481,6 +527,15 @@ void __init mem_encrypt_init(void) > if (sev_active() && !sev_es_active()) > static_branch_enable(&sev_enable_key); > > +#ifdef CONFIG_PARAVIRT > + /* > + * With SEV, we need to make a hypercall when page encryption state is > + * changed. > + */ > + if (sev_active()) > + pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall; > +#endif There's already a sev_active() check above it. Merge the two pls. > + > print_mem_encrypt_feature_info(); > } > > diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c > index 16f878c26667..3576b583ac65 100644 > --- a/arch/x86/mm/pat/set_memory.c > +++ b/arch/x86/mm/pat/set_memory.c > @@ -27,6 +27,7 @@ > #include <asm/proto.h> > #include <asm/memtype.h> > #include <asm/set_memory.h> > +#include <asm/paravirt.h> > > #include "../mm_internal.h" > > @@ -2012,6 +2013,12 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) > */ > cpa_flush(&cpa, 0); > > + /* Notify hypervisor that a given memory range is mapped encrypted > + * or decrypted. The hypervisor will use this information during the > + * VM migration. > + */ Kernel comments style is: /* * A sentence ending with a full-stop. * Another sentence. ... * More sentences. ... */
On 21/04/21 12:05, Borislav Petkov wrote: > On Thu, Apr 15, 2021 at 03:57:26PM +0000, Ashish Kalra wrote: >> +static inline void page_encryption_changed(unsigned long vaddr, int npages, >> + bool enc) > > When you see a function name "page_encryption_changed", what does that > tell you about what that function does? > > Dunno but it doesn't tell me a whole lot. > > Now look at the other function names in struct pv_mmu_ops. > > See the difference? > >> +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages, > > If I had to guess what that function does just by reading its name, it > sets a memory encryption/decryption hypercall. > > Am I close? The words are right but the order is wrong (more like "hypercall to set some memory's encrypted/decrypted state"). Perhaps? kvm_hypercall_set_page_enc_status. page_encryption_changed does not sound bad to me though, it's a notification-like function name. Maybe notify_page_enc_status_changed? Paolo >> + bool enc) >> +{ >> + unsigned long sz = npages << PAGE_SHIFT; >> + unsigned long vaddr_end, vaddr_next; >> + >> + vaddr_end = vaddr + sz; >> + >> + for (; vaddr < vaddr_end; vaddr = vaddr_next) { >> + int psize, pmask, level; >> + unsigned long pfn; >> + pte_t *kpte; >> + >> + kpte = lookup_address(vaddr, &level); >> + if (!kpte || pte_none(*kpte)) >> + return; >> + >> + switch (level) { >> + case PG_LEVEL_4K: >> + pfn = pte_pfn(*kpte); >> + break; >> + case PG_LEVEL_2M: >> + pfn = pmd_pfn(*(pmd_t *)kpte); >> + break; >> + case PG_LEVEL_1G: >> + pfn = pud_pfn(*(pud_t *)kpte); >> + break; >> + default: >> + return; >> + } > > Pretty much that same thing is in __set_clr_pte_enc(). Make a helper > function pls. > >> + >> + psize = page_level_size(level); >> + pmask = page_level_mask(level); >> + >> + kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS, >> + pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc); >> + >> + vaddr_next = (vaddr & pmask) + psize; >> + } > > As with other patches from Brijesh, that should be a while loop. :) > >> +} >> + >> static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) >> { >> pgprot_t old_prot, new_prot; >> @@ -286,12 +329,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) >> static int __init early_set_memory_enc_dec(unsigned long vaddr, >> unsigned long size, bool enc) >> { >> - unsigned long vaddr_end, vaddr_next; >> + unsigned long vaddr_end, vaddr_next, start; >> unsigned long psize, pmask; >> int split_page_size_mask; >> int level, ret; >> pte_t *kpte; >> >> + start = vaddr; >> vaddr_next = vaddr; >> vaddr_end = vaddr + size; >> >> @@ -346,6 +390,8 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, >> >> ret = 0; >> >> + set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, >> + enc); >> out: >> __flush_tlb_all(); >> return ret; >> @@ -481,6 +527,15 @@ void __init mem_encrypt_init(void) >> if (sev_active() && !sev_es_active()) >> static_branch_enable(&sev_enable_key); >> >> +#ifdef CONFIG_PARAVIRT >> + /* >> + * With SEV, we need to make a hypercall when page encryption state is >> + * changed. >> + */ >> + if (sev_active()) >> + pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall; >> +#endif > > There's already a sev_active() check above it. Merge the two pls. > >> + >> print_mem_encrypt_feature_info(); >> } >> >> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c >> index 16f878c26667..3576b583ac65 100644 >> --- a/arch/x86/mm/pat/set_memory.c >> +++ b/arch/x86/mm/pat/set_memory.c >> @@ -27,6 +27,7 @@ >> #include <asm/proto.h> >> #include <asm/memtype.h> >> #include <asm/set_memory.h> >> +#include <asm/paravirt.h> >> >> #include "../mm_internal.h" >> >> @@ -2012,6 +2013,12 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) >> */ >> cpa_flush(&cpa, 0); >> >> + /* Notify hypervisor that a given memory range is mapped encrypted >> + * or decrypted. The hypervisor will use this information during the >> + * VM migration. >> + */ > > Kernel comments style is: > > /* > * A sentence ending with a full-stop. > * Another sentence. ... > * More sentences. ... > */ >
On Wed, Apr 21, 2021 at 12:05:08PM +0200, Borislav Petkov wrote: > On Thu, Apr 15, 2021 at 03:57:26PM +0000, Ashish Kalra wrote: > > +static inline void page_encryption_changed(unsigned long vaddr, int npages, > > + bool enc) > > When you see a function name "page_encryption_changed", what does that > tell you about what that function does? > > Dunno but it doesn't tell me a whole lot. > > Now look at the other function names in struct pv_mmu_ops. > > See the difference? > > > +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages, > > If I had to guess what that function does just by reading its name, it > sets a memory encryption/decryption hypercall. > > Am I close? > > > + bool enc) > > +{ > > + unsigned long sz = npages << PAGE_SHIFT; > > + unsigned long vaddr_end, vaddr_next; > > + > > + vaddr_end = vaddr + sz; > > + > > + for (; vaddr < vaddr_end; vaddr = vaddr_next) { > > + int psize, pmask, level; > > + unsigned long pfn; > > + pte_t *kpte; > > + > > + kpte = lookup_address(vaddr, &level); > > + if (!kpte || pte_none(*kpte)) > > + return; > > + > > + switch (level) { > > + case PG_LEVEL_4K: > > + pfn = pte_pfn(*kpte); > > + break; > > + case PG_LEVEL_2M: > > + pfn = pmd_pfn(*(pmd_t *)kpte); > > + break; > > + case PG_LEVEL_1G: > > + pfn = pud_pfn(*(pud_t *)kpte); > > + break; > > + default: > > + return; > > + } > > Pretty much that same thing is in __set_clr_pte_enc(). Make a helper > function pls. > Yes, both have some common code, but it is only this page level/size check, and they pretty much do different things with page size evaluation, i think it will be cleaner to keep the code separately for both these functions. > > + > > + psize = page_level_size(level); > > + pmask = page_level_mask(level); > > + > > + kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS, > > + pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc); > > + > > + vaddr_next = (vaddr & pmask) + psize; > > + } > > As with other patches from Brijesh, that should be a while loop. :) > I see that early_set_memory_enc_dec() is also using a for loop, so which patches are you referring to ? > > +} > > + > > static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) > > { > > pgprot_t old_prot, new_prot; > > @@ -286,12 +329,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) > > static int __init early_set_memory_enc_dec(unsigned long vaddr, > > unsigned long size, bool enc) > > { > > - unsigned long vaddr_end, vaddr_next; > > + unsigned long vaddr_end, vaddr_next, start; > > unsigned long psize, pmask; > > int split_page_size_mask; > > int level, ret; > > pte_t *kpte; > > > > + start = vaddr; > > vaddr_next = vaddr; > > vaddr_end = vaddr + size; > > > > @@ -346,6 +390,8 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, > > > > ret = 0; > > > > + set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, > > + enc); > > out: > > __flush_tlb_all(); > > return ret; > > @@ -481,6 +527,15 @@ void __init mem_encrypt_init(void) > > if (sev_active() && !sev_es_active()) > > static_branch_enable(&sev_enable_key); > > > > +#ifdef CONFIG_PARAVIRT > > + /* > > + * With SEV, we need to make a hypercall when page encryption state is > > + * changed. > > + */ > > + if (sev_active()) > > + pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall; > > +#endif > > There's already a sev_active() check above it. Merge the two pls. > Ok. > > + > > print_mem_encrypt_feature_info(); > > } > > > > diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c > > index 16f878c26667..3576b583ac65 100644 > > --- a/arch/x86/mm/pat/set_memory.c > > +++ b/arch/x86/mm/pat/set_memory.c > > @@ -27,6 +27,7 @@ > > #include <asm/proto.h> > > #include <asm/memtype.h> > > #include <asm/set_memory.h> > > +#include <asm/paravirt.h> > > > > #include "../mm_internal.h" > > > > @@ -2012,6 +2013,12 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) > > */ > > cpa_flush(&cpa, 0); > > > > + /* Notify hypervisor that a given memory range is mapped encrypted > > + * or decrypted. The hypervisor will use this information during the > > + * VM migration. > > + */ > > Kernel comments style is: > > /* > * A sentence ending with a full-stop. > * Another sentence. ... > * More sentences. ... > */ Ok. > > -- > Regards/Gruss, > Boris. > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpeople.kernel.org%2Ftglx%2Fnotes-about-netiquette&data=04%7C01%7CAshish.Kalra%40amd.com%7Cf953299226ec42b5077308d904ad427c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637545964477197662%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=XS9tvx%2BlDPCKGFgsv7jruSrF6kUzAMIqUhBke7rtO5k%3D&reserved=0
On 4/21/21 7:12 AM, Ashish Kalra wrote: >>> + >>> + psize = page_level_size(level); >>> + pmask = page_level_mask(level); >>> + >>> + kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS, >>> + pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc); >>> + >>> + vaddr_next = (vaddr & pmask) + psize; >>> + } >> As with other patches from Brijesh, that should be a while loop. :) >> > I see that early_set_memory_enc_dec() is also using a for loop, so which > patches are you referring to ? > I guess Boris is referring to my SNP patches. Please go ahead and use the while loop as recommended by Boris. -Brijesh
On Wed, Apr 21, 2021 at 12:12:13PM +0000, Ashish Kalra wrote: > Yes, both have some common code, but it is only this page level/size > ... See below for what I mean. Diff ontop of yours. > I see that early_set_memory_enc_dec() is also using a for loop, so which > patches are you referring to ? The SNP guest set has this pattern: https://lkml.kernel.org/r/20210408114049.GI10192@zn.tnic --- diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index b1d59d2b3bf6..e823645101ee 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -232,6 +232,37 @@ void __init sev_setup_arch(void) swiotlb_adjust_size(size); } +static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) +{ + unsigned long pfn = 0; + pgprot_t prot; + + switch (level) { + case PG_LEVEL_4K: + pfn = pte_pfn(*kpte); + prot = pte_pgprot(*kpte); + break; + + case PG_LEVEL_2M: + pfn = pmd_pfn(*(pmd_t *)kpte); + prot = pmd_pgprot(*(pmd_t *)kpte); + break; + + case PG_LEVEL_1G: + pfn = pud_pfn(*(pud_t *)kpte); + prot = pud_pgprot(*(pud_t *)kpte); + break; + + default: + return 0; + } + + if (ret_prot) + *ret_prot = prot; + + return pfn; +} + static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { @@ -249,19 +280,9 @@ static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages, if (!kpte || pte_none(*kpte)) return; - switch (level) { - case PG_LEVEL_4K: - pfn = pte_pfn(*kpte); - break; - case PG_LEVEL_2M: - pfn = pmd_pfn(*(pmd_t *)kpte); - break; - case PG_LEVEL_1G: - pfn = pud_pfn(*(pud_t *)kpte); - break; - default: - return; - } + pfn = pg_level_to_pfn(level, kpte, NULL); + if (!pfn) + continue; psize = page_level_size(level); pmask = page_level_mask(level); @@ -279,22 +300,9 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) unsigned long pfn, pa, size; pte_t new_pte; - switch (level) { - case PG_LEVEL_4K: - pfn = pte_pfn(*kpte); - old_prot = pte_pgprot(*kpte); - break; - case PG_LEVEL_2M: - pfn = pmd_pfn(*(pmd_t *)kpte); - old_prot = pmd_pgprot(*(pmd_t *)kpte); - break; - case PG_LEVEL_1G: - pfn = pud_pfn(*(pud_t *)kpte); - old_prot = pud_pgprot(*(pud_t *)kpte); - break; - default: + pfn = pg_level_to_pfn(level, kpte, &old_prot); + if (!pfn) return; - } new_prot = old_prot; if (enc)
On Wed, Apr 21, 2021 at 02:00:42PM +0200, Paolo Bonzini wrote: > The words are right but the order is wrong (more like "hypercall to set some > memory's encrypted/decrypted state"). Perhaps? > kvm_hypercall_set_page_enc_status. Yap. > page_encryption_changed does not sound bad to me though, it's a > notification-like function name. Maybe notify_page_enc_status_changed? Yap again. Those are better. Thx.
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 4abf110e2243..efaa3e628967 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -84,6 +84,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) PVOP_VCALL1(mmu.exit_mmap, mm); } +static inline void page_encryption_changed(unsigned long vaddr, int npages, + bool enc) +{ + PVOP_VCALL3(mmu.page_encryption_changed, vaddr, npages, enc); +} + #ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { @@ -799,6 +805,10 @@ static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) { } + +static inline void page_encryption_changed(unsigned long vaddr, int npages, bool enc) +{ +} #endif #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PARAVIRT_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index de87087d3bde..69ef9c207b38 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -195,6 +195,8 @@ struct pv_mmu_ops { /* Hook for intercepting the destruction of an mm_struct. */ void (*exit_mmap)(struct mm_struct *mm); + void (*page_encryption_changed)(unsigned long vaddr, int npages, + bool enc); #ifdef CONFIG_PARAVIRT_XXL struct paravirt_callee_save read_cr2; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c60222ab8ab9..9f206e192f6b 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -335,6 +335,7 @@ struct paravirt_patch_template pv_ops = { (void (*)(struct mmu_gather *, void *))tlb_remove_page, .mmu.exit_mmap = paravirt_nop, + .mmu.page_encryption_changed = paravirt_nop, #ifdef CONFIG_PARAVIRT_XXL .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2), diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index ae78cef79980..fae9ccbd0da7 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/dma-mapping.h> +#include <linux/kvm_para.h> #include <asm/tlbflush.h> #include <asm/fixmap.h> @@ -29,6 +30,7 @@ #include <asm/processor-flags.h> #include <asm/msr.h> #include <asm/cmdline.h> +#include <asm/kvm_para.h> #include "mm_internal.h" @@ -229,6 +231,47 @@ void __init sev_setup_arch(void) swiotlb_adjust_size(size); } +static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages, + bool enc) +{ + unsigned long sz = npages << PAGE_SHIFT; + unsigned long vaddr_end, vaddr_next; + + vaddr_end = vaddr + sz; + + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + int psize, pmask, level; + unsigned long pfn; + pte_t *kpte; + + kpte = lookup_address(vaddr, &level); + if (!kpte || pte_none(*kpte)) + return; + + switch (level) { + case PG_LEVEL_4K: + pfn = pte_pfn(*kpte); + break; + case PG_LEVEL_2M: + pfn = pmd_pfn(*(pmd_t *)kpte); + break; + case PG_LEVEL_1G: + pfn = pud_pfn(*(pud_t *)kpte); + break; + default: + return; + } + + psize = page_level_size(level); + pmask = page_level_mask(level); + + kvm_sev_hypercall3(KVM_HC_PAGE_ENC_STATUS, + pfn << PAGE_SHIFT, psize >> PAGE_SHIFT, enc); + + vaddr_next = (vaddr & pmask) + psize; + } +} + static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) { pgprot_t old_prot, new_prot; @@ -286,12 +329,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) static int __init early_set_memory_enc_dec(unsigned long vaddr, unsigned long size, bool enc) { - unsigned long vaddr_end, vaddr_next; + unsigned long vaddr_end, vaddr_next, start; unsigned long psize, pmask; int split_page_size_mask; int level, ret; pte_t *kpte; + start = vaddr; vaddr_next = vaddr; vaddr_end = vaddr + size; @@ -346,6 +390,8 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ret = 0; + set_memory_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, + enc); out: __flush_tlb_all(); return ret; @@ -481,6 +527,15 @@ void __init mem_encrypt_init(void) if (sev_active() && !sev_es_active()) static_branch_enable(&sev_enable_key); +#ifdef CONFIG_PARAVIRT + /* + * With SEV, we need to make a hypercall when page encryption state is + * changed. + */ + if (sev_active()) + pv_ops.mmu.page_encryption_changed = set_memory_enc_dec_hypercall; +#endif + print_mem_encrypt_feature_info(); } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 16f878c26667..3576b583ac65 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -27,6 +27,7 @@ #include <asm/proto.h> #include <asm/memtype.h> #include <asm/set_memory.h> +#include <asm/paravirt.h> #include "../mm_internal.h" @@ -2012,6 +2013,12 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) */ cpa_flush(&cpa, 0); + /* Notify hypervisor that a given memory range is mapped encrypted + * or decrypted. The hypervisor will use this information during the + * VM migration. + */ + page_encryption_changed(addr, numpages, enc); + return ret; }