diff mbox series

[Part2,v5,26/45] KVM: SVM: Mark the private vma unmergeable for SEV-SNP guests

Message ID 20210820155918.7518-27-brijesh.singh@amd.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support | expand

Commit Message

Brijesh Singh Aug. 20, 2021, 3:58 p.m. UTC
When SEV-SNP is enabled, the guest private pages are added in the RMP
table; while adding the pages, the rmp_make_private() unmaps the pages
from the direct map. If KSM attempts to access those unmapped pages then
it will trigger #PF (page-not-present).

Encrypted guest pages cannot be shared between processes, so
userspace should not mark the region mergeable; but to be safe, mark the
process vma unmergeable before adding the pages to the RMP table.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/kvm/svm/sev.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

Comments

Dr. David Alan Gilbert Sept. 23, 2021, 5:18 p.m. UTC | #1
* Brijesh Singh (brijesh.singh@amd.com) wrote:
> When SEV-SNP is enabled, the guest private pages are added in the RMP
> table; while adding the pages, the rmp_make_private() unmaps the pages
> from the direct map. If KSM attempts to access those unmapped pages then
> it will trigger #PF (page-not-present).
> 
> Encrypted guest pages cannot be shared between the process, so an
> userspace should not mark the region mergeable but to be safe, mark the
> process vma unmerable before adding the pages in the RMP table.
              ^^^^^^^^^

(and in the subject) -> unmergeable

> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/kvm/svm/sev.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
> 
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 4b126598b7aa..dcef0ae5f8e4 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -18,11 +18,13 @@
>  #include <linux/processor.h>
>  #include <linux/trace_events.h>
>  #include <linux/sev.h>
> +#include <linux/ksm.h>
>  #include <asm/fpu/internal.h>
>  
>  #include <asm/pkru.h>
>  #include <asm/trapnr.h>
>  #include <asm/sev.h>
> +#include <asm/mman.h>
>  
>  #include "x86.h"
>  #include "svm.h"
> @@ -1683,6 +1685,30 @@ static bool is_hva_registered(struct kvm *kvm, hva_t hva, size_t len)
>  	return false;
>  }
>  
> +static int snp_mark_unmergable(struct kvm *kvm, u64 start, u64 size)
                       ^^^^^^^^^^

> +{
> +	struct vm_area_struct *vma;
> +	u64 end = start + size;

Do you need to worry about wrap there? (User supplied start/size?)

Dave

> +	int ret;
> +
> +	do {
> +		vma = find_vma_intersection(kvm->mm, start, end);
> +		if (!vma) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> +				  MADV_UNMERGEABLE, &vma->vm_flags);
> +		if (ret)
> +			break;
> +
> +		start = vma->vm_end;
> +	} while (end > vma->vm_end);
> +
> +	return ret;
> +}
> +
>  static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  {
>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> @@ -1707,6 +1733,12 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	if (!is_hva_registered(kvm, params.uaddr, params.len))
>  		return -EINVAL;
>  
> +	mmap_write_lock(kvm->mm);
> +	ret = snp_mark_unmergable(kvm, params.uaddr, params.len);
> +	mmap_write_unlock(kvm->mm);
> +	if (ret)
> +		return -EFAULT;
> +
>  	/*
>  	 * The userspace memory is already locked so technically we don't
>  	 * need to lock it again. Later part of the function needs to know
> -- 
> 2.17.1
> 
>
Sean Christopherson Oct. 12, 2021, 6:46 p.m. UTC | #2
On Fri, Aug 20, 2021, Brijesh Singh wrote:
> When SEV-SNP is enabled, the guest private pages are added in the RMP
> table; while adding the pages, the rmp_make_private() unmaps the pages
> from the direct map. If KSM attempts to access those unmapped pages then
> it will trigger #PF (page-not-present).
> 
> Encrypted guest pages cannot be shared between the process, so an
> userspace should not mark the region mergeable but to be safe, mark the
> process vma unmerable before adding the pages in the RMP table.

To be safe from what?  Does the !PRESENT #PF crash the kernel?

> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  arch/x86/kvm/svm/sev.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
> 
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 4b126598b7aa..dcef0ae5f8e4 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -18,11 +18,13 @@
>  #include <linux/processor.h>
>  #include <linux/trace_events.h>
>  #include <linux/sev.h>
> +#include <linux/ksm.h>
>  #include <asm/fpu/internal.h>
>  
>  #include <asm/pkru.h>
>  #include <asm/trapnr.h>
>  #include <asm/sev.h>
> +#include <asm/mman.h>
>  
>  #include "x86.h"
>  #include "svm.h"
> @@ -1683,6 +1685,30 @@ static bool is_hva_registered(struct kvm *kvm, hva_t hva, size_t len)
>  	return false;
>  }
>  
> +static int snp_mark_unmergable(struct kvm *kvm, u64 start, u64 size)
> +{
> +	struct vm_area_struct *vma;
> +	u64 end = start + size;
> +	int ret;
> +
> +	do {
> +		vma = find_vma_intersection(kvm->mm, start, end);
> +		if (!vma) {
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
> +				  MADV_UNMERGEABLE, &vma->vm_flags);
> +		if (ret)
> +			break;
> +
> +		start = vma->vm_end;
> +	} while (end > vma->vm_end);
> +
> +	return ret;
> +}
> +
>  static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  {
>  	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
> @@ -1707,6 +1733,12 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
>  	if (!is_hva_registered(kvm, params.uaddr, params.len))
>  		return -EINVAL;
>  
> +	mmap_write_lock(kvm->mm);
> +	ret = snp_mark_unmergable(kvm, params.uaddr, params.len);
> +	mmap_write_unlock(kvm->mm);

This does not, and practically speaking cannot, work.  There are multiple TOCTOU
bugs, here and in __snp_handle_page_state_change().  Userspace can madvise() the
range at any later point, munmap()/mmap() the entire range, mess with the memslots
in the PSC case, and so on and so forth.  Relying on MADV_UNMERGEABLE for functional
correctness simply cannot work in KVM, barring mmu_notifier and a big pile of code.

> +	if (ret)
> +		return -EFAULT;
> +
>  	/*
>  	 * The userspace memory is already locked so technically we don't
>  	 * need to lock it again. Later part of the function needs to know
> -- 
> 2.17.1
>
Brijesh Singh Oct. 13, 2021, 12:39 p.m. UTC | #3
On 10/12/21 11:46 AM, Sean Christopherson wrote:
> On Fri, Aug 20, 2021, Brijesh Singh wrote:
>> When SEV-SNP is enabled, the guest private pages are added in the RMP
>> table; while adding the pages, the rmp_make_private() unmaps the pages
>> from the direct map. If KSM attempts to access those unmapped pages then
>> it will trigger #PF (page-not-present).
>>
>> Encrypted guest pages cannot be shared between the process, so an
>> userspace should not mark the region mergeable but to be safe, mark the
>> process vma unmerable before adding the pages in the RMP table.
> To be safe from what?  Does the !PRESENT #PF crash the kernel?

Yes, the kernel crashes when KSM attempts to access an unmapped pfn.

[...]
>> +	mmap_write_lock(kvm->mm);
>> +	ret = snp_mark_unmergable(kvm, params.uaddr, params.len);
>> +	mmap_write_unlock(kvm->mm);
> This does not, and practically speaking cannot, work.  There are multiple TOCTOU
> bugs, here and in __snp_handle_page_state_change().  Userspace can madvise() the
> range at any later point, munmap()/mmap() the entire range, mess with the memslots
> in the PSC case, and so on and so forth.  Relying on MADV_UNMERGEABLE for functional
> correctness simply cannot work in KVM, barring mmu_notifier and a big pile of code.

AFAICT, ksm does not exclude the unmapped pfn from its scan list. We
need to tell ksm somehow to exclude the unmapped pfn from its scan list.
I understand that if userspace is messing with us, we have an issue, but
it's a userspace bug ;) To fix it right, we need to enhance ksm to
exclude the pfn when it is getting unmapped from the direct map. I
believe that work can be done outside of the SNP series. I am okay to
drop snp_mark_unmergable(), and until then, we just run with KSM
disabled. Thoughts?

thanks
Sean Christopherson Oct. 13, 2021, 2:34 p.m. UTC | #4
On Wed, Oct 13, 2021, Brijesh Singh wrote:
> 
> On 10/12/21 11:46 AM, Sean Christopherson wrote:
> > On Fri, Aug 20, 2021, Brijesh Singh wrote:
> >> When SEV-SNP is enabled, the guest private pages are added in the RMP
> >> table; while adding the pages, the rmp_make_private() unmaps the pages
> >> from the direct map. If KSM attempts to access those unmapped pages then
> >> it will trigger #PF (page-not-present).
> >>
> >> Encrypted guest pages cannot be shared between the process, so an
> >> userspace should not mark the region mergeable but to be safe, mark the
> >> process vma unmerable before adding the pages in the RMP table.
> > To be safe from what?  Does the !PRESENT #PF crash the kernel?
> 
> Yes, kernel crashes when KSM attempts to access to an unmaped pfn.

Is this problem unique to nuking the direct map (patch 05), or would it also be
a problem (in the form of an RMP violation) if the direct map were demoted to 4k
pages?
 
> [...]
> >> +	mmap_write_lock(kvm->mm);
> >> +	ret = snp_mark_unmergable(kvm, params.uaddr, params.len);
> >> +	mmap_write_unlock(kvm->mm);
> > This does not, and practically speaking cannot, work.  There are multiple TOCTOU
> > bugs, here and in __snp_handle_page_state_change().  Userspace can madvise() the
> > range at any later point, munmap()/mmap() the entire range, mess with the memslots
> > in the PSC case, and so on and so forth.  Relying on MADV_UNMERGEABLE for functional
> > correctness simply cannot work in KVM, barring mmu_notifier and a big pile of code.
> 
> AFAICT, ksm does not exclude the unmapped pfn from its scan list. We
> need to tell ksm somehow to exclude the unmapped pfn from its scan list.
> I understand that if userspace is messing with us, we have an issue, but
> it's a userspace bug ;) To fix it right, we need to enhance ksm to
> exclude the pfn when it is getting unmapped from the direct map. I
> believe that work can be done outside of the SNP series. I am okay to
> drop snp_mark_unmerable(), and until then, we just run with KSM
> disabled. Thoughts?
> 
> thanks
Brijesh Singh Oct. 13, 2021, 2:51 p.m. UTC | #5
On 10/13/21 7:34 AM, Sean Christopherson wrote:
> On Wed, Oct 13, 2021, Brijesh Singh wrote:
>> On 10/12/21 11:46 AM, Sean Christopherson wrote:
>>> On Fri, Aug 20, 2021, Brijesh Singh wrote:
>>>> When SEV-SNP is enabled, the guest private pages are added in the RMP
>>>> table; while adding the pages, the rmp_make_private() unmaps the pages
>>>> from the direct map. If KSM attempts to access those unmapped pages then
>>>> it will trigger #PF (page-not-present).
>>>>
>>>> Encrypted guest pages cannot be shared between the process, so an
>>>> userspace should not mark the region mergeable but to be safe, mark the
>>>> process vma unmerable before adding the pages in the RMP table.
>>> To be safe from what?  Does the !PRESENT #PF crash the kernel?
>> Yes, kernel crashes when KSM attempts to access to an unmaped pfn.
> Is this problem unique to nuking the direct map (patch 05), 

Yes. This problem didn't exist in the previous series because we were not
removing the page from the direct map, and KSM was able to read the memory
just fine. Now, with the page removed from the direct map, the access causes
a #PF (not-present).


> or would it also be
> a problem (in the form of an RMP violation) if the direct map were demoted to 4k
> pages?
>  

No, this problem does not happen due to the demotion. In the previous series,
we were demoting the pages to 4k and everyone was happy (including ksm). In
the case of ksm, the page will *never* be merged because the ciphertext for
two private pages will never be the same. Removing the pages from the direct
map certainly brings additional complexity in KVM and other places
in the kernel. From an architecture point of view, there is actually no
need to mark the page *not present* in the direct map. I believe in TDX
that is a must, but for SEV-SNP it's not required at all. A hypervisor
can read the guest private pages just fine; only a write will cause an
RMP fault.


>> [...]
>>>> +	mmap_write_lock(kvm->mm);
>>>> +	ret = snp_mark_unmergable(kvm, params.uaddr, params.len);
>>>> +	mmap_write_unlock(kvm->mm);
>>> This does not, and practically speaking cannot, work.  There are multiple TOCTOU
>>> bugs, here and in __snp_handle_page_state_change().  Userspace can madvise() the
>>> range at any later point, munmap()/mmap() the entire range, mess with the memslots
>>> in the PSC case, and so on and so forth.  Relying on MADV_UNMERGEABLE for functional
>>> correctness simply cannot work in KVM, barring mmu_notifier and a big pile of code.
>> AFAICT, ksm does not exclude the unmapped pfn from its scan list. We
>> need to tell ksm somehow to exclude the unmapped pfn from its scan list.
>> I understand that if userspace is messing with us, we have an issue, but
>> it's a userspace bug ;) To fix it right, we need to enhance ksm to
>> exclude the pfn when it is getting unmapped from the direct map. I
>> believe that work can be done outside of the SNP series. I am okay to
>> drop snp_mark_unmerable(), and until then, we just run with KSM
>> disabled. Thoughts?
>>
>> thanks
Sean Christopherson Oct. 13, 2021, 3:33 p.m. UTC | #6
On Wed, Oct 13, 2021, Brijesh Singh wrote:
> 
> On 10/13/21 7:34 AM, Sean Christopherson wrote:
> > On Wed, Oct 13, 2021, Brijesh Singh wrote:
> >> On 10/12/21 11:46 AM, Sean Christopherson wrote:
> >>> On Fri, Aug 20, 2021, Brijesh Singh wrote:
> >>>> When SEV-SNP is enabled, the guest private pages are added in the RMP
> >>>> table; while adding the pages, the rmp_make_private() unmaps the pages
> >>>> from the direct map. If KSM attempts to access those unmapped pages then
> >>>> it will trigger #PF (page-not-present).
> >>>>
> >>>> Encrypted guest pages cannot be shared between the process, so an
> >>>> userspace should not mark the region mergeable but to be safe, mark the
> >>>> process vma unmerable before adding the pages in the RMP table.
> >>> To be safe from what?  Does the !PRESENT #PF crash the kernel?
> >> Yes, kernel crashes when KSM attempts to access to an unmaped pfn.
> > Is this problem unique to nuking the direct map (patch 05), 
> 
> Yes. This problem didn't exist in previous series because we were not
> nuking the page from direct map and KSM was able to read the memory just
> fine. Now with the page removed from the direct map causes #PF
> (not-present).

Hrm, so regardless of what manipulations are done to the direct map, any errant
write to guest private memory via the direct map would be fatal to the kernel.
That's both mildly terrifying and oddly encouraging, as it means silent guest data
corruption is no longer a thing, at least for private memory.

One concrete takeaway for me is that "silently" nuking the direct map on RMP
assignment is not an option.  Nuking the direct map if the kernel has a way to
determine that the backing store is for guest private memory is perfectly ok,
but pulling the rug out so to speak is setting us up for maintenance hell.

> > or would it also be a problem (in the form of an RMP violation) if the
> > direct map were demoted to 4k pages?
> >  
> 
> No, this problem does happen due to the demotion. In previous series, we
> were demoting the pages to 4k and everyone was happy (including ksm). In
> the case of ksm, the page will *never* be merged because ciphertext for
> two private pages will never be the same. Removing the pages from direct
> map certainly brings additional complexity in the KVM and other places
> in the kernel. From architecture point of view, there is actually no
> need to mark the page *not present* in the direct map. I believe in TDX
> that is must but for the SEV-SNP its not required at all.

Nuking the direct map is not strictly required for TDX either, as reads do not
compromise the integrity of the memory, i.e. don't poison memory and lead to
#MC.  Like SNP, writes via the direct map would be fatal.

The issue with TDX that is not shared by SNP is that writes through _user_ mappings
can be fatal to the system.  With SNP, those generate RMP violations, but because they
are "just" page faults, the normal uaccess machinery happily eats them and SIGBUSes
the VMM.

> A hypervisor can read the guest private pages just fine, only the write will
> cause an RMP fault.

Well, for some definitions of "read".  I'm kinda joking, kinda serious.  KSM may
"work" when it reads garbage, but the same is likely not true for other kernel
code that wanders into guest private memory.  Ideally, the kernel would provide
a mechanism to _prevent_ any such reads/writes, and violations would be treated
as kernel bugs.  Given that SEV has been successfully deployed, the probability
of lurking bugs is quite low, but I still dislike the idea of latent bugs going
unnoticed or manifesting in weird ways.
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 4b126598b7aa..dcef0ae5f8e4 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -18,11 +18,13 @@ 
 #include <linux/processor.h>
 #include <linux/trace_events.h>
 #include <linux/sev.h>
+#include <linux/ksm.h>
 #include <asm/fpu/internal.h>
 
 #include <asm/pkru.h>
 #include <asm/trapnr.h>
 #include <asm/sev.h>
+#include <asm/mman.h>
 
 #include "x86.h"
 #include "svm.h"
@@ -1683,6 +1685,30 @@  static bool is_hva_registered(struct kvm *kvm, hva_t hva, size_t len)
 	return false;
 }
 
+static int snp_mark_unmergable(struct kvm *kvm, u64 start, u64 size)
+{
+	struct vm_area_struct *vma;
+	u64 end = start + size;
+	int ret;
+
+	do {
+		vma = find_vma_intersection(kvm->mm, start, end);
+		if (!vma) {
+			ret = -EINVAL;
+			break;
+		}
+
+		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+				  MADV_UNMERGEABLE, &vma->vm_flags);
+		if (ret)
+			break;
+
+		start = vma->vm_end;
+	} while (end > vma->vm_end);
+
+	return ret;
+}
+
 static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -1707,6 +1733,12 @@  static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (!is_hva_registered(kvm, params.uaddr, params.len))
 		return -EINVAL;
 
+	mmap_write_lock(kvm->mm);
+	ret = snp_mark_unmergable(kvm, params.uaddr, params.len);
+	mmap_write_unlock(kvm->mm);
+	if (ret)
+		return -EFAULT;
+
 	/*
 	 * The userspace memory is already locked so technically we don't
 	 * need to lock it again. Later part of the function needs to know