
[Part2,RFC,v2,32/37] KVM: SVM: Add support to handle MSR based Page State Change VMGEXIT

Message ID 20210430123822.13825-33-brijesh.singh@amd.com (mailing list archive)
State New, archived
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support

Commit Message

Brijesh Singh April 30, 2021, 12:38 p.m. UTC
SEV-SNP VMs can ask the hypervisor to change the page state in the RMP
table to be private or shared using the Page State Change MSR protocol
as defined in the GHCB specification.
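
For reference, rev 2.00 of the GHCB spec puts GHCBInfo in bits 11:0 of the
MSR value (0x014 for a Page State Change request, 0x015 for the response),
the GFN in bits 51:12, and the operation in bits 55:52 (1 = private,
2 = shared). A minimal stand-alone sketch of that decode follows; the macro
names are illustrative, not the ones used in this series:

#include <stdint.h>
#include <stdio.h>

/* Illustrative accessors for the PSC request layout from the GHCB spec
 * rev 2.00; only the bit positions follow the spec, the names do not. */
#define PSC_MSR_INFO(v)    ((v) & 0xfffULL)                   /* bits 11:0  */
#define PSC_MSR_REQ        0x014ULL
#define PSC_MSR_GFN(v)     (((v) >> 12) & ((1ULL << 40) - 1)) /* bits 51:12 */
#define PSC_MSR_OP(v)      (((v) >> 52) & 0xfULL)             /* bits 55:52 */

int main(void)
{
	/* Example: guest asks for GFN 0x1234 to be made shared (op 2). */
	uint64_t msr = PSC_MSR_REQ | (0x1234ULL << 12) | (2ULL << 52);

	if (PSC_MSR_INFO(msr) == PSC_MSR_REQ)
		printf("PSC request: gfn=0x%llx op=%llu\n",
		       (unsigned long long)PSC_MSR_GFN(msr),
		       (unsigned long long)PSC_MSR_OP(msr));
	return 0;
}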

Before changing the page state in the RMP entry, we look up the page in
the TDP to make sure that there is a valid mapping for it. If the mapping
exists, we try to find a workable page level between the TDP and the RMP
for the page. If the page is not mapped in the TDP, we create a fault so
that it gets mapped before we change the page state in the RMP entry.
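
A minimal sketch of that level reconciliation, with a hypothetical helper
name (it mirrors the min_t() adjustment in the patch below, assuming the
kernel's PG_LEVEL_* numbering where a smaller value means a smaller page):

/* Never use a larger RMP page size than the level at which the TDP
 * actually maps the page; PG_LEVEL_4K < PG_LEVEL_2M in this numbering. */
static int psc_workable_level(int requested_level, int tdp_level)
{
	return requested_level < tdp_level ? requested_level : tdp_level;
}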

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/kvm/svm/sev.c | 137 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

Comments

Peter Gonda May 10, 2021, 5:30 p.m. UTC | #1
> +static int snp_make_page_shared(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
> +{
> +       struct rmpupdate val;
> +       int rc, rmp_level;
> +       struct rmpentry *e;
> +
> +       e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
> +       if (!e)
> +               return -EINVAL;
> +
> +       if (!rmpentry_assigned(e))
> +               return 0;
> +
> +       /* Log if the entry is validated */
> +       if (rmpentry_validated(e))
> +               pr_debug_ratelimited("Remove RMP entry for a validated gpa 0x%llx\n", gpa);
> +
> +       /*
> +        * Is the page part of an existing 2MB RMP entry? If so, split the
> +        * 2MB entry into 4K pages before making the memory shared.
> +        */
> +       if ((level == PG_LEVEL_4K) && (rmp_level == PG_LEVEL_2M)) {
> +               rc = snp_rmptable_psmash(vcpu, pfn);
> +               if (rc)
> +                       return rc;
> +       }
> +
> +       memset(&val, 0, sizeof(val));
> +       val.pagesize = X86_TO_RMP_PG_LEVEL(level);

This is slightly different from Rev 2.00 of the GHCB spec. This
defaults to 2MB page sizes, while the spec says the only valid settings
for the level are 0 -> 4K pages or 1 -> 2MB pages. Should this enforce
the same strictness as the spec?

> +       return rmpupdate(pfn_to_page(pfn), &val);
> +}
> +
> +static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
> +{
> +       struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
> +       struct rmpupdate val;
> +       struct rmpentry *e;
> +       int rmp_level;
> +
> +       e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
> +       if (!e)
> +               return -EINVAL;
> +
> +       /* Log if the entry is validated */
> +       if (rmpentry_validated(e))
> +               pr_err_ratelimited("Asked to make a pre-validated gpa %llx private\n", gpa);
> +
> +       memset(&val, 0, sizeof(val));
> +       val.gpa = gpa;
> +       val.asid = sev->asid;
> +       val.pagesize = X86_TO_RMP_PG_LEVEL(level);

Same comment as above.

> +       val.assigned = true;
> +
> +       return rmpupdate(pfn_to_page(pfn), &val);
> +}
Brijesh Singh May 10, 2021, 5:51 p.m. UTC | #2
Hi Peter,

On 5/10/21 12:30 PM, Peter Gonda wrote:
>> +static int snp_make_page_shared(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
>> +{
>> +       struct rmpupdate val;
>> +       int rc, rmp_level;
>> +       struct rmpentry *e;
>> +
>> +       e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
>> +       if (!e)
>> +               return -EINVAL;
>> +
>> +       if (!rmpentry_assigned(e))
>> +               return 0;
>> +
>> +       /* Log if the entry is validated */
>> +       if (rmpentry_validated(e))
>> +               pr_debug_ratelimited("Remove RMP entry for a validated gpa 0x%llx\n", gpa);
>> +
>> +       /*
>> +        * Is the page part of an existing 2MB RMP entry? If so, split the
>> +        * 2MB entry into 4K pages before making the memory shared.
>> +        */
>> +       if ((level == PG_LEVEL_4K) && (rmp_level == PG_LEVEL_2M)) {
>> +               rc = snp_rmptable_psmash(vcpu, pfn);
>> +               if (rc)
>> +                       return rc;
>> +       }
>> +
>> +       memset(&val, 0, sizeof(val));
>> +       val.pagesize = X86_TO_RMP_PG_LEVEL(level);
> This is slightly different from Rev 2.00 of the GHCB spec. This
> defaults to 2MB page sizes, while the spec says the only valid settings
> for the level are 0 -> 4K pages or 1 -> 2MB pages. Should this enforce
> the same strictness as the spec?


The caller of snp_make_page_shared() must pass the x86 page level.
We should reach here only after all the guest-provided values have
passed through the checks.

The call sequence in this case should be:

snp_handle_vmgexit_msr_protocol()

 __snp_handle_page_state_change(vcpu, gfn_to_gpa(gfn), PG_LEVEL_4K)

  snp_make_page_shared(..., level)

Am I missing something?

>> +       return rmpupdate(pfn_to_page(pfn), &val);
>> +}
>> +
>> +static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
>> +{
>> +       struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
>> +       struct rmpupdate val;
>> +       struct rmpentry *e;
>> +       int rmp_level;
>> +
>> +       e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
>> +       if (!e)
>> +               return -EINVAL;
>> +
>> +       /* Log if the entry is validated */
>> +       if (rmpentry_validated(e))
>> +               pr_err_ratelimited("Asked to make a pre-validated gpa %llx private\n", gpa);
>> +
>> +       memset(&val, 0, sizeof(val));
>> +       val.gpa = gpa;
>> +       val.asid = sev->asid;
>> +       val.pagesize = X86_TO_RMP_PG_LEVEL(level);
> Same comment as above.

See my above response.


>
>> +       val.assigned = true;
>> +
>> +       return rmpupdate(pfn_to_page(pfn), &val);
>> +}
Peter Gonda May 10, 2021, 7:59 p.m. UTC | #3
On Mon, May 10, 2021 at 11:51 AM Brijesh Singh <brijesh.singh@amd.com> wrote:
>
> Hi Peter,
>
> On 5/10/21 12:30 PM, Peter Gonda wrote:
> >> +static int snp_make_page_shared(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
> >> +{
> >> +       struct rmpupdate val;
> >> +       int rc, rmp_level;
> >> +       struct rmpentry *e;
> >> +
> >> +       e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
> >> +       if (!e)
> >> +               return -EINVAL;
> >> +
> >> +       if (!rmpentry_assigned(e))
> >> +               return 0;
> >> +
> >> +       /* Log if the entry is validated */
> >> +       if (rmpentry_validated(e))
> >> +               pr_debug_ratelimited("Remove RMP entry for a validated gpa 0x%llx\n", gpa);
> >> +
> >> +       /*
> >> +        * Is the page part of an existing 2MB RMP entry? If so, split the
> >> +        * 2MB entry into 4K pages before making the memory shared.
> >> +        */
> >> +       if ((level == PG_LEVEL_4K) && (rmp_level == PG_LEVEL_2M)) {
> >> +               rc = snp_rmptable_psmash(vcpu, pfn);
> >> +               if (rc)
> >> +                       return rc;
> >> +       }
> >> +
> >> +       memset(&val, 0, sizeof(val));
> >> +       val.pagesize = X86_TO_RMP_PG_LEVEL(level);
> > This is slightly different from Rev 2.00 of the GHCB spec. This
> > defaults to 2MB page sizes, while the spec says the only valid settings
> > for the level are 0 -> 4K pages or 1 -> 2MB pages. Should this enforce
> > the same strictness as the spec?
>
>
> The caller of snp_make_page_shared() must pass the x86 page level.
> We should reach here only after all the guest-provided values have
> passed through the checks.
>
> The call sequence in this case should be:
>
> snp_handle_vmgexit_msr_protocol()
>
>  __snp_handle_page_state_change(vcpu, gfn_to_gpa(gfn), PG_LEVEL_4K)
>
>   snp_make_page_shared(..., level)
>
> Am I missing something?

Thanks Brijesh. I think my comment was misplaced. Looking at 33/37

+static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm, struct ghcb *ghcb)
+{
...
+	while (info->header.cur_entry <= info->header.end_entry) {
+		entry = &info->entry[info->header.cur_entry];
+		gpa = gfn_to_gpa(entry->gfn);
+		level = RMP_TO_X86_PG_LEVEL(entry->pagesize);
+		op = entry->operation;

This call to RMP_TO_X86_PG_LEVEL is not as strict as the spec. Is that OK?
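
Concretely, I'd expect a spec-strict conversion to look more like the
sketch below (names illustrative, the error code invented; PG_LEVEL_*
values match the kernel's enum pg_level):

/* Hypothetical spec-strict conversion: rev 2.00 defines only the
 * encodings 0 -> 4K and 1 -> 2MB, so anything else is rejected
 * instead of being coerced to 2MB. */
#define PG_LEVEL_4K		1
#define PG_LEVEL_2M		2
#define PSC_INVALID_ENTRY	-1	/* invented error code */

static int psc_pagesize_to_level(unsigned int pagesize)
{
	if (pagesize > 1)
		return PSC_INVALID_ENTRY;
	return pagesize ? PG_LEVEL_2M : PG_LEVEL_4K;
}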

>
> >> +       return rmpupdate(pfn_to_page(pfn), &val);
> >> +}
> >> +
> >> +static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
> >> +{
> >> +       struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
> >> +       struct rmpupdate val;
> >> +       struct rmpentry *e;
> >> +       int rmp_level;
> >> +
> >> +       e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
> >> +       if (!e)
> >> +               return -EINVAL;
> >> +
> >> +       /* Log if the entry is validated */
> >> +       if (rmpentry_validated(e))
> >> +               pr_err_ratelimited("Asked to make a pre-validated gpa %llx private\n", gpa);
> >> +
> >> +       memset(&val, 0, sizeof(val));
> >> +       val.gpa = gpa;
> >> +       val.asid = sev->asid;
> >> +       val.pagesize = X86_TO_RMP_PG_LEVEL(level);
> > Same comment as above.
>
> See my above response.
>
>
> >
> >> +       val.assigned = true;
> >> +
> >> +       return rmpupdate(pfn_to_page(pfn), &val);
> >> +}
Brijesh Singh May 10, 2021, 8:50 p.m. UTC | #4
On 5/10/21 2:59 PM, Peter Gonda wrote:
> On Mon, May 10, 2021 at 11:51 AM Brijesh Singh <brijesh.singh@amd.com> wrote:
>> Hi Peter,
>>
>> On 5/10/21 12:30 PM, Peter Gonda wrote:
>>>> +static int snp_make_page_shared(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
>>>> +{
>>>> +       struct rmpupdate val;
>>>> +       int rc, rmp_level;
>>>> +       struct rmpentry *e;
>>>> +
>>>> +       e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
>>>> +       if (!e)
>>>> +               return -EINVAL;
>>>> +
>>>> +       if (!rmpentry_assigned(e))
>>>> +               return 0;
>>>> +
>>>> +       /* Log if the entry is validated */
>>>> +       if (rmpentry_validated(e))
>>>> +               pr_debug_ratelimited("Remove RMP entry for a validated gpa 0x%llx\n", gpa);
>>>> +
>>>> +       /*
>>>> +        * Is the page part of an existing 2MB RMP entry? If so, split the
>>>> +        * 2MB entry into 4K pages before making the memory shared.
>>>> +        */
>>>> +       if ((level == PG_LEVEL_4K) && (rmp_level == PG_LEVEL_2M)) {
>>>> +               rc = snp_rmptable_psmash(vcpu, pfn);
>>>> +               if (rc)
>>>> +                       return rc;
>>>> +       }
>>>> +
>>>> +       memset(&val, 0, sizeof(val));
>>>> +       val.pagesize = X86_TO_RMP_PG_LEVEL(level);
>>> This is slightly different from Rev 2.00 of the GHCB spec. This
>>> defaults to 2MB page sizes, while the spec says the only valid settings
>>> for the level are 0 -> 4K pages or 1 -> 2MB pages. Should this enforce
>>> the same strictness as the spec?
>>
>> The caller of snp_make_page_shared() must pass the x86 page level.
>> We should reach here only after all the guest-provided values have
>> passed through the checks.
>>
>> The call sequence in this case should be:
>>
>> snp_handle_vmgexit_msr_protocol()
>>
>>  __snp_handle_page_state_change(vcpu, gfn_to_gpa(gfn), PG_LEVEL_4K)
>>
>>   snp_make_page_shared(..., level)
>>
>> Am I missing something?
> Thanks Brijesh. I think my comment was misplaced. Looking at 33/37
>
> +static unsigned long snp_handle_page_state_change(struct vcpu_svm *svm, struct ghcb *ghcb)
> +{
> ...
> +	while (info->header.cur_entry <= info->header.end_entry) {
> +		entry = &info->entry[info->header.cur_entry];
> +		gpa = gfn_to_gpa(entry->gfn);
> +		level = RMP_TO_X86_PG_LEVEL(entry->pagesize);
> +		op = entry->operation;
>
> This call to RMP_TO_X86_PG_LEVEL is not as strict as the spec. Is that OK?

I am not able to follow which part of the spec we are missing here. Can
you please elaborate a bit more? Thanks.

The entry->pagesize is a boolean, so the level returned by the macro is
either 4K or 2MB.
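
For reference, the conversion helpers in question presumably reduce to
something like the sketch below — a single-bit page-size field; the exact
definitions are not part of this hunk:

/* Assumed shape of the helpers: the RMP page-size field is one bit,
 * 0 = 4K and 1 = 2MB, so RMP_TO_X86_PG_LEVEL() can only ever return
 * PG_LEVEL_4K or PG_LEVEL_2M. */
#define RMP_TO_X86_PG_LEVEL(sz)		((sz) ? PG_LEVEL_2M : PG_LEVEL_4K)
#define X86_TO_RMP_PG_LEVEL(lvl)	((lvl) == PG_LEVEL_4K ? 0 : 1)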

Patch

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1cba9d770860..cc2628d8ef1b 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -28,6 +28,7 @@ 
 #include "svm_ops.h"
 #include "cpuid.h"
 #include "trace.h"
+#include "mmu.h"
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
@@ -2825,6 +2826,127 @@  static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
 	svm->vmcb->control.ghcb_gpa = value;
 }
 
+static int snp_rmptable_psmash(struct kvm_vcpu *vcpu, kvm_pfn_t pfn)
+{
+	pfn = pfn & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
+
+	return psmash(pfn_to_page(pfn));
+}
+
+static int snp_make_page_shared(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
+{
+	struct rmpupdate val;
+	int rc, rmp_level;
+	struct rmpentry *e;
+
+	e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
+	if (!e)
+		return -EINVAL;
+
+	if (!rmpentry_assigned(e))
+		return 0;
+
+	/* Log if the entry is validated */
+	if (rmpentry_validated(e))
+		pr_debug_ratelimited("Remove RMP entry for a validated gpa 0x%llx\n", gpa);
+
+	/*
+	 * Is the page part of an existing 2MB RMP entry? If so, split the
+	 * 2MB entry into 4K pages before making the memory shared.
+	 */
+	if ((level == PG_LEVEL_4K) && (rmp_level == PG_LEVEL_2M)) {
+		rc = snp_rmptable_psmash(vcpu, pfn);
+		if (rc)
+			return rc;
+	}
+
+	memset(&val, 0, sizeof(val));
+	val.pagesize = X86_TO_RMP_PG_LEVEL(level);
+	return rmpupdate(pfn_to_page(pfn), &val);
+}
+
+static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
+{
+	struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+	struct rmpupdate val;
+	struct rmpentry *e;
+	int rmp_level;
+
+	e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &rmp_level);
+	if (!e)
+		return -EINVAL;
+
+	/* Log if the entry is validated */
+	if (rmpentry_validated(e))
+		pr_err_ratelimited("Asked to make a pre-validated gpa %llx private\n", gpa);
+
+	memset(&val, 0, sizeof(val));
+	val.gpa = gpa;
+	val.asid = sev->asid;
+	val.pagesize = X86_TO_RMP_PG_LEVEL(level);
+	val.assigned = true;
+
+	return rmpupdate(pfn_to_page(pfn), &val);
+}
+
+static int __snp_handle_page_state_change(struct kvm_vcpu *vcpu, int op, gpa_t gpa, int level)
+{
+	struct kvm *kvm = vcpu->kvm;
+	int rc = -EINVAL, tdp_level;
+	kvm_pfn_t pfn;
+	gpa_t gpa_end;
+
+	gpa_end = gpa + page_level_size(level);
+
+	while (gpa < gpa_end) {
+		/*
+		 * Get the pfn and level for the gpa from the nested page table.
+		 *
+		 * If the TDP walk fails, it's safe to say that we don't have a valid
+		 * mapping for the gpa in the nested page table. Create a fault so that
+		 * the page gets mapped in the nested page table.
+		 */
+		if (!kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &tdp_level)) {
+			pfn = kvm_mmu_map_tdp_page(vcpu, gpa, PFERR_USER_MASK, level);
+			if (is_error_noslot_pfn(pfn))
+				goto out;
+
+			if (!kvm_mmu_get_tdp_walk(vcpu, gpa, &pfn, &tdp_level))
+				goto out;
+		}
+
+		/* Adjust the level so that we don't go higher than the backing page level */
+		level = min_t(size_t, level, tdp_level);
+
+		write_lock(&kvm->mmu_lock);
+
+		switch (op) {
+		case SNP_PAGE_STATE_SHARED:
+			rc = snp_make_page_shared(vcpu, gpa, pfn, level);
+			break;
+		case SNP_PAGE_STATE_PRIVATE:
+			rc = snp_make_page_private(vcpu, gpa, pfn, level);
+			break;
+		default:
+			rc = -EINVAL;
+			break;
+		}
+
+		write_unlock(&kvm->mmu_lock);
+
+		if (rc) {
+			pr_err_ratelimited("Error op %d gpa %llx pfn %llx level %d rc %d\n",
+					   op, gpa, pfn, level, rc);
+			goto out;
+		}
+
+		gpa = gpa + page_level_size(level);
+	}
+
+out:
+	return rc;
+}
+
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -2923,6 +3045,21 @@  static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 				  GHCB_MSR_INFO_POS);
 		break;
 	}
+	case GHCB_MSR_PSC_REQ: {
+		gfn_t gfn;
+		int ret;
+		u8 op;
+
+		gfn = get_ghcb_msr_bits(svm, GHCB_MSR_PSC_GFN_MASK, GHCB_MSR_PSC_GFN_POS);
+		op = get_ghcb_msr_bits(svm, GHCB_MSR_PSC_OP_MASK, GHCB_MSR_PSC_OP_POS);
+
+		ret = __snp_handle_page_state_change(vcpu, op, gfn_to_gpa(gfn), PG_LEVEL_4K);
+
+		set_ghcb_msr_bits(svm, ret, GHCB_MSR_PSC_ERROR_MASK, GHCB_MSR_PSC_ERROR_POS);
+		set_ghcb_msr_bits(svm, 0, GHCB_MSR_PSC_RSVD_MASK, GHCB_MSR_PSC_RSVD_POS);
+		set_ghcb_msr_bits(svm, GHCB_MSR_PSC_RESP, GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS);
+		break;
+	}
 	case GHCB_MSR_TERM_REQ: {
 		u64 reason_set, reason_code;