@@ -207,3 +207,17 @@ The hypercall lets a guest request control register flags to be pinned for
itself.
Returns 0 on success or a KVM error code otherwise.
+
+10. KVM_HC_PROTECT_MEMORY
+-------------------------
+
+:Architecture: x86
+:Status: active
+:Purpose: Request memory permissions to be set in the extended page tables (EPT)
+
+- a0: physical address of a struct heki_page_list
+
+The hypercall lets a guest request memory permissions to be set for a list
+of physical pages.
+
+Returns 0 on success or a KVM error code otherwise.
@@ -47,9 +47,11 @@
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
+#include <linux/heki.h>
#include <linux/kern_levels.h>
#include <linux/kstrtox.h>
#include <linux/kthread.h>
+#include <linux/kvm_mem_attr.h>
#include <asm/page.h>
#include <asm/memtype.h>
@@ -4446,6 +4448,75 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
}
+/*
+ * Check whether a present-page fault was caused by Heki memory-attribute
+ * restrictions (exec or write permission removed) on the faulting GFN.
+ * If so, reflect the fault back into the guest as a #PF and return true,
+ * so the caller retries instead of resolving the fault.  Returns false
+ * when the fault is not attribute-induced and normal handling proceeds.
+ */
+static bool mem_attr_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
+{
+ unsigned long perm;
+ bool noexec, nowrite;
+
+ /* Reserved-bit faults indicate a malformed PTE, not a permission hit. */
+ if (unlikely(fault->rsvd))
+ return false;
+
+ /* Not-present faults must be resolved by the normal fault path. */
+ if (!fault->present)
+ return false;
+
+ perm = kvm_permissions_get(vcpu->kvm, fault->gfn);
+ noexec = !(perm & MEM_ATTR_EXEC);
+ nowrite = !(perm & MEM_ATTR_WRITE);
+
+ /* Instruction fetch from a GFN whose exec permission was removed. */
+ if (fault->exec && noexec) {
+ struct x86_exception exception = {
+ .vector = PF_VECTOR,
+ .error_code_valid = true,
+ .error_code = fault->error_code,
+ .nested_page_fault = false,
+ /*
+ * TODO: This kind of kernel page fault needs to be
+ * handled by the guest, which is not currently the
+ * case, making it try again and again.
+ *
+ * You may want to test with cr2_or_gva to see the page
+ * fault caught by the guest kernel (thinking it is a
+ * user space fault).
+ */
+ .address = static_call(kvm_x86_fault_gva)(vcpu),
+ .async_page_fault = false,
+ };
+
+ pr_warn_ratelimited(
+ "heki: Creating fetch #PF at 0x%016llx GFN=%llx\n",
+ exception.address, fault->gfn);
+ kvm_inject_page_fault(vcpu, &exception);
+ return true;
+ }
+
+ /* Write to a GFN whose write permission was removed. */
+ if (fault->write && nowrite) {
+ struct x86_exception exception = {
+ .vector = PF_VECTOR,
+ .error_code_valid = true,
+ .error_code = fault->error_code,
+ .nested_page_fault = false,
+ /*
+ * TODO: This kind of kernel page fault needs to be
+ * handled by the guest, which is not currently the
+ * case, making it try again and again.
+ *
+ * You may want to test with cr2_or_gva to see the page
+ * fault caught by the guest kernel (thinking it is a
+ * user space fault).
+ */
+ .address = static_call(kvm_x86_fault_gva)(vcpu),
+ .async_page_fault = false,
+ };
+
+ pr_warn_ratelimited(
+ "heki: Creating write #PF at 0x%016llx GFN=%llx\n",
+ exception.address, fault->gfn);
+ kvm_inject_page_fault(vcpu, &exception);
+ return true;
+ }
+ return false;
+}
+
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
int r;
@@ -4457,6 +4528,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
if (page_fault_handle_page_track(vcpu, fault))
return RET_PF_EMULATE;
+ if (mem_attr_fault(vcpu, fault))
+ return RET_PF_RETRY;
+
r = fast_page_fault(vcpu, fault);
if (r != RET_PF_INVALID)
return r;
@@ -4537,6 +4611,9 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
if (page_fault_handle_page_track(vcpu, fault))
return RET_PF_EMULATE;
+ if (mem_attr_fault(vcpu, fault))
+ return RET_PF_RETRY;
+
r = fast_page_fault(vcpu, fault);
if (r != RET_PF_INVALID)
return r;
@@ -820,6 +820,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
return RET_PF_EMULATE;
}
+ if (mem_attr_fault(vcpu, fault))
+ return RET_PF_RETRY;
+
r = mmu_topup_memory_caches(vcpu, true);
if (r)
return r;
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kvm_host.h>
+#include <linux/kvm_mem_attr.h>
#include "mmu.h"
#include "mmu_internal.h"
#include "x86.h"
@@ -143,6 +144,11 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
int level = sp->role.level;
u64 spte = SPTE_MMU_PRESENT_MASK;
bool wrprot = false;
+ unsigned long perm;
+
+ perm = kvm_permissions_get(vcpu->kvm, gfn);
+ if (!(perm & MEM_ATTR_WRITE))
+ pte_access &= ~ACC_WRITE_MASK;
WARN_ON_ONCE(!pte_access && !shadow_present_mask);
@@ -178,10 +184,15 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
pte_access &= ~ACC_EXEC_MASK;
}
- if (pte_access & ACC_EXEC_MASK)
+ if (pte_access & ACC_EXEC_MASK) {
spte |= shadow_x_mask;
- else
+#ifdef CONFIG_HEKI
+ if (enable_mbec && !(perm & MEM_ATTR_EXEC))
+ spte &= ~VMX_EPT_EXECUTABLE_MASK;
+#endif
+ } else {
spte |= shadow_nx_mask;
+ }
if (pte_access & ACC_USER_MASK)
spte |= shadow_user_mask;
@@ -62,6 +62,8 @@
#include <linux/entry-kvm.h>
#include <linux/suspend.h>
#include <linux/smp.h>
+#include <linux/heki.h>
+#include <linux/kvm_mem_attr.h>
#include <trace/events/ipi.h>
#include <trace/events/kvm.h>
@@ -9983,6 +9985,131 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
return;
}
+#ifdef CONFIG_HEKI
+
+/*
+ * Handler for the KVM_HC_PROTECT_MEMORY hypercall.
+ *
+ * @kvm:     the VM that issued the hypercall.
+ * @list_pa: guest physical address of the first struct heki_page_list node.
+ *
+ * Copies the guest's page list into host memory (one guest page per node,
+ * linked through next_pa, 0-terminated), validates each entry (page-aligned
+ * start/end GPA, permission bits within MEM_ATTR_PROT) and applies the
+ * requested permissions via kvm_permissions_set().  Returns 0 on success or
+ * a negative KVM error code that is propagated back to the guest.
+ */
+static int heki_protect_memory(struct kvm *const kvm, gpa_t list_pa)
+{
+ struct heki_page_list *list, *head;
+ struct heki_pages *pages;
+ size_t size;
+ int i, npages, err = 0;
+
+ /* Read in the page list. */
+ head = NULL;
+ npages = 0;
+ while (list_pa) {
+ /* Each list node occupies one guest page (see struct heki_page_list). */
+ list = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!list) {
+ /* For want of a better error number. */
+ err = -KVM_E2BIG;
+ goto free;
+ }
+
+ /* Copy the node header first; it carries npages and next_pa. */
+ err = kvm_read_guest(kvm, list_pa, list, sizeof(*list));
+ if (err) {
+ pr_warn("heki: Can't read list %llx\n", list_pa);
+ err = -KVM_EFAULT;
+ goto free;
+ }
+ list_pa += sizeof(*list);
+
+ /*
+ * NOTE(review): list->npages is guest-controlled and is not
+ * bounded here; if it is large enough that the pages[] array
+ * exceeds the PAGE_SIZE buffer allocated above, the read below
+ * overflows "list".  A bounds check against
+ * (PAGE_SIZE - sizeof(*list)) / sizeof(*pages) looks needed.
+ */
+ size = list->npages * sizeof(*pages);
+ pages = list->pages;
+ err = kvm_read_guest(kvm, list_pa, pages, size);
+ if (err) {
+ pr_warn("heki: Can't read pages %llx\n", list_pa);
+ err = -KVM_EFAULT;
+ goto free;
+ }
+
+ /* Prepend to the host-side list; next_pa links the guest-side list. */
+ list->next = head;
+ head = list;
+ npages += list->npages;
+ list_pa = list->next_pa;
+ }
+
+ /* For kvm_permissions_set() -> kvm_vm_set_mem_attributes() */
+ mutex_lock(&kvm->slots_arch_lock);
+
+ /*
+ * Walk the page list, apply the permissions for each guest page and
+ * zap the EPT entry of each page. The pages will be faulted in on
+ * demand and the correct permissions will be applied at the correct
+ * level for the pages.
+ */
+ for (list = head; list; list = list->next) {
+ pages = list->pages;
+
+ for (i = 0; i < list->npages; i++) {
+ gfn_t gfn_start, gfn_end;
+ unsigned long permissions;
+
+ if (!PAGE_ALIGNED(pages[i].pa)) {
+ pr_warn("heki: GPA not aligned: %llx\n",
+ pages[i].pa);
+ err = -KVM_EINVAL;
+ goto unlock;
+ }
+ if (!PAGE_ALIGNED(pages[i].epa)) {
+ pr_warn("heki: GPA not aligned: %llx\n",
+ pages[i].epa);
+ err = -KVM_EINVAL;
+ goto unlock;
+ }
+
+ gfn_start = gpa_to_gfn(pages[i].pa);
+ gfn_end = gpa_to_gfn(pages[i].epa);
+ permissions = pages[i].permissions;
+
+ /* Reject empty permissions and any bit outside MEM_ATTR_PROT. */
+ if (!permissions || (permissions & ~MEM_ATTR_PROT)) {
+ err = -KVM_EINVAL;
+ goto unlock;
+ }
+
+ if (!(permissions & MEM_ATTR_EXEC) && !enable_mbec) {
+ /*
+ * Guests can check for MBEC support to avoid
+ * this error message. We will continue
+ * applying restrictions partially.
+ */
+ pr_warn("heki: Clearing kernel exec "
+ "depends on MBEC, which is disabled.");
+ permissions |= MEM_ATTR_EXEC;
+ }
+
+ pr_warn("heki: Request to protect GFNs %llx-%llx"
+ " with %s permissions=%s%s%s\n",
+ gfn_start, gfn_end,
+ (permissions & MEM_ATTR_IMMUTABLE) ?
+ "immutable" :
+ "mutable",
+ (permissions & MEM_ATTR_READ) ? "r" : "_",
+ (permissions & MEM_ATTR_WRITE) ? "w" : "_",
+ (permissions & MEM_ATTR_EXEC) ? "x" : "_");
+
+ err = kvm_permissions_set(kvm, gfn_start, gfn_end,
+ permissions);
+ if (err) {
+ pr_warn("heki: Failed to set permissions\n");
+ goto unlock;
+ }
+ }
+ }
+
+unlock:
+ mutex_unlock(&kvm->slots_arch_lock);
+
+free:
+ /* Release every host-side copy, on both success and error paths. */
+ while (head) {
+ list = head;
+ head = head->next;
+ kfree(list);
+ }
+ return err;
+}
+
+#endif /* CONFIG_HEKI */
+
static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
{
u64 ret = vcpu->run->hypercall.ret;
@@ -10097,6 +10224,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
return ret;
}
break;
+ case KVM_HC_PROTECT_MEMORY:
+ ret = heki_protect_memory(vcpu->kvm, a0);
+ break;
#endif /* CONFIG_HEKI */
default:
ret = -KVM_ENOSYS;
@@ -8,6 +8,7 @@
#ifndef __HEKI_H__
#define __HEKI_H__
+#include <linux/kvm_types.h>
#include <linux/types.h>
#include <linux/bug.h>
#include <linux/cache.h>
@@ -17,6 +18,32 @@
#ifdef CONFIG_HEKI
+/*
+ * This structure contains a guest physical range and its permissions (RWX).
+ */
+struct heki_pages {
+ gpa_t pa; /* Start of the range (guest physical, page aligned). */
+ gpa_t epa; /* End of the range (guest physical, page aligned). */
+ unsigned long permissions; /* MEM_ATTR_* bits to apply to the range. */
+};
+
+/*
+ * Guest ranges are passed to the VMM or hypervisor so they can be authenticated
+ * and their permissions can be set in the host page table. When an array of
+ * these is passed to the Hypervisor or VMM, the array must be in physically
+ * contiguous memory.
+ *
+ * This struct occupies one page. In each page, an array of guest ranges can
+ * be passed. A guest request to the VMM/Hypervisor may contain a list of
+ * these structs (linked by "next_pa").
+ */
+struct heki_page_list {
+ struct heki_page_list *next; /* Host-side link, built when the list is copied in. */
+ gpa_t next_pa; /* Guest physical address of the next node; 0 terminates. */
+ unsigned long npages; /* Number of entries in pages[]. */
+ struct heki_pages pages[]; /* Guest ranges; array fills the rest of the page. */
+};
+
/*
* A hypervisor that supports Heki will instantiate this structure to
* provide hypervisor specific functions for Heki.
@@ -36,6 +63,8 @@ struct heki {
extern struct heki heki;
extern bool heki_enabled;
+extern bool __read_mostly enable_mbec;
+
void heki_early_init(void);
void heki_late_init(void);
@@ -31,6 +31,7 @@
#define KVM_HC_SCHED_YIELD 11
#define KVM_HC_MAP_GPA_RANGE 12
#define KVM_HC_LOCK_CR_UPDATE 13
+#define KVM_HC_PROTECT_MEMORY 14
/*
* hypercalls use architecture specific