[RFC,v5,08/20] kvm: page track: add support for preread, prewrite and preexec

Message ID	20181220182850.4579-9-alazar@bitdefender.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@kernel.org> From: =?utf-8?q?Adalbert_Laz=C4=83r?= <alazar@bitdefender.com> To: kvm@vger.kernel.org Cc: Paolo Bonzini <pbonzini@redhat.com>, =?utf-8?b?UmFkaW0gS3LEjW3DocWZ?= <rkrcmar@redhat.com>, =?utf-8?q?Mihai_Don?= =?utf-8?q?=C8=9Bu?= <mdontu@bitdefender.com>, =?utf-8?q?Adalbert_Laz=C4=83r?= <alazar@bitdefender.com> Subject: [RFC PATCH v5 08/20] kvm: page track: add support for preread, prewrite and preexec Date: Thu, 20 Dec 2018 20:28:38 +0200 Message-Id: <20181220182850.4579-9-alazar@bitdefender.com> In-Reply-To: <20181220182850.4579-1-alazar@bitdefender.com> References: <20181220182850.4579-1-alazar@bitdefender.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sender: kvm-owner@vger.kernel.org Precedence: bulk
Series	VM introspection \| expand [RFC,v5,00/20] VM introspection [RFC,v5,01/20] kvm: document the VM introspection API [RFC,v5,02/20] kvm: document the VM introspection ioctl-s and capability [RFC,v5,03/20] kvm: document the VM introspection hypercalls [RFC,v5,04/20] kvm: add the VM introspection API/ABI headers [RFC,v5,05/20] kvm: x86: do not unconditionally patch the hypercall instruction during emulation [RFC,v5,06/20] mm: add support for remote mapping [RFC,v5,07/20] add memory map/unmap support for VM introspection on the guest side [RFC,v5,08/20] kvm: page track: add support for preread, prewrite and preexec [RFC,v5,09/20] kvm: extend to accomodate the VM introspection subsystem [RFC,v5,10/20] kvm: x86: add tracepoints for interrupt and exception injections [RFC,v5,12/20] kvm: x86: hook in the VM introspection subsystem [RFC,v5,13/20] kvm: x86: hook in kvmi_msr_event() [RFC,v5,14/20] kvm: x86: hook in kvmi_breakpoint_event() [RFC,v5,15/20] kvm: x86: intercept accesses to IDTR, GDTR, LDTR and TR [RFC,v5,16/20] kvm: x86: hook the single-step events [RFC,v5,17/20] kvm: x86: hook in kvmi_cr_event() [RFC,v5,18/20] kvm: x86: hook in kvmi_xsetbv_event() [RFC,v5,19/20] kvm: x86: handle the introspection hypercalls [RFC,v5,20/20] kvm: x86: hook in kvmi_trap_event()

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fbda5a917c5b..796fc5f1d76c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1251,7 +1251,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); bool pdptrs_changed(struct kvm_vcpu *vcpu); -int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, const void *val, int bytes); struct kvm_irq_mask_notifier { diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 172f9749dbb2..a431e5e1e5cb 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -3,7 +3,10 @@ #define _ASM_X86_KVM_PAGE_TRACK_H enum kvm_page_track_mode { + KVM_PAGE_TRACK_PREREAD, + KVM_PAGE_TRACK_PREWRITE, KVM_PAGE_TRACK_WRITE, + KVM_PAGE_TRACK_PREEXEC, KVM_PAGE_TRACK_MAX, }; @@ -22,6 +25,13 @@ struct kvm_page_track_notifier_head { struct kvm_page_track_notifier_node { struct hlist_node node; + bool (*track_preread)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + u8 *new, int bytes, + struct kvm_page_track_notifier_node *node, + bool *data_ready); + bool (*track_prewrite)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + const u8 *new, int bytes, + struct kvm_page_track_notifier_node *node); /* * It is called when guest is writing the write-tracked page * and write emulation is finished at that time. @@ -32,11 +42,17 @@ struct kvm_page_track_notifier_node { * @bytes: the written length. * @node: this node */ - void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes, struct kvm_page_track_notifier_node *node); + void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + const u8 *new, int bytes, + struct kvm_page_track_notifier_node *node); + bool (*track_preexec)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + struct kvm_page_track_notifier_node *node); + void (*track_create_slot)(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages, + struct kvm_page_track_notifier_node *node); /* * It is called when memory slot is being moved or removed - * users can drop write-protection for the pages in that memory slot + * users can drop active protection for the pages in that memory slot * * @kvm: the kvm where memory slot being moved or removed * @slot: the memory slot being moved or removed @@ -51,7 +67,7 @@ void kvm_page_track_cleanup(struct kvm *kvm); void kvm_page_track_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, +int kvm_page_track_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages); void kvm_slot_page_track_add_page(struct kvm *kvm, @@ -69,7 +85,12 @@ kvm_page_track_register_notifier(struct kvm *kvm, void kvm_page_track_unregister_notifier(struct kvm *kvm, struct kvm_page_track_notifier_node *n); -void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes); +bool kvm_page_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + u8 *new, int bytes, bool *data_ready); +bool kvm_page_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + const u8 *new, int bytes); +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + const u8 *new, int bytes); +bool kvm_page_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva); void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7c03c0f35444..ff3252621fcf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1082,9 +1082,13 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) slot = __gfn_to_memslot(slots, gfn); /* the non-leaf shadow pages are keeping readonly. */ - if (sp->role.level > PT_PAGE_TABLE_LEVEL) - return kvm_slot_page_track_add_page(kvm, slot, gfn, - KVM_PAGE_TRACK_WRITE); + if (sp->role.level > PT_PAGE_TABLE_LEVEL) { + kvm_slot_page_track_add_page(kvm, slot, gfn, + KVM_PAGE_TRACK_PREWRITE); + kvm_slot_page_track_add_page(kvm, slot, gfn, + KVM_PAGE_TRACK_WRITE); + return; + } kvm_mmu_gfn_disallow_lpage(slot, gfn); } @@ -1099,9 +1103,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); - if (sp->role.level > PT_PAGE_TABLE_LEVEL) - return kvm_slot_page_track_remove_page(kvm, slot, gfn, - KVM_PAGE_TRACK_WRITE); + if (sp->role.level > PT_PAGE_TABLE_LEVEL) { + kvm_slot_page_track_remove_page(kvm, slot, gfn, + KVM_PAGE_TRACK_PREWRITE); + kvm_slot_page_track_remove_page(kvm, slot, gfn, + KVM_PAGE_TRACK_WRITE); + return; + } kvm_mmu_gfn_allow_lpage(slot, gfn); } @@ -1490,6 +1498,31 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect) return mmu_spte_update(sptep, spte); } +static bool spte_read_protect(u64 *sptep) +{ + u64 spte = *sptep; + bool exec_only_supported = (shadow_present_mask == 0ull); + + rmap_printk("rmap_read_protect: spte %p %llx\n", sptep, *sptep); + + WARN_ON_ONCE(!exec_only_supported); + + spte = spte & ~(PT_WRITABLE_MASK | PT_PRESENT_MASK); + + return mmu_spte_update(sptep, spte); +} + +static bool spte_exec_protect(u64 *sptep) +{ + u64 spte = *sptep; + + rmap_printk("rmap_exec_protect: spte %p %llx\n", sptep, *sptep); + + spte = spte & ~PT_USER_MASK; + + return mmu_spte_update(sptep, spte); +} + static bool __rmap_write_protect(struct kvm *kvm, struct kvm_rmap_head *rmap_head, bool pt_protect) @@ -1504,6 +1537,32 @@ static bool __rmap_write_protect(struct kvm *kvm, return flush; } +static bool __rmap_read_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head) +{ + u64 *sptep; + struct rmap_iterator iter; + bool flush = false; + + for_each_rmap_spte(rmap_head, &iter, sptep) + flush |= spte_read_protect(sptep); + + return flush; +} + +static bool __rmap_exec_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head) +{ + u64 *sptep; + struct rmap_iterator iter; + bool flush = false; + + for_each_rmap_spte(rmap_head, &iter, sptep) + flush |= spte_exec_protect(sptep); + + return flush; +} + static bool spte_clear_dirty(u64 *sptep) { u64 spte = *sptep; @@ -1674,6 +1733,36 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, return write_protected; } +bool kvm_mmu_slot_gfn_read_protect(struct kvm *kvm, + struct kvm_memory_slot *slot, u64 gfn) +{ + struct kvm_rmap_head *rmap_head; + int i; + bool read_protected = false; + + for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + rmap_head = __gfn_to_rmap(gfn, i, slot); + read_protected |= __rmap_read_protect(kvm, rmap_head); + } + + return read_protected; +} + +bool kvm_mmu_slot_gfn_exec_protect(struct kvm *kvm, + struct kvm_memory_slot *slot, u64 gfn) +{ + struct kvm_rmap_head *rmap_head; + int i; + bool exec_protected = false; + + for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + rmap_head = __gfn_to_rmap(gfn, i, slot); + exec_protected |= __rmap_exec_protect(kvm, rmap_head); + } + + return exec_protected; +} + static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) { struct kvm_memory_slot *slot; @@ -2366,6 +2455,20 @@ static void clear_sp_write_flooding_count(u64 *spte) __clear_sp_write_flooding_count(sp); } +static unsigned int kvm_mmu_page_track_acc(struct kvm_vcpu *vcpu, gfn_t gfn, + unsigned int acc) +{ + if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREREAD)) + acc &= ~ACC_USER_MASK; + if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) || + kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) + acc &= ~ACC_WRITE_MASK; + if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREEXEC)) + acc &= ~ACC_EXEC_MASK; + + return acc; +} + static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gaddr, @@ -2386,7 +2489,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role.direct = direct; if (role.direct) role.cr4_pae = 0; - role.access = access; + role.access = kvm_mmu_page_track_acc(vcpu, gfn, access); if (!vcpu->arch.mmu->direct_map && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); @@ -2767,7 +2870,8 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, { struct kvm_mmu_page *sp; - if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) + if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) || + kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) return true; for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { @@ -3106,7 +3210,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { - emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, + unsigned int acc = kvm_mmu_page_track_acc(vcpu, gfn, + ACC_ALL); + + emulate = mmu_set_spte(vcpu, iterator.sptep, acc, write, level, gfn, pfn, prefault, map_writable); direct_pte_prefetch(vcpu, iterator.sptep); @@ -3881,15 +3988,21 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, if (unlikely(error_code & PFERR_RSVD_MASK)) return false; - if (!(error_code & PFERR_PRESENT_MASK) || - !(error_code & PFERR_WRITE_MASK)) + if (!(error_code & PFERR_PRESENT_MASK)) return false; /* - * guest is writing the page which is write tracked which can + * guest is reading/writing/fetching the page which is + * read/write/execute tracked which can * not be fixed by page fault handler. */ - if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE)) + if (((error_code & PFERR_USER_MASK) + && kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREREAD)) + || ((error_code & PFERR_WRITE_MASK) + && (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) + || kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))) + || ((error_code & PFERR_FETCH_MASK) + && kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREEXEC))) return true; return false; @@ -5175,7 +5288,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) return spte; } -static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, +static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, const u8 *new, int bytes, struct kvm_page_track_notifier_node *node) { @@ -5335,7 +5448,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * explicitly shadowing L1's page tables, i.e. unprotecting something * for L1 isn't going to magically fix whatever issue cause L2 to fail. */ - if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) + if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu) + && kvm_page_track_is_active(vcpu, gpa_to_gfn(cr2), + KVM_PAGE_TRACK_PREEXEC)) emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index c7b333147c4a..45948dabe0b6 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -210,5 +210,9 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); +bool kvm_mmu_slot_gfn_read_protect(struct kvm *kvm, + struct kvm_memory_slot *slot, u64 gfn); +bool kvm_mmu_slot_gfn_exec_protect(struct kvm *kvm, + struct kvm_memory_slot *slot, u64 gfn); int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 3052a59a3065..fc792939a05c 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -1,5 +1,5 @@ /* - * Support KVM gust page tracking + * Support KVM guest page tracking * * This feature allows us to track page access in guest. Currently, only * write access is tracked. @@ -34,10 +34,13 @@ void kvm_page_track_free_memslot(struct kvm_memory_slot *free, } } -int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, +int kvm_page_track_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { - int i; + struct kvm_page_track_notifier_head *head; + struct kvm_page_track_notifier_node *n; + int idx; + int i; for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { slot->arch.gfn_track[i] = @@ -47,6 +50,17 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, goto track_free; } + head = &kvm->arch.track_notifier_head; + + if (hlist_empty(&head->track_notifier_list)) + return 0; + + idx = srcu_read_lock(&head->track_srcu); + hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) + if (n->track_create_slot) + n->track_create_slot(kvm, slot, npages, n); + srcu_read_unlock(&head->track_srcu, idx); + return 0; track_free: @@ -87,7 +101,7 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, * @kvm: the guest instance we are interested in. * @slot: the @gfn belongs to. * @gfn: the guest page. - * @mode: tracking mode, currently only write track is supported. + * @mode: tracking mode. */ void kvm_slot_page_track_add_page(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, @@ -105,9 +119,16 @@ void kvm_slot_page_track_add_page(struct kvm *kvm, */ kvm_mmu_gfn_disallow_lpage(slot, gfn); - if (mode == KVM_PAGE_TRACK_WRITE) + if (mode == KVM_PAGE_TRACK_PREWRITE || mode == KVM_PAGE_TRACK_WRITE) { if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn)) kvm_flush_remote_tlbs(kvm); + } else if (mode == KVM_PAGE_TRACK_PREREAD) { + if (kvm_mmu_slot_gfn_read_protect(kvm, slot, gfn)) + kvm_flush_remote_tlbs(kvm); + } else if (mode == KVM_PAGE_TRACK_PREEXEC) { + if (kvm_mmu_slot_gfn_exec_protect(kvm, slot, gfn)) + kvm_flush_remote_tlbs(kvm); + } } EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page); @@ -122,7 +143,7 @@ EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page); * @kvm: the guest instance we are interested in. * @slot: the @gfn belongs to. * @gfn: the guest page. - * @mode: tracking mode, currently only write track is supported. + * @mode: tracking mode. */ void kvm_slot_page_track_remove_page(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, @@ -215,15 +236,84 @@ kvm_page_track_unregister_notifier(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier); +/* + * Notify the node that a read access is about to happen. Returning false + * doesn't stop the other nodes from being called, but it will stop + * the emulation. + * + * The node should figure out if the written page is the one that the node + * is interested in by itself. + * + * The nodes will always be in conflict if they track the same page: + * - accepting a read won't guarantee that the next node will not override + * the data (filling new/bytes and setting data_ready) + * - filling new/bytes with custom data won't guarantee that the next node + * will not override that + */ +bool kvm_page_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + u8 *new, int bytes, bool *data_ready) +{ + struct kvm_page_track_notifier_head *head; + struct kvm_page_track_notifier_node *n; + int idx; + bool ret = true; + + *data_ready = false; + + head = &vcpu->kvm->arch.track_notifier_head; + + if (hlist_empty(&head->track_notifier_list)) + return ret; + + idx = srcu_read_lock(&head->track_srcu); + hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) + if (n->track_preread) + if (!n->track_preread(vcpu, gpa, gva, new, bytes, n, + data_ready)) + ret = false; + srcu_read_unlock(&head->track_srcu, idx); + return ret; +} + +/* + * Notify the node that a write access is about to happen. Returning false + * doesn't stop the other nodes from being called, but it will stop + * the emulation. + * + * The node should figure out if the written page is the one that the node + * is interested in by itself. + */ +bool kvm_page_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + const u8 *new, int bytes) +{ + struct kvm_page_track_notifier_head *head; + struct kvm_page_track_notifier_node *n; + int idx; + bool ret = true; + + head = &vcpu->kvm->arch.track_notifier_head; + + if (hlist_empty(&head->track_notifier_list)) + return ret; + + idx = srcu_read_lock(&head->track_srcu); + hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) + if (n->track_prewrite) + if (!n->track_prewrite(vcpu, gpa, gva, new, bytes, n)) + ret = false; + srcu_read_unlock(&head->track_srcu, idx); + return ret; +} + /* * Notify the node that write access is intercepted and write emulation is * finished at this time. * - * The node should figure out if the written page is the one that node is - * interested in by itself. + * The node should figure out if the written page is the one that the node + * is interested in by itself. */ -void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, - int bytes) +void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + const u8 *new, int bytes) { struct kvm_page_track_notifier_head *head; struct kvm_page_track_notifier_node *n; @@ -237,16 +327,45 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, idx = srcu_read_lock(&head->track_srcu); hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) if (n->track_write) - n->track_write(vcpu, gpa, new, bytes, n); + n->track_write(vcpu, gpa, gva, new, bytes, n); + srcu_read_unlock(&head->track_srcu, idx); +} + +/* + * Notify the node that an instruction is about to be executed. + * Returning false doesn't stop the other nodes from being called, + * but it will stop the emulation with X86EMUL_RETRY_INSTR. + * + * The node should figure out if the written page is the one that the node + * is interested in by itself. + */ +bool kvm_page_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva) +{ + struct kvm_page_track_notifier_head *head; + struct kvm_page_track_notifier_node *n; + int idx; + bool ret = true; + + head = &vcpu->kvm->arch.track_notifier_head; + + if (hlist_empty(&head->track_notifier_list)) + return ret; + + idx = srcu_read_lock(&head->track_srcu); + hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) + if (n->track_preexec) + if (!n->track_preexec(vcpu, gpa, gva, n)) + ret = false; srcu_read_unlock(&head->track_srcu, idx); + return ret; } /* * Notify the node that memory slot is being removed or moved so that it can - * drop write-protection for the pages in the memory slot. + * drop active protection for the pages in the memory slot. * - * The node should figure out it has any write-protected pages in this slot - * by itself. + * The node should figure out if the written page is the one that the node + * is interested in by itself. */ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d4bab80617c..3f332b947c1b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5024,6 +5024,9 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, if (unlikely(gpa == UNMAPPED_GVA)) return X86EMUL_PROPAGATE_FAULT; + if (!kvm_page_track_preexec(vcpu, gpa, addr)) + return X86EMUL_RETRY_INSTR; + offset = addr & (PAGE_SIZE-1); if (WARN_ON(offset + bytes > PAGE_SIZE)) bytes = (unsigned)PAGE_SIZE - offset; @@ -5192,22 +5195,35 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write); } -int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, const void *val, int bytes) { - int ret; + if (!kvm_page_track_prewrite(vcpu, gpa, gva, val, bytes)) + return X86EMUL_RETRY_INSTR; + if (kvm_vcpu_write_guest(vcpu, gpa, val, bytes) < 0) + return X86EMUL_UNHANDLEABLE; + kvm_page_track_write(vcpu, gpa, gva, val, bytes); + return X86EMUL_CONTINUE; +} - ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes); - if (ret < 0) - return 0; - kvm_page_track_write(vcpu, gpa, val, bytes); - return 1; +static int emulator_read_phys(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, + void *val, int bytes) +{ + bool data_ready; + + if (!kvm_page_track_preread(vcpu, gpa, gva, val, bytes, &data_ready)) + return X86EMUL_RETRY_INSTR; + if (data_ready) + return X86EMUL_CONTINUE; + if (kvm_vcpu_read_guest(vcpu, gpa, val, bytes) < 0) + return X86EMUL_UNHANDLEABLE; + return X86EMUL_CONTINUE; } struct read_write_emulator_ops { int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val, int bytes); - int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa, + int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, void *val, int bytes); int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val); @@ -5228,16 +5244,16 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) return 0; } -static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, +static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, void *val, int bytes) { - return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes); + return emulator_read_phys(vcpu, gpa, gva, val, bytes); } -static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, +static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, void *val, int bytes) { - return emulator_write_phys(vcpu, gpa, val, bytes); + return emulator_write_phys(vcpu, gpa, gva, val, bytes); } static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) @@ -5306,8 +5322,11 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, return X86EMUL_PROPAGATE_FAULT; } - if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes)) - return X86EMUL_CONTINUE; + if (!ret) { + ret = ops->read_write_emulate(vcpu, gpa, addr, val, bytes); + if (ret == X86EMUL_CONTINUE || ret == X86EMUL_RETRY_INSTR) + return ret; + } /* * Is this MMIO handled locally? @@ -5442,6 +5461,9 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, if (is_error_page(page)) goto emul_write; + if (!kvm_page_track_prewrite(vcpu, gpa, addr, new, bytes)) + return X86EMUL_RETRY_INSTR; + kaddr = kmap_atomic(page); kaddr += offset_in_page(gpa); switch (bytes) { @@ -5467,7 +5489,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, return X86EMUL_CMPXCHG_FAILED; kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); - kvm_page_track_write(vcpu, gpa, new, bytes); + kvm_page_track_write(vcpu, gpa, addr, new, bytes); return X86EMUL_CONTINUE; @@ -9243,7 +9265,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, } } - if (kvm_page_track_create_memslot(slot, npages)) + if (kvm_page_track_create_memslot(kvm, slot, npages)) goto out_free; return 0; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index c1072143da1d..8c4858603825 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1540,7 +1540,7 @@ static int kvmgt_page_track_remove(unsigned long handle, u64 gfn) return 0; } -static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, +static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva, const u8 *val, int len, struct kvm_page_track_notifier_node *node) {

[RFC,v5,08/20] kvm: page track: add support for preread, prewrite and preexec

Commit Message

Patch