@@ -2135,9 +2135,11 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
flush |= kvm_sync_page(vcpu, sp, &invalid_list);
mmu_pages_clear_parents(&parents);
}
- if (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)) {
+ if (need_resched()) {
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
- cond_resched_lock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
+ schedule();
+ write_lock(&vcpu->kvm->mmu_lock);
flush = false;
}
}
@@ -2488,7 +2490,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
{
LIST_HEAD(invalid_list);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) {
/* Need to free some mmu pages to achieve the goal. */
@@ -2502,7 +2504,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
}
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
@@ -2513,7 +2515,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
sp->role.word);
@@ -2521,7 +2523,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
kvm_mmu_commit_zap_page(kvm, &invalid_list);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
return r;
}
@@ -3090,19 +3092,19 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
return r;
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
return r;
out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return 0;
}
@@ -3122,19 +3124,19 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.direct_map)) {
hpa_t root = vcpu->arch.mmu.root_hpa;
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
sp = page_header(root);
--sp->root_count;
if (!sp->root_count && sp->role.invalid) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
}
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
return;
}
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
@@ -3149,7 +3151,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
}
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
}
@@ -3171,24 +3173,24 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
unsigned i;
if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL);
++sp->root_count;
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = __pa(sp->spt);
} else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
MMU_WARN_ON(VALID_PAGE(root));
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
}
vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
@@ -3219,13 +3221,13 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
MMU_WARN_ON(VALID_PAGE(root));
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL,
0, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.root_hpa = root;
return 0;
}
@@ -3253,13 +3255,13 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
if (mmu_check_root(vcpu, root_gfn))
return 1;
}
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
make_mmu_pages_available(vcpu);
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
0, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.pae_root[i] = root | pm_mask;
}
@@ -3335,9 +3337,9 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
mmu_sync_roots(vcpu);
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
}
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
@@ -3625,19 +3627,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
return r;
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
make_mmu_pages_available(vcpu);
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
return r;
out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return 0;
}
@@ -4491,7 +4493,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
*/
mmu_topup_memory_caches(vcpu);
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
@@ -4522,7 +4524,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
}
kvm_mmu_flush_or_zap(vcpu, &invalid_list, remote_flush, local_flush);
kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE);
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
}
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
@@ -4698,12 +4700,14 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (iterator.rmap)
flush |= fn(kvm, iterator.rmap);
- if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ if (need_resched()) {
if (flush && lock_flush_tlb) {
kvm_flush_remote_tlbs(kvm);
flush = false;
}
- cond_resched_lock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
+ schedule();
+ write_lock(&kvm->mmu_lock);
}
}
@@ -4756,7 +4760,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
struct kvm_memory_slot *memslot;
int i;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
slots = __kvm_memslots(kvm, i);
kvm_for_each_memslot(memslot, slots) {
@@ -4773,7 +4777,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
}
}
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
}
static bool slot_rmap_write_protect(struct kvm *kvm,
@@ -4787,10 +4791,10 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
{
bool flush;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
flush = slot_handle_all_level(kvm, memslot, slot_rmap_write_protect,
false);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
/*
* kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log()
@@ -4892,10 +4896,10 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
/* FIXME: const-ify all uses of struct kvm_memory_slot. */
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
kvm_mmu_zap_collapsible_spte, true);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
}
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
@@ -4903,9 +4907,9 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
{
bool flush;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
lockdep_assert_held(&kvm->slots_lock);
@@ -4925,10 +4929,10 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
{
bool flush;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
false);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
/* see kvm_mmu_slot_remove_write_access */
lockdep_assert_held(&kvm->slots_lock);
@@ -4943,9 +4947,9 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
{
bool flush;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
lockdep_assert_held(&kvm->slots_lock);
@@ -4985,8 +4989,10 @@ restart:
* Need not flush tlb since we only zap the sp with invalid
* generation number.
*/
- if (batch >= BATCH_ZAP_PAGES &&
- cond_resched_lock(&kvm->mmu_lock)) {
+ if (batch >= BATCH_ZAP_PAGES && need_resched()) {
+ write_unlock(&kvm->mmu_lock);
+ schedule();
+ write_lock(&kvm->mmu_lock);
batch = 0;
goto restart;
}
@@ -5017,7 +5023,7 @@ restart:
*/
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
{
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
trace_kvm_mmu_invalidate_zap_all_pages(kvm);
kvm->arch.mmu_valid_gen++;
@@ -5033,7 +5039,7 @@ void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
kvm_reload_remote_mmus(kvm);
kvm_zap_obsolete_pages(kvm);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
}
static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
@@ -5085,7 +5091,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
continue;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
if (kvm_has_zapped_obsolete_pages(kvm)) {
kvm_mmu_commit_zap_page(kvm,
@@ -5098,7 +5104,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
kvm_mmu_commit_zap_page(kvm, &invalid_list);
unlock:
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
/*
@@ -177,9 +177,9 @@ kvm_page_track_register_notifier(struct kvm *kvm,
head = &kvm->arch.track_notifier_head;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
}
/*
@@ -194,9 +194,9 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
head = &kvm->arch.track_notifier_head;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
hlist_del_rcu(&n->node);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
synchronize_srcu(&head->track_srcu);
}
@@ -792,7 +792,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
walker.pte_access &= ~ACC_EXEC_MASK;
}
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
@@ -804,12 +804,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
level, pfn, map_writable, prefault);
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
return r;
out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return 0;
}
@@ -846,7 +846,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
return;
}
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
for_each_shadow_entry(vcpu, gva, iterator) {
level = iterator.level;
sptep = iterator.sptep;
@@ -878,7 +878,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
break;
}
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
@@ -5210,9 +5210,9 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
if (vcpu->arch.mmu.direct_map) {
unsigned int indirect_shadow_pages;
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
if (indirect_shadow_pages)
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
@@ -408,7 +408,7 @@ struct kvm_mt {
};
struct kvm {
- spinlock_t mmu_lock;
+ rwlock_t mmu_lock;
struct mutex slots_lock;
struct mm_struct *mm; /* userspace tied to this vm */
struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
@@ -342,7 +342,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
* is going to be freed.
*/
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
@@ -350,7 +350,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
if (need_tlb_flush)
kvm_flush_remote_tlbs(kvm);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
kvm_arch_mmu_notifier_invalidate_page(kvm, address);
@@ -366,10 +366,10 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
int idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
kvm_set_spte_hva(kvm, address, pte);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -382,7 +382,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
int need_tlb_flush = 0, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
/*
* The count increase must become visible at unlock time as no
* spte can be established without taking the mmu_lock and
@@ -395,7 +395,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
if (need_tlb_flush)
kvm_flush_remote_tlbs(kvm);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -406,7 +406,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
/*
* This sequence increase will notify the kvm page fault that
* the page that is going to be mapped in the spte could have
@@ -420,7 +420,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
* in conjunction with the smp_rmb in mmu_notifier_retry().
*/
kvm->mmu_notifier_count--;
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
BUG_ON(kvm->mmu_notifier_count < 0);
}
@@ -434,13 +434,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
young = kvm_age_hva(kvm, start, end);
if (young)
kvm_flush_remote_tlbs(kvm);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
return young;
@@ -455,7 +455,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
/*
* Even though we do not flush TLB, this will still adversely
* affect performance on pre-Haswell Intel EPT, where there is
@@ -470,7 +470,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
* more sophisticated heuristic later.
*/
young = kvm_age_hva(kvm, start, end);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
return young;
@@ -484,9 +484,9 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
young = kvm_test_age_hva(kvm, address);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
return young;
@@ -593,7 +593,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (!kvm)
return ERR_PTR(-ENOMEM);
- spin_lock_init(&kvm->mmu_lock);
+ rwlock_init(&kvm->mmu_lock);
atomic_inc(&current->mm->mm_count);
kvm->mm = current->mm;
kvm_eventfd_init(kvm);
@@ -1165,7 +1165,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
memset(dirty_bitmap_buffer, 0, n);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
*is_dirty = false;
for (i = 0; i < n / sizeof(long); i++) {
unsigned long mask;
@@ -1186,7 +1186,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
}
}
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
r = -EFAULT;
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
@@ -3257,7 +3257,7 @@ static int mt_reset_all_gfns(struct kvm *kvm)
if (!kvm->mt.quiesced)
return -EINVAL;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
kvm_for_each_vcpu(i, vcpu, kvm) {
gfnlist = &vcpu->kvm->vcpu_mt[vcpu->vcpu_id].gfn_list;
@@ -3295,7 +3295,7 @@ global_gfn_loop:
if (count)
goto global_gfn_loop;
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
if (cleared)
@@ -3412,7 +3412,7 @@ static int mt_sublist_req_nowait(struct kvm *kvm,
* below, but must take mmu_lock _before_ gfnlist lock.
*/
if (reset)
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
if (need_locks)
spin_lock(&gfnlist->lock);
@@ -3440,7 +3440,7 @@ static int mt_sublist_req_nowait(struct kvm *kvm,
if (need_locks)
spin_unlock(&gfnlist->lock);
if (reset)
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
mutex_unlock(&gfnlist->mtx);
if (len != avail) {
@@ -3489,7 +3489,7 @@ static int mt_sublist_req_nowait(struct kvm *kvm,
* below, but must take mmu_lock _before_ gfnlist lock.
*/
if (reset)
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
if (need_locks)
spin_lock(&gfnlist->lock);
@@ -3517,7 +3517,7 @@ static int mt_sublist_req_nowait(struct kvm *kvm,
if (need_locks)
spin_unlock(&gfnlist->lock);
if (reset)
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
mutex_unlock(&gfnlist->mtx);
if (len != avail) {
Change mmu_lock to be rwlock_t. This change allows multiple threads to
harvest and process dirty memory in parallel using the new memory
tracking fetch ioctls. In all places where KVM previously used
spin_lock/unlock it now uses write_lock/unlock, preserving the original
KVM behavior. Threads that use the new fetch ioctls acquire mmu_lock
with read_lock/unlock.

Signed-off-by: Lei Cao <lei.cao@stratus.com>
---
 arch/x86/kvm/mmu.c         | 102 ++++++++++++++++++-----------------
 arch/x86/kvm/page_track.c  |   8 +--
 arch/x86/kvm/paging_tmpl.h |  10 ++--
 arch/x86/kvm/x86.c         |   4 +-
 include/linux/kvm_host.h   |   2 +-
 virt/kvm/kvm_main.c        |  46 ++++++++--------
 6 files changed, 89 insertions(+), 83 deletions(-)
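
As an illustration of the read-side locking this change enables, the sketch
below shows how a harvesting thread (e.g. behind one of the new fetch ioctls)
might take mmu_lock as a reader before taking a per-vcpu gfn list lock,
following the "mmu_lock _before_ gfnlist lock" ordering noted in
mt_sublist_req_nowait() above. This is a minimal sketch only; struct
kvm_gfn_list and mt_copy_gfns() are assumed names for illustration and are
not part of this patch.

    /*
     * Sketch only: several such readers can run concurrently, while the
     * fault handling and zap paths still serialize through write_lock().
     * struct kvm_gfn_list and mt_copy_gfns() are assumed, illustrative names.
     */
    static int mt_fetch_gfns_example(struct kvm *kvm,
                                     struct kvm_gfn_list *gfnlist)
    {
            int copied;

            read_lock(&kvm->mmu_lock);      /* mmu_lock before gfnlist lock */
            spin_lock(&gfnlist->lock);

            copied = mt_copy_gfns(kvm, gfnlist);    /* assumed helper */

            spin_unlock(&gfnlist->lock);
            read_unlock(&kvm->mmu_lock);

            return copied;
    }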