
[07/10] KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update

Message ID 20090921234124.596305294@amt.cnet (mailing list archive)
State New, archived

Commit Message

Marcelo Tosatti Sept. 21, 2009, 11:37 p.m. UTC
Use two steps for memslot deletion: mark the slot invalid (which stops 
instantiation of new shadow pages for that slot, but allows destruction),
then instantiate the new empty slot.

Also simplifies kvm_handle_hva locking.
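
In outline, the deletion path follows the RCU publish-then-wait pattern
(a condensed sketch of the code in this patch; error handling and the
create/update path omitted):

	/* 1) publish a copy of the memslots with the slot marked invalid */
	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
	slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
	old_memslots = kvm->memslots;
	rcu_assign_pointer(kvm->memslots, slots);

	/* 2) wait for all SRCU readers of the old memslots to finish */
	synchronize_srcu(&kvm->srcu);

	/* 3) no new shadow pages can reference the slot now; destroy the
	 *    existing ones and free the old copy */
	kvm_arch_flush_shadow(kvm);
	kfree(old_memslots);

A second copy/update/synchronize pass then installs the new (empty)
slot.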

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>




Comments

Avi Kivity Sept. 22, 2009, 6:59 a.m. UTC | #1
On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
> Use two steps for memslot deletion: mark the slot invalid (which stops
> instantiation of new shadow pages for that slot, but allows destruction),
> then instantiate the new empty slot.
>
> Also simplifies kvm_handle_hva locking.
>
>   unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
>   {
> -	int i;
> +	int i, idx;
>   	unsigned int nr_mmu_pages;
>   	unsigned int  nr_pages = 0;
> +	struct kvm_memslots *slots;
>
> -	for (i = 0; i < kvm->memslots->nmemslots; i++)
> -		nr_pages += kvm->memslots->memslots[i].npages;
> +	idx = srcu_read_lock(&kvm->srcu);
>    

Doesn't the caller hold the srcu_read_lock() here?

>
> Index: kvm-slotslock/arch/x86/kvm/vmx.c
> ===================================================================
> --- kvm-slotslock.orig/arch/x86/kvm/vmx.c
> +++ kvm-slotslock/arch/x86/kvm/vmx.c
> @@ -24,6 +24,7 @@
>   #include <linux/mm.h>
>   #include <linux/highmem.h>
>   #include <linux/sched.h>
> +#include <linux/srcu.h>
>   #include <linux/moduleparam.h>
>   #include <linux/ftrace_event.h>
>   #include "kvm_cache_regs.h"
> @@ -1465,10 +1466,18 @@ static void enter_pmode(struct kvm_vcpu
>   static gva_t rmode_tss_base(struct kvm *kvm)
>   {
>   	if (!kvm->arch.tss_addr) {
> -		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
> -				 kvm->memslots->memslots[0].npages - 3;
> +		struct kvm_memslots *slots;
> +		gfn_t base_gfn;
> +		int idx;
> +
> +		idx = srcu_read_lock(&kvm->srcu);
> +		slots = rcu_dereference(kvm->memslots);
> +		base_gfn = slots->memslots[0].base_gfn +
> +				 slots->memslots[0].npages - 3;
> +		srcu_read_unlock(&kvm->srcu, idx);
>   	return base_gfn << PAGE_SHIFT;
>   	}
> +
>    

And here?  Maybe we should take the srcu_lock in vcpu_load/put and only 
drop it when going into vcpu context or explicitly sleeping, just to 
simplify things.
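
Something like this sketch, say (srcu_idx would be a new field in
struct kvm_vcpu, not something this patch adds):

	void vcpu_load(struct kvm_vcpu *vcpu)
	{
		mutex_lock(&vcpu->mutex);
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		/* ... */
	}

	void vcpu_put(struct kvm_vcpu *vcpu)
	{
		/* ... */
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		mutex_unlock(&vcpu->mutex);
	}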
Fernando Carrijo Sept. 22, 2009, 10:40 a.m. UTC | #2
Resending with Cc: added

On Mon, 2009-09-21 at 20:37 -0300, Marcelo Tosatti wrote:

> plain text document attachment (introduce-srcu-and-use-for-slots)
> Use two steps for memslot deletion: mark the slot invalid (which stops 
> instantiation of new shadow pages for that slot, but allows destruction),
> then instantiate the new empty slot.
> 
> Also simplifies kvm_handle_hva locking.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> Index: kvm-slotslock/include/linux/kvm.h
> ===================================================================
> --- kvm-slotslock.orig/include/linux/kvm.h
> +++ kvm-slotslock/include/linux/kvm.h
> @@ -39,7 +39,7 @@ struct kvm_userspace_memory_region {
>  
>  /* for kvm_memory_region::flags */
>  #define KVM_MEM_LOG_DIRTY_PAGES  1UL
> -
> +#define KVM_MEMSLOT_INVALID      (1UL << 1)
>  
>  /* for KVM_IRQ_LINE */
>  struct kvm_irq_level {
> Index: kvm-slotslock/virt/kvm/kvm_main.c
> ===================================================================
> --- kvm-slotslock.orig/virt/kvm/kvm_main.c
> +++ kvm-slotslock/virt/kvm/kvm_main.c
> @@ -43,6 +43,7 @@
>  #include <linux/swap.h>
>  #include <linux/bitops.h>
>  #include <linux/spinlock.h>
> +#include <linux/srcu.h>
>  
>  #include <asm/processor.h>
>  #include <asm/io.h>
> @@ -352,11 +353,15 @@ static struct kvm *kvm_create_vm(void)
>  	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
>  	if (!kvm->memslots)
>  		goto out_err;
> +	if (init_srcu_struct(&kvm->srcu))
> +		goto out_err;
>  
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>  	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> -	if (!page)
> +	if (!page) {
> +		cleanup_srcu_struct(&kvm->srcu);
>  		goto out_err;
> +	}
>  
>  	kvm->coalesced_mmio_ring =
>  			(struct kvm_coalesced_mmio_ring *)page_address(page);
> @@ -367,6 +372,7 @@ static struct kvm *kvm_create_vm(void)
>  		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
>  		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
>  		if (r) {
> +			cleanup_srcu_struct(&kvm->srcu);
>  #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
>  			put_page(page);
>  #endif
> @@ -462,6 +468,7 @@ static void kvm_destroy_vm(struct kvm *k
>  	kvm_arch_flush_shadow(kvm);
>  #endif
>  	kvm_arch_destroy_vm(kvm);
> +	cleanup_srcu_struct(&kvm->srcu);
>  	hardware_disable_all();
>  	mmdrop(mm);
>  }
> @@ -502,12 +509,13 @@ int __kvm_set_memory_region(struct kvm *
>  			    struct kvm_userspace_memory_region *mem,
>  			    int user_alloc)
>  {
> -	int r;
> +	int r, flush_shadow = 0;
>  	gfn_t base_gfn;
>  	unsigned long npages;
>  	unsigned long i;
>  	struct kvm_memory_slot *memslot;
>  	struct kvm_memory_slot old, new;
> +	struct kvm_memslots *slots, *old_memslots;
>  
>  	r = -EINVAL;
>  	/* General sanity checks */
> @@ -569,15 +577,7 @@ int __kvm_set_memory_region(struct kvm *
>  		memset(new.rmap, 0, npages * sizeof(*new.rmap));
>  
>  		new.user_alloc = user_alloc;
> -		/*
> -		 * hva_to_rmmap() serialzies with the mmu_lock and to be
> -		 * safe it has to ignore memslots with !user_alloc &&
> -		 * !userspace_addr.
> -		 */
> -		if (user_alloc)
> -			new.userspace_addr = mem->userspace_addr;
> -		else
> -			new.userspace_addr = 0;
> +		new.userspace_addr = mem->userspace_addr;
>  	}
>  	if (!npages)
>  		goto skip_lpage;
> @@ -632,8 +632,9 @@ skip_lpage:
>  		if (!new.dirty_bitmap)
>  			goto out_free;
>  		memset(new.dirty_bitmap, 0, dirty_bytes);
> +		/* destroy any largepage mappings for dirty tracking */
>  		if (old.npages)
> -			kvm_arch_flush_shadow(kvm);
> +			flush_shadow = 1;
>  	}
>  #else  /* not defined CONFIG_S390 */
>  	new.user_alloc = user_alloc;
> @@ -641,34 +642,69 @@ skip_lpage:
>  		new.userspace_addr = mem->userspace_addr;
>  #endif /* not defined CONFIG_S390 */
>  
> -	if (!npages)
> +	if (!npages) {
> +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +		if (!slots)
> +			goto out_free;
> +		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));

Nothing wrong with the above line, but it makes me wonder whether

                  *slots = *kvm->memslots;

would save us the function call overhead.

> +		if (mem->slot >= slots->nmemslots)
> +			slots->nmemslots = mem->slot + 1;
> +		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
> +
> +		old_memslots = kvm->memslots;
> +		rcu_assign_pointer(kvm->memslots, slots);
> +		synchronize_srcu(&kvm->srcu);
> +		/* From this point no new shadow pages pointing to a deleted
> +		 * memslot will be created.
> +		 *
> +		 * validation of sp->gfn happens in:
> +		 *	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
> +		 *	- kvm_is_visible_gfn (mmu_check_roots)
> +		 */
>  		kvm_arch_flush_shadow(kvm);
> +		kfree(old_memslots);
> +	}
>  
>  	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
>  	if (r)
>  		goto out_free;
>  
> -	spin_lock(&kvm->mmu_lock);
> -	if (mem->slot >= kvm->memslots->nmemslots)
> -		kvm->memslots->nmemslots = mem->slot + 1;
> +#ifdef CONFIG_DMAR
> +	/* map the pages in iommu page table */
> +	if (npages) {
> +		r = kvm_iommu_map_pages(kvm, &new);
> +		if (r)
> +			goto out_free;
> +	}
> +#endif
>  
> -	*memslot = new;
> -	spin_unlock(&kvm->mmu_lock);
> +	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +	if (!slots)
> +		goto out_free;
> +	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));

Ditto

Cheers,

Fernando Carrijo.

Marcelo Tosatti Sept. 22, 2009, 12:55 p.m. UTC | #3
On Tue, Sep 22, 2009 at 07:40:10AM -0300, Fernando Carrijo wrote:
> Resending with Cc: added
> 
> On Mon, 2009-09-21 at 20:37 -0300, Marcelo Tosatti wrote:
> 
> > -			kvm_arch_flush_shadow(kvm);
> > +			flush_shadow = 1;
> >  	}
> >  #else  /* not defined CONFIG_S390 */
> >  	new.user_alloc = user_alloc;
> > @@ -641,34 +642,69 @@ skip_lpage:
> >  		new.userspace_addr = mem->userspace_addr;
> >  #endif /* not defined CONFIG_S390 */
> >  
> > -	if (!npages)
> > +	if (!npages) {
> > +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> > +		if (!slots)
> > +			goto out_free;
> > +		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> 
> Nothing wrong with the above line, but it makes me wonder whether
> 
>                   *slots = *kvm->memslots;
> 
> would save us the function call overhead.

Perhaps. But this is a slow path anyway, so it does not matter much.

Thanks for the review.

Marcelo Tosatti Sept. 22, 2009, 4:16 p.m. UTC | #4
On Tue, Sep 22, 2009 at 09:59:04AM +0300, Avi Kivity wrote:
> On 09/22/2009 02:37 AM, Marcelo Tosatti wrote:
>> Use two steps for memslot deletion: mark the slot invalid (which stops
>> instantiation of new shadow pages for that slot, but allows destruction),
>> then instantiate the new empty slot.
>>
>> Also simplifies kvm_handle_hva locking.
>>
>>   unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
>>   {
>> -	int i;
>> +	int i, idx;
>>   	unsigned int nr_mmu_pages;
>>   	unsigned int  nr_pages = 0;
>> +	struct kvm_memslots *slots;
>>
>> -	for (i = 0; i < kvm->memslots->nmemslots; i++)
>> -		nr_pages += kvm->memslots->memslots[i].npages;
>> +	idx = srcu_read_lock(&kvm->srcu);
>>    
>
> Doesn't the caller hold the srcu_read_lock() here?

No:

kvm_vm_ioctl_set_nr_mmu_pages -> kvm_mmu_change_mmu_pages

And even if the caller did, recursive "locking" is tolerated.
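
For example:

	idx1 = srcu_read_lock(&kvm->srcu);
	idx2 = srcu_read_lock(&kvm->srcu);	/* nesting is fine */
	srcu_read_unlock(&kvm->srcu, idx2);
	srcu_read_unlock(&kvm->srcu, idx1);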

>> Index: kvm-slotslock/arch/x86/kvm/vmx.c
>> ===================================================================
>> --- kvm-slotslock.orig/arch/x86/kvm/vmx.c
>> +++ kvm-slotslock/arch/x86/kvm/vmx.c
>> @@ -24,6 +24,7 @@
>>   #include <linux/mm.h>
>>   #include <linux/highmem.h>
>>   #include <linux/sched.h>
>> +#include <linux/srcu.h>
>>   #include <linux/moduleparam.h>
>>   #include <linux/ftrace_event.h>
>>   #include "kvm_cache_regs.h"
>> @@ -1465,10 +1466,18 @@ static void enter_pmode(struct kvm_vcpu
>>   static gva_t rmode_tss_base(struct kvm *kvm)
>>   {
>>   	if (!kvm->arch.tss_addr) {
>> -		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
>> -				 kvm->memslots->memslots[0].npages - 3;
>> +		struct kvm_memslots *slots;
>> +		gfn_t base_gfn;
>> +		int idx;
>> +
>> +		idx = srcu_read_lock(&kvm->srcu);
>> +		slots = rcu_dereference(kvm->memslots);
>> +		base_gfn = slots->memslots[0].base_gfn +
>> +				 slots->memslots[0].npages - 3;
>> +		srcu_read_unlock(&kvm->srcu, idx);
>>   	return base_gfn << PAGE_SHIFT;
>>   	}
>> +
>>    
>
> And here?  

kvm_arch_vcpu_ioctl_set_sregs -> kvm_x86_ops->set_cr0.

> Maybe we should take the srcu_lock in vcpu_load/put and only  
> drop it when going into vcpu context or explicitly sleeping, just to  
> simplify things.

Hum, possible, but I'd rather leave it for later.

Marcelo Tosatti Sept. 24, 2009, 2:06 p.m. UTC | #5
On Mon, Sep 21, 2009 at 08:37:18PM -0300, Marcelo Tosatti wrote:
> Use two steps for memslot deletion: mark the slot invalid (which stops 
> instantiation of new shadow pages for that slot, but allows destruction),
> then instantiate the new empty slot.
> 
> Also simplifies kvm_handle_hva locking.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 

<snip>

> -	if (!npages)
> +	if (!npages) {
> +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +		if (!slots)
> +			goto out_free;
> +		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> +		if (mem->slot >= slots->nmemslots)
> +			slots->nmemslots = mem->slot + 1;
> +		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
> +
> +		old_memslots = kvm->memslots;
> +		rcu_assign_pointer(kvm->memslots, slots);
> +		synchronize_srcu(&kvm->srcu);
> +		/* From this point no new shadow pages pointing to a deleted
> +		 * memslot will be created.
> +		 *
> +		 * validation of sp->gfn happens in:
> +		 *	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
> +		 *	- kvm_is_visible_gfn (mmu_check_roots)
> +		 */
>  		kvm_arch_flush_shadow(kvm);
> +		kfree(old_memslots);
> +	}
>  
>  	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
>  	if (r)
>  		goto out_free;
>  
> -	spin_lock(&kvm->mmu_lock);
> -	if (mem->slot >= kvm->memslots->nmemslots)
> -		kvm->memslots->nmemslots = mem->slot + 1;
> +#ifdef CONFIG_DMAR
> +	/* map the pages in iommu page table */
> +	if (npages) {
> +		r = kvm_iommu_map_pages(kvm, &new);
> +		if (r)
> +			goto out_free;
> +	}
> +#endif
>  
> -	*memslot = new;
> -	spin_unlock(&kvm->mmu_lock);
> +	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> +	if (!slots)
> +		goto out_free;
> +	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> +	if (mem->slot >= slots->nmemslots)
> +		slots->nmemslots = mem->slot + 1;
> +
> +	/* actual memory is freed via old in kvm_free_physmem_slot below */
> +	if (!npages) {
> +		new.rmap = NULL;
> +		new.dirty_bitmap = NULL;
> +		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
> +			new.lpage_info[i] = NULL;
> +	}
> +
> +	slots->memslots[mem->slot] = new;
> +	old_memslots = kvm->memslots;
> +	rcu_assign_pointer(kvm->memslots, slots);
> +	synchronize_srcu(&kvm->srcu);
>  
>  	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);

Paul,

There is a scenario where this path, which updates KVM memory slots, is
called relatively often.

Each synchronize_srcu() call takes about 10ms (avg 3ms per
synchronize_sched call), so this is hurting us.

Is this expected? Is there any possibility for synchronize_srcu()
optimization?

There are other sides we can work on, such as reducing the memory slot 
updates, but I'm wondering what can be done regarding SRCU itself.

TIA

Paul E. McKenney Sept. 24, 2009, 5:28 p.m. UTC | #6
On Thu, Sep 24, 2009 at 11:06:51AM -0300, Marcelo Tosatti wrote:
> On Mon, Sep 21, 2009 at 08:37:18PM -0300, Marcelo Tosatti wrote:
> > Use two steps for memslot deletion: mark the slot invalid (which stops 
> > instantiation of new shadow pages for that slot, but allows destruction),
> > then instantiate the new empty slot.
> > 
> > Also simplifies kvm_handle_hva locking.
> > 
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> > 
> 
> <snip>
> 
> > -	if (!npages)
> > +	if (!npages) {
> > +		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> > +		if (!slots)
> > +			goto out_free;
> > +		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> > +		if (mem->slot >= slots->nmemslots)
> > +			slots->nmemslots = mem->slot + 1;
> > +		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
> > +
> > +		old_memslots = kvm->memslots;
> > +		rcu_assign_pointer(kvm->memslots, slots);
> > +		synchronize_srcu(&kvm->srcu);
> > +		/* From this point no new shadow pages pointing to a deleted
> > +		 * memslot will be created.
> > +		 *
> > +		 * validation of sp->gfn happens in:
> > +		 *	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
> > +		 *	- kvm_is_visible_gfn (mmu_check_roots)
> > +		 */
> >  		kvm_arch_flush_shadow(kvm);
> > +		kfree(old_memslots);
> > +	}
> >  
> >  	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
> >  	if (r)
> >  		goto out_free;
> >  
> > -	spin_lock(&kvm->mmu_lock);
> > -	if (mem->slot >= kvm->memslots->nmemslots)
> > -		kvm->memslots->nmemslots = mem->slot + 1;
> > +#ifdef CONFIG_DMAR
> > +	/* map the pages in iommu page table */
> > +	if (npages) {
> > +		r = kvm_iommu_map_pages(kvm, &new);
> > +		if (r)
> > +			goto out_free;
> > +	}
> > +#endif
> >  
> > -	*memslot = new;
> > -	spin_unlock(&kvm->mmu_lock);
> > +	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
> > +	if (!slots)
> > +		goto out_free;
> > +	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
> > +	if (mem->slot >= slots->nmemslots)
> > +		slots->nmemslots = mem->slot + 1;
> > +
> > +	/* actual memory is freed via old in kvm_free_physmem_slot below */
> > +	if (!npages) {
> > +		new.rmap = NULL;
> > +		new.dirty_bitmap = NULL;
> > +		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
> > +			new.lpage_info[i] = NULL;
> > +	}
> > +
> > +	slots->memslots[mem->slot] = new;
> > +	old_memslots = kvm->memslots;
> > +	rcu_assign_pointer(kvm->memslots, slots);
> > +	synchronize_srcu(&kvm->srcu);
> >  
> >  	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
> 
> Paul,
> 
> There is a scenario where this path, which updates KVM memory slots, is
> called relatively often.
> 
> Each synchronize_srcu() call takes about 10ms (avg 3ms per
> synchronize_sched call), so this is hurting us.
> 
> Is this expected? Is there any possibility for synchronize_srcu()
> optimization?
> 
> There are other sides we can work on, such as reducing the memory slot 
> updates, but I'm wondering what can be done regarding SRCU itself.

This is expected behavior, but there is a possible fix currently
in mainline (Linus's git tree).  The idea would be to create a
synchronize_srcu_expedited(), which starts with synchronize_srcu(), and
replaces the synchronize_sched() calls with synchronize_sched_expedited().

This could potentially reduce the overall synchronize_srcu() latency
to well under a microsecond.  The price to be paid is that each instance
of synchronize_sched_expedited() IPIs all the online CPUs, and awakens
the migration thread on each.
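
Roughly like this (a sketch only; the __synchronize_srcu() helper
below is hypothetical, not today's code):

	/* same flip-and-wait logic as synchronize_srcu(), but with the
	 * grace-period primitive passed in */
	static void __synchronize_srcu(struct srcu_struct *sp,
				       void (*sync)(void))
	{
		/* ... current synchronize_srcu() body, with each
		 * synchronize_sched() call replaced by sync() ... */
	}

	void synchronize_srcu_expedited(struct srcu_struct *sp)
	{
		__synchronize_srcu(sp, synchronize_sched_expedited);
	}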

Would this approach likely work for you?

							Thanx, Paul
Marcelo Tosatti Sept. 24, 2009, 6:05 p.m. UTC | #7
On Thu, Sep 24, 2009 at 10:28:41AM -0700, Paul E. McKenney wrote:
> > Paul,
> > 
> > There is a scenario where this path, which updates KVM memory slots, is
> > called relatively often.
> > 
> > Each synchronize_srcu() call takes about 10ms (avg 3ms per
> > synchronize_sched call), so this is hurting us.
> > 
> > Is this expected? Is there any possibility for synchronize_srcu()
> > optimization?
> > 
> > There are other sides we can work on, such as reducing the memory slot 
> > updates, but I'm wondering what can be done regarding SRCU itself.
> 
> This is expected behavior, but there is a possible fix currently
> in mainline (Linus's git tree).  The idea would be to create a
> synchronize_srcu_expedited(), which starts with synchronize_srcu(), and
> replaces the synchronize_sched() calls with synchronize_sched_expedited().
> 
> This could potentially reduce the overall synchronize_srcu() latency
> to well under a microsecond.  The price to be paid is that each instance
> of synchronize_sched_expedited() IPIs all the online CPUs, and awakens
> the migration thread on each.
> 
> Would this approach likely work for you?

Hum, this path can be triggered by a guest, so IPI'ing all online CPUs 
is not a happy thought.

Avi Kivity Sept. 25, 2009, 3:05 p.m. UTC | #8
On 09/24/2009 08:28 PM, Paul E. McKenney wrote:
>
>> Each synchronize_srcu() call takes about 10ms (avg 3ms per
>> synchronize_sched call), so this is hurting us.
>>
>> Is this expected? Is there any possibility for synchronize_srcu()
>> optimization?
>>
>> There are other sides we can work on, such as reducing the memory slot
>> updates, but I'm wondering what can be done regarding SRCU itself.
>>      
> This is expected behavior, but there is a possible fix currently
> in mainline (Linus's git tree).  The idea would be to create a
> synchronize_srcu_expedited(), which starts with synchronize_srcu(), and
> replaces the synchronize_sched() calls with synchronize_sched_expedited().
>
> This could potentially reduce the overall synchronize_srcu() latency
> to well under a microsecond.  The price to be paid is that each instance
> of synchronize_sched_expedited() IPIs all the online CPUs, and awakens
> the migration thread on each.
>
> Would this approach likely work for you?
>    

It's perfect.

Patch

Index: kvm-slotslock/include/linux/kvm.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm.h
+++ kvm-slotslock/include/linux/kvm.h
@@ -39,7 +39,7 @@  struct kvm_userspace_memory_region {
 
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES  1UL
-
+#define KVM_MEMSLOT_INVALID      (1UL << 1)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
Index: kvm-slotslock/virt/kvm/kvm_main.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/kvm_main.c
+++ kvm-slotslock/virt/kvm/kvm_main.c
@@ -43,6 +43,7 @@ 
 #include <linux/swap.h>
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
+#include <linux/srcu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -352,11 +353,15 @@  static struct kvm *kvm_create_vm(void)
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
 		goto out_err;
+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err;
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page)
+	if (!page) {
+		cleanup_srcu_struct(&kvm->srcu);
 		goto out_err;
+	}
 
 	kvm->coalesced_mmio_ring =
 			(struct kvm_coalesced_mmio_ring *)page_address(page);
@@ -367,6 +372,7 @@  static struct kvm *kvm_create_vm(void)
 		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
 		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
 		if (r) {
+			cleanup_srcu_struct(&kvm->srcu);
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 			put_page(page);
 #endif
@@ -462,6 +468,7 @@  static void kvm_destroy_vm(struct kvm *k
 	kvm_arch_flush_shadow(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
+	cleanup_srcu_struct(&kvm->srcu);
 	hardware_disable_all();
 	mmdrop(mm);
 }
@@ -502,12 +509,13 @@  int __kvm_set_memory_region(struct kvm *
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r;
+	int r, flush_shadow = 0;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
+	struct kvm_memslots *slots, *old_memslots;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -569,15 +577,7 @@  int __kvm_set_memory_region(struct kvm *
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		/*
-		 * hva_to_rmmap() serialzies with the mmu_lock and to be
-		 * safe it has to ignore memslots with !user_alloc &&
-		 * !userspace_addr.
-		 */
-		if (user_alloc)
-			new.userspace_addr = mem->userspace_addr;
-		else
-			new.userspace_addr = 0;
+		new.userspace_addr = mem->userspace_addr;
 	}
 	if (!npages)
 		goto skip_lpage;
@@ -632,8 +632,9 @@  skip_lpage:
 		if (!new.dirty_bitmap)
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
+		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
-			kvm_arch_flush_shadow(kvm);
+			flush_shadow = 1;
 	}
 #else  /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -641,34 +642,70 @@  skip_lpage:
 		new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		if (mem->slot >= slots->nmemslots)
+			slots->nmemslots = mem->slot + 1;
+		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+		old_memslots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu(&kvm->srcu);
+		/* From this point no new shadow pages pointing to a deleted
+		 * memslot will be created.
+		 *
+		 * validation of sp->gfn happens in:
+		 *	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+		 *	- kvm_is_visible_gfn (mmu_check_roots)
+		 */
 		kvm_arch_flush_shadow(kvm);
+		kfree(old_memslots);
+	}
 
 	r = kvm_arch_prepare_memory_region(kvm, &new, old, user_alloc);
 	if (r)
 		goto out_free;
 
-	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->memslots->nmemslots)
-		kvm->memslots->nmemslots = mem->slot + 1;
+#ifdef CONFIG_DMAR
+	/* map the pages in iommu page table */
+	if (npages) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		if (r)
+			goto out_free;
+	}
+#endif
 
-	*memslot = new;
-	spin_unlock(&kvm->mmu_lock);
+	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+	if (mem->slot >= slots->nmemslots)
+		slots->nmemslots = mem->slot + 1;
+
+	/* actual memory is freed via old in kvm_free_physmem_slot below */
+	if (!npages) {
+		new.rmap = NULL;
+		new.dirty_bitmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+			new.lpage_info[i] = NULL;
+	}
+
+	slots->memslots[mem->slot] = new;
+	old_memslots = kvm->memslots;
+	rcu_assign_pointer(kvm->memslots, slots);
+	synchronize_srcu(&kvm->srcu);
 
 	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	spin_lock(&kvm->mmu_lock);
-	if (!npages)
-		*memslot = old;
-	spin_unlock(&kvm->mmu_lock);
-#ifdef CONFIG_DMAR
-	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, memslot);
-	if (r)
-		goto out;
-#endif
+	kvm_free_physmem_slot(&old, &new);
+	kfree(old_memslots);
+
+	if (flush_shadow)
+		kvm_arch_flush_shadow(kvm);
+
 	return 0;
 
 out_free:
@@ -768,7 +804,7 @@  EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -790,12 +826,15 @@  struct kvm_memory_slot *gfn_to_memslot(s
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
 	gfn = unalias_gfn(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
+		if (memslot->flags & KVM_MEMSLOT_INVALID)
+			continue;
+
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
 			return 1;
@@ -810,7 +849,7 @@  unsigned long gfn_to_hva(struct kvm *kvm
 
 	gfn = unalias_gfn(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
-	if (!slot)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
Index: kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
===================================================================
--- kvm-slotslock.orig/arch/ia64/kvm/kvm-ia64.c
+++ kvm-slotslock/arch/ia64/kvm/kvm-ia64.c
@@ -1834,6 +1834,7 @@  int kvm_vm_ioctl_get_dirty_log(struct kv
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
+	down_write(&kvm->slots_lock);
 	spin_lock(&kvm->arch.dirty_log_lock);
 
 	r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1853,6 +1854,7 @@  int kvm_vm_ioctl_get_dirty_log(struct kv
 	}
 	r = 0;
 out:
+	up_write(&kvm->slots_lock);
 	spin_unlock(&kvm->arch.dirty_log_lock);
 	return r;
 }
Index: kvm-slotslock/arch/x86/kvm/mmu.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/mmu.c
+++ kvm-slotslock/arch/x86/kvm/mmu.c
@@ -29,6 +29,7 @@ 
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
 #include <linux/compiler.h>
+#include <linux/srcu.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -766,23 +767,18 @@  static int kvm_unmap_rmapp(struct kvm *k
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
 {
-	int i, j;
+	int i, j, idx;
 	int retval = 0;
-	struct kvm_memslots *slots = kvm->memslots;
+	struct kvm_memslots *slots;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
 
-	/*
-	 * If mmap_sem isn't taken, we can look the memslots with only
-	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
-	 */
 	for (i = 0; i < slots->nmemslots; i++) {
 		struct kvm_memory_slot *memslot = &slots->memslots[i];
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
-		/* mmu_lock protects userspace_addr */
-		if (!start)
-			continue;
-
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
@@ -797,6 +793,7 @@  static int kvm_handle_hva(struct kvm *kv
 			}
 		}
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	return retval;
 }
@@ -2966,16 +2963,20 @@  nomem:
  */
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 {
-	int i;
+	int i, idx;
 	unsigned int nr_mmu_pages;
 	unsigned int  nr_pages = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->memslots->nmemslots; i++)
-		nr_pages += kvm->memslots->memslots[i].npages;
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
+	for (i = 0; i < slots->nmemslots; i++)
+		nr_pages += slots->memslots[i].npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
 			(unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	return nr_mmu_pages;
 }
@@ -3241,10 +3242,13 @@  static void audit_mappings(struct kvm_vc
 static int count_rmaps(struct kvm_vcpu *vcpu)
 {
+	struct kvm_memslots *slots;
 	int nmaps = 0;
-	int i, j, k;
+	int i, j, k, idx;
 
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	slots = rcu_dereference(vcpu->kvm->memslots);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i];
+		struct kvm_memory_slot *m = &slots->memslots[i];
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
@@ -3267,6 +3270,7 @@  static int count_rmaps(struct kvm_vcpu *
 			}
 		}
 	}
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	return nmaps;
 }
 
Index: kvm-slotslock/arch/x86/kvm/vmx.c
===================================================================
--- kvm-slotslock.orig/arch/x86/kvm/vmx.c
+++ kvm-slotslock/arch/x86/kvm/vmx.c
@@ -24,6 +24,7 @@ 
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/sched.h>
+#include <linux/srcu.h>
 #include <linux/moduleparam.h>
 #include <linux/ftrace_event.h>
 #include "kvm_cache_regs.h"
@@ -1465,10 +1466,18 @@  static void enter_pmode(struct kvm_vcpu 
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
-		gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn +
-				 kvm->memslots->memslots[0].npages - 3;
+		struct kvm_memslots *slots;
+		gfn_t base_gfn;
+		int idx;
+
+		idx = srcu_read_lock(&kvm->srcu);
+		slots = rcu_dereference(kvm->memslots);
+		base_gfn = slots->memslots[0].base_gfn +
+				 slots->memslots[0].npages - 3;
+		srcu_read_unlock(&kvm->srcu, idx);
 		return base_gfn << PAGE_SHIFT;
 	}
+
 	return kvm->arch.tss_addr;
 }
 
Index: kvm-slotslock/include/linux/kvm_host.h
===================================================================
--- kvm-slotslock.orig/include/linux/kvm_host.h
+++ kvm-slotslock/include/linux/kvm_host.h
@@ -163,6 +163,7 @@  struct kvm {
 	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
+	struct srcu_struct srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 	struct kvm_vcpu *bsp_vcpu;
Index: kvm-slotslock/virt/kvm/assigned-dev.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/assigned-dev.c
+++ kvm-slotslock/virt/kvm/assigned-dev.c
@@ -504,11 +504,11 @@  out:
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
-	int r = 0;
+	int r = 0, idx;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
-	down_read(&kvm->slots_lock);
+	idx = srcu_read_lock(&kvm->srcu);
 	mutex_lock(&kvm->lock);
 
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
@@ -574,7 +574,7 @@  static int kvm_vm_ioctl_assign_device(st
 
 out:
 	mutex_unlock(&kvm->lock);
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	return r;
 out_list_del:
 	list_del(&match->list);
@@ -586,7 +586,7 @@  out_put:
 out_free:
 	kfree(match);
 	mutex_unlock(&kvm->lock);
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	return r;
 }
 
Index: kvm-slotslock/virt/kvm/iommu.c
===================================================================
--- kvm-slotslock.orig/virt/kvm/iommu.c
+++ kvm-slotslock/virt/kvm/iommu.c
@@ -78,7 +78,7 @@  static int kvm_iommu_map_memslots(struct
 	int i, r = 0;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
@@ -214,7 +214,7 @@  static int kvm_iommu_unmap_memslots(stru
 	int i;
 	struct kvm_memslots *slots;
 
-	slots = kvm->memslots;
+	slots = rcu_dereference(kvm->memslots);
 
 	for (i = 0; i < slots->nmemslots; i++) {
 		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,