diff mbox

[v2] MMIO: Make coalesced mmio use a device per zone

Message ID 1311071471-15546-1-git-send-email-levinsasha928@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sasha Levin July 19, 2011, 10:31 a.m. UTC
This patch changes coalesced mmio to create one mmio device per
zone instead of handling all zones in one device.

Doing so enables us to take advantage of existing locking and prevents
a race condition between coalesced mmio registration/unregistration
and lookups.

Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Suggested-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
 include/linux/kvm_host.h  |    7 ++-
 virt/kvm/coalesced_mmio.c |  114 ++++++++++++++++----------------------------
 virt/kvm/coalesced_mmio.h |    7 +--
 3 files changed, 50 insertions(+), 78 deletions(-)

Comments

Avi Kivity July 19, 2011, 10:57 a.m. UTC | #1
On 07/19/2011 01:31 PM, Sasha Levin wrote:
> This patch changes coalesced mmio to create one mmio device per
> zone instead of handling all zones in one device.
>
> Doing so enables us to take advantage of existing locking and prevents
> a race condition between coalesced mmio registration/unregistration
> and lookups.
>
> @@ -63,7 +63,7 @@ extern struct kmem_cache *kvm_vcpu_cache;
>    */
>   struct kvm_io_bus {
>   	int                   dev_count;
> -#define NR_IOBUS_DEVS 200
> +#define NR_IOBUS_DEVS 300
>   	struct kvm_io_device *devs[NR_IOBUS_DEVS];
>   };

This means that a lot of non-coalesced-mmio users can squeeze out 
coalesced-mmio.  I don't know if it's really worthwhile, but the 100 
coalesced mmio slots should be reserved so we are guaranteed they are 
available.

>
> @@ -95,6 +85,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
>   {
>   	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
>
> +	list_del(&dev->list);
> +
>   	kfree(dev);
>   }
>

No lock?

>   int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
>   					   struct kvm_coalesced_mmio_zone *zone)
>   {
> -	int i;
> -	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
> -	struct kvm_coalesced_mmio_zone *z;
> -
> -	if (dev == NULL)
> -		return -ENXIO;
> +	struct kvm_coalesced_mmio_dev *dev;
>
>   	mutex_lock(&kvm->slots_lock);
>
> -	i = dev->nb_zones;
> -	while (i) {
> -		z = &dev->zone[i - 1];
> -
> -		/* unregister all zones
> -		 * included in (zone->addr, zone->size)
> -		 */
> -
> -		if (zone->addr <= z->addr &&
> -		    z->addr + z->size <= zone->addr + zone->size) {
> -			dev->nb_zones--;
> -			*z = dev->zone[dev->nb_zones];
> +	list_for_each_entry(dev, &kvm->coalesced_zones.items, list)
> +		if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
> +			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev);
> +			kvm_iodevice_destructor(&dev->dev);
>   		}
> -		i--;
> -	}

No lock?

>
>   struct kvm_coalesced_mmio_dev {
> +	struct list_head list;
>   	struct kvm_io_device dev;
>   	struct kvm *kvm;
> -	spinlock_t lock;
> -	int nb_zones;
> -	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
> +	struct kvm_coalesced_mmio_zone zone;
>   };
>

Why a list instead of a linear array?
Sasha Levin July 19, 2011, 11:05 a.m. UTC | #2
On Tue, 2011-07-19 at 13:57 +0300, Avi Kivity wrote:
> On 07/19/2011 01:31 PM, Sasha Levin wrote:
> > This patch changes coalesced mmio to create one mmio device per
> > zone instead of handling all zones in one device.
> >
> > Doing so enables us to take advantage of existing locking and prevents
> > a race condition between coalesced mmio registration/unregistration
> > and lookups.
> >
> > @@ -63,7 +63,7 @@ extern struct kmem_cache *kvm_vcpu_cache;
> >    */
> >   struct kvm_io_bus {
> >   	int                   dev_count;
> > -#define NR_IOBUS_DEVS 200
> > +#define NR_IOBUS_DEVS 300
> >   	struct kvm_io_device *devs[NR_IOBUS_DEVS];
> >   };
> 
> This means that a lot of non-coalesced-mmio users can squeeze out 
> coalesced-mmio.  I don't know if it's really worthwhile, but the 100 
> coalesced mmio slots should be reserved so we are guaranteed they are 
> available.

We are currently registering 4 devices, plus however many
ioeventfds/coalesced mmio zones the user wants. I felt bad about upping
it to 300, really.

> 
> >
> > @@ -95,6 +85,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
> >   {
> >   	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
> >
> > +	list_del(&dev->list);
> > +
> >   	kfree(dev);
> >   }
> >
> 
> No lock?

The lock is there to synchronize access to the coalesced ring (it was
here before this patch too, it's not something new), not the device
list.

The device list is only accessed when kvm->slots_lock is held, so it
takes care of that.

> 
> >   int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
> >   					   struct kvm_coalesced_mmio_zone *zone)
> >   {
> > -	int i;
> > -	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
> > -	struct kvm_coalesced_mmio_zone *z;
> > -
> > -	if (dev == NULL)
> > -		return -ENXIO;
> > +	struct kvm_coalesced_mmio_dev *dev;
> >
> >   	mutex_lock(&kvm->slots_lock);
> >
> > -	i = dev->nb_zones;
> > -	while (i) {
> > -		z = &dev->zone[i - 1];
> > -
> > -		/* unregister all zones
> > -		 * included in (zone->addr, zone->size)
> > -		 */
> > -
> > -		if (zone->addr <= z->addr &&
> > -		    z->addr + z->size <= zone->addr + zone->size) {
> > -			dev->nb_zones--;
> > -			*z = dev->zone[dev->nb_zones];
> > +	list_for_each_entry(dev, &kvm->coalesced_zones.items, list)
> > +		if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
> > +			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev);
> > +			kvm_iodevice_destructor(&dev->dev);
> >   		}
> > -		i--;
> > -	}
> 
> No lock?
> 
> >
> >   struct kvm_coalesced_mmio_dev {
> > +	struct list_head list;
> >   	struct kvm_io_device dev;
> >   	struct kvm *kvm;
> > -	spinlock_t lock;
> > -	int nb_zones;
> > -	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
> > +	struct kvm_coalesced_mmio_zone zone;
> >   };
> >
> 
> Why a list instead of a linear array?
> 

We have an unknown number of coalesced devices, which we allocate
dynamically on creation, so it sounded more logical to me to just chain
them in a list.
Avi Kivity July 19, 2011, 12:24 p.m. UTC | #3
On 07/19/2011 02:05 PM, Sasha Levin wrote:
> On Tue, 2011-07-19 at 13:57 +0300, Avi Kivity wrote:
> >  On 07/19/2011 01:31 PM, Sasha Levin wrote:
> >  >  This patch changes coalesced mmio to create one mmio device per
> >  >  zone instead of handling all zones in one device.
> >  >
> >  >  Doing so enables us to take advantage of existing locking and prevents
> >  >  a race condition between coalesced mmio registration/unregistration
> >  >  and lookups.
> >  >
> >  >  @@ -63,7 +63,7 @@ extern struct kmem_cache *kvm_vcpu_cache;
> >  >     */
> >  >    struct kvm_io_bus {
> >  >    	int                   dev_count;
> >  >  -#define NR_IOBUS_DEVS 200
> >  >  +#define NR_IOBUS_DEVS 300
> >  >    	struct kvm_io_device *devs[NR_IOBUS_DEVS];
> >  >    };
> >
> >  This means that a lot of non-coalesced-mmio users can squeeze out
> >  coalesced-mmio.  I don't know if it's really worthwhile, but the 100
> >  coalesced mmio slots should be reserved so we are guaranteed they are
> >  available.
>
> We are currently registering 4 devices, plus how many
> ioeventfds/coalesced mmio zones the user wants. I felt bad about upping
> it to 300 really.

It's just a few kilobytes, where even a small guest occupies half a 
gigabyte.  Even just its pagetables swallow up megabytes.

An array means fewer opportunities to screw up the code and better cache 
usage with small objects.

> >
> >  >
> >  >  @@ -95,6 +85,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
> >  >    {
> >  >    	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
> >  >
> >  >  +	list_del(&dev->list);
> >  >  +
> >  >    	kfree(dev);
> >  >    }
> >  >
> >
> >  No lock?
>
> The lock is there to synchronize access to the coalesced ring (it was
> here before this patch too, it's not something new), not the device
> list.
>
> The device list is only accessed when kvm->slots_lock is held, so it
> takes care of that.

Right.  A comment please.

Btw, don't we leak all zones on guest destruction? The array didn't need 
any cleanup, but this list does.
Sasha Levin July 19, 2011, 12:34 p.m. UTC | #4
On Tue, 2011-07-19 at 15:24 +0300, Avi Kivity wrote:
> On 07/19/2011 02:05 PM, Sasha Levin wrote:
> > On Tue, 2011-07-19 at 13:57 +0300, Avi Kivity wrote:
> > >  On 07/19/2011 01:31 PM, Sasha Levin wrote:
> > >  >  This patch changes coalesced mmio to create one mmio device per
> > >  >  zone instead of handling all zones in one device.
> > >  >
> > >  >  Doing so enables us to take advantage of existing locking and prevents
> > >  >  a race condition between coalesced mmio registration/unregistration
> > >  >  and lookups.
> > >  >
> > >  >  @@ -63,7 +63,7 @@ extern struct kmem_cache *kvm_vcpu_cache;
> > >  >     */
> > >  >    struct kvm_io_bus {
> > >  >    	int                   dev_count;
> > >  >  -#define NR_IOBUS_DEVS 200
> > >  >  +#define NR_IOBUS_DEVS 300
> > >  >    	struct kvm_io_device *devs[NR_IOBUS_DEVS];
> > >  >    };
> > >
> > >  This means that a lot of non-coalesced-mmio users can squeeze out
> > >  coalesced-mmio.  I don't know if it's really worthwhile, but the 100
> > >  coalesced mmio slots should be reserved so we are guaranteed they are
> > >  available.
> >
> > We are currently registering 4 devices, plus how many
> > ioeventfds/coalesced mmio zones the user wants. I felt bad about upping
> > it to 300 really.
> 
> It's just a few kilobytes, where even a small guest occupies half a 
> gigabyte.  Even just its pagetables swallow up megabytes.
> 
> An array means less opportunities to screw up the code and better cache 
> usage with small objects.
> 
> > >
> > >  >
> > >  >  @@ -95,6 +85,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
> > >  >    {
> > >  >    	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
> > >  >
> > >  >  +	list_del(&dev->list);
> > >  >  +
> > >  >    	kfree(dev);
> > >  >    }
> > >  >
> > >
> > >  No lock?
> >
> > The lock is there to synchronize access to the coalesced ring (it was
> > here before this patch too, it's not something new), not the device
> > list.
> >
> > The device list is only accessed when kvm->slots_lock is held, so it
> > takes care of that.
> 
> Right.  A comment please.
> 
> btw, don't we leak all zones on guest destruction? the array didn't need 
> any cleanup, but this list does.
> 

No, the destructor is called for all devices on the bus when the bus is
going down. We're handling it in coalesced_mmio_destructor() which frees
the device.
Avi Kivity July 19, 2011, 12:39 p.m. UTC | #5
On 07/19/2011 03:34 PM, Sasha Levin wrote:
> >
> >  btw, don't we leak all zones on guest destruction? the array didn't need
> >  any cleanup, but this list does.
> >
>
> No, the destructor is called for all devices on the bus when the bus is
> going down. We're handling it in coalesced_mmio_destructor() which frees
> the device.

Ah, okay.  A somewhat strange setup.
diff mbox

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eabb21a..b1d7f2d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -63,7 +63,7 @@  extern struct kmem_cache *kvm_vcpu_cache;
  */
 struct kvm_io_bus {
 	int                   dev_count;
-#define NR_IOBUS_DEVS 200
+#define NR_IOBUS_DEVS 300
 	struct kvm_io_device *devs[NR_IOBUS_DEVS];
 };
 
@@ -256,8 +256,11 @@  struct kvm {
 	struct kvm_arch arch;
 	atomic_t users_count;
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	struct kvm_coalesced_mmio_dev *coalesced_mmio_dev;
 	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
+	struct {
+		spinlock_t lock;
+		struct list_head items;
+	} coalesced_zones;
 #endif
 
 	struct mutex irq_lock;
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index ae075dc..01049a5 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -24,23 +24,13 @@  static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
 static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
 				   gpa_t addr, int len)
 {
-	struct kvm_coalesced_mmio_zone *zone;
-	int i;
-
-	/* is it in a batchable area ? */
-
-	for (i = 0; i < dev->nb_zones; i++) {
-		zone = &dev->zone[i];
-
-		/* (addr,len) is fully included in
-		 * (zone->addr, zone->size)
-		 */
+	/* is it in a batchable area ?
+	 * (addr,len) is fully included in
+	 * (zone->addr, zone->size)
+	 */
 
-		if (zone->addr <= addr &&
-		    addr + len <= zone->addr + zone->size)
-			return 1;
-	}
-	return 0;
+	return (dev->zone.addr <= addr &&
+		addr + len <= dev->zone.addr + dev->zone.size);
 }
 
 static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
@@ -73,10 +63,10 @@  static int coalesced_mmio_write(struct kvm_io_device *this,
 	if (!coalesced_mmio_in_range(dev, addr, len))
 		return -EOPNOTSUPP;
 
-	spin_lock(&dev->lock);
+	spin_lock(&dev->kvm->coalesced_zones.lock);
 
 	if (!coalesced_mmio_has_room(dev)) {
-		spin_unlock(&dev->lock);
+		spin_unlock(&dev->kvm->coalesced_zones.lock);
 		return -EOPNOTSUPP;
 	}
 
@@ -87,7 +77,7 @@  static int coalesced_mmio_write(struct kvm_io_device *this,
 	memcpy(ring->coalesced_mmio[ring->last].data, val, len);
 	smp_wmb();
 	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
-	spin_unlock(&dev->lock);
+	spin_unlock(&dev->kvm->coalesced_zones.lock);
 	return 0;
 }
 
@@ -95,6 +85,8 @@  static void coalesced_mmio_destructor(struct kvm_io_device *this)
 {
 	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
 
+	list_del(&dev->list);
+
 	kfree(dev);
 }
 
@@ -105,7 +97,6 @@  static const struct kvm_io_device_ops coalesced_mmio_ops = {
 
 int kvm_coalesced_mmio_init(struct kvm *kvm)
 {
-	struct kvm_coalesced_mmio_dev *dev;
 	struct page *page;
 	int ret;
 
@@ -113,31 +104,13 @@  int kvm_coalesced_mmio_init(struct kvm *kvm)
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
 		goto out_err;
-	kvm->coalesced_mmio_ring = page_address(page);
 
-	ret = -ENOMEM;
-	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
-	if (!dev)
-		goto out_free_page;
-	spin_lock_init(&dev->lock);
-	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
-	dev->kvm = kvm;
-	kvm->coalesced_mmio_dev = dev;
+	ret = 0;
+	kvm->coalesced_mmio_ring = page_address(page);
 
-	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
-	mutex_unlock(&kvm->slots_lock);
-	if (ret < 0)
-		goto out_free_dev;
+	spin_lock_init(&kvm->coalesced_zones.lock);
+	INIT_LIST_HEAD(&kvm->coalesced_zones.items);
 
-	return ret;
-
-out_free_dev:
-	kvm->coalesced_mmio_dev = NULL;
-	kfree(dev);
-out_free_page:
-	kvm->coalesced_mmio_ring = NULL;
-	__free_page(page);
 out_err:
 	return ret;
 }
@@ -151,51 +124,48 @@  void kvm_coalesced_mmio_free(struct kvm *kvm)
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 					 struct kvm_coalesced_mmio_zone *zone)
 {
-	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
+	int ret;
+	struct kvm_coalesced_mmio_dev *dev;
 
-	if (dev == NULL)
-		return -ENXIO;
+	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
+	dev->kvm = kvm;
+	dev->zone = *zone;
 
 	mutex_lock(&kvm->slots_lock);
-	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
-		mutex_unlock(&kvm->slots_lock);
-		return -ENOBUFS;
-	}
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+	mutex_unlock(&kvm->slots_lock);
+	if (ret < 0)
+		goto out_free_dev;
 
-	dev->zone[dev->nb_zones] = *zone;
-	dev->nb_zones++;
+	list_add_tail(&dev->list, &kvm->coalesced_zones.items);
+
+	return ret;
+
+out_free_dev:
+	kfree(dev);
+
+	if (dev == NULL)
+		return -ENXIO;
 
-	mutex_unlock(&kvm->slots_lock);
 	return 0;
 }
 
 int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
 					   struct kvm_coalesced_mmio_zone *zone)
 {
-	int i;
-	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
-	struct kvm_coalesced_mmio_zone *z;
-
-	if (dev == NULL)
-		return -ENXIO;
+	struct kvm_coalesced_mmio_dev *dev;
 
 	mutex_lock(&kvm->slots_lock);
 
-	i = dev->nb_zones;
-	while (i) {
-		z = &dev->zone[i - 1];
-
-		/* unregister all zones
-		 * included in (zone->addr, zone->size)
-		 */
-
-		if (zone->addr <= z->addr &&
-		    z->addr + z->size <= zone->addr + zone->size) {
-			dev->nb_zones--;
-			*z = dev->zone[dev->nb_zones];
+	list_for_each_entry(dev, &kvm->coalesced_zones.items, list)
+		if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
+			kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+			kvm_iodevice_destructor(&dev->dev);
 		}
-		i--;
-	}
 
 	mutex_unlock(&kvm->slots_lock);
 
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 8a5959e..b280c20 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -12,14 +12,13 @@ 
 
 #ifdef CONFIG_KVM_MMIO
 
-#define KVM_COALESCED_MMIO_ZONE_MAX 100
+#include <linux/list.h>
 
 struct kvm_coalesced_mmio_dev {
+	struct list_head list;
 	struct kvm_io_device dev;
 	struct kvm *kvm;
-	spinlock_t lock;
-	int nb_zones;
-	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
+	struct kvm_coalesced_mmio_zone zone;
 };
 
 int kvm_coalesced_mmio_init(struct kvm *kvm);