diff mbox series

[v2,4/4] xen-blkback: support dynamic unbind/bind

Message ID 20191210113347.3404-5-pdurrant@amazon.com (mailing list archive)
State Superseded
Headers show
Series xen-blkback: support live update | expand

Commit Message

Paul Durrant Dec. 10, 2019, 11:33 a.m. UTC
By simply re-attaching to shared rings during connect_ring() rather than
assuming they are freshly allocated (i.e assuming the counters are zero)
it is possible for vbd instances to be unbound and re-bound from and to
(respectively) a running guest.

This has been tested by running:

while true;
  do fio --name=randwrite --ioengine=libaio --iodepth=16 \
  --rw=randwrite --bs=4k --direct=1 --size=1G --verify=crc32;
  done

in a PV guest whilst running:

while true;
  do echo vbd-$DOMID-$VBD >unbind;
  echo unbound;
  sleep 5;
  echo vbd-$DOMID-$VBD >bind;
  echo bound;
  sleep 3;
  done

in dom0 from /sys/bus/xen-backend/drivers/vbd to continuously unbind and
re-bind its system disk image.

This is a highly useful feature for a backend module as it allows it to be
unloaded and re-loaded (i.e. updated) without requiring domUs to be halted.
This was also tested by running:

while true;
  do echo vbd-$DOMID-$VBD >unbind;
  echo unbound;
  sleep 5;
  rmmod xen-blkback;
  echo unloaded;
  sleep 1;
  modprobe xen-blkback;
  echo bound;
  cd $(pwd);
  sleep 3;
  done

in dom0 whilst running the same loop as above in the (single) PV guest.

Some (less stressful) testing has also been done using a Windows HVM guest
with the latest 9.0 PV drivers installed.

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>

v2:
 - Apply a sanity check to the value of rsp_prod and fail the re-attach
   if it is implausible
 - Set allow_rebind to prevent ring from being closed on unbind
 - Update test workload from dd to fio (with verification)
---
 drivers/block/xen-blkback/xenbus.c | 59 +++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 18 deletions(-)

Comments

Roger Pau Monné Dec. 11, 2019, 10:45 a.m. UTC | #1
On Tue, Dec 10, 2019 at 11:33:47AM +0000, Paul Durrant wrote:
> By simply re-attaching to shared rings during connect_ring() rather than
> assuming they are freshly allocated (i.e assuming the counters are zero)
> it is possible for vbd instances to be unbound and re-bound from and to
> (respectively) a running guest.
> 
> This has been tested by running:
> 
> while true;
>   do fio --name=randwrite --ioengine=libaio --iodepth=16 \
>   --rw=randwrite --bs=4k --direct=1 --size=1G --verify=crc32;
>   done
> 
> in a PV guest whilst running:
> 
> while true;
>   do echo vbd-$DOMID-$VBD >unbind;
>   echo unbound;
>   sleep 5;

Is there anyway to know when the unbind has finished? AFAICT
xen_blkif_disconnect will return EBUSY if there are in flight
requests, and the disconnect won't be completed until those requests
are finished.

>   echo vbd-$DOMID-$VBD >bind;
>   echo bound;
>   sleep 3;
>   done
> 
> in dom0 from /sys/bus/xen-backend/drivers/vbd to continuously unbind and
> re-bind its system disk image.
> 
> This is a highly useful feature for a backend module as it allows it to be
> unloaded and re-loaded (i.e. updated) without requiring domUs to be halted.
> This was also tested by running:
> 
> while true;
>   do echo vbd-$DOMID-$VBD >unbind;
>   echo unbound;
>   sleep 5;
>   rmmod xen-blkback;
>   echo unloaded;
>   sleep 1;
>   modprobe xen-blkback;
>   echo bound;
>   cd $(pwd);
>   sleep 3;
>   done
> 
> in dom0 whilst running the same loop as above in the (single) PV guest.
> 
> Some (less stressful) testing has also been done using a Windows HVM guest
> with the latest 9.0 PV drivers installed.
> 
> Signed-off-by: Paul Durrant <pdurrant@amazon.com>
> ---
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Cc: "Roger Pau Monné" <roger.pau@citrix.com>
> Cc: Jens Axboe <axboe@kernel.dk>
> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Cc: Juergen Gross <jgross@suse.com>
> Cc: Stefano Stabellini <sstabellini@kernel.org>
> 
> v2:
>  - Apply a sanity check to the value of rsp_prod and fail the re-attach
>    if it is implausible
>  - Set allow_rebind to prevent ring from being closed on unbind
>  - Update test workload from dd to fio (with verification)
> ---
>  drivers/block/xen-blkback/xenbus.c | 59 +++++++++++++++++++++---------
>  1 file changed, 41 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
> index e8c5c54e1d26..13d09630b237 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -181,6 +181,8 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
>  {
>  	int err;
>  	struct xen_blkif *blkif = ring->blkif;
> +	struct blkif_common_sring *sring_common;
> +	RING_IDX rsp_prod, req_prod;
>  
>  	/* Already connected through? */
>  	if (ring->irq)
> @@ -191,46 +193,66 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
>  	if (err < 0)
>  		return err;
>  
> +	sring_common = (struct blkif_common_sring *)ring->blk_ring;
> +	rsp_prod = READ_ONCE(sring_common->rsp_prod);
> +	req_prod = READ_ONCE(sring_common->req_prod);
> +
>  	switch (blkif->blk_protocol) {
>  	case BLKIF_PROTOCOL_NATIVE:
>  	{
> -		struct blkif_sring *sring;
> -		sring = (struct blkif_sring *)ring->blk_ring;
> -		BACK_RING_INIT(&ring->blk_rings.native, sring,
> -			       XEN_PAGE_SIZE * nr_grefs);
> +		struct blkif_sring *sring_native =
> +			(struct blkif_sring *)ring->blk_ring;

I think you can constify both sring_native and sring_common (and the
other instances below).

> +		unsigned int size = __RING_SIZE(sring_native,
> +						XEN_PAGE_SIZE * nr_grefs);
> +
> +		BACK_RING_ATTACH(&ring->blk_rings.native, sring_native,
> +				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
> +		err = (req_prod - rsp_prod > size) ? -EIO : 0;
>  		break;
>  	}
>  	case BLKIF_PROTOCOL_X86_32:
>  	{
> -		struct blkif_x86_32_sring *sring_x86_32;
> -		sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
> -		BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
> -			       XEN_PAGE_SIZE * nr_grefs);
> +		struct blkif_x86_32_sring *sring_x86_32 =
> +			(struct blkif_x86_32_sring *)ring->blk_ring;
> +		unsigned int size = __RING_SIZE(sring_x86_32,
> +						XEN_PAGE_SIZE * nr_grefs);
> +
> +		BACK_RING_ATTACH(&ring->blk_rings.x86_32, sring_x86_32,
> +				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
> +		err = (req_prod - rsp_prod > size) ? -EIO : 0;
>  		break;
>  	}
>  	case BLKIF_PROTOCOL_X86_64:
>  	{
> -		struct blkif_x86_64_sring *sring_x86_64;
> -		sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
> -		BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
> -			       XEN_PAGE_SIZE * nr_grefs);
> +		struct blkif_x86_64_sring *sring_x86_64 =
> +			(struct blkif_x86_64_sring *)ring->blk_ring;
> +		unsigned int size = __RING_SIZE(sring_x86_64,
> +						XEN_PAGE_SIZE * nr_grefs);
> +
> +		BACK_RING_ATTACH(&ring->blk_rings.x86_64, sring_x86_64,
> +				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
> +		err = (req_prod - rsp_prod > size) ? -EIO : 0;

This is repeated for all ring types, might be worth to pull it out of
the switch...

>  		break;
>  	}
>  	default:
>  		BUG();
>  	}
> +	if (err < 0)
> +		goto fail;

...and placed here instead?

Thanks, Roger.
Paul Durrant Dec. 11, 2019, 10:52 a.m. UTC | #2
> -----Original Message-----
> From: Roger Pau Monné <roger.pau@citrix.com>
> Sent: 11 December 2019 10:46
> To: Durrant, Paul <pdurrant@amazon.com>
> Cc: xen-devel@lists.xenproject.org; linux-kernel@vger.kernel.org; Konrad
> Rzeszutek Wilk <konrad.wilk@oracle.com>; Jens Axboe <axboe@kernel.dk>;
> Boris Ostrovsky <boris.ostrovsky@oracle.com>; Juergen Gross
> <jgross@suse.com>; Stefano Stabellini <sstabellini@kernel.org>
> Subject: Re: [PATCH v2 4/4] xen-blkback: support dynamic unbind/bind
> 
> On Tue, Dec 10, 2019 at 11:33:47AM +0000, Paul Durrant wrote:
> > By simply re-attaching to shared rings during connect_ring() rather than
> > assuming they are freshly allocated (i.e assuming the counters are zero)
> > it is possible for vbd instances to be unbound and re-bound from and to
> > (respectively) a running guest.
> >
> > This has been tested by running:
> >
> > while true;
> >   do fio --name=randwrite --ioengine=libaio --iodepth=16 \
> >   --rw=randwrite --bs=4k --direct=1 --size=1G --verify=crc32;
> >   done
> >
> > in a PV guest whilst running:
> >
> > while true;
> >   do echo vbd-$DOMID-$VBD >unbind;
> >   echo unbound;
> >   sleep 5;
> 
> Is there anyway to know when the unbind has finished? AFAICT
> xen_blkif_disconnect will return EBUSY if there are in flight
> requests, and the disconnect won't be completed until those requests
> are finished.

Yes, the device sysfs node will disappear when remove() completes.

> 
> >   echo vbd-$DOMID-$VBD >bind;
> >   echo bound;
> >   sleep 3;
> >   done
> >
> > in dom0 from /sys/bus/xen-backend/drivers/vbd to continuously unbind and
> > re-bind its system disk image.
> >
> > This is a highly useful feature for a backend module as it allows it to
> be
> > unloaded and re-loaded (i.e. updated) without requiring domUs to be
> halted.
> > This was also tested by running:
> >
> > while true;
> >   do echo vbd-$DOMID-$VBD >unbind;
> >   echo unbound;
> >   sleep 5;
> >   rmmod xen-blkback;
> >   echo unloaded;
> >   sleep 1;
> >   modprobe xen-blkback;
> >   echo bound;
> >   cd $(pwd);
> >   sleep 3;
> >   done
> >
> > in dom0 whilst running the same loop as above in the (single) PV guest.
> >
> > Some (less stressful) testing has also been done using a Windows HVM
> guest
> > with the latest 9.0 PV drivers installed.
> >
> > Signed-off-by: Paul Durrant <pdurrant@amazon.com>
> > ---
> > Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> > Cc: "Roger Pau Monné" <roger.pau@citrix.com>
> > Cc: Jens Axboe <axboe@kernel.dk>
> > Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> > Cc: Juergen Gross <jgross@suse.com>
> > Cc: Stefano Stabellini <sstabellini@kernel.org>
> >
> > v2:
> >  - Apply a sanity check to the value of rsp_prod and fail the re-attach
> >    if it is implausible
> >  - Set allow_rebind to prevent ring from being closed on unbind
> >  - Update test workload from dd to fio (with verification)
> > ---
> >  drivers/block/xen-blkback/xenbus.c | 59 +++++++++++++++++++++---------
> >  1 file changed, 41 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-
> blkback/xenbus.c
> > index e8c5c54e1d26..13d09630b237 100644
> > --- a/drivers/block/xen-blkback/xenbus.c
> > +++ b/drivers/block/xen-blkback/xenbus.c
> > @@ -181,6 +181,8 @@ static int xen_blkif_map(struct xen_blkif_ring
> *ring, grant_ref_t *gref,
> >  {
> >  	int err;
> >  	struct xen_blkif *blkif = ring->blkif;
> > +	struct blkif_common_sring *sring_common;
> > +	RING_IDX rsp_prod, req_prod;
> >
> >  	/* Already connected through? */
> >  	if (ring->irq)
> > @@ -191,46 +193,66 @@ static int xen_blkif_map(struct xen_blkif_ring
> *ring, grant_ref_t *gref,
> >  	if (err < 0)
> >  		return err;
> >
> > +	sring_common = (struct blkif_common_sring *)ring->blk_ring;
> > +	rsp_prod = READ_ONCE(sring_common->rsp_prod);
> > +	req_prod = READ_ONCE(sring_common->req_prod);
> > +
> >  	switch (blkif->blk_protocol) {
> >  	case BLKIF_PROTOCOL_NATIVE:
> >  	{
> > -		struct blkif_sring *sring;
> > -		sring = (struct blkif_sring *)ring->blk_ring;
> > -		BACK_RING_INIT(&ring->blk_rings.native, sring,
> > -			       XEN_PAGE_SIZE * nr_grefs);
> > +		struct blkif_sring *sring_native =
> > +			(struct blkif_sring *)ring->blk_ring;
> 
> I think you can constify both sring_native and sring_common (and the
> other instances below).

Yes, I can do that. I don't think the macros would mind.

> 
> > +		unsigned int size = __RING_SIZE(sring_native,
> > +						XEN_PAGE_SIZE * nr_grefs);
> > +
> > +		BACK_RING_ATTACH(&ring->blk_rings.native, sring_native,
> > +				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
> > +		err = (req_prod - rsp_prod > size) ? -EIO : 0;
> >  		break;
> >  	}
> >  	case BLKIF_PROTOCOL_X86_32:
> >  	{
> > -		struct blkif_x86_32_sring *sring_x86_32;
> > -		sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
> > -		BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
> > -			       XEN_PAGE_SIZE * nr_grefs);
> > +		struct blkif_x86_32_sring *sring_x86_32 =
> > +			(struct blkif_x86_32_sring *)ring->blk_ring;
> > +		unsigned int size = __RING_SIZE(sring_x86_32,
> > +						XEN_PAGE_SIZE * nr_grefs);
> > +
> > +		BACK_RING_ATTACH(&ring->blk_rings.x86_32, sring_x86_32,
> > +				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
> > +		err = (req_prod - rsp_prod > size) ? -EIO : 0;
> >  		break;
> >  	}
> >  	case BLKIF_PROTOCOL_X86_64:
> >  	{
> > -		struct blkif_x86_64_sring *sring_x86_64;
> > -		sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
> > -		BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
> > -			       XEN_PAGE_SIZE * nr_grefs);
> > +		struct blkif_x86_64_sring *sring_x86_64 =
> > +			(struct blkif_x86_64_sring *)ring->blk_ring;
> > +		unsigned int size = __RING_SIZE(sring_x86_64,
> > +						XEN_PAGE_SIZE * nr_grefs);
> > +
> > +		BACK_RING_ATTACH(&ring->blk_rings.x86_64, sring_x86_64,
> > +				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
> > +		err = (req_prod - rsp_prod > size) ? -EIO : 0;
> 
> This is repeated for all ring types, might be worth to pull it out of
> the switch...
> 

I did wonder about that... I'll do in v3.

> >  		break;
> >  	}
> >  	default:
> >  		BUG();
> >  	}
> > +	if (err < 0)
> > +		goto fail;
> 
> ...and placed here instead?

Indeed.

  Cheers,
    Paul

> 
> Thanks, Roger.
Paul Durrant Dec. 11, 2019, 2:46 p.m. UTC | #3
> -----Original Message-----
> > > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-
> > blkback/xenbus.c
> > > index e8c5c54e1d26..13d09630b237 100644
> > > --- a/drivers/block/xen-blkback/xenbus.c
> > > +++ b/drivers/block/xen-blkback/xenbus.c
> > > @@ -181,6 +181,8 @@ static int xen_blkif_map(struct xen_blkif_ring
> > *ring, grant_ref_t *gref,
> > >  {
> > >  	int err;
> > >  	struct xen_blkif *blkif = ring->blkif;
> > > +	struct blkif_common_sring *sring_common;
> > > +	RING_IDX rsp_prod, req_prod;
> > >
> > >  	/* Already connected through? */
> > >  	if (ring->irq)
> > > @@ -191,46 +193,66 @@ static int xen_blkif_map(struct xen_blkif_ring
> > *ring, grant_ref_t *gref,
> > >  	if (err < 0)
> > >  		return err;
> > >
> > > +	sring_common = (struct blkif_common_sring *)ring->blk_ring;
> > > +	rsp_prod = READ_ONCE(sring_common->rsp_prod);
> > > +	req_prod = READ_ONCE(sring_common->req_prod);
> > > +
> > >  	switch (blkif->blk_protocol) {
> > >  	case BLKIF_PROTOCOL_NATIVE:
> > >  	{
> > > -		struct blkif_sring *sring;
> > > -		sring = (struct blkif_sring *)ring->blk_ring;
> > > -		BACK_RING_INIT(&ring->blk_rings.native, sring,
> > > -			       XEN_PAGE_SIZE * nr_grefs);
> > > +		struct blkif_sring *sring_native =
> > > +			(struct blkif_sring *)ring->blk_ring;
> >
> > I think you can constify both sring_native and sring_common (and the
> > other instances below).
> 
> Yes, I can do that. I don't think the macros would mind.
> 

Spoke to soon. They do mind, of course, because the sring pointer in the front/back ring is not (and should not) be const. I can const sring_common but no others.

  Paul
diff mbox series

Patch

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index e8c5c54e1d26..13d09630b237 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -181,6 +181,8 @@  static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
 {
 	int err;
 	struct xen_blkif *blkif = ring->blkif;
+	struct blkif_common_sring *sring_common;
+	RING_IDX rsp_prod, req_prod;
 
 	/* Already connected through? */
 	if (ring->irq)
@@ -191,46 +193,66 @@  static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
 	if (err < 0)
 		return err;
 
+	sring_common = (struct blkif_common_sring *)ring->blk_ring;
+	rsp_prod = READ_ONCE(sring_common->rsp_prod);
+	req_prod = READ_ONCE(sring_common->req_prod);
+
 	switch (blkif->blk_protocol) {
 	case BLKIF_PROTOCOL_NATIVE:
 	{
-		struct blkif_sring *sring;
-		sring = (struct blkif_sring *)ring->blk_ring;
-		BACK_RING_INIT(&ring->blk_rings.native, sring,
-			       XEN_PAGE_SIZE * nr_grefs);
+		struct blkif_sring *sring_native =
+			(struct blkif_sring *)ring->blk_ring;
+		unsigned int size = __RING_SIZE(sring_native,
+						XEN_PAGE_SIZE * nr_grefs);
+
+		BACK_RING_ATTACH(&ring->blk_rings.native, sring_native,
+				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+		err = (req_prod - rsp_prod > size) ? -EIO : 0;
 		break;
 	}
 	case BLKIF_PROTOCOL_X86_32:
 	{
-		struct blkif_x86_32_sring *sring_x86_32;
-		sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
-		BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
-			       XEN_PAGE_SIZE * nr_grefs);
+		struct blkif_x86_32_sring *sring_x86_32 =
+			(struct blkif_x86_32_sring *)ring->blk_ring;
+		unsigned int size = __RING_SIZE(sring_x86_32,
+						XEN_PAGE_SIZE * nr_grefs);
+
+		BACK_RING_ATTACH(&ring->blk_rings.x86_32, sring_x86_32,
+				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+		err = (req_prod - rsp_prod > size) ? -EIO : 0;
 		break;
 	}
 	case BLKIF_PROTOCOL_X86_64:
 	{
-		struct blkif_x86_64_sring *sring_x86_64;
-		sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
-		BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
-			       XEN_PAGE_SIZE * nr_grefs);
+		struct blkif_x86_64_sring *sring_x86_64 =
+			(struct blkif_x86_64_sring *)ring->blk_ring;
+		unsigned int size = __RING_SIZE(sring_x86_64,
+						XEN_PAGE_SIZE * nr_grefs);
+
+		BACK_RING_ATTACH(&ring->blk_rings.x86_64, sring_x86_64,
+				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+		err = (req_prod - rsp_prod > size) ? -EIO : 0;
 		break;
 	}
 	default:
 		BUG();
 	}
+	if (err < 0)
+		goto fail;
 
 	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
 						    xen_blkif_be_int, 0,
 						    "blkif-backend", ring);
-	if (err < 0) {
-		xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
-		ring->blk_rings.common.sring = NULL;
-		return err;
-	}
+	if (err < 0)
+		goto fail;
 	ring->irq = err;
 
 	return 0;
+
+fail:
+	xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+	ring->blk_rings.common.sring = NULL;
+	return err;
 }
 
 static int xen_blkif_disconnect(struct xen_blkif *blkif)
@@ -1121,7 +1143,8 @@  static struct xenbus_driver xen_blkbk_driver = {
 	.ids  = xen_blkbk_ids,
 	.probe = xen_blkbk_probe,
 	.remove = xen_blkbk_remove,
-	.otherend_changed = frontend_changed
+	.otherend_changed = frontend_changed,
+	.allow_rebind = true,
 };
 
 int xen_blkif_xenbus_init(void)