diff mbox

xen-blkfront: save uncompleted reqs in blkfront_resume()

Message ID 1467016404-7883-1-git-send-email-bob.liu@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bob Liu June 27, 2016, 8:33 a.m. UTC
Uncompleted reqs used to be 'saved and resubmitted' in blkfront_recover() during
migration, but that's too late after multi-queue was introduced.

After a migrate to another host (which may not have multiqueue support), the
number of rings (block hardware queues) may be changed and the ring and shadow
structure will also be reallocated.
As a result, blkfront_recover() can't 'save and resubmit' the real uncompleted reqs
because the shadow structure has been reallocated.

This patch fixes this issue by moving the 'save and resubmit' logic out of
blkfront_recover() to an earlier place: blkfront_resume().

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 drivers/block/xen-blkfront.c | 91 +++++++++++++++++++-------------------------
 1 file changed, 40 insertions(+), 51 deletions(-)

Comments

Bob Liu June 27, 2016, 1:29 p.m. UTC | #1
On 06/27/2016 04:33 PM, Bob Liu wrote:
> Uncompleted reqs used to be 'saved and resubmitted' in blkfront_recover() during
> migration, but that's too later after multi-queue introduced.
> 
> After a migrate to another host (which may not have multiqueue support), the
> number of rings (block hardware queues) may be changed and the ring and shadow
> structure will also be reallocated.
> So that blkfront_recover() can't 'save and resubmit' the real uncompleted reqs
> because shadow structure has been reallocated.
> 
> This patch fixes this issue by moving the 'save and resubmit' logic out of

Fix: only the 'save' logic was moved to an earlier place, blkfront_resume(); the 'resubmit' logic is unchanged and remains in blkfront_recover().

> blkfront_recover() to earlier place:blkfront_resume().
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> ---
>  drivers/block/xen-blkfront.c | 91 +++++++++++++++++++-------------------------
>  1 file changed, 40 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 2e6d1e9..fcc5b4e 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -207,6 +207,9 @@ struct blkfront_info
>  	struct blk_mq_tag_set tag_set;
>  	struct blkfront_ring_info *rinfo;
>  	unsigned int nr_rings;
> +	/* Save uncomplete reqs and bios for migration. */
> +	struct list_head requests;
> +	struct bio_list bio_list;
>  };
>  
>  static unsigned int nr_minors;
> @@ -2002,69 +2005,22 @@ static int blkif_recover(struct blkfront_info *info)
>  {
>  	unsigned int i, r_index;
>  	struct request *req, *n;
> -	struct blk_shadow *copy;
>  	int rc;
>  	struct bio *bio, *cloned_bio;
> -	struct bio_list bio_list, merge_bio;
>  	unsigned int segs, offset;
>  	int pending, size;
>  	struct split_bio *split_bio;
> -	struct list_head requests;
>  
>  	blkfront_gather_backend_features(info);
>  	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
>  	blk_queue_max_segments(info->rq, segs);
> -	bio_list_init(&bio_list);
> -	INIT_LIST_HEAD(&requests);
>  
>  	for (r_index = 0; r_index < info->nr_rings; r_index++) {
> -		struct blkfront_ring_info *rinfo;
> -
> -		rinfo = &info->rinfo[r_index];
> -		/* Stage 1: Make a safe copy of the shadow state. */
> -		copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
> -			       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
> -		if (!copy)
> -			return -ENOMEM;
> -
> -		/* Stage 2: Set up free list. */
> -		memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
> -		for (i = 0; i < BLK_RING_SIZE(info); i++)
> -			rinfo->shadow[i].req.u.rw.id = i+1;
> -		rinfo->shadow_free = rinfo->ring.req_prod_pvt;
> -		rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
> +		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
>  
>  		rc = blkfront_setup_indirect(rinfo);
> -		if (rc) {
> -			kfree(copy);
> +		if (rc)
>  			return rc;
> -		}
> -
> -		for (i = 0; i < BLK_RING_SIZE(info); i++) {
> -			/* Not in use? */
> -			if (!copy[i].request)
> -				continue;
> -
> -			/*
> -			 * Get the bios in the request so we can re-queue them.
> -			 */
> -			if (copy[i].request->cmd_flags &
> -			    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
> -				/*
> -				 * Flush operations don't contain bios, so
> -				 * we need to requeue the whole request
> -				 */
> -				list_add(&copy[i].request->queuelist, &requests);
> -				continue;
> -			}
> -			merge_bio.head = copy[i].request->bio;
> -			merge_bio.tail = copy[i].request->biotail;
> -			bio_list_merge(&bio_list, &merge_bio);
> -			copy[i].request->bio = NULL;
> -			blk_end_request_all(copy[i].request, 0);
> -		}
> -
> -		kfree(copy);
>  	}
>  	xenbus_switch_state(info->xbdev, XenbusStateConnected);
>  
> @@ -2079,7 +2035,7 @@ static int blkif_recover(struct blkfront_info *info)
>  		kick_pending_request_queues(rinfo);
>  	}
>  
> -	list_for_each_entry_safe(req, n, &requests, queuelist) {
> +	list_for_each_entry_safe(req, n, &info->requests, queuelist) {
>  		/* Requeue pending requests (flush or discard) */
>  		list_del_init(&req->queuelist);
>  		BUG_ON(req->nr_phys_segments > segs);
> @@ -2087,7 +2043,7 @@ static int blkif_recover(struct blkfront_info *info)
>  	}
>  	blk_mq_kick_requeue_list(info->rq);
>  
> -	while ((bio = bio_list_pop(&bio_list)) != NULL) {
> +	while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
>  		/* Traverse the list of pending bios and re-queue them */
>  		if (bio_segments(bio) > segs) {
>  			/*
> @@ -2133,9 +2089,42 @@ static int blkfront_resume(struct xenbus_device *dev)
>  {
>  	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
>  	int err = 0;
> +	unsigned int i, j;
>  
>  	dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
>  
> +	bio_list_init(&info->bio_list);
> +	INIT_LIST_HEAD(&info->requests);
> +	for (i = 0; i < info->nr_rings; i++) {
> +		struct blkfront_ring_info *rinfo = &info->rinfo[i];
> +		struct bio_list merge_bio;
> +		struct blk_shadow *shadow = rinfo->shadow;
> +
> +		for (j = 0; j < BLK_RING_SIZE(info); j++) {
> +			/* Not in use? */
> +			if (!shadow[j].request)
> +				continue;
> +
> +			/*
> +			 * Get the bios in the request so we can re-queue them.
> +			 */
> +			if (shadow[j].request->cmd_flags &
> +					(REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
> +				/*
> +				 * Flush operations don't contain bios, so
> +				 * we need to requeue the whole request
> +				 */
> +				list_add(&shadow[j].request->queuelist, &info->requests);
> +				continue;
> +			}
> +			merge_bio.head = shadow[j].request->bio;
> +			merge_bio.tail = shadow[j].request->biotail;
> +			bio_list_merge(&info->bio_list, &merge_bio);
> +			shadow[j].request->bio = NULL;
> +			blk_mq_end_request(shadow[j].request, 0);
> +		}
> +	}
> +
>  	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
>  
>  	err = negotiate_mq(info);
>
Konrad Rzeszutek Wilk June 28, 2016, 7:53 p.m. UTC | #2
On Mon, Jun 27, 2016 at 04:33:24PM +0800, Bob Liu wrote:
> Uncompleted reqs used to be 'saved and resubmitted' in blkfront_recover() during
> migration, but that's too later after multi-queue introduced.
> 
> After a migrate to another host (which may not have multiqueue support), the
> number of rings (block hardware queues) may be changed and the ring and shadow
> structure will also be reallocated.
> So that blkfront_recover() can't 'save and resubmit' the real uncompleted reqs
> because shadow structure has been reallocated.
> 
> This patch fixes this issue by moving the 'save and resubmit' logic out of
> blkfront_recover() to earlier place:blkfront_resume().
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>

Ack-ed and Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

Let me run it through some tests and, if no regressions are found, I will
ask Jens to pull it.
diff mbox

Patch

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2e6d1e9..fcc5b4e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -207,6 +207,9 @@  struct blkfront_info
 	struct blk_mq_tag_set tag_set;
 	struct blkfront_ring_info *rinfo;
 	unsigned int nr_rings;
+	/* Save uncomplete reqs and bios for migration. */
+	struct list_head requests;
+	struct bio_list bio_list;
 };
 
 static unsigned int nr_minors;
@@ -2002,69 +2005,22 @@  static int blkif_recover(struct blkfront_info *info)
 {
 	unsigned int i, r_index;
 	struct request *req, *n;
-	struct blk_shadow *copy;
 	int rc;
 	struct bio *bio, *cloned_bio;
-	struct bio_list bio_list, merge_bio;
 	unsigned int segs, offset;
 	int pending, size;
 	struct split_bio *split_bio;
-	struct list_head requests;
 
 	blkfront_gather_backend_features(info);
 	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
 	blk_queue_max_segments(info->rq, segs);
-	bio_list_init(&bio_list);
-	INIT_LIST_HEAD(&requests);
 
 	for (r_index = 0; r_index < info->nr_rings; r_index++) {
-		struct blkfront_ring_info *rinfo;
-
-		rinfo = &info->rinfo[r_index];
-		/* Stage 1: Make a safe copy of the shadow state. */
-		copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
-			       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
-		if (!copy)
-			return -ENOMEM;
-
-		/* Stage 2: Set up free list. */
-		memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
-		for (i = 0; i < BLK_RING_SIZE(info); i++)
-			rinfo->shadow[i].req.u.rw.id = i+1;
-		rinfo->shadow_free = rinfo->ring.req_prod_pvt;
-		rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
 
 		rc = blkfront_setup_indirect(rinfo);
-		if (rc) {
-			kfree(copy);
+		if (rc)
 			return rc;
-		}
-
-		for (i = 0; i < BLK_RING_SIZE(info); i++) {
-			/* Not in use? */
-			if (!copy[i].request)
-				continue;
-
-			/*
-			 * Get the bios in the request so we can re-queue them.
-			 */
-			if (copy[i].request->cmd_flags &
-			    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
-				/*
-				 * Flush operations don't contain bios, so
-				 * we need to requeue the whole request
-				 */
-				list_add(&copy[i].request->queuelist, &requests);
-				continue;
-			}
-			merge_bio.head = copy[i].request->bio;
-			merge_bio.tail = copy[i].request->biotail;
-			bio_list_merge(&bio_list, &merge_bio);
-			copy[i].request->bio = NULL;
-			blk_end_request_all(copy[i].request, 0);
-		}
-
-		kfree(copy);
 	}
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
@@ -2079,7 +2035,7 @@  static int blkif_recover(struct blkfront_info *info)
 		kick_pending_request_queues(rinfo);
 	}
 
-	list_for_each_entry_safe(req, n, &requests, queuelist) {
+	list_for_each_entry_safe(req, n, &info->requests, queuelist) {
 		/* Requeue pending requests (flush or discard) */
 		list_del_init(&req->queuelist);
 		BUG_ON(req->nr_phys_segments > segs);
@@ -2087,7 +2043,7 @@  static int blkif_recover(struct blkfront_info *info)
 	}
 	blk_mq_kick_requeue_list(info->rq);
 
-	while ((bio = bio_list_pop(&bio_list)) != NULL) {
+	while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
 		/* Traverse the list of pending bios and re-queue them */
 		if (bio_segments(bio) > segs) {
 			/*
@@ -2133,9 +2089,42 @@  static int blkfront_resume(struct xenbus_device *dev)
 {
 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 	int err = 0;
+	unsigned int i, j;
 
 	dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
 
+	bio_list_init(&info->bio_list);
+	INIT_LIST_HEAD(&info->requests);
+	for (i = 0; i < info->nr_rings; i++) {
+		struct blkfront_ring_info *rinfo = &info->rinfo[i];
+		struct bio_list merge_bio;
+		struct blk_shadow *shadow = rinfo->shadow;
+
+		for (j = 0; j < BLK_RING_SIZE(info); j++) {
+			/* Not in use? */
+			if (!shadow[j].request)
+				continue;
+
+			/*
+			 * Get the bios in the request so we can re-queue them.
+			 */
+			if (shadow[j].request->cmd_flags &
+					(REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+				/*
+				 * Flush operations don't contain bios, so
+				 * we need to requeue the whole request
+				 */
+				list_add(&shadow[j].request->queuelist, &info->requests);
+				continue;
+			}
+			merge_bio.head = shadow[j].request->bio;
+			merge_bio.tail = shadow[j].request->biotail;
+			bio_list_merge(&info->bio_list, &merge_bio);
+			shadow[j].request->bio = NULL;
+			blk_mq_end_request(shadow[j].request, 0);
+		}
+	}
+
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
 	err = negotiate_mq(info);