diff mbox series

[v2,3/3] xen/blkfront: don't trust the backend response data blindly

Message ID 20210708124345.10173-4-jgross@suse.com (mailing list archive)
State New, archived
Headers show
Series xen: harden blkfront against malicious backends | expand

Commit Message

Juergen Gross July 8, 2021, 12:43 p.m. UTC
Today blkfront will trust the backend to send only sane response data.
In order to avoid privilege escalations or crashes in case of malicious
backends verify the data to be within expected limits. Especially make
sure that the response always references an outstanding request.

Introduce a new state of the ring BLKIF_STATE_ERROR which will be
switched to in case an inconsistency is being detected. Recovering from
this state is possible only via removing and adding the virtual device
again (e.g. via a suspend/resume cycle).

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- use READ_ONCE() for reading the producer index
- check validity of producer index only after memory barrier (Jan Beulich)
- use virt_rmb() as barrier (Jan Beulich)
---
 drivers/block/xen-blkfront.c | 66 ++++++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 17 deletions(-)

Comments

Jan Beulich July 8, 2021, 1:11 p.m. UTC | #1
On 08.07.2021 14:43, Juergen Gross wrote:
> Today blkfront will trust the backend to send only sane response data.
> In order to avoid privilege escalations or crashes in case of malicious
> backends verify the data to be within expected limits. Especially make
> sure that the response always references an outstanding request.
> 
> Introduce a new state of the ring BLKIF_STATE_ERROR which will be
> switched to in case an inconsistency is being detected. Recovering from
> this state is possible only via removing and adding the virtual device
> again (e.g. via a suspend/resume cycle).
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
albeit ...

> @@ -1602,7 +1628,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  		case BLKIF_OP_DISCARD:
>  			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
>  				struct request_queue *rq = info->rq;
> -				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> +
> +				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
>  					   info->gd->disk_name, op_name(bret.operation));
>  				blkif_req(req)->error = BLK_STS_NOTSUPP;
>  				info->feature_discard = 0;
> @@ -1614,13 +1641,13 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  		case BLKIF_OP_FLUSH_DISKCACHE:
>  		case BLKIF_OP_WRITE_BARRIER:
>  			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
> -				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> +				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
>  				       info->gd->disk_name, op_name(bret.operation));
>  				blkif_req(req)->error = BLK_STS_NOTSUPP;
>  			}
>  			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
>  				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
> -				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
> +				pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
>  				       info->gd->disk_name, op_name(bret.operation));
>  				blkif_req(req)->error = BLK_STS_NOTSUPP;
>  			}
> @@ -1635,8 +1662,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  		case BLKIF_OP_READ:
>  		case BLKIF_OP_WRITE:
>  			if (unlikely(bret.status != BLKIF_RSP_OKAY))
> -				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
> -					"request: %x\n", bret.status);
> +				dev_dbg_ratelimited(&info->xbdev->dev,
> +					"Bad return from blkdev data request: %x\n", bret.status);
>  
>  			break;
>  		default:

... all of these look kind of unrelated to the topic of the patch,
and the conversion also isn't mentioned as on-purpose in the
description.

Jan
Juergen Gross July 8, 2021, 1:14 p.m. UTC | #2
On 08.07.21 15:11, Jan Beulich wrote:
> On 08.07.2021 14:43, Juergen Gross wrote:
>> Today blkfront will trust the backend to send only sane response data.
>> In order to avoid privilege escalations or crashes in case of malicious
>> backends verify the data to be within expected limits. Especially make
>> sure that the response always references an outstanding request.
>>
>> Introduce a new state of the ring BLKIF_STATE_ERROR which will be
>> switched to in case an inconsistency is being detected. Recovering from
>> this state is possible only via removing and adding the virtual device
>> again (e.g. via a suspend/resume cycle).
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> albeit ...
> 
>> @@ -1602,7 +1628,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>   		case BLKIF_OP_DISCARD:
>>   			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
>>   				struct request_queue *rq = info->rq;
>> -				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
>> +
>> +				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
>>   					   info->gd->disk_name, op_name(bret.operation));
>>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>>   				info->feature_discard = 0;
>> @@ -1614,13 +1641,13 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>   		case BLKIF_OP_FLUSH_DISKCACHE:
>>   		case BLKIF_OP_WRITE_BARRIER:
>>   			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
>> -				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
>> +				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
>>   				       info->gd->disk_name, op_name(bret.operation));
>>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>>   			}
>>   			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
>>   				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
>> -				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
>> +				pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
>>   				       info->gd->disk_name, op_name(bret.operation));
>>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>>   			}
>> @@ -1635,8 +1662,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>   		case BLKIF_OP_READ:
>>   		case BLKIF_OP_WRITE:
>>   			if (unlikely(bret.status != BLKIF_RSP_OKAY))
>> -				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
>> -					"request: %x\n", bret.status);
>> +				dev_dbg_ratelimited(&info->xbdev->dev,
>> +					"Bad return from blkdev data request: %x\n", bret.status);
>>   
>>   			break;
>>   		default:
> 
> ... all of these look kind of unrelated to the topic of the patch,
> and the conversion also isn't mentioned as on-purpose in the
> description.

Hmm, yes, I'll add a sentence to the commit message.


Juergen
Roger Pau Monné July 9, 2021, 9:42 a.m. UTC | #3
On Thu, Jul 08, 2021 at 02:43:45PM +0200, Juergen Gross wrote:
> Today blkfront will trust the backend to send only sane response data.
> In order to avoid privilege escalations or crashes in case of malicious
> backends verify the data to be within expected limits. Especially make
> sure that the response always references an outstanding request.
> 
> Introduce a new state of the ring BLKIF_STATE_ERROR which will be
> switched to in case an inconsistency is being detected. Recovering from
> this state is possible only via removing and adding the virtual device
> again (e.g. via a suspend/resume cycle).
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Acked-by: Roger Pau Monné <roger.pau@citrix.com>

> ---
> V2:
> - use READ_ONCE() for reading the producer index
> - check validity of producer index only after memory barrier (Jan Beulich)
> - use virt_rmb() as barrier (Jan Beulich)
> ---
>  drivers/block/xen-blkfront.c | 66 ++++++++++++++++++++++++++----------
>  1 file changed, 49 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 80701860870a..ecdbb0381b4c 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -80,6 +80,7 @@ enum blkif_state {
>  	BLKIF_STATE_DISCONNECTED,
>  	BLKIF_STATE_CONNECTED,
>  	BLKIF_STATE_SUSPENDED,
> +	BLKIF_STATE_ERROR,
>  };
>  
>  struct grant {
> @@ -89,6 +90,7 @@ struct grant {
>  };
>  
>  enum blk_req_status {
> +	REQ_PROCESSING,
>  	REQ_WAITING,
>  	REQ_DONE,
>  	REQ_ERROR,
> @@ -543,7 +545,7 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
>  
>  	id = get_id_from_freelist(rinfo);
>  	rinfo->shadow[id].request = req;
> -	rinfo->shadow[id].status = REQ_WAITING;
> +	rinfo->shadow[id].status = REQ_PROCESSING;
>  	rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
>  
>  	rinfo->shadow[id].req.u.rw.id = id;
> @@ -572,6 +574,7 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf
>  
>  	/* Copy the request to the ring page. */
>  	*final_ring_req = *ring_req;
> +	rinfo->shadow[id].status = REQ_WAITING;
>  
>  	return 0;
>  }
> @@ -847,8 +850,11 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
>  
>  	/* Copy request(s) to the ring page. */
>  	*final_ring_req = *ring_req;
> -	if (unlikely(require_extra_req))
> +	rinfo->shadow[id].status = REQ_WAITING;
> +	if (unlikely(require_extra_req)) {
>  		*final_extra_ring_req = *extra_ring_req;
> +		rinfo->shadow[extra_id].status = REQ_WAITING;
> +	}
>  
>  	if (new_persistent_gnts)
>  		gnttab_free_grant_references(setup.gref_head);
> @@ -1402,8 +1408,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp)
>  static int blkif_get_final_status(enum blk_req_status s1,
>  				  enum blk_req_status s2)
>  {
> -	BUG_ON(s1 == REQ_WAITING);
> -	BUG_ON(s2 == REQ_WAITING);
> +	BUG_ON(s1 < REQ_DONE);
> +	BUG_ON(s2 < REQ_DONE);
>  
>  	if (s1 == REQ_ERROR || s2 == REQ_ERROR)
>  		return BLKIF_RSP_ERROR;
> @@ -1436,7 +1442,7 @@ static bool blkif_completion(unsigned long *id,
>  		s->status = blkif_rsp_to_req_status(bret->status);
>  
>  		/* Wait the second response if not yet here. */
> -		if (s2->status == REQ_WAITING)
> +		if (s2->status < REQ_DONE)
>  			return false;
>  
>  		bret->status = blkif_get_final_status(s->status,
> @@ -1555,11 +1561,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  
>  	spin_lock_irqsave(&rinfo->ring_lock, flags);
>   again:
> -	rp = rinfo->ring.sring->rsp_prod;
> -	rmb(); /* Ensure we see queued responses up to 'rp'. */
> +	rp = READ_ONCE(rinfo->ring.sring->rsp_prod);
> +	virt_rmb(); /* Ensure we see queued responses up to 'rp'. */

Is the READ_ONCE strictly needed? Doesn't the barrier prevent rp from
not being loaded at this point?

> +	if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) {
> +		pr_alert("%s: illegal number of responses %u\n",
> +			 info->gd->disk_name, rp - rinfo->ring.rsp_cons);
> +		goto err;
> +	}
>  
>  	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
>  		unsigned long id;
> +		unsigned int op;
>  
>  		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
>  		id = bret.id;
> @@ -1570,14 +1582,28 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  		 * look in get_id_from_freelist.
>  		 */
>  		if (id >= BLK_RING_SIZE(info)) {
> -			WARN(1, "%s: response to %s has incorrect id (%ld)\n",
> -			     info->gd->disk_name, op_name(bret.operation), id);
> -			/* We can't safely get the 'struct request' as
> -			 * the id is busted. */
> -			continue;
> +			pr_alert("%s: response has incorrect id (%ld)\n",
> +				 info->gd->disk_name, id);
> +			goto err;
>  		}
> +		if (rinfo->shadow[id].status != REQ_WAITING) {
> +			pr_alert("%s: response references no pending request\n",
> +				 info->gd->disk_name);
> +			goto err;
> +		}
> +
> +		rinfo->shadow[id].status = REQ_PROCESSING;
>  		req  = rinfo->shadow[id].request;
>  
> +		op = rinfo->shadow[id].req.operation;
> +		if (op == BLKIF_OP_INDIRECT)
> +			op = rinfo->shadow[id].req.u.indirect.indirect_op;
> +		if (bret.operation != op) {
> +			pr_alert("%s: response has wrong operation (%u instead of %u)\n",
> +				 info->gd->disk_name, bret.operation, op);

You could also use op_name here, but I guess this could mask the
operation as 'unknown' for any number out of the defined ones.

> +			goto err;
> +		}
> +
>  		if (bret.operation != BLKIF_OP_DISCARD) {
>  			/*
>  			 * We may need to wait for an extra response if the
> @@ -1602,7 +1628,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  		case BLKIF_OP_DISCARD:
>  			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
>  				struct request_queue *rq = info->rq;
> -				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> +
> +				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
>  					   info->gd->disk_name, op_name(bret.operation));
>  				blkif_req(req)->error = BLK_STS_NOTSUPP;
>  				info->feature_discard = 0;
> @@ -1614,13 +1641,13 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  		case BLKIF_OP_FLUSH_DISKCACHE:
>  		case BLKIF_OP_WRITE_BARRIER:
>  			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
> -				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> +				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
>  				       info->gd->disk_name, op_name(bret.operation));
>  				blkif_req(req)->error = BLK_STS_NOTSUPP;
>  			}
>  			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
>  				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
> -				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
> +				pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
>  				       info->gd->disk_name, op_name(bret.operation));
>  				blkif_req(req)->error = BLK_STS_NOTSUPP;
>  			}
> @@ -1635,8 +1662,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>  		case BLKIF_OP_READ:
>  		case BLKIF_OP_WRITE:
>  			if (unlikely(bret.status != BLKIF_RSP_OKAY))
> -				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
> -					"request: %x\n", bret.status);
> +				dev_dbg_ratelimited(&info->xbdev->dev,
> +					"Bad return from blkdev data request: %x\n", bret.status);

Since you are touching the line, could you use %#x here? It's IMO not
obvious from the context this status will be printed in hex base. Also
bret.status parameter could be split into a newline.

Thanks, Roger.
Chen, Rong A July 9, 2021, 11:09 a.m. UTC | #4
Hi Juergen,

I love your patch! Perhaps something to improve:

[auto build test WARNING on xen-tip/linux-next]
[also build test WARNING on next-20210708]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Juergen-Gross/xen-harden-blkfront-against-malicious-backends/20210708-204423
base:   https://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git linux-next
config: x86_64-rhel-8.3-kselftests (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-341-g8af24329-dirty
        # https://github.com/0day-ci/linux/commit/26379fb9eaab91fc62eefa414619d27072941f59
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Juergen-Gross/xen-harden-blkfront-against-malicious-backends/20210708-204423
        git checkout 26379fb9eaab91fc62eefa414619d27072941f59
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)
>> drivers/block/xen-blkfront.c:1568:20: sparse: sparse: context imbalance in 'blkif_interrupt' - different lock contexts for basic block

vim +/blkif_interrupt +1568 drivers/block/xen-blkfront.c

9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1567  
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17 @1568  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1569  {
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1570  	struct request *req;
4c0a9a02397621 Juergen Gross         2021-07-08  1571  	struct blkif_response bret;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1572  	RING_IDX i, rp;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1573  	unsigned long flags;
81f35161577236 Bob Liu               2015-11-14  1574  	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
81f35161577236 Bob Liu               2015-11-14  1575  	struct blkfront_info *info = rinfo->dev_info;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1576  
11659569f7202d Bob Liu               2015-11-14  1577  	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1578  		return IRQ_HANDLED;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1579  
11659569f7202d Bob Liu               2015-11-14  1580  	spin_lock_irqsave(&rinfo->ring_lock, flags);
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1581   again:
26379fb9eaab91 Juergen Gross         2021-07-08  1582  	rp = READ_ONCE(rinfo->ring.sring->rsp_prod);
26379fb9eaab91 Juergen Gross         2021-07-08  1583  	virt_rmb(); /* Ensure we see queued responses up to 'rp'. */
26379fb9eaab91 Juergen Gross         2021-07-08  1584  	if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) {
26379fb9eaab91 Juergen Gross         2021-07-08  1585  		pr_alert("%s: illegal number of responses %u\n",
26379fb9eaab91 Juergen Gross         2021-07-08  1586  			 info->gd->disk_name, rp - rinfo->ring.rsp_cons);
26379fb9eaab91 Juergen Gross         2021-07-08  1587  		goto err;
26379fb9eaab91 Juergen Gross         2021-07-08  1588  	}
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1589  
81f35161577236 Bob Liu               2015-11-14  1590  	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1591  		unsigned long id;
26379fb9eaab91 Juergen Gross         2021-07-08  1592  		unsigned int op;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1593  
4c0a9a02397621 Juergen Gross         2021-07-08  1594  		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
4c0a9a02397621 Juergen Gross         2021-07-08  1595  		id = bret.id;
4c0a9a02397621 Juergen Gross         2021-07-08  1596  
6878c32e5cc0e4 Konrad Rzeszutek Wilk 2012-05-25  1597  		/*
6878c32e5cc0e4 Konrad Rzeszutek Wilk 2012-05-25  1598  		 * The backend has messed up and given us an id that we would
6878c32e5cc0e4 Konrad Rzeszutek Wilk 2012-05-25  1599  		 * never have given to it (we stamp it up to BLK_RING_SIZE -
6878c32e5cc0e4 Konrad Rzeszutek Wilk 2012-05-25  1600  		 * look in get_id_from_freelist.
6878c32e5cc0e4 Konrad Rzeszutek Wilk 2012-05-25  1601  		 */
86839c56dee28c Bob Liu               2015-06-03  1602  		if (id >= BLK_RING_SIZE(info)) {
26379fb9eaab91 Juergen Gross         2021-07-08  1603  			pr_alert("%s: response has incorrect id (%ld)\n",
26379fb9eaab91 Juergen Gross         2021-07-08  1604  				 info->gd->disk_name, id);
26379fb9eaab91 Juergen Gross         2021-07-08  1605  			goto err;
26379fb9eaab91 Juergen Gross         2021-07-08  1606  		}
26379fb9eaab91 Juergen Gross         2021-07-08  1607  		if (rinfo->shadow[id].status != REQ_WAITING) {
26379fb9eaab91 Juergen Gross         2021-07-08  1608  			pr_alert("%s: response references no pending request\n",
26379fb9eaab91 Juergen Gross         2021-07-08  1609  				 info->gd->disk_name);
26379fb9eaab91 Juergen Gross         2021-07-08  1610  			goto err;
6878c32e5cc0e4 Konrad Rzeszutek Wilk 2012-05-25  1611  		}
26379fb9eaab91 Juergen Gross         2021-07-08  1612  
26379fb9eaab91 Juergen Gross         2021-07-08  1613  		rinfo->shadow[id].status = REQ_PROCESSING;
81f35161577236 Bob Liu               2015-11-14  1614  		req  = rinfo->shadow[id].request;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1615  
26379fb9eaab91 Juergen Gross         2021-07-08  1616  		op = rinfo->shadow[id].req.operation;
26379fb9eaab91 Juergen Gross         2021-07-08  1617  		if (op == BLKIF_OP_INDIRECT)
26379fb9eaab91 Juergen Gross         2021-07-08  1618  			op = rinfo->shadow[id].req.u.indirect.indirect_op;
26379fb9eaab91 Juergen Gross         2021-07-08  1619  		if (bret.operation != op) {
26379fb9eaab91 Juergen Gross         2021-07-08  1620  			pr_alert("%s: response has wrong operation (%u instead of %u)\n",
26379fb9eaab91 Juergen Gross         2021-07-08  1621  				 info->gd->disk_name, bret.operation, op);
26379fb9eaab91 Juergen Gross         2021-07-08  1622  			goto err;
26379fb9eaab91 Juergen Gross         2021-07-08  1623  		}
26379fb9eaab91 Juergen Gross         2021-07-08  1624  
4c0a9a02397621 Juergen Gross         2021-07-08  1625  		if (bret.operation != BLKIF_OP_DISCARD) {
6cc5683390472c Julien Grall          2015-08-13  1626  			/*
6cc5683390472c Julien Grall          2015-08-13  1627  			 * We may need to wait for an extra response if the
6cc5683390472c Julien Grall          2015-08-13  1628  			 * I/O request is split in 2
6cc5683390472c Julien Grall          2015-08-13  1629  			 */
4c0a9a02397621 Juergen Gross         2021-07-08  1630  			if (!blkif_completion(&id, rinfo, &bret))
6cc5683390472c Julien Grall          2015-08-13  1631  				continue;
6cc5683390472c Julien Grall          2015-08-13  1632  		}
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1633  
81f35161577236 Bob Liu               2015-11-14  1634  		if (add_id_to_freelist(rinfo, id)) {
6878c32e5cc0e4 Konrad Rzeszutek Wilk 2012-05-25  1635  			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
4c0a9a02397621 Juergen Gross         2021-07-08  1636  			     info->gd->disk_name, op_name(bret.operation), id);
6878c32e5cc0e4 Konrad Rzeszutek Wilk 2012-05-25  1637  			continue;
6878c32e5cc0e4 Konrad Rzeszutek Wilk 2012-05-25  1638  		}
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1639  
4c0a9a02397621 Juergen Gross         2021-07-08  1640  		if (bret.status == BLKIF_RSP_OKAY)
2a842acab109f4 Christoph Hellwig     2017-06-03  1641  			blkif_req(req)->error = BLK_STS_OK;
2a842acab109f4 Christoph Hellwig     2017-06-03  1642  		else
2a842acab109f4 Christoph Hellwig     2017-06-03  1643  			blkif_req(req)->error = BLK_STS_IOERR;
2a842acab109f4 Christoph Hellwig     2017-06-03  1644  
4c0a9a02397621 Juergen Gross         2021-07-08  1645  		switch (bret.operation) {
ed30bf317c5ceb Li Dongyang           2011-09-01  1646  		case BLKIF_OP_DISCARD:
4c0a9a02397621 Juergen Gross         2021-07-08  1647  			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
ed30bf317c5ceb Li Dongyang           2011-09-01  1648  				struct request_queue *rq = info->rq;
26379fb9eaab91 Juergen Gross         2021-07-08  1649  
26379fb9eaab91 Juergen Gross         2021-07-08  1650  				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
4c0a9a02397621 Juergen Gross         2021-07-08  1651  					   info->gd->disk_name, op_name(bret.operation));
2a842acab109f4 Christoph Hellwig     2017-06-03  1652  				blkif_req(req)->error = BLK_STS_NOTSUPP;
ed30bf317c5ceb Li Dongyang           2011-09-01  1653  				info->feature_discard = 0;
5ea42986694a96 Konrad Rzeszutek Wilk 2011-10-12  1654  				info->feature_secdiscard = 0;
8b904b5b6b58b9 Bart Van Assche       2018-03-07  1655  				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
8b904b5b6b58b9 Bart Van Assche       2018-03-07  1656  				blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
ed30bf317c5ceb Li Dongyang           2011-09-01  1657  			}
ed30bf317c5ceb Li Dongyang           2011-09-01  1658  			break;
edf6ef59ec7ee8 Konrad Rzeszutek Wilk 2011-05-03  1659  		case BLKIF_OP_FLUSH_DISKCACHE:
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1660  		case BLKIF_OP_WRITE_BARRIER:
4c0a9a02397621 Juergen Gross         2021-07-08  1661  			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
26379fb9eaab91 Juergen Gross         2021-07-08  1662  				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
4c0a9a02397621 Juergen Gross         2021-07-08  1663  				       info->gd->disk_name, op_name(bret.operation));
31c4ccc3ecb494 Bart Van Assche       2017-07-21  1664  				blkif_req(req)->error = BLK_STS_NOTSUPP;
dcb8baeceaa1c6 Jeremy Fitzhardinge   2010-11-02  1665  			}
4c0a9a02397621 Juergen Gross         2021-07-08  1666  			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
81f35161577236 Bob Liu               2015-11-14  1667  				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
26379fb9eaab91 Juergen Gross         2021-07-08  1668  				pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
4c0a9a02397621 Juergen Gross         2021-07-08  1669  				       info->gd->disk_name, op_name(bret.operation));
2a842acab109f4 Christoph Hellwig     2017-06-03  1670  				blkif_req(req)->error = BLK_STS_NOTSUPP;
dcb8baeceaa1c6 Jeremy Fitzhardinge   2010-11-02  1671  			}
2609587c1eeb4f Christoph Hellwig     2017-04-20  1672  			if (unlikely(blkif_req(req)->error)) {
2a842acab109f4 Christoph Hellwig     2017-06-03  1673  				if (blkif_req(req)->error == BLK_STS_NOTSUPP)
2a842acab109f4 Christoph Hellwig     2017-06-03  1674  					blkif_req(req)->error = BLK_STS_OK;
a418090aa88b9b Mike Christie         2016-06-05  1675  				info->feature_fua = 0;
4913efe456c987 Tejun Heo             2010-09-03  1676  				info->feature_flush = 0;
4913efe456c987 Tejun Heo             2010-09-03  1677  				xlvbd_flush(info);
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1678  			}
df561f6688fef7 Gustavo A. R. Silva   2020-08-23  1679  			fallthrough;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1680  		case BLKIF_OP_READ:
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1681  		case BLKIF_OP_WRITE:
4c0a9a02397621 Juergen Gross         2021-07-08  1682  			if (unlikely(bret.status != BLKIF_RSP_OKAY))
26379fb9eaab91 Juergen Gross         2021-07-08  1683  				dev_dbg_ratelimited(&info->xbdev->dev,
26379fb9eaab91 Juergen Gross         2021-07-08  1684  					"Bad return from blkdev data request: %x\n", bret.status);
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1685  
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1686  			break;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1687  		default:
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1688  			BUG();
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1689  		}
2609587c1eeb4f Christoph Hellwig     2017-04-20  1690  
15f73f5b3e5958 Christoph Hellwig     2020-06-11  1691  		if (likely(!blk_should_fake_timeout(req->q)))
08e0029aa2a4ac Christoph Hellwig     2017-04-20  1692  			blk_mq_complete_request(req);
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1693  	}
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1694  
81f35161577236 Bob Liu               2015-11-14  1695  	rinfo->ring.rsp_cons = i;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1696  
81f35161577236 Bob Liu               2015-11-14  1697  	if (i != rinfo->ring.req_prod_pvt) {
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1698  		int more_to_do;
81f35161577236 Bob Liu               2015-11-14  1699  		RING_FINAL_CHECK_FOR_RESPONSES(&rinfo->ring, more_to_do);
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1700  		if (more_to_do)
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1701  			goto again;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1702  	} else
81f35161577236 Bob Liu               2015-11-14  1703  		rinfo->ring.sring->rsp_event = i + 1;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1704  
11659569f7202d Bob Liu               2015-11-14  1705  	kick_pending_request_queues_locked(rinfo);
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1706  
11659569f7202d Bob Liu               2015-11-14  1707  	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1708  
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1709  	return IRQ_HANDLED;
26379fb9eaab91 Juergen Gross         2021-07-08  1710  
26379fb9eaab91 Juergen Gross         2021-07-08  1711   err:
26379fb9eaab91 Juergen Gross         2021-07-08  1712  	info->connected = BLKIF_STATE_ERROR;
26379fb9eaab91 Juergen Gross         2021-07-08  1713  	pr_alert("%s disabled for further use\n", info->gd->disk_name);
26379fb9eaab91 Juergen Gross         2021-07-08  1714  	return IRQ_HANDLED;
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1715  }
9f27ee59503865 Jeremy Fitzhardinge   2007-07-17  1716  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Juergen Gross July 9, 2021, 1:58 p.m. UTC | #5
On 09.07.21 11:42, Roger Pau Monné wrote:
> On Thu, Jul 08, 2021 at 02:43:45PM +0200, Juergen Gross wrote:
>> Today blkfront will trust the backend to send only sane response data.
>> In order to avoid privilege escalations or crashes in case of malicious
>> backends verify the data to be within expected limits. Especially make
>> sure that the response always references an outstanding request.
>>
>> Introduce a new state of the ring BLKIF_STATE_ERROR which will be
>> switched to in case an inconsistency is being detected. Recovering from
>> this state is possible only via removing and adding the virtual device
>> again (e.g. via a suspend/resume cycle).
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>

>> @@ -1555,11 +1561,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>   
>>   	spin_lock_irqsave(&rinfo->ring_lock, flags);
>>    again:
>> -	rp = rinfo->ring.sring->rsp_prod;
>> -	rmb(); /* Ensure we see queued responses up to 'rp'. */
>> +	rp = READ_ONCE(rinfo->ring.sring->rsp_prod);
>> +	virt_rmb(); /* Ensure we see queued responses up to 'rp'. */
> 
> Is the READ_ONCE strictly needed? Doesn't the barrier prevent rp from
> not being loaded at this point?

I asked Jan the same and he didn't want to rule that out. Additionally
the READ_ONCE() helps against (rather improbable) load tearing of the
compiler.

>> +		op = rinfo->shadow[id].req.operation;
>> +		if (op == BLKIF_OP_INDIRECT)
>> +			op = rinfo->shadow[id].req.u.indirect.indirect_op;
>> +		if (bret.operation != op) {
>> +			pr_alert("%s: response has wrong operation (%u instead of %u)\n",
>> +				 info->gd->disk_name, bret.operation, op);
> 
> You could also use op_name here, but I guess this could mask the
> operation as 'unknown' for any number out of the defined ones.

This case shouldn't happen normally, so having the numerical value is
enough and will help for hiding any undefined op.

>> @@ -1635,8 +1662,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>   		case BLKIF_OP_READ:
>>   		case BLKIF_OP_WRITE:
>>   			if (unlikely(bret.status != BLKIF_RSP_OKAY))
>> -				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
>> -					"request: %x\n", bret.status);
>> +				dev_dbg_ratelimited(&info->xbdev->dev,
>> +					"Bad return from blkdev data request: %x\n", bret.status);
> 
> Since you are touching the line, could you use %#x here? It's IMO not
> obvious from the context this status will be printed in hex base. Also
> bret.status parameter could be split into a newline.

Fine with me.


Juergen
Juergen Gross July 30, 2021, 10:08 a.m. UTC | #6
On 08.07.21 14:43, Juergen Gross wrote:
> Today blkfront will trust the backend to send only sane response data.
> In order to avoid privilege escalations or crashes in case of malicious
> backends verify the data to be within expected limits. Especially make
> sure that the response always references an outstanding request.
> 
> Introduce a new state of the ring BLKIF_STATE_ERROR which will be
> switched to in case an inconsistency is being detected. Recovering from
> this state is possible only via removing and adding the virtual device
> again (e.g. via a suspend/resume cycle).
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Any comments for this patch?


Juergen

> ---
> V2:
> - use READ_ONCE() for reading the producer index
> - check validity of producer index only after memory barrier (Jan Beulich)
> - use virt_rmb() as barrier (Jan Beulich)
> ---
>   drivers/block/xen-blkfront.c | 66 ++++++++++++++++++++++++++----------
>   1 file changed, 49 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 80701860870a..ecdbb0381b4c 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -80,6 +80,7 @@ enum blkif_state {
>   	BLKIF_STATE_DISCONNECTED,
>   	BLKIF_STATE_CONNECTED,
>   	BLKIF_STATE_SUSPENDED,
> +	BLKIF_STATE_ERROR,
>   };
>   
>   struct grant {
> @@ -89,6 +90,7 @@ struct grant {
>   };
>   
>   enum blk_req_status {
> +	REQ_PROCESSING,
>   	REQ_WAITING,
>   	REQ_DONE,
>   	REQ_ERROR,
> @@ -543,7 +545,7 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
>   
>   	id = get_id_from_freelist(rinfo);
>   	rinfo->shadow[id].request = req;
> -	rinfo->shadow[id].status = REQ_WAITING;
> +	rinfo->shadow[id].status = REQ_PROCESSING;
>   	rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
>   
>   	rinfo->shadow[id].req.u.rw.id = id;
> @@ -572,6 +574,7 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf
>   
>   	/* Copy the request to the ring page. */
>   	*final_ring_req = *ring_req;
> +	rinfo->shadow[id].status = REQ_WAITING;
>   
>   	return 0;
>   }
> @@ -847,8 +850,11 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
>   
>   	/* Copy request(s) to the ring page. */
>   	*final_ring_req = *ring_req;
> -	if (unlikely(require_extra_req))
> +	rinfo->shadow[id].status = REQ_WAITING;
> +	if (unlikely(require_extra_req)) {
>   		*final_extra_ring_req = *extra_ring_req;
> +		rinfo->shadow[extra_id].status = REQ_WAITING;
> +	}
>   
>   	if (new_persistent_gnts)
>   		gnttab_free_grant_references(setup.gref_head);
> @@ -1402,8 +1408,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp)
>   static int blkif_get_final_status(enum blk_req_status s1,
>   				  enum blk_req_status s2)
>   {
> -	BUG_ON(s1 == REQ_WAITING);
> -	BUG_ON(s2 == REQ_WAITING);
> +	BUG_ON(s1 < REQ_DONE);
> +	BUG_ON(s2 < REQ_DONE);
>   
>   	if (s1 == REQ_ERROR || s2 == REQ_ERROR)
>   		return BLKIF_RSP_ERROR;
> @@ -1436,7 +1442,7 @@ static bool blkif_completion(unsigned long *id,
>   		s->status = blkif_rsp_to_req_status(bret->status);
>   
>   		/* Wait the second response if not yet here. */
> -		if (s2->status == REQ_WAITING)
> +		if (s2->status < REQ_DONE)
>   			return false;
>   
>   		bret->status = blkif_get_final_status(s->status,
> @@ -1555,11 +1561,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   
>   	spin_lock_irqsave(&rinfo->ring_lock, flags);
>    again:
> -	rp = rinfo->ring.sring->rsp_prod;
> -	rmb(); /* Ensure we see queued responses up to 'rp'. */
> +	rp = READ_ONCE(rinfo->ring.sring->rsp_prod);
> +	virt_rmb(); /* Ensure we see queued responses up to 'rp'. */
> +	if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) {
> +		pr_alert("%s: illegal number of responses %u\n",
> +			 info->gd->disk_name, rp - rinfo->ring.rsp_cons);
> +		goto err;
> +	}
>   
>   	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
>   		unsigned long id;
> +		unsigned int op;
>   
>   		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
>   		id = bret.id;
> @@ -1570,14 +1582,28 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   		 * look in get_id_from_freelist.
>   		 */
>   		if (id >= BLK_RING_SIZE(info)) {
> -			WARN(1, "%s: response to %s has incorrect id (%ld)\n",
> -			     info->gd->disk_name, op_name(bret.operation), id);
> -			/* We can't safely get the 'struct request' as
> -			 * the id is busted. */
> -			continue;
> +			pr_alert("%s: response has incorrect id (%ld)\n",
> +				 info->gd->disk_name, id);
> +			goto err;
>   		}
> +		if (rinfo->shadow[id].status != REQ_WAITING) {
> +			pr_alert("%s: response references no pending request\n",
> +				 info->gd->disk_name);
> +			goto err;
> +		}
> +
> +		rinfo->shadow[id].status = REQ_PROCESSING;
>   		req  = rinfo->shadow[id].request;
>   
> +		op = rinfo->shadow[id].req.operation;
> +		if (op == BLKIF_OP_INDIRECT)
> +			op = rinfo->shadow[id].req.u.indirect.indirect_op;
> +		if (bret.operation != op) {
> +			pr_alert("%s: response has wrong operation (%u instead of %u)\n",
> +				 info->gd->disk_name, bret.operation, op);
> +			goto err;
> +		}
> +
>   		if (bret.operation != BLKIF_OP_DISCARD) {
>   			/*
>   			 * We may need to wait for an extra response if the
> @@ -1602,7 +1628,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   		case BLKIF_OP_DISCARD:
>   			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
>   				struct request_queue *rq = info->rq;
> -				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> +
> +				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
>   					   info->gd->disk_name, op_name(bret.operation));
>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>   				info->feature_discard = 0;
> @@ -1614,13 +1641,13 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   		case BLKIF_OP_FLUSH_DISKCACHE:
>   		case BLKIF_OP_WRITE_BARRIER:
>   			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
> -				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> +				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
>   				       info->gd->disk_name, op_name(bret.operation));
>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>   			}
>   			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
>   				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
> -				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
> +				pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
>   				       info->gd->disk_name, op_name(bret.operation));
>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>   			}
> @@ -1635,8 +1662,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   		case BLKIF_OP_READ:
>   		case BLKIF_OP_WRITE:
>   			if (unlikely(bret.status != BLKIF_RSP_OKAY))
> -				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
> -					"request: %x\n", bret.status);
> +				dev_dbg_ratelimited(&info->xbdev->dev,
> +					"Bad return from blkdev data request: %x\n", bret.status);
>   
>   			break;
>   		default:
> @@ -1662,6 +1689,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
>   
>   	return IRQ_HANDLED;
> +
> + err:
> +	info->connected = BLKIF_STATE_ERROR;
> +	pr_alert("%s disabled for further use\n", info->gd->disk_name);
> +	return IRQ_HANDLED;
>   }
>   
>   
>
Juergen Gross July 30, 2021, 10:31 a.m. UTC | #7
On 30.07.21 12:08, Juergen Gross wrote:
> On 08.07.21 14:43, Juergen Gross wrote:
>> Today blkfront will trust the backend to send only sane response data.
>> In order to avoid privilege escalations or crashes in case of malicious
>> backends verify the data to be within expected limits. Especially make
>> sure that the response always references an outstanding request.
>>
>> Introduce a new state of the ring BLKIF_STATE_ERROR which will be
>> switched to in case an inconsistency is being detected. Recovering from
>> this state is possible only via removing and adding the virtual device
>> again (e.g. via a suspend/resume cycle).
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
> 
> Any comments for this patch?

Please ignore this request for comments, I already got some.


Juergen
diff mbox series

Patch

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 80701860870a..ecdbb0381b4c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -80,6 +80,7 @@  enum blkif_state {
 	BLKIF_STATE_DISCONNECTED,
 	BLKIF_STATE_CONNECTED,
 	BLKIF_STATE_SUSPENDED,
+	BLKIF_STATE_ERROR,
 };
 
 struct grant {
@@ -89,6 +90,7 @@  struct grant {
 };
 
 enum blk_req_status {
+	REQ_PROCESSING,
 	REQ_WAITING,
 	REQ_DONE,
 	REQ_ERROR,
@@ -543,7 +545,7 @@  static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
 
 	id = get_id_from_freelist(rinfo);
 	rinfo->shadow[id].request = req;
-	rinfo->shadow[id].status = REQ_WAITING;
+	rinfo->shadow[id].status = REQ_PROCESSING;
 	rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
 
 	rinfo->shadow[id].req.u.rw.id = id;
@@ -572,6 +574,7 @@  static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf
 
 	/* Copy the request to the ring page. */
 	*final_ring_req = *ring_req;
+	rinfo->shadow[id].status = REQ_WAITING;
 
 	return 0;
 }
@@ -847,8 +850,11 @@  static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
 
 	/* Copy request(s) to the ring page. */
 	*final_ring_req = *ring_req;
-	if (unlikely(require_extra_req))
+	rinfo->shadow[id].status = REQ_WAITING;
+	if (unlikely(require_extra_req)) {
 		*final_extra_ring_req = *extra_ring_req;
+		rinfo->shadow[extra_id].status = REQ_WAITING;
+	}
 
 	if (new_persistent_gnts)
 		gnttab_free_grant_references(setup.gref_head);
@@ -1402,8 +1408,8 @@  static enum blk_req_status blkif_rsp_to_req_status(int rsp)
 static int blkif_get_final_status(enum blk_req_status s1,
 				  enum blk_req_status s2)
 {
-	BUG_ON(s1 == REQ_WAITING);
-	BUG_ON(s2 == REQ_WAITING);
+	BUG_ON(s1 < REQ_DONE);
+	BUG_ON(s2 < REQ_DONE);
 
 	if (s1 == REQ_ERROR || s2 == REQ_ERROR)
 		return BLKIF_RSP_ERROR;
@@ -1436,7 +1442,7 @@  static bool blkif_completion(unsigned long *id,
 		s->status = blkif_rsp_to_req_status(bret->status);
 
 		/* Wait the second response if not yet here. */
-		if (s2->status == REQ_WAITING)
+		if (s2->status < REQ_DONE)
 			return false;
 
 		bret->status = blkif_get_final_status(s->status,
@@ -1555,11 +1561,17 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 	spin_lock_irqsave(&rinfo->ring_lock, flags);
  again:
-	rp = rinfo->ring.sring->rsp_prod;
-	rmb(); /* Ensure we see queued responses up to 'rp'. */
+	rp = READ_ONCE(rinfo->ring.sring->rsp_prod);
+	virt_rmb(); /* Ensure we see queued responses up to 'rp'. */
+	if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) {
+		pr_alert("%s: illegal number of responses %u\n",
+			 info->gd->disk_name, rp - rinfo->ring.rsp_cons);
+		goto err;
+	}
 
 	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
 		unsigned long id;
+		unsigned int op;
 
 		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
 		id = bret.id;
@@ -1570,14 +1582,28 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		 * look in get_id_from_freelist.
 		 */
 		if (id >= BLK_RING_SIZE(info)) {
-			WARN(1, "%s: response to %s has incorrect id (%ld)\n",
-			     info->gd->disk_name, op_name(bret.operation), id);
-			/* We can't safely get the 'struct request' as
-			 * the id is busted. */
-			continue;
+			pr_alert("%s: response has incorrect id (%ld)\n",
+				 info->gd->disk_name, id);
+			goto err;
 		}
+		if (rinfo->shadow[id].status != REQ_WAITING) {
+			pr_alert("%s: response references no pending request\n",
+				 info->gd->disk_name);
+			goto err;
+		}
+
+		rinfo->shadow[id].status = REQ_PROCESSING;
 		req  = rinfo->shadow[id].request;
 
+		op = rinfo->shadow[id].req.operation;
+		if (op == BLKIF_OP_INDIRECT)
+			op = rinfo->shadow[id].req.u.indirect.indirect_op;
+		if (bret.operation != op) {
+			pr_alert("%s: response has wrong operation (%u instead of %u)\n",
+				 info->gd->disk_name, bret.operation, op);
+			goto err;
+		}
+
 		if (bret.operation != BLKIF_OP_DISCARD) {
 			/*
 			 * We may need to wait for an extra response if the
@@ -1602,7 +1628,8 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		case BLKIF_OP_DISCARD:
 			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
-				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+
+				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
 					   info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 				info->feature_discard = 0;
@@ -1614,13 +1641,13 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
 			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
-				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
 				       info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
 			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
 				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
-				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
+				pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
 				       info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
@@ -1635,8 +1662,8 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		case BLKIF_OP_READ:
 		case BLKIF_OP_WRITE:
 			if (unlikely(bret.status != BLKIF_RSP_OKAY))
-				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
-					"request: %x\n", bret.status);
+				dev_dbg_ratelimited(&info->xbdev->dev,
+					"Bad return from blkdev data request: %x\n", bret.status);
 
 			break;
 		default:
@@ -1662,6 +1689,11 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
 
 	return IRQ_HANDLED;
+
+ err:
+	info->connected = BLKIF_STATE_ERROR;
+	pr_alert("%s disabled for further use\n", info->gd->disk_name);
+	return IRQ_HANDLED;
 }