
[v3,1/3] xen/blkfront: read response from backend only once

Message ID 20210730103854.12681-2-jgross@suse.com (mailing list archive)
State New, archived
Series xen: harden blkfront against malicious backends

Commit Message

Jürgen Groß July 30, 2021, 10:38 a.m. UTC
In order to avoid problems in case the backend is modifying a response
on the ring page while the frontend has already seen it, just read the
response into a local buffer in one go and then operate on that buffer
only.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
---
 drivers/block/xen-blkfront.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)
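
The problem being addressed is a double fetch from shared memory: with
RING_GET_RESPONSE() the handler works through a pointer into the shared
ring page, so the value it bounds-checks and the value it later uses can
come from different reads while the backend is rewriting the slot. Below
is a small stand-alone sketch of the old and the new access pattern
(user-space C with illustrative names only, not the driver's code):

#include <stdio.h>
#include <string.h>

/* Stand-in for one response slot on the shared ring page; conceptually
 * the backend can rewrite it at any time. */
struct fake_response {
	unsigned long id;
	unsigned char operation;
	short status;
};

static struct fake_response shared_slot;	/* "shared" memory */

#define FAKE_RING_SIZE 32

/* Old pattern: work through a pointer into shared memory.  The id that
 * passes the bounds check and the id that is used afterwards are two
 * separate reads of shared memory; with the backend running in another
 * domain they may return different values. */
static void handle_via_pointer(void)
{
	struct fake_response *bret = &shared_slot;

	if (bret->id >= FAKE_RING_SIZE)		/* read #1 */
		return;
	printf("using id %lu\n", bret->id);	/* read #2 */
}

/* New pattern: snapshot the slot once, then only ever look at the copy. */
static void handle_via_copy(void)
{
	struct fake_response bret;

	memcpy(&bret, &shared_slot, sizeof(bret));
	if (bret.id >= FAKE_RING_SIZE)
		return;
	printf("using id %lu\n", bret.id);
}

int main(void)
{
	shared_slot.id = 5;
	handle_via_pointer();
	handle_via_copy();
	return 0;
}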

Comments

Oleksandr Andrushchenko Aug. 2, 2021, 2:06 p.m. UTC | #1
Hi, Juergen!

On 30.07.21 13:38, Juergen Gross wrote:
> In order to avoid problems in case the backend is modifying a response
> on the ring page while the frontend has already seen it, just read the
> response into a local buffer in one go and then operate on that buffer
> only.
>
> Signed-off-by: Juergen Gross <jgross@suse.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>   drivers/block/xen-blkfront.c | 35 ++++++++++++++++++-----------------
>   1 file changed, 18 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index d83fee21f6c5..15e840287734 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id,
>   static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   {
>   	struct request *req;
> -	struct blkif_response *bret;
> +	struct blkif_response bret;
>   	RING_IDX i, rp;
>   	unsigned long flags;
>   	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
> @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
>   		unsigned long id;
>   
> -		bret = RING_GET_RESPONSE(&rinfo->ring, i);
> -		id   = bret->id;
> +		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);

As per my understanding copying is still not an atomic operation as the
request/response are multi-byte structures in general. IOW, what prevents
the backend from modifying the ring while we are copying?

Thanks,

Oleksandr

> +		id = bret.id;
> +
>   		/*
>   		 * The backend has messed up and given us an id that we would
>   		 * never have given to it (we stamp it up to BLK_RING_SIZE -
> @@ -1522,39 +1523,39 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   		 */
>   		if (id >= BLK_RING_SIZE(info)) {
>   			WARN(1, "%s: response to %s has incorrect id (%ld)\n",
> -			     info->gd->disk_name, op_name(bret->operation), id);
> +			     info->gd->disk_name, op_name(bret.operation), id);
>   			/* We can't safely get the 'struct request' as
>   			 * the id is busted. */
>   			continue;
>   		}
>   		req  = rinfo->shadow[id].request;
>   
> -		if (bret->operation != BLKIF_OP_DISCARD) {
> +		if (bret.operation != BLKIF_OP_DISCARD) {
>   			/*
>   			 * We may need to wait for an extra response if the
>   			 * I/O request is split in 2
>   			 */
> -			if (!blkif_completion(&id, rinfo, bret))
> +			if (!blkif_completion(&id, rinfo, &bret))
>   				continue;
>   		}
>   
>   		if (add_id_to_freelist(rinfo, id)) {
>   			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
> -			     info->gd->disk_name, op_name(bret->operation), id);
> +			     info->gd->disk_name, op_name(bret.operation), id);
>   			continue;
>   		}
>   
> -		if (bret->status == BLKIF_RSP_OKAY)
> +		if (bret.status == BLKIF_RSP_OKAY)
>   			blkif_req(req)->error = BLK_STS_OK;
>   		else
>   			blkif_req(req)->error = BLK_STS_IOERR;
>   
> -		switch (bret->operation) {
> +		switch (bret.operation) {
>   		case BLKIF_OP_DISCARD:
> -			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
> +			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
>   				struct request_queue *rq = info->rq;
>   				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> -					   info->gd->disk_name, op_name(bret->operation));
> +					   info->gd->disk_name, op_name(bret.operation));
>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>   				info->feature_discard = 0;
>   				info->feature_secdiscard = 0;
> @@ -1564,15 +1565,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   			break;
>   		case BLKIF_OP_FLUSH_DISKCACHE:
>   		case BLKIF_OP_WRITE_BARRIER:
> -			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
> +			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
>   				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> -				       info->gd->disk_name, op_name(bret->operation));
> +				       info->gd->disk_name, op_name(bret.operation));
>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>   			}
> -			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
> +			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
>   				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
>   				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
> -				       info->gd->disk_name, op_name(bret->operation));
> +				       info->gd->disk_name, op_name(bret.operation));
>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>   			}
>   			if (unlikely(blkif_req(req)->error)) {
> @@ -1585,9 +1586,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   			fallthrough;
>   		case BLKIF_OP_READ:
>   		case BLKIF_OP_WRITE:
> -			if (unlikely(bret->status != BLKIF_RSP_OKAY))
> +			if (unlikely(bret.status != BLKIF_RSP_OKAY))
>   				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
> -					"request: %x\n", bret->status);
> +					"request: %x\n", bret.status);
>   
>   			break;
>   		default:
Julien Grall Aug. 2, 2021, 7:26 p.m. UTC | #2
Hi,

On 02/08/2021 15:06, Oleksandr Andrushchenko wrote:
> On 30.07.21 13:38, Juergen Gross wrote:
>> In order to avoid problems in case the backend is modifying a response
>> on the ring page while the frontend has already seen it, just read the
>> response into a local buffer in one go and then operate on that buffer
>> only.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
>> ---
>>    drivers/block/xen-blkfront.c | 35 ++++++++++++++++++-----------------
>>    1 file changed, 18 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index d83fee21f6c5..15e840287734 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id,
>>    static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>    {
>>    	struct request *req;
>> -	struct blkif_response *bret;
>> +	struct blkif_response bret;
>>    	RING_IDX i, rp;
>>    	unsigned long flags;
>>    	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
>> @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>    	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
>>    		unsigned long id;
>>    
>> -		bret = RING_GET_RESPONSE(&rinfo->ring, i);
>> -		id   = bret->id;
>> +		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
> 
> As per my understanding copying is still not an atomic operation as the
> request/response are multi-byte structures in general. IOW, what prevents
> the backend from modifying the ring while we are copying?

Nothing, and I believe you are never going to be able to ensure
atomicity with a large structure (at least between entities that don't
trust each other).

However, what you can do is copy the response once, check that it is
consistent and then use it. If it is not consistent, then you can
report an error.

This is better than what's currently in-tree, where we may have
multiple reads, so the code is prone to TOCTOU.

Cheers,
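
A rough sketch of the copy-then-validate pattern Julien describes
(user-space C, illustrative names only; the field layout loosely mirrors
struct blkif_response and is not the driver's actual code):

#include <stdint.h>
#include <stdio.h>

/* Field layout loosely mirroring struct blkif_response. */
struct resp_copy {
	uint64_t id;
	uint8_t  operation;
	int16_t  status;
};

#define FAKE_RING_SIZE 32

/* Snapshot the untrusted slot exactly once, sanity-check the snapshot,
 * and from then on use only the snapshot.  A torn copy can still yield
 * garbage, but it is stable garbage: the value that was checked is the
 * value that gets used, so the worst case is a reported error rather
 * than a use of an unchecked value. */
static int consume_response(const volatile struct resp_copy *slot)
{
	struct resp_copy bret;

	bret.id        = slot->id;		/* one read per field ... */
	bret.operation = slot->operation;
	bret.status    = slot->status;		/* ... and no re-reads later */

	if (bret.id >= FAKE_RING_SIZE) {
		fprintf(stderr, "bogus id %llu from backend\n",
			(unsigned long long)bret.id);
		return -1;			/* report, don't trust */
	}

	return bret.status;
}

int main(void)
{
	static volatile struct resp_copy slot = { .id = 5, .status = 0 };

	printf("status = %d\n", consume_response(&slot));
	return 0;
}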
Oleksandr Andrushchenko Aug. 3, 2021, 7 a.m. UTC | #3
On 02.08.21 22:26, Julien Grall wrote:
> Hi,
>
> On 02/08/2021 15:06, Oleksandr Andrushchenko wrote:
>> On 30.07.21 13:38, Juergen Gross wrote:
>>> In order to avoid problems in case the backend is modifying a response
>>> on the ring page while the frontend has already seen it, just read the
>>> response into a local buffer in one go and then operate on that buffer
>>> only.
>>>
>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>>> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
>>> ---
>>>    drivers/block/xen-blkfront.c | 35 ++++++++++++++++++-----------------
>>>    1 file changed, 18 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>>> index d83fee21f6c5..15e840287734 100644
>>> --- a/drivers/block/xen-blkfront.c
>>> +++ b/drivers/block/xen-blkfront.c
>>> @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id,
>>>    static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>>    {
>>>        struct request *req;
>>> -    struct blkif_response *bret;
>>> +    struct blkif_response bret;
>>>        RING_IDX i, rp;
>>>        unsigned long flags;
>>>        struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
>>> @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>>        for (i = rinfo->ring.rsp_cons; i != rp; i++) {
>>>            unsigned long id;
>>>    -        bret = RING_GET_RESPONSE(&rinfo->ring, i);
>>> -        id   = bret->id;
>>> +        RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
>>
>> As per my understanding copying is still not an atomic operation as the
>> request/response are multi-byte structures in general. IOW, what prevents
>> the backend from modifying the ring while we are copying?
>
> Nothing, and I believe you are never going to be able to ensure atomicity with a large structure (at least between entities that don't trust each other).
>
> However, what you can do is copy the response once, check that it is consistent and then use it. If it is not consistent, then you can report an error.
>
> This is better than what's currently in-tree, where we may have multiple reads, so the code is prone to TOCTOU.

Agree,

Thanks

>
> Cheers,
>

Patch

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d83fee21f6c5..15e840287734 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1496,7 +1496,7 @@  static bool blkif_completion(unsigned long *id,
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 {
 	struct request *req;
-	struct blkif_response *bret;
+	struct blkif_response bret;
 	RING_IDX i, rp;
 	unsigned long flags;
 	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
@@ -1513,8 +1513,9 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
 		unsigned long id;
 
-		bret = RING_GET_RESPONSE(&rinfo->ring, i);
-		id   = bret->id;
+		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
+		id = bret.id;
+
 		/*
 		 * The backend has messed up and given us an id that we would
 		 * never have given to it (we stamp it up to BLK_RING_SIZE -
@@ -1522,39 +1523,39 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		 */
 		if (id >= BLK_RING_SIZE(info)) {
 			WARN(1, "%s: response to %s has incorrect id (%ld)\n",
-			     info->gd->disk_name, op_name(bret->operation), id);
+			     info->gd->disk_name, op_name(bret.operation), id);
 			/* We can't safely get the 'struct request' as
 			 * the id is busted. */
 			continue;
 		}
 		req  = rinfo->shadow[id].request;
 
-		if (bret->operation != BLKIF_OP_DISCARD) {
+		if (bret.operation != BLKIF_OP_DISCARD) {
 			/*
 			 * We may need to wait for an extra response if the
 			 * I/O request is split in 2
 			 */
-			if (!blkif_completion(&id, rinfo, bret))
+			if (!blkif_completion(&id, rinfo, &bret))
 				continue;
 		}
 
 		if (add_id_to_freelist(rinfo, id)) {
 			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
-			     info->gd->disk_name, op_name(bret->operation), id);
+			     info->gd->disk_name, op_name(bret.operation), id);
 			continue;
 		}
 
-		if (bret->status == BLKIF_RSP_OKAY)
+		if (bret.status == BLKIF_RSP_OKAY)
 			blkif_req(req)->error = BLK_STS_OK;
 		else
 			blkif_req(req)->error = BLK_STS_IOERR;
 
-		switch (bret->operation) {
+		switch (bret.operation) {
 		case BLKIF_OP_DISCARD:
-			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
-					   info->gd->disk_name, op_name(bret->operation));
+					   info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
@@ -1564,15 +1565,15 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			break;
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
-			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
-				       info->gd->disk_name, op_name(bret->operation));
+				       info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
-			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
 				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
 				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
-				       info->gd->disk_name, op_name(bret->operation));
+				       info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
 			if (unlikely(blkif_req(req)->error)) {
@@ -1585,9 +1586,9 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			fallthrough;
 		case BLKIF_OP_READ:
 		case BLKIF_OP_WRITE:
-			if (unlikely(bret->status != BLKIF_RSP_OKAY))
+			if (unlikely(bret.status != BLKIF_RSP_OKAY))
 				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
-					"request: %x\n", bret->status);
+					"request: %x\n", bret.status);
 
 			break;
 		default: