
[v3,1/3] xen/blkfront: read response from backend only once

Message ID 20210730103854.12681-2-jgross@suse.com (mailing list archive)
State New, archived
Series xen: harden blkfront against malicious backends

Commit Message

Jürgen Groß July 30, 2021, 10:38 a.m. UTC
In order to avoid problems in case the backend is modifying a response
on the ring page while the frontend has already seen it, just read the
response into a local buffer in one go and then operate on that buffer
only.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
---
 drivers/block/xen-blkfront.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)
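
The problem being addressed is a double fetch from shared memory: with
RING_GET_RESPONSE() the handler works through a pointer into the shared
ring page, so the value it bounds-checks and the value it later uses can
come from different reads while the backend is rewriting the slot. Below
is a small stand-alone sketch of the old and the new access pattern
(user-space C with illustrative names only, not the driver's code):

#include <stdio.h>
#include <string.h>

/* Stand-in for one response slot on the shared ring page; conceptually
 * the backend can rewrite it at any time. */
struct fake_response {
	unsigned long id;
	unsigned char operation;
	short status;
};

static struct fake_response shared_slot;	/* "shared" memory */

#define FAKE_RING_SIZE 32

/* Old pattern: work through a pointer into shared memory.  The id that
 * passes the bounds check and the id that is used afterwards are two
 * separate reads of shared memory; with the backend running in another
 * domain they may return different values. */
static void handle_via_pointer(void)
{
	struct fake_response *bret = &shared_slot;

	if (bret->id >= FAKE_RING_SIZE)		/* read #1 */
		return;
	printf("using id %lu\n", bret->id);	/* read #2 */
}

/* New pattern: snapshot the slot once, then only ever look at the copy. */
static void handle_via_copy(void)
{
	struct fake_response bret;

	memcpy(&bret, &shared_slot, sizeof(bret));
	if (bret.id >= FAKE_RING_SIZE)
		return;
	printf("using id %lu\n", bret.id);
}

int main(void)
{
	shared_slot.id = 5;
	handle_via_pointer();
	handle_via_copy();
	return 0;
}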

Comments

Oleksandr Andrushchenko Aug. 2, 2021, 2:06 p.m. UTC | #1
Hi, Juergen!

On 30.07.21 13:38, Juergen Gross wrote:
> In order to avoid problems in case the backend is modifying a response
> on the ring page while the frontend has already seen it, just read the
> response into a local buffer in one go and then operate on that buffer
> only.
>
> Signed-off-by: Juergen Gross <jgross@suse.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>   drivers/block/xen-blkfront.c | 35 ++++++++++++++++++-----------------
>   1 file changed, 18 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index d83fee21f6c5..15e840287734 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id,
>   static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   {
>   	struct request *req;
> -	struct blkif_response *bret;
> +	struct blkif_response bret;
>   	RING_IDX i, rp;
>   	unsigned long flags;
>   	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
> @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
>   		unsigned long id;
>   
> -		bret = RING_GET_RESPONSE(&rinfo->ring, i);
> -		id   = bret->id;
> +		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);

As per my understanding copying is still not an atomic operation as the
request/response are multi-byte structures in general. IOW, what prevents
the backend from modifying the ring while we are copying?

Thanks,

Oleksandr

> +		id = bret.id;
> +
>   		/*
>   		 * The backend has messed up and given us an id that we would
>   		 * never have given to it (we stamp it up to BLK_RING_SIZE -
> @@ -1522,39 +1523,39 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   		 */
>   		if (id >= BLK_RING_SIZE(info)) {
>   			WARN(1, "%s: response to %s has incorrect id (%ld)\n",
> -			     info->gd->disk_name, op_name(bret->operation), id);
> +			     info->gd->disk_name, op_name(bret.operation), id);
>   			/* We can't safely get the 'struct request' as
>   			 * the id is busted. */
>   			continue;
>   		}
>   		req  = rinfo->shadow[id].request;
>   
> -		if (bret->operation != BLKIF_OP_DISCARD) {
> +		if (bret.operation != BLKIF_OP_DISCARD) {
>   			/*
>   			 * We may need to wait for an extra response if the
>   			 * I/O request is split in 2
>   			 */
> -			if (!blkif_completion(&id, rinfo, bret))
> +			if (!blkif_completion(&id, rinfo, &bret))
>   				continue;
>   		}
>   
>   		if (add_id_to_freelist(rinfo, id)) {
>   			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
> -			     info->gd->disk_name, op_name(bret->operation), id);
> +			     info->gd->disk_name, op_name(bret.operation), id);
>   			continue;
>   		}
>   
> -		if (bret->status == BLKIF_RSP_OKAY)
> +		if (bret.status == BLKIF_RSP_OKAY)
>   			blkif_req(req)->error = BLK_STS_OK;
>   		else
>   			blkif_req(req)->error = BLK_STS_IOERR;
>   
> -		switch (bret->operation) {
> +		switch (bret.operation) {
>   		case BLKIF_OP_DISCARD:
> -			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
> +			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
>   				struct request_queue *rq = info->rq;
>   				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> -					   info->gd->disk_name, op_name(bret->operation));
> +					   info->gd->disk_name, op_name(bret.operation));
>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>   				info->feature_discard = 0;
>   				info->feature_secdiscard = 0;
> @@ -1564,15 +1565,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   			break;
>   		case BLKIF_OP_FLUSH_DISKCACHE:
>   		case BLKIF_OP_WRITE_BARRIER:
> -			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
> +			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
>   				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> -				       info->gd->disk_name, op_name(bret->operation));
> +				       info->gd->disk_name, op_name(bret.operation));
>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>   			}
> -			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
> +			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
>   				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
>   				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
> -				       info->gd->disk_name, op_name(bret->operation));
> +				       info->gd->disk_name, op_name(bret.operation));
>   				blkif_req(req)->error = BLK_STS_NOTSUPP;
>   			}
>   			if (unlikely(blkif_req(req)->error)) {
> @@ -1585,9 +1586,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>   			fallthrough;
>   		case BLKIF_OP_READ:
>   		case BLKIF_OP_WRITE:
> -			if (unlikely(bret->status != BLKIF_RSP_OKAY))
> +			if (unlikely(bret.status != BLKIF_RSP_OKAY))
>   				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
> -					"request: %x\n", bret->status);
> +					"request: %x\n", bret.status);
>   
>   			break;
>   		default:
Julien Grall Aug. 2, 2021, 7:26 p.m. UTC | #2
Hi,

On 02/08/2021 15:06, Oleksandr Andrushchenko wrote:
> On 30.07.21 13:38, Juergen Gross wrote:
>> In order to avoid problems in case the backend is modifying a response
>> on the ring page while the frontend has already seen it, just read the
>> response into a local buffer in one go and then operate on that buffer
>> only.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
>> ---
>>    drivers/block/xen-blkfront.c | 35 ++++++++++++++++++-----------------
>>    1 file changed, 18 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index d83fee21f6c5..15e840287734 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id,
>>    static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>    {
>>    	struct request *req;
>> -	struct blkif_response *bret;
>> +	struct blkif_response bret;
>>    	RING_IDX i, rp;
>>    	unsigned long flags;
>>    	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
>> @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>    	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
>>    		unsigned long id;
>>    
>> -		bret = RING_GET_RESPONSE(&rinfo->ring, i);
>> -		id   = bret->id;
>> +		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
> 
> As per my understanding copying is still not an atomic operation as the
> request/response are multi-byte structures in general. IOW, what prevents
> the backend from modifying the ring while we are copying?

Nothing, and I believe you are never going to be able to ensure
atomicity with a large structure (at least between entities that don't
trust each other).

However, what you can do is copy the response once, check that it is
consistent and then use it. If it is not consistent, then you can
report an error.

This is better than what's currently in-tree, where we may have
multiple reads, so the code is prone to TOCTOU.

Cheers,
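
A rough sketch of the copy-then-validate pattern Julien describes
(user-space C, illustrative names only; the field layout loosely mirrors
struct blkif_response and is not the driver's actual code):

#include <stdint.h>
#include <stdio.h>

/* Field layout loosely mirroring struct blkif_response. */
struct resp_copy {
	uint64_t id;
	uint8_t  operation;
	int16_t  status;
};

#define FAKE_RING_SIZE 32

/* Snapshot the untrusted slot exactly once, sanity-check the snapshot,
 * and from then on use only the snapshot.  A torn copy can still yield
 * garbage, but it is stable garbage: the value that was checked is the
 * value that gets used, so the worst case is a reported error rather
 * than a use of an unchecked value. */
static int consume_response(const volatile struct resp_copy *slot)
{
	struct resp_copy bret;

	bret.id        = slot->id;		/* one read per field ... */
	bret.operation = slot->operation;
	bret.status    = slot->status;		/* ... and no re-reads later */

	if (bret.id >= FAKE_RING_SIZE) {
		fprintf(stderr, "bogus id %llu from backend\n",
			(unsigned long long)bret.id);
		return -1;			/* report, don't trust */
	}

	return bret.status;
}

int main(void)
{
	static volatile struct resp_copy slot = { .id = 5, .status = 0 };

	printf("status = %d\n", consume_response(&slot));
	return 0;
}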
Oleksandr Andrushchenko Aug. 3, 2021, 7 a.m. UTC | #3
On 02.08.21 22:26, Julien Grall wrote:
> Hi,
>
> On 02/08/2021 15:06, Oleksandr Andrushchenko wrote:
>> On 30.07.21 13:38, Juergen Gross wrote:
>>> In order to avoid problems in case the backend is modifying a response
>>> on the ring page while the frontend has already seen it, just read the
>>> response into a local buffer in one go and then operate on that buffer
>>> only.
>>>
>>> Signed-off-by: Juergen Gross <jgross@suse.com>
>>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>>> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
>>> ---
>>>    drivers/block/xen-blkfront.c | 35 ++++++++++++++++++-----------------
>>>    1 file changed, 18 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>>> index d83fee21f6c5..15e840287734 100644
>>> --- a/drivers/block/xen-blkfront.c
>>> +++ b/drivers/block/xen-blkfront.c
>>> @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id,
>>>    static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>>    {
>>>        struct request *req;
>>> -    struct blkif_response *bret;
>>> +    struct blkif_response bret;
>>>        RING_IDX i, rp;
>>>        unsigned long flags;
>>>        struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
>>> @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
>>>        for (i = rinfo->ring.rsp_cons; i != rp; i++) {
>>>            unsigned long id;
>>>    -        bret = RING_GET_RESPONSE(&rinfo->ring, i);
>>> -        id   = bret->id;
>>> +        RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
>>
>> As per my understanding copying is still not an atomic operation as the
>> request/response are multi-byte structures in general. IOW, what prevents
>> the backend from modifying the ring while we are copying?
>
> Nothing, and I believe you are never going to be able to ensure atomicity with a large structure (at least between entities that don't trust each other).
>
> However, what you can do is copy the response once, check that it is consistent and then use it. If it is not consistent, then you can report an error.
>
> This is better than what's currently in-tree, where we may have multiple reads, so the code is prone to TOCTOU.

Agree,

Thanks

>
> Cheers,
>

Patch

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d83fee21f6c5..15e840287734 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1496,7 +1496,7 @@  static bool blkif_completion(unsigned long *id,
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 {
 	struct request *req;
-	struct blkif_response *bret;
+	struct blkif_response bret;
 	RING_IDX i, rp;
 	unsigned long flags;
 	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
@@ -1513,8 +1513,9 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
 		unsigned long id;
 
-		bret = RING_GET_RESPONSE(&rinfo->ring, i);
-		id   = bret->id;
+		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
+		id = bret.id;
+
 		/*
 		 * The backend has messed up and given us an id that we would
 		 * never have given to it (we stamp it up to BLK_RING_SIZE -
@@ -1522,39 +1523,39 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		 */
 		if (id >= BLK_RING_SIZE(info)) {
 			WARN(1, "%s: response to %s has incorrect id (%ld)\n",
-			     info->gd->disk_name, op_name(bret->operation), id);
+			     info->gd->disk_name, op_name(bret.operation), id);
 			/* We can't safely get the 'struct request' as
 			 * the id is busted. */
 			continue;
 		}
 		req  = rinfo->shadow[id].request;
 
-		if (bret->operation != BLKIF_OP_DISCARD) {
+		if (bret.operation != BLKIF_OP_DISCARD) {
 			/*
 			 * We may need to wait for an extra response if the
 			 * I/O request is split in 2
 			 */
-			if (!blkif_completion(&id, rinfo, bret))
+			if (!blkif_completion(&id, rinfo, &bret))
 				continue;
 		}
 
 		if (add_id_to_freelist(rinfo, id)) {
 			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
-			     info->gd->disk_name, op_name(bret->operation), id);
+			     info->gd->disk_name, op_name(bret.operation), id);
 			continue;
 		}
 
-		if (bret->status == BLKIF_RSP_OKAY)
+		if (bret.status == BLKIF_RSP_OKAY)
 			blkif_req(req)->error = BLK_STS_OK;
 		else
 			blkif_req(req)->error = BLK_STS_IOERR;
 
-		switch (bret->operation) {
+		switch (bret.operation) {
 		case BLKIF_OP_DISCARD:
-			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
 				struct request_queue *rq = info->rq;
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
-					   info->gd->disk_name, op_name(bret->operation));
+					   info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 				info->feature_discard = 0;
 				info->feature_secdiscard = 0;
@@ -1564,15 +1565,15 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			break;
 		case BLKIF_OP_FLUSH_DISKCACHE:
 		case BLKIF_OP_WRITE_BARRIER:
-			if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
 				printk(KERN_WARNING "blkfront: %s: %s op failed\n",
-				       info->gd->disk_name, op_name(bret->operation));
+				       info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
-			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
 				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
 				printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
-				       info->gd->disk_name, op_name(bret->operation));
+				       info->gd->disk_name, op_name(bret.operation));
 				blkif_req(req)->error = BLK_STS_NOTSUPP;
 			}
 			if (unlikely(blkif_req(req)->error)) {
@@ -1585,9 +1586,9 @@  static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 			fallthrough;
 		case BLKIF_OP_READ:
 		case BLKIF_OP_WRITE:
-			if (unlikely(bret->status != BLKIF_RSP_OKAY))
+			if (unlikely(bret.status != BLKIF_RSP_OKAY))
 				dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
-					"request: %x\n", bret->status);
+					"request: %x\n", bret.status);
 
 			break;
 		default: