diff mbox series

[v2] block: change rq_integrity_vec to respect the iterator

Message ID 7060a917-6537-4334-4961-601a182bca54@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Mike Snitzer
Headers show
Series [v2] block: change rq_integrity_vec to respect the iterator | expand

Commit Message

Mikulas Patocka May 23, 2024, 2:58 p.m. UTC
On Wed, 15 May 2024, Jens Axboe wrote:

> On 5/15/24 7:28 AM, Mikulas Patocka wrote:
> > @@ -177,9 +177,9 @@ static inline int blk_integrity_rq(struc
> >  	return 0;
> >  }
> >  
> > -static inline struct bio_vec *rq_integrity_vec(struct request *rq)
> > +static inline struct bio_vec rq_integrity_vec(struct request *rq)
> >  {
> > -	return NULL;
> > +	BUG();
> >  }
> >  #endif /* CONFIG_BLK_DEV_INTEGRITY */
> >  #endif /* _LINUX_BLK_INTEGRITY_H */
> 
> Let's please not do that. If it's not used outside of
> CONFIG_BLK_DEV_INTEGRITY, it should just go away.
> 
> -- 
> Jens Axboe

Here I'm resending the patch with the function rq_integrity_vec removed if 
CONFIG_BLK_DEV_INTEGRITY is not defined.

Mikulas


From: Mikulas Patocka <mpatocka@redhat.com>

If we allocate a bio that is larger than NVMe maximum request size, attach
integrity metadata to it and send it to the NVMe subsystem, the integrity
metadata will be corrupted.

Splitting the bio works correctly. The function bio_split will clone the
bio, trim the iterator of the first bio and advance the iterator of the
second bio.

However, the function rq_integrity_vec has a bug - it returns the first
vector of the bio's metadata and completely disregards the metadata
iterator that was advanced when the bio was split. Thus, the second bio
uses the same metadata as the first bio and this leads to metadata
corruption.

This commit changes rq_integrity_vec, so that it calls mp_bvec_iter_bvec
instead of returning the first vector. mp_bvec_iter_bvec reads the
iterator and advances the vector by the iterator.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/nvme/host/pci.c       |   14 +++++++++++---
 include/linux/blk-integrity.h |   12 ++++--------
 2 files changed, 15 insertions(+), 11 deletions(-)

Comments

Jens Axboe May 23, 2024, 3:01 p.m. UTC | #1
On 5/23/24 8:58 AM, Mikulas Patocka wrote:
> 
> 
> On Wed, 15 May 2024, Jens Axboe wrote:
> 
>> On 5/15/24 7:28 AM, Mikulas Patocka wrote:
>>> @@ -177,9 +177,9 @@ static inline int blk_integrity_rq(struc
>>>  	return 0;
>>>  }
>>>  
>>> -static inline struct bio_vec *rq_integrity_vec(struct request *rq)
>>> +static inline struct bio_vec rq_integrity_vec(struct request *rq)
>>>  {
>>> -	return NULL;
>>> +	BUG();
>>>  }
>>>  #endif /* CONFIG_BLK_DEV_INTEGRITY */
>>>  #endif /* _LINUX_BLK_INTEGRITY_H */
>>
>> Let's please not do that. If it's not used outside of
>> CONFIG_BLK_DEV_INTEGRITY, it should just go away.
>>
>> -- 
>> Jens Axboe
> 
> Here I'm resending the patch with the function rq_integrity_vec removed if 
> CONFIG_BLK_DEV_INTEGRITY is not defined.

That looks better - but can you please just post a full new series,
that's a lot easier to deal with and look at than adding a v2 of one
patch in the thread.

> @@ -853,16 +855,20 @@ static blk_status_t nvme_prep_rq(struct
>  			goto out_free_cmd;
>  	}
>  
> +#ifdef CONFIG_BLK_DEV_INTEGRITY
>  	if (blk_integrity_rq(req)) {
>  		ret = nvme_map_metadata(dev, req, &iod->cmd);
>  		if (ret)
>  			goto out_unmap_data;
>  	}
> +#endif

	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && blk_integrity_rq(req)) {

?

> @@ -962,12 +968,14 @@ static __always_inline void nvme_pci_unm
>  	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
>  	struct nvme_dev *dev = nvmeq->dev;
>  
> +#ifdef CONFIG_BLK_DEV_INTEGRITY
>  	if (blk_integrity_rq(req)) {
>  	        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

Ditto

> Index: linux-2.6/include/linux/blk-integrity.h
> ===================================================================
> --- linux-2.6.orig/include/linux/blk-integrity.h
> +++ linux-2.6/include/linux/blk-integrity.h
> @@ -109,11 +109,11 @@ static inline bool blk_integrity_rq(stru
>   * Return the first bvec that contains integrity data.  Only drivers that are
>   * limited to a single integrity segment should use this helper.
>   */
> -static inline struct bio_vec *rq_integrity_vec(struct request *rq)
> +static inline struct bio_vec rq_integrity_vec(struct request *rq)
>  {
> -	if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
> -		return NULL;
> -	return rq->bio->bi_integrity->bip_vec;
> +	WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1);
> +	return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
> +				 rq->bio->bi_integrity->bip_iter);
>  }

Not clear why the return on integrity segments > 1 is removed?
Mikulas Patocka May 23, 2024, 3:11 p.m. UTC | #2
On Thu, 23 May 2024, Jens Axboe wrote:

> On 5/23/24 8:58 AM, Mikulas Patocka wrote:

> > Here I'm resending the patch with the function rq_integrity_vec removed if 
> > CONFIG_BLK_DEV_INTEGRITY is not defined.
> 
> That looks better - but can you please just post a full new series,
> that's a lot easier to deal with and look at than adding a v2 of one
> patch in the thread.

OK, I'll post both patches.

> > @@ -853,16 +855,20 @@ static blk_status_t nvme_prep_rq(struct
> >  			goto out_free_cmd;
> >  	}
> >  
> > +#ifdef CONFIG_BLK_DEV_INTEGRITY
> >  	if (blk_integrity_rq(req)) {
> >  		ret = nvme_map_metadata(dev, req, &iod->cmd);
> >  		if (ret)
> >  			goto out_unmap_data;
> >  	}
> > +#endif
> 
> 	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && blk_integrity_rq(req)) {
> 
> ?

That wouldn't work, because the calls to rq_integrity_vec need to be 
eliminated by the preprocessor.

Should I change rq_integrity_vec to this? Then, we could get rid of the 
ifdefs and let the optimizer remove all calls to rq_integrity_vec.
static inline struct bio_vec rq_integrity_vec(struct request *rq)
{
	struct bio_vec bv = { };
	return bv;
}

> > @@ -962,12 +968,14 @@ static __always_inline void nvme_pci_unm
> >  	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
> >  	struct nvme_dev *dev = nvmeq->dev;
> >  
> > +#ifdef CONFIG_BLK_DEV_INTEGRITY
> >  	if (blk_integrity_rq(req)) {
> >  	        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
> 
> Ditto
> 
> > Index: linux-2.6/include/linux/blk-integrity.h
> > ===================================================================
> > --- linux-2.6.orig/include/linux/blk-integrity.h
> > +++ linux-2.6/include/linux/blk-integrity.h
> > @@ -109,11 +109,11 @@ static inline bool blk_integrity_rq(stru
> >   * Return the first bvec that contains integrity data.  Only drivers that are
> >   * limited to a single integrity segment should use this helper.
> >   */
> > -static inline struct bio_vec *rq_integrity_vec(struct request *rq)
> > +static inline struct bio_vec rq_integrity_vec(struct request *rq)
> >  {
> > -	if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
> > -		return NULL;
> > -	return rq->bio->bi_integrity->bip_vec;
> > +	WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1);
> > +	return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
> > +				 rq->bio->bi_integrity->bip_iter);
> >  }
> 
> Not clear why the return on integrity segments > 1 is removed?

Because we can't return NULL. But I can leave it there, print a warning 
and return the first vector.

Mikulas

> -- 
> Jens Axboe
>
Anuj gupta May 23, 2024, 3:22 p.m. UTC | #3
On Thu, May 23, 2024 at 8:41 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
>
> On Thu, 23 May 2024, Jens Axboe wrote:
>
> > On 5/23/24 8:58 AM, Mikulas Patocka wrote:
>
> > > Here I'm resending the patch with the function rq_integrity_vec removed if
> > > CONFIG_BLK_DEV_INTEGRITY is not defined.
> >
> > That looks better - but can you please just post a full new series,
> > that's a lot easier to deal with and look at than adding a v2 of one
> > patch in the thread.
>
> OK, I'll post both patches.
>
> > > @@ -853,16 +855,20 @@ static blk_status_t nvme_prep_rq(struct
> > >                     goto out_free_cmd;
> > >     }
> > >
> > > +#ifdef CONFIG_BLK_DEV_INTEGRITY
> > >     if (blk_integrity_rq(req)) {
> > >             ret = nvme_map_metadata(dev, req, &iod->cmd);
> > >             if (ret)
> > >                     goto out_unmap_data;
> > >     }
> > > +#endif
> >
> >       if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && blk_integrity_rq(req)) {
> >
> > ?
>
> That wouldn't work, because the calls to rq_integrity_vec need to be
> eliminated by the preprocessor.
>
> Should I change rq_integrity_vec to this? Then, we could get rid of the
> ifdefs and let the optimizer remove all calls to rq_integrity_vec.
> static inline struct bio_vec rq_integrity_vec(struct request *rq)
> {
>         struct bio_vec bv = { };
>         return bv;
> }

This can be reduced to
return (struct bio_vec){ };

>
> > > @@ -962,12 +968,14 @@ static __always_inline void nvme_pci_unm
> > >     struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
> > >     struct nvme_dev *dev = nvmeq->dev;
> > >
> > > +#ifdef CONFIG_BLK_DEV_INTEGRITY
> > >     if (blk_integrity_rq(req)) {
> > >             struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
> >
> > Ditto
> >
> > > Index: linux-2.6/include/linux/blk-integrity.h
> > > ===================================================================
> > > --- linux-2.6.orig/include/linux/blk-integrity.h
> > > +++ linux-2.6/include/linux/blk-integrity.h
> > > @@ -109,11 +109,11 @@ static inline bool blk_integrity_rq(stru
> > >   * Return the first bvec that contains integrity data.  Only drivers that are
> > >   * limited to a single integrity segment should use this helper.
> > >   */
> > > -static inline struct bio_vec *rq_integrity_vec(struct request *rq)
> > > +static inline struct bio_vec rq_integrity_vec(struct request *rq)
> > >  {
> > > -   if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
> > > -           return NULL;
> > > -   return rq->bio->bi_integrity->bip_vec;
> > > +   WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1);
> > > +   return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
> > > +                            rq->bio->bi_integrity->bip_iter);
> > >  }
> >
> > Not clear why the return on integrity segments > 1 is removed?
>
> Because we can't return NULL. But I can leave it there, print a warning
> and return the first vector.
>
> Mikulas
>
> > --
> > Jens Axboe
> >
>
>

--
Anuj Gupta
Jens Axboe May 23, 2024, 3:33 p.m. UTC | #4
On 5/23/24 9:11 AM, Mikulas Patocka wrote:
>>> @@ -853,16 +855,20 @@ static blk_status_t nvme_prep_rq(struct
>>>  			goto out_free_cmd;
>>>  	}
>>>  
>>> +#ifdef CONFIG_BLK_DEV_INTEGRITY
>>>  	if (blk_integrity_rq(req)) {
>>>  		ret = nvme_map_metadata(dev, req, &iod->cmd);
>>>  		if (ret)
>>>  			goto out_unmap_data;
>>>  	}
>>> +#endif
>>
>> 	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && blk_integrity_rq(req)) {
>>
>> ?
> 
> That wouldn't work, because the calls to rq_integrity_vec need to be 
> eliminated by the preprocessor.

Why not just do this incremental? Cleans up the ifdef mess too, leaving
only the one actually using rq_integrity_vec in place.

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 5f857cbc95c8..bd56416a7fa8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -821,10 +821,10 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	return ret;
 }
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
 static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
+#ifdef CONFIG_BLK_DEV_INTEGRITY
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct bio_vec bv = rq_integrity_vec(req);
 
@@ -832,9 +832,9 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
 	if (dma_mapping_error(dev->dev, iod->meta_dma))
 		return BLK_STS_IOERR;
 	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+#endif
 	return BLK_STS_OK;
 }
-#endif
 
 static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 {
@@ -855,20 +855,16 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 			goto out_free_cmd;
 	}
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-	if (blk_integrity_rq(req)) {
+	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && blk_integrity_rq(req)) {
 		ret = nvme_map_metadata(dev, req, &iod->cmd);
 		if (ret)
 			goto out_unmap_data;
 	}
-#endif
 
 	nvme_start_request(req);
 	return BLK_STS_OK;
-#ifdef CONFIG_BLK_DEV_INTEGRITY
 out_unmap_data:
 	nvme_unmap_data(dev, req);
-#endif
 out_free_cmd:
 	nvme_cleanup_cmd(req);
 	return ret;

> Should I change rq_integrity_vec to this? Then, we could get rid of the 
> ifdefs and let the optimizer remove all calls to rq_integrity_vec.
> static inline struct bio_vec rq_integrity_vec(struct request *rq)
> {
> 	struct bio_vec bv = { };
> 	return bv;
> }

Only if that eliminates runtime checking for !INTEGRITY, which I don't
thin it will.
Mikulas Patocka May 23, 2024, 3:48 p.m. UTC | #5
On Thu, 23 May 2024, Jens Axboe wrote:

> On 5/23/24 9:11 AM, Mikulas Patocka wrote:
> >>> @@ -853,16 +855,20 @@ static blk_status_t nvme_prep_rq(struct
> >>>  			goto out_free_cmd;
> >>>  	}
> >>>  
> >>> +#ifdef CONFIG_BLK_DEV_INTEGRITY
> >>>  	if (blk_integrity_rq(req)) {
> >>>  		ret = nvme_map_metadata(dev, req, &iod->cmd);
> >>>  		if (ret)
> >>>  			goto out_unmap_data;
> >>>  	}
> >>> +#endif
> >>
> >> 	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && blk_integrity_rq(req)) {
> >>
> >> ?
> > 
> > That wouldn't work, because the calls to rq_integrity_vec need to be 
> > eliminated by the preprocessor.
> 
> Why not just do this incremental? Cleans up the ifdef mess too, leaving
> only the one actually using rq_integrity_vec in place.
> 
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 5f857cbc95c8..bd56416a7fa8 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -821,10 +821,10 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
>  	return ret;
>  }
>  
> -#ifdef CONFIG_BLK_DEV_INTEGRITY
>  static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
>  		struct nvme_command *cmnd)
>  {
> +#ifdef CONFIG_BLK_DEV_INTEGRITY
>  	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
>  	struct bio_vec bv = rq_integrity_vec(req);
>  
> @@ -832,9 +832,9 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
>  	if (dma_mapping_error(dev->dev, iod->meta_dma))
>  		return BLK_STS_IOERR;
>  	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
> +#endif
>  	return BLK_STS_OK;
>  }
> -#endif
>  
>  static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
>  {
> @@ -855,20 +855,16 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
>  			goto out_free_cmd;
>  	}
>  
> -#ifdef CONFIG_BLK_DEV_INTEGRITY
> -	if (blk_integrity_rq(req)) {
> +	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && blk_integrity_rq(req)) {
>  		ret = nvme_map_metadata(dev, req, &iod->cmd);
>  		if (ret)
>  			goto out_unmap_data;
>  	}
> -#endif
>  
>  	nvme_start_request(req);
>  	return BLK_STS_OK;
> -#ifdef CONFIG_BLK_DEV_INTEGRITY
>  out_unmap_data:
>  	nvme_unmap_data(dev, req);
> -#endif
>  out_free_cmd:
>  	nvme_cleanup_cmd(req);
>  	return ret;
> 
> > Should I change rq_integrity_vec to this? Then, we could get rid of the 
> > ifdefs and let the optimizer remove all calls to rq_integrity_vec.
> > static inline struct bio_vec rq_integrity_vec(struct request *rq)
> > {
> > 	struct bio_vec bv = { };
> > 	return bv;
> > }
> 
> Only if that eliminates runtime checking for !INTEGRITY, which I don't
> thin it will.

It will eliminate the ifdefs. If we are compiling without 
CONFIG_BLK_DEV_INTEGRITY, blk_integrity_rq(req) is inline and it always 
returns 0. So the optimizer will remove anything guarded with "if 
(blk_integrity_rq(req))" - including the calls to nvme_map_metadata and 
rq_integrity_vec.

But we need to provide dummy rq_integrity_vec for the compiler front-end. 
The front-end doesn't know that blk_integrity_rq always returns zero.

So, the patch will be smaller if we get rid of the ifdefs and provide a 
dummy rq_integrity_vec.

Mikulas

> 
> 
> -- 
> Jens Axboe
>
diff mbox series

Patch

Index: linux-2.6/drivers/nvme/host/pci.c
===================================================================
--- linux-2.6.orig/drivers/nvme/host/pci.c
+++ linux-2.6/drivers/nvme/host/pci.c
@@ -821,18 +821,20 @@  out_free_sg:
 	return ret;
 }
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
 static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct bio_vec bv = rq_integrity_vec(req);
 
-	iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
-			rq_dma_dir(req), 0);
+	iod->meta_dma = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0);
 	if (dma_mapping_error(dev->dev, iod->meta_dma))
 		return BLK_STS_IOERR;
 	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
 	return BLK_STS_OK;
 }
+#endif
 
 static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 {
@@ -853,16 +855,20 @@  static blk_status_t nvme_prep_rq(struct
 			goto out_free_cmd;
 	}
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
 	if (blk_integrity_rq(req)) {
 		ret = nvme_map_metadata(dev, req, &iod->cmd);
 		if (ret)
 			goto out_unmap_data;
 	}
+#endif
 
 	nvme_start_request(req);
 	return BLK_STS_OK;
+#ifdef CONFIG_BLK_DEV_INTEGRITY
 out_unmap_data:
 	nvme_unmap_data(dev, req);
+#endif
 out_free_cmd:
 	nvme_cleanup_cmd(req);
 	return ret;
@@ -962,12 +968,14 @@  static __always_inline void nvme_pci_unm
 	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
 	struct nvme_dev *dev = nvmeq->dev;
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
 	if (blk_integrity_rq(req)) {
 	        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 
 		dma_unmap_page(dev->dev, iod->meta_dma,
-			       rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+			       rq_integrity_vec(req).bv_len, rq_dma_dir(req));
 	}
+#endif
 
 	if (blk_rq_nr_phys_segments(req))
 		nvme_unmap_data(dev, req);
Index: linux-2.6/include/linux/blk-integrity.h
===================================================================
--- linux-2.6.orig/include/linux/blk-integrity.h
+++ linux-2.6/include/linux/blk-integrity.h
@@ -109,11 +109,11 @@  static inline bool blk_integrity_rq(stru
  * Return the first bvec that contains integrity data.  Only drivers that are
  * limited to a single integrity segment should use this helper.
  */
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
 {
-	if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
-		return NULL;
-	return rq->bio->bi_integrity->bip_vec;
+	WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1);
+	return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
+				 rq->bio->bi_integrity->bip_iter);
 }
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 static inline int blk_rq_count_integrity_sg(struct request_queue *q,
@@ -177,9 +177,5 @@  static inline int blk_integrity_rq(struc
 	return 0;
 }
 
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
-{
-	return NULL;
-}
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 #endif /* _LINUX_BLK_INTEGRITY_H */