diff mbox series

[v6,1/6] pmem: Add functions for reading/writing page to/from pmem

Message ID 20200228163456.1587-2-vgoyal@redhat.com (mailing list archive)
State New, archived
Headers show
Series dax/pmem: Provide a dax operation to zero page range | expand

Commit Message

Vivek Goyal Feb. 28, 2020, 4:34 p.m. UTC
This splits pmem_do_bvec() into pmem_do_read() and pmem_do_write().
pmem_do_write() will be used by pmem zero_page_range() as well. Hence
sharing the same code.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 drivers/nvdimm/pmem.c | 86 +++++++++++++++++++++++++------------------
 1 file changed, 50 insertions(+), 36 deletions(-)

Comments

Pankaj Gupta Feb. 29, 2020, 8:04 a.m. UTC | #1
On Fri, 28 Feb 2020 at 17:35, Vivek Goyal <vgoyal@redhat.com> wrote:
>
> This splits pmem_do_bvec() into pmem_do_read() and pmem_do_write().
> pmem_do_write() will be used by pmem zero_page_range() as well. Hence
> sharing the same code.
>
> Suggested-by: Christoph Hellwig <hch@infradead.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
>  drivers/nvdimm/pmem.c | 86 +++++++++++++++++++++++++------------------
>  1 file changed, 50 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index 4eae441f86c9..075b11682192 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
>         return BLK_STS_OK;
>  }
>
> -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
> -                       unsigned int len, unsigned int off, unsigned int op,
> -                       sector_t sector)
> +static blk_status_t pmem_do_read(struct pmem_device *pmem,
> +                       struct page *page, unsigned int page_off,
> +                       sector_t sector, unsigned int len)
> +{
> +       blk_status_t rc;
> +       phys_addr_t pmem_off = sector * 512 + pmem->data_offset;

minor nit,  maybe 512 is replaced by macro? Looks like its used at multiple
places, maybe can keep at is for now.

> +       void *pmem_addr = pmem->virt_addr + pmem_off;
> +
> +       if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
> +               return BLK_STS_IOERR;
> +
> +       rc = read_pmem(page, page_off, pmem_addr, len);
> +       flush_dcache_page(page);
> +       return rc;
> +}
> +
> +static blk_status_t pmem_do_write(struct pmem_device *pmem,
> +                       struct page *page, unsigned int page_off,
> +                       sector_t sector, unsigned int len)
>  {
>         blk_status_t rc = BLK_STS_OK;
>         bool bad_pmem = false;
> @@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
>         if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
>                 bad_pmem = true;
>
> -       if (!op_is_write(op)) {
> -               if (unlikely(bad_pmem))
> -                       rc = BLK_STS_IOERR;
> -               else {
> -                       rc = read_pmem(page, off, pmem_addr, len);
> -                       flush_dcache_page(page);
> -               }
> -       } else {
> -               /*
> -                * Note that we write the data both before and after
> -                * clearing poison.  The write before clear poison
> -                * handles situations where the latest written data is
> -                * preserved and the clear poison operation simply marks
> -                * the address range as valid without changing the data.
> -                * In this case application software can assume that an
> -                * interrupted write will either return the new good
> -                * data or an error.
> -                *
> -                * However, if pmem_clear_poison() leaves the data in an
> -                * indeterminate state we need to perform the write
> -                * after clear poison.
> -                */
> -               flush_dcache_page(page);
> -               write_pmem(pmem_addr, page, off, len);
> -               if (unlikely(bad_pmem)) {
> -                       rc = pmem_clear_poison(pmem, pmem_off, len);
> -                       write_pmem(pmem_addr, page, off, len);
> -               }
> +       /*
> +        * Note that we write the data both before and after
> +        * clearing poison.  The write before clear poison
> +        * handles situations where the latest written data is
> +        * preserved and the clear poison operation simply marks
> +        * the address range as valid without changing the data.
> +        * In this case application software can assume that an
> +        * interrupted write will either return the new good
> +        * data or an error.
> +        *
> +        * However, if pmem_clear_poison() leaves the data in an
> +        * indeterminate state we need to perform the write
> +        * after clear poison.
> +        */
> +       flush_dcache_page(page);
> +       write_pmem(pmem_addr, page, page_off, len);
> +       if (unlikely(bad_pmem)) {
> +               rc = pmem_clear_poison(pmem, pmem_off, len);
> +               write_pmem(pmem_addr, page, page_off, len);
>         }
>
>         return rc;
> @@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
>
>         do_acct = nd_iostat_start(bio, &start);
>         bio_for_each_segment(bvec, bio, iter) {
> -               rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
> -                               bvec.bv_offset, bio_op(bio), iter.bi_sector);
> +               if (op_is_write(bio_op(bio)))
> +                       rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
> +                               iter.bi_sector, bvec.bv_len);
> +               else
> +                       rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
> +                               iter.bi_sector, bvec.bv_len);
>                 if (rc) {
>                         bio->bi_status = rc;
>                         break;
> @@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
>         struct pmem_device *pmem = bdev->bd_queue->queuedata;
>         blk_status_t rc;
>
> -       rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
> -                         0, op, sector);
> -
> +       if (op_is_write(op))
> +               rc = pmem_do_write(pmem, page, 0, sector,
> +                                  hpage_nr_pages(page) * PAGE_SIZE);
> +       else
> +               rc = pmem_do_read(pmem, page, 0, sector,
> +                                  hpage_nr_pages(page) * PAGE_SIZE);
>         /*
>          * The ->rw_page interface is subtle and tricky.  The core
>          * retries on any error, so we can only invoke page_endio() in
> --
> 2.20.1

Reviewed-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com>
> _______________________________________________
> Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
> To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
Christoph Hellwig March 4, 2020, 4:16 p.m. UTC | #2
On Sat, Feb 29, 2020 at 09:04:00AM +0100, Pankaj Gupta wrote:
> > +       phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
> 
> minor nit,  maybe 512 is replaced by macro? Looks like its used at multiple
> places, maybe can keep at is for now.

That would be the existing SECTOR_SIZE macro.
Vivek Goyal March 4, 2020, 6:44 p.m. UTC | #3
On Sat, Feb 29, 2020 at 09:04:00AM +0100, Pankaj Gupta wrote:
> On Fri, 28 Feb 2020 at 17:35, Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > This splits pmem_do_bvec() into pmem_do_read() and pmem_do_write().
> > pmem_do_write() will be used by pmem zero_page_range() as well. Hence
> > sharing the same code.
> >
> > Suggested-by: Christoph Hellwig <hch@infradead.org>
> > Reviewed-by: Christoph Hellwig <hch@lst.de>
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > ---
> >  drivers/nvdimm/pmem.c | 86 +++++++++++++++++++++++++------------------
> >  1 file changed, 50 insertions(+), 36 deletions(-)
> >
> > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> > index 4eae441f86c9..075b11682192 100644
> > --- a/drivers/nvdimm/pmem.c
> > +++ b/drivers/nvdimm/pmem.c
> > @@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
> >         return BLK_STS_OK;
> >  }
> >
> > -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
> > -                       unsigned int len, unsigned int off, unsigned int op,
> > -                       sector_t sector)
> > +static blk_status_t pmem_do_read(struct pmem_device *pmem,
> > +                       struct page *page, unsigned int page_off,
> > +                       sector_t sector, unsigned int len)
> > +{
> > +       blk_status_t rc;
> > +       phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
> 
> minor nit,  maybe 512 is replaced by macro? Looks like its used at multiple
> places, maybe can keep at is for now.

This came from existing code. If I end up spinning this patch series
again, I will replace it with (sector << SECTOR_SHIFT).

Thanks
Vivek
diff mbox series

Patch

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4eae441f86c9..075b11682192 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -136,9 +136,25 @@  static blk_status_t read_pmem(struct page *page, unsigned int off,
 	return BLK_STS_OK;
 }
 
-static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
-			unsigned int len, unsigned int off, unsigned int op,
-			sector_t sector)
+static blk_status_t pmem_do_read(struct pmem_device *pmem,
+			struct page *page, unsigned int page_off,
+			sector_t sector, unsigned int len)
+{
+	blk_status_t rc;
+	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
+	void *pmem_addr = pmem->virt_addr + pmem_off;
+
+	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
+		return BLK_STS_IOERR;
+
+	rc = read_pmem(page, page_off, pmem_addr, len);
+	flush_dcache_page(page);
+	return rc;
+}
+
+static blk_status_t pmem_do_write(struct pmem_device *pmem,
+			struct page *page, unsigned int page_off,
+			sector_t sector, unsigned int len)
 {
 	blk_status_t rc = BLK_STS_OK;
 	bool bad_pmem = false;
@@ -148,34 +164,25 @@  static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
 		bad_pmem = true;
 
-	if (!op_is_write(op)) {
-		if (unlikely(bad_pmem))
-			rc = BLK_STS_IOERR;
-		else {
-			rc = read_pmem(page, off, pmem_addr, len);
-			flush_dcache_page(page);
-		}
-	} else {
-		/*
-		 * Note that we write the data both before and after
-		 * clearing poison.  The write before clear poison
-		 * handles situations where the latest written data is
-		 * preserved and the clear poison operation simply marks
-		 * the address range as valid without changing the data.
-		 * In this case application software can assume that an
-		 * interrupted write will either return the new good
-		 * data or an error.
-		 *
-		 * However, if pmem_clear_poison() leaves the data in an
-		 * indeterminate state we need to perform the write
-		 * after clear poison.
-		 */
-		flush_dcache_page(page);
-		write_pmem(pmem_addr, page, off, len);
-		if (unlikely(bad_pmem)) {
-			rc = pmem_clear_poison(pmem, pmem_off, len);
-			write_pmem(pmem_addr, page, off, len);
-		}
+	/*
+	 * Note that we write the data both before and after
+	 * clearing poison.  The write before clear poison
+	 * handles situations where the latest written data is
+	 * preserved and the clear poison operation simply marks
+	 * the address range as valid without changing the data.
+	 * In this case application software can assume that an
+	 * interrupted write will either return the new good
+	 * data or an error.
+	 *
+	 * However, if pmem_clear_poison() leaves the data in an
+	 * indeterminate state we need to perform the write
+	 * after clear poison.
+	 */
+	flush_dcache_page(page);
+	write_pmem(pmem_addr, page, page_off, len);
+	if (unlikely(bad_pmem)) {
+		rc = pmem_clear_poison(pmem, pmem_off, len);
+		write_pmem(pmem_addr, page, page_off, len);
 	}
 
 	return rc;
@@ -197,8 +204,12 @@  static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
-		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
-				bvec.bv_offset, bio_op(bio), iter.bi_sector);
+		if (op_is_write(bio_op(bio)))
+			rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
+				iter.bi_sector, bvec.bv_len);
+		else
+			rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
+				iter.bi_sector, bvec.bv_len);
 		if (rc) {
 			bio->bi_status = rc;
 			break;
@@ -223,9 +234,12 @@  static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
 	blk_status_t rc;
 
-	rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
-			  0, op, sector);
-
+	if (op_is_write(op))
+		rc = pmem_do_write(pmem, page, 0, sector,
+				   hpage_nr_pages(page) * PAGE_SIZE);
+	else
+		rc = pmem_do_read(pmem, page, 0, sector,
+				   hpage_nr_pages(page) * PAGE_SIZE);
 	/*
 	 * The ->rw_page interface is subtle and tricky.  The core
 	 * retries on any error, so we can only invoke page_endio() in