Message ID | 20200228163456.1587-2-vgoyal@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | dax/pmem: Provide a dax operation to zero page range | expand |
On Fri, 28 Feb 2020 at 17:35, Vivek Goyal <vgoyal@redhat.com> wrote: > > This splits pmem_do_bvec() into pmem_do_read() and pmem_do_write(). > pmem_do_write() will be used by pmem zero_page_range() as well. Hence > sharing the same code. > > Suggested-by: Christoph Hellwig <hch@infradead.org> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com> > --- > drivers/nvdimm/pmem.c | 86 +++++++++++++++++++++++++------------------ > 1 file changed, 50 insertions(+), 36 deletions(-) > > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c > index 4eae441f86c9..075b11682192 100644 > --- a/drivers/nvdimm/pmem.c > +++ b/drivers/nvdimm/pmem.c > @@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, > return BLK_STS_OK; > } > > -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, > - unsigned int len, unsigned int off, unsigned int op, > - sector_t sector) > +static blk_status_t pmem_do_read(struct pmem_device *pmem, > + struct page *page, unsigned int page_off, > + sector_t sector, unsigned int len) > +{ > + blk_status_t rc; > + phys_addr_t pmem_off = sector * 512 + pmem->data_offset; minor nit, maybe 512 is replaced by macro? Looks like its used at multiple places, maybe can keep at is for now. 
> + void *pmem_addr = pmem->virt_addr + pmem_off; > + > + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) > + return BLK_STS_IOERR; > + > + rc = read_pmem(page, page_off, pmem_addr, len); > + flush_dcache_page(page); > + return rc; > +} > + > +static blk_status_t pmem_do_write(struct pmem_device *pmem, > + struct page *page, unsigned int page_off, > + sector_t sector, unsigned int len) > { > blk_status_t rc = BLK_STS_OK; > bool bad_pmem = false; > @@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, > if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) > bad_pmem = true; > > - if (!op_is_write(op)) { > - if (unlikely(bad_pmem)) > - rc = BLK_STS_IOERR; > - else { > - rc = read_pmem(page, off, pmem_addr, len); > - flush_dcache_page(page); > - } > - } else { > - /* > - * Note that we write the data both before and after > - * clearing poison. The write before clear poison > - * handles situations where the latest written data is > - * preserved and the clear poison operation simply marks > - * the address range as valid without changing the data. > - * In this case application software can assume that an > - * interrupted write will either return the new good > - * data or an error. > - * > - * However, if pmem_clear_poison() leaves the data in an > - * indeterminate state we need to perform the write > - * after clear poison. > - */ > - flush_dcache_page(page); > - write_pmem(pmem_addr, page, off, len); > - if (unlikely(bad_pmem)) { > - rc = pmem_clear_poison(pmem, pmem_off, len); > - write_pmem(pmem_addr, page, off, len); > - } > + /* > + * Note that we write the data both before and after > + * clearing poison. The write before clear poison > + * handles situations where the latest written data is > + * preserved and the clear poison operation simply marks > + * the address range as valid without changing the data. 
> + * In this case application software can assume that an > + * interrupted write will either return the new good > + * data or an error. > + * > + * However, if pmem_clear_poison() leaves the data in an > + * indeterminate state we need to perform the write > + * after clear poison. > + */ > + flush_dcache_page(page); > + write_pmem(pmem_addr, page, page_off, len); > + if (unlikely(bad_pmem)) { > + rc = pmem_clear_poison(pmem, pmem_off, len); > + write_pmem(pmem_addr, page, page_off, len); > } > > return rc; > @@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) > > do_acct = nd_iostat_start(bio, &start); > bio_for_each_segment(bvec, bio, iter) { > - rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, > - bvec.bv_offset, bio_op(bio), iter.bi_sector); > + if (op_is_write(bio_op(bio))) > + rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset, > + iter.bi_sector, bvec.bv_len); > + else > + rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset, > + iter.bi_sector, bvec.bv_len); > if (rc) { > bio->bi_status = rc; > break; > @@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, > struct pmem_device *pmem = bdev->bd_queue->queuedata; > blk_status_t rc; > > - rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, > - 0, op, sector); > - > + if (op_is_write(op)) > + rc = pmem_do_write(pmem, page, 0, sector, > + hpage_nr_pages(page) * PAGE_SIZE); > + else > + rc = pmem_do_read(pmem, page, 0, sector, > + hpage_nr_pages(page) * PAGE_SIZE); > /* > * The ->rw_page interface is subtle and tricky. The core > * retries on any error, so we can only invoke page_endio() in > -- > 2.20.1 Reviewed-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com> > _______________________________________________ > Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org > To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
On Sat, Feb 29, 2020 at 09:04:00AM +0100, Pankaj Gupta wrote:
> > +       phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
>
> minor nit, maybe 512 is replaced by macro? Looks like it's used at multiple
> places, maybe can keep as is for now.

That would be the existing SECTOR_SIZE macro.
On Sat, Feb 29, 2020 at 09:04:00AM +0100, Pankaj Gupta wrote:
> On Fri, 28 Feb 2020 at 17:35, Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > This splits pmem_do_bvec() into pmem_do_read() and pmem_do_write().
> > pmem_do_write() will be used by pmem zero_page_range() as well. Hence
> > sharing the same code.
> >
> > Suggested-by: Christoph Hellwig <hch@infradead.org>
> > Reviewed-by: Christoph Hellwig <hch@lst.de>
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > ---
> >  drivers/nvdimm/pmem.c | 86 +++++++++++++++++++++++++------------------
> >  1 file changed, 50 insertions(+), 36 deletions(-)
> >
> > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> > index 4eae441f86c9..075b11682192 100644
> > --- a/drivers/nvdimm/pmem.c
> > +++ b/drivers/nvdimm/pmem.c
> > @@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
> >         return BLK_STS_OK;
> >  }
> >
> > -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
> > -                       unsigned int len, unsigned int off, unsigned int op,
> > -                       sector_t sector)
> > +static blk_status_t pmem_do_read(struct pmem_device *pmem,
> > +                       struct page *page, unsigned int page_off,
> > +                       sector_t sector, unsigned int len)
> > +{
> > +       blk_status_t rc;
> > +       phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
>
> minor nit, maybe 512 is replaced by macro? Looks like it's used at multiple
> places, maybe can keep as is for now.

This came from existing code. If I end up spinning this patch series again,
I will replace it with (sector << SECTOR_SHIFT).

Thanks
Vivek
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 4eae441f86c9..075b11682192 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, return BLK_STS_OK; } -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, - unsigned int len, unsigned int off, unsigned int op, - sector_t sector) +static blk_status_t pmem_do_read(struct pmem_device *pmem, + struct page *page, unsigned int page_off, + sector_t sector, unsigned int len) +{ + blk_status_t rc; + phys_addr_t pmem_off = sector * 512 + pmem->data_offset; + void *pmem_addr = pmem->virt_addr + pmem_off; + + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + return BLK_STS_IOERR; + + rc = read_pmem(page, page_off, pmem_addr, len); + flush_dcache_page(page); + return rc; +} + +static blk_status_t pmem_do_write(struct pmem_device *pmem, + struct page *page, unsigned int page_off, + sector_t sector, unsigned int len) { blk_status_t rc = BLK_STS_OK; bool bad_pmem = false; @@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; - if (!op_is_write(op)) { - if (unlikely(bad_pmem)) - rc = BLK_STS_IOERR; - else { - rc = read_pmem(page, off, pmem_addr, len); - flush_dcache_page(page); - } - } else { - /* - * Note that we write the data both before and after - * clearing poison. The write before clear poison - * handles situations where the latest written data is - * preserved and the clear poison operation simply marks - * the address range as valid without changing the data. - * In this case application software can assume that an - * interrupted write will either return the new good - * data or an error. - * - * However, if pmem_clear_poison() leaves the data in an - * indeterminate state we need to perform the write - * after clear poison. 
- */ - flush_dcache_page(page); - write_pmem(pmem_addr, page, off, len); - if (unlikely(bad_pmem)) { - rc = pmem_clear_poison(pmem, pmem_off, len); - write_pmem(pmem_addr, page, off, len); - } + /* + * Note that we write the data both before and after + * clearing poison. The write before clear poison + * handles situations where the latest written data is + * preserved and the clear poison operation simply marks + * the address range as valid without changing the data. + * In this case application software can assume that an + * interrupted write will either return the new good + * data or an error. + * + * However, if pmem_clear_poison() leaves the data in an + * indeterminate state we need to perform the write + * after clear poison. + */ + flush_dcache_page(page); + write_pmem(pmem_addr, page, page_off, len); + if (unlikely(bad_pmem)) { + rc = pmem_clear_poison(pmem, pmem_off, len); + write_pmem(pmem_addr, page, page_off, len); } return rc; @@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { - rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, - bvec.bv_offset, bio_op(bio), iter.bi_sector); + if (op_is_write(bio_op(bio))) + rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset, + iter.bi_sector, bvec.bv_len); + else + rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset, + iter.bi_sector, bvec.bv_len); if (rc) { bio->bi_status = rc; break; @@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct pmem_device *pmem = bdev->bd_queue->queuedata; blk_status_t rc; - rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, - 0, op, sector); - + if (op_is_write(op)) + rc = pmem_do_write(pmem, page, 0, sector, + hpage_nr_pages(page) * PAGE_SIZE); + else + rc = pmem_do_read(pmem, page, 0, sector, + hpage_nr_pages(page) * PAGE_SIZE); /* * The ->rw_page interface is subtle and tricky. 
The core * retries on any error, so we can only invoke page_endio() in