diff mbox series

[RFC,04/20] dev_dax_iomap: Save the kva from memremap

Message ID 66620f69fa3f3664d955649eba7da63fdf8d65ad.1708709155.git.john@groves.net (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series Introduce the famfs shared-memory file system | expand

Commit Message

John Groves Feb. 23, 2024, 5:41 p.m. UTC
Save the kva from memremap because we need it for iomap rw support

Prior to famfs, there were no iomap users of /dev/dax - so the virtual
address from memremap was not needed.

Also: in some cases dev_dax_probe() is called with the first
dev_dax->range offset past pgmap[0].range. In those cases we need to
add the difference to virt_addr in order to have the physaddr's in
dev_dax->ranges match dev_dax->virt_addr.

Dragons...

Signed-off-by: John Groves <john@groves.net>
---
 drivers/dax/dax-private.h |  1 +
 drivers/dax/device.c      | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

Comments

Jonathan Cameron Feb. 26, 2024, 12:21 p.m. UTC | #1
On Fri, 23 Feb 2024 11:41:48 -0600
John Groves <John@Groves.net> wrote:

> Save the kva from memremap because we need it for iomap rw support
> 
> Prior to famfs, there were no iomap users of /dev/dax - so the virtual
> address from memremap was not needed.
> 
> Also: in some cases dev_dax_probe() is called with the first
> dev_dax->range offset past pgmap[0].range. In those cases we need to
> add the difference to virt_addr in order to have the physaddr's in
> dev_dax->ranges match dev_dax->virt_addr.

Probably good to have info on when this happens and preferably why
this dragon is there.

> 
> Dragons...
> 
> Signed-off-by: John Groves <john@groves.net>
> ---
>  drivers/dax/dax-private.h |  1 +
>  drivers/dax/device.c      | 15 +++++++++++++++
>  2 files changed, 16 insertions(+)
> 
> diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
> index 446617b73aea..894eb1c66b4a 100644
> --- a/drivers/dax/dax-private.h
> +++ b/drivers/dax/dax-private.h
> @@ -63,6 +63,7 @@ struct dax_mapping {
>  struct dev_dax {
>  	struct dax_region *region;
>  	struct dax_device *dax_dev;
> +	u64 virt_addr;

Why as a u64? If it's a virt address why not just void *?

>  	unsigned int align;
>  	int target_node;
>  	bool dyn_id;
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index 40ba660013cf..6cd79d00fe1b 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -372,6 +372,7 @@ static int dev_dax_probe(struct dev_dax *dev_dax)
>  	struct dax_device *dax_dev = dev_dax->dax_dev;
>  	struct device *dev = &dev_dax->dev;
>  	struct dev_pagemap *pgmap;
> +	u64 data_offset = 0;
>  	struct inode *inode;
>  	struct cdev *cdev;
>  	void *addr;
> @@ -426,6 +427,20 @@ static int dev_dax_probe(struct dev_dax *dev_dax)
>  	if (IS_ERR(addr))
>  		return PTR_ERR(addr);
>  
> +	/* Detect whether the data is at a non-zero offset into the memory */
> +	if (pgmap->range.start != dev_dax->ranges[0].range.start) {
> +		u64 phys = (u64)dev_dax->ranges[0].range.start;

Why the cast? Ranges use u64s internally.

> +		u64 pgmap_phys = (u64)dev_dax->pgmap[0].range.start;
> +		u64 vmemmap_shift = (u64)dev_dax->pgmap[0].vmemmap_shift;
> +
> +		if (!WARN_ON(pgmap_phys > phys))
> +			data_offset = phys - pgmap_phys;
> +
> +		pr_notice("%s: offset detected phys=%llx pgmap_phys=%llx offset=%llx shift=%llx\n",
> +		       __func__, phys, pgmap_phys, data_offset, vmemmap_shift);

pr_debug() + dynamic debug will then deal with __func__ for you.

> +	}
> +	dev_dax->virt_addr = (u64)addr + data_offset;
> +
>  	inode = dax_inode(dax_dev);
>  	cdev = inode->i_cdev;
>  	cdev_init(cdev, &dax_fops);
John Groves Feb. 26, 2024, 3:48 p.m. UTC | #2
On 24/02/26 12:21PM, Jonathan Cameron wrote:
> On Fri, 23 Feb 2024 11:41:48 -0600
> John Groves <John@Groves.net> wrote:
> 
> > Save the kva from memremap because we need it for iomap rw support
> > 
> > Prior to famfs, there were no iomap users of /dev/dax - so the virtual
> > address from memremap was not needed.
> > 
> > Also: in some cases dev_dax_probe() is called with the first
> > dev_dax->range offset past pgmap[0].range. In those cases we need to
> > add the difference to virt_addr in order to have the physaddr's in
> > dev_dax->ranges match dev_dax->virt_addr.
> 
> Probably good to have info on when this happens and preferably why
> this dragon is there.

I added this paragraph:

  This happens with devdax devices that started as pmem and got converted
  to devdax. I'm not sure whether the offset is due to label storage, or
  page tables. Dan?

...which is also insufficient, but perhaps Dan or somebody else from the
dax side can correct this.

> 
> > 
> > Dragons...
> > 
> > Signed-off-by: John Groves <john@groves.net>
> > ---
> >  drivers/dax/dax-private.h |  1 +
> >  drivers/dax/device.c      | 15 +++++++++++++++
> >  2 files changed, 16 insertions(+)
> > 
> > diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
> > index 446617b73aea..894eb1c66b4a 100644
> > --- a/drivers/dax/dax-private.h
> > +++ b/drivers/dax/dax-private.h
> > @@ -63,6 +63,7 @@ struct dax_mapping {
> >  struct dev_dax {
> >  	struct dax_region *region;
> >  	struct dax_device *dax_dev;
> > +	u64 virt_addr;
> 
> Why as a u64? If it's a virt address why not just void *?

Changed to void * - thanks

> 
> >  	unsigned int align;
> >  	int target_node;
> >  	bool dyn_id;
> > diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> > index 40ba660013cf..6cd79d00fe1b 100644
> > --- a/drivers/dax/device.c
> > +++ b/drivers/dax/device.c
> > @@ -372,6 +372,7 @@ static int dev_dax_probe(struct dev_dax *dev_dax)
> >  	struct dax_device *dax_dev = dev_dax->dax_dev;
> >  	struct device *dev = &dev_dax->dev;
> >  	struct dev_pagemap *pgmap;
> > +	u64 data_offset = 0;
> >  	struct inode *inode;
> >  	struct cdev *cdev;
> >  	void *addr;
> > @@ -426,6 +427,20 @@ static int dev_dax_probe(struct dev_dax *dev_dax)
> >  	if (IS_ERR(addr))
> >  		return PTR_ERR(addr);
> >  
> > +	/* Detect whether the data is at a non-zero offset into the memory */
> > +	if (pgmap->range.start != dev_dax->ranges[0].range.start) {
> > +		u64 phys = (u64)dev_dax->ranges[0].range.start;
> 
> Why the cast? Ranges use u64s internally.

I've removed all the unnecessary casts in this function - thanks
for the catch

> 
> > +		u64 pgmap_phys = (u64)dev_dax->pgmap[0].range.start;
> > +		u64 vmemmap_shift = (u64)dev_dax->pgmap[0].vmemmap_shift;
> > +
> > +		if (!WARN_ON(pgmap_phys > phys))
> > +			data_offset = phys - pgmap_phys;
> > +
> > +		pr_notice("%s: offset detected phys=%llx pgmap_phys=%llx offset=%llx shift=%llx\n",
> > +		       __func__, phys, pgmap_phys, data_offset, vmemmap_shift);
> 
> pr_debug() + dynamic debug will then deal with __func__ for you.

Thanks - yeah that would be better than just taking it out...

> 
> > +	}
> > +	dev_dax->virt_addr = (u64)addr + data_offset;
> > +
> >  	inode = dax_inode(dax_dev);
> >  	cdev = inode->i_cdev;
> >  	cdev_init(cdev, &dax_fops);
> 

Thanks,
John
diff mbox series

Patch

diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index 446617b73aea..894eb1c66b4a 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -63,6 +63,7 @@  struct dax_mapping {
 struct dev_dax {
 	struct dax_region *region;
 	struct dax_device *dax_dev;
+	u64 virt_addr;
 	unsigned int align;
 	int target_node;
 	bool dyn_id;
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 40ba660013cf..6cd79d00fe1b 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -372,6 +372,7 @@  static int dev_dax_probe(struct dev_dax *dev_dax)
 	struct dax_device *dax_dev = dev_dax->dax_dev;
 	struct device *dev = &dev_dax->dev;
 	struct dev_pagemap *pgmap;
+	u64 data_offset = 0;
 	struct inode *inode;
 	struct cdev *cdev;
 	void *addr;
@@ -426,6 +427,20 @@  static int dev_dax_probe(struct dev_dax *dev_dax)
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);
 
+	/* Detect whether the data is at a non-zero offset into the memory */
+	if (pgmap->range.start != dev_dax->ranges[0].range.start) {
+		u64 phys = (u64)dev_dax->ranges[0].range.start;
+		u64 pgmap_phys = (u64)dev_dax->pgmap[0].range.start;
+		u64 vmemmap_shift = (u64)dev_dax->pgmap[0].vmemmap_shift;
+
+		if (!WARN_ON(pgmap_phys > phys))
+			data_offset = phys - pgmap_phys;
+
+		pr_notice("%s: offset detected phys=%llx pgmap_phys=%llx offset=%llx shift=%llx\n",
+		       __func__, phys, pgmap_phys, data_offset, vmemmap_shift);
+	}
+	dev_dax->virt_addr = (u64)addr + data_offset;
+
 	inode = dax_inode(dax_dev);
 	cdev = inode->i_cdev;
 	cdev_init(cdev, &dax_fops);