diff mbox series

[04/13] fs/dax: Add dax_page_free callback

Message ID e626eda568267e1f86d5c30c24bc62474b45f6c3.1719386613.git-series.apopple@nvidia.com (mailing list archive)
State New
Headers show
Series fs/dax: Fix FS DAX page reference counts | expand

Commit Message

Alistair Popple June 27, 2024, 12:54 a.m. UTC
When a fs dax page is freed it has to notify filesystems that the page
has been unpinned/unmapped and is free. Currently this involves
special code in the page free paths to detect a transition of refcount
from 2 to 1 and to call some fs dax specific code.

A future change will require this to happen when the page refcount
drops to zero. In this case we can use the existing
pgmap->ops->page_free() callback so wire that up for all devices that
support FS DAX (nvdimm and virtio).

Signed-off-by: Alistair Popple <apopple@nvidia.com>
---
 drivers/nvdimm/pmem.c | 1 +
 fs/dax.c              | 6 ++++++
 fs/fuse/virtio_fs.c   | 5 +++++
 include/linux/dax.h   | 1 +
 4 files changed, 13 insertions(+)

Comments

Christoph Hellwig June 27, 2024, 5:33 a.m. UTC | #1
On Thu, Jun 27, 2024 at 10:54:19AM +1000, Alistair Popple wrote:
> When a fs dax page is freed it has to notify filesystems that the page
> has been unpinned/unmapped and is free. Currently this involves
> special code in the page free paths to detect a transition of refcount
> from 2 to 1 and to call some fs dax specific code.
> 
> A future change will require this to happen when the page refcount
> drops to zero. In this case we can use the existing
> pgmap->ops->page_free() callback so wire that up for all devices that
> support FS DAX (nvdimm and virtio).

Given that ->page_ffree is only called from free_zone_device_folio
and right next to a switch on the the type, can't we just do the
wake_up_var there without the somewhat confusing indirect call that
just back in common code without any driver logic?
Alistair Popple June 27, 2024, 11:48 p.m. UTC | #2
Christoph Hellwig <hch@lst.de> writes:

> On Thu, Jun 27, 2024 at 10:54:19AM +1000, Alistair Popple wrote:
>> When a fs dax page is freed it has to notify filesystems that the page
>> has been unpinned/unmapped and is free. Currently this involves
>> special code in the page free paths to detect a transition of refcount
>> from 2 to 1 and to call some fs dax specific code.
>> 
>> A future change will require this to happen when the page refcount
>> drops to zero. In this case we can use the existing
>> pgmap->ops->page_free() callback so wire that up for all devices that
>> support FS DAX (nvdimm and virtio).
>
> Given that ->page_ffree is only called from free_zone_device_folio
> and right next to a switch on the the type, can't we just do the
> wake_up_var there without the somewhat confusing indirect call that
> just back in common code without any driver logic?

Longer term I'm hoping we can get rid of that switch on type entirely as
I don't think the whole get/put_dev_pagemap() thing is very useful. Less
indirection is good though so will move the wake_up_var there.
diff mbox series

Patch

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 598fe2e..cafadd0 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -444,6 +444,7 @@  static int pmem_pagemap_memory_failure(struct dev_pagemap *pgmap,
 
 static const struct dev_pagemap_ops fsdax_pagemap_ops = {
 	.memory_failure		= pmem_pagemap_memory_failure,
+	.page_free	        = dax_page_free,
 };
 
 static int pmem_attach_disk(struct device *dev,
diff --git a/fs/dax.c b/fs/dax.c
index becb4a6..f93afd7 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -2065,3 +2065,9 @@  int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 					       pos_out, len, remap_flags, ops);
 }
 EXPORT_SYMBOL_GPL(dax_remap_file_range_prep);
+
+void dax_page_free(struct page *page)
+{
+	wake_up_var(page);
+}
+EXPORT_SYMBOL_GPL(dax_page_free);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 1a52a51..6e90a4b 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -909,6 +909,10 @@  static void virtio_fs_cleanup_dax(void *data)
 
 DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
 
+static const struct dev_pagemap_ops fsdax_pagemap_ops = {
+	.page_free = dax_page_free,
+};
+
 static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 {
 	struct dax_device *dax_dev __free(cleanup_dax) = NULL;
@@ -948,6 +952,7 @@  static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 		return -ENOMEM;
 
 	pgmap->type = MEMORY_DEVICE_FS_DAX;
+	pgmap->ops = &fsdax_pagemap_ops;
 
 	/* Ideally we would directly use the PCI BAR resource but
 	 * devm_memremap_pages() wants its own copy in pgmap.  So
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 773dfc4..adbafc8 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -213,6 +213,7 @@  int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops);
 
+void dax_page_free(struct page *page);
 static inline int dax_wait_page_idle(struct page *page,
 				void (cb)(struct inode *),
 				struct inode *inode)