
[20/20] dax: Clear dirty entry tags on cache flush

Message ID: 1474992504-20133-21-git-send-email-jack@suse.cz (mailing list archive)
State: Not Applicable

Commit Message

Jan Kara Sept. 27, 2016, 4:08 p.m. UTC
Currently we never clear dirty tags in DAX mappings, so the address
ranges to flush keep accumulating. Now that we have locking of radix
tree entries, we have all the locking necessary to reliably clear the
radix tree dirty tag when flushing caches for the corresponding address
range. Similarly to page_mkclean(), we also have to write-protect pages
to get a page fault when the page is next written to, so that we can
mark the entry dirty again.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/dax.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
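
[Editorial note: the fault-side half of this cycle is what makes clearing the tag safe. Once the flush path has write-protected the PTE and cleared the dirty tag, the next store faults and the entry is re-tagged dirty under the tree lock; a later flush then finds the range again. The sketch below is illustrative only and is not part of the patch; the helper name dax_mark_entry_dirty is made up here, while in fs/dax.c the tag is set from the fault path when the mapping entry is inserted.]

/*
 * Minimal sketch (illustrative, not from the patch): re-dirtying a DAX
 * radix tree entry on the next write fault, after the flush path has
 * write-protected the PTE and cleared PAGECACHE_TAG_DIRTY.
 */
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/radix-tree.h>

static void dax_mark_entry_dirty(struct address_space *mapping, pgoff_t index)
{
	spin_lock_irq(&mapping->tree_lock);
	radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
	spin_unlock_irq(&mapping->tree_lock);
}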

Comments

Ross Zwisler Oct. 18, 2016, 10:12 p.m. UTC | #1
On Tue, Sep 27, 2016 at 06:08:24PM +0200, Jan Kara wrote:
> Currently we never clear dirty tags in DAX mappings, so the address
> ranges to flush keep accumulating. Now that we have locking of radix
> tree entries, we have all the locking necessary to reliably clear the
> radix tree dirty tag when flushing caches for the corresponding address
> range. Similarly to page_mkclean(), we also have to write-protect pages
> to get a page fault when the page is next written to, so that we can
> mark the entry dirty again.
> 
> Signed-off-by: Jan Kara <jack@suse.cz>

Looks great. 

Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Jan Kara Oct. 19, 2016, 7:30 a.m. UTC | #2
On Tue 18-10-16 16:12:54, Ross Zwisler wrote:
> On Tue, Sep 27, 2016 at 06:08:24PM +0200, Jan Kara wrote:
> > Currently we never clear dirty tags in DAX mappings, so the address
> > ranges to flush keep accumulating. Now that we have locking of radix
> > tree entries, we have all the locking necessary to reliably clear the
> > radix tree dirty tag when flushing caches for the corresponding address
> > range. Similarly to page_mkclean(), we also have to write-protect pages
> > to get a page fault when the page is next written to, so that we can
> > mark the entry dirty again.
> > 
> > Signed-off-by: Jan Kara <jack@suse.cz>
> 
> Looks great. 
> 
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

Thanks for the review, Ross! I've rebased the series on top of rc1. Do you
have your PMD series somewhere rebased on top of rc1 so that I can rebase
my patches on top of that as well? Then I'd post another version of the
series...

								Honza
Ross Zwisler Oct. 19, 2016, 4:38 p.m. UTC | #3
On Wed, Oct 19, 2016 at 09:30:16AM +0200, Jan Kara wrote:
> On Tue 18-10-16 16:12:54, Ross Zwisler wrote:
> > On Tue, Sep 27, 2016 at 06:08:24PM +0200, Jan Kara wrote:
> > > Currently we never clear dirty tags in DAX mappings, so the address
> > > ranges to flush keep accumulating. Now that we have locking of radix
> > > tree entries, we have all the locking necessary to reliably clear the
> > > radix tree dirty tag when flushing caches for the corresponding address
> > > range. Similarly to page_mkclean(), we also have to write-protect pages
> > > to get a page fault when the page is next written to, so that we can
> > > mark the entry dirty again.
> > > 
> > > Signed-off-by: Jan Kara <jack@suse.cz>
> > 
> > Looks great. 
> > 
> > Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> 
> Thanks for the review, Ross! I've rebased the series on top of rc1. Do you
> have your PMD series somewhere rebased on top of rc1 so that I can rebase
> my patches on top of that as well? Then I'd post another version of the
> series...

Sure, I'll rebase & post a new version of my series today.

Patch

diff --git a/fs/dax.c b/fs/dax.c
index a2d3781c9f4e..233f548d298e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,7 @@ 
 #include <linux/vmstat.h>
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
+#include <linux/mmu_notifier.h>
 #include <linux/iomap.h>
 #include "internal.h"
 
@@ -668,6 +669,59 @@  static void *dax_insert_mapping_entry(struct address_space *mapping,
 	return new_entry;
 }
 
+static inline unsigned long
+pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
+{
+	unsigned long address;
+
+	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
+	return address;
+}
+
+/* Walk all mappings of a given index of a file and writeprotect them */
+static void dax_mapping_entry_mkclean(struct address_space *mapping,
+				      pgoff_t index, unsigned long pfn)
+{
+	struct vm_area_struct *vma;
+	pte_t *ptep;
+	pte_t pte;
+	spinlock_t *ptl;
+	bool changed;
+
+	i_mmap_lock_read(mapping);
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
+		unsigned long address;
+
+		cond_resched();
+
+		if (!(vma->vm_flags & VM_SHARED))
+			continue;
+
+		address = pgoff_address(index, vma);
+		changed = false;
+		if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
+			continue;
+		if (pfn != pte_pfn(*ptep))
+			goto unlock;
+		if (!pte_dirty(*ptep) && !pte_write(*ptep))
+			goto unlock;
+
+		flush_cache_page(vma, address, pfn);
+		pte = ptep_clear_flush(vma, address, ptep);
+		pte = pte_wrprotect(pte);
+		pte = pte_mkclean(pte);
+		set_pte_at(vma->vm_mm, address, ptep, pte);
+		changed = true;
+unlock:
+		pte_unmap_unlock(ptep, ptl);
+
+		if (changed)
+			mmu_notifier_invalidate_page(vma->vm_mm, address);
+	}
+	i_mmap_unlock_read(mapping);
+}
+
 static int dax_writeback_one(struct block_device *bdev,
 		struct address_space *mapping, pgoff_t index, void *entry)
 {
@@ -735,7 +789,17 @@  static int dax_writeback_one(struct block_device *bdev,
 		goto unmap;
 	}
 
+	dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(dax.pfn));
 	wb_cache_pmem(dax.addr, dax.size);
+	/*
+	 * After we have flushed the cache, we can clear the dirty tag. There
+	 * cannot be new dirty data in the pfn after the flush has completed as
+	 * the pfn mappings are writeprotected and fault waits for mapping
+	 * entry lock.
+	 */
+	spin_lock_irq(&mapping->tree_lock);
+	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
+	spin_unlock_irq(&mapping->tree_lock);
 unmap:
 	dax_unmap_atomic(bdev, &dax);
 	put_locked_mapping_entry(mapping, index, entry);
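
[Editorial note: a hedged user-space illustration of the behaviour this patch completes. The path /mnt/pmem/file is an assumption; it stands in for any existing file of at least one page on a DAX-mounted filesystem. The first fsync() flushes and cleans the dirty entry while write-protecting the PTE, a second fsync() with nothing re-dirtied has nothing to flush, and a later store faults on the write-protected PTE so the following fsync() flushes the range again.]

/* build: cc -o dax_dirty_cycle dax_dirty_cycle.c */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path: an existing file >= 4 KiB on a DAX-mounted filesystem. */
	int fd = open("/mnt/pmem/file", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	p[0] = 1;	/* write fault marks the radix tree entry dirty */
	fsync(fd);	/* flushes caches, write-protects the PTE, clears the tag */
	fsync(fd);	/* with this patch, nothing is left to flush here */

	p[0] = 2;	/* faults on the write-protected PTE, entry re-dirtied */
	fsync(fd);	/* the range is flushed again */

	munmap(p, 4096);
	close(fd);
	return 0;
}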