@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+ if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return;
invalidate_bh_lrus();
@@ -495,7 +495,7 @@ void clear_inode(struct inode *inode)
*/
spin_lock_irq(&inode->i_data.tree_lock);
BUG_ON(inode->i_data.nrpages);
- BUG_ON(inode->i_data.nrshadows);
+ BUG_ON(inode->i_data.nrexceptional);
spin_unlock_irq(&inode->i_data.tree_lock);
BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING));
@@ -36,4 +36,18 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
{
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
}
+
+/*
+ * dax_mapping - test whether an address_space belongs to a DAX inode
+ * @mapping: address_space to test
+ *
+ * Returns true only when @mapping has a host inode and IS_DAX() is true
+ * for that inode; a mapping without a host is never treated as DAX.
+ */
+static inline bool dax_mapping(struct address_space *mapping)
+{
+	if (!mapping->host)
+		return false;
+	return IS_DAX(mapping->host);
+}
#endif
@@ -433,7 +433,8 @@ struct address_space {
struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
unsigned long nrpages; /* number of total pages */
- unsigned long nrshadows; /* number of shadow entries */
+ /* number of shadow or DAX exceptional entries */
+ unsigned long nrexceptional;
pgoff_t writeback_index;/* writeback starts here */
const struct address_space_operations *a_ops; /* methods */
unsigned long flags; /* error bits/gfp mask */
@@ -51,6 +51,25 @@
#define RADIX_TREE_EXCEPTIONAL_ENTRY 2
#define RADIX_TREE_EXCEPTIONAL_SHIFT 2
+/*
+ * A DAX exceptional entry packs a sector number and an entry type into a
+ * single radix tree slot value.  The low RADIX_DAX_SHIFT bits, selected by
+ * RADIX_DAX_MASK, hold the type (RADIX_DAX_PTE or RADIX_DAX_PMD, each of
+ * which includes RADIX_TREE_EXCEPTIONAL_ENTRY); the bits above them hold
+ * the sector.
+ *
+ * Every macro argument is parenthesized so that an expression argument is
+ * cast and shifted as a whole rather than only its first operand.
+ */
+#define RADIX_DAX_MASK 0xf
+#define RADIX_DAX_SHIFT 4
+#define RADIX_DAX_PTE (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_PMD (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_TYPE(entry) ((unsigned long)(entry) & RADIX_DAX_MASK)
+#define RADIX_DAX_SECTOR(entry) ((unsigned long)(entry) >> RADIX_DAX_SHIFT)
+#define RADIX_DAX_ENTRY(sector, pmd) ((void *)(((unsigned long)(sector) << \
+		RADIX_DAX_SHIFT) | ((pmd) ? RADIX_DAX_PMD : RADIX_DAX_PTE)))
+
static inline int radix_tree_is_indirect_ptr(void *ptr)
{
return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
@@ -11,6 +11,7 @@
*/
#include <linux/export.h>
#include <linux/compiler.h>
+#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
__radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
if (shadow) {
- mapping->nrshadows++;
+ mapping->nrexceptional++;
/*
- * Make sure the nrshadows update is committed before
+ * Make sure the nrexceptional update is committed before
* the nrpages update so that final truncate racing
* with reclaim does not see both counters 0 at the
* same time and miss a shadow entry.
@@ -579,9 +580,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
if (!radix_tree_exceptional_entry(p))
return -EEXIST;
+
+ if (WARN_ON(dax_mapping(mapping)))
+ return -EINVAL;
+
if (shadowp)
*shadowp = p;
- mapping->nrshadows--;
+ mapping->nrexceptional--;
if (node)
workingset_node_shadows_dec(node);
}
@@ -1245,9 +1250,9 @@ repeat:
if (radix_tree_deref_retry(page))
goto restart;
/*
- * A shadow entry of a recently evicted page,
- * or a swap entry from shmem/tmpfs. Return
- * it without attempting to raise page count.
+ * A shadow entry of a recently evicted page, a swap
+ * entry from shmem/tmpfs or a DAX entry. Return it
+ * without attempting to raise page count.
*/
goto export;
}
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/backing-dev.h>
+#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
@@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping,
return;
spin_lock_irq(&mapping->tree_lock);
- /*
- * Regular page slots are stabilized by the page lock even
- * without the tree itself locked. These unlocked entries
- * need verification under the tree lock.
- */
- if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
- goto unlock;
- if (*slot != entry)
- goto unlock;
- radix_tree_replace_slot(slot, NULL);
- mapping->nrshadows--;
- if (!node)
- goto unlock;
- workingset_node_shadows_dec(node);
- /*
- * Don't track node without shadow entries.
- *
- * Avoid acquiring the list_lru lock if already untracked.
- * The list_empty() test is safe as node->private_list is
- * protected by mapping->tree_lock.
- */
- if (!workingset_node_shadows(node) &&
- !list_empty(&node->private_list))
- list_lru_del(&workingset_shadow_nodes, &node->private_list);
- __radix_tree_delete_node(&mapping->page_tree, node);
+
+ if (dax_mapping(mapping)) {
+ if (radix_tree_delete_item(&mapping->page_tree, index, entry))
+ mapping->nrexceptional--;
+ } else {
+ /*
+ * Regular page slots are stabilized by the page lock even
+ * without the tree itself locked. These unlocked entries
+ * need verification under the tree lock.
+ */
+ if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
+ &slot))
+ goto unlock;
+ if (*slot != entry)
+ goto unlock;
+ radix_tree_replace_slot(slot, NULL);
+ mapping->nrexceptional--;
+ if (!node)
+ goto unlock;
+ workingset_node_shadows_dec(node);
+ /*
+ * Don't track node without shadow entries.
+ *
+ * Avoid acquiring the list_lru lock if already untracked.
+ * The list_empty() test is safe as node->private_list is
+ * protected by mapping->tree_lock.
+ */
+ if (!workingset_node_shadows(node) &&
+ !list_empty(&node->private_list))
+ list_lru_del(&workingset_shadow_nodes,
+ &node->private_list);
+ __radix_tree_delete_node(&mapping->page_tree, node);
+ }
unlock:
spin_unlock_irq(&mapping->tree_lock);
}
@@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
int i;
cleancache_invalidate_inode(mapping);
- if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+ if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return;
/* Offsets within partial pages */
@@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
*/
void truncate_inode_pages_final(struct address_space *mapping)
{
- unsigned long nrshadows;
+ unsigned long nrexceptional;
unsigned long nrpages;
/*
@@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping)
/*
* When reclaim installs eviction entries, it increases
- * nrshadows first, then decreases nrpages. Make sure we see
+ * nrexceptional first, then decreases nrpages. Make sure we see
* this in the right order or we might miss an entry.
*/
nrpages = mapping->nrpages;
smp_rmb();
- nrshadows = mapping->nrshadows;
+ nrexceptional = mapping->nrexceptional;
- if (nrpages || nrshadows) {
+ if (nrpages || nrexceptional) {
/*
* As truncation uses a lockless tree lookup, cycle
* the tree lock to make sure any ongoing tree
@@ -46,6 +46,7 @@
#include <linux/oom.h>
#include <linux/prefetch.h>
#include <linux/printk.h>
+#include <linux/dax.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
* inode reclaim needs to empty out the radix tree or
* the nodes are lost. Don't plant shadows behind its
* back.
+ *
+ * We also don't store shadows for DAX mappings because the
+ * only page cache pages found in these are zero pages
+ * covering holes, and because we don't want to mix DAX
+ * exceptional entries and shadow exceptional entries in the
+ * same page_tree.
*/
if (reclaimed && page_is_file_cache(page) &&
- !mapping_exiting(mapping))
+ !mapping_exiting(mapping) && !dax_mapping(mapping))
shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow, memcg);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
node->slots[i] = NULL;
BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
- BUG_ON(!mapping->nrshadows);
- mapping->nrshadows--;
+ BUG_ON(!mapping->nrexceptional);
+ mapping->nrexceptional--;
}
}
BUG_ON(node->count);