diff mbox series

[v4,2/3] fs/proc/task_mmu: Implement IOCTL to get and/or the clear info about PTEs

Message ID 20221103100736.2356351-3-usama.anjum@collabora.com (mailing list archive)
State New
Headers show
Series Implement IOCTL to get and/or the clear info about PTEs | expand

Commit Message

Muhammad Usama Anjum Nov. 3, 2022, 10:07 a.m. UTC
This IOCTL, PAGEMAP_SCAN can be used to get and/or clear the info about
page table entries. The following operations are supported in this ioctl:
- Get the information if the pages are soft-dirty, file mapped, present
  or swapped.
- Clear the soft-dirty PTE bit of the pages.
- Get and clear the soft-dirty PTE bit of the pages.

Only the soft-dirty bit can be read and cleared atomically. struct
pagemap_sd_args is used as the argument of the IOCTL. In this struct:
- The range is specified through start and len.
- The output buffer and size is specified as vec and vec_len.
- The optional maximum requested pages are specified in the max_pages.
- The flags can be specified in the flags field. The PAGEMAP_SD_CLEAR
  and PAGEMAP_SD_NO_REUSED_REGIONS are supported.
- The masks are specified in rmask, amask, emask and return_mask.

This IOCTL can be extended to get information about more PTE bits.

This is based on a patch from Gabriel Krisman Bertazi.

Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
---
Changes in v4:
- Update the interface and implementation

Changes in v3:
- Tighten the user-kernel interface by using explicit types and add more
  error checking

Changes in v2:
- Convert the interface from syscall to ioctl
- Remove pidfd support as it doesn't make sense in ioctl
---
 fs/proc/task_mmu.c            | 318 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/fs.h       |  53 ++++++
 tools/include/uapi/linux/fs.h |  53 ++++++
 3 files changed, 424 insertions(+)

Comments

kernel test robot Nov. 3, 2022, 2 p.m. UTC | #1
Hi Muhammad,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20221103]
[also build test ERROR on v6.1-rc3]
[cannot apply to shuah-kselftest/next linus/master v6.1-rc3 v6.1-rc2 v6.1-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Muhammad-Usama-Anjum/Implement-IOCTL-to-get-and-or-the-clear-info-about-PTEs/20221103-181024
patch link:    https://lore.kernel.org/r/20221103100736.2356351-3-usama.anjum%40collabora.com
patch subject: [PATCH v4 2/3] fs/proc/task_mmu: Implement IOCTL to get and/or the clear info about PTEs
config: s390-allyesconfig
compiler: s390-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/ece21cda808bfbd0715c4a8bf691531d31569e09
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Muhammad-Usama-Anjum/Implement-IOCTL-to-get-and-or-the-clear-info-about-PTEs/20221103-181024
        git checkout ece21cda808bfbd0715c4a8bf691531d31569e09
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   fs/proc/task_mmu.c: In function 'pagemap_scan_pmd_entry':
>> fs/proc/task_mmu.c:1897:17: error: implicit declaration of function 'flush_tlb_mm_range'; did you mean 'flush_tlb_range'? [-Werror=implicit-function-declaration]
    1897 |                 flush_tlb_mm_range(vma->vm_mm, start, end, PAGE_SHIFT, false);
         |                 ^~~~~~~~~~~~~~~~~~
         |                 flush_tlb_range
   cc1: some warnings being treated as errors


vim +1897 fs/proc/task_mmu.c

  1835	
  1836	static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long addr,
  1837					  unsigned long end, struct mm_walk *walk)
  1838	{
  1839		struct pagemap_scan_private *p = walk->private;
  1840		struct vm_area_struct *vma = walk->vma;
  1841		unsigned long start = addr;
  1842		int dirty, ret = 0;
  1843		spinlock_t *ptl;
  1844		pte_t *pte;
  1845		bool dirty_vma = (p->flags & PAGEMAP_NO_REUSED_REGIONS) ?
  1846				 (false) : (vma->vm_flags & VM_SOFTDIRTY);
  1847	
  1848		if ((walk->vma->vm_end < addr) || (p->max_pages && p->found_pages == p->max_pages))
  1849			return 0;
  1850	
  1851		end = min(end, walk->vma->vm_end);
  1852	
  1853		ptl = pmd_trans_huge_lock(pmd, vma);
  1854		if (ptl) {
  1855			if (dirty_vma || check_soft_dirty_pmd(vma, addr, pmd, false)) {
  1856				/*
  1857				 * Break huge page into small pages if operation needs to be performed is
  1858				 * on a portion of the huge page or the return buffer cannot store complete
  1859				 * data.
  1860				 */
  1861				if ((IS_CLEAR_OP(p->flags) && (end - addr < HPAGE_SIZE)) ||
  1862				    (IS_GET_OP(p->vec) && p->max_pages &&
  1863				    (p->found_pages + HPAGE_SIZE/PAGE_SIZE > p->max_pages))) {
  1864					spin_unlock(ptl);
  1865					split_huge_pmd(vma, pmd, addr);
  1866					goto process_smaller_pages;
  1867				} else {
  1868					dirty = check_soft_dirty_pmd(vma, addr, pmd, IS_CLEAR_OP(p->flags));
  1869					if (IS_GET_OP(p->vec))
  1870						add_to_out(dirty_vma || dirty, vma->vm_file,
  1871							   pmd_present(*pmd), is_swap_pmd(*pmd), p,
  1872							   addr, (end - addr)/PAGE_SIZE);
  1873				}
  1874			}
  1875			spin_unlock(ptl);
  1876			return 0;
  1877		}
  1878	
  1879	process_smaller_pages:
  1880		if (pmd_trans_unstable(pmd))
  1881			return 0;
  1882	
  1883		pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
  1884		for (; addr < end && !ret; pte++, addr += PAGE_SIZE) {
  1885			dirty = check_soft_dirty(vma, addr, pte, IS_CLEAR_OP(p->flags));
  1886			if (IS_GET_OP(p->vec)) {
  1887				ret = add_to_out(dirty_vma || dirty, vma->vm_file, pte_present(*pte),
  1888						 is_swap_pte(*pte), p, addr, 1);
  1889				if (p->max_pages && (p->found_pages == p->max_pages))
  1890					break;
  1891			}
  1892		}
  1893		pte_unmap_unlock(pte - 1, ptl);
  1894		cond_resched();
  1895	
  1896		if (IS_CLEAR_OP(p->flags))
> 1897			flush_tlb_mm_range(vma->vm_mm, start, end, PAGE_SHIFT, false);
  1898	
  1899		return 0;
  1900	}
  1901
diff mbox series

Patch

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8235c536ac70..61892142cd5a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -19,6 +19,9 @@ 
 #include <linux/shmem_fs.h>
 #include <linux/uaccess.h>
 #include <linux/pkeys.h>
+#include <uapi/linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/minmax.h>
 
 #include <asm/elf.h>
 #include <asm/tlb.h>
@@ -1775,11 +1778,326 @@  static int pagemap_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+
+#define PAGEMAP_OP_MASK		(PAGE_IS_SD | PAGE_IS_FILE |		\
+				 PAGE_IS_PRESENT | PAGE_IS_SWAPED)
+#define PAGEMAP_NON_SD_MASK	(PAGE_IS_FILE |	PAGE_IS_PRESENT | PAGE_IS_SWAPED)
+#define PAGEMAP_SD_FLAGS_MASK	(PAGEMAP_SD_CLEAR | PAGEMAP_NO_REUSED_REGIONS)
+#define IS_CLEAR_OP(flags)	(flags & PAGEMAP_SD_CLEAR)
+#define IS_GET_OP(vec)		(vec)
+
+struct pagemap_scan_private {
+	struct page_region *vec;
+	unsigned long vec_len;
+	unsigned long index;
+	unsigned int max_pages;
+	unsigned int found_pages;
+	unsigned int flags;
+	unsigned int rmask;
+	unsigned int amask;
+	unsigned int emask;
+	unsigned int return_mask;
+};
+
+static int add_to_out(bool sd, bool file, bool pres, bool swap, struct pagemap_scan_private *p,
+		      unsigned long addr, unsigned int len)
+{
+	unsigned int bitmap, cpy = true, cur = sd | file << 1 | pres << 2 | swap << 3;
+
+	if (p->rmask)
+		cpy = ((p->rmask & cur) == p->rmask) ? true : false;
+	if (cpy && p->amask)
+		cpy = (p->amask & cur) ? true : false;
+	if (cpy && p->emask)
+		cpy = (p->emask & cur) ? false : true;
+
+	bitmap = cur & p->return_mask;
+
+	if (cpy && bitmap) {
+		if (p->index && p->vec[p->index - 1].bitmap == bitmap &&
+		    p->vec[p->index - 1].start + p->vec[p->index - 1].len * PAGE_SIZE == addr) {
+			p->vec[p->index - 1].len += len;
+			p->found_pages += len;
+		} else if (p->index < p->vec_len) {
+			p->vec[p->index].start = addr;
+			p->vec[p->index].len = len;
+			p->vec[p->index].bitmap = bitmap;
+			p->index++;
+			p->found_pages += len;
+		} else {
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long addr,
+				  unsigned long end, struct mm_walk *walk)
+{
+	struct pagemap_scan_private *p = walk->private;
+	struct vm_area_struct *vma = walk->vma;
+	unsigned long start = addr;
+	int dirty, ret = 0;
+	spinlock_t *ptl;
+	pte_t *pte;
+	bool dirty_vma = (p->flags & PAGEMAP_NO_REUSED_REGIONS) ?
+			 (false) : (vma->vm_flags & VM_SOFTDIRTY);
+
+	if ((walk->vma->vm_end < addr) || (p->max_pages && p->found_pages == p->max_pages))
+		return 0;
+
+	end = min(end, walk->vma->vm_end);
+
+	ptl = pmd_trans_huge_lock(pmd, vma);
+	if (ptl) {
+		if (dirty_vma || check_soft_dirty_pmd(vma, addr, pmd, false)) {
+			/*
+			 * Break huge page into small pages if operation needs to be performed is
+			 * on a portion of the huge page or the return buffer cannot store complete
+			 * data.
+			 */
+			if ((IS_CLEAR_OP(p->flags) && (end - addr < HPAGE_SIZE)) ||
+			    (IS_GET_OP(p->vec) && p->max_pages &&
+			    (p->found_pages + HPAGE_SIZE/PAGE_SIZE > p->max_pages))) {
+				spin_unlock(ptl);
+				split_huge_pmd(vma, pmd, addr);
+				goto process_smaller_pages;
+			} else {
+				dirty = check_soft_dirty_pmd(vma, addr, pmd, IS_CLEAR_OP(p->flags));
+				if (IS_GET_OP(p->vec))
+					add_to_out(dirty_vma || dirty, vma->vm_file,
+						   pmd_present(*pmd), is_swap_pmd(*pmd), p,
+						   addr, (end - addr)/PAGE_SIZE);
+			}
+		}
+		spin_unlock(ptl);
+		return 0;
+	}
+
+process_smaller_pages:
+	if (pmd_trans_unstable(pmd))
+		return 0;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (; addr < end && !ret; pte++, addr += PAGE_SIZE) {
+		dirty = check_soft_dirty(vma, addr, pte, IS_CLEAR_OP(p->flags));
+		if (IS_GET_OP(p->vec)) {
+			ret = add_to_out(dirty_vma || dirty, vma->vm_file, pte_present(*pte),
+					 is_swap_pte(*pte), p, addr, 1);
+			if (p->max_pages && (p->found_pages == p->max_pages))
+				break;
+		}
+	}
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+
+	if (IS_CLEAR_OP(p->flags))
+		flush_tlb_mm_range(vma->vm_mm, start, end, PAGE_SHIFT, false);
+
+	return 0;
+}
+
+static int pagemap_scan_pte_hole(unsigned long addr, unsigned long end, int depth,
+				 struct mm_walk *walk)
+{
+	struct pagemap_scan_private *p = walk->private;
+	struct vm_area_struct *vma = walk->vma;
+	unsigned int len;
+	bool sd;
+
+	if (vma) {
+		/* Individual pages haven't been allocated and written */
+		sd = (p->flags & PAGEMAP_NO_REUSED_REGIONS) ? (false) :
+		     (vma->vm_flags & VM_SOFTDIRTY);
+
+		len = (end - addr)/PAGE_SIZE;
+		if (p->max_pages && p->max_pages - p->found_pages < len)
+			len = p->max_pages - p->found_pages;
+
+		add_to_out(sd, vma->vm_file, false, false, p, addr, len);
+	}
+
+	return 0;
+}
+
+static int pagemap_scan_pre_vma(unsigned long start, unsigned long end, struct mm_walk *walk)
+{
+	struct pagemap_scan_private *p = walk->private;
+	struct vm_area_struct *vma = walk->vma;
+	unsigned long end_cut = end;
+	int ret;
+
+	if (!(p->flags & PAGEMAP_NO_REUSED_REGIONS) && IS_CLEAR_OP(p->flags) &&
+	    (vma->vm_flags & VM_SOFTDIRTY)) {
+		if (vma->vm_start < start) {
+			ret = split_vma(vma->vm_mm, vma, start, 1);
+			if (ret)
+				return ret;
+		}
+		/* Calculate end_cut because of max_pages */
+		if (IS_GET_OP(p->vec) && p->max_pages)
+			end_cut = min(start + (p->max_pages - p->found_pages) * PAGE_SIZE, end);
+
+		if (vma->vm_end > end_cut) {
+			ret = split_vma(vma->vm_mm, vma, end_cut, 0);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void pagemap_scan_post_vma(struct mm_walk *walk)
+{
+	struct pagemap_scan_private *p = walk->private;
+	struct vm_area_struct *vma = walk->vma;
+
+	if (!(p->flags & PAGEMAP_NO_REUSED_REGIONS) && IS_CLEAR_OP(p->flags) &&
+	    (vma->vm_flags & VM_SOFTDIRTY)) {
+		vma->vm_flags &= ~VM_SOFTDIRTY;
+		vma_set_page_prot(vma);
+	}
+}
+
+static int pagemap_scan_pmd_test_walk(unsigned long start, unsigned long end, struct mm_walk *walk)
+{
+	struct pagemap_scan_private *p = walk->private;
+	struct vm_area_struct *vma = walk->vma;
+
+	if (IS_GET_OP(p->vec) && p->max_pages && (p->found_pages == p->max_pages))
+		return -1;
+
+	if (vma->vm_flags & VM_PFNMAP)
+		return 1;
+
+	return 0;
+}
+
+static const struct mm_walk_ops pagemap_scan_ops = {
+	.test_walk = pagemap_scan_pmd_test_walk,
+	.pmd_entry = pagemap_scan_pmd_entry,
+	.pte_hole = pagemap_scan_pte_hole,
+
+	/* Only for clearing SD bit over VMAs */
+	.pre_vma = pagemap_scan_pre_vma,
+	.post_vma = pagemap_scan_post_vma,
+};
+
+static long do_pagemap_sd_cmd(struct mm_struct *mm, struct pagemap_scan_arg *arg)
+{
+	struct mmu_notifier_range range;
+	unsigned long __user start, end;
+	struct pagemap_scan_private p;
+	int ret;
+
+	start = (unsigned long)untagged_addr(arg->start);
+	if ((!IS_ALIGNED(start, PAGE_SIZE)) || (!access_ok((void __user *)start, arg->len)))
+		return -EINVAL;
+
+	if (IS_GET_OP(arg->vec) &&
+	    ((arg->vec_len == 0) || (!access_ok((struct page_region *)arg->vec, arg->vec_len))))
+		return -ENOMEM;
+
+	if ((arg->flags & ~PAGEMAP_SD_FLAGS_MASK) || (arg->rmask & ~PAGEMAP_OP_MASK) ||
+	    (arg->amask & ~PAGEMAP_OP_MASK) || (arg->emask & ~PAGEMAP_OP_MASK) ||
+	    (arg->return_mask & ~PAGEMAP_OP_MASK))
+		return -EINVAL;
+
+	if ((!arg->rmask && !arg->amask && !arg->emask) || !arg->return_mask)
+		return -EINVAL;
+
+	if ((arg->flags & PAGEMAP_SD_FLAGS_MASK) && ((arg->rmask & PAGEMAP_NON_SD_MASK) ||
+	     (arg->amask & PAGEMAP_NON_SD_MASK)))
+		return -EINVAL;
+
+	end = start + arg->len;
+	p.max_pages = arg->max_pages;
+	p.found_pages = 0;
+	p.flags = arg->flags;
+	p.rmask = arg->rmask;
+	p.amask = arg->amask;
+	p.emask = arg->emask;
+	p.return_mask = arg->return_mask;
+	p.index = 0;
+	p.vec_len = arg->vec_len;
+
+	if (IS_GET_OP(arg->vec)) {
+		p.vec = vzalloc(arg->vec_len * sizeof(struct page_region));
+		if (!p.vec)
+			return -ENOMEM;
+	} else {
+		p.vec = NULL;
+	}
+
+	if (IS_CLEAR_OP(arg->flags)) {
+		mmap_write_lock(mm);
+
+		mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, 0, NULL, mm, start, end);
+		mmu_notifier_invalidate_range_start(&range);
+		inc_tlb_flush_pending(mm);
+	} else {
+		mmap_read_lock(mm);
+	}
+
+	ret = walk_page_range(mm, start, end, &pagemap_scan_ops, &p);
+
+	if (IS_CLEAR_OP(arg->flags)) {
+		mmu_notifier_invalidate_range_end(&range);
+		dec_tlb_flush_pending(mm);
+
+		mmap_write_unlock(mm);
+	} else {
+		mmap_read_unlock(mm);
+	}
+
+	if (ret < 0)
+		goto free_data;
+
+	if (IS_GET_OP(arg->vec) && p.index) {
+		if (copy_to_user((struct page_region *)arg->vec, p.vec,
+				 p.index * sizeof(struct page_region))) {
+			ret = -EFAULT;
+			goto free_data;
+		}
+		ret = p.index;
+	} else {
+		ret = 0;
+	}
+
+free_data:
+	if (IS_GET_OP(arg->vec))
+		vfree(p.vec);
+
+	return ret;
+}
+
+static long pagemap_sd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct pagemap_scan_arg __user *uarg = (struct pagemap_scan_arg __user *)arg;
+	struct mm_struct *mm = file->private_data;
+	struct pagemap_scan_arg argument;
+
+	if (cmd == PAGEMAP_SCAN) {
+		if (copy_from_user(&argument, uarg, sizeof(struct pagemap_scan_arg)))
+			return -EFAULT;
+		return do_pagemap_sd_cmd(mm, &argument);
+	}
+	return -EINVAL;
+}
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
 const struct file_operations proc_pagemap_operations = {
 	.llseek		= mem_lseek, /* borrow this */
 	.read		= pagemap_read,
 	.open		= pagemap_open,
 	.release	= pagemap_release,
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	.unlocked_ioctl = pagemap_sd_ioctl,
+	.compat_ioctl = pagemap_sd_ioctl,
+#endif /* CONFIG_MEM_SOFT_DIRTY */
 };
 #endif /* CONFIG_PROC_PAGE_MONITOR */
 
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index b7b56871029c..5d6c0d85dac4 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -305,4 +305,57 @@  typedef int __bitwise __kernel_rwf_t;
 #define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
 			 RWF_APPEND)
 
+/* PAGEMAP IOCTL */
+#define PAGEMAP_SCAN	_IOWR('f', 16, struct pagemap_scan_arg)
+
+/* Bits are set in the bitmap of the page_region and masks in pagemap_sd_args */
+#define PAGE_IS_SD	(1 << 0)
+#define PAGE_IS_FILE	(1 << 1)
+#define PAGE_IS_PRESENT	(1 << 2)
+#define PAGE_IS_SWAPED	(1 << 3)
+
+/*
+ * struct page_region - Page region with bitmap flags
+ * @start:	Start of the region
+ * @len:	Length of the region
+ * bitmap:	Bits sets for the region
+ */
+struct page_region {
+	__u64 start;
+	__u64 len;
+	__u32 bitmap;
+	__u32 __reserved;
+};
+
+/*
+ * struct pagemap_scan_arg - Soft-dirty IOCTL argument
+ * @start:		Starting address of the region
+ * @len:		Length of the region (All the pages in this length are included)
+ * @vec:		Address of page_region struct array for output
+ * @vec_len:		Length of the page_region struct array
+ * @max_pages:		Optional max return pages (It must be less than vec_len if specified)
+ * @flags:		Special flags for the IOCTL
+ * @rmask:		Required mask - All of these bits have to be set in the PTE
+ * @amask:		Any mask - Any of these bits are set in the PTE
+ * @emask:		Exclude mask - None of these bits are set in the PTE
+ * @return_mask:	Bits that have to be reported to the user in page_region
+ */
+struct pagemap_scan_arg {
+	__u64 start;
+	__u64 len;
+	__u64 vec;
+	__u64 vec_len;
+	__u32 max_pages;
+	__u32 flags;
+	__u32 rmask;
+	__u32 amask;
+	__u32 emask;
+	__u32 return_mask;
+};
+
+/* Special flags */
+#define PAGEMAP_SD_CLEAR		(1 << 0)
+/* Check the individual pages if they are soft-dirty to find dirty pages faster. */
+#define PAGEMAP_NO_REUSED_REGIONS	(1 << 1)
+
 #endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index b7b56871029c..5d6c0d85dac4 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -305,4 +305,57 @@  typedef int __bitwise __kernel_rwf_t;
 #define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
 			 RWF_APPEND)
 
+/* PAGEMAP IOCTL */
+#define PAGEMAP_SCAN	_IOWR('f', 16, struct pagemap_scan_arg)
+
+/* Bits are set in the bitmap of the page_region and masks in pagemap_sd_args */
+#define PAGE_IS_SD	(1 << 0)
+#define PAGE_IS_FILE	(1 << 1)
+#define PAGE_IS_PRESENT	(1 << 2)
+#define PAGE_IS_SWAPED	(1 << 3)
+
+/*
+ * struct page_region - Page region with bitmap flags
+ * @start:	Start of the region
+ * @len:	Length of the region
+ * bitmap:	Bits sets for the region
+ */
+struct page_region {
+	__u64 start;
+	__u64 len;
+	__u32 bitmap;
+	__u32 __reserved;
+};
+
+/*
+ * struct pagemap_scan_arg - Soft-dirty IOCTL argument
+ * @start:		Starting address of the region
+ * @len:		Length of the region (All the pages in this length are included)
+ * @vec:		Address of page_region struct array for output
+ * @vec_len:		Length of the page_region struct array
+ * @max_pages:		Optional max return pages (It must be less than vec_len if specified)
+ * @flags:		Special flags for the IOCTL
+ * @rmask:		Required mask - All of these bits have to be set in the PTE
+ * @amask:		Any mask - Any of these bits are set in the PTE
+ * @emask:		Exclude mask - None of these bits are set in the PTE
+ * @return_mask:	Bits that have to be reported to the user in page_region
+ */
+struct pagemap_scan_arg {
+	__u64 start;
+	__u64 len;
+	__u64 vec;
+	__u64 vec_len;
+	__u32 max_pages;
+	__u32 flags;
+	__u32 rmask;
+	__u32 amask;
+	__u32 emask;
+	__u32 return_mask;
+};
+
+/* Special flags */
+#define PAGEMAP_SD_CLEAR		(1 << 0)
+/* Check the individual pages if they are soft-dirty to find dirty pages faster. */
+#define PAGEMAP_NO_REUSED_REGIONS	(1 << 1)
+
 #endif /* _UAPI_LINUX_FS_H */