[v3,6/6] mm, thp: introduce FOLL_SPLIT_PMD
diff mbox series

Message ID 20190612220320.2223898-7-songliubraving@fb.com
State New
Headers show
Series
  • THP aware uprobe
Related show

Commit Message

Song Liu June 12, 2019, 10:03 p.m. UTC
This patches introduces a new foll_flag: FOLL_SPLIT_PMD. As the name says
FOLL_SPLIT_PMD splits huge pmd for given mm_struct, the underlining huge
page stays as-is.

FOLL_SPLIT_PMD is useful for cases where we need to use regular pages,
but would switch back to huge page and huge pmd on. One of such example
is uprobe. The following patches use FOLL_SPLIT_PMD in uprobe.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/mm.h |  1 +
 mm/gup.c           | 38 +++++++++++++++++++++++++++++++++++---
 2 files changed, 36 insertions(+), 3 deletions(-)

Comments

Song Liu June 12, 2019, 10:16 p.m. UTC | #1
> On Jun 12, 2019, at 3:03 PM, Song Liu <songliubraving@fb.com> wrote:
> 
> This patches introduces a new foll_flag: FOLL_SPLIT_PMD. As the name says
> FOLL_SPLIT_PMD splits huge pmd for given mm_struct, the underlining huge
> page stays as-is.
> 
> FOLL_SPLIT_PMD is useful for cases where we need to use regular pages,
> but would switch back to huge page and huge pmd on. One of such example
> is uprobe. The following patches use FOLL_SPLIT_PMD in uprobe.
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>

Please ignore this one. It is a duplicated copy of 3/5, sent by accident. 

Sorry for the noise. 

Song

> ---
> include/linux/mm.h |  1 +
> mm/gup.c           | 38 +++++++++++++++++++++++++++++++++++---
> 2 files changed, 36 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0ab8c7d84cd0..e605acc4fc81 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2642,6 +2642,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
> #define FOLL_COW	0x4000	/* internal GUP flag */
> #define FOLL_ANON	0x8000	/* don't do file mappings */
> #define FOLL_LONGTERM	0x10000	/* mapping lifetime is indefinite: see below */
> +#define FOLL_SPLIT_PMD	0x20000	/* split huge pmd before returning */
> 
> /*
>  * NOTE on FOLL_LONGTERM:
> diff --git a/mm/gup.c b/mm/gup.c
> index ddde097cf9e4..3d05bddb56c9 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -398,7 +398,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
> 		spin_unlock(ptl);
> 		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
> 	}
> -	if (flags & FOLL_SPLIT) {
> +	if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
> 		int ret;
> 		page = pmd_page(*pmd);
> 		if (is_huge_zero_page(page)) {
> @@ -407,7 +407,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
> 			split_huge_pmd(vma, pmd, address);
> 			if (pmd_trans_unstable(pmd))
> 				ret = -EBUSY;
> -		} else {
> +		} else if (flags & FOLL_SPLIT) {
> 			if (unlikely(!try_get_page(page))) {
> 				spin_unlock(ptl);
> 				return ERR_PTR(-ENOMEM);
> @@ -419,8 +419,40 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
> 			put_page(page);
> 			if (pmd_none(*pmd))
> 				return no_page_table(vma, flags);
> -		}
> +		} else {  /* flags & FOLL_SPLIT_PMD */
> +			unsigned long addr;
> +			pgprot_t prot;
> +			pte_t *pte;
> +			int i;
> +
> +			spin_unlock(ptl);
> +			split_huge_pmd(vma, pmd, address);
> +			lock_page(page);
> +			pte = get_locked_pte(mm, address, &ptl);
> +			if (!pte) {
> +				unlock_page(page);
> +				return no_page_table(vma, flags);
> +			}
> 
> +			/* get refcount for every small page */
> +			page_ref_add(page, HPAGE_PMD_NR);
> +
> +			prot = READ_ONCE(vma->vm_page_prot);
> +			for (i = 0, addr = address & PMD_MASK;
> +			     i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
> +				struct page *p = page + i;
> +
> +				pte = pte_offset_map(pmd, addr);
> +				VM_BUG_ON(!pte_none(*pte));
> +				set_pte_at(mm, addr, pte, mk_pte(p, prot));
> +				page_add_file_rmap(p, false);
> +			}
> +
> +			spin_unlock(ptl);
> +			unlock_page(page);
> +			add_mm_counter(mm, mm_counter_file(page), HPAGE_PMD_NR);
> +			ret = 0;
> +		}
> 		return ret ? ERR_PTR(ret) :
> 			follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
> 	}
> -- 
> 2.17.1
>

Patch
diff mbox series

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0ab8c7d84cd0..e605acc4fc81 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2642,6 +2642,7 @@  struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_COW	0x4000	/* internal GUP flag */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
 #define FOLL_LONGTERM	0x10000	/* mapping lifetime is indefinite: see below */
+#define FOLL_SPLIT_PMD	0x20000	/* split huge pmd before returning */
 
 /*
  * NOTE on FOLL_LONGTERM:
diff --git a/mm/gup.c b/mm/gup.c
index ddde097cf9e4..3d05bddb56c9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -398,7 +398,7 @@  static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 		spin_unlock(ptl);
 		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 	}
-	if (flags & FOLL_SPLIT) {
+	if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
 		int ret;
 		page = pmd_page(*pmd);
 		if (is_huge_zero_page(page)) {
@@ -407,7 +407,7 @@  static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 			split_huge_pmd(vma, pmd, address);
 			if (pmd_trans_unstable(pmd))
 				ret = -EBUSY;
-		} else {
+		} else if (flags & FOLL_SPLIT) {
 			if (unlikely(!try_get_page(page))) {
 				spin_unlock(ptl);
 				return ERR_PTR(-ENOMEM);
@@ -419,8 +419,40 @@  static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 			put_page(page);
 			if (pmd_none(*pmd))
 				return no_page_table(vma, flags);
-		}
+		} else {  /* flags & FOLL_SPLIT_PMD */
+			unsigned long addr;
+			pgprot_t prot;
+			pte_t *pte;
+			int i;
+
+			spin_unlock(ptl);
+			split_huge_pmd(vma, pmd, address);
+			lock_page(page);
+			pte = get_locked_pte(mm, address, &ptl);
+			if (!pte) {
+				unlock_page(page);
+				return no_page_table(vma, flags);
+			}
 
+			/* get refcount for every small page */
+			page_ref_add(page, HPAGE_PMD_NR);
+
+			prot = READ_ONCE(vma->vm_page_prot);
+			for (i = 0, addr = address & PMD_MASK;
+			     i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
+				struct page *p = page + i;
+
+				pte = pte_offset_map(pmd, addr);
+				VM_BUG_ON(!pte_none(*pte));
+				set_pte_at(mm, addr, pte, mk_pte(p, prot));
+				page_add_file_rmap(p, false);
+			}
+
+			spin_unlock(ptl);
+			unlock_page(page);
+			add_mm_counter(mm, mm_counter_file(page), HPAGE_PMD_NR);
+			ret = 0;
+		}
 		return ret ? ERR_PTR(ret) :
 			follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 	}