diff mbox

[PATCHv4,04/25] rmap: support file thp

Message ID 1457737157-38573-5-git-send-email-kirill.shutemov@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

kirill.shutemov@linux.intel.com March 11, 2016, 10:58 p.m. UTC
Naive approach: on mapping/unmapping the page as compound we update
->_mapcount on each 4k page. That's not efficient, but it's not obvious
how we can optimize this. We can look into optimization later.

PG_double_map optimization doesn't work for file pages since lifecycle
of file pages is different comparing to anon pages: file page can be
mapped again at any time.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/rmap.h |  2 +-
 mm/huge_memory.c     | 10 +++++++---
 mm/memory.c          |  4 ++--
 mm/migrate.c         |  2 +-
 mm/rmap.c            | 48 +++++++++++++++++++++++++++++++++++-------------
 mm/util.c            |  6 ++++++
 6 files changed, 52 insertions(+), 20 deletions(-)

Comments

Aneesh Kumar K.V March 18, 2016, 9:40 a.m. UTC | #1
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> writes:

> [ text/plain ]
> Naive approach: on mapping/unmapping the page as compound we update
> ->_mapcount on each 4k page. That's not efficient, but it's not obvious
> how we can optimize this. We can look into optimization later.
>
> PG_double_map optimization doesn't work for file pages since lifecycle
> of file pages is different comparing to anon pages: file page can be
> mapped again at any time.
>

Can you explain this more ?. We added PG_double_map so that we can keep
page_remove_rmap simpler. So if it isn't a compound page we still can do

	if (!atomic_add_negative(-1, &page->_mapcount))

I am trying to understand why we can't use that with file pages ?

-aneesh

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kirill A . Shutemov March 19, 2016, 1:01 a.m. UTC | #2
On Fri, Mar 18, 2016 at 03:10:06PM +0530, Aneesh Kumar K.V wrote:
> "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> writes:
> 
> > [ text/plain ]
> > Naive approach: on mapping/unmapping the page as compound we update
> > ->_mapcount on each 4k page. That's not efficient, but it's not obvious
> > how we can optimize this. We can look into optimization later.
> >
> > PG_double_map optimization doesn't work for file pages since lifecycle
> > of file pages is different comparing to anon pages: file page can be
> > mapped again at any time.
> >
> 
> Can you explain this more ?. We added PG_double_map so that we can keep
> page_remove_rmap simpler. So if it isn't a compound page we still can do
> 
> 	if (!atomic_add_negative(-1, &page->_mapcount))
> 
> I am trying to understand why we can't use that with file pages ?

The first thing: for non-compound pages we still have simple
atomic_inc_and_test() / atomic_add_negative(-1), nothing changed here.

About compound pages:

For anon-THP PG_double_map allowed to not touch _mapcount in all subpages
until a PMD which maps the page is split.  This way we significantly lower
overhead on refcounting as long as we have the page mapped with PMD-only,
since we only need to increment compound_mapcount().

The optimization is possible due to relatively simple lifecycle of
anonymous THP page:

  - anon-THPs always mapped with PMD first;

  - new mapping of THP can only be created via fork();

  - the page only can get mapped with PTEs via split_huge_pmd();

For file-THP the situation is different. Once we allocated a huge page and
put it on radix tree, the page can be mapped with PTEs or PMDs at any
time. It makes the same optimization inapplicable there.

I think there *can* be some room for optimization, but I don't want to
invest more time here, until it's identified as bottleneck. It can lead to
more complex code on rmap side.
diff mbox

Patch

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 49eb4f8ebac9..5704f101b52e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -165,7 +165,7 @@  void do_page_add_anon_rmap(struct page *, struct vm_area_struct *,
 			   unsigned long, int);
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *,
 		unsigned long, bool);
-void page_add_file_rmap(struct page *);
+void page_add_file_rmap(struct page *, bool);
 void page_remove_rmap(struct page *, bool);
 
 void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1f58c6026860..1b111d5c0312 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3251,18 +3251,22 @@  static void __split_huge_page(struct page *page, struct list_head *list)
 
 int total_mapcount(struct page *page)
 {
-	int i, ret;
+	int i, compound, ret;
 
 	VM_BUG_ON_PAGE(PageTail(page), page);
 
 	if (likely(!PageCompound(page)))
 		return atomic_read(&page->_mapcount) + 1;
 
-	ret = compound_mapcount(page);
+	compound = compound_mapcount(page);
 	if (PageHuge(page))
-		return ret;
+		return compound;
+	ret = compound;
 	for (i = 0; i < HPAGE_PMD_NR; i++)
 		ret += atomic_read(&page[i]._mapcount) + 1;
+	/* File pages has compound_mapcount included in _mapcount */
+	if (!PageAnon(page))
+		return ret - compound * HPAGE_PMD_NR;
 	if (PageDoubleMap(page))
 		ret -= HPAGE_PMD_NR;
 	return ret;
diff --git a/mm/memory.c b/mm/memory.c
index 9c896a52565c..a6c1c4955560 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1436,7 +1436,7 @@  static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
 	inc_mm_counter_fast(mm, mm_counter_file(page));
-	page_add_file_rmap(page);
+	page_add_file_rmap(page, false);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
 	retval = 0;
@@ -2879,7 +2879,7 @@  int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 		lru_cache_add_active_or_unevictable(page, vma);
 	} else {
 		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
-		page_add_file_rmap(page);
+		page_add_file_rmap(page, false);
 	}
 	set_pte_at(vma->vm_mm, fe->address, fe->pte, entry);
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 6c822a7b27e0..d20276fffce7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -170,7 +170,7 @@  static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	} else if (PageAnon(new))
 		page_add_anon_rmap(new, vma, addr, false);
 	else
-		page_add_file_rmap(new);
+		page_add_file_rmap(new, false);
 
 	if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
 		mlock_vma_page(new);
diff --git a/mm/rmap.c b/mm/rmap.c
index c399a0d41b31..66808955a5e0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1285,18 +1285,34 @@  void page_add_new_anon_rmap(struct page *page,
  *
  * The caller needs to hold the pte lock.
  */
-void page_add_file_rmap(struct page *page)
+void page_add_file_rmap(struct page *page, bool compound)
 {
+	int i, nr = 1;
+
+	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
 	lock_page_memcg(page);
-	if (atomic_inc_and_test(&page->_mapcount)) {
-		__inc_zone_page_state(page, NR_FILE_MAPPED);
-		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+	if (compound && PageTransHuge(page)) {
+		for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
+			if (atomic_inc_and_test(&page[i]._mapcount))
+				nr++;
+		}
+		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
+			goto out;
+	} else {
+		if (!atomic_inc_and_test(&page->_mapcount))
+			goto out;
 	}
+	__mod_zone_page_state(page_zone(page), NR_FILE_MAPPED, nr);
+	mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+out:
 	unlock_page_memcg(page);
 }
 
-static void page_remove_file_rmap(struct page *page)
+static void page_remove_file_rmap(struct page *page, bool compound)
 {
+	int i, nr = 1;
+
+	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
 	lock_page_memcg(page);
 
 	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
@@ -1307,15 +1323,24 @@  static void page_remove_file_rmap(struct page *page)
 	}
 
 	/* page still mapped by someone else? */
-	if (!atomic_add_negative(-1, &page->_mapcount))
-		goto out;
+	if (compound && PageTransHuge(page)) {
+		for (i = 0, nr = 0; i < HPAGE_PMD_NR; i++) {
+			if (atomic_add_negative(-1, &page[i]._mapcount))
+				nr++;
+		}
+		if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
+			goto out;
+	} else {
+		if (!atomic_add_negative(-1, &page->_mapcount))
+			goto out;
+	}
 
 	/*
 	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
 	 * these counters are not modified in interrupt context, and
 	 * pte lock(a spinlock) is held, which implies preemption disabled.
 	 */
-	__dec_zone_page_state(page, NR_FILE_MAPPED);
+	__mod_zone_page_state(page_zone(page), NR_FILE_MAPPED, -nr);
 	mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
 
 	if (unlikely(PageMlocked(page)))
@@ -1371,11 +1396,8 @@  static void page_remove_anon_compound_rmap(struct page *page)
  */
 void page_remove_rmap(struct page *page, bool compound)
 {
-	if (!PageAnon(page)) {
-		VM_BUG_ON_PAGE(compound && !PageHuge(page), page);
-		page_remove_file_rmap(page);
-		return;
-	}
+	if (!PageAnon(page))
+		return page_remove_file_rmap(page, compound);
 
 	if (compound)
 		return page_remove_anon_compound_rmap(page);
diff --git a/mm/util.c b/mm/util.c
index 362f4cd8ab3a..2e92f231796b 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -357,6 +357,12 @@  int __page_mapcount(struct page *page)
 	int ret;
 
 	ret = atomic_read(&page->_mapcount) + 1;
+	/*
+	 * For file THP page->_mapcount contains total number of mapping
+	 * of the page: no need to look into compound_mapcount.
+	 */
+	if (!PageAnon(page) && !PageHuge(page))
+		return ret;
 	page = compound_head(page);
 	ret += atomic_read(compound_mapcount_ptr(page)) + 1;
 	if (PageDoubleMap(page))