[v9,5/6] mm,thp: add read-only THP support for (non-shmem) FS

Message ID: 20190625001246.685563-6-songliubraving@fb.com
State: New, archived
Series: Enable THP for text section of non-shmem files

Commit Message

Song Liu June 25, 2019, 12:12 a.m. UTC
This patch is (hopefully) the first step to enable THP for non-shmem
filesystems.

This patch enables an application to map part of its text section with THPs
via madvise, for example:

    madvise((void *)0x600000, 0x200000, MADV_HUGEPAGE);

We tried to reuse the logic for THP on tmpfs.

Currently, writes are not supported for non-shmem THP. khugepaged will only
process vmas with VM_DENYWRITE set. sys_mmap() ignores VM_DENYWRITE requests
(see ksys_mmap_pgoff()), so the only way to create a vma with VM_DENYWRITE
is execve(). This requirement limits non-shmem THP to text sections.
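
For illustration, an application can locate the 2MB-aligned portion of its
own text mapping and advise it. This is only a user-space sketch, not part
of this patch; khugepaged can only collapse ranges where the virtual address
and the file offset line up at 2MB granularity (see the hugepage_vma_check()
change below), and the kernel must be built with READ_ONLY_THP_FOR_FS:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define HPAGE_SIZE	(2UL << 20)	/* PMD-sized page on x86_64 */

int main(void)
{
	FILE *maps = fopen("/proc/self/maps", "r");
	char line[512];

	while (maps && fgets(line, sizeof(line), maps)) {
		unsigned long start, end;
		char perms[8];

		if (sscanf(line, "%lx-%lx %7s", &start, &end, perms) != 3)
			continue;
		if (strcmp(perms, "r-xp"))	/* text mapping only */
			continue;

		/* round inward to 2MB boundaries */
		start = (start + HPAGE_SIZE - 1) & ~(HPAGE_SIZE - 1);
		end &= ~(HPAGE_SIZE - 1);
		if (end > start)
			madvise((void *)start, end - start, MADV_HUGEPAGE);
		break;
	}
	if (maps)
		fclose(maps);
	return 0;
}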

The next patch will handle writes, which can only happen after all the vmas
with VM_DENYWRITE are unmapped.

An EXPERIMENTAL config, READ_ONLY_THP_FOR_FS, is added to gate this
feature.

Acked-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 mm/Kconfig      | 11 ++++++
 mm/filemap.c    |  4 +--
 mm/khugepaged.c | 94 +++++++++++++++++++++++++++++++++++++++++--------
 mm/rmap.c       | 12 ++++---
 4 files changed, 100 insertions(+), 21 deletions(-)

Comments

Johannes Weiner July 10, 2019, 6:48 p.m. UTC | #1
On Mon, Jun 24, 2019 at 05:12:45PM -0700, Song Liu wrote:
> This patch is (hopefully) the first step to enable THP for non-shmem
> filesystems.
> 
> This patch enables an application to put part of its text sections to THP
> via madvise, for example:
> 
>     madvise((void *)0x600000, 0x200000, MADV_HUGEPAGE);
> 
> We tried to reuse the logic for THP on tmpfs.
> 
> Currently, write is not supported for non-shmem THP. khugepaged will only
> process vma with VM_DENYWRITE. sys_mmap() ignores VM_DENYWRITE requests
> (see ksys_mmap_pgoff). The only way to create vma with VM_DENYWRITE is
> execve(). This requirement limits non-shmem THP to text sections.
> 
> The next patch will handle writes, which would only happen when the all
> the vmas with VM_DENYWRITE are unmapped.
> 
> An EXPERIMENTAL config, READ_ONLY_THP_FOR_FS, is added to gate this
> feature.
> 
> Acked-by: Rik van Riel <riel@surriel.com>
> Signed-off-by: Song Liu <songliubraving@fb.com>

This is really cool, and less invasive than I anticipated. Nice work.

I only have one concern and one question:

> @@ -1392,6 +1401,29 @@ static void collapse_file(struct mm_struct *mm,
>  				result = SCAN_FAIL;
>  				goto xa_unlocked;
>  			}
> +		} else if (!page || xa_is_value(page)) {
> +			xas_unlock_irq(&xas);
> +			page_cache_sync_readahead(mapping, &file->f_ra, file,
> +						  index, PAGE_SIZE);
> +			/* drain pagevecs to help isolate_lru_page() */
> +			lru_add_drain();
> +			page = find_lock_page(mapping, index);
> +			if (unlikely(page == NULL)) {
> +				result = SCAN_FAIL;
> +				goto xa_unlocked;
> +			}
> +		} else if (!PageUptodate(page)) {
> +			VM_BUG_ON(is_shmem);
> +			xas_unlock_irq(&xas);
> +			wait_on_page_locked(page);
> +			if (!trylock_page(page)) {
> +				result = SCAN_PAGE_LOCK;
> +				goto xa_unlocked;
> +			}
> +			get_page(page);
> +		} else if (!is_shmem && PageDirty(page)) {
> +			result = SCAN_FAIL;
> +			goto xa_locked;
>  		} else if (trylock_page(page)) {
>  			get_page(page);
>  			xas_unlock_irq(&xas);

The many else ifs here check fairly complex page state and are hard to
follow and verify mentally. In fact, it's a bit easier now in the
patch when you see how it *used* to work with just shmem, but the end
result is fragile from a maintenance POV.

The shmem and file cases have little in common - basically only the
trylock_page(). Can you please make one big 'if (is_shmem) {} else {}'
structure instead that keeps those two scenarios separate?
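
Roughly this shape (just a sketch, untested - the individual branch bodies
can stay as they are in the patch):

		if (is_shmem) {
			if (!page) {
				/* hole / truncated extent: as before */
			} else if (xa_is_value(page) || !PageUptodate(page)) {
				/* swap in or instantiate fallocated page */
			} else if (trylock_page(page)) {
				/* proceed with the collapse */
			} else {
				/* SCAN_PAGE_LOCK */
			}
		} else {	/* regular file */
			if (!page || xa_is_value(page)) {
				/* readahead + find_lock_page() */
			} else if (!PageUptodate(page)) {
				/* wait for the read to finish, then lock */
			} else if (PageDirty(page)) {
				/* SCAN_FAIL */
			} else if (trylock_page(page)) {
				/* proceed with the collapse */
			} else {
				/* SCAN_PAGE_LOCK */
			}
		}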

> @@ -1426,6 +1458,12 @@ static void collapse_file(struct mm_struct *mm,
>  			goto out_unlock;
>  		}
>  
> +		if (page_has_private(page) &&
> +		    !try_to_release_page(page, GFP_KERNEL)) {
> +			result = SCAN_PAGE_HAS_PRIVATE;
> +			break;
> +		}
> +
>  		if (page_mapped(page))
>  			unmap_mapping_pages(mapping, index, 1, false);

> @@ -1607,6 +1658,17 @@ static void khugepaged_scan_file(struct mm_struct *mm,
>  			break;
>  		}
>  
> +		if (page_has_private(page) && trylock_page(page)) {
> +			int ret;
> +
> +			ret = try_to_release_page(page, GFP_KERNEL);
> +			unlock_page(page);
> +			if (!ret) {
> +				result = SCAN_PAGE_HAS_PRIVATE;
> +				break;
> +			}
> +		}
> +
>  		if (page_count(page) != 1 + page_mapcount(page)) {
>  			result = SCAN_PAGE_COUNT;
>  			break;

There is already a try_to_release_page() inside the page lock section in
collapse_file(). I'm assuming you added this one because private data
affects the refcount. But it seems a bit overkill just for that; we
could also still fail the check, in which case we'd have dropped the
buffers in vain. Can you fix the check instead?

There is an is_page_cache_freeable() function in vmscan.c that handles
private fs references:

static inline int is_page_cache_freeable(struct page *page)
{
	/*
	 * A freeable page cache page is referenced only by the caller
	 * that isolated the page, the page cache and optional buffer
	 * heads at page->private.
	 */
	int page_cache_pins = PageTransHuge(page) && PageSwapCache(page) ?
		HPAGE_PMD_NR : 1;
	return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
}

Wouldn't this work here as well?
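
I.e. khugepaged_scan_file() could account for the buffer head reference in
the existing refcount check instead of dropping the buffers up front -
something like this (untested):

		/*
		 * The page is referenced by the page cache, by the ptes
		 * mapping it, and possibly by buffer heads at page->private.
		 */
		if (page_count(page) !=
		    1 + page_mapcount(page) + page_has_private(page)) {
			result = SCAN_PAGE_COUNT;
			break;
		}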

The rest looks great to me.
Song Liu July 22, 2019, 11:41 p.m. UTC | #2
> On Jul 10, 2019, at 11:48 AM, Johannes Weiner <hannes@cmpxchg.org> wrote:
> 
> On Mon, Jun 24, 2019 at 05:12:45PM -0700, Song Liu wrote:
>> This patch is (hopefully) the first step to enable THP for non-shmem
>> filesystems.
>> 
>> This patch enables an application to put part of its text sections to THP
>> via madvise, for example:
>> 
>>    madvise((void *)0x600000, 0x200000, MADV_HUGEPAGE);
>> 
>> We tried to reuse the logic for THP on tmpfs.
>> 
>> Currently, write is not supported for non-shmem THP. khugepaged will only
>> process vma with VM_DENYWRITE. sys_mmap() ignores VM_DENYWRITE requests
>> (see ksys_mmap_pgoff). The only way to create vma with VM_DENYWRITE is
>> execve(). This requirement limits non-shmem THP to text sections.
>> 
>> The next patch will handle writes, which would only happen when the all
>> the vmas with VM_DENYWRITE are unmapped.
>> 
>> An EXPERIMENTAL config, READ_ONLY_THP_FOR_FS, is added to gate this
>> feature.
>> 
>> Acked-by: Rik van Riel <riel@surriel.com>
>> Signed-off-by: Song Liu <songliubraving@fb.com>
> 
> This is really cool, and less invasive than I anticipated. Nice work.
> 
> I only have one concern and one question:
> 
>> @@ -1392,6 +1401,29 @@ static void collapse_file(struct mm_struct *mm,
>> 				result = SCAN_FAIL;
>> 				goto xa_unlocked;
>> 			}
>> +		} else if (!page || xa_is_value(page)) {
>> +			xas_unlock_irq(&xas);
>> +			page_cache_sync_readahead(mapping, &file->f_ra, file,
>> +						  index, PAGE_SIZE);
>> +			/* drain pagevecs to help isolate_lru_page() */
>> +			lru_add_drain();
>> +			page = find_lock_page(mapping, index);
>> +			if (unlikely(page == NULL)) {
>> +				result = SCAN_FAIL;
>> +				goto xa_unlocked;
>> +			}
>> +		} else if (!PageUptodate(page)) {
>> +			VM_BUG_ON(is_shmem);
>> +			xas_unlock_irq(&xas);
>> +			wait_on_page_locked(page);
>> +			if (!trylock_page(page)) {
>> +				result = SCAN_PAGE_LOCK;
>> +				goto xa_unlocked;
>> +			}
>> +			get_page(page);
>> +		} else if (!is_shmem && PageDirty(page)) {
>> +			result = SCAN_FAIL;
>> +			goto xa_locked;
>> 		} else if (trylock_page(page)) {
>> 			get_page(page);
>> 			xas_unlock_irq(&xas);
> 
> The many else ifs here check fairly complex page state and are hard to
> follow and verify mentally. In fact, it's a bit easier now in the
> patch when you see how it *used* to work with just shmem, but the end
> result is fragile from a maintenance POV.
> 
> The shmem and file cases have little in common - basically only the
> trylock_page(). Can you please make one big 'if (is_shmem) {} {}'
> structure instead that keeps those two scenarios separate?

Good point! Will fix in next version. 

> 
>> @@ -1426,6 +1458,12 @@ static void collapse_file(struct mm_struct *mm,
>> 			goto out_unlock;
>> 		}
>> 
>> +		if (page_has_private(page) &&
>> +		    !try_to_release_page(page, GFP_KERNEL)) {
>> +			result = SCAN_PAGE_HAS_PRIVATE;
>> +			break;
>> +		}
>> +
>> 		if (page_mapped(page))
>> 			unmap_mapping_pages(mapping, index, 1, false);
> 
>> @@ -1607,6 +1658,17 @@ static void khugepaged_scan_file(struct mm_struct *mm,
>> 			break;
>> 		}
>> 
>> +		if (page_has_private(page) && trylock_page(page)) {
>> +			int ret;
>> +
>> +			ret = try_to_release_page(page, GFP_KERNEL);
>> +			unlock_page(page);
>> +			if (!ret) {
>> +				result = SCAN_PAGE_HAS_PRIVATE;
>> +				break;
>> +			}
>> +		}
>> +
>> 		if (page_count(page) != 1 + page_mapcount(page)) {
>> 			result = SCAN_PAGE_COUNT;
>> 			break;
> 
> There is already a try_to_release() inside the page lock section in
> collapse_file(). I'm assuming you added this one because private data
> affects the refcount. But it seems a bit overkill just for that; we
> could also still fail the check, in which case we'd have dropped the
> buffers in vain. Can you fix the check instead?
> 
> There is an is_page_cache_freeable() function in vmscan.c that handles
> private fs references:
> 
> static inline int is_page_cache_freeable(struct page *page)
> {
> 	/*
> 	 * A freeable page cache page is referenced only by the caller
> 	 * that isolated the page, the page cache and optional buffer
> 	 * heads at page->private.
> 	 */
> 	int page_cache_pins = PageTransHuge(page) && PageSwapCache(page) ?
> 		HPAGE_PMD_NR : 1;
> 	return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
> }
> 
> Wouldn't this work here as well?

Good point! Let me try to fix this.

Thanks,
Song
Kai Huang July 23, 2019, 11:59 p.m. UTC | #3
On Mon, 2019-06-24 at 17:12 -0700, Song Liu wrote:
> This patch is (hopefully) the first step to enable THP for non-shmem
> filesystems.
> 
> This patch enables an application to put part of its text sections to THP
> via madvise, for example:
> 
>     madvise((void *)0x600000, 0x200000, MADV_HUGEPAGE);
> 
> We tried to reuse the logic for THP on tmpfs.
> 
> Currently, write is not supported for non-shmem THP. khugepaged will only
> process vma with VM_DENYWRITE. sys_mmap() ignores VM_DENYWRITE requests
> (see ksys_mmap_pgoff). The only way to create vma with VM_DENYWRITE is
> execve(). This requirement limits non-shmem THP to text sections.
> 
> The next patch will handle writes, which would only happen when the all
> the vmas with VM_DENYWRITE are unmapped.
> 
> An EXPERIMENTAL config, READ_ONLY_THP_FOR_FS, is added to gate this
> feature.
> 
> Acked-by: Rik van Riel <riel@surriel.com>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  mm/Kconfig      | 11 ++++++
>  mm/filemap.c    |  4 +--
>  mm/khugepaged.c | 94 +++++++++++++++++++++++++++++++++++++++++--------
>  mm/rmap.c       | 12 ++++---
>  4 files changed, 100 insertions(+), 21 deletions(-)
> 
> diff --git a/mm/Kconfig b/mm/Kconfig
> index f0c76ba47695..0a8fd589406d 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -762,6 +762,17 @@ config GUP_BENCHMARK
>  
>  	  See tools/testing/selftests/vm/gup_benchmark.c
>  
> +config READ_ONLY_THP_FOR_FS
> +	bool "Read-only THP for filesystems (EXPERIMENTAL)"
> +	depends on TRANSPARENT_HUGE_PAGECACHE && SHMEM

Hi,

Maybe a stupid question since I am new, but why does it depend on SHMEM?

Thanks,
-Kai
Song Liu July 28, 2019, 6:41 a.m. UTC | #4
> On Jul 23, 2019, at 4:59 PM, Huang, Kai <kai.huang@intel.com> wrote:
> 
> On Mon, 2019-06-24 at 17:12 -0700, Song Liu wrote:
>> This patch is (hopefully) the first step to enable THP for non-shmem
>> filesystems.
>> 
>> This patch enables an application to put part of its text sections to THP
>> via madvise, for example:
>> 
>>    madvise((void *)0x600000, 0x200000, MADV_HUGEPAGE);
>> 
>> We tried to reuse the logic for THP on tmpfs.
>> 
>> Currently, write is not supported for non-shmem THP. khugepaged will only
>> process vma with VM_DENYWRITE. sys_mmap() ignores VM_DENYWRITE requests
>> (see ksys_mmap_pgoff). The only way to create vma with VM_DENYWRITE is
>> execve(). This requirement limits non-shmem THP to text sections.
>> 
>> The next patch will handle writes, which would only happen when the all
>> the vmas with VM_DENYWRITE are unmapped.
>> 
>> An EXPERIMENTAL config, READ_ONLY_THP_FOR_FS, is added to gate this
>> feature.
>> 
>> Acked-by: Rik van Riel <riel@surriel.com>
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> mm/Kconfig      | 11 ++++++
>> mm/filemap.c    |  4 +--
>> mm/khugepaged.c | 94 +++++++++++++++++++++++++++++++++++++++++--------
>> mm/rmap.c       | 12 ++++---
>> 4 files changed, 100 insertions(+), 21 deletions(-)
>> 
>> diff --git a/mm/Kconfig b/mm/Kconfig
>> index f0c76ba47695..0a8fd589406d 100644
>> --- a/mm/Kconfig
>> +++ b/mm/Kconfig
>> @@ -762,6 +762,17 @@ config GUP_BENCHMARK
>> 
>> 	  See tools/testing/selftests/vm/gup_benchmark.c
>> 
>> +config READ_ONLY_THP_FOR_FS
>> +	bool "Read-only THP for filesystems (EXPERIMENTAL)"
>> +	depends on TRANSPARENT_HUGE_PAGECACHE && SHMEM
> 
> Hi,
> 
> Maybe a stupid question since I am new, but why does it depend on SHMEM?

Not stupid at all. :)

We reuse a lot of code for shmem thp, thus the dependency. Technically, we 
can remove the dependency. However, we will remove this config option when
THP for FS is more mature. So it doesn't make sense to resolve the 
dependency at this stage.

Thanks,
Song

Patch

diff --git a/mm/Kconfig b/mm/Kconfig
index f0c76ba47695..0a8fd589406d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -762,6 +762,17 @@  config GUP_BENCHMARK
 
 	  See tools/testing/selftests/vm/gup_benchmark.c
 
+config READ_ONLY_THP_FOR_FS
+	bool "Read-only THP for filesystems (EXPERIMENTAL)"
+	depends on TRANSPARENT_HUGE_PAGECACHE && SHMEM
+
+	help
+	  Allow khugepaged to put read-only file-backed pages in THP.
+
+	  This is marked experimental because it is a new feature. Write
+	  support of file THPs will be developed in the next few release
+	  cycles.
+
 config ARCH_HAS_PTE_SPECIAL
 	bool
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 5f072a113535..e79ceccdc6df 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -203,8 +203,8 @@  static void unaccount_page_cache_page(struct address_space *mapping,
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
 		if (PageTransHuge(page))
 			__dec_node_page_state(page, NR_SHMEM_THPS);
-	} else {
-		VM_BUG_ON_PAGE(PageTransHuge(page), page);
+	} else if (PageTransHuge(page)) {
+		__dec_node_page_state(page, NR_FILE_THPS);
 	}
 
 	/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 158cad542627..acbbbeaa083c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -48,6 +48,7 @@  enum scan_result {
 	SCAN_CGROUP_CHARGE_FAIL,
 	SCAN_EXCEED_SWAP_PTE,
 	SCAN_TRUNCATED,
+	SCAN_PAGE_HAS_PRIVATE,
 };
 
 #define CREATE_TRACE_POINTS
@@ -404,7 +405,11 @@  static bool hugepage_vma_check(struct vm_area_struct *vma,
 	    (vm_flags & VM_NOHUGEPAGE) ||
 	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		return false;
-	if (shmem_file(vma->vm_file)) {
+
+	if (shmem_file(vma->vm_file) ||
+	    (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+	     vma->vm_file &&
+	     (vm_flags & VM_DENYWRITE))) {
 		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
 			return false;
 		return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
@@ -456,8 +461,9 @@  int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 	unsigned long hstart, hend;
 
 	/*
-	 * khugepaged does not yet work on non-shmem files or special
-	 * mappings. And file-private shmem THP is not supported.
+	 * khugepaged only supports read-only files for non-shmem files.
+	 * khugepaged does not yet work on special mappings. And
+	 * file-private shmem THP is not supported.
 	 */
 	if (!hugepage_vma_check(vma, vm_flags))
 		return 0;
@@ -1287,12 +1293,12 @@  static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 }
 
 /**
- * collapse_file - collapse small tmpfs/shmem pages into huge one.
+ * collapse_file - collapse filemap/tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
  *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
- *    + swap in pages if necessary;
+ *    + swap/gup in pages if necessary;
  *    + fill in gaps;
  *    + keep old pages around in case rollback is required;
  *  - if replacing succeeds:
@@ -1316,7 +1322,9 @@  static void collapse_file(struct mm_struct *mm,
 	LIST_HEAD(pagelist);
 	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
 	int nr_none = 0, result = SCAN_SUCCEED;
+	bool is_shmem = shmem_file(file);
 
+	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
 	/* Only allocate from the target node */
@@ -1348,7 +1356,8 @@  static void collapse_file(struct mm_struct *mm,
 	} while (1);
 
 	__SetPageLocked(new_page);
-	__SetPageSwapBacked(new_page);
+	if (is_shmem)
+		__SetPageSwapBacked(new_page);
 	new_page->index = start;
 	new_page->mapping = mapping;
 
@@ -1363,7 +1372,7 @@  static void collapse_file(struct mm_struct *mm,
 		struct page *page = xas_next(&xas);
 
 		VM_BUG_ON(index != xas.xa_index);
-		if (!page) {
+		if (is_shmem && !page) {
 			/*
 			 * Stop if extent has been truncated or hole-punched,
 			 * and is now completely empty.
@@ -1384,7 +1393,7 @@  static void collapse_file(struct mm_struct *mm,
 			continue;
 		}
 
-		if (xa_is_value(page) || !PageUptodate(page)) {
+		if (is_shmem && (xa_is_value(page) || !PageUptodate(page))) {
 			xas_unlock_irq(&xas);
 			/* swap in or instantiate fallocated page */
 			if (shmem_getpage(mapping->host, index, &page,
@@ -1392,6 +1401,29 @@  static void collapse_file(struct mm_struct *mm,
 				result = SCAN_FAIL;
 				goto xa_unlocked;
 			}
+		} else if (!page || xa_is_value(page)) {
+			xas_unlock_irq(&xas);
+			page_cache_sync_readahead(mapping, &file->f_ra, file,
+						  index, PAGE_SIZE);
+			/* drain pagevecs to help isolate_lru_page() */
+			lru_add_drain();
+			page = find_lock_page(mapping, index);
+			if (unlikely(page == NULL)) {
+				result = SCAN_FAIL;
+				goto xa_unlocked;
+			}
+		} else if (!PageUptodate(page)) {
+			VM_BUG_ON(is_shmem);
+			xas_unlock_irq(&xas);
+			wait_on_page_locked(page);
+			if (!trylock_page(page)) {
+				result = SCAN_PAGE_LOCK;
+				goto xa_unlocked;
+			}
+			get_page(page);
+		} else if (!is_shmem && PageDirty(page)) {
+			result = SCAN_FAIL;
+			goto xa_locked;
 		} else if (trylock_page(page)) {
 			get_page(page);
 			xas_unlock_irq(&xas);
@@ -1426,6 +1458,12 @@  static void collapse_file(struct mm_struct *mm,
 			goto out_unlock;
 		}
 
+		if (page_has_private(page) &&
+		    !try_to_release_page(page, GFP_KERNEL)) {
+			result = SCAN_PAGE_HAS_PRIVATE;
+			break;
+		}
+
 		if (page_mapped(page))
 			unmap_mapping_pages(mapping, index, 1, false);
 
@@ -1463,12 +1501,18 @@  static void collapse_file(struct mm_struct *mm,
 		goto xa_unlocked;
 	}
 
-	__inc_node_page_state(new_page, NR_SHMEM_THPS);
+	if (is_shmem)
+		__inc_node_page_state(new_page, NR_SHMEM_THPS);
+	else
+		__inc_node_page_state(new_page, NR_FILE_THPS);
+
 	if (nr_none) {
 		struct zone *zone = page_zone(new_page);
 
 		__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-		__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+		if (is_shmem)
+			__mod_node_page_state(zone->zone_pgdat,
+					      NR_SHMEM, nr_none);
 	}
 
 xa_locked:
@@ -1506,10 +1550,15 @@  static void collapse_file(struct mm_struct *mm,
 
 		SetPageUptodate(new_page);
 		page_ref_add(new_page, HPAGE_PMD_NR - 1);
-		set_page_dirty(new_page);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
+
+		if (is_shmem) {
+			set_page_dirty(new_page);
+			lru_cache_add_anon(new_page);
+		} else {
+			lru_cache_add_file(new_page);
+		}
 		count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
-		lru_cache_add_anon(new_page);
 
 		/*
 		 * Remove pte page tables, so we can re-fault the page as huge.
@@ -1524,7 +1573,9 @@  static void collapse_file(struct mm_struct *mm,
 		/* Something went wrong: roll back page cache changes */
 		xas_lock_irq(&xas);
 		mapping->nrpages -= nr_none;
-		shmem_uncharge(mapping->host, nr_none);
+
+		if (is_shmem)
+			shmem_uncharge(mapping->host, nr_none);
 
 		xas_set(&xas, start);
 		xas_for_each(&xas, page, end - 1) {
@@ -1607,6 +1658,17 @@  static void khugepaged_scan_file(struct mm_struct *mm,
 			break;
 		}
 
+		if (page_has_private(page) && trylock_page(page)) {
+			int ret;
+
+			ret = try_to_release_page(page, GFP_KERNEL);
+			unlock_page(page);
+			if (!ret) {
+				result = SCAN_PAGE_HAS_PRIVATE;
+				break;
+			}
+		}
+
 		if (page_count(page) != 1 + page_mapcount(page)) {
 			result = SCAN_PAGE_COUNT;
 			break;
@@ -1713,11 +1775,13 @@  static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			VM_BUG_ON(khugepaged_scan.address < hstart ||
 				  khugepaged_scan.address + HPAGE_PMD_SIZE >
 				  hend);
-			if (shmem_file(vma->vm_file)) {
+			if (vma->vm_file) {
 				struct file *file;
 				pgoff_t pgoff = linear_page_index(vma,
 						khugepaged_scan.address);
-				if (!shmem_huge_enabled(vma))
+
+				if (shmem_file(vma->vm_file)
+				    && !shmem_huge_enabled(vma))
 					goto skip;
 				file = get_file(vma->vm_file);
 				up_read(&mm->mmap_sem);
diff --git a/mm/rmap.c b/mm/rmap.c
index e5dfe2ae6b0d..87cfa2c19eda 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1192,8 +1192,10 @@  void page_add_file_rmap(struct page *page, bool compound)
 		}
 		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
 			goto out;
-		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
-		__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+		if (PageSwapBacked(page))
+			__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+		else
+			__inc_node_page_state(page, NR_FILE_PMDMAPPED);
 	} else {
 		if (PageTransCompound(page) && page_mapping(page)) {
 			VM_WARN_ON_ONCE(!PageLocked(page));
@@ -1232,8 +1234,10 @@  static void page_remove_file_rmap(struct page *page, bool compound)
 		}
 		if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
 			goto out;
-		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
-		__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+		if (PageSwapBacked(page))
+			__dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+		else
+			__dec_node_page_state(page, NR_FILE_PMDMAPPED);
 	} else {
 		if (!atomic_add_negative(-1, &page->_mapcount))
 			goto out;