diff mbox

[PATCHv6,01/37] mm, shmem: swich huge tmpfs to multi-order radix-tree entries

Message ID 20170126115819.58875-2-kirill.shutemov@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kirill A . Shutemov Jan. 26, 2017, 11:57 a.m. UTC
We would need to use multi-order radix-tree entries for ext4 and other
filesystems to have a coherent view of tags (dirty/towrite) in the tree.

This patch converts the huge tmpfs implementation to multi-order entries, so
we will be able to use the same code path for all filesystems.

We also change the interface of the page-cache lookup functions:

  - functions that look up pages[1] would return the subpages of a THP
    relevant to the requested indexes;

  - functions that look up entries[2] would return one entry per THP,
    and the index will point to the index of the head page (basically,
    rounded down to HPAGE_PMD_NR);

This would provide balanced exposure of multi-order entries to the rest
of the kernel.

[1] find_get_pages(), pagecache_get_page(), pagevec_lookup(), etc.
[2] find_get_entry(), find_get_entries(), pagevec_lookup_entries(), etc.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/pagemap.h |   9 ++
 mm/filemap.c            | 236 ++++++++++++++++++++++++++----------------------
 mm/huge_memory.c        |  48 +++++++---
 mm/khugepaged.c         |  26 ++----
 mm/shmem.c              | 117 ++++++++++--------------
 mm/truncate.c           |  15 ++-
 6 files changed, 235 insertions(+), 216 deletions(-)

Comments

Matthew Wilcox Feb. 9, 2017, 3:57 a.m. UTC | #1
On Thu, Jan 26, 2017 at 02:57:43PM +0300, Kirill A. Shutemov wrote:
> +++ b/include/linux/pagemap.h
> @@ -332,6 +332,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
>  			mapping_gfp_mask(mapping));
>  }
>  
> +static inline struct page *find_subpage(struct page *page, pgoff_t offset)
> +{
> +	VM_BUG_ON_PAGE(PageTail(page), page);
> +	VM_BUG_ON_PAGE(page->index > offset, page);
> +	VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) < offset,
> +			page);
> +	return page - page->index + offset;
> +}

What would you think to:

/*
 * Assert that page cache index @offset is covered by @page.
 *
 * @page:   head page of a compound page, or a small page (never a tail)
 * @offset: page cache index that is expected to fall inside @page
 *
 * The valid range is
 * [page->index, page->index + (1 << compound_order(page))).
 * The upper bound is exclusive: an @offset equal to it would refer to
 * whatever follows @page, not to @page itself.
 */
static inline void check_page_index(struct page *page, pgoff_t offset)
{
	/* Only a head (or small) page is accepted here. */
	VM_BUG_ON_PAGE(PageTail(page), page);
	/* @offset must not lie before the first index covered by @page. */
	VM_BUG_ON_PAGE(offset < page->index, page);
	/* @offset must not reach the first index past the last subpage. */
	VM_BUG_ON_PAGE(offset >= page->index + (1 << compound_order(page)),
			page);
}

(I think I fixed an off-by-one error up there ...  if
index + (1 << order) == offset, this is also a bug, right?
Because offset would then refer to the next page, not this page.)

/*
 * Return the subpage of @page that backs page cache index @offset.
 *
 * @page:   head page of a compound page, or a small page
 * @offset: page cache index; validated by check_page_index()
 *
 * The result is @page advanced by the distance between @offset and the
 * index stored in @page.
 */
static inline struct page *find_subpage(struct page *page, pgoff_t offset)
{
	pgoff_t delta;

	check_page_index(page, offset);
	delta = offset - page->index;
	return page + delta;
}

... then you can use check_page_index down ...

> @@ -1250,7 +1233,6 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
>  			put_page(page);
>  			goto repeat;
>  		}
> -		VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);

... here?

> @@ -1472,25 +1451,35 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
>  			goto repeat;
>  		}
>  
> +		/* For multi-order entries, find relevant subpage */
> +		if (PageTransHuge(page)) {
> +			VM_BUG_ON(index - page->index < 0);
> +			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> +			page += index - page->index;
> +		}

Use find_subpage() here?

>  		pages[ret] = page;
>  		if (++ret == nr_pages)
>  			break;
> +		if (!PageTransCompound(page))
> +			continue;
> +		for (refs = 0; ret < nr_pages &&
> +				(index + 1) % HPAGE_PMD_NR;
> +				ret++, refs++, index++)
> +			pages[ret] = ++page;
> +		if (refs)
> +			page_ref_add(compound_head(page), refs);
> +		if (ret == nr_pages)
> +			break;

Can we avoid referencing huge pages specifically in the page cache?  I'd
like us to get to the point where we can put arbitrary compound pages into
the page cache.  For example, I think this can be written as:

		if (!PageCompound(page))
			continue;
		for (refs = 0; ret < nr_pages; refs++, index++) {
			if (index > page->index + (1 << compound_order(page)))
				break;
			pages[ret++] = ++page;
		}
		if (refs)
			page_ref_add(compound_head(page), refs);
		if (ret == nr_pages)
			break;

> @@ -1541,19 +1533,12 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
>  
> +		/* For multi-order entries, find relevant subpage */
> +		if (PageTransHuge(page)) {
> +			VM_BUG_ON(index - page->index < 0);
> +			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> +			page += index - page->index;
> +		}
> +
>  		pages[ret] = page;
>  		if (++ret == nr_pages)
>  			break;
> +		if (!PageTransCompound(page))
> +			continue;
> +		for (refs = 0; ret < nr_pages &&
> +				(index + 1) % HPAGE_PMD_NR;
> +				ret++, refs++, index++)
> +			pages[ret] = ++page;
> +		if (refs)
> +			page_ref_add(compound_head(page), refs);
> +		if (ret == nr_pages)
> +			break;
>  	}
>  	rcu_read_unlock();
>  	return ret;

Ugh, the same code again.  Hmm ... we only need to modify 'ret' as a result
of this ... so could we split it out like this?

/*
 * Store @page, and any tail pages that directly follow it, into @pages.
 *
 * @pages: output array
 * @i:     first free slot in @pages
 * @max:   capacity of @pages; filling stops once @i reaches it
 * @page:  first page to store (head, tail or small page)
 *
 * Returns the new number of used slots in @pages.
 *
 * One extra reference is taken on the compound head for every entry
 * stored after the first one.  The reference for the first entry is
 * presumably already held by the caller -- TODO confirm at call sites.
 */
static unsigned populate_pages(struct page **pages, unsigned i, unsigned max,
                                struct page *page)
{
        unsigned refs = 0;

        for (;;) {
                pages[i++] = page;
                if (i == max)
                        break;
                /*
                 * Continue only while the next struct page is a tail page,
                 * i.e. still part of the same compound page.  Testing
                 * PageHead() here would be wrong: the page that follows a
                 * compound page may also be a small page, which is neither
                 * head nor tail, and the walk would run past the end of
                 * the compound page.
                 */
                if (!PageTail(page + 1))
                        break;
                page++;
                refs++;
        }
        if (refs)
                page_ref_add(compound_head(page), refs);
        return i;
}

> +unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *indexp,
>  			int tag, unsigned int nr_pages, struct page **pages)

>  			break;
> +		if (!PageTransCompound(page))
> +			continue;
> +		for (refs = 0; ret < nr_pages &&
> +				(index + 1) % HPAGE_PMD_NR;
> +				ret++, refs++, index++)
> +			pages[ret] = ++page;
> +		if (refs)
> +			page_ref_add(compound_head(page), refs);
> +		if (ret == nr_pages)
> +			break;
>  	}

... and again!

> @@ -2326,25 +2337,26 @@ void filemap_map_pages(struct vm_fault *vmf,
> +		/* For multi-order entries, find relevant subpage */
> +		if (PageTransHuge(page)) {
> +			VM_BUG_ON(index - page->index < 0);
> +			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> +			page += index - page->index;
> +		}
> +
>  		if (!PageUptodate(page) ||
>  				PageReadahead(page) ||

Readahead is PF_NO_COMPOUND ... so I don't see why this works?

> @@ -2378,8 +2390,14 @@ void filemap_map_pages(struct vm_fault *vmf,
>  		/* Huge page is mapped? No need to proceed. */
>  		if (pmd_trans_huge(*vmf->pmd))
>  			break;
> -		if (iter.index == end_pgoff)
> +		if (index == end_pgoff)
>  			break;
> +		if (page && PageTransCompound(page) &&
> +				(index & (HPAGE_PMD_NR - 1)) !=
> +				HPAGE_PMD_NR - 1) {
> +			index++;
> +			goto repeat;

Do we really have to go all the way back to the beginning of the loop?  It'd
be nice to be able to insert all of the relevant PTEs in a shorter loop here.
We'd need to bump the reference count N more times, and I think we do need
to check HWPoison for each subpage.
Kirill A. Shutemov Feb. 9, 2017, 4:58 p.m. UTC | #2
On Wed, Feb 08, 2017 at 07:57:27PM -0800, Matthew Wilcox wrote:
> On Thu, Jan 26, 2017 at 02:57:43PM +0300, Kirill A. Shutemov wrote:
> > +++ b/include/linux/pagemap.h
> > @@ -332,6 +332,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
> >  			mapping_gfp_mask(mapping));
> >  }
> >  
> > +static inline struct page *find_subpage(struct page *page, pgoff_t offset)
> > +{
> > +	VM_BUG_ON_PAGE(PageTail(page), page);
> > +	VM_BUG_ON_PAGE(page->index > offset, page);
> > +	VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) < offset,
> > +			page);
> > +	return page - page->index + offset;
> > +}
> 
> What would you think to:
> 
> static inline void check_page_index(struct page *page, pgoff_t offset)
> {
> 	VM_BUG_ON_PAGE(PageTail(page), page);
> 	VM_BUG_ON_PAGE(page->index > offset, page);
> 	VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) <= offset,
> 			page);
> }
> 
> (I think I fixed an off-by-one error up there ...  if
> index + (1 << order) == offset, this is also a bug, right?
> because offset would then refer to the next page, not this page)

Right, thanks.

> 
> static inline struct page *find_subpage(struct page *page, pgoff_t offset)
> {
> 	check_page_index(page, offset);
> 	return page + (offset - page->index);
> }
> 
> ... then you can use check_page_index down ...

Okay, makes sense.

> 
> > @@ -1250,7 +1233,6 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
> >  			put_page(page);
> >  			goto repeat;
> >  		}
> > -		VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
> 
> ... here?

Ok.

> > @@ -1472,25 +1451,35 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
> >  			goto repeat;
> >  		}
> >  
> > +		/* For multi-order entries, find relevant subpage */
> > +		if (PageTransHuge(page)) {
> > +			VM_BUG_ON(index - page->index < 0);
> > +			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> > +			page += index - page->index;
> > +		}
> 
> Use find_subpage() here?

Ok.

> >  		pages[ret] = page;
> >  		if (++ret == nr_pages)
> >  			break;
> > +		if (!PageTransCompound(page))
> > +			continue;
> > +		for (refs = 0; ret < nr_pages &&
> > +				(index + 1) % HPAGE_PMD_NR;
> > +				ret++, refs++, index++)
> > +			pages[ret] = ++page;
> > +		if (refs)
> > +			page_ref_add(compound_head(page), refs);
> > +		if (ret == nr_pages)
> > +			break;
> 
> Can we avoid referencing huge pages specifically in the page cache?  I'd
> like us to get to the point where we can put arbitrary compound pages into
> the page cache.  For example, I think this can be written as:
> 
> 		if (!PageCompound(page))
> 			continue;
> 		for (refs = 0; ret < nr_pages; refs++, index++) {
> 			if (index > page->index + (1 << compound_order(page)))
> 				break;
> 			pages[ret++] = ++page;
> 		}
> 		if (refs)
> 			page_ref_add(compound_head(page), refs);
> 		if (ret == nr_pages)
> 			break;

That's slightly more costly, but I guess that's fine.

> > @@ -1541,19 +1533,12 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
> >  
> > +		/* For multi-order entries, find relevant subpage */
> > +		if (PageTransHuge(page)) {
> > +			VM_BUG_ON(index - page->index < 0);
> > +			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> > +			page += index - page->index;
> > +		}
> > +
> >  		pages[ret] = page;
> >  		if (++ret == nr_pages)
> >  			break;
> > +		if (!PageTransCompound(page))
> > +			continue;
> > +		for (refs = 0; ret < nr_pages &&
> > +				(index + 1) % HPAGE_PMD_NR;
> > +				ret++, refs++, index++)
> > +			pages[ret] = ++page;
> > +		if (refs)
> > +			page_ref_add(compound_head(page), refs);
> > +		if (ret == nr_pages)
> > +			break;
> >  	}
> >  	rcu_read_unlock();
> >  	return ret;
> 
> Ugh, the same code again.  Hmm ... we only need to modify 'ret' as a result
> of this ... so could we split it out like this?
> 
> static unsigned populate_pages(struct page **pages, unsigned i, unsigned max,
>                                 struct page *page)
> {
>         unsigned refs = 0;
>         for (;;) {
>                 pages[i++] = page;
>                 if (i == max)
>                         break;
>                 if (PageHead(page + 1))
>                         break;

Hm? PageHead()? No. The next page can be a head page or a small page.

I *guess* we can get away with !PageTail(page + 1)...

>                 page++;
>                 refs++;
>         }
>         if (refs)
>                 page_ref_add(compound_head(page), refs);
>         return i;
> }
> 
> > +unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *indexp,
> >  			int tag, unsigned int nr_pages, struct page **pages)
> 
> >  			break;
> > +		if (!PageTransCompound(page))
> > +			continue;
> > +		for (refs = 0; ret < nr_pages &&
> > +				(index + 1) % HPAGE_PMD_NR;
> > +				ret++, refs++, index++)
> > +			pages[ret] = ++page;
> > +		if (refs)
> > +			page_ref_add(compound_head(page), refs);
> > +		if (ret == nr_pages)
> > +			break;
> >  	}
> 
> ... and again!
> 
> > @@ -2326,25 +2337,26 @@ void filemap_map_pages(struct vm_fault *vmf,
> > +		/* For multi-order entries, find relevant subpage */
> > +		if (PageTransHuge(page)) {
> > +			VM_BUG_ON(index - page->index < 0);
> > +			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> > +			page += index - page->index;
> > +		}
> > +
> >  		if (!PageUptodate(page) ||
> >  				PageReadahead(page) ||
> 
> Readahead is PF_NO_COMPOUND ... so I don't see why this works?

We wouldn't see pages with readahead flag set/cleared until huge pages
support in ext4 is enabled at very end of the patchset.

> 
> > @@ -2378,8 +2390,14 @@ void filemap_map_pages(struct vm_fault *vmf,
> >  		/* Huge page is mapped? No need to proceed. */
> >  		if (pmd_trans_huge(*vmf->pmd))
> >  			break;
> > -		if (iter.index == end_pgoff)
> > +		if (index == end_pgoff)
> >  			break;
> > +		if (page && PageTransCompound(page) &&
> > +				(index & (HPAGE_PMD_NR - 1)) !=
> > +				HPAGE_PMD_NR - 1) {
> > +			index++;
> > +			goto repeat;
> 
> Do we really have to go all the way back to the beginning of the loop?  It'd
> be nice to be able to insert all of the relevant PTEs in a shorter loop here.
> We'd need to bump the reference count N more times, and I think we do need
> to check HWPoison for each subpage.

I'll look into it.

Thanks for review.
Kirill A. Shutemov Feb. 13, 2017, 1:43 p.m. UTC | #3
On Thu, Feb 09, 2017 at 07:58:20PM +0300, Kirill A. Shutemov wrote:
> I'll look into it.

I ended up with this (I'll test it more later):

/*
 * Map the page cache pages in [@start_pgoff, @end_pgoff] of the file
 * backing @vmf->vma into the page table, without blocking.
 *
 * @vmf:         fault descriptor; ->address and ->pte are advanced as
 *               pages are mapped
 * @start_pgoff: first page cache index to consider
 * @end_pgoff:   last page cache index to consider (inclusive)
 *
 * For a compound page every subpage inside the range is mapped in an
 * inner loop (map_next_subpage) instead of restarting the radix-tree
 * lookup for each subpage.
 *
 * Reference counting: page_cache_get_speculative() gives us one
 * reference.  Each successful alloc_set_pte() hands one reference to the
 * page table, so "mapped == false" always means "we still hold one
 * reference that has not been handed to a PTE".
 */
void filemap_map_pages(struct vm_fault *vmf,
		pgoff_t start_pgoff, pgoff_t end_pgoff)
{
	struct radix_tree_iter iter;
	void **slot;
	struct file *file = vmf->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	pgoff_t last_pgoff = start_pgoff;
	loff_t size;
	struct page *page;
	bool mapped;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
			start_pgoff) {
		/* A multi-order entry may start before @start_pgoff. */
		unsigned long index = iter.index;
		if (index < start_pgoff)
			index = start_pgoff;
		if (index > end_pgoff)
			break;
		/*
		 * Nothing is mapped yet for this slot.  Without this reset
		 * the skip paths below would test an uninitialized value, or
		 * the result left over from the previous slot.
		 */
		mapped = false;
repeat:
		page = radix_tree_deref_slot(slot);
		if (unlikely(!page))
			continue;
		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page))
				slot = radix_tree_iter_retry(&iter);
			continue;
		}

		if (!page_cache_get_speculative(page))
			goto repeat;

		/* Has the page moved? */
		if (unlikely(page != *slot)) {
			put_page(page);
			goto repeat;
		}

		/* For multi-order entries, find relevant subpage */
		page = find_subpage(page, index);

		if (!PageUptodate(page) || PageReadahead(page))
			goto skip;
		if (!trylock_page(page))
			goto skip;

		if (page_mapping(page) != mapping || !PageUptodate(page))
			goto skip_unlock;

		size = round_up(i_size_read(mapping->host), PAGE_SIZE);
		if (compound_head(page)->index >= size >> PAGE_SHIFT)
			goto skip_unlock;

		if (file->f_ra.mmap_miss > 0)
			file->f_ra.mmap_miss--;
map_next_subpage:
		/*
		 * At this point we hold exactly one reference that has not
		 * been handed to a PTE.  A poisoned subpage is not mapped and
		 * therefore leaves that reference with us.
		 */
		mapped = false;
		if (!PageHWPoison(page)) {
			vmf->address += (index - last_pgoff) << PAGE_SHIFT;
			if (vmf->pte)
				vmf->pte += index - last_pgoff;
			last_pgoff = index;
			mapped = !alloc_set_pte(vmf, NULL, page);
		}

		/* Huge page is mapped or last index? No need to proceed. */
		if (pmd_trans_huge(*vmf->pmd) ||
				index == end_pgoff) {
			unlock_page(page);
			/* Drop our reference if no PTE took it over. */
			if (!mapped)
				put_page(page);
			break;
		}

		if (PageCompound(page)) {
			/* Last subpage handled? */
			if ((index & (compound_nr_pages(page) - 1)) ==
					compound_nr_pages(page) - 1)
				goto skip_unlock;
			index++;
			page++;

			/*
			 * One page reference goes to page table mapping.
			 * Need additional reference, if last alloc_set_pte()
			 * succeed.
			 */
			if (mapped)
				get_page(page);
			goto map_next_subpage;
		}
skip_unlock:
		unlock_page(page);
skip:
		/* Continue the radix-tree walk after the last subpage. */
		iter.index = compound_head(page)->index +
			compound_nr_pages(page) - 1;
		/* Only give up reference if alloc_set_pte() failed. */
		if (!mapped)
			put_page(page);
	}
	rcu_read_unlock();
}
diff mbox

Patch

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 324c8dbad1e1..ad63a7be5a5e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -332,6 +332,15 @@  static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 			mapping_gfp_mask(mapping));
 }
 
+static inline struct page *find_subpage(struct page *page, pgoff_t offset)
+{
+	VM_BUG_ON_PAGE(PageTail(page), page);
+	VM_BUG_ON_PAGE(page->index > offset, page);
+	VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) < offset,
+			page);
+	return page - page->index + offset;
+}
+
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
diff --git a/mm/filemap.c b/mm/filemap.c
index b772a33ef640..837a71a2a412 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -150,7 +150,9 @@  static int page_cache_tree_insert(struct address_space *mapping,
 static void page_cache_tree_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
-	int i, nr;
+	struct radix_tree_node *node;
+	void **slot;
+	int nr;
 
 	/* hugetlb pages are represented by one entry in the radix tree */
 	nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
@@ -159,19 +161,12 @@  static void page_cache_tree_delete(struct address_space *mapping,
 	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(nr != 1 && shadow, page);
 
-	for (i = 0; i < nr; i++) {
-		struct radix_tree_node *node;
-		void **slot;
+	__radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
+	VM_BUG_ON_PAGE(!node && nr != 1, page);
 
-		__radix_tree_lookup(&mapping->page_tree, page->index + i,
-				    &node, &slot);
-
-		VM_BUG_ON_PAGE(!node && nr != 1, page);
-
-		radix_tree_clear_tags(&mapping->page_tree, node, slot);
-		__radix_tree_replace(&mapping->page_tree, node, slot, shadow,
-				     workingset_update_node, mapping);
-	}
+	radix_tree_clear_tags(&mapping->page_tree, node, slot);
+	__radix_tree_replace(&mapping->page_tree, node, slot, shadow,
+			workingset_update_node, mapping);
 
 	if (shadow) {
 		mapping->nrexceptional += nr;
@@ -285,12 +280,7 @@  void delete_from_page_cache(struct page *page)
 	if (freepage)
 		freepage(page);
 
-	if (PageTransHuge(page) && !PageHuge(page)) {
-		page_ref_sub(page, HPAGE_PMD_NR);
-		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
-	} else {
-		put_page(page);
-	}
+	put_page(page);
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
@@ -1172,7 +1162,7 @@  EXPORT_SYMBOL(page_cache_prev_hole);
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
 	void **pagep;
-	struct page *head, *page;
+	struct page *page;
 
 	rcu_read_lock();
 repeat:
@@ -1193,15 +1183,8 @@  struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 			goto out;
 		}
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
-			goto repeat;
-
-		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
+		if (!page_cache_get_speculative(page))
 			goto repeat;
-		}
 
 		/*
 		 * Has the page moved?
@@ -1209,7 +1192,7 @@  struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 		 * include/linux/pagemap.h for details.
 		 */
 		if (unlikely(page != *pagep)) {
-			put_page(head);
+			put_page(page);
 			goto repeat;
 		}
 	}
@@ -1250,7 +1233,6 @@  struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
 			put_page(page);
 			goto repeat;
 		}
-		VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
 	}
 	return page;
 }
@@ -1307,7 +1289,6 @@  struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 			put_page(page);
 			goto repeat;
 		}
-		VM_BUG_ON_PAGE(page->index != offset, page);
 	}
 
 	if (page && (fgp_flags & FGP_ACCESSED))
@@ -1342,6 +1323,8 @@  struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 		}
 	}
 
+	if (page)
+		page = find_subpage(page, offset);
 	return page;
 }
 EXPORT_SYMBOL(pagecache_get_page);
@@ -1382,7 +1365,7 @@  unsigned find_get_entries(struct address_space *mapping,
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-		struct page *head, *page;
+		struct page *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1400,19 +1383,12 @@  unsigned find_get_entries(struct address_space *mapping,
 			goto export;
 		}
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto repeat;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
-			goto repeat;
-		}
-
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(head);
+			put_page(page);
 			goto repeat;
 		}
 export:
@@ -1446,14 +1422,17 @@  unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 {
 	struct radix_tree_iter iter;
 	void **slot;
-	unsigned ret = 0;
+	unsigned refs, ret = 0;
 
 	if (unlikely(!nr_pages))
 		return 0;
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
-		struct page *head, *page;
+		struct page *page;
+		unsigned long index = iter.index;
+		if (index < start)
+			index = start;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1472,25 +1451,35 @@  unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 			continue;
 		}
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
-			goto repeat;
-
-		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
+		if (!page_cache_get_speculative(page))
 			goto repeat;
-		}
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(head);
+			put_page(page);
 			goto repeat;
 		}
 
+		/* For multi-order entries, find relevant subpage */
+		if (PageTransHuge(page)) {
+			VM_BUG_ON(index - page->index < 0);
+			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+			page += index - page->index;
+		}
+
 		pages[ret] = page;
 		if (++ret == nr_pages)
 			break;
+		if (!PageTransCompound(page))
+			continue;
+		for (refs = 0; ret < nr_pages &&
+				(index + 1) % HPAGE_PMD_NR;
+				ret++, refs++, index++)
+			pages[ret] = ++page;
+		if (refs)
+			page_ref_add(compound_head(page), refs);
+		if (ret == nr_pages)
+			break;
 	}
 
 	rcu_read_unlock();
@@ -1500,7 +1489,7 @@  unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 /**
  * find_get_pages_contig - gang contiguous pagecache lookup
  * @mapping:	The address_space to search
- * @index:	The starting page index
+ * @start:	The starting page index
  * @nr_pages:	The maximum number of pages
  * @pages:	Where the resulting pages are placed
  *
@@ -1509,19 +1498,22 @@  unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
  *
  * find_get_pages_contig() returns the number of pages which were found.
  */
-unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
+unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
 			       unsigned int nr_pages, struct page **pages)
 {
 	struct radix_tree_iter iter;
 	void **slot;
-	unsigned int ret = 0;
+	unsigned int refs, ret = 0;
 
 	if (unlikely(!nr_pages))
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
-		struct page *head, *page;
+	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, start) {
+		struct page *page;
+		unsigned long index = iter.index;
+		if (index < start)
+			index = start;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		/* The hole, there no reason to continue */
@@ -1541,19 +1533,12 @@  unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 			break;
 		}
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto repeat;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
-			goto repeat;
-		}
-
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(head);
+			put_page(page);
 			goto repeat;
 		}
 
@@ -1562,14 +1547,31 @@  unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 		 * otherwise we can get both false positives and false
 		 * negatives, which is just confusing to the caller.
 		 */
-		if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
+		if (page->mapping == NULL || page_to_pgoff(page) != index) {
 			put_page(page);
 			break;
 		}
 
+		/* For multi-order entries, find relevant subpage */
+		if (PageTransHuge(page)) {
+			VM_BUG_ON(index - page->index < 0);
+			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+			page += index - page->index;
+		}
+
 		pages[ret] = page;
 		if (++ret == nr_pages)
 			break;
+		if (!PageTransCompound(page))
+			continue;
+		for (refs = 0; ret < nr_pages &&
+				(index + 1) % HPAGE_PMD_NR;
+				ret++, refs++, index++)
+			pages[ret] = ++page;
+		if (refs)
+			page_ref_add(compound_head(page), refs);
+		if (ret == nr_pages)
+			break;
 	}
 	rcu_read_unlock();
 	return ret;
@@ -1579,7 +1581,7 @@  EXPORT_SYMBOL(find_get_pages_contig);
 /**
  * find_get_pages_tag - find and return pages that match @tag
  * @mapping:	the address_space to search
- * @index:	the starting page index
+ * @indexp:	the starting page index
  * @tag:	the tag index
  * @nr_pages:	the maximum number of pages
  * @pages:	where the resulting pages are placed
@@ -1587,20 +1589,23 @@  EXPORT_SYMBOL(find_get_pages_contig);
  * Like find_get_pages, except we only return pages which are tagged with
  * @tag.   We update @index to index the next page for the traversal.
  */
-unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
+unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *indexp,
 			int tag, unsigned int nr_pages, struct page **pages)
 {
 	struct radix_tree_iter iter;
 	void **slot;
-	unsigned ret = 0;
+	unsigned refs, ret = 0;
 
 	if (unlikely(!nr_pages))
 		return 0;
 
 	rcu_read_lock();
 	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, *index, tag) {
-		struct page *head, *page;
+				   &iter, *indexp, tag) {
+		struct page *page;
+		unsigned long index = iter.index;
+		if (index < *indexp)
+			index = *indexp;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1625,31 +1630,41 @@  unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			continue;
 		}
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
-			goto repeat;
-
-		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
+		if (!page_cache_get_speculative(page))
 			goto repeat;
-		}
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(head);
+			put_page(page);
 			goto repeat;
 		}
 
+		/* For multi-order entries, find relevant subpage */
+		if (PageTransHuge(page)) {
+			VM_BUG_ON(index - page->index < 0);
+			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+			page += index - page->index;
+		}
+
 		pages[ret] = page;
 		if (++ret == nr_pages)
 			break;
+		if (!PageTransCompound(page))
+			continue;
+		for (refs = 0; ret < nr_pages &&
+				(index + 1) % HPAGE_PMD_NR;
+				ret++, refs++, index++)
+			pages[ret] = ++page;
+		if (refs)
+			page_ref_add(compound_head(page), refs);
+		if (ret == nr_pages)
+			break;
 	}
 
 	rcu_read_unlock();
 
 	if (ret)
-		*index = pages[ret - 1]->index + 1;
+		*indexp = page_to_pgoff(pages[ret - 1]) + 1;
 
 	return ret;
 }
@@ -1681,7 +1696,7 @@  unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
 	rcu_read_lock();
 	radix_tree_for_each_tagged(slot, &mapping->page_tree,
 				   &iter, start, tag) {
-		struct page *head, *page;
+		struct page *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -1700,19 +1715,12 @@  unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
 			goto export;
 		}
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
-			goto repeat;
-
-		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
+		if (!page_cache_get_speculative(page))
 			goto repeat;
-		}
 
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(head);
+			put_page(page);
 			goto repeat;
 		}
 export:
@@ -2310,12 +2318,15 @@  void filemap_map_pages(struct vm_fault *vmf,
 	struct address_space *mapping = file->f_mapping;
 	pgoff_t last_pgoff = start_pgoff;
 	loff_t size;
-	struct page *head, *page;
+	struct page *page;
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
 			start_pgoff) {
-		if (iter.index > end_pgoff)
+		unsigned long index = iter.index;
+		if (index < start_pgoff)
+			index = start_pgoff;
+		if (index > end_pgoff)
 			break;
 repeat:
 		page = radix_tree_deref_slot(slot);
@@ -2326,25 +2337,26 @@  void filemap_map_pages(struct vm_fault *vmf,
 				slot = radix_tree_iter_retry(&iter);
 				continue;
 			}
+			page = NULL;
 			goto next;
 		}
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto repeat;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
-			goto repeat;
-		}
-
 		/* Has the page moved? */
 		if (unlikely(page != *slot)) {
-			put_page(head);
+			put_page(page);
 			goto repeat;
 		}
 
+		/* For multi-order entries, find relevant subpage */
+		if (PageTransHuge(page)) {
+			VM_BUG_ON(index - page->index < 0);
+			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
+			page += index - page->index;
+		}
+
 		if (!PageUptodate(page) ||
 				PageReadahead(page) ||
 				PageHWPoison(page))
@@ -2352,20 +2364,20 @@  void filemap_map_pages(struct vm_fault *vmf,
 		if (!trylock_page(page))
 			goto skip;
 
-		if (page->mapping != mapping || !PageUptodate(page))
+		if (page_mapping(page) != mapping || !PageUptodate(page))
 			goto unlock;
 
 		size = round_up(i_size_read(mapping->host), PAGE_SIZE);
-		if (page->index >= size >> PAGE_SHIFT)
+		if (compound_head(page)->index >= size >> PAGE_SHIFT)
 			goto unlock;
 
 		if (file->f_ra.mmap_miss > 0)
 			file->f_ra.mmap_miss--;
 
-		vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT;
+		vmf->address += (index - last_pgoff) << PAGE_SHIFT;
 		if (vmf->pte)
-			vmf->pte += iter.index - last_pgoff;
-		last_pgoff = iter.index;
+			vmf->pte += index - last_pgoff;
+		last_pgoff = index;
 		if (alloc_set_pte(vmf, NULL, page))
 			goto unlock;
 		unlock_page(page);
@@ -2378,8 +2390,14 @@  void filemap_map_pages(struct vm_fault *vmf,
 		/* Huge page is mapped? No need to proceed. */
 		if (pmd_trans_huge(*vmf->pmd))
 			break;
-		if (iter.index == end_pgoff)
+		if (index == end_pgoff)
 			break;
+		if (page && PageTransCompound(page) &&
+				(index & (HPAGE_PMD_NR - 1)) !=
+				HPAGE_PMD_NR - 1) {
+			index++;
+			goto repeat;
+		}
 	}
 	rcu_read_unlock();
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ca7855f857fa..f383cb801e34 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1928,6 +1928,7 @@  static void __split_huge_page(struct page *page, struct list_head *list,
 	struct page *head = compound_head(page);
 	struct zone *zone = page_zone(head);
 	struct lruvec *lruvec;
+	struct page *subpage;
 	pgoff_t end = -1;
 	int i;
 
@@ -1936,8 +1937,27 @@  static void __split_huge_page(struct page *page, struct list_head *list,
 	/* complete memcg works before add pages to LRU */
 	mem_cgroup_split_huge_fixup(head);
 
-	if (!PageAnon(page))
-		end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
+	if (!PageAnon(head)) {
+		struct address_space *mapping = head->mapping;
+		struct radix_tree_iter iter;
+		void **slot;
+
+		__dec_node_page_state(head, NR_SHMEM_THPS);
+
+		radix_tree_split(&mapping->page_tree, head->index, 0);
+		radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
+				head->index) {
+			if (iter.index >= head->index + HPAGE_PMD_NR)
+				break;
+			subpage = head + iter.index - head->index;
+			radix_tree_replace_slot(&mapping->page_tree,
+					slot, subpage);
+			VM_BUG_ON_PAGE(compound_head(subpage) != head, subpage);
+		}
+		radix_tree_preload_end();
+
+		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+	}
 
 	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
@@ -1966,7 +1986,7 @@  static void __split_huge_page(struct page *page, struct list_head *list,
 	unfreeze_page(head);
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
-		struct page *subpage = head + i;
+		subpage = head + i;
 		if (subpage == page)
 			continue;
 		unlock_page(subpage);
@@ -2123,8 +2143,8 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 			goto out;
 		}
 
-		/* Addidional pins from radix tree */
-		extra_pins = HPAGE_PMD_NR;
+		/* Additional pin from radix tree */
+		extra_pins = 1;
 		anon_vma = NULL;
 		i_mmap_lock_read(mapping);
 	}
@@ -2146,6 +2166,12 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 	if (mlocked)
 		lru_add_drain();
 
+	if (mapping && radix_tree_split_preload(HPAGE_PMD_ORDER, 0,
+				GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto unfreeze;
+	}
+
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
 	spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);
 
@@ -2155,10 +2181,7 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 		spin_lock(&mapping->tree_lock);
 		pslot = radix_tree_lookup_slot(&mapping->page_tree,
 				page_index(head));
-		/*
-		 * Check if the head page is present in radix tree.
-		 * We assume all tail are present too, if head is there.
-		 */
+		/* Check if the page is present in radix tree */
 		if (radix_tree_deref_slot_protected(pslot,
 					&mapping->tree_lock) != head)
 			goto fail;
@@ -2173,8 +2196,6 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 			pgdata->split_queue_len--;
 			list_del(page_deferred_list(head));
 		}
-		if (mapping)
-			__dec_node_page_state(page, NR_SHMEM_THPS);
 		spin_unlock(&pgdata->split_queue_lock);
 		__split_huge_page(page, list, flags);
 		ret = 0;
@@ -2188,9 +2209,12 @@  int split_huge_page_to_list(struct page *page, struct list_head *list)
 			BUG();
 		}
 		spin_unlock(&pgdata->split_queue_lock);
-fail:		if (mapping)
+fail:		if (mapping) {
 			spin_unlock(&mapping->tree_lock);
+			radix_tree_preload_end();
+		}
 		spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+unfreeze:
 		unfreeze_page(head);
 		ret = -EBUSY;
 	}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 77ae3239c3de..1910048bf63a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1350,10 +1350,8 @@  static void collapse_shmem(struct mm_struct *mm,
 			break;
 		}
 		nr_none += n;
-		for (; index < min(iter.index, end); index++) {
-			radix_tree_insert(&mapping->page_tree, index,
-					new_page + (index % HPAGE_PMD_NR));
-		}
+		for (; index < min(iter.index, end); index++)
+			radix_tree_insert(&mapping->page_tree, index, new_page);
 
 		/* We are done. */
 		if (index >= end)
@@ -1425,8 +1423,7 @@  static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		radix_tree_replace_slot(&mapping->page_tree, slot,
-				new_page + (index % HPAGE_PMD_NR));
+		radix_tree_replace_slot(&mapping->page_tree, slot, new_page);
 
 		slot = radix_tree_iter_resume(slot, &iter);
 		index++;
@@ -1444,24 +1441,17 @@  static void collapse_shmem(struct mm_struct *mm,
 		break;
 	}
 
-	/*
-	 * Handle hole in radix tree at the end of the range.
-	 * This code only triggers if there's nothing in radix tree
-	 * beyond 'end'.
-	 */
-	if (result == SCAN_SUCCEED && index < end) {
+	if (result == SCAN_SUCCEED) {
 		int n = end - index;
 
-		if (!shmem_charge(mapping->host, n)) {
+		if (n && !shmem_charge(mapping->host, n)) {
 			result = SCAN_FAIL;
 			goto tree_locked;
 		}
-
-		for (; index < end; index++) {
-			radix_tree_insert(&mapping->page_tree, index,
-					new_page + (index % HPAGE_PMD_NR));
-		}
 		nr_none += n;
+
+		radix_tree_join(&mapping->page_tree, start,
+				HPAGE_PMD_ORDER, new_page);
 	}
 
 tree_locked:
diff --git a/mm/shmem.c b/mm/shmem.c
index bb53285a1d99..ff017bbde4b3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -544,33 +544,14 @@  static int shmem_add_to_page_cache(struct page *page,
 	VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 	VM_BUG_ON(expected && PageTransHuge(page));
 
-	page_ref_add(page, nr);
+	get_page(page);
 	page->mapping = mapping;
 	page->index = index;
 
 	spin_lock_irq(&mapping->tree_lock);
-	if (PageTransHuge(page)) {
-		void __rcu **results;
-		pgoff_t idx;
-		int i;
-
-		error = 0;
-		if (radix_tree_gang_lookup_slot(&mapping->page_tree,
-					&results, &idx, index, 1) &&
-				idx < index + HPAGE_PMD_NR) {
-			error = -EEXIST;
-		}
-
-		if (!error) {
-			for (i = 0; i < HPAGE_PMD_NR; i++) {
-				error = radix_tree_insert(&mapping->page_tree,
-						index + i, page + i);
-				VM_BUG_ON(error);
-			}
-			count_vm_event(THP_FILE_ALLOC);
-		}
-	} else if (!expected) {
-		error = radix_tree_insert(&mapping->page_tree, index, page);
+	if (!expected) {
+		error = __radix_tree_insert(&mapping->page_tree, index,
+				compound_order(page), page);
 	} else {
 		error = shmem_radix_tree_replace(mapping, index, expected,
 								 page);
@@ -578,15 +559,17 @@  static int shmem_add_to_page_cache(struct page *page,
 
 	if (!error) {
 		mapping->nrpages += nr;
-		if (PageTransHuge(page))
+		if (PageTransHuge(page)) {
+			count_vm_event(THP_FILE_ALLOC);
 			__inc_node_page_state(page, NR_SHMEM_THPS);
+		}
 		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
 		spin_unlock_irq(&mapping->tree_lock);
 	} else {
 		page->mapping = NULL;
 		spin_unlock_irq(&mapping->tree_lock);
-		page_ref_sub(page, nr);
+		put_page(page);
 	}
 	return error;
 }
@@ -727,8 +710,9 @@  void shmem_unlock_mapping(struct address_space *mapping)
 					   PAGEVEC_SIZE, pvec.pages, indices);
 		if (!pvec.nr)
 			break;
-		index = indices[pvec.nr - 1] + 1;
 		pagevec_remove_exceptionals(&pvec);
+		index = indices[pvec.nr - 1] +
+			hpage_nr_pages(pvec.pages[pvec.nr - 1]);
 		check_move_unevictable_pages(pvec.pages, pvec.nr);
 		pagevec_release(&pvec);
 		cond_resched();
@@ -785,23 +769,25 @@  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			if (!trylock_page(page))
 				continue;
 
-			if (PageTransTail(page)) {
-				/* Middle of THP: zero out the page */
-				clear_highpage(page);
-				unlock_page(page);
-				continue;
-			} else if (PageTransHuge(page)) {
+			if (PageTransHuge(page)) {
+				/* Range starts in the middle of THP */
+				if (start > page->index) {
+					pgoff_t i;
+					index += HPAGE_PMD_NR;
+					page += start - page->index;
+					for (i = start; i < index; i++, page++)
+						clear_highpage(page);
+					unlock_page(page - 1);
+					continue;
+				}
+
+				/* Range ends in the middle of THP */
 				if (index == round_down(end, HPAGE_PMD_NR)) {
-					/*
-					 * Range ends in the middle of THP:
-					 * zero out the page
-					 */
-					clear_highpage(page);
+					while (index++ < end)
+						clear_highpage(page++);
 					unlock_page(page);
 					continue;
 				}
-				index += HPAGE_PMD_NR - 1;
-				i += HPAGE_PMD_NR - 1;
 			}
 
 			if (!unfalloc || !PageUptodate(page)) {
@@ -814,9 +800,9 @@  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			unlock_page(page);
 		}
 		pagevec_remove_exceptionals(&pvec);
+		index += pvec.nr ? hpage_nr_pages(pvec.pages[pvec.nr - 1]) : 1;
 		pagevec_release(&pvec);
 		cond_resched();
-		index++;
 	}
 
 	if (partial_start) {
@@ -874,8 +860,7 @@  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 					continue;
 				if (shmem_free_swap(mapping, index, page)) {
 					/* Swap was replaced by page: retry */
-					index--;
-					break;
+					goto retry;
 				}
 				nr_swaps_freed++;
 				continue;
@@ -883,30 +868,24 @@  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 
 			lock_page(page);
 
-			if (PageTransTail(page)) {
-				/* Middle of THP: zero out the page */
-				clear_highpage(page);
-				unlock_page(page);
-				/*
-				 * Partial thp truncate due 'start' in middle
-				 * of THP: don't need to look on these pages
-				 * again on !pvec.nr restart.
-				 */
-				if (index != round_down(end, HPAGE_PMD_NR))
-					start++;
-				continue;
-			} else if (PageTransHuge(page)) {
+			if (PageTransHuge(page)) {
+				/* Range starts in the middle of THP */
+				if (start > page->index) {
+					index += HPAGE_PMD_NR;
+					page += start - page->index;
+					while (start++ < index)
+						clear_highpage(page++);
+					unlock_page(page - 1);
+					continue;
+				}
+
+				/* Range ends in the middle of THP */
 				if (index == round_down(end, HPAGE_PMD_NR)) {
-					/*
-					 * Range ends in the middle of THP:
-					 * zero out the page
-					 */
-					clear_highpage(page);
+					while (index++ < end)
+						clear_highpage(page++);
 					unlock_page(page);
 					continue;
 				}
-				index += HPAGE_PMD_NR - 1;
-				i += HPAGE_PMD_NR - 1;
 			}
 
 			if (!unfalloc || !PageUptodate(page)) {
@@ -917,15 +896,18 @@  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 				} else {
 					/* Page was replaced by swap: retry */
 					unlock_page(page);
-					index--;
-					break;
+					goto retry;
 				}
 			}
 			unlock_page(page);
 		}
 		pagevec_remove_exceptionals(&pvec);
+		index += pvec.nr ? hpage_nr_pages(pvec.pages[pvec.nr - 1]) : 1;
+		pagevec_release(&pvec);
+		continue;
+retry:
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		index++;
 	}
 
 	spin_lock_irq(&info->lock);
@@ -1762,8 +1744,7 @@  alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
 				PageTransHuge(page));
 		if (error)
 			goto unacct;
-		error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK,
-				compound_order(page));
+		error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
 		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, hindex,
 							NULL);
@@ -1837,7 +1818,7 @@  alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
 		error = -EINVAL;
 		goto unlock;
 	}
-	*pagep = page + index - hindex;
+	*pagep = find_subpage(page, index);
 	return 0;
 
 	/*
diff --git a/mm/truncate.c b/mm/truncate.c
index dd7b24e083c5..3a1a1c1a654e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -524,16 +524,13 @@  unsigned long invalidate_mapping_pages(struct address_space *mapping,
 
 			WARN_ON(page_to_index(page) != index);
 
-			/* Middle of THP: skip */
-			if (PageTransTail(page)) {
+			/* Is 'start' or 'end' in the middle of THP ? */
+			if (PageTransHuge(page) &&
+				(start > index ||
+				 (index ==  round_down(end, HPAGE_PMD_NR)))) {
+				/* skip */
 				unlock_page(page);
 				continue;
-			} else if (PageTransHuge(page)) {
-				index += HPAGE_PMD_NR - 1;
-				i += HPAGE_PMD_NR - 1;
-				/* 'end' is in the middle of THP */
-				if (index ==  round_down(end, HPAGE_PMD_NR))
-					continue;
 			}
 
 			ret = invalidate_inode_page(page);
@@ -547,9 +544,9 @@  unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			count += ret;
 		}
 		pagevec_remove_exceptionals(&pvec);
+		index += pvec.nr ? hpage_nr_pages(pvec.pages[pvec.nr - 1]) : 1;
 		pagevec_release(&pvec);
 		cond_resched();
-		index++;
 	}
 	return count;
 }