[2/3] mm: Allow large pages to be added to the page cache

Message ID 20190905182348.5319-3-willy@infradead.org
State New
Series
  • Large pages in the page cache

Commit Message

Matthew Wilcox Sept. 5, 2019, 6:23 p.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

We return -EEXIST if there are any non-shadow entries in the page
cache in the range covered by the large page.  If there are multiple
shadow entries in the range, we set *shadowp to one of them (currently
the one at the highest index).  If that turns out to be the wrong
answer, we can implement something more complex.  This is mostly
modelled after the equivalent function in the shmem code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)
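
For orientation, the heart of the change is the conflict scan below,
excerpted from the diff (locking, xas_create_range() and the accounting
updates are elided).  It walks every entry already present in the range
covered by the large page; anything that is not a shadow entry stops the
walk and leads to the -EEXIST return:

	xas_for_each_conflict(&xas, old) {
		if (!xa_is_value(old))	/* a real page already occupies the range */
			break;
		exceptional++;		/* count the shadow entries being replaced */
		if (shadowp)
			*shadowp = old;	/* last assignment wins: the highest index */
	}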

Comments

Matthew Wilcox Sept. 5, 2019, 6:28 p.m. UTC | #1
On Thu, Sep 05, 2019 at 11:23:47AM -0700, Matthew Wilcox wrote:
> +		xas_for_each_conflict(&xas, old) {
> +			if (!xa_is_value(old))
> +				break;
> +			exceptional++;
> +			if (shadowp)
> +				*shadowp = old;
> +		}
> +		if (old) {
>  			xas_set_err(&xas, -EEXIST);
> -		xas_store(&xas, page);
> +			break;

Of course, one cannot see one's own bugs until one has posted them
publicly.  This will exit the loop with the lock held.

> +		}
> +		xas_create_range(&xas);
>  		if (xas_error(&xas))
>  			goto unlock;
>  

The stanza should read:

                if (old) 
                        xas_set_err(&xas, -EEXIST);
                xas_create_range(&xas);
                if (xas_error(&xas))
                        goto unlock;

just like the corresponding stanza in mm/shmem.c.

(while the xa_state is in an error condition, the xas_create_range()
function will return without doing anything).
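
With that fix, the locked section of the retry loop reads roughly as
below (the store loop and the statistics updates are elided).  Once
xas_set_err() has been called, the xa_state carries the error, so
xas_create_range() does nothing and xas_error() routes execution to the
unlock label; the -EEXIST case therefore drops the lock before the error
is returned:

	do {
		xas_lock_irq(&xas);
		xas_for_each_conflict(&xas, old) {
			if (!xa_is_value(old))
				break;
			exceptional++;
			if (shadowp)
				*shadowp = old;
		}
		if (old)
			xas_set_err(&xas, -EEXIST);
		xas_create_range(&xas);	/* no-op while xas holds an error */
		if (xas_error(&xas))
			goto unlock;
		/* ... store the page and update the counters ... */
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));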

Kirill A. Shutemov Sept. 6, 2019, 12:09 p.m. UTC | #2
On Thu, Sep 05, 2019 at 11:23:47AM -0700, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> 
> We return -EEXIST if there are any non-shadow entries in the page
> cache in the range covered by the large page.  If there are multiple
> shadow entries in the range, we set *shadowp to one of them (currently
> the one at the highest index).  If that turns out to be the wrong
> answer, we can implement something more complex.  This is mostly
> modelled after the equivalent function in the shmem code.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/filemap.c | 39 ++++++++++++++++++++++++++++-----------
>  1 file changed, 28 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 041c77c4ca56..ae3c0a70a8e9 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -850,6 +850,7 @@ static int __add_to_page_cache_locked(struct page *page,
>  	int huge = PageHuge(page);
>  	struct mem_cgroup *memcg;
>  	int error;
> +	unsigned int nr = 1;
>  	void *old;
>  
>  	VM_BUG_ON_PAGE(!PageLocked(page), page);
> @@ -861,31 +862,47 @@ static int __add_to_page_cache_locked(struct page *page,
>  					      gfp_mask, &memcg, false);
>  		if (error)
>  			return error;
> +		xas_set_order(&xas, offset, compound_order(page));
> +		nr = compound_nr(page);
>  	}
>  
> -	get_page(page);
> +	page_ref_add(page, nr);
>  	page->mapping = mapping;
>  	page->index = offset;
>  
>  	do {
> +		unsigned long exceptional = 0;
> +		unsigned int i = 0;
> +
>  		xas_lock_irq(&xas);
> -		old = xas_load(&xas);
> -		if (old && !xa_is_value(old))
> +		xas_for_each_conflict(&xas, old) {
> +			if (!xa_is_value(old))
> +				break;
> +			exceptional++;
> +			if (shadowp)
> +				*shadowp = old;
> +		}
> +		if (old) {
>  			xas_set_err(&xas, -EEXIST);
> -		xas_store(&xas, page);
> +			break;
> +		}
> +		xas_create_range(&xas);
>  		if (xas_error(&xas))
>  			goto unlock;
>  
> -		if (xa_is_value(old)) {
> -			mapping->nrexceptional--;
> -			if (shadowp)
> -				*shadowp = old;
> +next:
> +		xas_store(&xas, page);
> +		if (++i < nr) {
> +			xas_next(&xas);
> +			goto next;
>  		}

Can we have a proper loop here instead of goto?

		do {
			xas_store(&xas, page);
			/* Do not move xas outside the range */
			if (++i != nr)
				xas_next(&xas);
		} while (i < nr);

> -		mapping->nrpages++;
> +		mapping->nrexceptional -= exceptional;
> +		mapping->nrpages += nr;
>  
>  		/* hugetlb pages do not participate in page cache accounting */
>  		if (!huge)
> -			__inc_node_page_state(page, NR_FILE_PAGES);
> +			__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES,
> +						nr);
>  unlock:
>  		xas_unlock_irq(&xas);
>  	} while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
> @@ -902,7 +919,7 @@ static int __add_to_page_cache_locked(struct page *page,
>  	/* Leave page->index set: truncation relies upon it */
>  	if (!huge)
>  		mem_cgroup_cancel_charge(page, memcg, false);
> -	put_page(page);
> +	page_ref_sub(page, nr);
>  	return xas_error(&xas);
>  }
>  ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
> -- 
> 2.23.0.rc1
>

Matthew Wilcox Sept. 6, 2019, 1:31 p.m. UTC | #3
On Fri, Sep 06, 2019 at 03:09:44PM +0300, Kirill A. Shutemov wrote:
> On Thu, Sep 05, 2019 at 11:23:47AM -0700, Matthew Wilcox wrote:
> > +next:
> > +		xas_store(&xas, page);
> > +		if (++i < nr) {
> > +			xas_next(&xas);
> > +			goto next;
> >  		}
> 
> Can we have a proper loop here instead of goto?
> 
> 		do {
> 			xas_store(&xas, page);
> 			/* Do not move xas outside the range */
> 			if (++i != nr)
> 				xas_next(&xas);
> 		} while (i < nr);

We could.  I wanted to keep it as close to the shmem.c code as possible,
and this code is scheduled to go away once we're using a single large
entry in the xarray instead of N consecutive entries.
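
Roughly, with a single multi-order entry (just a sketch of that
direction, not code from this series), xas_create_range() and the
per-index store loop are both replaced by one store:

	/* one multi-order entry covers all compound_nr(page) indices */
	xas_set_order(&xas, offset, compound_order(page));
	xas_store(&xas, page);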

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index 041c77c4ca56..ae3c0a70a8e9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -850,6 +850,7 @@ static int __add_to_page_cache_locked(struct page *page,
 	int huge = PageHuge(page);
 	struct mem_cgroup *memcg;
 	int error;
+	unsigned int nr = 1;
 	void *old;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -861,31 +862,47 @@ static int __add_to_page_cache_locked(struct page *page,
 					      gfp_mask, &memcg, false);
 		if (error)
 			return error;
+		xas_set_order(&xas, offset, compound_order(page));
+		nr = compound_nr(page);
 	}
 
-	get_page(page);
+	page_ref_add(page, nr);
 	page->mapping = mapping;
 	page->index = offset;
 
 	do {
+		unsigned long exceptional = 0;
+		unsigned int i = 0;
+
 		xas_lock_irq(&xas);
-		old = xas_load(&xas);
-		if (old && !xa_is_value(old))
+		xas_for_each_conflict(&xas, old) {
+			if (!xa_is_value(old))
+				break;
+			exceptional++;
+			if (shadowp)
+				*shadowp = old;
+		}
+		if (old) {
 			xas_set_err(&xas, -EEXIST);
-		xas_store(&xas, page);
+			break;
+		}
+		xas_create_range(&xas);
 		if (xas_error(&xas))
 			goto unlock;
 
-		if (xa_is_value(old)) {
-			mapping->nrexceptional--;
-			if (shadowp)
-				*shadowp = old;
+next:
+		xas_store(&xas, page);
+		if (++i < nr) {
+			xas_next(&xas);
+			goto next;
 		}
-		mapping->nrpages++;
+		mapping->nrexceptional -= exceptional;
+		mapping->nrpages += nr;
 
 		/* hugetlb pages do not participate in page cache accounting */
 		if (!huge)
-			__inc_node_page_state(page, NR_FILE_PAGES);
+			__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES,
+						nr);
 unlock:
 		xas_unlock_irq(&xas);
 	} while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
@@ -902,7 +919,7 @@ static int __add_to_page_cache_locked(struct page *page,
 	/* Leave page->index set: truncation relies upon it */
 	if (!huge)
 		mem_cgroup_cancel_charge(page, memcg, false);
-	put_page(page);
+	page_ref_sub(page, nr);
 	return xas_error(&xas);
 }
 ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);