
[v2,13/25] fs: Add zero_user_large

Message ID: 20200212041845.25879-14-willy@infradead.org
State: New, archived
Series: Large pages in the page cache

Commit Message

Matthew Wilcox Feb. 12, 2020, 4:18 a.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

We can't kmap() a THP, so add a wrapper around zero_user() for large
pages.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/highmem.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

Comments

Kirill A. Shutemov Feb. 14, 2020, 1:52 p.m. UTC | #1
On Tue, Feb 11, 2020 at 08:18:33PM -0800, Matthew Wilcox wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> 
> We can't kmap() a THP, so add a wrapper around zero_user() for large
> pages.

I would rather address it closer to the root: make zero_user_segments()
handle compound pages.

> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/highmem.h | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/include/linux/highmem.h b/include/linux/highmem.h
> index ea5cdbd8c2c3..4465b8784353 100644
> --- a/include/linux/highmem.h
> +++ b/include/linux/highmem.h
> @@ -245,6 +245,28 @@ static inline void zero_user(struct page *page,
>  	zero_user_segments(page, start, start + size, 0, 0);
>  }
>  
> +static inline void zero_user_large(struct page *page,
> +		unsigned start, unsigned size)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < thp_order(page); i++) {
> +		if (start > PAGE_SIZE) {

Off-by-one? >= ?

> +			start -= PAGE_SIZE;
> +		} else {
> +			unsigned this_size = size;
> +
> +			if (size > (PAGE_SIZE - start))
> +				this_size = PAGE_SIZE - start;
> +			zero_user(page + i, start, this_size);
> +			start = 0;
> +			size -= this_size;
> +			if (!size)
> +				break;
> +		}
> +	}
> +}
> +
>  #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE
>  
>  static inline void copy_user_highpage(struct page *to, struct page *from,
> -- 
> 2.25.0
> 
>
Matthew Wilcox Feb. 14, 2020, 4:03 p.m. UTC | #2
On Fri, Feb 14, 2020 at 04:52:48PM +0300, Kirill A. Shutemov wrote:
> On Tue, Feb 11, 2020 at 08:18:33PM -0800, Matthew Wilcox wrote:
> > From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> > 
> > We can't kmap() a THP, so add a wrapper around zero_user() for large
> > pages.
> 
> I would rather address it closer to the root: make zero_user_segments()
> handle compound pages.

Hah.  I ended up doing that, but hadn't sent it out.  I don't like
how ugly it is:

@@ -219,18 +219,57 @@ static inline void zero_user_segments(struct page *page,
        unsigned start1, unsigned end1,
        unsigned start2, unsigned end2)
 {
-       void *kaddr = kmap_atomic(page);
-
-       BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
-
-       if (end1 > start1)
-               memset(kaddr + start1, 0, end1 - start1);
-
-       if (end2 > start2)
-               memset(kaddr + start2, 0, end2 - start2);
-
-       kunmap_atomic(kaddr);
-       flush_dcache_page(page);
+       unsigned int i;
+
+       BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));
+
+       for (i = 0; i < hpage_nr_pages(page); i++) {
+               void *kaddr;
+               unsigned this_end;
+
+               if (end1 == 0 && start2 >= PAGE_SIZE) {
+                       start2 -= PAGE_SIZE;
+                       end2 -= PAGE_SIZE;
+                       continue;
+               }
+
+               if (start1 >= PAGE_SIZE) {
+                       start1 -= PAGE_SIZE;
+                       end1 -= PAGE_SIZE;
+                       if (start2) {
+                               start2 -= PAGE_SIZE;
+                               end2 -= PAGE_SIZE;
+                       }
+                       continue;
+               }
+
+               kaddr = kmap_atomic(page + i);
+
+               this_end = min_t(unsigned, end1, PAGE_SIZE);
+               if (end1 > start1)
+                       memset(kaddr + start1, 0, this_end - start1);
+               end1 -= this_end;
+               start1 = 0;
+
+               if (start2 >= PAGE_SIZE) {
+                       start2 -= PAGE_SIZE;
+                       end2 -= PAGE_SIZE;
+               } else {
+                       this_end = min_t(unsigned, end2, PAGE_SIZE);
+                       if (end2 > start2)
+                               memset(kaddr + start2, 0, this_end - start2);
+                       end2 -= this_end;
+                       start2 = 0;
+               }
+
+               kunmap_atomic(kaddr);
+               flush_dcache_page(page + i);
+
+               if (!end1 && !end2)
+                       break;
+       }
+
+       BUG_ON((start1 | start2 | end1 | end2) != 0);
 }

I think at this point it has to move out-of-line too.

> > +static inline void zero_user_large(struct page *page,
> > +		unsigned start, unsigned size)
> > +{
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < thp_order(page); i++) {
> > +		if (start > PAGE_SIZE) {
> 
> Off-by-one? >= ?

Good catch; I'd also noticed that when I came to redo the zero_user_segments().
Kirill A. Shutemov Feb. 18, 2020, 2:16 p.m. UTC | #3
On Fri, Feb 14, 2020 at 08:03:42AM -0800, Matthew Wilcox wrote:
> On Fri, Feb 14, 2020 at 04:52:48PM +0300, Kirill A. Shutemov wrote:
> > On Tue, Feb 11, 2020 at 08:18:33PM -0800, Matthew Wilcox wrote:
> > > From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> > > 
> > > We can't kmap() a THP, so add a wrapper around zero_user() for large
> > > pages.
> > 
> > I would rather address it closer to the root: make zero_user_segments()
> > handle compound pages.
> 
> Hah.  I ended up doing that, but hadn't sent it out.  I don't like
> how ugly it is:
> 
> @@ -219,18 +219,57 @@ static inline void zero_user_segments(struct page *page,
>         unsigned start1, unsigned end1,
>         unsigned start2, unsigned end2)
>  {
> -       void *kaddr = kmap_atomic(page);
> -
> -       BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
> -
> -       if (end1 > start1)
> -               memset(kaddr + start1, 0, end1 - start1);
> -
> -       if (end2 > start2)
> -               memset(kaddr + start2, 0, end2 - start2);
> -
> -       kunmap_atomic(kaddr);
> -       flush_dcache_page(page);
> +       unsigned int i;
> +
> +       BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));
> +
> +       for (i = 0; i < hpage_nr_pages(page); i++) {
> +               void *kaddr;
> +               unsigned this_end;
> +
> +               if (end1 == 0 && start2 >= PAGE_SIZE) {
> +                       start2 -= PAGE_SIZE;
> +                       end2 -= PAGE_SIZE;
> +                       continue;
> +               }
> +
> +               if (start1 >= PAGE_SIZE) {
> +                       start1 -= PAGE_SIZE;
> +                       end1 -= PAGE_SIZE;
> +                       if (start2) {
> +                               start2 -= PAGE_SIZE;
> +                               end2 -= PAGE_SIZE;
> +                       }

You assume start2/end2 is always after start1/end1 in the page.
Is it always true? If so, I would add BUG_ON() for it.

Otherwise, looks good.

> +                       continue;
> +               }
> +
> +               kaddr = kmap_atomic(page + i);
> +
> +               this_end = min_t(unsigned, end1, PAGE_SIZE);
> +               if (end1 > start1)
> +                       memset(kaddr + start1, 0, this_end - start1);
> +               end1 -= this_end;
> +               start1 = 0;
> +
> +               if (start2 >= PAGE_SIZE) {
> +                       start2 -= PAGE_SIZE;
> +                       end2 -= PAGE_SIZE;
> +               } else {
> +                       this_end = min_t(unsigned, end2, PAGE_SIZE);
> +                       if (end2 > start2)
> +                               memset(kaddr + start2, 0, this_end - start2);
> +                       end2 -= this_end;
> +                       start2 = 0;
> +               }
> +
> +               kunmap_atomic(kaddr);
> +               flush_dcache_page(page + i);
> +
> +               if (!end1 && !end2)
> +                       break;
> +       }
> +
> +       BUG_ON((start1 | start2 | end1 | end2) != 0);
>  }
> 
> I think at this point it has to move out-of-line too.
> 
> > > +static inline void zero_user_large(struct page *page,
> > > +		unsigned start, unsigned size)
> > > +{
> > > +	unsigned int i;
> > > +
> > > +	for (i = 0; i < thp_order(page); i++) {
> > > +		if (start > PAGE_SIZE) {
> > 
> > Off-by-one? >= ?
> 
> Good catch; I'd also noticed that when I came to redo the zero_user_segments().
>
Matthew Wilcox Feb. 18, 2020, 4:13 p.m. UTC | #4
On Tue, Feb 18, 2020 at 05:16:34PM +0300, Kirill A. Shutemov wrote:
> > +               if (start1 >= PAGE_SIZE) {
> > +                       start1 -= PAGE_SIZE;
> > +                       end1 -= PAGE_SIZE;
> > +                       if (start2) {
> > +                               start2 -= PAGE_SIZE;
> > +                               end2 -= PAGE_SIZE;
> > +                       }
> 
> You assume start2/end2 is always after start1/end1 in the page.
> Is it always true? If so, I would add BUG_ON() for it.

after or zero.  Yes, I should add a BUG_ON to check for that.
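
For illustration, the invariant agreed on here (the second segment is either
unused or lies entirely after the first) could be asserted at the top of
zero_user_segments() along these lines; this is only a sketch, not the check
that was actually posted:

	/* Sketch only: segment 2, if non-empty, must not start before
	 * segment 1 ends. */
	BUG_ON(end2 > start2 && start2 < end1);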

> Otherwise, looks good.

Here's what I currently have (I'll add the BUG_ON later):

commit 7fabe16755365cdc6e80343ef994843ecebde60a
Author: Matthew Wilcox (Oracle) <willy@infradead.org>
Date:   Sat Feb 1 03:38:49 2020 -0500

    fs: Support THPs in zero_user_segments
    
    We can only kmap() one subpage of a THP at a time, so loop over all
    relevant subpages, skipping ones which don't need to be zeroed.  This is
    too large to inline when THPs are enabled and we actually need highmem,
    so put it in highmem.c.
    
    Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index ea5cdbd8c2c3..74614903619d 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -215,13 +215,18 @@ static inline void clear_highpage(struct page *page)
        kunmap_atomic(kaddr);
 }
 
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
+               unsigned start2, unsigned end2);
+#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
 static inline void zero_user_segments(struct page *page,
-       unsigned start1, unsigned end1,
-       unsigned start2, unsigned end2)
+               unsigned start1, unsigned end1,
+               unsigned start2, unsigned end2)
 {
+       unsigned long i;
        void *kaddr = kmap_atomic(page);
 
-       BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
+       BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));
 
        if (end1 > start1)
                memset(kaddr + start1, 0, end1 - start1);
@@ -230,8 +235,10 @@ static inline void zero_user_segments(struct page *page,
                memset(kaddr + start2, 0, end2 - start2);
 
        kunmap_atomic(kaddr);
-       flush_dcache_page(page);
+       for (i = 0; i < hpage_nr_pages(page); i++)
+               flush_dcache_page(page + i);
 }
+#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
 
 static inline void zero_user_segment(struct page *page,
        unsigned start, unsigned end)
diff --git a/mm/highmem.c b/mm/highmem.c
index 64d8dea47dd1..3a85c66ef532 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -367,9 +367,67 @@ void kunmap_high(struct page *page)
        if (need_wakeup)
                wake_up(pkmap_map_wait);
 }
-
 EXPORT_SYMBOL(kunmap_high);
-#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
+               unsigned start2, unsigned end2)
+{
+       unsigned int i;
+
+       BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));
+
+       for (i = 0; i < hpage_nr_pages(page); i++) {
+               void *kaddr;
+               unsigned this_end;
+
+               if (end1 == 0 && start2 >= PAGE_SIZE) {
+                       start2 -= PAGE_SIZE;
+                       end2 -= PAGE_SIZE;
+                       continue;
+               }
+
+               if (start1 >= PAGE_SIZE) {
+                       start1 -= PAGE_SIZE;
+                       end1 -= PAGE_SIZE;
+                       if (start2) {
+                               start2 -= PAGE_SIZE;
+                               end2 -= PAGE_SIZE;
+                       }
+                       continue;
+               }
+
+               kaddr = kmap_atomic(page + i);
+
+               this_end = min_t(unsigned, end1, PAGE_SIZE);
+               if (end1 > start1)
+                       memset(kaddr + start1, 0, this_end - start1);
+               end1 -= this_end;
+               start1 = 0;
+
+               if (start2 >= PAGE_SIZE) {
+                       start2 -= PAGE_SIZE;
+                       end2 -= PAGE_SIZE;
+               } else {
+                       this_end = min_t(unsigned, end2, PAGE_SIZE);
+                       if (end2 > start2)
+                               memset(kaddr + start2, 0, this_end - start2);
+                       end2 -= this_end;
+                       start2 = 0;
+               }
+
+               kunmap_atomic(kaddr);
+               flush_dcache_page(page + i);
+
+               if (!end1 && !end2)
+                       break;
+       }
+
+       BUG_ON((start1 | start2 | end1 | end2) != 0);
+}
+EXPORT_SYMBOL(zero_user_segments);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_HIGHMEM */
 
 #if defined(HASHED_PAGE_VIRTUAL)
 



> > +                       continue;
> > +               }
> > +
> > +               kaddr = kmap_atomic(page + i);
> > +
> > +               this_end = min_t(unsigned, end1, PAGE_SIZE);
> > +               if (end1 > start1)
> > +                       memset(kaddr + start1, 0, this_end - start1);
> > +               end1 -= this_end;
> > +               start1 = 0;
> > +
> > +               if (start2 >= PAGE_SIZE) {
> > +                       start2 -= PAGE_SIZE;
> > +                       end2 -= PAGE_SIZE;
> > +               } else {
> > +                       this_end = min_t(unsigned, end2, PAGE_SIZE);
> > +                       if (end2 > start2)
> > +                               memset(kaddr + start2, 0, this_end - start2);
> > +                       end2 -= this_end;
> > +                       start2 = 0;
> > +               }
> > +
> > +               kunmap_atomic(kaddr);
> > +               flush_dcache_page(page + i);
> > +
> > +               if (!end1 && !end2)
> > +                       break;
> > +       }
> > +
> > +       BUG_ON((start1 | start2 | end1 | end2) != 0);
> >  }
> > 
> > I think at this point it has to move out-of-line too.
> > 
> > > > +static inline void zero_user_large(struct page *page,
> > > > +		unsigned start, unsigned size)
> > > > +{
> > > > +	unsigned int i;
> > > > +
> > > > +	for (i = 0; i < thp_order(page); i++) {
> > > > +		if (start > PAGE_SIZE) {
> > > 
> > > Off-by-one? >= ?
> > 
> > Good catch; I'd also noticed that when I came to redo the zero_user_segments().
> > 
> 
> -- 
>  Kirill A. Shutemov
Kirill A. Shutemov Feb. 18, 2020, 5:10 p.m. UTC | #5
On Tue, Feb 18, 2020 at 08:13:49AM -0800, Matthew Wilcox wrote:
> On Tue, Feb 18, 2020 at 05:16:34PM +0300, Kirill A. Shutemov wrote:
> > > +               if (start1 >= PAGE_SIZE) {
> > > +                       start1 -= PAGE_SIZE;
> > > +                       end1 -= PAGE_SIZE;
> > > +                       if (start2) {
> > > +                               start2 -= PAGE_SIZE;
> > > +                               end2 -= PAGE_SIZE;
> > > +                       }
> > 
> > You assume start2/end2 is always after start1/end1 in the page.
> > Is it always true? If so, I would add BUG_ON() for it.
> 
> after or zero.  Yes, I should add a BUG_ON to check for that.
> 
> > Otherwise, looks good.
> 
> Here's what I currently have (I'll add the BUG_ON later):
> 
> commit 7fabe16755365cdc6e80343ef994843ecebde60a
> Author: Matthew Wilcox (Oracle) <willy@infradead.org>
> Date:   Sat Feb 1 03:38:49 2020 -0500
> 
>     fs: Support THPs in zero_user_segments
>     
>     We can only kmap() one subpage of a THP at a time, so loop over all
>     relevant subpages, skipping ones which don't need to be zeroed.  This is
>     too large to inline when THPs are enabled and we actually need highmem,
>     so put it in highmem.c.
>     
>     Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

> 
> diff --git a/include/linux/highmem.h b/include/linux/highmem.h
> index ea5cdbd8c2c3..74614903619d 100644
> --- a/include/linux/highmem.h
> +++ b/include/linux/highmem.h
> @@ -215,13 +215,18 @@ static inline void clear_highpage(struct page *page)
>         kunmap_atomic(kaddr);
>  }
>  
> +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
> +void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
> +               unsigned start2, unsigned end2);
> +#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */

This is a neat trick. I like it.

Although, it means the non-inlined version will never get tested :/

>  static inline void zero_user_segments(struct page *page,
> -       unsigned start1, unsigned end1,
> -       unsigned start2, unsigned end2)
> +               unsigned start1, unsigned end1,
> +               unsigned start2, unsigned end2)
>  {
> +       unsigned long i;
>         void *kaddr = kmap_atomic(page);
>  
> -       BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
> +       BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));
>  
>         if (end1 > start1)
>                 memset(kaddr + start1, 0, end1 - start1);
> @@ -230,8 +235,10 @@ static inline void zero_user_segments(struct page *page,
>                 memset(kaddr + start2, 0, end2 - start2);
>  
>         kunmap_atomic(kaddr);
> -       flush_dcache_page(page);
> +       for (i = 0; i < hpage_nr_pages(page); i++)
> +               flush_dcache_page(page + i);
>  }
> +#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
>  
>  static inline void zero_user_segment(struct page *page,
>         unsigned start, unsigned end)
> diff --git a/mm/highmem.c b/mm/highmem.c
> index 64d8dea47dd1..3a85c66ef532 100644
> --- a/mm/highmem.c
> +++ b/mm/highmem.c
> @@ -367,9 +367,67 @@ void kunmap_high(struct page *page)
>         if (need_wakeup)
>                 wake_up(pkmap_map_wait);
>  }
> -
>  EXPORT_SYMBOL(kunmap_high);
> -#endif
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
> +               unsigned start2, unsigned end2)
> +{
> +       unsigned int i;
> +
> +       BUG_ON(end1 > thp_size(page) || end2 > thp_size(page));
> +
> +       for (i = 0; i < hpage_nr_pages(page); i++) {
> +               void *kaddr;
> +               unsigned this_end;
> +
> +               if (end1 == 0 && start2 >= PAGE_SIZE) {
> +                       start2 -= PAGE_SIZE;
> +                       end2 -= PAGE_SIZE;
> +                       continue;
> +               }
> +
> +               if (start1 >= PAGE_SIZE) {
> +                       start1 -= PAGE_SIZE;
> +                       end1 -= PAGE_SIZE;
> +                       if (start2) {
> +                               start2 -= PAGE_SIZE;
> +                               end2 -= PAGE_SIZE;
> +                       }
> +                       continue;
> +               }
> +
> +               kaddr = kmap_atomic(page + i);
> +
> +               this_end = min_t(unsigned, end1, PAGE_SIZE);
> +               if (end1 > start1)
> +                       memset(kaddr + start1, 0, this_end - start1);
> +               end1 -= this_end;
> +               start1 = 0;
> +
> +               if (start2 >= PAGE_SIZE) {
> +                       start2 -= PAGE_SIZE;
> +                       end2 -= PAGE_SIZE;
> +               } else {
> +                       this_end = min_t(unsigned, end2, PAGE_SIZE);
> +                       if (end2 > start2)
> +                               memset(kaddr + start2, 0, this_end - start2);
> +                       end2 -= this_end;
> +                       start2 = 0;
> +               }
> +
> +               kunmap_atomic(kaddr);
> +               flush_dcache_page(page + i);
> +
> +               if (!end1 && !end2)
> +                       break;
> +       }
> +
> +       BUG_ON((start1 | start2 | end1 | end2) != 0);
> +}
> +EXPORT_SYMBOL(zero_user_segments);
> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> +#endif /* CONFIG_HIGHMEM */
>  
>  #if defined(HASHED_PAGE_VIRTUAL)
>  
> 
> 
> 
> > > +                       continue;
> > > +               }
> > > +
> > > +               kaddr = kmap_atomic(page + i);
> > > +
> > > +               this_end = min_t(unsigned, end1, PAGE_SIZE);
> > > +               if (end1 > start1)
> > > +                       memset(kaddr + start1, 0, this_end - start1);
> > > +               end1 -= this_end;
> > > +               start1 = 0;
> > > +
> > > +               if (start2 >= PAGE_SIZE) {
> > > +                       start2 -= PAGE_SIZE;
> > > +                       end2 -= PAGE_SIZE;
> > > +               } else {
> > > +                       this_end = min_t(unsigned, end2, PAGE_SIZE);
> > > +                       if (end2 > start2)
> > > +                               memset(kaddr + start2, 0, this_end - start2);
> > > +                       end2 -= this_end;
> > > +                       start2 = 0;
> > > +               }
> > > +
> > > +               kunmap_atomic(kaddr);
> > > +               flush_dcache_page(page + i);
> > > +
> > > +               if (!end1 && !end2)
> > > +                       break;
> > > +       }
> > > +
> > > +       BUG_ON((start1 | start2 | end1 | end2) != 0);
> > >  }
> > > 
> > > I think at this point it has to move out-of-line too.
> > > 
> > > > > +static inline void zero_user_large(struct page *page,
> > > > > +		unsigned start, unsigned size)
> > > > > +{
> > > > > +	unsigned int i;
> > > > > +
> > > > > +	for (i = 0; i < thp_order(page); i++) {
> > > > > +		if (start > PAGE_SIZE) {
> > > > 
> > > > Off-by-one? >= ?
> > > 
> > > Good catch; I'd also noticed that when I came to redo the zero_user_segments().
> > > 
> > 
> > -- 
> >  Kirill A. Shutemov
Matthew Wilcox Feb. 18, 2020, 6:07 p.m. UTC | #6
On Tue, Feb 18, 2020 at 08:10:52PM +0300, Kirill A. Shutemov wrote:
> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Thanks

> > +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
> > +void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
> > +               unsigned start2, unsigned end2);
> > +#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
> 
> This is a neat trick. I like it.
> 
> Although, it means the non-inlined version will never get tested :/

I worry about that too, but I don't really want to incur the overhead on
platforms people actually use.
Kirill A. Shutemov Feb. 21, 2020, 12:42 p.m. UTC | #7
On Tue, Feb 18, 2020 at 10:07:05AM -0800, Matthew Wilcox wrote:
> On Tue, Feb 18, 2020 at 08:10:52PM +0300, Kirill A. Shutemov wrote:
> > Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> 
> Thanks
> 
> > > +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
> > > +void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
> > > +               unsigned start2, unsigned end2);
> > > +#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
> > 
> > This is a neat trick. I like it.
> > 
> > Although, it means the non-inlined version will never get tested :/
> 
> I worry about that too, but I don't really want to incur the overhead on
> platforms people actually use.

I'm also worried about latency: kmap_atomic() disables preemption even if the
system has no highmem. Some archs have THPs that are far too large to clear
with preemption disabled.

I *think* there's no real need to disable preemption in this situation, and we
can wrap the kmap_atomic()/kunmap_atomic() calls in #ifdef CONFIG_HIGHMEM.
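
A rough sketch of that idea (hypothetical helper names, not code from the
thread; it assumes page_address() is valid for every subpage when there is no
highmem):

/*
 * Sketch only: go through kmap_atomic(), which disables preemption, only
 * when highmem pages can exist; otherwise map the subpage directly with
 * page_address() so that clearing a large THP stays preemptible.
 */
static inline void *zero_map_subpage(struct page *page)
{
#ifdef CONFIG_HIGHMEM
	return kmap_atomic(page);
#else
	return page_address(page);
#endif
}

static inline void zero_unmap_subpage(void *addr)
{
#ifdef CONFIG_HIGHMEM
	kunmap_atomic(addr);
#endif
}

The loop in zero_user_segments() would then use these instead of calling
kmap_atomic()/kunmap_atomic() directly.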

Patch

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index ea5cdbd8c2c3..4465b8784353 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -245,6 +245,28 @@  static inline void zero_user(struct page *page,
 	zero_user_segments(page, start, start + size, 0, 0);
 }
 
+static inline void zero_user_large(struct page *page,
+		unsigned start, unsigned size)
+{
+	unsigned int i;
+
+	for (i = 0; i < thp_order(page); i++) {
+		if (start > PAGE_SIZE) {
+			start -= PAGE_SIZE;
+		} else {
+			unsigned this_size = size;
+
+			if (size > (PAGE_SIZE - start))
+				this_size = PAGE_SIZE - start;
+			zero_user(page + i, start, this_size);
+			start = 0;
+			size -= this_size;
+			if (!size)
+				break;
+		}
+	}
+}
+
 #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE
 
 static inline void copy_user_highpage(struct page *to, struct page *from,
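
For context, a minimal usage sketch (not part of the patch; the helper name is
hypothetical): a filesystem that needs to zero the tail of a possibly-huge
page, e.g. when truncating within it, could wrap zero_user_large() like this:

/*
 * Usage sketch only, not from the series: zero @page from @offset to the
 * end of the (possibly compound) page.  zero_page_tail() is a hypothetical
 * name; thp_size() is the helper used elsewhere in this series for the
 * total size of the page.
 */
static void zero_page_tail(struct page *page, unsigned int offset)
{
	zero_user_large(page, offset, thp_size(page) - offset);
}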