diff mbox series

[5/9] x86/clear_pages: add clear_pages()

Message ID 20230403052233.1880567-6-ankur.a.arora@oracle.com (mailing list archive)
State New
Headers show
Series x86/clear_huge_page: multi-page clearing | expand

Commit Message

Ankur Arora April 3, 2023, 5:22 a.m. UTC
Add clear_pages() and define the ancillary clear_user_pages().

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/x86/include/asm/page.h    | 6 ++++++
 arch/x86/include/asm/page_32.h | 6 ++++++
 arch/x86/include/asm/page_64.h | 9 +++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)

Comments

Peter Zijlstra April 6, 2023, 8:23 a.m. UTC | #1
On Sun, Apr 02, 2023 at 10:22:29PM -0700, Ankur Arora wrote:
> Add clear_pages() and define the ancillary clear_user_pages().
> 
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>  arch/x86/include/asm/page.h    | 6 ++++++
>  arch/x86/include/asm/page_32.h | 6 ++++++
>  arch/x86/include/asm/page_64.h | 9 +++++++--
>  3 files changed, 19 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
> index d18e5c332cb9..03e3c69fc427 100644
> --- a/arch/x86/include/asm/page.h
> +++ b/arch/x86/include/asm/page.h
> @@ -28,6 +28,12 @@ static inline void clear_user_page(void *page, unsigned long vaddr,
>  	clear_page(page);
>  }
>  
> +static inline void clear_user_pages(void *page, unsigned long vaddr,
> +				    struct page *pg, unsigned int nsubpages)
> +{
> +	clear_pages(page, nsubpages);
> +}

This seems dodgy, clear_user* has slightly different semantics. It needs
the access_ok() and stac/clac thing on at the very least.

> +
>  static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
>  				  struct page *topage)
>  {
> diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
> index 580d71aca65a..3523d1150cfc 100644
> --- a/arch/x86/include/asm/page_32.h
> +++ b/arch/x86/include/asm/page_32.h
> @@ -22,6 +22,12 @@ static inline void clear_page(void *page)
>  	memset(page, 0, PAGE_SIZE);
>  }
>  
> +static inline void clear_pages(void *page, unsigned int nsubpages)
> +{
> +	for (int i = 0; i < nsubpages; i++)
> +		clear_page(page + i * PAGE_SIZE);

cond_resched() ?

> +}
> +
>  static inline void copy_page(void *to, void *from)
>  {
>  	memcpy(to, from, PAGE_SIZE);
Ankur Arora April 7, 2023, 12:50 a.m. UTC | #2
Peter Zijlstra <peterz@infradead.org> writes:

> On Sun, Apr 02, 2023 at 10:22:29PM -0700, Ankur Arora wrote:
>> Add clear_pages() and define the ancillary clear_user_pages().
>>
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>>  arch/x86/include/asm/page.h    | 6 ++++++
>>  arch/x86/include/asm/page_32.h | 6 ++++++
>>  arch/x86/include/asm/page_64.h | 9 +++++++--
>>  3 files changed, 19 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
>> index d18e5c332cb9..03e3c69fc427 100644
>> --- a/arch/x86/include/asm/page.h
>> +++ b/arch/x86/include/asm/page.h
>> @@ -28,6 +28,12 @@ static inline void clear_user_page(void *page, unsigned long vaddr,
>>  	clear_page(page);
>>  }
>>
>> +static inline void clear_user_pages(void *page, unsigned long vaddr,
>> +				    struct page *pg, unsigned int nsubpages)
>> +{
>> +	clear_pages(page, nsubpages);
>> +}
>
> This seems dodgy, clear_user* has slightly different semantics. It needs
> the access_ok() and stac/clac thing on at the very least.

That can't be right. On x86, clear_user_page(), copy_user_page() (and
now the multi-page versions) only write to kernel maps of user pages.
That's why they can skip the access_ok(), stac/clac or uaccess
exception handling.

From core-api/cachetlb.rst:

  ``void copy_user_page(void *to, void *from, unsigned long addr, struct page *page)``
  ``void clear_user_page(void *to, unsigned long addr, struct page *page)``

        These two routines store data in user anonymous or COW
        pages.  It allows a port to efficiently avoid D-cache alias
        issues between userspace and the kernel.

        For example, a port may temporarily map 'from' and 'to' to
        kernel virtual addresses during the copy.  The virtual address
        for these two pages is chosen in such a way that the kernel
        load/store instructions happen to virtual addresses which are
        of the same "color" as the user mapping of the page.  Sparc64
        for example, uses this technique.

        The 'addr' parameter tells the virtual address where the
        user will ultimately have this page mapped, and the 'page'
        parameter gives a pointer to the struct page of the target.

The naming OTOH does seem dodgy. Especially because as you say it
suggests semantics similar to clear_user() etc.

On x86, I think it is definitely a mistake for clear_huge_page() to be
calling clear_user_page*() (especially given that it is getting the
kernel map.) Will fix that.

Even for non-x86, I see just two users in common code:
  highmem.h: copy_user_highpage(), clear_user_highpage()
  fs/dax.c: copy_cow_page_dax()

All of them do a kmap_atomic() so there's really no "may" as documented
above:
        For example, a port may temporarily map 'from' and 'to' to
        kernel virtual addresses during the copy.  The virtual address

Maybe a name change is warranted, if nothing else?

>> +
>>  static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
>>  				  struct page *topage)
>>  {
>> diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
>> index 580d71aca65a..3523d1150cfc 100644
>> --- a/arch/x86/include/asm/page_32.h
>> +++ b/arch/x86/include/asm/page_32.h
>> @@ -22,6 +22,12 @@ static inline void clear_page(void *page)
>>  	memset(page, 0, PAGE_SIZE);
>>  }
>>
>> +static inline void clear_pages(void *page, unsigned int nsubpages)
>> +{
>> +	for (int i = 0; i < nsubpages; i++)
>> +		clear_page(page + i * PAGE_SIZE);
>
> cond_resched() ?

Missed that. Thanks. Will fix.

--
ankur
Peter Zijlstra April 7, 2023, 10:34 a.m. UTC | #3
On Thu, Apr 06, 2023 at 05:50:18PM -0700, Ankur Arora wrote:
> 
> Peter Zijlstra <peterz@infradead.org> writes:
> 
> > On Sun, Apr 02, 2023 at 10:22:29PM -0700, Ankur Arora wrote:
> >> Add clear_pages() and define the ancillary clear_user_pages().
> >>
> >> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> >> ---
> >>  arch/x86/include/asm/page.h    | 6 ++++++
> >>  arch/x86/include/asm/page_32.h | 6 ++++++
> >>  arch/x86/include/asm/page_64.h | 9 +++++++--
> >>  3 files changed, 19 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
> >> index d18e5c332cb9..03e3c69fc427 100644
> >> --- a/arch/x86/include/asm/page.h
> >> +++ b/arch/x86/include/asm/page.h
> >> @@ -28,6 +28,12 @@ static inline void clear_user_page(void *page, unsigned long vaddr,
> >>  	clear_page(page);
> >>  }
> >>
> >> +static inline void clear_user_pages(void *page, unsigned long vaddr,
> >> +				    struct page *pg, unsigned int nsubpages)
> >> +{
> >> +	clear_pages(page, nsubpages);
> >> +}
> >
> > This seems dodgy, clear_user* has slightly different semantics. It needs
> > the access_ok() and stac/clac thing on at the very least.
> 
> That can't be right. On x86, clear_user_page(), copy_user_page() (and
> now the multi-page versions) only write to kernel maps of user pages.
> That's why they can skip the access_ok(), stac/clac or uacess
> exception handling.

Bah, that namespace is a mess :/
Matthew Wilcox April 9, 2023, 1:26 p.m. UTC | #4
On Fri, Apr 07, 2023 at 12:34:44PM +0200, Peter Zijlstra wrote:
> On Thu, Apr 06, 2023 at 05:50:18PM -0700, Ankur Arora wrote:
> > 
> > Peter Zijlstra <peterz@infradead.org> writes:
> > 
> > > On Sun, Apr 02, 2023 at 10:22:29PM -0700, Ankur Arora wrote:
> > >> Add clear_pages() and define the ancillary clear_user_pages().
> > >>
> > >> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> > >> ---
> > >>  arch/x86/include/asm/page.h    | 6 ++++++
> > >>  arch/x86/include/asm/page_32.h | 6 ++++++
> > >>  arch/x86/include/asm/page_64.h | 9 +++++++--
> > >>  3 files changed, 19 insertions(+), 2 deletions(-)
> > >>
> > >> diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
> > >> index d18e5c332cb9..03e3c69fc427 100644
> > >> --- a/arch/x86/include/asm/page.h
> > >> +++ b/arch/x86/include/asm/page.h
> > >> @@ -28,6 +28,12 @@ static inline void clear_user_page(void *page, unsigned long vaddr,
> > >>  	clear_page(page);
> > >>  }
> > >>
> > >> +static inline void clear_user_pages(void *page, unsigned long vaddr,
> > >> +				    struct page *pg, unsigned int nsubpages)
> > >> +{
> > >> +	clear_pages(page, nsubpages);
> > >> +}
> > >
> > > This seems dodgy, clear_user* has slightly different semantics. It needs
> > > the access_ok() and stac/clac thing on at the very least.
> > 
> > That can't be right. On x86, clear_user_page(), copy_user_page() (and
> > now the multi-page versions) only write to kernel maps of user pages.
> > That's why they can skip the access_ok(), stac/clac or uacess
> > exception handling.
> 
> Bah, that namespace is a mess :/

What (I think) it's supposed to be is that clear_page() works on kernel
pages that are never seen by userspace while clear_user_page() works
on kernel mappings of pages the user can definitely see.  This makes
no difference to x86, but some architectures can skip a lot of cache
flushing.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index d18e5c332cb9..03e3c69fc427 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -28,6 +28,12 @@  static inline void clear_user_page(void *page, unsigned long vaddr,
 	clear_page(page);
 }
 
+static inline void clear_user_pages(void *page, unsigned long vaddr,
+				    struct page *pg, unsigned int nsubpages)
+{
+	clear_pages(page, nsubpages);
+}
+
 static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
 				  struct page *topage)
 {
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 580d71aca65a..3523d1150cfc 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -22,6 +22,12 @@  static inline void clear_page(void *page)
 	memset(page, 0, PAGE_SIZE);
 }
 
+static inline void clear_pages(void *page, unsigned int nsubpages)
+{
+	for (int i = 0; i < nsubpages; i++)
+		clear_page(page + i * PAGE_SIZE);
+}
+
 static inline void copy_page(void *to, void *from)
 {
 	memcpy(to, from, PAGE_SIZE);
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 7ca3bd2448c1..42f6c45206c1 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -43,9 +43,9 @@  void clear_pages_orig(void *page, unsigned int length);
 void clear_pages_rep(void *page, unsigned int length);
 void clear_pages_erms(void *page, unsigned int length);
 
-static inline void clear_page(void *page)
+static inline void clear_pages(void *page, unsigned int nsubpages)
 {
-	unsigned long length = PAGE_SIZE;
+	unsigned int length = nsubpages * PAGE_SIZE;
 	/*
 	 * Clean up KMSAN metadata for the page being cleared. The assembly call
 	 * below clobbers @page, so we perform unpoisoning before it.
@@ -60,6 +60,11 @@  static inline void clear_page(void *page)
 			   : "cc", "memory", "rax", "rcx");
 }
 
+static inline void clear_page(void *page)
+{
+	clear_pages(page, 1);
+}
+
 void copy_page(void *to, void *from);
 
 #ifdef CONFIG_X86_5LEVEL