diff mbox series

[v2,07/14] x86/clear_page: add clear_page_uncached()

Message ID 20211020170305.376118-8-ankur.a.arora@oracle.com (mailing list archive)
State New
Headers show
Series Use uncached stores while clearing huge pages | expand

Commit Message

Ankur Arora Oct. 20, 2021, 5:02 p.m. UTC
Expose the low-level uncached primitives (clear_page_movnt(),
clear_page_clzero()) as alternatives via clear_page_uncached().
Also fallback to clear_page(), if X86_FEATURE_MOVNT_SLOW is set
and the CPU does not have X86_FEATURE_CLZERO.

Both the uncached primitives use stores which are weakly ordered
with respect to other instructions accessing the memory hierarchy.
To ensure that callers don't mix accesses to different types of
address_spaces, annotate clear_user_page_uncached(), and
clear_page_uncached() as taking __incoherent pointers as arguments.

Also add clear_page_uncached_make_coherent() which provides the
necessary store fence to flush out the uncached regions.

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---

Notes:
    This patch adds the fallback definitions of clear_user_page_uncached()
    etc in include/linux/mm.h which is likely not the right place for it.

    I'm guessing these should be moved to include/asm-generic/page.h
    (or maybe a new include/asm-generic/page_uncached.h) and for
    architectures that do have arch/$arch/include/asm/page.h (which
    seems like all of them), also replicate there?

    Anyway, wanted to first check if that's the way to do it, before 
    doing that.

 arch/x86/include/asm/page.h    | 10 ++++++++++
 arch/x86/include/asm/page_32.h |  9 +++++++++
 arch/x86/include/asm/page_64.h | 32 ++++++++++++++++++++++++++++++++
 include/linux/mm.h             | 14 ++++++++++++++
 4 files changed, 65 insertions(+)

Comments

Xu Yu Dec. 8, 2021, 8:58 a.m. UTC | #1
On 10/21/21 1:02 AM, Ankur Arora wrote:
> Expose the low-level uncached primitives (clear_page_movnt(),
> clear_page_clzero()) as alternatives via clear_page_uncached().
> Also fallback to clear_page(), if X86_FEATURE_MOVNT_SLOW is set
> and the CPU does not have X86_FEATURE_CLZERO.
> 
> Both the uncached primitives use stores which are weakly ordered
> with respect to other instructions accessing the memory hierarchy.
> To ensure that callers don't mix accesses to different types of
> address_spaces, annotate clear_user_page_uncached(), and
> clear_page_uncached() as taking __incoherent pointers as arguments.
> 
> Also add clear_page_uncached_make_coherent() which provides the
> necessary store fence to flush out the uncached regions.
> 
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
> 
> Notes:
>      This patch adds the fallback definitions of clear_user_page_uncached()
>      etc in include/linux/mm.h which is likely not the right place for it.
> 
>      I'm guessing these should be moved to include/asm-generic/page.h
>      (or maybe a new include/asm-generic/page_uncached.h) and for
>      architectures that do have arch/$arch/include/asm/page.h (which
>      seems like all of them), also replicate there?
> 
>      Anyway, wanted to first check if that's the way to do it, before
>      doing that.
> 
>   arch/x86/include/asm/page.h    | 10 ++++++++++
>   arch/x86/include/asm/page_32.h |  9 +++++++++
>   arch/x86/include/asm/page_64.h | 32 ++++++++++++++++++++++++++++++++
>   include/linux/mm.h             | 14 ++++++++++++++
>   4 files changed, 65 insertions(+)
> 
> diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
> index 94dbd51df58f..163be03ac422 100644
> --- a/arch/x86/include/asm/page_32.h
> +++ b/arch/x86/include/asm/page_32.h
> @@ -39,6 +39,15 @@ static inline void clear_page(void *page)
>   	memset(page, 0, PAGE_SIZE);
>   }
>   
> +static inline void clear_page_uncached(__incoherent void *page)
> +{
> +	clear_page((__force void *) page);
> +}
> +
> +static inline void clear_page_uncached_make_coherent(void)
> +{
> +}
> +
>   static inline void copy_page(void *to, void *from)
>   {
>   	memcpy(to, from, PAGE_SIZE);
> diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
> index 3c53f8ef8818..d7946047c70f 100644
> --- a/arch/x86/include/asm/page_64.h
> +++ b/arch/x86/include/asm/page_64.h
> @@ -56,6 +56,38 @@ static inline void clear_page(void *page)
>   			   : "cc", "memory", "rax", "rcx");
>   }
>   
> +/*
> + * clear_page_uncached: only allowed on __incoherent memory regions.
> + */
> +static inline void clear_page_uncached(__incoherent void *page)
> +{
> +	alternative_call_2(clear_page_movnt,
> +			   clear_page, X86_FEATURE_MOVNT_SLOW,
> +			   clear_page_clzero, X86_FEATURE_CLZERO,
> +			   "=D" (page),
> +			   "0" (page)
> +			   : "cc", "memory", "rax", "rcx");
> +}
> +
> +/*
> + * clear_page_uncached_make_coherent: executes the necessary store
> + * fence after which __incoherent regions can be safely accessed.
> + */
> +static inline void clear_page_uncached_make_coherent(void)
> +{
> +	/*
> +	 * Keep the sfence for oldinstr and clzero separate to guard against
> +	 * the possibility that a cpu-model both has X86_FEATURE_MOVNT_SLOW
> +	 * and X86_FEATURE_CLZERO.
> +	 *
> +	 * The alternatives need to be in the same order as the ones
> +	 * in clear_page_uncached().
> +	 */
> +	alternative_2("sfence",
> +		      "", X86_FEATURE_MOVNT_SLOW,
> +		      "sfence", X86_FEATURE_CLZERO);
> +}
> +
>   void copy_page(void *to, void *from);
>   
>   #ifdef CONFIG_X86_5LEVEL
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 73a52aba448f..b88069d1116c 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3192,6 +3192,20 @@ static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
>   
>   #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
>   
> +#ifndef clear_user_page_uncached

Hi Ankur Arora,

I've been looking for where clear_user_page_uncached is defined in this
patchset, but failed.

There should be something like follows in arch/x86, right?

static inline void clear_user_page_uncached(__incoherent void *page,
                                unsigned long vaddr, struct page *pg)
{
         clear_page_uncached(page);
}


Did I miss something?

> +/*
> + * clear_user_page_uncached: fallback to the standard clear_user_page().
> + */
> +static inline void clear_user_page_uncached(__incoherent void *page,
> +					unsigned long vaddr, struct page *pg)
> +{
> +	clear_user_page((__force void *)page, vaddr, pg);
> +}
> +
> +static inline void clear_page_uncached_make_coherent(void) { }
> +#endif
> +
> +
>   #ifdef CONFIG_DEBUG_PAGEALLOC
>   extern unsigned int _debug_guardpage_minorder;
>   DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
>
Ankur Arora Dec. 10, 2021, 4:37 a.m. UTC | #2
Yu Xu <xuyu@linux.alibaba.com> writes:

> On 10/21/21 1:02 AM, Ankur Arora wrote:
>> Expose the low-level uncached primitives (clear_page_movnt(),
>> clear_page_clzero()) as alternatives via clear_page_uncached().
>> Also fallback to clear_page(), if X86_FEATURE_MOVNT_SLOW is set
>> and the CPU does not have X86_FEATURE_CLZERO.
>> Both the uncached primitives use stores which are weakly ordered
>> with respect to other instructions accessing the memory hierarchy.
>> To ensure that callers don't mix accesses to different types of
>> address_spaces, annotate clear_user_page_uncached(), and
>> clear_page_uncached() as taking __incoherent pointers as arguments.
>> Also add clear_page_uncached_make_coherent() which provides the
>> necessary store fence to flush out the uncached regions.
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>> Notes:
>>      This patch adds the fallback definitions of clear_user_page_uncached()
>>      etc in include/linux/mm.h which is likely not the right place for it.
>>      I'm guessing these should be moved to include/asm-generic/page.h
>>      (or maybe a new include/asm-generic/page_uncached.h) and for
>>      architectures that do have arch/$arch/include/asm/page.h (which
>>      seems like all of them), also replicate there?
>>      Anyway, wanted to first check if that's the way to do it, before
>>      doing that.
>>   arch/x86/include/asm/page.h    | 10 ++++++++++
>>   arch/x86/include/asm/page_32.h |  9 +++++++++
>>   arch/x86/include/asm/page_64.h | 32 ++++++++++++++++++++++++++++++++
>>   include/linux/mm.h             | 14 ++++++++++++++
>>   4 files changed, 65 insertions(+)
>> diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
>> index 94dbd51df58f..163be03ac422 100644
>> --- a/arch/x86/include/asm/page_32.h
>> +++ b/arch/x86/include/asm/page_32.h
>> @@ -39,6 +39,15 @@ static inline void clear_page(void *page)
>>   	memset(page, 0, PAGE_SIZE);
>>   }
>>   +static inline void clear_page_uncached(__incoherent void *page)
>> +{
>> +	clear_page((__force void *) page);
>> +}
>> +
>> +static inline void clear_page_uncached_make_coherent(void)
>> +{
>> +}
>> +
>>   static inline void copy_page(void *to, void *from)
>>   {
>>   	memcpy(to, from, PAGE_SIZE);
>> diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
>> index 3c53f8ef8818..d7946047c70f 100644
>> --- a/arch/x86/include/asm/page_64.h
>> +++ b/arch/x86/include/asm/page_64.h
>> @@ -56,6 +56,38 @@ static inline void clear_page(void *page)
>>   			   : "cc", "memory", "rax", "rcx");
>>   }
>>   +/*
>> + * clear_page_uncached: only allowed on __incoherent memory regions.
>> + */
>> +static inline void clear_page_uncached(__incoherent void *page)
>> +{
>> +	alternative_call_2(clear_page_movnt,
>> +			   clear_page, X86_FEATURE_MOVNT_SLOW,
>> +			   clear_page_clzero, X86_FEATURE_CLZERO,
>> +			   "=D" (page),
>> +			   "0" (page)
>> +			   : "cc", "memory", "rax", "rcx");
>> +}
>> +
>> +/*
>> + * clear_page_uncached_make_coherent: executes the necessary store
>> + * fence after which __incoherent regions can be safely accessed.
>> + */
>> +static inline void clear_page_uncached_make_coherent(void)
>> +{
>> +	/*
>> +	 * Keep the sfence for oldinstr and clzero separate to guard against
>> +	 * the possibility that a cpu-model both has X86_FEATURE_MOVNT_SLOW
>> +	 * and X86_FEATURE_CLZERO.
>> +	 *
>> +	 * The alternatives need to be in the same order as the ones
>> +	 * in clear_page_uncached().
>> +	 */
>> +	alternative_2("sfence",
>> +		      "", X86_FEATURE_MOVNT_SLOW,
>> +		      "sfence", X86_FEATURE_CLZERO);
>> +}
>> +
>>   void copy_page(void *to, void *from);
>>     #ifdef CONFIG_X86_5LEVEL
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 73a52aba448f..b88069d1116c 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -3192,6 +3192,20 @@ static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
>>     #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
>>   +#ifndef clear_user_page_uncached
>
> Hi Ankur Arora,
>
> I've been looking for where clear_user_page_uncached is defined in this
> patchset, but failed.
>
> There should be something like follows in arch/x86, right?
>
> static inline void clear_user_page_uncached(__incoherent void *page,
>                                unsigned long vaddr, struct page *pg)
> {
>         clear_page_uncached(page);
> }
>
>
> Did I miss something?
>
Hi Yu Xu,

Defined in include/linux/mm.h. Just below :).

>> +/*
>> + * clear_user_page_uncached: fallback to the standard clear_user_page().
>> + */
>> +static inline void clear_user_page_uncached(__incoherent void *page,
>> +					unsigned long vaddr, struct page *pg)
>> +{
>> +	clear_user_page((__force void *)page, vaddr, pg);
>> +}

That said, as this note in the patch mentions, this isn't really a great
place for this definition. As you also mention, the right place for this
would be somewhere in the arch/.../include and include/asm-generic hierarchy.

>>      This patch adds the fallback definitions of clear_user_page_uncached()
>>      etc in include/linux/mm.h which is likely not the right place for it.
>>      I'm guessing these should be moved to include/asm-generic/page.h
>>      (or maybe a new include/asm-generic/page_uncached.h) and for
>>      architectures that do have arch/$arch/include/asm/page.h (which
>>      seems like all of them), also replicate there?
>>      Anyway, wanted to first check if that's the way to do it, before
>>      doing that.

Recommendations on how to handle this, welcome.

Thanks

--
ankur
Xu Yu Dec. 10, 2021, 4:48 a.m. UTC | #3
On 12/10/21 12:37 PM, Ankur Arora wrote:
> 
> Yu Xu <xuyu@linux.alibaba.com> writes:
> 
>> On 10/21/21 1:02 AM, Ankur Arora wrote:
>>> Expose the low-level uncached primitives (clear_page_movnt(),
>>> clear_page_clzero()) as alternatives via clear_page_uncached().
>>> Also fallback to clear_page(), if X86_FEATURE_MOVNT_SLOW is set
>>> and the CPU does not have X86_FEATURE_CLZERO.
>>> Both the uncached primitives use stores which are weakly ordered
>>> with respect to other instructions accessing the memory hierarchy.
>>> To ensure that callers don't mix accesses to different types of
>>> address_spaces, annotate clear_user_page_uncached(), and
>>> clear_page_uncached() as taking __incoherent pointers as arguments.
>>> Also add clear_page_uncached_make_coherent() which provides the
>>> necessary store fence to flush out the uncached regions.
>>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>>> ---
>>> Notes:
>>>       This patch adds the fallback definitions of clear_user_page_uncached()
>>>       etc in include/linux/mm.h which is likely not the right place for it.
>>>       I'm guessing these should be moved to include/asm-generic/page.h
>>>       (or maybe a new include/asm-generic/page_uncached.h) and for
>>>       architectures that do have arch/$arch/include/asm/page.h (which
>>>       seems like all of them), also replicate there?
>>>       Anyway, wanted to first check if that's the way to do it, before
>>>       doing that.
>>>    arch/x86/include/asm/page.h    | 10 ++++++++++
>>>    arch/x86/include/asm/page_32.h |  9 +++++++++
>>>    arch/x86/include/asm/page_64.h | 32 ++++++++++++++++++++++++++++++++
>>>    include/linux/mm.h             | 14 ++++++++++++++
>>>    4 files changed, 65 insertions(+)
>>> diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
>>> index 94dbd51df58f..163be03ac422 100644
>>> --- a/arch/x86/include/asm/page_32.h
>>> +++ b/arch/x86/include/asm/page_32.h
>>> @@ -39,6 +39,15 @@ static inline void clear_page(void *page)
>>>    	memset(page, 0, PAGE_SIZE);
>>>    }
>>>    +static inline void clear_page_uncached(__incoherent void *page)
>>> +{
>>> +	clear_page((__force void *) page);
>>> +}
>>> +
>>> +static inline void clear_page_uncached_make_coherent(void)
>>> +{
>>> +}
>>> +
>>>    static inline void copy_page(void *to, void *from)
>>>    {
>>>    	memcpy(to, from, PAGE_SIZE);
>>> diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
>>> index 3c53f8ef8818..d7946047c70f 100644
>>> --- a/arch/x86/include/asm/page_64.h
>>> +++ b/arch/x86/include/asm/page_64.h
>>> @@ -56,6 +56,38 @@ static inline void clear_page(void *page)
>>>    			   : "cc", "memory", "rax", "rcx");
>>>    }
>>>    +/*
>>> + * clear_page_uncached: only allowed on __incoherent memory regions.
>>> + */
>>> +static inline void clear_page_uncached(__incoherent void *page)
>>> +{
>>> +	alternative_call_2(clear_page_movnt,
>>> +			   clear_page, X86_FEATURE_MOVNT_SLOW,
>>> +			   clear_page_clzero, X86_FEATURE_CLZERO,
>>> +			   "=D" (page),
>>> +			   "0" (page)
>>> +			   : "cc", "memory", "rax", "rcx");
>>> +}
>>> +
>>> +/*
>>> + * clear_page_uncached_make_coherent: executes the necessary store
>>> + * fence after which __incoherent regions can be safely accessed.
>>> + */
>>> +static inline void clear_page_uncached_make_coherent(void)
>>> +{
>>> +	/*
>>> +	 * Keep the sfence for oldinstr and clzero separate to guard against
>>> +	 * the possibility that a cpu-model both has X86_FEATURE_MOVNT_SLOW
>>> +	 * and X86_FEATURE_CLZERO.
>>> +	 *
>>> +	 * The alternatives need to be in the same order as the ones
>>> +	 * in clear_page_uncached().
>>> +	 */
>>> +	alternative_2("sfence",
>>> +		      "", X86_FEATURE_MOVNT_SLOW,
>>> +		      "sfence", X86_FEATURE_CLZERO);
>>> +}
>>> +
>>>    void copy_page(void *to, void *from);
>>>      #ifdef CONFIG_X86_5LEVEL
>>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>>> index 73a52aba448f..b88069d1116c 100644
>>> --- a/include/linux/mm.h
>>> +++ b/include/linux/mm.h
>>> @@ -3192,6 +3192,20 @@ static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
>>>      #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
>>>    +#ifndef clear_user_page_uncached
>>
>> Hi Ankur Arora,
>>
>> I've been looking for where clear_user_page_uncached is defined in this
>> patchset, but failed.
>>
>> There should be something like follows in arch/x86, right?
>>
>> static inline void clear_user_page_uncached(__incoherent void *page,
>>                                 unsigned long vaddr, struct page *pg)
>> {
>>          clear_page_uncached(page);
>> }
>>
>>
>> Did I miss something?
>>
> Hi Yu Xu,
> 
> Defined in include/linux/mm.h. Just below :).

Thanks for your reply :)

This is the version when #ifndef clear_user_page_uncached, i.e., fall
back to standard clear_user_page.

But where is the uncached version of clear_user_page? I am looking for
this.

> 
>>> +/*
>>> + * clear_user_page_uncached: fallback to the standard clear_user_page().
>>> + */
>>> +static inline void clear_user_page_uncached(__incoherent void *page,
>>> +					unsigned long vaddr, struct page *pg)
>>> +{
>>> +	clear_user_page((__force void *)page, vaddr, pg);
>>> +}
> 
> That said, as this note in the patch mentions, this isn't really a great
> place for this definition. As you also mention, the right place for this
> would be somewhere in the arch/.../include and include/asm-generic hierarchy.
> 
>>>       This patch adds the fallback definitions of clear_user_page_uncached()
>>>       etc in include/linux/mm.h which is likely not the right place for it.
>>>       I'm guessing these should be moved to include/asm-generic/page.h
>>>       (or maybe a new include/asm-generic/page_uncached.h) and for
>>>       architectures that do have arch/$arch/include/asm/page.h (which
>>>       seems like all of them), also replicate there?
>>>       Anyway, wanted to first check if that's the way to do it, before
>>>       doing that.
> 
> Recommendations on how to handle this, welcome.
> 
> Thanks
> 
> --
> ankur
>
Ankur Arora Dec. 10, 2021, 2:26 p.m. UTC | #4
Yu Xu <xuyu@linux.alibaba.com> writes:

> On 12/10/21 12:37 PM, Ankur Arora wrote:
>> Yu Xu <xuyu@linux.alibaba.com> writes:
>>
>>> On 10/21/21 1:02 AM, Ankur Arora wrote:
>>>> Expose the low-level uncached primitives (clear_page_movnt(),
>>>> clear_page_clzero()) as alternatives via clear_page_uncached().
>>>> Also fallback to clear_page(), if X86_FEATURE_MOVNT_SLOW is set
>>>> and the CPU does not have X86_FEATURE_CLZERO.
>>>> Both the uncached primitives use stores which are weakly ordered
>>>> with respect to other instructions accessing the memory hierarchy.
>>>> To ensure that callers don't mix accesses to different types of
>>>> address_spaces, annotate clear_user_page_uncached(), and
>>>> clear_page_uncached() as taking __incoherent pointers as arguments.
>>>> Also add clear_page_uncached_make_coherent() which provides the
>>>> necessary store fence to flush out the uncached regions.
>>>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>>>> ---
>>>> Notes:
>>>>       This patch adds the fallback definitions of clear_user_page_uncached()
>>>>       etc in include/linux/mm.h which is likely not the right place for it.
>>>>       I'm guessing these should be moved to include/asm-generic/page.h
>>>>       (or maybe a new include/asm-generic/page_uncached.h) and for
>>>>       architectures that do have arch/$arch/include/asm/page.h (which
>>>>       seems like all of them), also replicate there?
>>>>       Anyway, wanted to first check if that's the way to do it, before
>>>>       doing that.
>>>>    arch/x86/include/asm/page.h    | 10 ++++++++++
>>>>    arch/x86/include/asm/page_32.h |  9 +++++++++
>>>>    arch/x86/include/asm/page_64.h | 32 ++++++++++++++++++++++++++++++++
>>>>    include/linux/mm.h             | 14 ++++++++++++++
>>>>    4 files changed, 65 insertions(+)
>>>> diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
>>>> index 94dbd51df58f..163be03ac422 100644
>>>> --- a/arch/x86/include/asm/page_32.h
>>>> +++ b/arch/x86/include/asm/page_32.h
>>>> @@ -39,6 +39,15 @@ static inline void clear_page(void *page)
>>>>    	memset(page, 0, PAGE_SIZE);
>>>>    }
>>>>    +static inline void clear_page_uncached(__incoherent void *page)
>>>> +{
>>>> +	clear_page((__force void *) page);
>>>> +}
>>>> +
>>>> +static inline void clear_page_uncached_make_coherent(void)
>>>> +{
>>>> +}
>>>> +
>>>>    static inline void copy_page(void *to, void *from)
>>>>    {
>>>>    	memcpy(to, from, PAGE_SIZE);
>>>> diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
>>>> index 3c53f8ef8818..d7946047c70f 100644
>>>> --- a/arch/x86/include/asm/page_64.h
>>>> +++ b/arch/x86/include/asm/page_64.h
>>>> @@ -56,6 +56,38 @@ static inline void clear_page(void *page)
>>>>    			   : "cc", "memory", "rax", "rcx");
>>>>    }
>>>>    +/*
>>>> + * clear_page_uncached: only allowed on __incoherent memory regions.
>>>> + */
>>>> +static inline void clear_page_uncached(__incoherent void *page)
>>>> +{
>>>> +	alternative_call_2(clear_page_movnt,
>>>> +			   clear_page, X86_FEATURE_MOVNT_SLOW,
>>>> +			   clear_page_clzero, X86_FEATURE_CLZERO,
>>>> +			   "=D" (page),
>>>> +			   "0" (page)
>>>> +			   : "cc", "memory", "rax", "rcx");
>>>> +}
>>>> +
>>>> +/*
>>>> + * clear_page_uncached_make_coherent: executes the necessary store
>>>> + * fence after which __incoherent regions can be safely accessed.
>>>> + */
>>>> +static inline void clear_page_uncached_make_coherent(void)
>>>> +{
>>>> +	/*
>>>> +	 * Keep the sfence for oldinstr and clzero separate to guard against
>>>> +	 * the possibility that a cpu-model both has X86_FEATURE_MOVNT_SLOW
>>>> +	 * and X86_FEATURE_CLZERO.
>>>> +	 *
>>>> +	 * The alternatives need to be in the same order as the ones
>>>> +	 * in clear_page_uncached().
>>>> +	 */
>>>> +	alternative_2("sfence",
>>>> +		      "", X86_FEATURE_MOVNT_SLOW,
>>>> +		      "sfence", X86_FEATURE_CLZERO);
>>>> +}
>>>> +
>>>>    void copy_page(void *to, void *from);
>>>>      #ifdef CONFIG_X86_5LEVEL
>>>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>>>> index 73a52aba448f..b88069d1116c 100644
>>>> --- a/include/linux/mm.h
>>>> +++ b/include/linux/mm.h
>>>> @@ -3192,6 +3192,20 @@ static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
>>>>      #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
>>>>    +#ifndef clear_user_page_uncached
>>>
>>> Hi Ankur Arora,
>>>
>>> I've been looking for where clear_user_page_uncached is defined in this
>>> patchset, but failed.
>>>
>>> There should be something like follows in arch/x86, right?
>>>
>>> static inline void clear_user_page_uncached(__incoherent void *page,
>>>                                 unsigned long vaddr, struct page *pg)
>>> {
>>>          clear_page_uncached(page);
>>> }
>>>
>>>
>>> Did I miss something?
>>>
>> Hi Yu Xu,
>> Defined in include/linux/mm.h. Just below :).
>
> Thanks for your reply :)
>
> This is the version when #ifndef clear_user_page_uncached, i.e., fall
> back to standard clear_user_page.
>
> But where is the uncached version of clear_user_page? I am looking for
> this.

Sorry, my bad. There is a hunk missing. Not sure how, but this hunk
(from patch 7) got dropped in version that I sent out:

    diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
    index 4d5810c8fab7..89229f2db34a 100644
    --- a/arch/x86/include/asm/page.h
    +++ b/arch/x86/include/asm/page.h
    @@ -28,6 +28,16 @@ static inline void clear_user_page(void *page, unsigned long vaddr,
            clear_page(page);
    }

    +#define clear_user_page_uncached clear_user_page_uncached
    +/*
    + * clear_page_uncached: allowed on only __incoherent memory regions.
    + */
    +static inline void clear_user_page_uncached(__incoherent void *page,
    +                                       unsigned long vaddr, struct page *pg)
    +{
    +       clear_page_uncached(page);
    +}
    +
    static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
                                    struct page *topage)
    {

It is identical to the version that you surmised was missing. Thanks for
the close reading.

Ankur

>>
>>>> +/*
>>>> + * clear_user_page_uncached: fallback to the standard clear_user_page().
>>>> + */
>>>> +static inline void clear_user_page_uncached(__incoherent void *page,
>>>> +					unsigned long vaddr, struct page *pg)
>>>> +{
>>>> +	clear_user_page((__force void *)page, vaddr, pg);
>>>> +}
>> That said, as this note in the patch mentions, this isn't really a great
>> place for this definition. As you also mention, the right place for this
>> would be somewhere in the arch/.../include and include/asm-generic hierarchy.
>>
>>>>       This patch adds the fallback definitions of clear_user_page_uncached()
>>>>       etc in include/linux/mm.h which is likely not the right place for it.
>>>>       I'm guessing these should be moved to include/asm-generic/page.h
>>>>       (or maybe a new include/asm-generic/page_uncached.h) and for
>>>>       architectures that do have arch/$arch/include/asm/page.h (which
>>>>       seems like all of them), also replicate there?
>>>>       Anyway, wanted to first check if that's the way to do it, before
>>>>       doing that.
>> Recommendations on how to handle this, welcome.
>> Thanks
>> --
>> ankur
>>
diff mbox series

Patch

diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 94dbd51df58f..163be03ac422 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -39,6 +39,15 @@  static inline void clear_page(void *page)
 	memset(page, 0, PAGE_SIZE);
 }
 
+static inline void clear_page_uncached(__incoherent void *page)
+{
+	clear_page((__force void *) page);
+}
+
+static inline void clear_page_uncached_make_coherent(void)
+{
+}
+
 static inline void copy_page(void *to, void *from)
 {
 	memcpy(to, from, PAGE_SIZE);
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 3c53f8ef8818..d7946047c70f 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -56,6 +56,38 @@  static inline void clear_page(void *page)
 			   : "cc", "memory", "rax", "rcx");
 }
 
+/*
+ * clear_page_uncached: only allowed on __incoherent memory regions.
+ */
+static inline void clear_page_uncached(__incoherent void *page)
+{
+	alternative_call_2(clear_page_movnt,
+			   clear_page, X86_FEATURE_MOVNT_SLOW,
+			   clear_page_clzero, X86_FEATURE_CLZERO,
+			   "=D" (page),
+			   "0" (page)
+			   : "cc", "memory", "rax", "rcx");
+}
+
+/*
+ * clear_page_uncached_make_coherent: executes the necessary store
+ * fence after which __incoherent regions can be safely accessed.
+ */
+static inline void clear_page_uncached_make_coherent(void)
+{
+	/*
+	 * Keep the sfence for oldinstr and clzero separate to guard against
+	 * the possibility that a cpu-model both has X86_FEATURE_MOVNT_SLOW
+	 * and X86_FEATURE_CLZERO.
+	 *
+	 * The alternatives need to be in the same order as the ones
+	 * in clear_page_uncached().
+	 */
+	alternative_2("sfence",
+		      "", X86_FEATURE_MOVNT_SLOW,
+		      "sfence", X86_FEATURE_CLZERO);
+}
+
 void copy_page(void *to, void *from);
 
 #ifdef CONFIG_X86_5LEVEL
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..b88069d1116c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3192,6 +3192,20 @@  static inline bool vma_is_special_huge(const struct vm_area_struct *vma)
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
+#ifndef clear_user_page_uncached
+/*
+ * clear_user_page_uncached: fallback to the standard clear_user_page().
+ */
+static inline void clear_user_page_uncached(__incoherent void *page,
+					unsigned long vaddr, struct page *pg)
+{
+	clear_user_page((__force void *)page, vaddr, pg);
+}
+
+static inline void clear_page_uncached_make_coherent(void) { }
+#endif
+
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
 extern unsigned int _debug_guardpage_minorder;
 DECLARE_STATIC_KEY_FALSE(_debug_guardpage_enabled);