diff mbox series

[5/8] x86/clear_page: add clear_page_uncached()

Message ID 20201014083300.19077-6-ankur.a.arora@oracle.com (mailing list archive)
State New, archived
Headers show
Series Use uncached writes while clearing gigantic pages | expand

Commit Message

Ankur Arora Oct. 14, 2020, 8:32 a.m. UTC
Define clear_page_uncached() as an alternative_call() to clear_page_nt()
if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it
doesn't.

Similarly define clear_page_uncached_flush() which provides an SFENCE
if the CPU sets X86_FEATURE_NT_GOOD.

Also, add the glue interface clear_user_highpage_uncached().

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/x86/include/asm/page.h    |  6 ++++++
 arch/x86/include/asm/page_32.h |  9 +++++++++
 arch/x86/include/asm/page_64.h | 14 ++++++++++++++
 include/asm-generic/page.h     |  3 +++
 include/linux/highmem.h        | 10 ++++++++++
 5 files changed, 42 insertions(+)

Comments

kernel test robot Oct. 14, 2020, 11:10 a.m. UTC | #1
Hi Ankur,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/master]
[also build test ERROR on linus/master next-20201013]
[cannot apply to tip/x86/core linux/master v5.9]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Ankur-Arora/Use-uncached-writes-while-clearing-gigantic-pages/20201014-163720
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 80f92ca9b86c71450f003d39956fca4327cc5586
config: riscv-randconfig-r006-20201014 (attached as .config)
compiler: riscv32-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/6a1ec80588fc845c7ce6bd0e0e3635bf07d9110d
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Ankur-Arora/Use-uncached-writes-while-clearing-gigantic-pages/20201014-163720
        git checkout 6a1ec80588fc845c7ce6bd0e0e3635bf07d9110d
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from net/socket.c:74:
   include/linux/highmem.h: In function 'clear_user_highpage_uncached':
>> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration]
     240 |  clear_user_page_uncached(addr, vaddr, page);
         |  ^~~~~~~~~~~~~~~~~~~~~~~~
         |  clear_user_highpage_uncached
   net/socket.c: In function '__sys_getsockopt':
   net/socket.c:2155:6: warning: variable 'max_optlen' set but not used [-Wunused-but-set-variable]
    2155 |  int max_optlen;
         |      ^~~~~~~~~~
   cc1: some warnings being treated as errors
--
   In file included from include/linux/pagemap.h:11,
                    from include/linux/blkdev.h:13,
                    from include/linux/blk-cgroup.h:23,
                    from include/linux/writeback.h:14,
                    from include/linux/memcontrol.h:22,
                    from include/net/sock.h:53,
                    from net/sysctl_net.c:20:
   include/linux/highmem.h: In function 'clear_user_highpage_uncached':
>> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration]
     240 |  clear_user_page_uncached(addr, vaddr, page);
         |  ^~~~~~~~~~~~~~~~~~~~~~~~
         |  clear_user_highpage_uncached
   cc1: some warnings being treated as errors
--
   In file included from include/linux/pagemap.h:11,
                    from include/linux/blkdev.h:13,
                    from include/linux/blk-cgroup.h:23,
                    from include/linux/writeback.h:14,
                    from include/linux/memcontrol.h:22,
                    from include/net/sock.h:53,
                    from include/linux/mroute_base.h:8,
                    from include/linux/mroute.h:10,
                    from net/ipv4/route.c:82:
   include/linux/highmem.h: In function 'clear_user_highpage_uncached':
>> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration]
     240 |  clear_user_page_uncached(addr, vaddr, page);
         |  ^~~~~~~~~~~~~~~~~~~~~~~~
         |  clear_user_highpage_uncached
   net/ipv4/route.c: In function 'ip_rt_send_redirect':
   net/ipv4/route.c:878:6: warning: variable 'log_martians' set but not used [-Wunused-but-set-variable]
     878 |  int log_martians;
         |      ^~~~~~~~~~~~
   cc1: some warnings being treated as errors
--
   In file included from include/linux/pagemap.h:11,
                    from include/linux/blkdev.h:13,
                    from include/linux/blk-cgroup.h:23,
                    from include/linux/writeback.h:14,
                    from include/linux/memcontrol.h:22,
                    from include/net/sock.h:53,
                    from include/net/inet_sock.h:22,
                    from include/net/ip.h:28,
                    from net/ipv6/ip6_fib.c:28:
   include/linux/highmem.h: In function 'clear_user_highpage_uncached':
>> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration]
     240 |  clear_user_page_uncached(addr, vaddr, page);
         |  ^~~~~~~~~~~~~~~~~~~~~~~~
         |  clear_user_highpage_uncached
   net/ipv6/ip6_fib.c: In function 'fib6_add':
   net/ipv6/ip6_fib.c:1373:25: warning: variable 'pn' set but not used [-Wunused-but-set-variable]
    1373 |  struct fib6_node *fn, *pn = NULL;
         |                         ^~
   cc1: some warnings being treated as errors
--
   In file included from include/linux/pagemap.h:11,
                    from include/linux/blkdev.h:13,
                    from include/linux/blk-cgroup.h:23,
                    from include/linux/writeback.h:14,
                    from include/linux/memcontrol.h:22,
                    from include/net/sock.h:53,
                    from include/linux/tcp.h:19,
                    from include/linux/ipv6.h:88,
                    from include/linux/netfilter/ipset/ip_set.h:11,
                    from net/netfilter/ipset/ip_set_core.c:23:
   include/linux/highmem.h: In function 'clear_user_highpage_uncached':
>> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration]
     240 |  clear_user_page_uncached(addr, vaddr, page);
         |  ^~~~~~~~~~~~~~~~~~~~~~~~
         |  clear_user_highpage_uncached
   net/netfilter/ipset/ip_set_core.c: In function 'ip_set_rename':
   net/netfilter/ipset/ip_set_core.c:1363:2: warning: 'strncpy' specified bound 32 equals destination size [-Wstringop-truncation]
    1363 |  strncpy(set->name, name2, IPSET_MAXNAMELEN);
         |  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors
--
   In file included from include/linux/pagemap.h:11,
                    from include/linux/blkdev.h:13,
                    from include/linux/blk-cgroup.h:23,
                    from include/linux/writeback.h:14,
                    from include/linux/memcontrol.h:22,
                    from include/net/sock.h:53,
                    from net/nfc/nci/../nfc.h:14,
                    from net/nfc/nci/hci.c:13:
   include/linux/highmem.h: In function 'clear_user_highpage_uncached':
>> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration]
     240 |  clear_user_page_uncached(addr, vaddr, page);
         |  ^~~~~~~~~~~~~~~~~~~~~~~~
         |  clear_user_highpage_uncached
   net/nfc/nci/hci.c: In function 'nci_hci_resp_received':
   net/nfc/nci/hci.c:369:5: warning: variable 'status' set but not used [-Wunused-but-set-variable]
     369 |  u8 status = result;
         |     ^~~~~~
   cc1: some warnings being treated as errors
--
   In file included from include/linux/pagemap.h:11,
                    from include/linux/blkdev.h:13,
                    from include/linux/blk-cgroup.h:23,
                    from include/linux/writeback.h:14,
                    from include/linux/memcontrol.h:22,
                    from include/net/sock.h:53,
                    from include/linux/tcp.h:19,
                    from include/linux/ipv6.h:88,
                    from include/net/ipv6.h:12,
                    from net/ipv6/netfilter/nf_reject_ipv6.c:7:
   include/linux/highmem.h: In function 'clear_user_highpage_uncached':
>> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration]
     240 |  clear_user_page_uncached(addr, vaddr, page);
         |  ^~~~~~~~~~~~~~~~~~~~~~~~
         |  clear_user_highpage_uncached
   net/ipv6/netfilter/nf_reject_ipv6.c: In function 'nf_send_reset6':
   net/ipv6/netfilter/nf_reject_ipv6.c:152:18: warning: variable 'ip6h' set but not used [-Wunused-but-set-variable]
     152 |  struct ipv6hdr *ip6h;
         |                  ^~~~
   cc1: some warnings being treated as errors
--
   In file included from include/linux/pagemap.h:11,
                    from include/linux/blkdev.h:13,
                    from include/linux/blk-cgroup.h:23,
                    from include/linux/writeback.h:14,
                    from include/linux/memcontrol.h:22,
                    from include/net/sock.h:53,
                    from include/linux/tcp.h:19,
                    from net/netfilter/ipvs/ip_vs_core.c:28:
   include/linux/highmem.h: In function 'clear_user_highpage_uncached':
>> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration]
     240 |  clear_user_page_uncached(addr, vaddr, page);
         |  ^~~~~~~~~~~~~~~~~~~~~~~~
         |  clear_user_highpage_uncached
   net/netfilter/ipvs/ip_vs_core.c: In function 'ip_vs_in_icmp':
   net/netfilter/ipvs/ip_vs_core.c:1660:8: warning: variable 'outer_proto' set but not used [-Wunused-but-set-variable]
    1660 |  char *outer_proto = "IPIP";
         |        ^~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +240 include/linux/highmem.h

   234	
   235	#ifndef clear_user_highpage_uncached
   236	static inline void clear_user_highpage_uncached(struct page *page, unsigned long vaddr)
   237	{
   238		void *addr = kmap_atomic(page);
   239	
 > 240		clear_user_page_uncached(addr, vaddr, page);
   241		kunmap_atomic(addr);
   242	}
   243	#endif
   244	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot Oct. 14, 2020, 1:04 p.m. UTC | #2
Hi Ankur,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on tip/master]
[also build test ERROR on linus/master next-20201013]
[cannot apply to tip/x86/core linux/master v5.9]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Ankur-Arora/Use-uncached-writes-while-clearing-gigantic-pages/20201014-163720
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 80f92ca9b86c71450f003d39956fca4327cc5586
config: arm64-randconfig-r001-20201014 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project e7fe3c6dfede8d5781bd000741c3dea7088307a4)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm64 cross compiling tool for clang build
        # apt-get install binutils-aarch64-linux-gnu
        # https://github.com/0day-ci/linux/commit/6a1ec80588fc845c7ce6bd0e0e3635bf07d9110d
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Ankur-Arora/Use-uncached-writes-while-clearing-gigantic-pages/20201014-163720
        git checkout 6a1ec80588fc845c7ce6bd0e0e3635bf07d9110d
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/arm64/kernel/asm-offsets.c:16:
   In file included from include/linux/suspend.h:5:
   In file included from include/linux/swap.h:9:
   In file included from include/linux/memcontrol.h:22:
   In file included from include/linux/writeback.h:14:
   In file included from include/linux/blk-cgroup.h:23:
   In file included from include/linux/blkdev.h:13:
   In file included from include/linux/pagemap.h:11:
>> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached' [-Werror,-Wimplicit-function-declaration]
           clear_user_page_uncached(addr, vaddr, page);
           ^
   include/linux/highmem.h:240:2: note: did you mean 'clear_user_highpage_uncached'?
   include/linux/highmem.h:236:20: note: 'clear_user_highpage_uncached' declared here
   static inline void clear_user_highpage_uncached(struct page *page, unsigned long vaddr)
                      ^
   1 error generated.
   make[2]: *** [scripts/Makefile.build:117: arch/arm64/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1198: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:185: __sub-make] Error 2
   make: Target 'prepare' not remade because of errors.

vim +/clear_user_page_uncached +240 include/linux/highmem.h

   234	
   235	#ifndef clear_user_highpage_uncached
   236	static inline void clear_user_highpage_uncached(struct page *page, unsigned long vaddr)
   237	{
   238		void *addr = kmap_atomic(page);
   239	
 > 240		clear_user_page_uncached(addr, vaddr, page);
   241		kunmap_atomic(addr);
   242	}
   243	#endif
   244	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Andy Lutomirski Oct. 14, 2020, 3:45 p.m. UTC | #3
On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote:
>
> Define clear_page_uncached() as an alternative_call() to clear_page_nt()
> if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it
> doesn't.
>
> Similarly define clear_page_uncached_flush() which provides an SFENCE
> if the CPU sets X86_FEATURE_NT_GOOD.

As long as you keep "NT" or "MOVNTI" in the names and keep functions
in arch/x86, I think it's reasonable to expect that callers understand
that MOVNTI has bizarre memory ordering rules.  But once you give
something a generic name like "clear_page_uncached" and stick it in
generic code, I think the semantics should be more obvious.

How about:

clear_page_uncached_unordered() or clear_page_uncached_incoherent()

and

flush_after_clear_page_uncached()

After all, a naive reader might expect "uncached" to imply "caches are
off and this is coherent with everything".  And the results of getting
this wrong will be subtle and possibly hard-to-reproduce corruption.

--Andy
Borislav Petkov Oct. 14, 2020, 7:58 p.m. UTC | #4
On Wed, Oct 14, 2020 at 08:45:37AM -0700, Andy Lutomirski wrote:
> On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote:
> >
> > Define clear_page_uncached() as an alternative_call() to clear_page_nt()
> > if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it
> > doesn't.
> >
> > Similarly define clear_page_uncached_flush() which provides an SFENCE
> > if the CPU sets X86_FEATURE_NT_GOOD.
> 
> As long as you keep "NT" or "MOVNTI" in the names and keep functions
> in arch/x86, I think it's reasonable to expect that callers understand
> that MOVNTI has bizarre memory ordering rules.  But once you give
> something a generic name like "clear_page_uncached" and stick it in
> generic code, I think the semantics should be more obvious.

Why does it have to be a separate call? Why isn't it behind the
clear_page() alternative machinery so that the proper function is
selected at boot? IOW, why does a user of clear_page functionality need
to know at all about an "uncached" variant?
Ankur Arora Oct. 14, 2020, 8:54 p.m. UTC | #5
On 2020-10-14 8:45 a.m., Andy Lutomirski wrote:
> On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote:
>>
>> Define clear_page_uncached() as an alternative_call() to clear_page_nt()
>> if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it
>> doesn't.
>>
>> Similarly define clear_page_uncached_flush() which provides an SFENCE
>> if the CPU sets X86_FEATURE_NT_GOOD.
> 
> As long as you keep "NT" or "MOVNTI" in the names and keep functions
> in arch/x86, I think it's reasonable to expect that callers understand
> that MOVNTI has bizarre memory ordering rules.  But once you give
> something a generic name like "clear_page_uncached" and stick it in
> generic code, I think the semantics should be more obvious.
> 
> How about:
> 
> clear_page_uncached_unordered() or clear_page_uncached_incoherent()
> 
> and
> 
> flush_after_clear_page_uncached()
> 
> After all, a naive reader might expect "uncached" to imply "caches are
> off and this is coherent with everything".  And the results of getting
> this wrong will be subtle and possibly hard-to-reproduce corruption.
Yeah, these are a lot more obvious. Thanks. Will fix.

Ankur

>
> --Andy
>
Andy Lutomirski Oct. 14, 2020, 9:07 p.m. UTC | #6
> On Oct 14, 2020, at 12:58 PM, Borislav Petkov <bp@alien8.de> wrote:
> 
> On Wed, Oct 14, 2020 at 08:45:37AM -0700, Andy Lutomirski wrote:
>>> On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote:
>>> 
>>> Define clear_page_uncached() as an alternative_call() to clear_page_nt()
>>> if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it
>>> doesn't.
>>> 
>>> Similarly define clear_page_uncached_flush() which provides an SFENCE
>>> if the CPU sets X86_FEATURE_NT_GOOD.
>> 
>> As long as you keep "NT" or "MOVNTI" in the names and keep functions
>> in arch/x86, I think it's reasonable to expect that callers understand
>> that MOVNTI has bizarre memory ordering rules.  But once you give
>> something a generic name like "clear_page_uncached" and stick it in
>> generic code, I think the semantics should be more obvious.
> 
> Why does it have to be a separate call? Why isn't it behind the
> clear_page() alternative machinery so that the proper function is
> selected at boot? IOW, why does a user of clear_page functionality need
> to know at all about an "uncached" variant?
> 
> 

I assume it’s for a little optimization of clearing more than one page per SFENCE.

In any event, based on the benchmark data upthread, we only want to do NT clears when they’re rather large, so this shouldn’t be just an alternative. I assume this is because a page or two will fit in cache and, for most uses that allocate zeroed pages, we prefer cache-hot pages.  When clearing 1G, on the other hand, cache-hot is impossible and we prefer the improved bandwidth and less cache trashing of NT clears.

Perhaps SFENCE is so fast that this is a silly optimization, though, and we don’t lose anything measurable by SFENCEing once per page.
Borislav Petkov Oct. 14, 2020, 9:12 p.m. UTC | #7
On Wed, Oct 14, 2020 at 02:07:30PM -0700, Andy Lutomirski wrote:
> I assume it’s for a little optimization of clearing more than one
> page per SFENCE.
>
> In any event, based on the benchmark data upthread, we only want to do
> NT clears when they’re rather large, so this shouldn’t be just an
> alternative. I assume this is because a page or two will fit in cache
> and, for most uses that allocate zeroed pages, we prefer cache-hot
> pages. When clearing 1G, on the other hand, cache-hot is impossible
> and we prefer the improved bandwidth and less cache trashing of NT
> clears.

Yeah, use case makes sense but people won't know what to use. At the
time I was experimenting with this crap, I remember Linus saying that
that selection should be made based on the size of the area cleared, so
users should not have to know the difference.

Which perhaps is the only sane use case I see for this.

> Perhaps SFENCE is so fast that this is a silly optimization, though,
> and we don’t lose anything measurable by SFENCEing once per page.

Yes, I'd like to see real use cases showing improvement from this, not
just microbenchmarks.
Ankur Arora Oct. 15, 2020, 3:21 a.m. UTC | #8
On 2020-10-14 2:07 p.m., Andy Lutomirski wrote:
> 
> 
> 
>> On Oct 14, 2020, at 12:58 PM, Borislav Petkov <bp@alien8.de> wrote:
>>
>> On Wed, Oct 14, 2020 at 08:45:37AM -0700, Andy Lutomirski wrote:
>>>> On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote:
>>>>
>>>> Define clear_page_uncached() as an alternative_call() to clear_page_nt()
>>>> if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it
>>>> doesn't.
>>>>
>>>> Similarly define clear_page_uncached_flush() which provides an SFENCE
>>>> if the CPU sets X86_FEATURE_NT_GOOD.
>>>
>>> As long as you keep "NT" or "MOVNTI" in the names and keep functions
>>> in arch/x86, I think it's reasonable to expect that callers understand
>>> that MOVNTI has bizarre memory ordering rules.  But once you give
>>> something a generic name like "clear_page_uncached" and stick it in
>>> generic code, I think the semantics should be more obvious.
>>
>> Why does it have to be a separate call? Why isn't it behind the
>> clear_page() alternative machinery so that the proper function is
>> selected at boot? IOW, why does a user of clear_page functionality need
>> to know at all about an "uncached" variant?
>
> I assume it’s for a little optimization of clearing more than one page
> per SFENCE.
>
> In any event, based on the benchmark data upthread, we only want to do
> NT clears when they’re rather large, so this shouldn’t be just an
> alternative. I assume this is because a page or two will fit in cache
> and, for most uses that allocate zeroed pages, we prefer cache-hot
> pages. When clearing 1G, on the other hand, cache-hot is impossible
> and we prefer the improved bandwidth and less cache trashing of NT
> clears.

Also, if we did extend clear_page() to take the page-size as parameter
we still might not have enough information (ex. a 4K or a 2MB page that
clear_page() sees could be part of a GUP of a much larger extent) to
decide whether to go uncached or not.

> Perhaps SFENCE is so fast that this is a silly optimization, though,
> and we don’t lose anything measurable by SFENCEing once per page.
Alas, no. I tried that and dropped about 15% performance on Rome.

Thanks
Ankur
Ankur Arora Oct. 15, 2020, 3:37 a.m. UTC | #9
On 2020-10-14 2:12 p.m., Borislav Petkov wrote:
> On Wed, Oct 14, 2020 at 02:07:30PM -0700, Andy Lutomirski wrote:
>> I assume it’s for a little optimization of clearing more than one
>> page per SFENCE.
>>
>> In any event, based on the benchmark data upthread, we only want to do
>> NT clears when they’re rather large, so this shouldn’t be just an
>> alternative. I assume this is because a page or two will fit in cache
>> and, for most uses that allocate zeroed pages, we prefer cache-hot
>> pages. When clearing 1G, on the other hand, cache-hot is impossible
>> and we prefer the improved bandwidth and less cache trashing of NT
>> clears.
> 
> Yeah, use case makes sense but people won't know what to use. At the
> time I was experimenting with this crap, I remember Linus saying that
> that selection should be made based on the size of the area cleared, so
> users should not have to know the difference.
I don't disagree but I think the selection of cached/uncached route should
be made where we have enough context available to be able to choose to do
this.

This could be for example, done in mm_populate() or gup where if say the
extent is larger than LLC-size, it takes the uncached path.

> 
> Which perhaps is the only sane use case I see for this.
> 
>> Perhaps SFENCE is so fast that this is a silly optimization, though,
>> and we don’t lose anything measurable by SFENCEing once per page.
> 
> Yes, I'd like to see real use cases showing improvement from this, not
> just microbenchmarks.
Sure will add.

Thanks
Ankur

>
Borislav Petkov Oct. 15, 2020, 10:35 a.m. UTC | #10
On Wed, Oct 14, 2020 at 08:37:44PM -0700, Ankur Arora wrote:
> I don't disagree but I think the selection of cached/uncached route should
> be made where we have enough context available to be able to choose to do
> this.
>
> This could be for example, done in mm_populate() or gup where if say the
> extent is larger than LLC-size, it takes the uncached path.

Are there examples where we don't know the size?
Borislav Petkov Oct. 15, 2020, 10:40 a.m. UTC | #11
On Wed, Oct 14, 2020 at 08:21:57PM -0700, Ankur Arora wrote:
> Also, if we did extend clear_page() to take the page-size as parameter
> we still might not have enough information (ex. a 4K or a 2MB page that
> clear_page() sees could be part of a GUP of a much larger extent) to
> decide whether to go uncached or not.

clear_page* assumes 4K. All of the lowlevel asm variants do. So adding
the size there won't bring you a whole lot.

So you'd need to devise this whole thing differently. Perhaps have a
clear_pages() helper which decides based on size what to do: uncached
clearing or the clear_page() as is now in a loop.

Looking at the callsites would give you a better idea I'd say.

Thx.
Ankur Arora Oct. 15, 2020, 9:20 p.m. UTC | #12
On 2020-10-15 3:35 a.m., Borislav Petkov wrote:
> On Wed, Oct 14, 2020 at 08:37:44PM -0700, Ankur Arora wrote:
>> I don't disagree but I think the selection of cached/uncached route should
>> be made where we have enough context available to be able to choose to do
>> this.
>>
>> This could be for example, done in mm_populate() or gup where if say the
>> extent is larger than LLC-size, it takes the uncached path.
> 
> Are there examples where we don't know the size?

The case I was thinking of was that clear_huge_page() or faultin_page() would
know the size to a page unit, while the higher level function would know the
whole extent and could optimize differently based on that.

Thanks
Ankur

>
Ankur Arora Oct. 15, 2020, 9:40 p.m. UTC | #13
On 2020-10-15 3:40 a.m., Borislav Petkov wrote:
> On Wed, Oct 14, 2020 at 08:21:57PM -0700, Ankur Arora wrote:
>> Also, if we did extend clear_page() to take the page-size as parameter
>> we still might not have enough information (ex. a 4K or a 2MB page that
>> clear_page() sees could be part of a GUP of a much larger extent) to
>> decide whether to go uncached or not.
> 
> clear_page* assumes 4K. All of the lowlevel asm variants do. So adding
> the size there won't bring you a whole lot.
> 
> So you'd need to devise this whole thing differently. Perhaps have a
> clear_pages() helper which decides based on size what to do: uncached
> clearing or the clear_page() as is now in a loop.

I think that'll work well for GB pages, where the clear_pages() helper
has enough information to make a decision.

But, unless I'm missing something, I'm not sure how that would work for
say, a 1GB mm_populate() using 4K pages. The clear_page() (or clear_pages())
in that case would only see the 4K size.

But let me think about this more (and look at the callsites as you suggest.)

> 
> Looking at the callsites would give you a better idea I'd say.
Thanks, yeah that's a good idea. Let me go do that.

Ankur

> 
> Thx.
>
Borislav Petkov Oct. 16, 2020, 6:21 p.m. UTC | #14
On Thu, Oct 15, 2020 at 02:20:36PM -0700, Ankur Arora wrote:
> The case I was thinking of was that clear_huge_page()

That loop in clear_gigantic_page() there could be optimized not to
iterate over the pages but do a NTA moves in one go, provided they're
contiguous.

> or faultin_page() would

faultin_page() goes into the bowels of mm fault handling, you'd have to
be more precise what exactly you mean with that one.

> know the size to a page unit, while the higher level function would know the
> whole extent and could optimize differently based on that.

Just don't forget that this "optimization" of yours comes at the price
of added code complexity and you're putting the onus on the people to
know which function to call. So it is not for free and needs to be
carefully weighed.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48803a8..ca0aa379ac7f 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -28,6 +28,12 @@  static inline void clear_user_page(void *page, unsigned long vaddr,
 	clear_page(page);
 }
 
+static inline void clear_user_page_uncached(void *page, unsigned long vaddr,
+					    struct page *pg)
+{
+	clear_page_uncached(page);
+}
+
 static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
 				  struct page *topage)
 {
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 94dbd51df58f..7a03a274a9a4 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -39,6 +39,15 @@  static inline void clear_page(void *page)
 	memset(page, 0, PAGE_SIZE);
 }
 
+static inline void clear_page_uncached(void *page)
+{
+	clear_page(page);
+}
+
+static inline void clear_page_uncached_flush(void)
+{
+}
+
 static inline void copy_page(void *to, void *from)
 {
 	memcpy(to, from, PAGE_SIZE);
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index bde3c2785ec4..5897075e77dd 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -55,6 +55,20 @@  static inline void clear_page(void *page)
 			   : "cc", "memory", "rax", "rcx");
 }
 
+static inline void clear_page_uncached(void *page)
+{
+	alternative_call(clear_page,
+			 clear_page_nt, X86_FEATURE_NT_GOOD,
+			 "=D" (page),
+			 "0" (page)
+			 : "cc", "memory", "rax", "rcx");
+}
+
+static inline void clear_page_uncached_flush(void)
+{
+	alternative("", "sfence", X86_FEATURE_NT_GOOD);
+}
+
 void copy_page(void *to, void *from);
 
 #endif	/* !__ASSEMBLY__ */
diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h
index fe801f01625e..60235a0cf24a 100644
--- a/include/asm-generic/page.h
+++ b/include/asm-generic/page.h
@@ -26,6 +26,9 @@ 
 #ifndef __ASSEMBLY__
 
 #define clear_page(page)	memset((page), 0, PAGE_SIZE)
+#define clear_page_uncached(page)	clear_page(page)
+#define clear_page_uncached_flush()	do { } while (0)
+
 #define copy_page(to,from)	memcpy((to), (from), PAGE_SIZE)
 
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 14e6202ce47f..f842593e2474 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -232,6 +232,16 @@  static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
 }
 #endif
 
+#ifndef clear_user_highpage_uncached
+static inline void clear_user_highpage_uncached(struct page *page, unsigned long vaddr)
+{
+	void *addr = kmap_atomic(page);
+
+	clear_user_page_uncached(addr, vaddr, page);
+	kunmap_atomic(addr);
+}
+#endif
+
 #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 /**
  * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags