Message ID | 20201014083300.19077-6-ankur.a.arora@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Use uncached writes while clearing gigantic pages | expand |
Hi Ankur, Thank you for the patch! Yet something to improve: [auto build test ERROR on tip/master] [also build test ERROR on linus/master next-20201013] [cannot apply to tip/x86/core linux/master v5.9] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Ankur-Arora/Use-uncached-writes-while-clearing-gigantic-pages/20201014-163720 base: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 80f92ca9b86c71450f003d39956fca4327cc5586 config: riscv-randconfig-r006-20201014 (attached as .config) compiler: riscv32-linux-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/6a1ec80588fc845c7ce6bd0e0e3635bf07d9110d git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Ankur-Arora/Use-uncached-writes-while-clearing-gigantic-pages/20201014-163720 git checkout 6a1ec80588fc845c7ce6bd0e0e3635bf07d9110d # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=riscv If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): In file included from net/socket.c:74: include/linux/highmem.h: In function 'clear_user_highpage_uncached': >> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration] 240 | clear_user_page_uncached(addr, vaddr, page); | ^~~~~~~~~~~~~~~~~~~~~~~~ | clear_user_highpage_uncached net/socket.c: In function '__sys_getsockopt': net/socket.c:2155:6: warning: variable 'max_optlen' set but not used [-Wunused-but-set-variable] 2155 | int max_optlen; | ^~~~~~~~~~ cc1: some warnings being treated as errors -- In file included from include/linux/pagemap.h:11, from include/linux/blkdev.h:13, from include/linux/blk-cgroup.h:23, from include/linux/writeback.h:14, from include/linux/memcontrol.h:22, from include/net/sock.h:53, from net/sysctl_net.c:20: include/linux/highmem.h: In function 'clear_user_highpage_uncached': >> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration] 240 | clear_user_page_uncached(addr, vaddr, page); | ^~~~~~~~~~~~~~~~~~~~~~~~ | clear_user_highpage_uncached cc1: some warnings being treated as errors -- In file included from include/linux/pagemap.h:11, from include/linux/blkdev.h:13, from include/linux/blk-cgroup.h:23, from include/linux/writeback.h:14, from include/linux/memcontrol.h:22, from include/net/sock.h:53, from include/linux/mroute_base.h:8, from include/linux/mroute.h:10, from net/ipv4/route.c:82: include/linux/highmem.h: In function 'clear_user_highpage_uncached': >> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration] 240 | clear_user_page_uncached(addr, vaddr, page); | ^~~~~~~~~~~~~~~~~~~~~~~~ | clear_user_highpage_uncached net/ipv4/route.c: In function 'ip_rt_send_redirect': net/ipv4/route.c:878:6: warning: variable 'log_martians' set but not used [-Wunused-but-set-variable] 878 | int log_martians; | ^~~~~~~~~~~~ cc1: some warnings being treated as errors -- In file included from include/linux/pagemap.h:11, from include/linux/blkdev.h:13, from include/linux/blk-cgroup.h:23, from include/linux/writeback.h:14, from include/linux/memcontrol.h:22, from include/net/sock.h:53, from include/net/inet_sock.h:22, from include/net/ip.h:28, from net/ipv6/ip6_fib.c:28: include/linux/highmem.h: In function 'clear_user_highpage_uncached': >> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration] 240 | clear_user_page_uncached(addr, vaddr, page); | ^~~~~~~~~~~~~~~~~~~~~~~~ | clear_user_highpage_uncached net/ipv6/ip6_fib.c: In function 'fib6_add': net/ipv6/ip6_fib.c:1373:25: warning: variable 'pn' set but not used [-Wunused-but-set-variable] 1373 | struct fib6_node *fn, *pn = NULL; | ^~ cc1: some warnings being treated as errors -- In file included from include/linux/pagemap.h:11, from include/linux/blkdev.h:13, from include/linux/blk-cgroup.h:23, from include/linux/writeback.h:14, from include/linux/memcontrol.h:22, from include/net/sock.h:53, from include/linux/tcp.h:19, from include/linux/ipv6.h:88, from include/linux/netfilter/ipset/ip_set.h:11, from net/netfilter/ipset/ip_set_core.c:23: include/linux/highmem.h: In function 'clear_user_highpage_uncached': >> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration] 240 | clear_user_page_uncached(addr, vaddr, page); | ^~~~~~~~~~~~~~~~~~~~~~~~ | clear_user_highpage_uncached net/netfilter/ipset/ip_set_core.c: In function 'ip_set_rename': net/netfilter/ipset/ip_set_core.c:1363:2: warning: 'strncpy' specified bound 32 equals destination size [-Wstringop-truncation] 1363 | strncpy(set->name, name2, IPSET_MAXNAMELEN); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors -- In file included from include/linux/pagemap.h:11, from include/linux/blkdev.h:13, from include/linux/blk-cgroup.h:23, from include/linux/writeback.h:14, from include/linux/memcontrol.h:22, from include/net/sock.h:53, from net/nfc/nci/../nfc.h:14, from net/nfc/nci/hci.c:13: include/linux/highmem.h: In function 'clear_user_highpage_uncached': >> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration] 240 | clear_user_page_uncached(addr, vaddr, page); | ^~~~~~~~~~~~~~~~~~~~~~~~ | clear_user_highpage_uncached net/nfc/nci/hci.c: In function 'nci_hci_resp_received': net/nfc/nci/hci.c:369:5: warning: variable 'status' set but not used [-Wunused-but-set-variable] 369 | u8 status = result; | ^~~~~~ cc1: some warnings being treated as errors -- In file included from include/linux/pagemap.h:11, from include/linux/blkdev.h:13, from include/linux/blk-cgroup.h:23, from include/linux/writeback.h:14, from include/linux/memcontrol.h:22, from include/net/sock.h:53, from include/linux/tcp.h:19, from include/linux/ipv6.h:88, from include/net/ipv6.h:12, from net/ipv6/netfilter/nf_reject_ipv6.c:7: include/linux/highmem.h: In function 'clear_user_highpage_uncached': >> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration] 240 | clear_user_page_uncached(addr, vaddr, page); | ^~~~~~~~~~~~~~~~~~~~~~~~ | clear_user_highpage_uncached net/ipv6/netfilter/nf_reject_ipv6.c: In function 'nf_send_reset6': net/ipv6/netfilter/nf_reject_ipv6.c:152:18: warning: variable 'ip6h' set but not used [-Wunused-but-set-variable] 152 | struct ipv6hdr *ip6h; | ^~~~ cc1: some warnings being treated as errors -- In file included from include/linux/pagemap.h:11, from include/linux/blkdev.h:13, from include/linux/blk-cgroup.h:23, from include/linux/writeback.h:14, from include/linux/memcontrol.h:22, from include/net/sock.h:53, from include/linux/tcp.h:19, from net/netfilter/ipvs/ip_vs_core.c:28: include/linux/highmem.h: In function 'clear_user_highpage_uncached': >> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached'; did you mean 'clear_user_highpage_uncached'? [-Werror=implicit-function-declaration] 240 | clear_user_page_uncached(addr, vaddr, page); | ^~~~~~~~~~~~~~~~~~~~~~~~ | clear_user_highpage_uncached net/netfilter/ipvs/ip_vs_core.c: In function 'ip_vs_in_icmp': net/netfilter/ipvs/ip_vs_core.c:1660:8: warning: variable 'outer_proto' set but not used [-Wunused-but-set-variable] 1660 | char *outer_proto = "IPIP"; | ^~~~~~~~~~~ cc1: some warnings being treated as errors vim +240 include/linux/highmem.h 234 235 #ifndef clear_user_highpage_uncached 236 static inline void clear_user_highpage_uncached(struct page *page, unsigned long vaddr) 237 { 238 void *addr = kmap_atomic(page); 239 > 240 clear_user_page_uncached(addr, vaddr, page); 241 kunmap_atomic(addr); 242 } 243 #endif 244 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Ankur, Thank you for the patch! Yet something to improve: [auto build test ERROR on tip/master] [also build test ERROR on linus/master next-20201013] [cannot apply to tip/x86/core linux/master v5.9] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Ankur-Arora/Use-uncached-writes-while-clearing-gigantic-pages/20201014-163720 base: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 80f92ca9b86c71450f003d39956fca4327cc5586 config: arm64-randconfig-r001-20201014 (attached as .config) compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project e7fe3c6dfede8d5781bd000741c3dea7088307a4) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install arm64 cross compiling tool for clang build # apt-get install binutils-aarch64-linux-gnu # https://github.com/0day-ci/linux/commit/6a1ec80588fc845c7ce6bd0e0e3635bf07d9110d git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Ankur-Arora/Use-uncached-writes-while-clearing-gigantic-pages/20201014-163720 git checkout 6a1ec80588fc845c7ce6bd0e0e3635bf07d9110d # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): In file included from arch/arm64/kernel/asm-offsets.c:16: In file included from include/linux/suspend.h:5: In file included from include/linux/swap.h:9: In file included from include/linux/memcontrol.h:22: In file included from include/linux/writeback.h:14: In file included from include/linux/blk-cgroup.h:23: In file included from include/linux/blkdev.h:13: In file included from include/linux/pagemap.h:11: >> include/linux/highmem.h:240:2: error: implicit declaration of function 'clear_user_page_uncached' [-Werror,-Wimplicit-function-declaration] clear_user_page_uncached(addr, vaddr, page); ^ include/linux/highmem.h:240:2: note: did you mean 'clear_user_highpage_uncached'? include/linux/highmem.h:236:20: note: 'clear_user_highpage_uncached' declared here static inline void clear_user_highpage_uncached(struct page *page, unsigned long vaddr) ^ 1 error generated. make[2]: *** [scripts/Makefile.build:117: arch/arm64/kernel/asm-offsets.s] Error 1 make[2]: Target '__build' not remade because of errors. make[1]: *** [Makefile:1198: prepare0] Error 2 make[1]: Target 'prepare' not remade because of errors. make: *** [Makefile:185: __sub-make] Error 2 make: Target 'prepare' not remade because of errors. vim +/clear_user_page_uncached +240 include/linux/highmem.h 234 235 #ifndef clear_user_highpage_uncached 236 static inline void clear_user_highpage_uncached(struct page *page, unsigned long vaddr) 237 { 238 void *addr = kmap_atomic(page); 239 > 240 clear_user_page_uncached(addr, vaddr, page); 241 kunmap_atomic(addr); 242 } 243 #endif 244 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote: > > Define clear_page_uncached() as an alternative_call() to clear_page_nt() > if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it > doesn't. > > Similarly define clear_page_uncached_flush() which provides an SFENCE > if the CPU sets X86_FEATURE_NT_GOOD. As long as you keep "NT" or "MOVNTI" in the names and keep functions in arch/x86, I think it's reasonable to expect that callers understand that MOVNTI has bizarre memory ordering rules. But once you give something a generic name like "clear_page_uncached" and stick it in generic code, I think the semantics should be more obvious. How about: clear_page_uncached_unordered() or clear_page_uncached_incoherent() and flush_after_clear_page_uncached() After all, a naive reader might expect "uncached" to imply "caches are off and this is coherent with everything". And the results of getting this wrong will be subtle and possibly hard-to-reproduce corruption. --Andy
On Wed, Oct 14, 2020 at 08:45:37AM -0700, Andy Lutomirski wrote: > On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote: > > > > Define clear_page_uncached() as an alternative_call() to clear_page_nt() > > if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it > > doesn't. > > > > Similarly define clear_page_uncached_flush() which provides an SFENCE > > if the CPU sets X86_FEATURE_NT_GOOD. > > As long as you keep "NT" or "MOVNTI" in the names and keep functions > in arch/x86, I think it's reasonable to expect that callers understand > that MOVNTI has bizarre memory ordering rules. But once you give > something a generic name like "clear_page_uncached" and stick it in > generic code, I think the semantics should be more obvious. Why does it have to be a separate call? Why isn't it behind the clear_page() alternative machinery so that the proper function is selected at boot? IOW, why does a user of clear_page functionality need to know at all about an "uncached" variant?
On 2020-10-14 8:45 a.m., Andy Lutomirski wrote: > On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote: >> >> Define clear_page_uncached() as an alternative_call() to clear_page_nt() >> if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it >> doesn't. >> >> Similarly define clear_page_uncached_flush() which provides an SFENCE >> if the CPU sets X86_FEATURE_NT_GOOD. > > As long as you keep "NT" or "MOVNTI" in the names and keep functions > in arch/x86, I think it's reasonable to expect that callers understand > that MOVNTI has bizarre memory ordering rules. But once you give > something a generic name like "clear_page_uncached" and stick it in > generic code, I think the semantics should be more obvious. > > How about: > > clear_page_uncached_unordered() or clear_page_uncached_incoherent() > > and > > flush_after_clear_page_uncached() > > After all, a naive reader might expect "uncached" to imply "caches are > off and this is coherent with everything". And the results of getting > this wrong will be subtle and possibly hard-to-reproduce corruption. Yeah, these are a lot more obvious. Thanks. Will fix. Ankur > > --Andy >
> On Oct 14, 2020, at 12:58 PM, Borislav Petkov <bp@alien8.de> wrote: > > On Wed, Oct 14, 2020 at 08:45:37AM -0700, Andy Lutomirski wrote: >>> On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote: >>> >>> Define clear_page_uncached() as an alternative_call() to clear_page_nt() >>> if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it >>> doesn't. >>> >>> Similarly define clear_page_uncached_flush() which provides an SFENCE >>> if the CPU sets X86_FEATURE_NT_GOOD. >> >> As long as you keep "NT" or "MOVNTI" in the names and keep functions >> in arch/x86, I think it's reasonable to expect that callers understand >> that MOVNTI has bizarre memory ordering rules. But once you give >> something a generic name like "clear_page_uncached" and stick it in >> generic code, I think the semantics should be more obvious. > > Why does it have to be a separate call? Why isn't it behind the > clear_page() alternative machinery so that the proper function is > selected at boot? IOW, why does a user of clear_page functionality need > to know at all about an "uncached" variant? > > I assume it’s for a little optimization of clearing more than one page per SFENCE. In any event, based on the benchmark data upthread, we only want to do NT clears when they’re rather large, so this shouldn’t be just an alternative. I assume this is because a page or two will fit in cache and, for most uses that allocate zeroed pages, we prefer cache-hot pages. When clearing 1G, on the other hand, cache-hot is impossible and we prefer the improved bandwidth and less cache trashing of NT clears. Perhaps SFENCE is so fast that this is a silly optimization, though, and we don’t lose anything measurable by SFENCEing once per page.
On Wed, Oct 14, 2020 at 02:07:30PM -0700, Andy Lutomirski wrote: > I assume it’s for a little optimization of clearing more than one > page per SFENCE. > > In any event, based on the benchmark data upthread, we only want to do > NT clears when they’re rather large, so this shouldn’t be just an > alternative. I assume this is because a page or two will fit in cache > and, for most uses that allocate zeroed pages, we prefer cache-hot > pages. When clearing 1G, on the other hand, cache-hot is impossible > and we prefer the improved bandwidth and less cache trashing of NT > clears. Yeah, use case makes sense but people won't know what to use. At the time I was experimenting with this crap, I remember Linus saying that that selection should be made based on the size of the area cleared, so users should not have to know the difference. Which perhaps is the only sane use case I see for this. > Perhaps SFENCE is so fast that this is a silly optimization, though, > and we don’t lose anything measurable by SFENCEing once per page. Yes, I'd like to see real use cases showing improvement from this, not just microbenchmarks.
On 2020-10-14 2:07 p.m., Andy Lutomirski wrote: > > > >> On Oct 14, 2020, at 12:58 PM, Borislav Petkov <bp@alien8.de> wrote: >> >> On Wed, Oct 14, 2020 at 08:45:37AM -0700, Andy Lutomirski wrote: >>>> On Wed, Oct 14, 2020 at 1:33 AM Ankur Arora <ankur.a.arora@oracle.com> wrote: >>>> >>>> Define clear_page_uncached() as an alternative_call() to clear_page_nt() >>>> if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it >>>> doesn't. >>>> >>>> Similarly define clear_page_uncached_flush() which provides an SFENCE >>>> if the CPU sets X86_FEATURE_NT_GOOD. >>> >>> As long as you keep "NT" or "MOVNTI" in the names and keep functions >>> in arch/x86, I think it's reasonable to expect that callers understand >>> that MOVNTI has bizarre memory ordering rules. But once you give >>> something a generic name like "clear_page_uncached" and stick it in >>> generic code, I think the semantics should be more obvious. >> >> Why does it have to be a separate call? Why isn't it behind the >> clear_page() alternative machinery so that the proper function is >> selected at boot? IOW, why does a user of clear_page functionality need >> to know at all about an "uncached" variant? > > I assume it’s for a little optimization of clearing more than one page > per SFENCE. > > In any event, based on the benchmark data upthread, we only want to do > NT clears when they’re rather large, so this shouldn’t be just an > alternative. I assume this is because a page or two will fit in cache > and, for most uses that allocate zeroed pages, we prefer cache-hot > pages. When clearing 1G, on the other hand, cache-hot is impossible > and we prefer the improved bandwidth and less cache trashing of NT > clears. Also, if we did extend clear_page() to take the page-size as parameter we still might not have enough information (ex. a 4K or a 2MB page that clear_page() sees could be part of a GUP of a much larger extent) to decide whether to go uncached or not. > Perhaps SFENCE is so fast that this is a silly optimization, though, > and we don’t lose anything measurable by SFENCEing once per page. Alas, no. I tried that and dropped about 15% performance on Rome. Thanks Ankur
On 2020-10-14 2:12 p.m., Borislav Petkov wrote: > On Wed, Oct 14, 2020 at 02:07:30PM -0700, Andy Lutomirski wrote: >> I assume it’s for a little optimization of clearing more than one >> page per SFENCE. >> >> In any event, based on the benchmark data upthread, we only want to do >> NT clears when they’re rather large, so this shouldn’t be just an >> alternative. I assume this is because a page or two will fit in cache >> and, for most uses that allocate zeroed pages, we prefer cache-hot >> pages. When clearing 1G, on the other hand, cache-hot is impossible >> and we prefer the improved bandwidth and less cache trashing of NT >> clears. > > Yeah, use case makes sense but people won't know what to use. At the > time I was experimenting with this crap, I remember Linus saying that > that selection should be made based on the size of the area cleared, so > users should not have to know the difference. I don't disagree but I think the selection of cached/uncached route should be made where we have enough context available to be able to choose to do this. This could be for example, done in mm_populate() or gup where if say the extent is larger than LLC-size, it takes the uncached path. > > Which perhaps is the only sane use case I see for this. > >> Perhaps SFENCE is so fast that this is a silly optimization, though, >> and we don’t lose anything measurable by SFENCEing once per page. > > Yes, I'd like to see real use cases showing improvement from this, not > just microbenchmarks. Sure will add. Thanks Ankur >
On Wed, Oct 14, 2020 at 08:37:44PM -0700, Ankur Arora wrote: > I don't disagree but I think the selection of cached/uncached route should > be made where we have enough context available to be able to choose to do > this. > > This could be for example, done in mm_populate() or gup where if say the > extent is larger than LLC-size, it takes the uncached path. Are there examples where we don't know the size?
On Wed, Oct 14, 2020 at 08:21:57PM -0700, Ankur Arora wrote: > Also, if we did extend clear_page() to take the page-size as parameter > we still might not have enough information (ex. a 4K or a 2MB page that > clear_page() sees could be part of a GUP of a much larger extent) to > decide whether to go uncached or not. clear_page* assumes 4K. All of the lowlevel asm variants do. So adding the size there won't bring you a whole lot. So you'd need to devise this whole thing differently. Perhaps have a clear_pages() helper which decides based on size what to do: uncached clearing or the clear_page() as is now in a loop. Looking at the callsites would give you a better idea I'd say. Thx.
On 2020-10-15 3:35 a.m., Borislav Petkov wrote: > On Wed, Oct 14, 2020 at 08:37:44PM -0700, Ankur Arora wrote: >> I don't disagree but I think the selection of cached/uncached route should >> be made where we have enough context available to be able to choose to do >> this. >> >> This could be for example, done in mm_populate() or gup where if say the >> extent is larger than LLC-size, it takes the uncached path. > > Are there examples where we don't know the size? The case I was thinking of was that clear_huge_page() or faultin_page() would know the size to a page unit, while the higher level function would know the whole extent and could optimize differently based on that. Thanks Ankur >
On 2020-10-15 3:40 a.m., Borislav Petkov wrote: > On Wed, Oct 14, 2020 at 08:21:57PM -0700, Ankur Arora wrote: >> Also, if we did extend clear_page() to take the page-size as parameter >> we still might not have enough information (ex. a 4K or a 2MB page that >> clear_page() sees could be part of a GUP of a much larger extent) to >> decide whether to go uncached or not. > > clear_page* assumes 4K. All of the lowlevel asm variants do. So adding > the size there won't bring you a whole lot. > > So you'd need to devise this whole thing differently. Perhaps have a > clear_pages() helper which decides based on size what to do: uncached > clearing or the clear_page() as is now in a loop. I think that'll work well for GB pages, where the clear_pages() helper has enough information to make a decision. But, unless I'm missing something, I'm not sure how that would work for say, a 1GB mm_populate() using 4K pages. The clear_page() (or clear_pages()) in that case would only see the 4K size. But let me think about this more (and look at the callsites as you suggest.) > > Looking at the callsites would give you a better idea I'd say. Thanks, yeah that's a good idea. Let me go do that. Ankur > > Thx. >
On Thu, Oct 15, 2020 at 02:20:36PM -0700, Ankur Arora wrote: > The case I was thinking of was that clear_huge_page() That loop in clear_gigantic_page() there could be optimized not to iterate over the pages but do a NTA moves in one go, provided they're contiguous. > or faultin_page() would faultin_page() goes into the bowels of mm fault handling, you'd have to be more precise what exactly you mean with that one. > know the size to a page unit, while the higher level function would know the > whole extent and could optimize differently based on that. Just don't forget that this "optimization" of yours comes at the price of added code complexity and you're putting the onus on the people to know which function to call. So it is not for free and needs to be carefully weighed.
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 7555b48803a8..ca0aa379ac7f 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -28,6 +28,12 @@ static inline void clear_user_page(void *page, unsigned long vaddr, clear_page(page); } +static inline void clear_user_page_uncached(void *page, unsigned long vaddr, + struct page *pg) +{ + clear_page_uncached(page); +} + static inline void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topage) { diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 94dbd51df58f..7a03a274a9a4 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -39,6 +39,15 @@ static inline void clear_page(void *page) memset(page, 0, PAGE_SIZE); } +static inline void clear_page_uncached(void *page) +{ + clear_page(page); +} + +static inline void clear_page_uncached_flush(void) +{ +} + static inline void copy_page(void *to, void *from) { memcpy(to, from, PAGE_SIZE); diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index bde3c2785ec4..5897075e77dd 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -55,6 +55,20 @@ static inline void clear_page(void *page) : "cc", "memory", "rax", "rcx"); } +static inline void clear_page_uncached(void *page) +{ + alternative_call(clear_page, + clear_page_nt, X86_FEATURE_NT_GOOD, + "=D" (page), + "0" (page) + : "cc", "memory", "rax", "rcx"); +} + +static inline void clear_page_uncached_flush(void) +{ + alternative("", "sfence", X86_FEATURE_NT_GOOD); +} + void copy_page(void *to, void *from); #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h index fe801f01625e..60235a0cf24a 100644 --- a/include/asm-generic/page.h +++ b/include/asm-generic/page.h @@ -26,6 +26,9 @@ #ifndef __ASSEMBLY__ #define clear_page(page) memset((page), 0, PAGE_SIZE) +#define clear_page_uncached(page) clear_page(page) +#define clear_page_uncached_flush() do { } while (0) + #define copy_page(to,from) memcpy((to), (from), PAGE_SIZE) #define clear_user_page(page, vaddr, pg) clear_page(page) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 14e6202ce47f..f842593e2474 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -232,6 +232,16 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr) } #endif +#ifndef clear_user_highpage_uncached +static inline void clear_user_highpage_uncached(struct page *page, unsigned long vaddr) +{ + void *addr = kmap_atomic(page); + + clear_user_page_uncached(addr, vaddr, page); + kunmap_atomic(addr); +} +#endif + #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /** * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags
Define clear_page_uncached() as an alternative_call() to clear_page_nt() if the CPU sets X86_FEATURE_NT_GOOD and fallback to clear_page() if it doesn't. Similarly define clear_page_uncached_flush() which provides an SFENCE if the CPU sets X86_FEATURE_NT_GOOD. Also, add the glue interface clear_user_highpage_uncached(). Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com> --- arch/x86/include/asm/page.h | 6 ++++++ arch/x86/include/asm/page_32.h | 9 +++++++++ arch/x86/include/asm/page_64.h | 14 ++++++++++++++ include/asm-generic/page.h | 3 +++ include/linux/highmem.h | 10 ++++++++++ 5 files changed, 42 insertions(+)