| Field | Value |
|---|---|
| Message ID | 20201118204007.169209557@linutronix.de (mailing list archive) |
| State | New, archived |
| Headers | show |
| Series | mm/highmem: Preemptible variant of kmap_atomic & friends (expand) |
On Wed, 18 Nov 2020 20:48:41 +0100 Thomas Gleixner <tglx@linutronix.de> wrote: > kmap_local() and related interfaces are NOOPs on 64bit and only create > temporary fixmaps for highmem pages on 32bit. That means the test coverage > for this code is pretty small. > > CONFIG_KMAP_LOCAL can be enabled independent from CONFIG_HIGHMEM, which > allows to provide support for enforced kmap_local() debugging even on > 64bit. > > For 32bit the support is unconditional, for 64bit it's only supported when > CONFIG_NR_CPUS <= 4096 as supporting it for 8192 CPUs would require to set > up yet another fixmap PGT. > > If CONFIG_KMAP_LOCAL_FORCE_DEBUG is enabled then kmap_local()/kmap_atomic() > will use the temporary fixmap mapping path. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > --- > V4: New patch > --- > arch/x86/Kconfig | 1 + > arch/x86/include/asm/fixmap.h | 12 +++++++++--- > arch/x86/include/asm/pgtable_64_types.h | 6 +++++- > 3 files changed, 15 insertions(+), 4 deletions(-) > > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -93,6 +93,7 @@ config X86 > select ARCH_SUPPORTS_ACPI > select ARCH_SUPPORTS_ATOMIC_RMW > select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 I triggered the following crash on x86_32 by simply doing a: (ssh'ing into the box) # head -100 /tmp/output-file Where the /tmp/output-file was the output of a trace-cmd report. Even after rebooting and not running the tracing code, simply doing the head command still crashed. 
BUG: unable to handle page fault for address: fff58000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page *pdpt = 0000000006de9001 *pde = 0000000001968063 *pte = 0000000000000000 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 3935 Comm: sshd Not tainted 5.11.0-rc2-test+ #2 Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014 EIP: skb_copy_bits+0x10c/0x1b9 Code: 3b 5d e8 0f 47 5d e8 c7 45 e0 00 00 00 00 8b 7d e0 39 7d e8 76 3a 8b 45 d4 e8 a4 e4 ff ff 8b 55 e4 03 55 e0 89 d9 01 c6 89 d7 <f3> a4 e8 c9 e4 ff ff 01 5d e0 8b 5d e8 b8 00 10 00 00 2b 5d e0 83 EAX: fff57000 EBX: 000005a8 ECX: 000000f8 EDX: c77b9900 ESI: fff58000 EDI: c77b9db0 EBP: c6de39ec ESP: c6de39c0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00210286 CR0: 80050033 CR2: fff58000 CR3: 06de6000 CR4: 001506f0 Call Trace: skb_segment+0x4a3/0x828 ? __tcp_mtu_to_mss+0x2d/0x6b tcp_gso_segment+0xf6/0x336 ? list_add+0x26/0x26 tcp4_gso_segment+0x77/0x7c ? tcp_gso_segment+0x336/0x336 inet_gso_segment+0x1a1/0x2df ? inet_unregister_protosw+0x5e/0x5e skb_mac_gso_segment+0xb9/0x107 __skb_gso_segment+0xdf/0x10f ? netif_skb_features+0x1ca/0x24a ? __qdisc_run+0x1e4/0x418 validate_xmit_skb.constprop.0+0x10f/0x1ad validate_xmit_skb_list+0x25/0x45 sch_direct_xmit+0x5c/0x19d __qdisc_run+0x3e3/0x418 ? qdisc_run_begin+0x53/0x5d qdisc_run+0x26/0x30 __dev_queue_xmit+0x2bd/0x524 ? mark_held_locks+0x40/0x51 dev_queue_xmit+0xf/0x11 ip_finish_output2+0x378/0x3d7 __ip_finish_output+0xd6/0xe2 ip_output+0x8c/0xbb ? ip_mc_output+0x18d/0x18d dst_output+0x27/0x2d ip_local_out+0x2b/0x30 __ip_queue_xmit+0x32e/0x38e ? __copy_skb_header+0x4b/0x98 ? __ip_queue_xmit+0x38e/0x38e ip_queue_xmit+0x16/0x1b __tcp_transmit_skb+0x731/0x794 tcp_transmit_skb+0x16/0x18 tcp_write_xmit+0x7b4/0xa90 __tcp_push_pending_frames+0x2c/0x6b tcp_push+0x8c/0xf1 tcp_sendmsg_locked+0x74a/0x7f2 ? tcp_sendmsg_locked+0x7f2/0x7f2 tcp_sendmsg+0x27/0x38 ? tcp_sendmsg_locked+0x7f2/0x7f2 inet_sendmsg+0x3c/0x5f ? 
inet_send_prepare+0x3b/0x3b sock_sendmsg_nosec+0x1a/0x2d sock_sendmsg+0x25/0x29 sock_write_iter+0x84/0xa7 vfs_write+0xf5/0x19b ksys_write+0x68/0xaa __ia32_sys_write+0x15/0x17 __do_fast_syscall_32+0x66/0x76 do_fast_syscall_32+0x29/0x5b do_SYSENTER_32+0x15/0x17 entry_SYSENTER_32+0x9f/0xf2 EIP: 0xb7ee3545 Code: c4 01 10 03 03 74 c0 01 10 05 03 74 b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 EAX: ffffffda EBX: 00000003 ECX: 01d12448 EDX: 00002028 ESI: 00002028 EDI: 01d12448 EBP: bff4e388 ESP: bff4e328 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00200246 ? asm_exc_nmi+0xc5/0x2ab Modules linked in: ppdev parport_pc parport CR2: 00000000fff58000 ---[ end trace 3d4582614c9c2a0e ]--- EIP: skb_copy_bits+0x10c/0x1b9 Code: 3b 5d e8 0f 47 5d e8 c7 45 e0 00 00 00 00 8b 7d e0 39 7d e8 76 3a 8b 45 d4 e8 a4 e4 ff ff 8b 55 e4 03 55 e0 89 d9 01 c6 89 d7 <f3> a4 e8 c9 e4 ff ff 01 5d e0 8b 5d e8 b8 00 10 00 00 2b 5d e0 83 EAX: fff57000 EBX: 000005a8 ECX: 000000f8 EDX: c77b9900 ESI: fff58000 EDI: c77b9db0 EBP: c6de39ec ESP: c6de39c0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00210286 CR0: 80050033 CR2: fff58000 CR3: 06de6000 CR4: 001506f0 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- This was against 5.11-rc2. I bisected it down to the commit that added this patch. > + select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 If I remove the above line, it works fine. Attached is the config file. 
-- Steve > select ARCH_USE_BUILTIN_BSWAP > select ARCH_USE_QUEUED_RWLOCKS > select ARCH_USE_QUEUED_SPINLOCKS > --- a/arch/x86/include/asm/fixmap.h > +++ b/arch/x86/include/asm/fixmap.h > @@ -14,13 +14,20 @@ > #ifndef _ASM_X86_FIXMAP_H > #define _ASM_X86_FIXMAP_H > > +#include <asm/kmap_size.h> > + > /* > * Exposed to assembly code for setting up initial page tables. Cannot be > * calculated in assembly code (fixmap entries are an enum), but is sanity > * checked in the actual fixmap C code to make sure that the fixmap is > * covered fully. > */ > -#define FIXMAP_PMD_NUM 2 > +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP > +# define FIXMAP_PMD_NUM 2 > +#else > +# define KM_PMDS (KM_MAX_IDX * ((CONFIG_NR_CPUS + 511) / 512)) > +# define FIXMAP_PMD_NUM (KM_PMDS + 2) > +#endif > /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ > #define FIXMAP_PMD_TOP 507 > > @@ -31,7 +38,6 @@ > #include <asm/pgtable_types.h> > #ifdef CONFIG_X86_32 > #include <linux/threads.h> > -#include <asm/kmap_size.h> > #else > #include <uapi/asm/vsyscall.h> > #endif > @@ -92,7 +98,7 @@ enum fixed_addresses { > FIX_IO_APIC_BASE_0, > FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, > #endif > -#ifdef CONFIG_X86_32 > +#ifdef CONFIG_KMAP_LOCAL > FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ > FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, > #ifdef CONFIG_PCI_MMCONFIG > --- a/arch/x86/include/asm/pgtable_64_types.h > +++ b/arch/x86/include/asm/pgtable_64_types.h > @@ -143,7 +143,11 @@ extern unsigned int ptrs_per_p4d; > > #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) > /* The module sections ends with the start of the fixmap */ > -#define MODULES_END _AC(0xffffffffff000000, UL) > +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP > +# define MODULES_END _AC(0xffffffffff000000, UL) > +#else > +# define MODULES_END _AC(0xfffffffffe000000, UL) > +#endif > #define MODULES_LEN (MODULES_END - MODULES_VADDR) > > #define 
ESPFIX_PGD_ENTRY _AC(-2, UL)
On Wed, Jan 6, 2021 at 3:01 PM Steven Rostedt <rostedt@goodmis.org> wrote: > > I triggered the following crash on x86_32 by simply doing a: > > (ssh'ing into the box) > > # head -100 /tmp/output-file > > Where the /tmp/output-file was the output of a trace-cmd report. > Even after rebooting and not running the tracing code, simply doing the > head command still crashed. The code decodes to 0: 3b 5d e8 cmp -0x18(%ebp),%ebx 3: 0f 47 5d e8 cmova -0x18(%ebp),%ebx 7: c7 45 e0 00 00 00 00 movl $0x0,-0x20(%ebp) e: 8b 7d e0 mov -0x20(%ebp),%edi 11: 39 7d e8 cmp %edi,-0x18(%ebp) 14: 76 3a jbe 0x50 16: 8b 45 d4 mov -0x2c(%ebp),%eax 19: e8 a4 e4 ff ff call 0xffffe4c2 1e: 8b 55 e4 mov -0x1c(%ebp),%edx 21: 03 55 e0 add -0x20(%ebp),%edx 24: 89 d9 mov %ebx,%ecx 26: 01 c6 add %eax,%esi 28: 89 d7 mov %edx,%edi 2a:* f3 a4 rep movsb %ds:(%esi),%es:(%edi) <-- trapping instruction 2c: e8 c9 e4 ff ff call 0xffffe4fa 31: 01 5d e0 add %ebx,-0x20(%ebp) 34: 8b 5d e8 mov -0x18(%ebp),%ebx 37: b8 00 10 00 00 mov $0x1000,%eax 3c: 2b 5d e0 sub -0x20(%ebp),%ebx and while it would be good to see the output of scripts/decode_stacktrace.sh, I strongly suspect that the above is vaddr = kmap_atomic(p); memcpy(to + copied, vaddr + p_off, p_len); kunmap_atomic(vaddr); (although I wonder how/why the heck you've enabled CC_OPTIMIZE_FOR_SIZE=y, which is what causes "memcpy()" to be done as that "rep movsb". I thought we disabled it because it's so bad on most cpus). So that first "call" instruction is the kmap_atomic(), the "rep movs" is the memcpy(), and the "call" instruction immediately after is the kunmap_atomic(). Anyway, you can see vaddr in register state: EAX: fff57000 so we've kmapped that one page at fff57000, but we're accessing past it into the next page: > BUG: unable to handle page fault for address: fff58000 with the current source address being (ESI: fff58000) and we still have 248 bytes to go (ECX: 000000f8) even though we've already overflowed into the next page. 
You can see the original count still (EBX: 000005a8), so it really looks like that skb_frag_foreach_page() logic skb_frag_foreach_page(f, skb_frag_off(f) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(to + copied, vaddr + p_off, p_len); kunmap_atomic(vaddr); } must be wrong, and doesn't handle the "each page" part properly. It must have started in the middle of the page, and p_len (that 0x5a8) was wrong. IOW, it really looks like p_off + p_len had the value 0x10f8, which is larger than one page. And looking at the code, in skb_frag_foreach_page(), I see: p_off = (f_off) & (PAGE_SIZE - 1), \ p_len = skb_frag_must_loop(p) ? \ min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \ where that "min_t(u32, f_len, PAGE_SIZE - p_off)" looks correct, but then presumably skb_frag_must_loop() must be wrong. Oh, and when I look at that, I see static inline bool skb_frag_must_loop(struct page *p) { #if defined(CONFIG_HIGHMEM) if (PageHighMem(p)) return true; #endif return false; } and that is no longer true. With the kmap debugging, even non-highmem pages need that "do one page at a time" code, because even non-highmem pages get remapped by kmap(). IOW, I think the patch to fix this might be something like the attached. I wonder whether there is other code that "knows" about kmap() only affecting PageHighmem() pages thing that is no longer true. Looking at some other code, skb_gro_reset_offset() looks suspiciously like it also thinks highmem pages are special. Adding the networking people involved in this area to the cc too. Linus
On Wed, 6 Jan 2021 17:03:48 -0800 Linus Torvalds <torvalds@linux-foundation.org> wrote: > (although I wonder how/why the heck you've enabled > CC_OPTIMIZE_FOR_SIZE=y, which is what causes "memcpy()" to be done as > that "rep movsb". I thought we disabled it because it's so bad on most > cpus). Why? Because to test x86_32, I have a Fedora Core 13 (yes 13!) partition (baremetal) that I use. And the .config I use for it hasn't changed since that time ;-) (except to add new features that I want to test on x86_32). Anyway, I'll test out your patch. Thanks for investigating this. -- Steve
On Wed, 6 Jan 2021 17:03:48 -0800 Linus Torvalds <torvalds@linux-foundation.org> wrote: > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -366,7 +366,7 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) > static inline bool skb_frag_must_loop(struct page *p) > { > #if defined(CONFIG_HIGHMEM) > - if (PageHighMem(p)) > + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || PageHighMem(p)) > return true; > #endif > return false; I applied this and I haven't been able to crash it again. Thanks, -- Steve
On Wed, 6 Jan 2021 17:03:48 -0800 Linus Torvalds wrote: > I wonder whether there is other code that "knows" about kmap() only > affecting PageHighmem() pages thing that is no longer true. > > Looking at some other code, skb_gro_reset_offset() looks suspiciously > like it also thinks highmem pages are special. > > Adding the networking people involved in this area to the cc too. Thanks for the detailed analysis! skb_gro_reset_offset() checks if kernel can read data in the fragments directly as an optimization, in case the entire header is in a fragment. IIUC DEBUG_KMAP_LOCAL_FORCE_MAP only affects the mappings from explicit kmap calls, which GRO won't make - it will fall back to pulling the header out of the fragment and end up in skb_copy_bits(), i.e. the loop you fixed. So GRO should be good. I think..
On Wed, Jan 6, 2021 at 8:49 PM Jakub Kicinski <kuba@kernel.org> wrote: > > On Wed, 6 Jan 2021 17:03:48 -0800 Linus Torvalds wrote: > > I wonder whether there is other code that "knows" about kmap() only > > affecting PageHighmem() pages thing that is no longer true. > > > > Looking at some other code, skb_gro_reset_offset() looks suspiciously > > like it also thinks highmem pages are special. > > > > Adding the networking people involved in this area to the cc too. > > Thanks for the detailed analysis! skb_gro_reset_offset() checks if > kernel can read data in the fragments directly as an optimization, > in case the entire header is in a fragment. > > IIUC DEBUG_KMAP_LOCAL_FORCE_MAP only affects the mappings from > explicit kmap calls, which GRO won't make - it will fall back to > pulling the header out of the fragment and end up in skb_copy_bits(), > i.e. the loop you fixed. So GRO should be good. I think.. Agreed. That code in skb_gro_reset_offset skips the GRO frag0 optimization in various cases, including if the first fragment is in high mem. That specific check goes back to the introduction of the frag0 optimization in commit 86911732d399 ("gro: Avoid copying headers of unmerged packets"), at the time in helper skb_gro_header(). Very glad to hear that the fix addresses the crash in skb_frag_foreach_page. Thanks!
On Wed, Jan 6, 2021 at 9:11 PM Willem de Bruijn <willemb@google.com> wrote: > > On Wed, Jan 6, 2021 at 8:49 PM Jakub Kicinski <kuba@kernel.org> wrote: > > > > On Wed, 6 Jan 2021 17:03:48 -0800 Linus Torvalds wrote: > > > I wonder whether there is other code that "knows" about kmap() only > > > affecting PageHighmem() pages thing that is no longer true. > > > > > > Looking at some other code, skb_gro_reset_offset() looks suspiciously > > > like it also thinks highmem pages are special. > > > > > > Adding the networking people involved in this area to the cc too. But there are three other kmap_atomic callers under net/ that do not loop at all, so assume non-compound pages. In esp_output_head, esp6_output_head and skb_seq_read. The first two directly use skb_page_frag_refill, which can allocate compound (but not __GFP_HIGHMEM) pages, and the third can be inserted with netfilter xt_string in the path of tcp transmit skbs, which can also have compound pages. I think that these could similarly access data beyond the end of the kmap_atomic mapped page. I'll take a closer look.
On Wed, Jan 6, 2021 at 8:45 PM Willem de Bruijn <willemb@google.com> wrote: > > But there are three other kmap_atomic callers under net/ that do not > loop at all, so assume non-compound pages. In esp_output_head, > esp6_output_head and skb_seq_read. The first two directly use > skb_page_frag_refill, which can allocate compound (but not > __GFP_HIGHMEM) pages, and the third can be inserted with > netfilter xt_string in the path of tcp transmit skbs, which can also > have compound pages. I think that these could similarly access > data beyond the end of the kmap_atomic mapped page. I'll take > a closer look. Thanks. Note that I have flushed my random one-liner patch from my system, and expect to get a proper fix through the normal networking pulls. And _if_ the networking people feel that my one-liner was the proper fix, you can use it and add my sign-off if you want to, but it really was more of a "this is the quick ugly fix for testing" rather than anything else. Linus
On Thu, 7 Jan 2021 11:47:02 -0800 Linus Torvalds <torvalds@linux-foundation.org> wrote: > On Wed, Jan 6, 2021 at 8:45 PM Willem de Bruijn <willemb@google.com> wrote: > > > > But there are three other kmap_atomic callers under net/ that do not > > loop at all, so assume non-compound pages. In esp_output_head, > > esp6_output_head and skb_seq_read. The first two directly use > > skb_page_frag_refill, which can allocate compound (but not > > __GFP_HIGHMEM) pages, and the third can be inserted with > > netfilter xt_string in the path of tcp transmit skbs, which can also > > have compound pages. I think that these could similarly access > > data beyond the end of the kmap_atomic mapped page. I'll take > > a closer look. > > Thanks. > > Note that I have flushed my random one-liner patch from my system, and > expect to get a proper fix through the normal networking pulls. > > And _if_ the networking people feel that my one-liner was the proper > fix, you can use it and add my sign-off if you want to, but it really > was more of a "this is the quick ugly fix for testing" rather than > anything else. > Please add: Link: https://lore.kernel.org/linux-mm/20210106180132.41dc249d@gandalf.local.home/ Reported-by: Steven Rostedt (VMware) <rostedt@goodmis.org> And if you take Linus's patch, please add my: Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org> and if you come up with another patch, please send it to me for testing. Thanks! -- Steve
On Thu, Jan 7, 2021 at 3:53 PM Steven Rostedt <rostedt@goodmis.org> wrote: > > On Thu, 7 Jan 2021 11:47:02 -0800 > Linus Torvalds <torvalds@linux-foundation.org> wrote: > > > On Wed, Jan 6, 2021 at 8:45 PM Willem de Bruijn <willemb@google.com> wrote: > > > > > > But there are three other kmap_atomic callers under net/ that do not > > > loop at all, so assume non-compound pages. In esp_output_head, > > > esp6_output_head and skb_seq_read. The first two directly use > > > skb_page_frag_refill, which can allocate compound (but not > > > __GFP_HIGHMEM) pages, and the third can be inserted with > > > netfilter xt_string in the path of tcp transmit skbs, which can also > > > have compound pages. I think that these could similarly access > > > data beyond the end of the kmap_atomic mapped page. I'll take > > > a closer look. > > > > Thanks. > > > > Note that I have flushed my random one-liner patch from my system, and > > expect to get a proper fix through the normal networking pulls. > > > > And _if_ the networking people feel that my one-liner was the proper > > fix, you can use it and add my sign-off if you want to, but it really > > was more of a "this is the quick ugly fix for testing" rather than > > anything else. I do think it is the proper fix as is. If no one else has comments, I can submit it through the net tree. It won't address the other issues that became apparent only as a result of this. I'm preparing separate patches for those. > Please add: > > Link: https://lore.kernel.org/linux-mm/20210106180132.41dc249d@gandalf.local.home/ > Reported-by: Steven Rostedt (VMware) <rostedt@goodmis.org> > > And if you take Linus's patch, please add my: > > Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org> > > and if you come up with another patch, please send it to me for testing. > > Thanks! Will do, thanks.
--- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -93,6 +93,7 @@ config X86 select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -14,13 +14,20 @@ #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +#include <asm/kmap_size.h> + /* * Exposed to assembly code for setting up initial page tables. Cannot be * calculated in assembly code (fixmap entries are an enum), but is sanity * checked in the actual fixmap C code to make sure that the fixmap is * covered fully. */ -#define FIXMAP_PMD_NUM 2 +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define FIXMAP_PMD_NUM 2 +#else +# define KM_PMDS (KM_MAX_IDX * ((CONFIG_NR_CPUS + 511) / 512)) +# define FIXMAP_PMD_NUM (KM_PMDS + 2) +#endif /* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ #define FIXMAP_PMD_TOP 507 @@ -31,7 +38,6 @@ #include <asm/pgtable_types.h> #ifdef CONFIG_X86_32 #include <linux/threads.h> -#include <asm/kmap_size.h> #else #include <uapi/asm/vsyscall.h> #endif @@ -92,7 +98,7 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif -#ifdef CONFIG_X86_32 +#ifdef CONFIG_KMAP_LOCAL FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1, #ifdef CONFIG_PCI_MMCONFIG --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -143,7 +143,11 @@ extern unsigned int ptrs_per_p4d; #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END _AC(0xffffffffff000000, UL) +#ifndef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +# define MODULES_END _AC(0xffffffffff000000, UL) +#else +# define 
MODULES_END _AC(0xfffffffffe000000, UL) +#endif #define MODULES_LEN (MODULES_END - MODULES_VADDR) #define ESPFIX_PGD_ENTRY _AC(-2, UL)
kmap_local() and related interfaces are NOOPs on 64bit and only create temporary fixmaps for highmem pages on 32bit. That means the test coverage for this code is pretty small. CONFIG_KMAP_LOCAL can be enabled independently of CONFIG_HIGHMEM, which allows providing support for enforced kmap_local() debugging even on 64bit. For 32bit the support is unconditional, for 64bit it's only supported when CONFIG_NR_CPUS <= 4096 as supporting it for 8192 CPUs would require setting up yet another fixmap PGT. If CONFIG_KMAP_LOCAL_FORCE_DEBUG is enabled then kmap_local()/kmap_atomic() will use the temporary fixmap mapping path. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- V4: New patch --- arch/x86/Kconfig | 1 + arch/x86/include/asm/fixmap.h | 12 +++++++++--- arch/x86/include/asm/pgtable_64_types.h | 6 +++++- 3 files changed, 15 insertions(+), 4 deletions(-)