Message ID: 20150720001822.30857.65670.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State: New, archived
Hi,

On Mon, Jul 20, 2015 at 01:18:23AM +0100, Dan Williams wrote:
> Existing users of ioremap_cache() are mapping memory that is known in
> advance to not have i/o side effects. These users are forced to cast
> away the __iomem annotation, or otherwise neglect to fix the sparse
> errors thrown when dereferencing pointers to this memory. Provide
> memremap() as a non __iomem annotated ioremap_*() in the case when
> ioremap is otherwise a pointer to memory.
>
> The ARCH_HAS_MEMREMAP kconfig symbol is introduced for archs to assert
> that it is safe to recast / reuse the return value from ioremap as a
> normal pointer to memory. In other words, archs that mandate specific
> accessors for __iomem are not memremap() capable and drivers that care,
> like pmem, can add a dependency to disable themselves on these archs.
>
> Note, that memremap is a break from the ioremap implementation pattern
> of adding a new memremap_<type>() for each mapping type. Instead,
> the implementation defines flags that are passed to the central
> memremap() implementation.
>
> Outside of ioremap() and ioremap_nocache(), the expectation is that most
> calls to ioremap_<type>() are seeking memory-like semantics (e.g.
> speculative reads, and prefetching permitted). These callsites can be
> moved to memremap() over time.
>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  arch/arm/Kconfig                   |  1 +
>  arch/arm64/Kconfig                 |  1 +
>  arch/arm64/kernel/efi.c            |  7 ++---
>  arch/arm64/kernel/smp_spin_table.c | 17 +++++------

These last two files weren't updated in patch 2 to use <linux/io.h>, nor are
they updated here, so this patch breaks the build for arm64.

[...]

> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 318175f62c24..305def28385f 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -6,6 +6,7 @@ config ARM64
>  	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
>  	select ARCH_HAS_ELF_RANDOMIZE
>  	select ARCH_HAS_GCOV_PROFILE_ALL
> +	select ARCH_HAS_MEMREMAP
>  	select ARCH_HAS_SG_CHAIN
>  	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>  	select ARCH_USE_CMPXCHG_LOCKREF
> diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
> index 9d4aa18f2a82..ed363a0202b9 100644
> --- a/arch/arm64/kernel/efi.c
> +++ b/arch/arm64/kernel/efi.c
> @@ -290,8 +290,7 @@ static int __init arm64_enable_runtime_services(void)
>  	pr_info("Remapping and enabling EFI services.\n");
>
>  	mapsize = memmap.map_end - memmap.map;
> -	memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map,
> -						   mapsize);
> +	memmap.map = memremap(memmap.phys_map, mapsize, MEMREMAP_CACHE);

memmap.phys_map is a void*, so we need to retain a cast to a numeric type or
we'll get splats like:

arch/arm64/kernel/efi.c: In function ‘arm64_enable_runtime_services’:
arch/arm64/kernel/efi.c:294:24: warning: passing argument 1 of ‘memremap’ makes integer from pointer without a cast
  memmap.map = memremap(memmap.phys_map, mapsize, MEMREMAP_CACHE);
                        ^
In file included from arch/arm64/kernel/efi.c:18:0:
include/linux/io.h:203:14: note: expected ‘resource_size_t’ but argument is of type ‘void *’
 extern void *memremap(resource_size_t offset, size_t size, unsigned long flags);
              ^

Unfortunately, even with that fixed this patch breaks boot due to the
memremap_valid check. It's extremely likely that (portions of) the
tables are already in the linear mapping, and so page_is_ram will be
true.
At boot time I get the following:

Remapping and enabling EFI services.
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at kernel/resource.c:541 memremap+0xb0/0xbc()
memremap attempted on ram 0x00000009f9e4a018 size: 1200
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.2.0-rc3+ #201
Hardware name: ARM Juno development board (r0) (DT)
Call trace:
[<ffffffc000089954>] dump_backtrace+0x0/0x124
[<ffffffc000089a88>] show_stack+0x10/0x1c
[<ffffffc0005d15c8>] dump_stack+0x84/0xc8
[<ffffffc0000b3f44>] warn_slowpath_common+0x98/0xd0
[<ffffffc0000b3fc8>] warn_slowpath_fmt+0x4c/0x58
[<ffffffc0000b9470>] memremap+0xac/0xbc
[<ffffffc000814fe8>] arm64_enable_runtime_services+0x90/0x208
[<ffffffc000082864>] do_one_initcall+0x88/0x1a0
[<ffffffc000811a20>] kernel_init_freeable+0x8c/0x1f8
[<ffffffc0005ce370>] kernel_init+0xc/0xd8
---[ end trace cb88537fdc8fa200 ]---
Failed to remap EFI memory map

>  	if (!memmap.map) {
>  		pr_err("Failed to remap EFI memory map\n");
>  		return -1;
> @@ -299,8 +298,8 @@ static int __init arm64_enable_runtime_services(void)
>  	memmap.map_end = memmap.map + mapsize;
>  	efi.memmap = &memmap;
>
> -	efi.systab = (__force void *)ioremap_cache(efi_system_table,
> -						   sizeof(efi_system_table_t));
> +	efi.systab = memremap(efi_system_table, sizeof(efi_system_table_t),
> +			      MEMREMAP_CACHE);
>  	if (!efi.systab) {
>  		pr_err("Failed to remap EFI System Table\n");
>  		return -1;
> diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
> index aef3605a8c47..b9caf6cd44e6 100644
> --- a/arch/arm64/kernel/smp_spin_table.c
> +++ b/arch/arm64/kernel/smp_spin_table.c
> @@ -73,19 +73,19 @@ static int smp_spin_table_cpu_init(unsigned int cpu)
>
>  static int smp_spin_table_cpu_prepare(unsigned int cpu)
>  {
> -	__le64 __iomem *release_addr;
> +	__le64 *release_addr;
>
>  	if (!cpu_release_addr[cpu])
>  		return -ENODEV;
>
>  	/*
>  	 * The cpu-release-addr may or may not be inside the linear mapping.
> -	 * As ioremap_cache will either give us a new mapping or reuse the
> -	 * existing linear mapping, we can use it to cover both cases. In
> -	 * either case the memory will be MT_NORMAL.
> +	 * As memremap will either give us a new mapping or reuse the existing
> +	 * linear mapping, we can use it to cover both cases. In either case
> +	 * the memory will be MT_NORMAL.
>  	 */
> -	release_addr = ioremap_cache(cpu_release_addr[cpu],
> -				     sizeof(*release_addr));
> +	release_addr = memremap(cpu_release_addr[cpu], sizeof(*release_addr),
> +				MEMREMAP_CACHE);

Likewise, the comment here is contradicted by the code in memremap_valid
(below). We'll fail to bring up secondaries if the release address fell
in the linear mapping.

> +#ifdef CONFIG_ARCH_HAS_MEMREMAP
> +/*
> + * memremap() is "ioremap" for cases where it is known that the resource
> + * being mapped does not have i/o side effects and the __iomem
> + * annotation is not applicable.
> + */
> +static bool memremap_valid(resource_size_t offset, size_t size)
> +{
> +	if (region_is_ram(offset, size) != 0) {
> +		WARN_ONCE(1, "memremap attempted on ram %pa size: %zu\n",
> +				&offset, size);
> +		return false;
> +	}
> +	return true;
> +}
> +
> +void *memremap(resource_size_t offset, size_t size, unsigned long flags)
> +{
> +	void __iomem *addr = NULL;
> +
> +	if (!memremap_valid(offset, size))
> +		return NULL;
> +
> +	/* try all mapping types requested until one returns non-NULL */
> +	if (flags & MEMREMAP_CACHE) {
> +		if (flags & MEMREMAP_STRICT)
> +			addr = strict_ioremap_cache(offset, size);
> +		else
> +			addr = ioremap_cache(offset, size);
> +	}
> +
> +	if (!addr && (flags & MEMREMAP_WT)) {
> +		if (flags & MEMREMAP_STRICT)
> +			addr = strict_ioremap_wt(offset, size);
> +		else
> +			addr = ioremap_wt(offset, size);
> +	}
> +
> +	return (void __force *) addr;
> +}
> +EXPORT_SYMBOL(memremap);

Thanks,
Mark.
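[For reference, a call site built on the flags interface quoted above would
look roughly like the following sketch. The flag-fallback combination is
illustrative, and the matching memunmap() from later in this series is
assumed:]

	/*
	 * Sketch: ask for a cacheable mapping of a side-effect-free
	 * resource, falling back to write-through if the arch cannot
	 * provide a cached mapping. The result is a plain pointer,
	 * not an __iomem cookie, so no readl()/writel() accessors.
	 */
	void *addr = memremap(offset, size, MEMREMAP_CACHE | MEMREMAP_WT);

	if (!addr)
		return -ENOMEM;
	memcpy(buf, addr, size);	/* plain loads/stores are fine */
	memunmap(addr);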
On Mon, Jul 20, 2015 at 5:00 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi,
>
> On Mon, Jul 20, 2015 at 01:18:23AM +0100, Dan Williams wrote:
>> Existing users of ioremap_cache() are mapping memory that is known in
>> advance to not have i/o side effects. These users are forced to cast
>> away the __iomem annotation, or otherwise neglect to fix the sparse
>> errors thrown when dereferencing pointers to this memory. Provide
>> memremap() as a non __iomem annotated ioremap_*() in the case when
>> ioremap is otherwise a pointer to memory.
>>
>> The ARCH_HAS_MEMREMAP kconfig symbol is introduced for archs to assert
>> that it is safe to recast / reuse the return value from ioremap as a
>> normal pointer to memory. In other words, archs that mandate specific
>> accessors for __iomem are not memremap() capable and drivers that care,
>> like pmem, can add a dependency to disable themselves on these archs.
>>
>> Note, that memremap is a break from the ioremap implementation pattern
>> of adding a new memremap_<type>() for each mapping type. Instead,
>> the implementation defines flags that are passed to the central
>> memremap() implementation.
>>
>> Outside of ioremap() and ioremap_nocache(), the expectation is that most
>> calls to ioremap_<type>() are seeking memory-like semantics (e.g.
>> speculative reads, and prefetching permitted). These callsites can be
>> moved to memremap() over time.
>>
>> Cc: Arnd Bergmann <arnd@arndb.de>
>> Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>  arch/arm/Kconfig                   |  1 +
>>  arch/arm64/Kconfig                 |  1 +
>>  arch/arm64/kernel/efi.c            |  7 ++---
>>  arch/arm64/kernel/smp_spin_table.c | 17 +++++------
>
> These last two files weren't updated in patch 2 to use <linux/io.h>, nor are
> they updated here, so this patch breaks the build for arm64.

Thanks for testing! I'll follow up and see why this failure wasn't
reported by 0day-kbuild.

> [...]
>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 318175f62c24..305def28385f 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -6,6 +6,7 @@ config ARM64
>>  	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
>>  	select ARCH_HAS_ELF_RANDOMIZE
>>  	select ARCH_HAS_GCOV_PROFILE_ALL
>> +	select ARCH_HAS_MEMREMAP
>>  	select ARCH_HAS_SG_CHAIN
>>  	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>>  	select ARCH_USE_CMPXCHG_LOCKREF
>> diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
>> index 9d4aa18f2a82..ed363a0202b9 100644
>> --- a/arch/arm64/kernel/efi.c
>> +++ b/arch/arm64/kernel/efi.c
>> @@ -290,8 +290,7 @@ static int __init arm64_enable_runtime_services(void)
>>  	pr_info("Remapping and enabling EFI services.\n");
>>
>>  	mapsize = memmap.map_end - memmap.map;
>> -	memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map,
>> -						   mapsize);
>> +	memmap.map = memremap(memmap.phys_map, mapsize, MEMREMAP_CACHE);
>
> memmap.phys_map is a void*, so we need to retain a cast to a numeric type or
> we'll get splats like:
>
> arch/arm64/kernel/efi.c: In function ‘arm64_enable_runtime_services’:
> arch/arm64/kernel/efi.c:294:24: warning: passing argument 1 of ‘memremap’ makes integer from pointer without a cast
>   memmap.map = memremap(memmap.phys_map, mapsize, MEMREMAP_CACHE);
>                         ^
> In file included from arch/arm64/kernel/efi.c:18:0:
> include/linux/io.h:203:14: note: expected ‘resource_size_t’ but argument is of type ‘void *’
>  extern void *memremap(resource_size_t offset, size_t size, unsigned long flags);
>               ^

Again, I need to check my compile scripts.

> Unfortunately, even with that fixed this patch breaks boot due to the
> memremap_valid check. It's extremely likely that (portions of) the
> tables are already in the linear mapping, and so page_is_ram will be
> true.

Really, "portions of", not the entire table?

Is it too early to use the direct mapping like
pfn_to_page(memmap.phys_map >> PAGE_SHIFT)?

In any event this splat mandates splitting this memremap() enabling
patch into per-instance conversions of ioremap_cache().

> At boot time I get the following:
>
> Remapping and enabling EFI services.
> ------------[ cut here ]------------
> WARNING: CPU: 0 PID: 1 at kernel/resource.c:541 memremap+0xb0/0xbc()
> memremap attempted on ram 0x00000009f9e4a018 size: 1200
> Modules linked in:
> CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.2.0-rc3+ #201
> Hardware name: ARM Juno development board (r0) (DT)
> Call trace:
> [<ffffffc000089954>] dump_backtrace+0x0/0x124
> [<ffffffc000089a88>] show_stack+0x10/0x1c
> [<ffffffc0005d15c8>] dump_stack+0x84/0xc8
> [<ffffffc0000b3f44>] warn_slowpath_common+0x98/0xd0
> [<ffffffc0000b3fc8>] warn_slowpath_fmt+0x4c/0x58
> [<ffffffc0000b9470>] memremap+0xac/0xbc
> [<ffffffc000814fe8>] arm64_enable_runtime_services+0x90/0x208
> [<ffffffc000082864>] do_one_initcall+0x88/0x1a0
> [<ffffffc000811a20>] kernel_init_freeable+0x8c/0x1f8
> [<ffffffc0005ce370>] kernel_init+0xc/0xd8
> ---[ end trace cb88537fdc8fa200 ]---
> Failed to remap EFI memory map
>
>>  	if (!memmap.map) {
>>  		pr_err("Failed to remap EFI memory map\n");
>>  		return -1;
>> @@ -299,8 +298,8 @@ static int __init arm64_enable_runtime_services(void)
>>  	memmap.map_end = memmap.map + mapsize;
>>  	efi.memmap = &memmap;
>>
>> -	efi.systab = (__force void *)ioremap_cache(efi_system_table,
>> -						   sizeof(efi_system_table_t));
>> +	efi.systab = memremap(efi_system_table, sizeof(efi_system_table_t),
>> +			      MEMREMAP_CACHE);
>>  	if (!efi.systab) {
>>  		pr_err("Failed to remap EFI System Table\n");
>>  		return -1;
>> diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
>> index aef3605a8c47..b9caf6cd44e6 100644
>> --- a/arch/arm64/kernel/smp_spin_table.c
>> +++ b/arch/arm64/kernel/smp_spin_table.c
>> @@ -73,19 +73,19 @@ static int smp_spin_table_cpu_init(unsigned int cpu)
>>
>>  static int smp_spin_table_cpu_prepare(unsigned int cpu)
>>  {
>> -	__le64 __iomem *release_addr;
>> +	__le64 *release_addr;
>>
>>  	if (!cpu_release_addr[cpu])
>>  		return -ENODEV;
>>
>>  	/*
>>  	 * The cpu-release-addr may or may not be inside the linear mapping.
>> -	 * As ioremap_cache will either give us a new mapping or reuse the
>> -	 * existing linear mapping, we can use it to cover both cases. In
>> -	 * either case the memory will be MT_NORMAL.
>> +	 * As memremap will either give us a new mapping or reuse the existing
>> +	 * linear mapping, we can use it to cover both cases. In either case
>> +	 * the memory will be MT_NORMAL.
>>  	 */
>> -	release_addr = ioremap_cache(cpu_release_addr[cpu],
>> -				     sizeof(*release_addr));
>> +	release_addr = memremap(cpu_release_addr[cpu], sizeof(*release_addr),
>> +				MEMREMAP_CACHE);
>
> Likewise, the comment here is contradicted by the code in memremap_valid
> (below). We'll fail to bring up secondaries if the release address fell
> in the linear mapping.

Ah, so ARM's ioremap_cache() silently handles calls to remap RAM with
a pointer to the direct mapping. I think that's a useful property to
carry in the generic implementation. Especially if the direct mapping
has established greater than PAGE_SIZE mappings.

>
>> +#ifdef CONFIG_ARCH_HAS_MEMREMAP
>> +/*
>> + * memremap() is "ioremap" for cases where it is known that the resource
>> + * being mapped does not have i/o side effects and the __iomem
>> + * annotation is not applicable.
>> + */
>> +static bool memremap_valid(resource_size_t offset, size_t size)
>> +{
>> +	if (region_is_ram(offset, size) != 0) {
>> +		WARN_ONCE(1, "memremap attempted on ram %pa size: %zu\n",
>> +				&offset, size);
>> +		return false;
>> +	}
>> +	return true;
>> +}
>> +
>> +void *memremap(resource_size_t offset, size_t size, unsigned long flags)
>> +{
>> +	void __iomem *addr = NULL;
>> +
>> +	if (!memremap_valid(offset, size))
>> +		return NULL;
>> +
>> +	/* try all mapping types requested until one returns non-NULL */
>> +	if (flags & MEMREMAP_CACHE) {
>> +		if (flags & MEMREMAP_STRICT)
>> +			addr = strict_ioremap_cache(offset, size);
>> +		else
>> +			addr = ioremap_cache(offset, size);
>> +	}
>> +
>> +	if (!addr && (flags & MEMREMAP_WT)) {
>> +		if (flags & MEMREMAP_STRICT)
>> +			addr = strict_ioremap_wt(offset, size);
>> +		else
>> +			addr = ioremap_wt(offset, size);
>> +	}
>> +
>> +	return (void __force *) addr;
>> +}
>> +EXPORT_SYMBOL(memremap);
>
> Thanks,
> Mark.

Thanks Mark!
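[The fallback Dan describes — handing back a linear-mapping pointer when the
target is RAM, instead of warning in memremap_valid() — could be folded into
the generic implementation roughly as follows. A sketch only, reusing
region_is_ram(), __va(), and the flags from the patch above; it assumes the
whole range is covered by the direct map:]

	void *memremap(resource_size_t offset, size_t size, unsigned long flags)
	{
		void __iomem *addr = NULL;

		if (flags & MEMREMAP_CACHE) {
			/*
			 * RAM is already mapped cacheable in the kernel's
			 * linear mapping, so reuse that mapping rather
			 * than failing the remap request.
			 */
			if (region_is_ram(offset, size) > 0)
				return __va(offset);
			addr = ioremap_cache(offset, size);
		}

		if (!addr && (flags & MEMREMAP_WT))
			addr = ioremap_wt(offset, size);

		return (void __force *) addr;
	}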
On Mon, Jul 20, 2015 at 03:39:44PM +0100, Dan Williams wrote:
> On Mon, Jul 20, 2015 at 5:00 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> > Hi,
> >
> > On Mon, Jul 20, 2015 at 01:18:23AM +0100, Dan Williams wrote:
> >> Existing users of ioremap_cache() are mapping memory that is known in
> >> advance to not have i/o side effects. These users are forced to cast
> >> away the __iomem annotation, or otherwise neglect to fix the sparse
> >> errors thrown when dereferencing pointers to this memory. Provide
> >> memremap() as a non __iomem annotated ioremap_*() in the case when
> >> ioremap is otherwise a pointer to memory.
> >>
> >> The ARCH_HAS_MEMREMAP kconfig symbol is introduced for archs to assert
> >> that it is safe to recast / reuse the return value from ioremap as a
> >> normal pointer to memory. In other words, archs that mandate specific
> >> accessors for __iomem are not memremap() capable and drivers that care,
> >> like pmem, can add a dependency to disable themselves on these archs.
> >>
> >> Note, that memremap is a break from the ioremap implementation pattern
> >> of adding a new memremap_<type>() for each mapping type. Instead,
> >> the implementation defines flags that are passed to the central
> >> memremap() implementation.
> >>
> >> Outside of ioremap() and ioremap_nocache(), the expectation is that most
> >> calls to ioremap_<type>() are seeking memory-like semantics (e.g.
> >> speculative reads, and prefetching permitted). These callsites can be
> >> moved to memremap() over time.
> >>
> >> Cc: Arnd Bergmann <arnd@arndb.de>
> >> Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
> >> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

[...]

> > Unfortunately, even with that fixed this patch breaks boot due to the
> > memremap_valid check. It's extremely likely that (portions of) the
> > tables are already in the linear mapping, and so page_is_ram will be
> > true.
>
> Really, "portions of", not the entire table?

Yes, if a table straddles a page boundary (the spec doesn't seem to
disallow this). We might not map all of the required pages, e.g.

* If the kernel was loaded more than TEXT_OFFSET bytes above the table
  (the arm64 linear/direct mapping is currently tied to our text
  mapping, and we can't currently address anything below that). In this
  case the start isn't in the direct mapping, and we'll try to create a
  new mapping. That should work.

* If the kernel is built with a small VA range, and we have just the
  right amount of memory before the table begins (this is absurdly
  unlikely). In this case if the start is in the direct mapping, but the
  end isn't, we'll explode when accessing it.

* If the 'mem=' parameter is used, the direct mapping can be truncated.
  In this case if the start is in the direct mapping, but the end isn't,
  we'll explode when accessing it.

In the arm64 ioremap_cache we only check
pfn_valid(__phys_to_pfn(phys_addr)), and assume that if the start was
mapped, then the whole region is. In the cases above that's clearly not
true, so we should probably see about handling that.

> Is it too early to use the direct mapping like
> pfn_to_page(memmap.phys_map >> PAGE_SHIFT)?

I believe it's too early, as we haven't initialised the memmap yet. For
the cases above we will never have a page for the portion of the region
not in the direct mapping anyway.

> In any event this splat mandates splitting this memremap() enabling
> patch into per-instance conversions of ioremap_cache().
> > At boot time I get the following:
> >
> > Remapping and enabling EFI services.
> > ------------[ cut here ]------------
> > WARNING: CPU: 0 PID: 1 at kernel/resource.c:541 memremap+0xb0/0xbc()
> > memremap attempted on ram 0x00000009f9e4a018 size: 1200
> > Modules linked in:
> > CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.2.0-rc3+ #201
> > Hardware name: ARM Juno development board (r0) (DT)
> > Call trace:
> > [<ffffffc000089954>] dump_backtrace+0x0/0x124
> > [<ffffffc000089a88>] show_stack+0x10/0x1c
> > [<ffffffc0005d15c8>] dump_stack+0x84/0xc8
> > [<ffffffc0000b3f44>] warn_slowpath_common+0x98/0xd0
> > [<ffffffc0000b3fc8>] warn_slowpath_fmt+0x4c/0x58
> > [<ffffffc0000b9470>] memremap+0xac/0xbc
> > [<ffffffc000814fe8>] arm64_enable_runtime_services+0x90/0x208
> > [<ffffffc000082864>] do_one_initcall+0x88/0x1a0
> > [<ffffffc000811a20>] kernel_init_freeable+0x8c/0x1f8
> > [<ffffffc0005ce370>] kernel_init+0xc/0xd8
> > ---[ end trace cb88537fdc8fa200 ]---
> > Failed to remap EFI memory map
> >
> >>  	if (!memmap.map) {
> >>  		pr_err("Failed to remap EFI memory map\n");
> >>  		return -1;
> >> @@ -299,8 +298,8 @@ static int __init arm64_enable_runtime_services(void)
> >>  	memmap.map_end = memmap.map + mapsize;
> >>  	efi.memmap = &memmap;
> >>
> >> -	efi.systab = (__force void *)ioremap_cache(efi_system_table,
> >> -						   sizeof(efi_system_table_t));
> >> +	efi.systab = memremap(efi_system_table, sizeof(efi_system_table_t),
> >> +			      MEMREMAP_CACHE);
> >>  	if (!efi.systab) {
> >>  		pr_err("Failed to remap EFI System Table\n");
> >>  		return -1;
> >> diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
> >> index aef3605a8c47..b9caf6cd44e6 100644
> >> --- a/arch/arm64/kernel/smp_spin_table.c
> >> +++ b/arch/arm64/kernel/smp_spin_table.c
> >> @@ -73,19 +73,19 @@ static int smp_spin_table_cpu_init(unsigned int cpu)
> >>
> >>  static int smp_spin_table_cpu_prepare(unsigned int cpu)
> >>  {
> >> -	__le64 __iomem *release_addr;
> >> +	__le64 *release_addr;
> >>
> >>  	if (!cpu_release_addr[cpu])
> >>  		return -ENODEV;
> >>
> >>  	/*
> >>  	 * The cpu-release-addr may or may not be inside the linear mapping.
> >> -	 * As ioremap_cache will either give us a new mapping or reuse the
> >> -	 * existing linear mapping, we can use it to cover both cases. In
> >> -	 * either case the memory will be MT_NORMAL.
> >> +	 * As memremap will either give us a new mapping or reuse the existing
> >> +	 * linear mapping, we can use it to cover both cases. In either case
> >> +	 * the memory will be MT_NORMAL.
> >>  	 */
> >> -	release_addr = ioremap_cache(cpu_release_addr[cpu],
> >> -				     sizeof(*release_addr));
> >> +	release_addr = memremap(cpu_release_addr[cpu], sizeof(*release_addr),
> >> +				MEMREMAP_CACHE);
> >
> > Likewise, the comment here is contradicted by the code in memremap_valid
> > (below). We'll fail to bring up secondaries if the release address fell
> > in the linear mapping.
>
> Ah, so ARM's ioremap_cache() silently handles calls to remap RAM with
> a pointer to the direct mapping. I think that's a useful property to
> carry in the generic implementation. Especially if the direct mapping
> has established greater than PAGE_SIZE mappings.

That sounds good to me.

As mentioned above, I think we should test that the entire region we're
trying to map falls within the direct mapping before assuming we can use
it, though I'm not sure what the best way of doing that is.

Thanks,
Mark.
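[One way to make the test Mark asks for explicit — checking every page of
the candidate region rather than only the first, as the arm64
ioremap_cache() does today — might look like the sketch below. The helper
name is made up, and pfn_valid() is assumed to be an adequate proxy for
"covered by the direct map":]

	static bool region_in_direct_map(phys_addr_t start, size_t size)
	{
		unsigned long pfn = __phys_to_pfn(start);
		unsigned long last = __phys_to_pfn(start + size - 1);

		/*
		 * Reject regions that straddle the end of the direct map
		 * (e.g. after 'mem=' truncation) instead of assuming the
		 * whole region is mapped because its first page is.
		 */
		for (; pfn <= last; pfn++)
			if (!pfn_valid(pfn))
				return false;
		return true;
	}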
On Sun, Jul 19, 2015 at 08:18:23PM -0400, Dan Williams wrote:
> diff --git a/include/linux/io.h b/include/linux/io.h
> index 080a4fbf2ba4..2983b6e63970 100644
> --- a/include/linux/io.h
> +++ b/include/linux/io.h
> @@ -192,4 +192,15 @@ static inline int arch_phys_wc_index(int handle)
>  #endif
>  #endif
>
> +enum {
> +	MEMREMAP_WB = 1 << 0,
> +	MEMREMAP_WT = 1 << 1,
> +	MEMREMAP_WC = 1 << 2,
> +	MEMREMAP_STRICT = 1 << 3,
> +	MEMREMAP_CACHE = MEMREMAP_WB,
> +};

A few things:

1) You'll need MEMREMAP_UC as well now.

2) As you are doing all this sweep over architectures, please
also address the lack of ioremap_*() variant implementations that return
NULL, i.e. not supported, because we've decided for now that so long
as the semantics are not well defined we can't expect architectures
to get it right unless they are doing the work themselves, and the
old strategy of just #define'ing a variant to ioremap_nocache(), which
folks tended to just use, can lead to issues. In your case, since you are
jumping to the flags implementation, this might be knocking two birds
with one stone.

3) For the case where architectures have no MMU we currently do a direct
mapping such as what you describe doing with memremap(). I wonder
if it's worth it to then enable that code to just map to memremap(). That
throws off your usage of CONFIG_ARCH_HAS_MEMREMAP if we want to repurpose
that implementation for the no-MMU case, unless of course you just have a
__memremap() which does the default basic direct mapping implementation.

4) Since we are all blaming semantics for our woes I'd like to ask for
some effort on getting the semantics well defined. Semantics here should cover
some form of Documentation but also sparse annotation checks and perhaps
Coccinelle grammar rules for how things should be done. This should not only
cover general use but also calls which depend on a cache
type having been set. If we used sparse annotations it may mean a
different return type for each cache type. Not sure if we want this.
If we went with grammar rules I'm looking to for instance have in place
rules in scripts/coccinelle which would allow developers to use

	make coccicheck M=foo/

to find issues. I can perhaps help with this, but we'd need to do a good
sweep here to not only cover good territory but also get folks to agree
on things.

5) This may be related to 4), not sure. There are alignment requirements we
should probably iron out for architectures. How will we annotate these
requirements, or allow architectures to be pedantic about them?

  Luis
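[Point 2) in code, roughly: a default ioremap_*() variant that reports "not
supported" instead of silently aliasing ioremap_nocache(), valid for both
CONFIG_MMU and !CONFIG_MMU builds. A sketch against include/linux/io.h, not
a tested patch:]

	#ifndef ioremap_wt
	#define ioremap_wt ioremap_wt
	static inline void __iomem *ioremap_wt(phys_addr_t offset, size_t size)
	{
		/* no arch implementation: signal "not supported" */
		return NULL;
	}
	#endif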
On Tue, Jul 21, 2015 at 4:58 PM, Luis R. Rodriguez <mcgrof@suse.com> wrote:
> On Sun, Jul 19, 2015 at 08:18:23PM -0400, Dan Williams wrote:
>> diff --git a/include/linux/io.h b/include/linux/io.h
>> index 080a4fbf2ba4..2983b6e63970 100644
>> --- a/include/linux/io.h
>> +++ b/include/linux/io.h
>> @@ -192,4 +192,15 @@ static inline int arch_phys_wc_index(int handle)
>>  #endif
>>  #endif
>>
>> +enum {
>> +	MEMREMAP_WB = 1 << 0,
>> +	MEMREMAP_WT = 1 << 1,
>> +	MEMREMAP_WC = 1 << 2,
>> +	MEMREMAP_STRICT = 1 << 3,
>> +	MEMREMAP_CACHE = MEMREMAP_WB,
>> +};
>
> A few things:
>
> 1) You'll need MEMREMAP_UC as well now.

Why? I don't think it fits. If there are any I/O areas (non-memory)
in the range then it simply is not "memory" and should not be using
memremap(). In other words it seems like you really do need to heed
the __iomem annotation in the return value from ioremap_uc().

> 2) As you are doing all this sweep over architectures, please
> also address the lack of ioremap_*() variant implementations that return
> NULL, i.e. not supported, because we've decided for now that so long
> as the semantics are not well defined we can't expect architectures
> to get it right unless they are doing the work themselves, and the
> old strategy of just #define'ing a variant to ioremap_nocache(), which
> folks tended to just use, can lead to issues. In your case, since you are
> jumping to the flags implementation, this might be knocking two birds
> with one stone.

I'm not going to do a general sweep for this as the expectation that
ioremap silently falls back if a mapping type is not supported is
buried all over the place. That said, new usages and conversions to
memremap() can be strict about this. For now, I'm only handling
ioremap_cache() and ioremap_wt() conversions.

> 3) For the case where architectures have no MMU we currently do a direct
> mapping such as what you describe doing with memremap(). I wonder
> if it's worth it to then enable that code to just map to memremap(). That
> throws off your usage of CONFIG_ARCH_HAS_MEMREMAP if we want to repurpose
> that implementation for the no-MMU case, unless of course you just have a
> __memremap() which does the default basic direct mapping implementation.

Yes, in the next rev of this series I am having it fall back to direct
mappings where it makes sense.

> 4) Since we are all blaming semantics for our woes I'd like to ask for
> some effort on getting the semantics well defined. Semantics here should cover
> some form of Documentation but also sparse annotation checks and perhaps
> Coccinelle grammar rules for how things should be done. This should not only
> cover general use but also calls which depend on a cache
> type having been set. If we used sparse annotations it may mean a
> different return type for each cache type. Not sure if we want this.
> If we went with grammar rules I'm looking to for instance have in place
> rules in scripts/coccinelle which would allow developers to use

memremap() explicitly does not want to get into arch semantics debates.
The pointer returned from memremap() is a "void *" that can be used as
normal memory. If it is a normal pointer I don't see the role for
sparse annotation.

>
> 	make coccicheck M=foo/
>
> to find issues. I can perhaps help with this, but we'd need to do a good
> sweep here to not only cover good territory but also get folks to agree
> on things.
>
> 5) This may be related to 4), not sure. There are alignment requirements we
> should probably iron out for architectures. How will we annotate these
> requirements, or allow architectures to be pedantic about them?

What requirements beyond PAGE_SIZE alignment would we need to worry about?
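[On the PAGE_SIZE point: the expectation in this series seems to be that
memremap() maps whole pages and callers handle any sub-page offset
themselves, as the xlate_dev_mem_ptr() conversion further down does. A
sketch, with illustrative variable names:]

	/* map the containing page, then apply the sub-page offset */
	phys_addr_t start = phys & PAGE_MASK;
	unsigned long offset = phys & ~PAGE_MASK;
	void *vaddr = memremap(start, PAGE_SIZE, MEMREMAP_CACHE);

	if (vaddr)
		return vaddr + offset;
	return NULL;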
On Tue, Jul 21, 2015 at 09:04:22PM -0700, Dan Williams wrote:
> On Tue, Jul 21, 2015 at 4:58 PM, Luis R. Rodriguez <mcgrof@suse.com> wrote:
> > On Sun, Jul 19, 2015 at 08:18:23PM -0400, Dan Williams wrote:
> >> diff --git a/include/linux/io.h b/include/linux/io.h
> >> index 080a4fbf2ba4..2983b6e63970 100644
> >> --- a/include/linux/io.h
> >> +++ b/include/linux/io.h
> >> @@ -192,4 +192,15 @@ static inline int arch_phys_wc_index(int handle)
> >>  #endif
> >>  #endif
> >>
> >> +enum {
> >> +	MEMREMAP_WB = 1 << 0,
> >> +	MEMREMAP_WT = 1 << 1,
> >> +	MEMREMAP_WC = 1 << 2,
> >> +	MEMREMAP_STRICT = 1 << 3,
> >> +	MEMREMAP_CACHE = MEMREMAP_WB,
> >> +};
> >
> > A few things:
> >
> > 1) You'll need MEMREMAP_UC as well now.
>
> Why? I don't think it fits. If there are any I/O areas (non-memory)
> in the range then it simply is not "memory" and should not be using
> memremap(). In other words it seems like you really do need to heed
> the __iomem annotation in the return value from ioremap_uc().

One can use a similar argument for some areas of use of write-combining:
what litmus test are you using to add a flag above? Why would WC be OK
and not UC? WC is a cache hack on x86...

> > 2) As you are doing all this sweep over architectures, please
> > also address the lack of ioremap_*() variant implementations that return
> > NULL, i.e. not supported, because we've decided for now that so long
> > as the semantics are not well defined we can't expect architectures
> > to get it right unless they are doing the work themselves, and the
> > old strategy of just #define'ing a variant to ioremap_nocache(), which
> > folks tended to just use, can lead to issues. In your case, since you are
> > jumping to the flags implementation, this might be knocking two birds
> > with one stone.
>
> I'm not going to do a general sweep for this as the expectation that
> ioremap silently falls back if a mapping type is not supported is
> buried all over the place.

But it does not. It cannot. The reason, as I noted in a commit now merged
on tip, is that the default wrappers are nested under #ifndef CONFIG_MMU,
whereas really this should have just been used for ioremap() and iounmap().

That is, the ioremap_*() variants should have a definition even for !CONFIG_MMU,
and since we actually don't want to enable sloppy defines of this the sensible
default is to return NULL from the variants -- for both !CONFIG_MMU
and for CONFIG_MMU. The way to fix this, then, is to move the default variant
definitions out from #ifndef CONFIG_MMU and have them return NULL. We may
be able to have them return something non-NULL by default eventually, but we
first need to beat to death the topic of semantics with all architecture
folks to reach that agreement. Until then the variants should just return
NULL, encouraging arch developers to supply a proper implementation.

> That said, new usages and conversions to
> memremap() can be strict about this. For now, I'm only handling
> ioremap_cache() and ioremap_wt() conversions.

OK, if you are not going to do this let me know and I can do it.

> > 3) For the case where architectures have no MMU we currently do a direct
> > mapping such as what you describe doing with memremap(). I wonder
> > if it's worth it to then enable that code to just map to memremap(). That
> > throws off your usage of CONFIG_ARCH_HAS_MEMREMAP if we want to repurpose
> > that implementation for the no-MMU case, unless of course you just have a
> > __memremap() which does the default basic direct mapping implementation.
>
> Yes, in the next rev of this series I am having it fall back to direct
> mappings where it makes sense.
>
> > 4) Since we are all blaming semantics for our woes I'd like to ask for
> > some effort on getting the semantics well defined. Semantics here should cover
> > some form of Documentation but also sparse annotation checks and perhaps
> > Coccinelle grammar rules for how things should be done. This should not only
> > cover general use but also calls which depend on a cache
> > type having been set. If we used sparse annotations it may mean a
> > different return type for each cache type. Not sure if we want this.
> > If we went with grammar rules I'm looking to for instance have in place
> > rules in scripts/coccinelle which would allow developers to use
>
> memremap() explicitly does not want to get into arch semantics debates.

Why? The ioremap() cluster fuck seems like a good example to learn from.

I was also under the impression you are going to provide a new API with flags
to kill / make old ioremap() variants use this new API with the flags passed?
Is that not the case?

> The pointer returned from memremap() is a "void *" that can be used as
> normal memory. If it is a normal pointer I don't see the role for
> sparse annotation.

Hrm, do we want to *prevent* certain uses of the memory range when it is
direct?

> > 	make coccicheck M=foo/
> >
> > to find issues. I can perhaps help with this, but we'd need to do a good
> > sweep here to not only cover good territory but also get folks to agree
> > on things.
> >
> > 5) This may be related to 4), not sure. There are alignment requirements we
> > should probably iron out for architectures. How will we annotate these
> > requirements, or allow architectures to be pedantic about them?
>
> What requirements beyond PAGE_SIZE alignment would we need to worry about?

That's a big concern some folks expressed; architecture folks would know more.

One last thing: if you are providing a new API to replace a series of old
symbols that were used before (I thought you were working towards this for all
ioremap_*() variants) we have to take care to ensure that any symbol that is
currently exported with EXPORT_SYMBOL_GPL() does not get an EXPORT_SYMBOL()
wrapper-escape, since we do not want proprietary drivers to make use of these
alternatives.

  Luis
On Wed, Jul 22, 2015 at 3:55 PM, Luis R. Rodriguez <mcgrof@suse.com> wrote:
> On Tue, Jul 21, 2015 at 09:04:22PM -0700, Dan Williams wrote:
>> On Tue, Jul 21, 2015 at 4:58 PM, Luis R. Rodriguez <mcgrof@suse.com> wrote:
>> > On Sun, Jul 19, 2015 at 08:18:23PM -0400, Dan Williams wrote:
>> >> diff --git a/include/linux/io.h b/include/linux/io.h
>> >> index 080a4fbf2ba4..2983b6e63970 100644
>> >> --- a/include/linux/io.h
>> >> +++ b/include/linux/io.h
>> >> @@ -192,4 +192,15 @@ static inline int arch_phys_wc_index(int handle)
>> >>  #endif
>> >>  #endif
>> >>
>> >> +enum {
>> >> +	MEMREMAP_WB = 1 << 0,
>> >> +	MEMREMAP_WT = 1 << 1,
>> >> +	MEMREMAP_WC = 1 << 2,
>> >> +	MEMREMAP_STRICT = 1 << 3,
>> >> +	MEMREMAP_CACHE = MEMREMAP_WB,
>> >> +};
>> >
>> > A few things:
>> >
>> > 1) You'll need MEMREMAP_UC as well now.
>>
>> Why? I don't think it fits. If there are any I/O areas (non-memory)
>> in the range then it simply is not "memory" and should not be using
>> memremap(). In other words it seems like you really do need to heed
>> the __iomem annotation in the return value from ioremap_uc().
>
> One can use a similar argument for some areas of use of write-combining:
> what litmus test are you using to add a flag above? Why would WC be OK
> and not UC? WC is a cache hack on x86...

I should remove WC from the list for now, I'm not converting
ioremap_wc() to memremap at this time.

That said it's not the same argument. A driver calling ioremap_wc()
is explicitly signing up to handle flushing the write-combine buffer
and the expectation is that there are no I/O ranges in the mapping
that would be affected by write combining. An ioremap_uc() mapping
says "careful, there are I/O sensitive registers in this range".

>> > 2) As you are doing all this sweep over architectures, please
>> > also address the lack of ioremap_*() variant implementations that return
>> > NULL, i.e. not supported, because we've decided for now that so long
>> > as the semantics are not well defined we can't expect architectures
>> > to get it right unless they are doing the work themselves, and the
>> > old strategy of just #define'ing a variant to ioremap_nocache(), which
>> > folks tended to just use, can lead to issues. In your case, since you are
>> > jumping to the flags implementation, this might be knocking two birds
>> > with one stone.
>>
>> I'm not going to do a general sweep for this as the expectation that
>> ioremap silently falls back if a mapping type is not supported is
>> buried all over the place.
>
> But it does not. It cannot. The reason, as I noted in a commit now merged
> on tip, is that the default wrappers are nested under #ifndef CONFIG_MMU,
> whereas really this should have just been used for ioremap() and iounmap().
>
> That is, the ioremap_*() variants should have a definition even for !CONFIG_MMU,
> and since we actually don't want to enable sloppy defines of this the sensible
> default is to return NULL from the variants -- for both !CONFIG_MMU
> and for CONFIG_MMU. The way to fix this, then, is to move the default variant
> definitions out from #ifndef CONFIG_MMU and have them return NULL. We may
> be able to have them return something non-NULL by default eventually, but we
> first need to beat to death the topic of semantics with all architecture
> folks to reach that agreement. Until then the variants should just return
> NULL, encouraging arch developers to supply a proper implementation.

That clean up is orthogonal to memremap. memremap will return NULL
for unsupported mapping types.
>> That said, new usages and conversions to
>> memremap() can be strict about this. For now, I'm only handling
>> ioremap_cache() and ioremap_wt() conversions.
>
> OK, if you are not going to do this let me know and I can do it.

Yes, go ahead.

>> > 3) For the case where architectures have no MMU we currently do a direct
>> > mapping such as what you describe doing with memremap(). I wonder
>> > if it's worth it to then enable that code to just map to memremap(). That
>> > throws off your usage of CONFIG_ARCH_HAS_MEMREMAP if we want to repurpose
>> > that implementation for the no-MMU case, unless of course you just have a
>> > __memremap() which does the default basic direct mapping implementation.
>>
>> Yes, in the next rev of this series I am having it fall back to direct
>> mappings where it makes sense.
>>
>> > 4) Since we are all blaming semantics for our woes I'd like to ask for
>> > some effort on getting the semantics well defined. Semantics here should cover
>> > some form of Documentation but also sparse annotation checks and perhaps
>> > Coccinelle grammar rules for how things should be done. This should not only
>> > cover general use but also calls which depend on a cache
>> > type having been set. If we used sparse annotations it may mean a
>> > different return type for each cache type. Not sure if we want this.
>> > If we went with grammar rules I'm looking to for instance have in place
>> > rules in scripts/coccinelle which would allow developers to use
>>
>> memremap() explicitly does not want to get into arch semantics debates.
>
> Why? The ioremap() cluster fuck seems like a good example to learn from.
>
> I was also under the impression you are going to provide a new API with flags
> to kill / make old ioremap() variants use this new API with the flags passed?
> Is that not the case?

Yes, I'll post the new series shortly once 0day says there are no
build regressions.

>> The pointer returned from memremap() is a "void *" that can be used as
>> normal memory. If it is a normal pointer I don't see the role for
>> sparse annotation.
>
> Hrm, do we want to *prevent* certain uses of the memory range when it is
> direct?

Can you give an example scenario?

>
>> > 	make coccicheck M=foo/
>> >
>> > to find issues. I can perhaps help with this, but we'd need to do a good
>> > sweep here to not only cover good territory but also get folks to agree
>> > on things.
>> >
>> > 5) This may be related to 4), not sure. There are alignment requirements we
>> > should probably iron out for architectures. How will we annotate these
>> > requirements, or allow architectures to be pedantic about them?
>>
>> What requirements beyond PAGE_SIZE alignment would we need to worry about?
>
> That's a big concern some folks expressed; architecture folks would know more.
>
> One last thing: if you are providing a new API to replace a series of old
> symbols that were used before (I thought you were working towards this for all
> ioremap_*() variants) we have to take care to ensure that any symbol that is
> currently exported with EXPORT_SYMBOL_GPL() does not get an EXPORT_SYMBOL()
> wrapper-escape, since we do not want proprietary drivers to make use of these
> alternatives.

Yes, memremap() is inheriting the export level of ioremap_cache.
On Wed, Jul 22, 2015 at 04:15:41PM -0700, Dan Williams wrote:
> On Wed, Jul 22, 2015 at 3:55 PM, Luis R. Rodriguez <mcgrof@suse.com> wrote:
> > On Tue, Jul 21, 2015 at 09:04:22PM -0700, Dan Williams wrote:
> >> On Tue, Jul 21, 2015 at 4:58 PM, Luis R. Rodriguez <mcgrof@suse.com> wrote:
> >> > On Sun, Jul 19, 2015 at 08:18:23PM -0400, Dan Williams wrote:
> >> >> diff --git a/include/linux/io.h b/include/linux/io.h
> >> >> index 080a4fbf2ba4..2983b6e63970 100644
> >> >> --- a/include/linux/io.h
> >> >> +++ b/include/linux/io.h
> >> >> @@ -192,4 +192,15 @@ static inline int arch_phys_wc_index(int handle)
> >> >>  #endif
> >> >>  #endif
> >> >>
> >> >> +enum {
> >> >> +	MEMREMAP_WB = 1 << 0,
> >> >> +	MEMREMAP_WT = 1 << 1,
> >> >> +	MEMREMAP_WC = 1 << 2,
> >> >> +	MEMREMAP_STRICT = 1 << 3,
> >> >> +	MEMREMAP_CACHE = MEMREMAP_WB,
> >> >> +};
> >> >
> >> > A few things:
> >> >
> >> > 1) You'll need MEMREMAP_UC as well now.
> >>
> >> Why? I don't think it fits. If there are any I/O areas (non-memory)
> >> in the range then it simply is not "memory" and should not be using
> >> memremap(). In other words it seems like you really do need to heed
> >> the __iomem annotation in the return value from ioremap_uc().
> >
> > One can use a similar argument for some areas of use of write-combining:
> > what litmus test are you using to add a flag above? Why would WC be OK
> > and not UC? WC is a cache hack on x86...
>
> I should remove WC from the list for now, I'm not converting
> ioremap_wc() to memremap at this time.

Do you have plans to do so? If so, can you elaborate on such plans?

> That said it's not the same argument. A driver calling ioremap_wc()
> is explicitly signing up to handle flushing the write-combine buffer
> and the expectation is that there are no I/O ranges in the mapping
> that would be affected by write combining.

The thing is, the semantics even for write-combining need to be discussed
further: write-combining for kernel interfaces is screwed up for at least
one architecture because we didn't get things right or document
expectations correctly, amongst other things, so the above is not
sufficient to define write-combining for now.

> An ioremap_uc() mapping says "careful, there are I/O sensitive registers
> in this range".

Yes, but we have ioremap_nocache(). I added ioremap_uc(), and the goal
behind ioremap_uc() was to enable efficient use of absolutely no cache at
all; this was particularly aimed at taking advantage of strong UC on PAT.
This was also a compromise to help with the long-term transition on x86
from UC- to UC, as we also convert all write-combining variant API users
(old mtrr_add(), now arch_phys_wc_add()) over from ioremap_nocache() to
ioremap_wc() and ensure their MMIO registers keep using ioremap_nocache()
-- but for corner cases with old MTRR hacks we need ioremap_uc() to also
avoid regressions when we do the flip on x86 from UC- to UC. How we want
to map this to other architectures remains to be discovered, but perhaps
they don't care.

Anyway, so why are WB and WT allowed if you are going to remove WC and not
add UC?
> >> > 2) As you are doing all this sweep over architectures, please
> >> > also address the lack of ioremap_*() variant implementations that return
> >> > NULL, i.e. not supported, because we've decided for now that so long
> >> > as the semantics are not well defined we can't expect architectures
> >> > to get it right unless they are doing the work themselves, and the
> >> > old strategy of just #define'ing a variant to ioremap_nocache(), which
> >> > folks tended to just use, can lead to issues. In your case, since you are
> >> > jumping to the flags implementation, this might be knocking two birds
> >> > with one stone.
> >>
> >> I'm not going to do a general sweep for this as the expectation that
> >> ioremap silently falls back if a mapping type is not supported is
> >> buried all over the place.
> >
> > But it does not. It cannot. The reason, as I noted in a commit now merged
> > on tip, is that the default wrappers are nested under #ifndef CONFIG_MMU,
> > whereas really this should have just been used for ioremap() and iounmap().
> >
> > That is, the ioremap_*() variants should have a definition even for !CONFIG_MMU,
> > and since we actually don't want to enable sloppy defines of this the sensible
> > default is to return NULL from the variants -- for both !CONFIG_MMU
> > and for CONFIG_MMU. The way to fix this, then, is to move the default variant
> > definitions out from #ifndef CONFIG_MMU and have them return NULL. We may
> > be able to have them return something non-NULL by default eventually, but we
> > first need to beat to death the topic of semantics with all architecture
> > folks to reach that agreement. Until then the variants should just return
> > NULL, encouraging arch developers to supply a proper implementation.
>
> That clean up is orthogonal to memremap. memremap will return NULL
> for unsupported mapping types.

It can be, if you are not doing a major long term cleanup...

> >> That said, new usages and conversions to
> >> memremap() can be strict about this. For now, I'm only handling
> >> ioremap_cache() and ioremap_wt() conversions.
> >
> > OK, if you are not going to do this let me know and I can do it.
>
> Yes, go ahead.

OK.

> >> > 3) For the case where architectures have no MMU we currently do a direct
> >> > mapping such as what you describe doing with memremap(). I wonder
> >> > if it's worth it to then enable that code to just map to memremap(). That
> >> > throws off your usage of CONFIG_ARCH_HAS_MEMREMAP if we want to repurpose
> >> > that implementation for the no-MMU case, unless of course you just have a
> >> > __memremap() which does the default basic direct mapping implementation.
> >>
> >> Yes, in the next rev of this series I am having it fall back to direct
> >> mappings where it makes sense.
> >>
> >> > 4) Since we are all blaming semantics for our woes I'd like to ask for
> >> > some effort on getting the semantics well defined. Semantics here should cover
> >> > some form of Documentation but also sparse annotation checks and perhaps
> >> > Coccinelle grammar rules for how things should be done. This should not only
> >> > cover general use but also calls which depend on a cache
> >> > type having been set. If we used sparse annotations it may mean a
> >> > different return type for each cache type. Not sure if we want this.
> >> > If we went with grammar rules I'm looking to for instance have in place
> >> > rules in scripts/coccinelle which would allow developers to use
> >>
> >> memremap() explicitly does not want to get into arch semantics debates.
> >
> > Why? The ioremap() cluster fuck seems like a good example to learn from.
> >
> > I was also under the impression you are going to provide a new API with flags
> > to kill / make old ioremap() variants use this new API with the flags passed?
> > Is that not the case?
>
> Yes, I'll post the new series shortly once 0day says there are no
> build regressions.

Then what API is it you intend on using for that? My point is that the
semantics should matter strongly for that, too.

> >> The pointer returned from memremap() is a "void *" that can be used as
> >> normal memory. If it is a normal pointer I don't see the role for
> >> sparse annotation.
> >
> > Hrm, do we want to *prevent* certain uses of the memory range when it is
> > direct?
>
> Can you give an example scenario?

arch_phys_wc_add(), set_memory_wc(), set_memory_*(). There may be others.
The mem region API Toshi just fixed is another.

> >> > 	make coccicheck M=foo/
> >> >
> >> > to find issues. I can perhaps help with this, but we'd need to do a good
> >> > sweep here to not only cover good territory but also get folks to agree
> >> > on things.
> >> >
> >> > 5) This may be related to 4), not sure. There are alignment requirements we
> >> > should probably iron out for architectures. How will we annotate these
> >> > requirements, or allow architectures to be pedantic about them?
> >>
> >> What requirements beyond PAGE_SIZE alignment would we need to worry about?
> >
> > That's a big concern some folks expressed; architecture folks would know more.
> >
> > One last thing: if you are providing a new API to replace a series of old
> > symbols that were used before (I thought you were working towards this for all
> > ioremap_*() variants) we have to take care to ensure that any symbol that is
> > currently exported with EXPORT_SYMBOL_GPL() does not get an EXPORT_SYMBOL()
> > wrapper-escape, since we do not want proprietary drivers to make use of these
> > alternatives.
>
> Yes, memremap() is inheriting the export level of ioremap_cache.

Like I said, we need to take care that symbols currently exported with
EXPORT_SYMBOL_GPL() are not exposed through new EXPORT_SYMBOL() wrappers,
which would enable proprietary drivers to make use of those symbols through
the new wrapper. One idea may be to work on symbol namespaces, if we cannot
address this through static inlines for old callers and just making the new
APIs EXPORT_SYMBOL_GPL().

  Luis
On Wed, Jul 22, 2015 at 5:55 PM, Luis R. Rodriguez <mcgrof@suse.com> wrote:
> On Wed, Jul 22, 2015 at 04:15:41PM -0700, Dan Williams wrote:
> Anyway, so why are WB and WT allowed if you are going to remove WC and not
> add UC?
>

Because all existing usages of WB and WT mappings are clearly free of
I/O side effect concerns. The WC and now UC usages are a muddy mix of
"sometimes there are I/O side effects, but it depends on the arch and
driver".
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1c5021002fe4..1d64aae0a226 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -3,6 +3,7 @@ config ARM default y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_MEMREMAP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 318175f62c24..305def28385f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -6,6 +6,7 @@ config ARM64 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_MEMREMAP select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 9d4aa18f2a82..ed363a0202b9 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -290,8 +290,7 @@ static int __init arm64_enable_runtime_services(void) pr_info("Remapping and enabling EFI services.\n"); mapsize = memmap.map_end - memmap.map; - memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, - mapsize); + memmap.map = memremap(memmap.phys_map, mapsize, MEMREMAP_CACHE); if (!memmap.map) { pr_err("Failed to remap EFI memory map\n"); return -1; @@ -299,8 +298,8 @@ static int __init arm64_enable_runtime_services(void) memmap.map_end = memmap.map + mapsize; efi.memmap = &memmap; - efi.systab = (__force void *)ioremap_cache(efi_system_table, - sizeof(efi_system_table_t)); + efi.systab = memremap(efi_system_table, sizeof(efi_system_table_t), + MEMREMAP_CACHE); if (!efi.systab) { pr_err("Failed to remap EFI System Table\n"); return -1; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index aef3605a8c47..b9caf6cd44e6 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -73,19 +73,19 @@ static int smp_spin_table_cpu_init(unsigned int cpu) static int smp_spin_table_cpu_prepare(unsigned int cpu) { - __le64 __iomem *release_addr; + __le64 *release_addr; if (!cpu_release_addr[cpu]) return -ENODEV; /* * The cpu-release-addr may or may not be inside the linear mapping. - * As ioremap_cache will either give us a new mapping or reuse the - * existing linear mapping, we can use it to cover both cases. In - * either case the memory will be MT_NORMAL. + * As memremap will either give us a new mapping or reuse the existing + * linear mapping, we can use it to cover both cases. In either case + * the memory will be MT_NORMAL. */ - release_addr = ioremap_cache(cpu_release_addr[cpu], - sizeof(*release_addr)); + release_addr = memremap(cpu_release_addr[cpu], sizeof(*release_addr), + MEMREMAP_CACHE); if (!release_addr) return -ENOMEM; @@ -97,15 +97,14 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * the boot protocol. */ writeq_relaxed(__pa(secondary_holding_pen), release_addr); - __flush_dcache_area((__force void *)release_addr, - sizeof(*release_addr)); + __flush_dcache_area(release_addr, sizeof(*release_addr)); /* * Send an event to wake up the secondary CPU. 
*/ sev(); - iounmap(release_addr); + memunmap(release_addr); return 0; } diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 34aa19352dc1..2373bf183527 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -14,6 +14,7 @@ config FRV select OLD_SIGSUSPEND3 select OLD_SIGACTION select HAVE_DEBUG_STACKOVERFLOW + select ARCH_HAS_MEMREMAP config ZONE_DMA bool diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 2dd8f63bfbbb..831b1be8c43d 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,6 +23,7 @@ config M68K select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION + select ARCH_HAS_MEMREMAP config RWSEM_GENERIC_SPINLOCK bool diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index 0b389a81c43a..5669fe3eb807 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -24,6 +24,7 @@ config METAG select HAVE_PERF_EVENTS select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNDERSCORE_SYMBOL_PREFIX + select ARCH_HAS_MEMREMAP select IRQ_DOMAIN select MODULES_USE_ELF_RELA select OF diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index aab7e46cadd5..ac9da90931b9 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -59,6 +59,7 @@ config MIPS select SYSCTL_EXCEPTION_TRACE select HAVE_VIRT_CPU_ACCOUNTING_GEN select HAVE_IRQ_TIME_ACCOUNTING + select ARCH_HAS_MEMREMAP menu "Machine selection" diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5ef27113b898..50889c7b5c81 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -89,6 +89,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_SERIO select BINFMT_ELF select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_MEMREMAP select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3dbb7e7909ca..80eb19c8b62d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86 select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_MEMREMAP select ARCH_HAS_PMEM_API select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index afa64adb75ee..b87ca059e5fc 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -31,19 +31,19 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!csize) return 0; - vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE); + vaddr = memremap(pfn << PAGE_SHIFT, PAGE_SIZE, MEMREMAP_CACHE); if (!vaddr) return -ENOMEM; if (userbuf) { if (copy_to_user(buf, vaddr + offset, csize)) { - iounmap(vaddr); + memunmap(vaddr); return -EFAULT; } } else memcpy(buf, vaddr + offset, csize); set_iounmap_nonlazy(); - iounmap(vaddr); + memunmap(vaddr); return csize; } diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index dc1404bf8e4b..a2248482c14b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -49,7 +49,7 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, pa = node->paddr + sizeof(struct setup_data) + pos; pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); if (PageHighMem(pg)) { - p = ioremap_cache(pa, count); + p = memremap(pa, count, MEMREMAP_CACHE); if (!p) return -ENXIO; } else @@ -58,7 +58,7 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, remain = copy_to_user(user_buf, p, count); if (PageHighMem(pg)) - iounmap(p); + memunmap(p); if (remain) return -EFAULT; @@ -128,7 +128,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) pg = 
> diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
> index dc1404bf8e4b..a2248482c14b 100644
> --- a/arch/x86/kernel/kdebugfs.c
> +++ b/arch/x86/kernel/kdebugfs.c
> @@ -49,7 +49,7 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
> pa = node->paddr + sizeof(struct setup_data) + pos;
> pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
> if (PageHighMem(pg)) {
> - p = ioremap_cache(pa, count);
> + p = memremap(pa, count, MEMREMAP_CACHE);
> if (!p)
> return -ENXIO;
> } else
> @@ -58,7 +58,7 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
> remain = copy_to_user(user_buf, p, count);
>
> if (PageHighMem(pg))
> - iounmap(p);
> + memunmap(p);
>
> if (remain)
> return -EFAULT;
> @@ -128,7 +128,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
>
> pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
> if (PageHighMem(pg)) {
> - data = ioremap_cache(pa_data, sizeof(*data));
> + data = memremap(pa_data, sizeof(*data), MEMREMAP_CACHE);
> if (!data) {
> kfree(node);
> error = -ENXIO;
> @@ -144,7 +144,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
> pa_data = data->next;
>
> if (PageHighMem(pg))
> - iounmap(data);
> + memunmap(data);
> if (error)
> goto err_dir;
> no++;
> diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
> index c2bedaea11f7..d2670bb75a08 100644
> --- a/arch/x86/kernel/ksysfs.c
> +++ b/arch/x86/kernel/ksysfs.c
> @@ -16,8 +16,8 @@
> #include <linux/stat.h>
> #include <linux/slab.h>
> #include <linux/mm.h>
> +#include <linux/io.h>
>
> -#include <asm/io.h>
> #include <asm/setup.h>
>
> static ssize_t version_show(struct kobject *kobj,
> @@ -79,12 +79,12 @@ static int get_setup_data_paddr(int nr, u64 *paddr)
> *paddr = pa_data;
> return 0;
> }
> - data = ioremap_cache(pa_data, sizeof(*data));
> + data = memremap(pa_data, sizeof(*data), MEMREMAP_CACHE);
> if (!data)
> return -ENOMEM;
>
> pa_data = data->next;
> - iounmap(data);
> + memunmap(data);
> i++;
> }
> return -EINVAL;
> @@ -97,17 +97,17 @@ static int __init get_setup_data_size(int nr, size_t *size)
> u64 pa_data = boot_params.hdr.setup_data;
>
> while (pa_data) {
> - data = ioremap_cache(pa_data, sizeof(*data));
> + data = memremap(pa_data, sizeof(*data), MEMREMAP_CACHE);
> if (!data)
> return -ENOMEM;
> if (nr == i) {
> *size = data->len;
> - iounmap(data);
> + memunmap(data);
> return 0;
> }
>
> pa_data = data->next;
> - iounmap(data);
> + memunmap(data);
> i++;
> }
> return -EINVAL;
> @@ -127,12 +127,12 @@ static ssize_t type_show(struct kobject *kobj,
> ret = get_setup_data_paddr(nr, &paddr);
> if (ret)
> return ret;
> - data = ioremap_cache(paddr, sizeof(*data));
> + data = memremap(paddr, sizeof(*data), MEMREMAP_CACHE);
> if (!data)
> return -ENOMEM;
>
> ret = sprintf(buf, "0x%x\n", data->type);
> - iounmap(data);
> + memunmap(data);
>
> return ret;
> }
> @@ -154,7 +154,7 @@ static ssize_t setup_data_data_read(struct file *fp,
> ret = get_setup_data_paddr(nr, &paddr);
> if (ret)
> return ret;
> - data = ioremap_cache(paddr, sizeof(*data));
> + data = memremap(paddr, sizeof(*data), MEMREMAP_CACHE);
> if (!data)
> return -ENOMEM;
>
> @@ -170,15 +170,15 @@ static ssize_t setup_data_data_read(struct file *fp,
> goto out;
>
> ret = count;
> - p = ioremap_cache(paddr + sizeof(*data), data->len);
> + p = memremap(paddr + sizeof(*data), data->len, MEMREMAP_CACHE);
> if (!p) {
> ret = -ENOMEM;
> goto out;
> }
>
> memcpy(buf, p + off, count);
> - iounmap(p);
> + memunmap(p);
> out:
> - iounmap(data);
> + memunmap(data);
> return ret;
> }
> @@ -250,13 +250,13 @@ static int __init get_setup_data_total_num(u64 pa_data, int *nr)
> *nr = 0;
> while (pa_data) {
> *nr += 1;
> - data = ioremap_cache(pa_data, sizeof(*data));
> + data = memremap(pa_data, sizeof(*data), MEMREMAP_CACHE);
> if (!data) {
> ret = -ENOMEM;
> goto out;
> }
> pa_data = data->next;
> - iounmap(data);
> + memunmap(data);
> }
>
> out:
> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index ccf5c867b2eb..4920f735f551 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -401,12 +401,10 @@ void *xlate_dev_mem_ptr(phys_addr_t phys)
> if (page_is_ram(start >> PAGE_SHIFT))
> return __va(phys);
>
> - vaddr = ioremap_cache(start, PAGE_SIZE);
> - /* Only add the offset on success and return NULL if the ioremap() failed: */
> + vaddr = memremap(start, PAGE_SIZE, MEMREMAP_CACHE);
> if (vaddr)
> - vaddr += offset;
> -
> - return vaddr;
> + return vaddr + offset;
> + return NULL;
> }
>
> void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
> @@ -414,7 +412,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
> if (page_is_ram(phys >> PAGE_SHIFT))
> return;
>
> - iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
> + memunmap((void *)((unsigned long)addr & PAGE_MASK));
> }
>
> static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index e5b872ba2484..da1078692c41 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -3,6 +3,7 @@ config ZONE_DMA
>
> config XTENSA
> def_bool y
> + select ARCH_HAS_MEMREMAP
> select ARCH_WANT_FRAME_POINTERS
> select ARCH_WANT_IPC_PARSE_VERSION
> select ARCH_WANT_OPTIONAL_GPIOLIB
> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
> index a095d4f858da..d4992fea6994 100644
> --- a/drivers/acpi/apei/einj.c
> +++ b/drivers/acpi/apei/einj.c
> @@ -318,7 +318,8 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type,
> sizeof(*trigger_tab) - 1);
> goto out;
> }
> - trigger_tab = ioremap_cache(trigger_paddr, sizeof(*trigger_tab));
> + trigger_tab = memremap(trigger_paddr, sizeof(*trigger_tab),
> + MEMREMAP_CACHE);
> if (!trigger_tab) {
> pr_err(EINJ_PFX "Failed to map trigger table!\n");
> goto out_rel_header;
> @@ -346,8 +347,8 @@ static int __einj_error_trigger(u64 trigger_paddr, u32 type,
> (unsigned long long)trigger_paddr + table_size - 1);
> goto out_rel_header;
> }
> - iounmap(trigger_tab);
> - trigger_tab = ioremap_cache(trigger_paddr, table_size);
> + memunmap(trigger_tab);
> + trigger_tab = memremap(trigger_paddr, table_size, MEMREMAP_CACHE);
> if (!trigger_tab) {
> pr_err(EINJ_PFX "Failed to map trigger table!\n");
> goto out_rel_entry;
> @@ -409,7 +410,7 @@ out_rel_header:
> release_mem_region(trigger_paddr, sizeof(*trigger_tab));
> out:
> if (trigger_tab)
> - iounmap(trigger_tab);
> + memunmap(trigger_tab);
>
> return rc;
> }
> diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
> index 3670bbab57a3..dc49b0b42d65 100644
> --- a/drivers/acpi/apei/erst.c
> +++ b/drivers/acpi/apei/erst.c
> @@ -77,7 +77,7 @@ static struct acpi_table_erst *erst_tab;
>
> static struct erst_erange {
> u64 base;
> u64 size;
> - void __iomem *vaddr;
> + void *vaddr;
> u32 attr;
> } erst_erange;
> @@ -1185,8 +1185,8 @@ static int __init erst_init(void)
> goto err_unmap_reg;
> }
> rc = -ENOMEM;
> - erst_erange.vaddr = ioremap_cache(erst_erange.base,
> - erst_erange.size);
> + erst_erange.vaddr = memremap(erst_erange.base, erst_erange.size,
> + MEMREMAP_CACHE);
> if (!erst_erange.vaddr)
> goto err_release_erange;
> diff --git a/drivers/firmware/google/memconsole.c b/drivers/firmware/google/memconsole.c
> index 2f569aaed4c7..2b8160d12ad6 100644
> --- a/drivers/firmware/google/memconsole.c
> +++ b/drivers/firmware/google/memconsole.c
> @@ -52,14 +52,15 @@ static ssize_t memconsole_read(struct file *filp, struct kobject *kobp,
> char *memconsole;
> ssize_t ret;
>
> - memconsole = ioremap_cache(memconsole_baseaddr, memconsole_length);
> + memconsole = memremap(memconsole_baseaddr, memconsole_length,
> + MEMREMAP_CACHE);
> if (!memconsole) {
> pr_err("memconsole: ioremap_cache failed\n");
> return -ENOMEM;
> }
> ret = memory_read_from_buffer(buf, count, &pos, memconsole,
> memconsole_length);
> - iounmap(memconsole);
> + memunmap(memconsole);
> return ret;
> }
> diff --git a/include/linux/device.h b/include/linux/device.h
> index 5a31bf3a4024..04e53eab71c5 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -668,6 +668,11 @@ extern void devm_free_pages(struct device *dev, unsigned long addr);
>
> void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
>
> +static inline void *devm_memremap_resource(struct device *dev, struct resource *res)
> +{
> + return (void __force *) devm_ioremap_resource(dev, res);
> +}
> +
> /* allows to add/remove a custom action to devres stack */
> int devm_add_action(struct device *dev, void (*action)(void *), void *data);
> void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
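As an aside, the devm_ wrapper reads naturally from a driver probe
path. A minimal sketch of a caller, assuming a hypothetical platform
driver (foo_probe is not part of this series, it's just for
illustration):

	static int foo_probe(struct platform_device *pdev)
	{
		struct resource *res;
		void *shmem;

		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		shmem = devm_memremap_resource(&pdev->dev, res);
		if (IS_ERR(shmem))
			return PTR_ERR(shmem);

		/* plain pointer: no __iomem accessors, no sparse warnings */
		memset(shmem, 0, resource_size(res));

		return 0;
	}

One thing worth noting: unlike memremap(), which returns NULL on
failure, the underlying devm_ioremap_resource() reports errors with
an ERR_PTR() value, so callers of the wrapper need IS_ERR() rather
than a NULL check.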
> diff --git a/include/linux/io.h b/include/linux/io.h
> index 080a4fbf2ba4..2983b6e63970 100644
> --- a/include/linux/io.h
> +++ b/include/linux/io.h
> @@ -192,4 +192,15 @@ static inline int arch_phys_wc_index(int handle)
> #endif
> #endif
>
> +enum {
> + MEMREMAP_WB = 1 << 0,
> + MEMREMAP_WT = 1 << 1,
> + MEMREMAP_WC = 1 << 2,
> + MEMREMAP_STRICT = 1 << 3,
> + MEMREMAP_CACHE = MEMREMAP_WB,
> +};
> +
> +extern void *memremap(resource_size_t offset, size_t size, unsigned long flags);
> +extern void memunmap(void *addr);
> +
> #endif /* _LINUX_IO_H */
> diff --git a/kernel/resource.c b/kernel/resource.c
> index fed052a1bc9f..a96376be513a 100644
> --- a/kernel/resource.c
> +++ b/kernel/resource.c
> @@ -23,7 +23,7 @@
> #include <linux/pfn.h>
> #include <linux/mm.h>
> #include <linux/resource_ext.h>
> -#include <asm/io.h>
> +#include <linux/io.h>
>
> struct resource ioport_resource = {
> @@ -528,6 +528,55 @@ int region_is_ram(resource_size_t start, unsigned long size)
> return ret;
> }
>
> +#ifdef CONFIG_ARCH_HAS_MEMREMAP
> +/*
> + * memremap() is "ioremap" for cases where it is known that the resource
> + * being mapped does not have i/o side effects and the __iomem
> + * annotation is not applicable.
> + */
> +static bool memremap_valid(resource_size_t offset, size_t size)
> +{
> + if (region_is_ram(offset, size) != 0) {
> + WARN_ONCE(1, "memremap attempted on ram %pa size: %zu\n",
> + &offset, size);
> + return false;
> + }
> + return true;
> +}
> +
> +void *memremap(resource_size_t offset, size_t size, unsigned long flags)
> +{
> + void __iomem *addr = NULL;
> +
> + if (!memremap_valid(offset, size))
> + return NULL;
> +
> + /* try all mapping types requested until one returns non-NULL */
> + if (flags & MEMREMAP_CACHE) {
> + if (flags & MEMREMAP_STRICT)
> + addr = strict_ioremap_cache(offset, size);
> + else
> + addr = ioremap_cache(offset, size);
> + }
> +
> + if (!addr && (flags & MEMREMAP_WT)) {
> + if (flags & MEMREMAP_STRICT)
> + addr = strict_ioremap_wt(offset, size);
> + else
> + addr = ioremap_wt(offset, size);
> + }
> +
> + return (void __force *) addr;
> +}
> +EXPORT_SYMBOL(memremap);
> +
> +void memunmap(void *addr)
> +{
> + iounmap((void __iomem __force *) addr);
> +}
> +EXPORT_SYMBOL(memunmap);
> +#endif /* CONFIG_ARCH_HAS_MEMREMAP */
> +
> void __weak arch_remove_reservations(struct resource *avail)
> {
> }
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 3a2ef67db6c7..097b99073924 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -526,7 +526,10 @@ source "lib/fonts/Kconfig"
> #
>
> config ARCH_HAS_SG_CHAIN
> - def_bool n
> + bool
> +
> +config ARCH_HAS_MEMREMAP
> + bool
>
> config ARCH_HAS_PMEM_API
> bool
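For what it's worth, since the implementation tries each requested
mapping type in turn until one succeeds, the flags read as an ordered
preference list rather than a single type. So a caller that only
wants "memory-like" semantics could ask for a cacheable mapping and
fall back to write-through in a single call; a quick sketch
(hypothetical caller, just to illustrate the fallback):

	void *addr;

	/* prefer a cacheable mapping, accept write-through */
	addr = memremap(offset, size, MEMREMAP_CACHE | MEMREMAP_WT);
	if (!addr)
		return -ENOMEM;

	/* ... access addr as ordinary memory ... */

	memunmap(addr);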