diff mbox series

riscv: make ZONE_DMA32 optional

Message ID 20240827113611.537302-1-vladimir.kondratiev@mobileye.com (mailing list archive)
State New
Headers show
Series riscv: make ZONE_DMA32 optional | expand

Commit Message

Vladimir Kondratiev Aug. 27, 2024, 11:36 a.m. UTC
Not every RISC-V platform necessarily has ZONE_DMA32 memory.

Example: if a platform has no DRAM in the [0..4G] region,
it will report a failure like the one below on each boot.

[    0.088709] swapper/0: page allocation failure: order:7, mode:0xcc4(GFP_KERNEL|GFP_DMA32), nodemask=(null),cpuset=/
[    0.088832] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.11.0-rc5 #30
[    0.088864] Call Trace:
[    0.088869] [<ffffffff800059f2>] dump_backtrace+0x1c/0x24
[    0.088910] [<ffffffff805f328c>] show_stack+0x2c/0x38
[    0.088957] [<ffffffff805fd800>] dump_stack_lvl+0x52/0x74
[    0.088987] [<ffffffff805fd836>] dump_stack+0x14/0x1c
[    0.089010] [<ffffffff801a23a8>] warn_alloc+0xf4/0x176
[    0.089041] [<ffffffff801a3052>] __alloc_pages_noprof+0xc28/0xcb4
[    0.089067] [<ffffffff80086eda>] atomic_pool_expand+0x62/0x1f8
[    0.089090] [<ffffffff8080d674>] __dma_atomic_pool_init+0x46/0x9e
[    0.089115] [<ffffffff8080d762>] dma_atomic_pool_init+0x96/0x11c
[    0.089139] [<ffffffff80002146>] do_one_initcall+0x5c/0x1b2
[    0.089158] [<ffffffff8080127c>] kernel_init_freeable+0x214/0x274
[    0.089190] [<ffffffff805fefd8>] kernel_init+0x1e/0x10a
[    0.089209] [<ffffffff8060748a>] ret_from_fork+0xe/0x1c

Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@mobileye.com>
---
 arch/riscv/Kconfig | 2 +-
 mm/Kconfig         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

Comments

Drew Fustini Aug. 27, 2024, 11:10 p.m. UTC | #1
On Tue, Aug 27, 2024 at 02:36:11PM +0300, Vladimir Kondratiev wrote:
> It is not necessary any RISCV platform has ZONE_DMA32.
> 
> Example - if platform has no DRAM in [0..4G] region,
> it will report failure like below each boot.
> 
> [    0.088709] swapper/0: page allocation failure: order:7, mode:0xcc4(GFP_KERNEL|GFP_DMA32), nodemask=(null),cpuset=/
> [    0.088832] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.11.0-rc5 #30
> [    0.088864] Call Trace:
> [    0.088869] [<ffffffff800059f2>] dump_backtrace+0x1c/0x24
> [    0.088910] [<ffffffff805f328c>] show_stack+0x2c/0x38
> [    0.088957] [<ffffffff805fd800>] dump_stack_lvl+0x52/0x74
> [    0.088987] [<ffffffff805fd836>] dump_stack+0x14/0x1c
> [    0.089010] [<ffffffff801a23a8>] warn_alloc+0xf4/0x176
> [    0.089041] [<ffffffff801a3052>] __alloc_pages_noprof+0xc28/0xcb4
> [    0.089067] [<ffffffff80086eda>] atomic_pool_expand+0x62/0x1f8
> [    0.089090] [<ffffffff8080d674>] __dma_atomic_pool_init+0x46/0x9e
> [    0.089115] [<ffffffff8080d762>] dma_atomic_pool_init+0x96/0x11c
> [    0.089139] [<ffffffff80002146>] do_one_initcall+0x5c/0x1b2
> [    0.089158] [<ffffffff8080127c>] kernel_init_freeable+0x214/0x274
> [    0.089190] [<ffffffff805fefd8>] kernel_init+0x1e/0x10a
> [    0.089209] [<ffffffff8060748a>] ret_from_fork+0xe/0x1c
> 
> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@mobileye.com>
> ---
>  arch/riscv/Kconfig | 2 +-
>  mm/Kconfig         | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 0f3cd7c3a436..94a573112625 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -50,6 +50,7 @@ config RISCV
>  	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>  	select ARCH_HAS_UBSAN
>  	select ARCH_HAS_VDSO_DATA
> +	select ARCH_HAS_ZONE_DMA_SET if 64BIT
>  	select ARCH_KEEP_MEMBLOCK if ACPI
>  	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE	if 64BIT && MMU
>  	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
> @@ -200,7 +201,6 @@ config RISCV
>  	select THREAD_INFO_IN_TASK
>  	select TRACE_IRQFLAGS_SUPPORT
>  	select UACCESS_MEMCPY if !MMU
> -	select ZONE_DMA32 if 64BIT
>  
>  config CLANG_SUPPORTS_DYNAMIC_FTRACE
>  	def_bool CC_IS_CLANG
> diff --git a/mm/Kconfig b/mm/Kconfig
> index b72e7d040f78..97c85da98e89 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -1032,7 +1032,7 @@ config ZONE_DMA
>  config ZONE_DMA32
>  	bool "Support DMA32 zone" if ARCH_HAS_ZONE_DMA_SET
>  	depends on !X86_32
> -	default y if ARM64
> +	default y if ARM64 || (RISCV && 64BIT)
>  
>  config ZONE_DEVICE
>  	bool "Device memory (pmem, HMM, etc...) hotplug support"
> -- 
> 2.37.3
> 

Reviewed-by: Drew Fustini <dfustini@tenstorrent.com>

Thanks for sending this patch as I've also encountered that annoying
error on systems with DRAM above 4GB.

I tested this patch by changing the qemu virt machine to have DRAM
starting at 2^32:

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index cef41c150aaf..3033a2560edb 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -87,7 +87,7 @@ static const MemMapEntry virt_memmap[] = {
     [VIRT_IMSIC_S] =      { 0x28000000, VIRT_IMSIC_MAX_SIZE },
     [VIRT_PCIE_ECAM] =    { 0x30000000,    0x10000000 },
     [VIRT_PCIE_MMIO] =    { 0x40000000,    0x40000000 },
-    [VIRT_DRAM] =         { 0x80000000,           0x0 },
+    [VIRT_DRAM] =        { 0x100000000,           0x0 },
 };
 
 /* PCIe high mmio is fixed for RV32 */
Palmer Dabbelt Sept. 20, 2024, 8:58 a.m. UTC | #2
On Tue, 27 Aug 2024 16:10:20 PDT (-0700), dfustini@tenstorrent.com wrote:
> On Tue, Aug 27, 2024 at 02:36:11PM +0300, Vladimir Kondratiev wrote:
>> It is not necessary any RISCV platform has ZONE_DMA32.
>>
>> Example - if platform has no DRAM in [0..4G] region,
>> it will report failure like below each boot.
>>
>> [    0.088709] swapper/0: page allocation failure: order:7, mode:0xcc4(GFP_KERNEL|GFP_DMA32), nodemask=(null),cpuset=/
>> [    0.088832] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.11.0-rc5 #30
>> [    0.088864] Call Trace:
>> [    0.088869] [<ffffffff800059f2>] dump_backtrace+0x1c/0x24
>> [    0.088910] [<ffffffff805f328c>] show_stack+0x2c/0x38
>> [    0.088957] [<ffffffff805fd800>] dump_stack_lvl+0x52/0x74
>> [    0.088987] [<ffffffff805fd836>] dump_stack+0x14/0x1c
>> [    0.089010] [<ffffffff801a23a8>] warn_alloc+0xf4/0x176
>> [    0.089041] [<ffffffff801a3052>] __alloc_pages_noprof+0xc28/0xcb4
>> [    0.089067] [<ffffffff80086eda>] atomic_pool_expand+0x62/0x1f8
>> [    0.089090] [<ffffffff8080d674>] __dma_atomic_pool_init+0x46/0x9e
>> [    0.089115] [<ffffffff8080d762>] dma_atomic_pool_init+0x96/0x11c
>> [    0.089139] [<ffffffff80002146>] do_one_initcall+0x5c/0x1b2
>> [    0.089158] [<ffffffff8080127c>] kernel_init_freeable+0x214/0x274
>> [    0.089190] [<ffffffff805fefd8>] kernel_init+0x1e/0x10a
>> [    0.089209] [<ffffffff8060748a>] ret_from_fork+0xe/0x1c
>>
>> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@mobileye.com>
>> ---
>>  arch/riscv/Kconfig | 2 +-
>>  mm/Kconfig         | 2 +-
>>  2 files changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>> index 0f3cd7c3a436..94a573112625 100644
>> --- a/arch/riscv/Kconfig
>> +++ b/arch/riscv/Kconfig
>> @@ -50,6 +50,7 @@ config RISCV
>>  	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>>  	select ARCH_HAS_UBSAN
>>  	select ARCH_HAS_VDSO_DATA
>> +	select ARCH_HAS_ZONE_DMA_SET if 64BIT
>>  	select ARCH_KEEP_MEMBLOCK if ACPI
>>  	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE	if 64BIT && MMU
>>  	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
>> @@ -200,7 +201,6 @@ config RISCV
>>  	select THREAD_INFO_IN_TASK
>>  	select TRACE_IRQFLAGS_SUPPORT
>>  	select UACCESS_MEMCPY if !MMU
>> -	select ZONE_DMA32 if 64BIT
>>
>>  config CLANG_SUPPORTS_DYNAMIC_FTRACE
>>  	def_bool CC_IS_CLANG
>> diff --git a/mm/Kconfig b/mm/Kconfig
>> index b72e7d040f78..97c85da98e89 100644
>> --- a/mm/Kconfig
>> +++ b/mm/Kconfig
>> @@ -1032,7 +1032,7 @@ config ZONE_DMA
>>  config ZONE_DMA32
>>  	bool "Support DMA32 zone" if ARCH_HAS_ZONE_DMA_SET
>>  	depends on !X86_32
>> -	default y if ARM64
>> +	default y if ARM64 || (RISCV && 64BIT)
>>
>>  config ZONE_DEVICE
>>  	bool "Device memory (pmem, HMM, etc...) hotplug support"
>> --
>> 2.37.3
>>
>
> Reviewed-by: Drew Fustini <dfustini@tenstorrent.com>
>
> Thanks for sending this patch as I've also encountered that annoying
> error on systems with DRAM above 4GB.
>
> I tested this patch by changing the qemu virt machine to have DRAM
> starting at 2^32:
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index cef41c150aaf..3033a2560edb 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -87,7 +87,7 @@ static const MemMapEntry virt_memmap[] = {
>      [VIRT_IMSIC_S] =      { 0x28000000, VIRT_IMSIC_MAX_SIZE },
>      [VIRT_PCIE_ECAM] =    { 0x30000000,    0x10000000 },
>      [VIRT_PCIE_MMIO] =    { 0x40000000,    0x40000000 },
> -    [VIRT_DRAM] =         { 0x80000000,           0x0 },
> +    [VIRT_DRAM] =        { 0x100000000,           0x0 },
>  };
>
>  /* PCIe high mmio is fixed for RV32 */

IIRC the ZONE_DMA32 stuff existed for some of the early SiFive systems, 
where the expansion daughterboard's PCIe controller (via a Xilinx FPGA) 
could only handle 32-bit DMA addresses.  I think there's a similar quirk 
in the Microsemi PCIe controller on the PolarFire boards, but Conor 
would know for sure.
Drew Fustini Sept. 20, 2024, 1:18 p.m. UTC | #3
On Fri, Sep 20, 2024 at 01:58:23AM -0700, Palmer Dabbelt wrote:
> On Tue, 27 Aug 2024 16:10:20 PDT (-0700), dfustini@tenstorrent.com wrote:
> > On Tue, Aug 27, 2024 at 02:36:11PM +0300, Vladimir Kondratiev wrote:
> > > It is not necessary any RISCV platform has ZONE_DMA32.
> > > 
> > > Example - if platform has no DRAM in [0..4G] region,
> > > it will report failure like below each boot.
> > > 
> > > [    0.088709] swapper/0: page allocation failure: order:7, mode:0xcc4(GFP_KERNEL|GFP_DMA32), nodemask=(null),cpuset=/
> > > [    0.088832] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.11.0-rc5 #30
> > > [    0.088864] Call Trace:
> > > [    0.088869] [<ffffffff800059f2>] dump_backtrace+0x1c/0x24
> > > [    0.088910] [<ffffffff805f328c>] show_stack+0x2c/0x38
> > > [    0.088957] [<ffffffff805fd800>] dump_stack_lvl+0x52/0x74
> > > [    0.088987] [<ffffffff805fd836>] dump_stack+0x14/0x1c
> > > [    0.089010] [<ffffffff801a23a8>] warn_alloc+0xf4/0x176
> > > [    0.089041] [<ffffffff801a3052>] __alloc_pages_noprof+0xc28/0xcb4
> > > [    0.089067] [<ffffffff80086eda>] atomic_pool_expand+0x62/0x1f8
> > > [    0.089090] [<ffffffff8080d674>] __dma_atomic_pool_init+0x46/0x9e
> > > [    0.089115] [<ffffffff8080d762>] dma_atomic_pool_init+0x96/0x11c
> > > [    0.089139] [<ffffffff80002146>] do_one_initcall+0x5c/0x1b2
> > > [    0.089158] [<ffffffff8080127c>] kernel_init_freeable+0x214/0x274
> > > [    0.089190] [<ffffffff805fefd8>] kernel_init+0x1e/0x10a
> > > [    0.089209] [<ffffffff8060748a>] ret_from_fork+0xe/0x1c
> > > 
> > > Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@mobileye.com>
> > > ---
> > >  arch/riscv/Kconfig | 2 +-
> > >  mm/Kconfig         | 2 +-
> > >  2 files changed, 2 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index 0f3cd7c3a436..94a573112625 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -50,6 +50,7 @@ config RISCV
> > >  	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
> > >  	select ARCH_HAS_UBSAN
> > >  	select ARCH_HAS_VDSO_DATA
> > > +	select ARCH_HAS_ZONE_DMA_SET if 64BIT
> > >  	select ARCH_KEEP_MEMBLOCK if ACPI
> > >  	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE	if 64BIT && MMU
> > >  	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
> > > @@ -200,7 +201,6 @@ config RISCV
> > >  	select THREAD_INFO_IN_TASK
> > >  	select TRACE_IRQFLAGS_SUPPORT
> > >  	select UACCESS_MEMCPY if !MMU
> > > -	select ZONE_DMA32 if 64BIT
> > > 
> > >  config CLANG_SUPPORTS_DYNAMIC_FTRACE
> > >  	def_bool CC_IS_CLANG
> > > diff --git a/mm/Kconfig b/mm/Kconfig
> > > index b72e7d040f78..97c85da98e89 100644
> > > --- a/mm/Kconfig
> > > +++ b/mm/Kconfig
> > > @@ -1032,7 +1032,7 @@ config ZONE_DMA
> > >  config ZONE_DMA32
> > >  	bool "Support DMA32 zone" if ARCH_HAS_ZONE_DMA_SET
> > >  	depends on !X86_32
> > > -	default y if ARM64
> > > +	default y if ARM64 || (RISCV && 64BIT)
> > > 
> > >  config ZONE_DEVICE
> > >  	bool "Device memory (pmem, HMM, etc...) hotplug support"
> > > --
> > > 2.37.3
> > > 
> > 
> > Reviewed-by: Drew Fustini <dfustini@tenstorrent.com>
> > 
> > Thanks for sending this patch as I've also encountered that annoying
> > error on systems with DRAM above 4GB.
> > 
> > I tested this patch by changing the qemu virt machine to have DRAM
> > starting at 2^32:
> > 
> > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> > index cef41c150aaf..3033a2560edb 100644
> > --- a/hw/riscv/virt.c
> > +++ b/hw/riscv/virt.c
> > @@ -87,7 +87,7 @@ static const MemMapEntry virt_memmap[] = {
> >      [VIRT_IMSIC_S] =      { 0x28000000, VIRT_IMSIC_MAX_SIZE },
> >      [VIRT_PCIE_ECAM] =    { 0x30000000,    0x10000000 },
> >      [VIRT_PCIE_MMIO] =    { 0x40000000,    0x40000000 },
> > -    [VIRT_DRAM] =         { 0x80000000,           0x0 },
> > +    [VIRT_DRAM] =        { 0x100000000,           0x0 },
> >  };
> > 
> >  /* PCIe high mmio is fixed for RV32 */
> 
> IIRC the ZONE_DMA32 stuff existed for some of the early SiFive systems,
> where the expansion daughterboard's PCIe controller (via a Xilinx FPGA)
> could only handle 32-bit DMA addreses.  I think there's a similar quirk in
> the Microsemi PCIe controller on the PolarFire boards, but Conor would know
> for sure.

I don't think this patch would affect those systems that need ZONE_DMA32.
I believe it just makes it possible to disable it in the kernel config.
The platform I'm working on has no memory below 4GB and all the PCIe
devices that I care about are not 32-bit constrained. Therefore I just
want to be able to turn it off in my .config.

Thanks,
Drew
Christoph Hellwig Sept. 20, 2024, 2:04 p.m. UTC | #4
On Tue, Aug 27, 2024 at 02:36:11PM +0300, Vladimir Kondratiev wrote:
> It is not necessary any RISCV platform has ZONE_DMA32.

That is an odd statement.  The point of ZONE_DMA32 is to make sure
that drivers can always allocate 32-bit DMAable memory, and without
ZONE_DMA32 that is very hard to provide unless you always have an
IOMMU.

> Example - if platform has no DRAM in [0..4G] region,
> it will report failure like below each boot.

ZONE_DMA32 is supposed to contain the 32-bit dma addressable memory,
not 32-bit physical.

Take a look at the changes that just went into Linus' tree for that.
Vladimir Kondratiev Sept. 22, 2024, 10:06 a.m. UTC | #5
>On Tue, Aug 27, 2024 at 02:36:11PM +0300, Vladimir Kondratiev wrote:
>> It is not necessary any RISCV platform has ZONE_DMA32.

>That is an odd statement.  The point of ZONE_DMA32 is to make sure
>that drivers can always allocate 32-bit DMAable memory, and without
>ZONE_DMA32 that is very hard to provide unless you always have an
>IOMMU.

This is the whole point - there are platforms where you can't allocate 32-bit
DMA-able memory.
Unless there's an IOMMU or some platform-specific trick,
the DMA address is the same as the physical address.
In my example, the platform has DRAM installed at address 32 GBytes.
32-bit devices would not work on such a platform, and any attempt to allocate
ZONE_DMA32 memory fails and the kernel prints a warning, as in my original post.

>ZONE_DMA32 is supposed to contain the 32-bit dma addressable memory,
>not 32-bit physical.

>Take a look at the changes that just went into Linus' tree for that.

I re-evaluated with Linus's tree as of now (commit 88264981f208), same error reported if I don't apply my patch:

[    0.191514] DMA: preallocated 512 KiB GFP_KERNEL pool for atomic allocations
[    0.191524] swapper/0: page allocation failure: order:7, mode:0xcc4(GFP_KERNEL|GFP_DMA32), nodemask=(null),cpuset=/,mems_allowed=0
[    0.191546] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.11.0 #2
[    0.191553] Hardware name: ????  (DT)
[    0.191556] Call Trace:
[    0.191560] [<ffffffff800059bc>] dump_backtrace+0x1c/0x24
[    0.191571] [<ffffffff805d1a3c>] show_stack+0x2c/0x38
[    0.191578] [<ffffffff805dc1e6>] dump_stack_lvl+0x52/0x74
[    0.191584] [<ffffffff805dc21c>] dump_stack+0x14/0x1c
[    0.191589] [<ffffffff801a8eec>] warn_alloc+0xf4/0x180
[    0.191598] [<ffffffff801a98e8>] __alloc_pages_noprof+0x970/0xd94
[    0.191605] [<ffffffff8008d2ae>] atomic_pool_expand+0x62/0x1f8
[    0.191615] [<ffffffff8060d6ae>] __dma_atomic_pool_init+0x46/0x9e
[    0.191620] [<ffffffff8060d79c>] dma_atomic_pool_init+0x96/0x11c
[    0.191626] [<ffffffff80002146>] do_one_initcall+0x5c/0x1b2
[    0.191631] [<ffffffff8060127c>] kernel_init_freeable+0x214/0x274
[    0.191636] [<ffffffff805dd9e4>] kernel_init+0x1e/0x10a
[    0.191644] [<ffffffff805e6eba>] ret_from_fork+0xe/0x1c
[    0.191709] Mem-Info:
[    0.191714] active_anon:0 inactive_anon:0 isolated_anon:0
[    0.191714]  active_file:0 inactive_file:0 isolated_file:0
[    0.191714]  unevictable:0 dirty:0 writeback:0
[    0.191714]  slab_reclaimable:26 slab_unreclaimable:742
[    0.191714]  mapped:0 shmem:0 pagetables:65
[    0.191714]  sec_pagetables:0 bounce:0
[    0.191714]  kernel_misc_reclaimable:0
[    0.191714]  free:1014355 free_pcp:0 free_cma:0
[    0.191727] 0 total pagecache pages
[    0.191730] 1048576 pages RAM
[    0.191733] 0 pages HighMem/MovableOnly
[    0.191735] 30264 pages reserved
[    0.191743] DMA: failed to allocate 496 KiB GFP_KERNEL|GFP_DMA32 pool for atomic allocation
Ben Dooks Sept. 23, 2024, 9:46 a.m. UTC | #6
On 20/09/2024 14:18, Drew Fustini wrote:
> On Fri, Sep 20, 2024 at 01:58:23AM -0700, Palmer Dabbelt wrote:
>> On Tue, 27 Aug 2024 16:10:20 PDT (-0700), dfustini@tenstorrent.com wrote:
>>> On Tue, Aug 27, 2024 at 02:36:11PM +0300, Vladimir Kondratiev wrote:
>>>> It is not necessary any RISCV platform has ZONE_DMA32.
>>>>
>>>> Example - if platform has no DRAM in [0..4G] region,
>>>> it will report failure like below each boot.
>>>>
>>>> [    0.088709] swapper/0: page allocation failure: order:7, mode:0xcc4(GFP_KERNEL|GFP_DMA32), nodemask=(null),cpuset=/
>>>> [    0.088832] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.11.0-rc5 #30
>>>> [    0.088864] Call Trace:
>>>> [    0.088869] [<ffffffff800059f2>] dump_backtrace+0x1c/0x24
>>>> [    0.088910] [<ffffffff805f328c>] show_stack+0x2c/0x38
>>>> [    0.088957] [<ffffffff805fd800>] dump_stack_lvl+0x52/0x74
>>>> [    0.088987] [<ffffffff805fd836>] dump_stack+0x14/0x1c
>>>> [    0.089010] [<ffffffff801a23a8>] warn_alloc+0xf4/0x176
>>>> [    0.089041] [<ffffffff801a3052>] __alloc_pages_noprof+0xc28/0xcb4
>>>> [    0.089067] [<ffffffff80086eda>] atomic_pool_expand+0x62/0x1f8
>>>> [    0.089090] [<ffffffff8080d674>] __dma_atomic_pool_init+0x46/0x9e
>>>> [    0.089115] [<ffffffff8080d762>] dma_atomic_pool_init+0x96/0x11c
>>>> [    0.089139] [<ffffffff80002146>] do_one_initcall+0x5c/0x1b2
>>>> [    0.089158] [<ffffffff8080127c>] kernel_init_freeable+0x214/0x274
>>>> [    0.089190] [<ffffffff805fefd8>] kernel_init+0x1e/0x10a
>>>> [    0.089209] [<ffffffff8060748a>] ret_from_fork+0xe/0x1c
>>>>
>>>> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@mobileye.com>
>>>> ---
>>>>   arch/riscv/Kconfig | 2 +-
>>>>   mm/Kconfig         | 2 +-
>>>>   2 files changed, 2 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>> index 0f3cd7c3a436..94a573112625 100644
>>>> --- a/arch/riscv/Kconfig
>>>> +++ b/arch/riscv/Kconfig
>>>> @@ -50,6 +50,7 @@ config RISCV
>>>>   	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
>>>>   	select ARCH_HAS_UBSAN
>>>>   	select ARCH_HAS_VDSO_DATA
>>>> +	select ARCH_HAS_ZONE_DMA_SET if 64BIT
>>>>   	select ARCH_KEEP_MEMBLOCK if ACPI
>>>>   	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE	if 64BIT && MMU
>>>>   	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
>>>> @@ -200,7 +201,6 @@ config RISCV
>>>>   	select THREAD_INFO_IN_TASK
>>>>   	select TRACE_IRQFLAGS_SUPPORT
>>>>   	select UACCESS_MEMCPY if !MMU
>>>> -	select ZONE_DMA32 if 64BIT
>>>>
>>>>   config CLANG_SUPPORTS_DYNAMIC_FTRACE
>>>>   	def_bool CC_IS_CLANG
>>>> diff --git a/mm/Kconfig b/mm/Kconfig
>>>> index b72e7d040f78..97c85da98e89 100644
>>>> --- a/mm/Kconfig
>>>> +++ b/mm/Kconfig
>>>> @@ -1032,7 +1032,7 @@ config ZONE_DMA
>>>>   config ZONE_DMA32
>>>>   	bool "Support DMA32 zone" if ARCH_HAS_ZONE_DMA_SET
>>>>   	depends on !X86_32
>>>> -	default y if ARM64
>>>> +	default y if ARM64 || (RISCV && 64BIT)
>>>>
>>>>   config ZONE_DEVICE
>>>>   	bool "Device memory (pmem, HMM, etc...) hotplug support"
>>>> --
>>>> 2.37.3
>>>>
>>>
>>> Reviewed-by: Drew Fustini <dfustini@tenstorrent.com>
>>>
>>> Thanks for sending this patch as I've also encountered that annoying
>>> error on systems with DRAM above 4GB.
>>>
>>> I tested this patch by changing the qemu virt machine to have DRAM
>>> starting at 2^32:
>>>
>>> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
>>> index cef41c150aaf..3033a2560edb 100644
>>> --- a/hw/riscv/virt.c
>>> +++ b/hw/riscv/virt.c
>>> @@ -87,7 +87,7 @@ static const MemMapEntry virt_memmap[] = {
>>>       [VIRT_IMSIC_S] =      { 0x28000000, VIRT_IMSIC_MAX_SIZE },
>>>       [VIRT_PCIE_ECAM] =    { 0x30000000,    0x10000000 },
>>>       [VIRT_PCIE_MMIO] =    { 0x40000000,    0x40000000 },
>>> -    [VIRT_DRAM] =         { 0x80000000,           0x0 },
>>> +    [VIRT_DRAM] =        { 0x100000000,           0x0 },
>>>   };
>>>
>>>   /* PCIe high mmio is fixed for RV32 */
>>
>> IIRC the ZONE_DMA32 stuff existed for some of the early SiFive systems,
>> where the expansion daughterboard's PCIe controller (via a Xilinx FPGA)
>> could only handle 32-bit DMA addreses.  I think there's a similar quirk in
>> the Microsemi PCIe controller on the PolarFire boards, but Conor would know
>> for sure.

The Fu740 SoC has a mirror of part of DRAM in 32bit space specifically
I think for PCIe where something may be attached via a PCIe-PCI bridge.

> I don't think this patch would affect those systems that need ZONE_DMA2.
> I believe it just makes it possible to disable it in the kernel config.
> The platform I'm working on has no memory below 4GB and all the PCIe
> devices that I care about are not 32-bit constrained. Therefore I just
> want to be able to turn it off in my .config.
> 
> Thanks,
> Drew

I tried this a while ago and IIRC you run into issues with DMA
allocations from certain drivers that expect this to exist.

With new platforms that don't have any memory in DMA32 space
this may become more common. I've already had to deal with this
for an internal dev project, and it ended up having a couple of
hacks into the allocation code to silence warnings/errors.

This of course may have been fixed, given this was last year.
Christoph Hellwig Sept. 24, 2024, 6:39 a.m. UTC | #7
On Sun, Sep 22, 2024 at 10:06:59AM +0000, Vladimir Kondratiev wrote:
> This is the whole point - there are platforms where you can't allocate 32-bit
> dma-able memory.

Ugg.  They will be broken for all kinds of devices (plug-in PCIe devices,
IP blocks).

> Unless there's IOMMU or some platform specific tricks,
> DMA address is same as physical address.

Well, that's where the usual platform specific trick is, as the
platforms with high DRAM either have a mapping between physical
and DMA range (see the dma_ranges concept in the DMA and OF code)
or a mirror of the higher addresses.  Or very rarely (e.g. sparc)
a required IOMMU that is always used.
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 0f3cd7c3a436..94a573112625 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -50,6 +50,7 @@  config RISCV
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_UBSAN
 	select ARCH_HAS_VDSO_DATA
+	select ARCH_HAS_ZONE_DMA_SET if 64BIT
 	select ARCH_KEEP_MEMBLOCK if ACPI
 	select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE	if 64BIT && MMU
 	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
@@ -200,7 +201,6 @@  config RISCV
 	select THREAD_INFO_IN_TASK
 	select TRACE_IRQFLAGS_SUPPORT
 	select UACCESS_MEMCPY if !MMU
-	select ZONE_DMA32 if 64BIT
 
 config CLANG_SUPPORTS_DYNAMIC_FTRACE
 	def_bool CC_IS_CLANG
diff --git a/mm/Kconfig b/mm/Kconfig
index b72e7d040f78..97c85da98e89 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1032,7 +1032,7 @@  config ZONE_DMA
 config ZONE_DMA32
 	bool "Support DMA32 zone" if ARCH_HAS_ZONE_DMA_SET
 	depends on !X86_32
-	default y if ARM64
+	default y if ARM64 || (RISCV && 64BIT)
 
 config ZONE_DEVICE
 	bool "Device memory (pmem, HMM, etc...) hotplug support"