diff mbox series

[RFC] hw/arm/virt: Avoid unexpected warning from Linux guest on host with Fujitsu CPUs

Message ID 20240606104745.291330-1-zhenyzha@redhat.com (mailing list archive)
State New, archived
Headers show
Series [RFC] hw/arm/virt: Avoid unexpected warning from Linux guest on host with Fujitsu CPUs | expand

Commit Message

Zhenyu Zhang June 6, 2024, 10:47 a.m. UTC
Multiple warning messages and corresponding backtraces are observed when Linux
guest is booted on the host with Fujitsu CPUs. One of them is shown as below.

[    0.032443] ------------[ cut here ]------------
[    0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256)
[    0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54 arch_setup_dma_ops+0xbc/0xcc
[    0.032470] Modules linked in:
[    0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64 #1
[    0.032481] Hardware name: linux,dummy-virt (DT)
[    0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    0.032490] pc : arch_setup_dma_ops+0xbc/0xcc
[    0.032496] lr : arch_setup_dma_ops+0xbc/0xcc
[    0.032501] sp : ffff80008003b860
[    0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c
[    0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
[    0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000
[    0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff
[    0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89
[    0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61
[    0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4
[    0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029
[    0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001
[    0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40
[    0.032564] Call trace:
[    0.032566]  arch_setup_dma_ops+0xbc/0xcc
[    0.032572]  of_dma_configure_id+0x138/0x300
[    0.032591]  amba_dma_configure+0x34/0xc0
[    0.032600]  really_probe+0x78/0x3dc
[    0.032614]  __driver_probe_device+0x108/0x160
[    0.032619]  driver_probe_device+0x44/0x114
[    0.032624]  __device_attach_driver+0xb8/0x14c
[    0.032629]  bus_for_each_drv+0x88/0xe4
[    0.032634]  __device_attach+0xb0/0x1e0
[    0.032638]  device_initial_probe+0x18/0x20
[    0.032643]  bus_probe_device+0xa8/0xb0
[    0.032648]  device_add+0x4b4/0x6c0
[    0.032652]  amba_device_try_add.part.0+0x48/0x360
[    0.032657]  amba_device_add+0x104/0x144
[    0.032662]  of_amba_device_create.isra.0+0x100/0x1c4
[    0.032666]  of_platform_bus_create+0x294/0x35c
[    0.032669]  of_platform_populate+0x5c/0x150
[    0.032672]  of_platform_default_populate_init+0xd0/0xec
[    0.032697]  do_one_initcall+0x4c/0x2e0
[    0.032701]  do_initcalls+0x100/0x13c
[    0.032707]  kernel_init_freeable+0x1c8/0x21c
[    0.032712]  kernel_init+0x28/0x140
[    0.032731]  ret_from_fork+0x10/0x20
[    0.032735] ---[ end trace 0000000000000000 ]---

In Linux, a check is applied to every device which is exposed through device-tree
node. The warning message is raised when the device isn't DMA coherent and the
cache line size is larger than ARCH_DMA_MINALIGN (128 bytes). The cache line is
sorted from CTR_EL0[CWG], which corresponds to 256 bytes on the guest CPUs. 
The DMA coherent capability is claimed through 'dma-coherent' in their
device-tree nodes.

I don't see those devices need to be DMA incoherent necessarily and those devices
will do DMA operations in practice. So lets add 'dma-coherent' property to their
device-tree nodes to avoid the unexpected warnings in the Linux guest.

Signed-off-by: Zhenyu Zhang <zhenyzha@redhat.com>
---
 hw/arm/boot.c        | 1 +
 hw/arm/virt.c        | 4 ++++
 hw/core/sysbus-fdt.c | 1 +
 3 files changed, 6 insertions(+)

Comments

Peter Maydell June 6, 2024, 11:56 a.m. UTC | #1
On Thu, 6 Jun 2024 at 11:48, Zhenyu Zhang <zhenyzha@redhat.com> wrote:
>
> Multiple warning messages and corresponding backtraces are observed when Linux
> guest is booted on the host with Fujitsu CPUs. One of them is shown as below.
>
> [    0.032443] ------------[ cut here ]------------
> [    0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256)
> [    0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54 arch_setup_dma_ops+0xbc/0xcc
> [    0.032470] Modules linked in:
> [    0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64 #1
> [    0.032481] Hardware name: linux,dummy-virt (DT)
> [    0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> [    0.032490] pc : arch_setup_dma_ops+0xbc/0xcc
> [    0.032496] lr : arch_setup_dma_ops+0xbc/0xcc
> [    0.032501] sp : ffff80008003b860
> [    0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c
> [    0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
> [    0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000
> [    0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff
> [    0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89
> [    0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61
> [    0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4
> [    0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029
> [    0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001
> [    0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40
> [    0.032564] Call trace:
> [    0.032566]  arch_setup_dma_ops+0xbc/0xcc
> [    0.032572]  of_dma_configure_id+0x138/0x300
> [    0.032591]  amba_dma_configure+0x34/0xc0
> [    0.032600]  really_probe+0x78/0x3dc
> [    0.032614]  __driver_probe_device+0x108/0x160
> [    0.032619]  driver_probe_device+0x44/0x114
> [    0.032624]  __device_attach_driver+0xb8/0x14c
> [    0.032629]  bus_for_each_drv+0x88/0xe4
> [    0.032634]  __device_attach+0xb0/0x1e0
> [    0.032638]  device_initial_probe+0x18/0x20
> [    0.032643]  bus_probe_device+0xa8/0xb0
> [    0.032648]  device_add+0x4b4/0x6c0
> [    0.032652]  amba_device_try_add.part.0+0x48/0x360
> [    0.032657]  amba_device_add+0x104/0x144
> [    0.032662]  of_amba_device_create.isra.0+0x100/0x1c4
> [    0.032666]  of_platform_bus_create+0x294/0x35c
> [    0.032669]  of_platform_populate+0x5c/0x150
> [    0.032672]  of_platform_default_populate_init+0xd0/0xec
> [    0.032697]  do_one_initcall+0x4c/0x2e0
> [    0.032701]  do_initcalls+0x100/0x13c
> [    0.032707]  kernel_init_freeable+0x1c8/0x21c
> [    0.032712]  kernel_init+0x28/0x140
> [    0.032731]  ret_from_fork+0x10/0x20
> [    0.032735] ---[ end trace 0000000000000000 ]---
>
> In Linux, a check is applied to every device which is exposed through device-tree
> node. The warning message is raised when the device isn't DMA coherent and the
> cache line size is larger than ARCH_DMA_MINALIGN (128 bytes). The cache line is
> sorted from CTR_EL0[CWG], which corresponds to 256 bytes on the guest CPUs.
> The DMA coherent capability is claimed through 'dma-coherent' in their
> device-tree nodes.

For QEMU emulated all our DMA is always coherent, so where we
have DMA-capable devices we should definitely tell the kernel
that that DMA is coherent.

Our pl011 does not do DMA, though (we do not set the dmas property), so
it's kind of bogus for the kernel to complain about that.

So I think we should take these changes where they refer to DMA
capable devices and ask the kernel folks to fix the warnings
where they refer to devices that aren't doing DMA. Looking through
the patch, though, my initial impression is that all these are
in the latter category...

> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> index d480a7da02..cdf99966e6 100644
> --- a/hw/arm/boot.c
> +++ b/hw/arm/boot.c
> @@ -509,6 +509,7 @@ static void fdt_add_psci_node(void *fdt)
>      qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
>      qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
>      qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
> +    qemu_fdt_setprop(fdt, "/psci", "dma-coherent", NULL, 0);

The PSCI node is describing the firmware interface for
HVC or SMC calls -- I don't think it makes any sense
to think of this as doing DMA. So I would query the kernel
folks about this warning.

>  }
>
>  int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 3c93c0c0a6..d3e5f512e2 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -652,6 +652,7 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms)
>          qemu_fdt_setprop_cells(ms->fdt, "/pmu", "interrupts",
>                                 GIC_FDT_IRQ_TYPE_PPI,
>                                 INTID_TO_PPI(VIRTUAL_PMU_IRQ), irqflags);
> +        qemu_fdt_setprop(ms->fdt, "/pmu", "dma-coherent", NULL, 0);

What DMA interface does the PMU have?

>      }
>  }
>
> @@ -936,6 +937,7 @@ static void create_uart(const VirtMachineState *vms, int uart,
>                                 vms->clock_phandle, vms->clock_phandle);
>      qemu_fdt_setprop(ms->fdt, nodename, "clock-names",
>                           clocknames, sizeof(clocknames));
> +    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);

As above, our PL011 doesn't do any DMA and we do not advertise
to the kernel that it does.

>      if (uart == VIRT_UART) {
>          qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename);
> @@ -972,6 +974,7 @@ static void create_rtc(const VirtMachineState *vms)
>                             GIC_FDT_IRQ_FLAGS_LEVEL_HI);
>      qemu_fdt_setprop_cell(ms->fdt, nodename, "clocks", vms->clock_phandle);
>      qemu_fdt_setprop_string(ms->fdt, nodename, "clock-names", "apb_pclk");
> +    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);
>      g_free(nodename);
>  }

What DMA does the pl031 do?

>
> @@ -1077,6 +1080,7 @@ static void create_gpio_devices(const VirtMachineState *vms, int gpio,
>      qemu_fdt_setprop_cell(ms->fdt, nodename, "clocks", vms->clock_phandle);
>      qemu_fdt_setprop_string(ms->fdt, nodename, "clock-names", "apb_pclk");
>      qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", phandle);
> +    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);

As far as I know the PL061 is also not a DMA-capable device.

>      if (gpio != VIRT_GPIO) {
>          /* Mark as not usable by the normal world */
> diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
> index eebcd28f9a..da47071a95 100644
> --- a/hw/core/sysbus-fdt.c
> +++ b/hw/core/sysbus-fdt.c
> @@ -554,6 +554,7 @@ void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr,
>      qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, bus_size);
>
>      qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", intc);
> +    qemu_fdt_setprop(fdt, node, "dma-coherent", NULL, 0);

Isn't this the fdt node for a bus, not a device?

thanks
-- PMM
Jonathan Cameron June 6, 2024, 5:13 p.m. UTC | #2
On Thu, 6 Jun 2024 12:56:59 +0100
Peter Maydell <peter.maydell@linaro.org> wrote:

> On Thu, 6 Jun 2024 at 11:48, Zhenyu Zhang <zhenyzha@redhat.com> wrote:
> >
> > Multiple warning messages and corresponding backtraces are observed when Linux
> > guest is booted on the host with Fujitsu CPUs. One of them is shown as below.
> >
> > [    0.032443] ------------[ cut here ]------------
> > [    0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256)
> > [    0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54 arch_setup_dma_ops+0xbc/0xcc
> > [    0.032470] Modules linked in:
> > [    0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64 #1
> > [    0.032481] Hardware name: linux,dummy-virt (DT)
> > [    0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > [    0.032490] pc : arch_setup_dma_ops+0xbc/0xcc
> > [    0.032496] lr : arch_setup_dma_ops+0xbc/0xcc
> > [    0.032501] sp : ffff80008003b860
> > [    0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c
> > [    0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
> > [    0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000
> > [    0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff
> > [    0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89
> > [    0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61
> > [    0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4
> > [    0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029
> > [    0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001
> > [    0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40
> > [    0.032564] Call trace:
> > [    0.032566]  arch_setup_dma_ops+0xbc/0xcc
> > [    0.032572]  of_dma_configure_id+0x138/0x300
> > [    0.032591]  amba_dma_configure+0x34/0xc0
> > [    0.032600]  really_probe+0x78/0x3dc
> > [    0.032614]  __driver_probe_device+0x108/0x160
> > [    0.032619]  driver_probe_device+0x44/0x114
> > [    0.032624]  __device_attach_driver+0xb8/0x14c
> > [    0.032629]  bus_for_each_drv+0x88/0xe4
> > [    0.032634]  __device_attach+0xb0/0x1e0
> > [    0.032638]  device_initial_probe+0x18/0x20
> > [    0.032643]  bus_probe_device+0xa8/0xb0
> > [    0.032648]  device_add+0x4b4/0x6c0
> > [    0.032652]  amba_device_try_add.part.0+0x48/0x360
> > [    0.032657]  amba_device_add+0x104/0x144
> > [    0.032662]  of_amba_device_create.isra.0+0x100/0x1c4
> > [    0.032666]  of_platform_bus_create+0x294/0x35c
> > [    0.032669]  of_platform_populate+0x5c/0x150
> > [    0.032672]  of_platform_default_populate_init+0xd0/0xec
> > [    0.032697]  do_one_initcall+0x4c/0x2e0
> > [    0.032701]  do_initcalls+0x100/0x13c
> > [    0.032707]  kernel_init_freeable+0x1c8/0x21c
> > [    0.032712]  kernel_init+0x28/0x140
> > [    0.032731]  ret_from_fork+0x10/0x20
> > [    0.032735] ---[ end trace 0000000000000000 ]---
> >
> > In Linux, a check is applied to every device which is exposed through device-tree
> > node. The warning message is raised when the device isn't DMA coherent and the
> > cache line size is larger than ARCH_DMA_MINALIGN (128 bytes). The cache line is
> > sorted from CTR_EL0[CWG], which corresponds to 256 bytes on the guest CPUs.
> > The DMA coherent capability is claimed through 'dma-coherent' in their
> > device-tree nodes.  
> 
> For QEMU emulated all our DMA is always coherent, so where we
> have DMA-capable devices we should definitely tell the kernel
> that that DMA is coherent.
> 
> Our pl011 does not do DMA, though (we do not set the dmas property), so
> it's kind of bogus for the kernel to complain about that.
> 
> So I think we should take these changes where they refer to DMA
> capable devices and ask the kernel folks to fix the warnings
> where they refer to devices that aren't doing DMA. Looking through
> the patch, though, my initial impression is that all these are
> in the latter category...

I was curious and have a very slow test running, so took a look.
of_dma_configure() is being passed force_dma = true.
https://elixir.bootlin.com/linux/v6.10-rc2/source/drivers/amba/bus.c#L361

The is a comment in of_dma_configure()
		/*
		 * For legacy reasons, we have to assume some devices need
		 * DMA configuration regardless of whether "dma-ranges" is
		 * correctly specified or not.
		 */
So this I think this is being triggered by a workaround for broken DT.

This was introduced by Robin Murphy +CC though you may need to ask on
kernel list because ARM / QEMU fun.

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=723288836628b

Relevant comment from that patch description:

"Certain bus types have a general expectation of
DMA capability and carry a well-established precedent that an absent
"dma-ranges" implies the same as the empty property, so we automatically
opt those in to DMA configuration regardless, to avoid regressing most
existing platforms."

The patch implies that AMBA is one of those.

So not sure this is solveable without a hack such as eliding the warning
message if dma_force was set as the situation probably isn't relevant then..

Jonathan

> 
> > diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> > index d480a7da02..cdf99966e6 100644
> > --- a/hw/arm/boot.c
> > +++ b/hw/arm/boot.c
> > @@ -509,6 +509,7 @@ static void fdt_add_psci_node(void *fdt)
> >      qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
> >      qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
> >      qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
> > +    qemu_fdt_setprop(fdt, "/psci", "dma-coherent", NULL, 0);  
> 
> The PSCI node is describing the firmware interface for
> HVC or SMC calls -- I don't think it makes any sense
> to think of this as doing DMA. So I would query the kernel
> folks about this warning.
> 
> >  }
> >
> >  int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> > index 3c93c0c0a6..d3e5f512e2 100644
> > --- a/hw/arm/virt.c
> > +++ b/hw/arm/virt.c
> > @@ -652,6 +652,7 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms)
> >          qemu_fdt_setprop_cells(ms->fdt, "/pmu", "interrupts",
> >                                 GIC_FDT_IRQ_TYPE_PPI,
> >                                 INTID_TO_PPI(VIRTUAL_PMU_IRQ), irqflags);
> > +        qemu_fdt_setprop(ms->fdt, "/pmu", "dma-coherent", NULL, 0);  
> 
> What DMA interface does the PMU have?
> 
> >      }
> >  }
> >
> > @@ -936,6 +937,7 @@ static void create_uart(const VirtMachineState *vms, int uart,
> >                                 vms->clock_phandle, vms->clock_phandle);
> >      qemu_fdt_setprop(ms->fdt, nodename, "clock-names",
> >                           clocknames, sizeof(clocknames));
> > +    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);  
> 
> As above, our PL011 doesn't do any DMA and we do not advertise
> to the kernel that it does.
> 
> >      if (uart == VIRT_UART) {
> >          qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename);
> > @@ -972,6 +974,7 @@ static void create_rtc(const VirtMachineState *vms)
> >                             GIC_FDT_IRQ_FLAGS_LEVEL_HI);
> >      qemu_fdt_setprop_cell(ms->fdt, nodename, "clocks", vms->clock_phandle);
> >      qemu_fdt_setprop_string(ms->fdt, nodename, "clock-names", "apb_pclk");
> > +    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);
> >      g_free(nodename);
> >  }  
> 
> What DMA does the pl031 do?
> 
> >
> > @@ -1077,6 +1080,7 @@ static void create_gpio_devices(const VirtMachineState *vms, int gpio,
> >      qemu_fdt_setprop_cell(ms->fdt, nodename, "clocks", vms->clock_phandle);
> >      qemu_fdt_setprop_string(ms->fdt, nodename, "clock-names", "apb_pclk");
> >      qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", phandle);
> > +    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);  
> 
> As far as I know the PL061 is also not a DMA-capable device.
> 
> >      if (gpio != VIRT_GPIO) {
> >          /* Mark as not usable by the normal world */
> > diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
> > index eebcd28f9a..da47071a95 100644
> > --- a/hw/core/sysbus-fdt.c
> > +++ b/hw/core/sysbus-fdt.c
> > @@ -554,6 +554,7 @@ void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr,
> >      qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, bus_size);
> >
> >      qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", intc);
> > +    qemu_fdt_setprop(fdt, node, "dma-coherent", NULL, 0);  
> 
> Isn't this the fdt node for a bus, not a device?
> 
> thanks
> -- PMM
>
Robin Murphy June 6, 2024, 9:18 p.m. UTC | #3
On 2024-06-06 6:13 pm, Jonathan Cameron wrote:
> On Thu, 6 Jun 2024 12:56:59 +0100
> Peter Maydell <peter.maydell@linaro.org> wrote:
> 
>> On Thu, 6 Jun 2024 at 11:48, Zhenyu Zhang <zhenyzha@redhat.com> wrote:
>>>
>>> Multiple warning messages and corresponding backtraces are observed when Linux
>>> guest is booted on the host with Fujitsu CPUs. One of them is shown as below.
>>>
>>> [    0.032443] ------------[ cut here ]------------
>>> [    0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256)
>>> [    0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54 arch_setup_dma_ops+0xbc/0xcc
>>> [    0.032470] Modules linked in:
>>> [    0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64 #1
>>> [    0.032481] Hardware name: linux,dummy-virt (DT)
>>> [    0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>>> [    0.032490] pc : arch_setup_dma_ops+0xbc/0xcc
>>> [    0.032496] lr : arch_setup_dma_ops+0xbc/0xcc
>>> [    0.032501] sp : ffff80008003b860
>>> [    0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c
>>> [    0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
>>> [    0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000
>>> [    0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff
>>> [    0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89
>>> [    0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61
>>> [    0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4
>>> [    0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029
>>> [    0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001
>>> [    0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40
>>> [    0.032564] Call trace:
>>> [    0.032566]  arch_setup_dma_ops+0xbc/0xcc
>>> [    0.032572]  of_dma_configure_id+0x138/0x300
>>> [    0.032591]  amba_dma_configure+0x34/0xc0
>>> [    0.032600]  really_probe+0x78/0x3dc
>>> [    0.032614]  __driver_probe_device+0x108/0x160
>>> [    0.032619]  driver_probe_device+0x44/0x114
>>> [    0.032624]  __device_attach_driver+0xb8/0x14c
>>> [    0.032629]  bus_for_each_drv+0x88/0xe4
>>> [    0.032634]  __device_attach+0xb0/0x1e0
>>> [    0.032638]  device_initial_probe+0x18/0x20
>>> [    0.032643]  bus_probe_device+0xa8/0xb0
>>> [    0.032648]  device_add+0x4b4/0x6c0
>>> [    0.032652]  amba_device_try_add.part.0+0x48/0x360
>>> [    0.032657]  amba_device_add+0x104/0x144
>>> [    0.032662]  of_amba_device_create.isra.0+0x100/0x1c4
>>> [    0.032666]  of_platform_bus_create+0x294/0x35c
>>> [    0.032669]  of_platform_populate+0x5c/0x150
>>> [    0.032672]  of_platform_default_populate_init+0xd0/0xec
>>> [    0.032697]  do_one_initcall+0x4c/0x2e0
>>> [    0.032701]  do_initcalls+0x100/0x13c
>>> [    0.032707]  kernel_init_freeable+0x1c8/0x21c
>>> [    0.032712]  kernel_init+0x28/0x140
>>> [    0.032731]  ret_from_fork+0x10/0x20
>>> [    0.032735] ---[ end trace 0000000000000000 ]---
>>>
>>> In Linux, a check is applied to every device which is exposed through device-tree
>>> node. The warning message is raised when the device isn't DMA coherent and the
>>> cache line size is larger than ARCH_DMA_MINALIGN (128 bytes). The cache line is
>>> sorted from CTR_EL0[CWG], which corresponds to 256 bytes on the guest CPUs.
>>> The DMA coherent capability is claimed through 'dma-coherent' in their
>>> device-tree nodes.
>>
>> For QEMU emulated all our DMA is always coherent, so where we
>> have DMA-capable devices we should definitely tell the kernel
>> that that DMA is coherent.

The trick for that is to put the "dma-coherent" property right in the 
root of the DT so it plausibly communicates "the whole platform is 
coherent", and is then inherited by all devices, even those which 
shouldn't technically need it.

>> Our pl011 does not do DMA, though (we do not set the dmas property), so
>> it's kind of bogus for the kernel to complain about that.

The issue there is, per the history Jonathan dug up, DT on Arm got the 
assumption baked into it from day one that "dma-ranges" was implied for 
simple-bus and similar, and thus there is no easy generic way to 
indicate that any MMIO device *can't* do DMA. For Linux this means we 
end up having to assume that everything *might* be DMA-capable, since 
the only thing which knows for sure is a driver, but we have further 
legacy in the driver model which means we have to do perform the basic 
DMA setup for any device *before* its driver probes. Yes, it's a bit 
rubbish; feel free to shake your fist at the past.

(At least we learned and got it right in ACPI for arm64 by making the 
_CCA method mandatory for DMA-capable devices...)

>> So I think we should take these changes where they refer to DMA
>> capable devices and ask the kernel folks to fix the warnings
>> where they refer to devices that aren't doing DMA. Looking through
>> the patch, though, my initial impression is that all these are
>> in the latter category...
> 
> I was curious and have a very slow test running, so took a look.
> of_dma_configure() is being passed force_dma = true.
> https://elixir.bootlin.com/linux/v6.10-rc2/source/drivers/amba/bus.c#L361
> 
> The is a comment in of_dma_configure()
> 		/*
> 		 * For legacy reasons, we have to assume some devices need
> 		 * DMA configuration regardless of whether "dma-ranges" is
> 		 * correctly specified or not.
> 		 */
> So this I think this is being triggered by a workaround for broken DT.
> 
> This was introduced by Robin Murphy +CC though you may need to ask on
> kernel list because ARM / QEMU fun.
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=723288836628b
> 
> Relevant comment from that patch description:
> 
> "Certain bus types have a general expectation of
> DMA capability and carry a well-established precedent that an absent
> "dma-ranges" implies the same as the empty property, so we automatically
> opt those in to DMA configuration regardless, to avoid regressing most
> existing platforms."
> 
> The patch implies that AMBA is one of those.
> 
> So not sure this is solveable without a hack such as eliding the warning
> message if dma_force was set as the situation probably isn't relevant then..

Except it absolutely is, because the whole reason for setting force_dma 
on those buses is that they *do* commonly have DMA-capable devices, and 
they are also commonly non-coherent such that this condition would be 
serious. Especially AMBA, given that the things old enough to still be 
using that abstraction rather than plain platform (PL080, PL111, 
PL330,...) all predate ACE-Lite so don't even have the *possibility* of 
being coherent without external trickery in the interconnect.

Thanks,
Robin.
Peter Maydell June 7, 2024, 10:17 a.m. UTC | #4
On Thu, 6 Jun 2024 at 22:18, Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2024-06-06 6:13 pm, Jonathan Cameron wrote:
> > On Thu, 6 Jun 2024 12:56:59 +0100
> > Peter Maydell <peter.maydell@linaro.org> wrote:
> >
> >> On Thu, 6 Jun 2024 at 11:48, Zhenyu Zhang <zhenyzha@redhat.com> wrote:
> >>> In Linux, a check is applied to every device which is exposed through device-tree
> >>> node. The warning message is raised when the device isn't DMA coherent and the
> >>> cache line size is larger than ARCH_DMA_MINALIGN (128 bytes). The cache line is
> >>> sorted from CTR_EL0[CWG], which corresponds to 256 bytes on the guest CPUs.
> >>> The DMA coherent capability is claimed through 'dma-coherent' in their
> >>> device-tree nodes.
> >>
> >> For QEMU emulated all our DMA is always coherent, so where we
> >> have DMA-capable devices we should definitely tell the kernel
> >> that that DMA is coherent.
>
> The trick for that is to put the "dma-coherent" property right in the
> root of the DT so it plausibly communicates "the whole platform is
> coherent", and is then inherited by all devices, even those which
> shouldn't technically need it.

Ah, cool -- that's a pretty small change and it makes sense, and
avoids us having potential bugs in future where we forget to
mark a really-does-do-DMA device as dma-coherent. I like that
a lot better than adding incorrect dma-coherent tags to lots
of device nodes that aren't for DMA-capable devices.

thanks
-- PMM
Zhenyu Zhang June 7, 2024, 12:19 p.m. UTC | #5
On Thu, Jun 6, 2024 at 7:57 PM Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Thu, 6 Jun 2024 at 11:48, Zhenyu Zhang <zhenyzha@redhat.com> wrote:
> >
> > Multiple warning messages and corresponding backtraces are observed when Linux
> > guest is booted on the host with Fujitsu CPUs. One of them is shown as below.
> >
> > [    0.032443] ------------[ cut here ]------------
> > [    0.032446] uart-pl011 9000000.pl011: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256)
> > [    0.032454] WARNING: CPU: 0 PID: 1 at arch/arm64/mm/dma-mapping.c:54 arch_setup_dma_ops+0xbc/0xcc
> > [    0.032470] Modules linked in:
> > [    0.032475] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-452.el9.aarch64 #1
> > [    0.032481] Hardware name: linux,dummy-virt (DT)
> > [    0.032484] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > [    0.032490] pc : arch_setup_dma_ops+0xbc/0xcc
> > [    0.032496] lr : arch_setup_dma_ops+0xbc/0xcc
> > [    0.032501] sp : ffff80008003b860
> > [    0.032503] x29: ffff80008003b860 x28: 0000000000000000 x27: ffffaae4b949049c
> > [    0.032510] x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000
> > [    0.032517] x23: 0000000000000100 x22: 0000000000000000 x21: 0000000000000000
> > [    0.032523] x20: 0000000100000000 x19: ffff2f06c02ea400 x18: ffffffffffffffff
> > [    0.032529] x17: 00000000208a5f76 x16: 000000006589dbcb x15: ffffaae4ba071c89
> > [    0.032535] x14: 0000000000000000 x13: ffffaae4ba071c84 x12: 455f525443206e61
> > [    0.032541] x11: 68742072656c6c61 x10: 0000000000000029 x9 : ffffaae4b7d21da4
> > [    0.032547] x8 : 0000000000000029 x7 : 4c414e494d5f414d x6 : 0000000000000029
> > [    0.032553] x5 : 000000000000000f x4 : ffffaae4b9617a00 x3 : 0000000000000001
> > [    0.032558] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff2f06c029be40
> > [    0.032564] Call trace:
> > [    0.032566]  arch_setup_dma_ops+0xbc/0xcc
> > [    0.032572]  of_dma_configure_id+0x138/0x300
> > [    0.032591]  amba_dma_configure+0x34/0xc0
> > [    0.032600]  really_probe+0x78/0x3dc
> > [    0.032614]  __driver_probe_device+0x108/0x160
> > [    0.032619]  driver_probe_device+0x44/0x114
> > [    0.032624]  __device_attach_driver+0xb8/0x14c
> > [    0.032629]  bus_for_each_drv+0x88/0xe4
> > [    0.032634]  __device_attach+0xb0/0x1e0
> > [    0.032638]  device_initial_probe+0x18/0x20
> > [    0.032643]  bus_probe_device+0xa8/0xb0
> > [    0.032648]  device_add+0x4b4/0x6c0
> > [    0.032652]  amba_device_try_add.part.0+0x48/0x360
> > [    0.032657]  amba_device_add+0x104/0x144
> > [    0.032662]  of_amba_device_create.isra.0+0x100/0x1c4
> > [    0.032666]  of_platform_bus_create+0x294/0x35c
> > [    0.032669]  of_platform_populate+0x5c/0x150
> > [    0.032672]  of_platform_default_populate_init+0xd0/0xec
> > [    0.032697]  do_one_initcall+0x4c/0x2e0
> > [    0.032701]  do_initcalls+0x100/0x13c
> > [    0.032707]  kernel_init_freeable+0x1c8/0x21c
> > [    0.032712]  kernel_init+0x28/0x140
> > [    0.032731]  ret_from_fork+0x10/0x20
> > [    0.032735] ---[ end trace 0000000000000000 ]---
> >
> > In Linux, a check is applied to every device which is exposed through device-tree
> > node. The warning message is raised when the device isn't DMA coherent and the
> > cache line size is larger than ARCH_DMA_MINALIGN (128 bytes). The cache line is
> > sorted from CTR_EL0[CWG], which corresponds to 256 bytes on the guest CPUs.
> > The DMA coherent capability is claimed through 'dma-coherent' in their
> > device-tree nodes.
>
> For QEMU emulated all our DMA is always coherent, so where we
> have DMA-capable devices we should definitely tell the kernel
> that that DMA is coherent.
>
> Our pl011 does not do DMA, though (we do not set the dmas property), so
> it's kind of bogus for the kernel to complain about that.
>
> So I think we should take these changes where they refer to DMA
> capable devices and ask the kernel folks to fix the warnings
> where they refer to devices that aren't doing DMA. Looking through
> the patch, though, my initial impression is that all these are
> in the latter category...
My initial thought was to fix it in the kernel too.
However, through preliminary research, I discovered some
'legacy reasons' mentioned in the discussion above.
So I'm worried that the fix will bring new risks.

>
> > diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> > index d480a7da02..cdf99966e6 100644
> > --- a/hw/arm/boot.c
> > +++ b/hw/arm/boot.c
> > @@ -509,6 +509,7 @@ static void fdt_add_psci_node(void *fdt)
> >      qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
> >      qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
> >      qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
> > +    qemu_fdt_setprop(fdt, "/psci", "dma-coherent", NULL, 0);
>
> The PSCI node is describing the firmware interface for
> HVC or SMC calls -- I don't think it makes any sense
> to think of this as doing DMA. So I would query the kernel
> folks about this warning.
Thanks a lot for your help!
>
> >  }
> >
> >  int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> > index 3c93c0c0a6..d3e5f512e2 100644
> > --- a/hw/arm/virt.c
> > +++ b/hw/arm/virt.c
> > @@ -652,6 +652,7 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms)
> >          qemu_fdt_setprop_cells(ms->fdt, "/pmu", "interrupts",
> >                                 GIC_FDT_IRQ_TYPE_PPI,
> >                                 INTID_TO_PPI(VIRTUAL_PMU_IRQ), irqflags);
> > +        qemu_fdt_setprop(ms->fdt, "/pmu", "dma-coherent", NULL, 0);
>
> What DMA interface does the PMU have?
]# dmesg | grep PMU
hw perfevents: enabled with armv8_pmuv3_0 PMU driver, 9 counters available

>
> >      }
> >  }
> >
> > @@ -936,6 +937,7 @@ static void create_uart(const VirtMachineState *vms, int uart,
> >                                 vms->clock_phandle, vms->clock_phandle);
> >      qemu_fdt_setprop(ms->fdt, nodename, "clock-names",
> >                           clocknames, sizeof(clocknames));
> > +    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);
>
> As above, our PL011 doesn't do any DMA and we do not advertise
> to the kernel that it does.
Indeed so.

>
> >      if (uart == VIRT_UART) {
> >          qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename);
> > @@ -972,6 +974,7 @@ static void create_rtc(const VirtMachineState *vms)
> >                             GIC_FDT_IRQ_FLAGS_LEVEL_HI);
> >      qemu_fdt_setprop_cell(ms->fdt, nodename, "clocks", vms->clock_phandle);
> >      qemu_fdt_setprop_string(ms->fdt, nodename, "clock-names", "apb_pclk");
> > +    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);
> >      g_free(nodename);
> >  }
>
> What DMA does the pl031 do?
I hit the warning on rtc-pl031

[    2.176009] ------------[ cut here ]------------
[    2.177959] rtc-pl031 9010000.pl031: ARCH_DMA_MINALIGN smaller than
CTR_EL0.CWG (128 < 256)
[    2.177975] WARNING: CPU: 0 PID: 1 at
arch/arm64/mm/dma-mapping.c:54 arch_setup_dma_ops+0xc0/0xe0
[    2.177998] Modules linked in:
[    2.178006] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G S
  -------  ---  6.9.0-7.el10.aarch64 #1
[    2.178015] Hardware name: linux,dummy-virt (DT)
[    2.178020] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    2.178028] pc : arch_setup_dma_ops+0xc0/0xe0
[    2.178037] lr : arch_setup_dma_ops+0xc0/0xe0
[    2.178044] sp : ffff80008003bac0
[    2.178048] x29: ffff80008003bac0 x28: 0000000000000000 x27: 0000000000000000
[    2.178059] x26: 0000000000000000 x25: 0000000000000000 x24: 00000000ffffffed
[    2.178070] x23: 0000000000000000 x22: 0000000000000100 x21: 0000000000000000
[    2.178080] x20: 0000000100000000 x19: ffff277b80311400 x18: ffffffffffffffff
[    2.178091] x17: 000000002e2c3887 x16: 000000006b071b85 x15: ffffab158b0e45eb
[    2.178101] x14: 0000000000000000 x13: ffffab158b0e45f0 x12: ffffab158adec6c0
[    2.178112] x11: ffffab158ab2c718 x10: ffffab158adec718 x9 : ffffab158894a9d0
[    2.178122] x8 : 0000000000000001 x7 : 00000000000bffe8 x6 : c0000000ffff7fff
[    2.178132] x5 : 00000000002bffa8 x4 : ffffab158ab2c4c8 x3 : 0000000000000001
[    2.178142] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff277b802c2a00
[    2.178152] Call trace:
[    2.178155]  arch_setup_dma_ops+0xc0/0xe0
[    2.178164]  of_dma_configure_id+0x29c/0x3f0
[    2.178178]  amba_dma_configure+0x38/0xd0
[    2.178188]  really_probe+0x7c/0x3a0
[    2.178196]  __driver_probe_device+0x84/0x160
[    2.178202]  driver_probe_device+0x44/0x130
[    2.178208]  __driver_attach+0xcc/0x208
[    2.178214]  bus_for_each_dev+0x84/0x100
[    2.178223]  driver_attach+0x2c/0x40
[    2.178229]  bus_add_driver+0x11c/0x238
[    2.178237]  driver_register+0x70/0x138
[    2.178244]  amba_driver_register+0x2c/0x48
[    2.178251]  pl031_driver_init+0x20/0x38
[    2.178260]  do_one_initcall+0x60/0x378
[    2.178268]  do_initcalls+0x114/0x158
[    2.178277]  kernel_init_freeable+0x1c4/0x228
[    2.178284]  kernel_init+0x28/0x158
[    2.178292]  ret_from_fork+0x10/0x20
[    2.178299] ---[ end trace 0000000000000000 ]---
[    2.178951] rtc-pl031 9010000.pl031: registered as rtc0
[    2.270706] rtc-pl031 9010000.pl031: setting system clock to
2024-06-07T08:14:37 UTC (1717748077)
[    2.282868] hid: raw HID events driver (C) Jiri Kosina
[    2.285235] usbcore: registered new interface driver usbhid
[    2.287579] usbhid: USB HID core driver

>
> >
> > @@ -1077,6 +1080,7 @@ static void create_gpio_devices(const VirtMachineState *vms, int gpio,
> >      qemu_fdt_setprop_cell(ms->fdt, nodename, "clocks", vms->clock_phandle);
> >      qemu_fdt_setprop_string(ms->fdt, nodename, "clock-names", "apb_pclk");
> >      qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", phandle);
> > +    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);
>
> As far as I know the PL061 is also not a DMA-capable device.
For the same reason, this is the warning I hit on pl061_gpio

[    1.479175] ------------[ cut here ]------------
[    1.570587] pl061_gpio 9030000.pl061: ARCH_DMA_MINALIGN smaller
than CTR_EL0.CWG (128 < 256)
[    1.570606] WARNING: CPU: 0 PID: 1 at
arch/arm64/mm/dma-mapping.c:54 arch_setup_dma_ops+0xc0/0xe0
[    1.570629] Modules linked in:
[    1.570638] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G S
  -------  ---  6.9.0-7.el10.aarch64 #1
[    1.570646] Hardware name: linux,dummy-virt (DT)
[    1.570652] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    1.570660] pc : arch_setup_dma_ops+0xc0/0xe0
[    1.570669] lr : arch_setup_dma_ops+0xc0/0xe0
[    1.570677] sp : ffff80008003bac0
[    1.570680] x29: ffff80008003bac0 x28: 0000000000000000 x27: 0000000000000000
[    1.570691] x26: 0000000000000000 x25: 0000000000000000 x24: 00000000ffffffed
[    1.570702] x23: 0000000000000000 x22: 0000000000000100 x21: 0000000000000000
[    1.570712] x20: 0000000100000000 x19: ffff277b8030c000 x18: ffffffffffffffff
[    1.570723] x17: 3061326332303862 x16: 3737326666666620 x15: ffffab158b0e1524
[    1.570733] x14: 0000000000000000 x13: ffffab158b0e1529 x12: ffffab158adec6c0
[    1.570743] x11: ffffab158ab2c718 x10: ffffab158adec718 x9 : ffffab158894a9d0
[    1.570754] x8 : 0000000000000001 x7 : 00000000000bffe8 x6 : c0000000ffff7fff
[    1.570764] x5 : 00000000002bffa8 x4 : ffffab158ab2c4c8 x3 : 0000000000000001
[    1.570773] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff277b802c2a00
[    1.570783] Call trace:
[    1.570787]  arch_setup_dma_ops+0xc0/0xe0
[    1.570796]  of_dma_configure_id+0x29c/0x3f0
[    1.570810]  amba_dma_configure+0x38/0xd0
[    1.570820]  really_probe+0x7c/0x3a0
[    1.570828]  __driver_probe_device+0x84/0x160
[    1.570834]  driver_probe_device+0x44/0x130
[    1.570839]  __driver_attach+0xcc/0x208
[    1.570845]  bus_for_each_dev+0x84/0x100
[    1.570855]  driver_attach+0x2c/0x40
[    1.570860]  bus_add_driver+0x11c/0x238
[    1.570868]  driver_register+0x70/0x138
[    1.570875]  amba_driver_register+0x2c/0x48
[    1.570882]  pl061_gpio_driver_init+0x20/0x38
[    1.570895]  do_one_initcall+0x60/0x378
[    1.570902]  do_initcalls+0x114/0x158
[    1.570911]  kernel_init_freeable+0x1c4/0x228
[    1.570918]  kernel_init+0x28/0x158
[    1.570926]  ret_from_fork+0x10/0x20
[    1.570934] ---[ end trace 0000000000000000 ]---

>
> >      if (gpio != VIRT_GPIO) {
> >          /* Mark as not usable by the normal world */
> > diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
> > index eebcd28f9a..da47071a95 100644
> > --- a/hw/core/sysbus-fdt.c
> > +++ b/hw/core/sysbus-fdt.c
> > @@ -554,6 +554,7 @@ void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr,
> >      qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, bus_size);
> >
> >      qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", intc);
> > +    qemu_fdt_setprop(fdt, node, "dma-coherent", NULL, 0);
>
> Isn't this the fdt node for a bus, not a device?
Yes, since I hit the same warning messages.
So I want to fix it together.
Sorry for not describing it clearly.

[    1.476145] ------------[ cut here ]------------
[    1.478210] simple-pm-bus platform-bus@c000000: ARCH_DMA_MINALIGN
smaller than CTR_EL0.CWG (128 < 256)
[    1.478225] WARNING: CPU: 0 PID: 1 at
arch/arm64/mm/dma-mapping.c:54 arch_setup_dma_ops+0xc0/0xe0
[    1.478248] Modules linked in:
[    1.478257] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G S
  -------  ---  6.9.0-7.el10.aarch64 #1
[    1.478266] Hardware name: linux,dummy-virt (DT)
[    1.478270] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    1.478279] pc : arch_setup_dma_ops+0xc0/0xe0
[    1.478287] lr : arch_setup_dma_ops+0xc0/0xe0
[    1.478295] sp : ffff80008003bac0
[    1.478299] x29: ffff80008003bac0 x28: 0000000000000000 x27: 0000000000000000
[    1.478310] x26: 0000000000000000 x25: 0000000000000000 x24: 00000000ffffffed
[    1.478321] x23: 0000000000000000 x22: 0000000000000100 x21: 0000000000000000
[    1.478331] x20: 0000000100000000 x19: ffff277b80300010 x18: ffffffffffffffff
[    1.478342] x17: 0000000098c949f4 x16: 00000000099ae637 x15: ffffab158b0e0c2e
[    1.478352] x14: 0000000000000000 x13: ffffab158b0e0c33 x12: ffffab158adec6c0
[    1.478363] x11: ffffab158ab2c718 x10: ffffab158adec718 x9 : ffffab158894a9d0
[    1.478373] x8 : 0000000000000001 x7 : 00000000000bffe8 x6 : c0000000ffff7fff
[    1.478383] x5 : 00000000002bffa8 x4 : ffffab158ab2c4c8 x3 : 0000000000000001
[    1.478393] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff277b802c2a00
[    1.478403] Call trace:
[    1.478406]  arch_setup_dma_ops+0xc0/0xe0
[    1.478415]  of_dma_configure_id+0x29c/0x3f0
[    1.478429]  platform_dma_configure+0x108/0x120
[    1.478439]  really_probe+0x7c/0x3a0
[    1.478445]  __driver_probe_device+0x84/0x160
[    1.478451]  driver_probe_device+0x44/0x130
[    1.478457]  __driver_attach+0xcc/0x208
[    1.478462]  bus_for_each_dev+0x84/0x100
[    1.478472]  driver_attach+0x2c/0x40
[    1.478477]  bus_add_driver+0x11c/0x238
[    1.478485]  driver_register+0x70/0x138
[    1.478491]  __platform_driver_register+0x30/0x48
[    1.478498]  simple_pm_bus_driver_init+0x24/0x38
[    1.478511]  do_one_initcall+0x60/0x378
[    1.478518]  do_initcalls+0x114/0x158
[    1.478527]  kernel_init_freeable+0x1c4/0x228
[    1.478534]  kernel_init+0x28/0x158
[    1.478542]  ret_from_fork+0x10/0x20
[    1.478550] ---[ end trace 0000000000000000 ]---
[    1.479175] ------------[ cut here ]------------

>
> thanks
> -- PMM
>
Zhenyu Zhang June 7, 2024, 12:20 p.m. UTC | #6
On Fri, Jun 7, 2024 at 6:17 PM Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Thu, 6 Jun 2024 at 22:18, Robin Murphy <robin.murphy@arm.com> wrote:
> >
> > On 2024-06-06 6:13 pm, Jonathan Cameron wrote:
> > > On Thu, 6 Jun 2024 12:56:59 +0100
> > > Peter Maydell <peter.maydell@linaro.org> wrote:
> > >
> > >> On Thu, 6 Jun 2024 at 11:48, Zhenyu Zhang <zhenyzha@redhat.com> wrote:
> > >>> In Linux, a check is applied to every device which is exposed through device-tree
> > >>> node. The warning message is raised when the device isn't DMA coherent and the
> > >>> cache line size is larger than ARCH_DMA_MINALIGN (128 bytes). The cache line is
> > >>> sorted from CTR_EL0[CWG], which corresponds to 256 bytes on the guest CPUs.
> > >>> The DMA coherent capability is claimed through 'dma-coherent' in their
> > >>> device-tree nodes.
> > >>
> > >> For QEMU emulated all our DMA is always coherent, so where we
> > >> have DMA-capable devices we should definitely tell the kernel
> > >> that that DMA is coherent.
> >
> > The trick for that is to put the "dma-coherent" property right in the
> > root of the DT so it plausibly communicates "the whole platform is
> > coherent", and is then inherited by all devices, even those which
> > shouldn't technically need it.
>
> Ah, cool -- that's a pretty small change and it makes sense, and
> avoids us having potential bugs in future where we forget to
> mark a really-does-do-DMA device as dma-coherent. I like that
> a lot better than adding incorrect dma-coherent tags to lots
> of device nodes that aren't for DMA-capable devices.
Hi Robin, Peter, and Jonathan,

Thanks for your suggestions, it helps me a lot!
I will submit a v2 patch that puts the "dma-coherent" property right
in the root of the DT so it plausibly communicates.
Yes, these discussions make sense.
I will modify and test it again on Fujitsu host.

Thanks again
Zhenyu

>
> thanks
> -- PMM
>
diff mbox series

Patch

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index d480a7da02..cdf99966e6 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -509,6 +509,7 @@  static void fdt_add_psci_node(void *fdt)
     qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
     qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
     qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
+    qemu_fdt_setprop(fdt, "/psci", "dma-coherent", NULL, 0);
 }
 
 int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 3c93c0c0a6..d3e5f512e2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -652,6 +652,7 @@  static void fdt_add_pmu_nodes(const VirtMachineState *vms)
         qemu_fdt_setprop_cells(ms->fdt, "/pmu", "interrupts",
                                GIC_FDT_IRQ_TYPE_PPI,
                                INTID_TO_PPI(VIRTUAL_PMU_IRQ), irqflags);
+        qemu_fdt_setprop(ms->fdt, "/pmu", "dma-coherent", NULL, 0);
     }
 }
 
@@ -936,6 +937,7 @@  static void create_uart(const VirtMachineState *vms, int uart,
                                vms->clock_phandle, vms->clock_phandle);
     qemu_fdt_setprop(ms->fdt, nodename, "clock-names",
                          clocknames, sizeof(clocknames));
+    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);
 
     if (uart == VIRT_UART) {
         qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename);
@@ -972,6 +974,7 @@  static void create_rtc(const VirtMachineState *vms)
                            GIC_FDT_IRQ_FLAGS_LEVEL_HI);
     qemu_fdt_setprop_cell(ms->fdt, nodename, "clocks", vms->clock_phandle);
     qemu_fdt_setprop_string(ms->fdt, nodename, "clock-names", "apb_pclk");
+    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);
     g_free(nodename);
 }
 
@@ -1077,6 +1080,7 @@  static void create_gpio_devices(const VirtMachineState *vms, int gpio,
     qemu_fdt_setprop_cell(ms->fdt, nodename, "clocks", vms->clock_phandle);
     qemu_fdt_setprop_string(ms->fdt, nodename, "clock-names", "apb_pclk");
     qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", phandle);
+    qemu_fdt_setprop(ms->fdt, nodename, "dma-coherent", NULL, 0);
 
     if (gpio != VIRT_GPIO) {
         /* Mark as not usable by the normal world */
diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
index eebcd28f9a..da47071a95 100644
--- a/hw/core/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -554,6 +554,7 @@  void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr,
     qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, bus_size);
 
     qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", intc);
+    qemu_fdt_setprop(fdt, node, "dma-coherent", NULL, 0);
 
     dev = qdev_find_recursive(sysbus_get_default(), TYPE_PLATFORM_BUS_DEVICE);
     pbus = PLATFORM_BUS_DEVICE(dev);