
WARNING: drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c:364 vchiq_prepare_bulk_data

Message ID ad2b7034-ecc9-4d6e-844c-bfe32fb5b23c@gmx.net (mailing list archive)
State New, archived
Series WARNING: drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c:364 vchiq_prepare_bulk_data

Commit Message

Stefan Wahren June 9, 2024, 10:24 p.m. UTC
Hi,
I was able to trigger a flood of WARNINGs on a Raspberry Pi 3 B Plus
(arm64/defconfig + CONFIG_BCM2835_VCHIQ=m + CONFIG_VCHIQ_CDEV=y) by running
the following userspace tool (it triggers a DMA transfer):

vchiq_test -f 1

The command seems to succeed as usual, apart from the warnings. But these
warnings are not reproducible with a 32-bit configuration like
multi_v7_defconfig on the same Raspberry Pi 3 B Plus:

[   64.531826] WARNING: CPU: 3 PID: 867 at drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c:364 vchiq_prepare_bulk_data+0x2cc/0x4e8 [vchiq]
[   64.531880] Modules linked in: aes_neon_blk af_alg 8021q garp mrp stp
llc microchip raspberrypi_cpufreq brcmfmac_wcc snd_bcm2835(C) lan78xx
brcmfmac brcmutil vc4 cfg80211 hci_uart snd_soc_hdmi_codec btqca cec
btbcm onboard_usb_hub drm_display_helper bluetooth crct10dif_ce
drm_dma_helper raspberrypi_hwmon clk_raspberrypi drm_kms_helper
ecdh_generic bcm2835_thermal i2c_bcm2835 ecc pwm_bcm2835 rfkill vchiq(C)
drm fuse ip_tables x_tables ipv6
[   64.532010] CPU: 3 PID: 867 Comm: vchiq_test Tainted: G WC 6.4.0-rc4-00297-gf999f38b4e6f #24
[   64.532022] Hardware name: Raspberry Pi 3 Model B Plus Rev 1.3 (DT)
[   64.532029] pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   64.532038] pc : vchiq_prepare_bulk_data+0x2cc/0x4e8 [vchiq]
[   64.532060] lr : vchiq_prepare_bulk_data+0x320/0x4e8 [vchiq]
[   64.532080] sp : ffff8000093dbb10
[   64.532086] x29: ffff8000093dbb10 x28: 00000000000000a4 x27: ffff80000b065000
[   64.532101] x26: ffff80000b065010 x25: ffff80000b065060 x24: 0000000000000000
[   64.532114] x23: 0000000000000001 x22: 0000000000000001 x21: 0000000000000001
[   64.532127] x20: ffff80000b065040 x19: 0000000000000002 x18: 0000000000000800
[   64.532139] x17: 001fffffffffffff x16: ffffdefad16fc6d0 x15: ffff1449b720713c
[   64.532151] x14: 0000000000000000 x13: ffff1449b7207130 x12: ffff1449b7207100
[   64.532163] x11: 0000000000000030 x10: 00000000000000ff x9 : 000000003f000000
[   64.532175] x8 : 00000000351ae800 x7 : 00000000351ae800 x6 : 00000000000000ff
[   64.532187] x5 : ffff1449b51ae801 x4 : ffff1449b9efc001 x3 : 0000000000000000
[   64.532201] x2 : 00000000f51ae800 x1 : fffffc5126e7bf02 x0 : 0000000000000001
[   64.532218] Call trace:
[   64.532222]  vchiq_prepare_bulk_data+0x2cc/0x4e8 [vchiq]
[   64.532247]  vchiq_bulk_transfer+0x25c/0x3bc [vchiq]
[   64.532272]  vchiq_irq_queue_bulk_tx_rx+0x6c/0x348 [vchiq]
[   64.532293]  vchiq_ioctl+0x684/0xbe8 [vchiq]
[   64.532313]  __arm64_sys_ioctl+0xa8/0xec
[   64.532327]  invoke_syscall+0x48/0x114
[   64.532341]  el0_svc_common.constprop.0+0x44/0xf4
[   64.532353]  do_el0_svc+0x3c/0xa8
[   64.532362]  el0_svc+0x2c/0x84
[   64.532379]  el0t_64_sync_handler+0xbc/0x138
[   64.532389]  el0t_64_sync+0x190/0x194
[   64.532400] ---[ end trace 0000000000000000 ]---

Since there are a lot of WARN_ON() calls in this function, I applied the
following patch:


@@ -357,15 +357,19 @@ create_pagelist(struct vchiq_instance *instance, char *buf, char __user *ubuf,
      k = 0;
      for_each_sg(scatterlist, sg, dma_buffers, i) {
          u32 len = sg_dma_len(sg);
-        u32 addr = sg_dma_address(sg);
+        dma_addr_t addr = sg_dma_address(sg);

          /* Note: addrs is the address + page_count - 1
           * The firmware expects blocks after the first to be page-
           * aligned and a multiple of the page size
           */
-        WARN_ON(len == 0);
-        WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
-        WARN_ON(i && (addr & ~PAGE_MASK));
+        if (len == 0)
+            pr_warn_once("%s: sg_dma_len() == 0\n", __func__);
+        else if (i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK))
+            pr_warn_once("%s: following block not page aligned\n", __func__);
+        else if (i && (addr & ~PAGE_MASK))
+            pr_warn_once("%s: block %u, DMA address %pad doesn't align with PAGE_MASK 0x%lx\n", __func__, i, &addr, PAGE_MASK);
+
          if (is_adjacent_block(addrs, addr, k))
              addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT);
          else
@@ -385,11 +389,15 @@ create_pagelist(struct vchiq_instance *instance, char *buf, char __user *ubuf,
              return NULL;
          }

-        WARN_ON(!g_free_fragments);
+        if (!g_free_fragments)
+            pr_warn_once("%s: !g_free_fragments\n", __func__);

          down(&g_free_fragments_mutex);
          fragments = g_free_fragments;
-        WARN_ON(!fragments);
+
+        if (!fragments)
+            pr_warn_once("%s: !fragments\n", __func__);
+
          g_free_fragments = *(char **)g_free_fragments;
          up(&g_free_fragments_mutex);
          pagelist->type = PAGELIST_READ_WITH_FRAGMENTS +

After that I'm getting the following warning:

   create_pagelist: block 1, DMA address 0x00000000f5f62800 doesn't align with PAGE_MASK 0xfffffffffffff000

Then I bisected the issue to this commit:

commit 1c1a429efd4ee8ca244cc2401365c983cda4ed76
Author: Catalin Marinas <catalin.marinas@arm.com>
Date:   Mon Jun 12 16:32:01 2023 +0100

     arm64: enable ARCH_WANT_KMALLOC_DMA_BOUNCE for arm64

     With the DMA bouncing of unaligned kmalloc() buffers now in place, enable
     it for arm64 to allow the kmalloc-{8,16,32,48,96} caches.  In addition,
     always create the swiotlb buffer even when the end of RAM is within the
     32-bit physical address range (the swiotlb buffer can still be disabled on
     the kernel command line).

So how can the root cause of these warnings be fixed?
Or is the specific WARN_ON() now obsolete?

Comments

Arnd Bergmann June 10, 2024, 6 a.m. UTC | #1
On Mon, Jun 10, 2024, at 00:24, Stefan Wahren wrote:

>
> After that i'm getting the following warning:
>
>    create_pagelist: block 1, DMA address 0x00000000f5f62800 doesn't
> align with PAGE_MASK 0xfffffffffffff000
>
> Then i bisected the issue to this commit:
>
> commit 1c1a429efd4ee8ca244cc2401365c983cda4ed76
> Author: Catalin Marinas <catalin.marinas@arm.com>
> Date:   Mon Jun 12 16:32:01 2023 +0100
>
>      arm64: enable ARCH_WANT_KMALLOC_DMA_BOUNCE for arm64
>
>      With the DMA bouncing of unaligned kmalloc() buffers now in place,
> enable
>      it for arm64 to allow the kmalloc-{8,16,32,48,96} caches.  In addition,
>      always create the swiotlb buffer even when the end of RAM is within the
>      32-bit physical address range (the swiotlb buffer can still be
> disabled on
>      the kernel command line).
>
> So how can the root cause of these warnings be fixed?
> Or is the specific WARN_ON() now obsolete?
>

It appears that the buffer that gets passed to the device
has a start address and length that both come from user
space, and in this case crosses a page boundary, and the
second half is not aligned to ARCH_KMALLOC_MINALIGN.

This means it's not actually a kmalloc buffer that goes
wrong, but userspace passing something that is problematic,
see also the comment above create_pagelist().

If that is a correct interpretation of what is going on,
one way to solve it would be to add the same logic for
vchiq user buffers that we have for kmalloc buffers:
if either the start or the size of the user buffer in
vchiq_irq_queue_bulk_tx_rx() are unaligned, just
allocate a new kernel buffer for the whole thing instead
of pinning the user buffer.

     Arnd
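
A rough sketch of the bounce approach Arnd describes (everything below is hypothetical and only illustrates the idea; the vchiq_bulk_transfer() argument order is assumed from the call quoted later in the thread, and completion/copy-back handling for reads is omitted):

/*
 * Hypothetical sketch, not actual vchiq code: if the user buffer or its
 * size is not suitably aligned, copy it through a freshly allocated
 * kernel buffer instead of pinning the user pages, so create_pagelist()
 * never sees an unaligned fragment.
 */
static int vchiq_bulk_user_bounce(struct vchiq_instance *instance,
				  unsigned int handle, void __user *ubuf,
				  unsigned int size, void *userdata,
				  int mode, int dir)
{
	void *kbuf;
	int status;

	if (IS_ALIGNED((unsigned long)ubuf, ARCH_KMALLOC_MINALIGN) &&
	    IS_ALIGNED(size, ARCH_KMALLOC_MINALIGN))
		return -EAGAIN;	/* caller keeps using the pinning path */

	kbuf = kmalloc(size, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	/* For transfers towards the VPU, fill the bounce buffer up front. */
	if (dir == VCHIQ_BULK_TRANSMIT && copy_from_user(kbuf, ubuf, size)) {
		kfree(kbuf);
		return -EFAULT;
	}

	/* Assumed argument order: kernel buffer in place of the user pointer. */
	status = vchiq_bulk_transfer(instance, handle, kbuf, NULL, size,
				     userdata, mode, dir);

	/* For reads, the data would have to be copied back to ubuf once the
	 * bulk transfer completes, and kbuf freed afterwards (not shown). */
	return status;
}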
Phil Elwell June 10, 2024, 8:26 a.m. UTC | #2
On Mon, 10 Jun 2024 at 07:00, Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Mon, Jun 10, 2024, at 00:24, Stefan Wahren wrote:
>
> >
> > After that i'm getting the following warning:
> >
> >    create_pagelist: block 1, DMA address 0x00000000f5f62800 doesn't
> > align with PAGE_MASK 0xfffffffffffff000
> >
> > Then i bisected the issue to this commit:
> >
> > commit 1c1a429efd4ee8ca244cc2401365c983cda4ed76
> > Author: Catalin Marinas <catalin.marinas@arm.com>
> > Date:   Mon Jun 12 16:32:01 2023 +0100
> >
> >      arm64: enable ARCH_WANT_KMALLOC_DMA_BOUNCE for arm64
> >
> >      With the DMA bouncing of unaligned kmalloc() buffers now in place,
> > enable
> >      it for arm64 to allow the kmalloc-{8,16,32,48,96} caches.  In addition,
> >      always create the swiotlb buffer even when the end of RAM is within the
> >      32-bit physical address range (the swiotlb buffer can still be
> > disabled on
> >      the kernel command line).
> >
> > So how can the root cause of these warnings be fixed?
> > Or is the specific WARN_ON() now obsolete?
> >
>
> It appears that the buffer that gets passed to the device
> has a start address and length that both come from user
> space, and in this case crosses a page boundary, and the
> second half is not aligned to ARCH_KMALLOC_MINALIGN.
>
> This means it's not actually a kmalloc buffer that goes
> wrong, but userspace passing something that is problematic,
> see also the comment above create_pagelist().
>
> If that is a correct interpretation of what is going on,
> one way to solve it would be to add the same logic for
> vchiq user buffers that we have for kmalloc buffers:
> if either the start or the size of the user buffer in
> vchiq_irq_queue_bulk_tx_rx() are unaligned, just
> allocate a new kernel buffer for the whole thing instead
> of pinning the user buffer.
>
>      Arnd

Why is swiotlb involved at all? The DMA controller on BCM2837 can
access all RAM that is visible to the ARM cores.

Phil
Arnd Bergmann June 10, 2024, 9 a.m. UTC | #3
On Mon, Jun 10, 2024, at 10:26, Phil Elwell wrote:
> On Mon, 10 Jun 2024 at 07:00, Arnd Bergmann <arnd@arndb.de> wrote:
>
> Why is swiotlb involved at all? The DMA controller on BCM2837 can
> access all RAM that is visible to the ARM cores.

When a device is not cache-coherent and the buffer is not
cache aligned, we now use swiotlb to avoid clobbering data
in the same cache line during DMA synchronization.

We used to rely on kmalloc() returning buffers that are
cacheline aligned, but that was very expensive.

     Arnd
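
To illustrate the problem being avoided (illustration only, not code from this thread):

/*
 * Illustration only: why unaligned kmalloc() buffers get bounced for
 * non-coherent devices. With the kmalloc-{8,16,32,48,96} caches enabled
 * on arm64, these two allocations may share a single cache line.
 */
static void cacheline_sharing_example(void)
{
	void *a = kmalloc(32, GFP_KERNEL);
	void *b = kmalloc(32, GFP_KERNEL);

	/*
	 * A non-coherent DMA_FROM_DEVICE transfer into 'a' has to invalidate
	 * the whole cache line 'a' lives in; if 'b' shares that line, CPU
	 * stores to 'b' made while the DMA is in flight would be discarded.
	 * Bouncing 'a' through swiotlb keeps the DMA away from 'b'.
	 */
	kfree(a);
	kfree(b);
}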
Laurent Pinchart June 10, 2024, 9:15 a.m. UTC | #4
On Mon, Jun 10, 2024 at 11:00:12AM +0200, Arnd Bergmann wrote:
> On Mon, Jun 10, 2024, at 10:26, Phil Elwell wrote:
> > On Mon, 10 Jun 2024 at 07:00, Arnd Bergmann <arnd@arndb.de> wrote:
> >
> > Why is swiotlb involved at all? The DMA controller on BCM2837 can
> > access all RAM that is visible to the ARM cores.
> 
> When a device is not cache-coherent and the buffer is not
> cache aligned, we now use swiotlb to avoid clobbering data
> in the same cache line during DMA synchronization.
> 
> We used to rely on kmalloc() returning buffers that are
> cacheline aligned, but that was very expensive.

Could we reject buffers provided by userspace that are not cache-aligned?
Arnd Bergmann June 10, 2024, 9:19 a.m. UTC | #5
On Mon, Jun 10, 2024, at 11:15, Laurent Pinchart wrote:
> On Mon, Jun 10, 2024 at 11:00:12AM +0200, Arnd Bergmann wrote:
>> On Mon, Jun 10, 2024, at 10:26, Phil Elwell wrote:
>> > On Mon, 10 Jun 2024 at 07:00, Arnd Bergmann <arnd@arndb.de> wrote:
>> >
>> > Why is swiotlb involved at all? The DMA controller on BCM2837 can
>> > access all RAM that is visible to the ARM cores.
>> 
>> When a device is not cache-coherent and the buffer is not
>> cache aligned, we now use swiotlb to avoid clobbering data
>> in the same cache line during DMA synchronization.
>> 
>> We used to rely on kmalloc() returning buffers that are
>> cacheline aligned, but that was very expensive.
>
> Could we reject buffers provided by userspace that are not
> cache-aligned ?

My guess is that this will likely break existing applications,
in which case we cannot.

It's probably a good idea to take a look at what buffers
are actually passed by userspace today. That would also
help decide how we allocate bounce buffers if we have to.
E.g. it's a big difference if the buffers are always
within a few bytes, kilobytes or megabytes.

     Arnd
Phil Elwell June 10, 2024, 9:24 a.m. UTC | #6
On Mon, 10 Jun 2024 at 10:20, Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Mon, Jun 10, 2024, at 11:15, Laurent Pinchart wrote:
> > On Mon, Jun 10, 2024 at 11:00:12AM +0200, Arnd Bergmann wrote:
> >> On Mon, Jun 10, 2024, at 10:26, Phil Elwell wrote:
> >> > On Mon, 10 Jun 2024 at 07:00, Arnd Bergmann <arnd@arndb.de> wrote:
> >> >
> >> > Why is swiotlb involved at all? The DMA controller on BCM2837 can
> >> > access all RAM that is visible to the ARM cores.
> >>
> >> When a device is not cache-coherent and the buffer is not
> >> cache aligned, we now use swiotlb to avoid clobbering data
> >> in the same cache line during DMA synchronization.
> >>
> >> We used to rely on kmalloc() returning buffers that are
> >> cacheline aligned, but that was very expensive.
> >
> > Could we reject buffers provided by userspace that are not
> > cache-aligned ?
>
> My guess is that this will likely break existing applications,
> in which case we cannot.
>
> It's probably a good idea to take a look at what buffers
> are actually passed by userspace today. That would also
> help decide how we allocate bounce buffers if we have to.
> E.g. it's a big difference if the buffers are always
> within a few bytes, kilobytes or megabytes.
>
>      Arnd

vchiq sends partial cache lines at the start and end of reads (as seen
from the ARM host) out of band, so the only misaligned DMA transfers
should be from ARM to VPU. This should not require a bounce buffer.

Phil
Arnd Bergmann June 10, 2024, 10:09 a.m. UTC | #7
On Mon, Jun 10, 2024, at 11:24, Phil Elwell wrote:
> On Mon, 10 Jun 2024 at 10:20, Arnd Bergmann <arnd@arndb.de> wrote:
>>
>> My guess is that this will likely break existing applications,
>> in which case we cannot.
>>
>> It's probably a good idea to take a look at what buffers
>> are actually passed by userspace today. That would also
>> help decide how we allocate bounce buffers if we have to.
>> E.g. it's a big difference if the buffers are always
>> within a few bytes, kilobytes or megabytes.
>
> vchiq sends partial cache lines at the start and of reads (as seen
> from the ARM host) out of band, so the only misaligned DMA transfers
> should be from ARM to VPU. This should not require a bounce buffer.

Ok, so if the partial data is sent out of band already, it
may be possible to just leave it out of the scatterlist so
it doesn't get bounced by dma_map_sg().

I don't see where that case is handled but that shouldn't
be too hard here.

     Arnd
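
A hedged sketch of that trimming, assuming the partial head and tail really are carried out of band (the helper name and the use of ARCH_DMA_MINALIGN are illustrative, not existing vchiq code):

/*
 * Hypothetical helper: shrink the region handed to dma_map_sg() so that
 * it starts and ends on cache-line boundaries. The partial first/last
 * cache lines would continue to travel out of band, so nothing in the
 * scatterlist is unaligned and nothing gets bounced.
 */
static void trim_unaligned_edges(unsigned long *start, size_t *len)
{
	unsigned long end = *start + *len;
	unsigned long aligned_start = ALIGN(*start, ARCH_DMA_MINALIGN);
	unsigned long aligned_end = round_down(end, ARCH_DMA_MINALIGN);

	*start = aligned_start;
	*len = aligned_end > aligned_start ? aligned_end - aligned_start : 0;
}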
Robin Murphy June 10, 2024, 10:25 a.m. UTC | #8
On 2024-06-10 10:24 am, Phil Elwell wrote:
> On Mon, 10 Jun 2024 at 10:20, Arnd Bergmann <arnd@arndb.de> wrote:
>>
>> On Mon, Jun 10, 2024, at 11:15, Laurent Pinchart wrote:
>>> On Mon, Jun 10, 2024 at 11:00:12AM +0200, Arnd Bergmann wrote:
>>>> On Mon, Jun 10, 2024, at 10:26, Phil Elwell wrote:
>>>>> On Mon, 10 Jun 2024 at 07:00, Arnd Bergmann <arnd@arndb.de> wrote:
>>>>>
>>>>> Why is swiotlb involved at all? The DMA controller on BCM2837 can
>>>>> access all RAM that is visible to the ARM cores.
>>>>
>>>> When a device is not cache-coherent and the buffer is not
>>>> cache aligned, we now use swiotlb to avoid clobbering data
>>>> in the same cache line during DMA synchronization.
>>>>
>>>> We used to rely on kmalloc() returning buffers that are
>>>> cacheline aligned, but that was very expensive.
>>>
>>> Could we reject buffers provided by userspace that are not
>>> cache-aligned ?
>>
>> My guess is that this will likely break existing applications,
>> in which case we cannot.
>>
>> It's probably a good idea to take a look at what buffers
>> are actually passed by userspace today. That would also
>> help decide how we allocate bounce buffers if we have to.
>> E.g. it's a big difference if the buffers are always
>> within a few bytes, kilobytes or megabytes.
>>
>>       Arnd
> 
> vchiq sends partial cache lines at the start and of reads (as seen
> from the ARM host) out of band, so the only misaligned DMA transfers
> should be from ARM to VPU. This should not require a bounce buffer.

Hmm, indeed the dma_kmalloc_safe() takes into account that unaligned 
DMA_TO_DEVICE does not need bouncing, so it would suggest that 
something's off in what vchiq is asking for.

Thanks,
Robin.
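
For reference, the bounce decision in dma-direct boils down to roughly the following (a simplified paraphrase, not a verbatim copy of dma_kmalloc_safe()/dma_kmalloc_needs_bounce()):

/*
 * Simplified paraphrase of the kmalloc bounce check: an unaligned buffer
 * only needs bouncing when the device is not cache-coherent and the
 * transfer can write into memory (anything other than DMA_TO_DEVICE).
 */
static bool needs_kmalloc_bounce(struct device *dev, size_t size,
				 enum dma_data_direction dir)
{
	if (dev_is_dma_coherent(dev))
		return false;	/* no cache maintenance needed at all */
	if (dir == DMA_TO_DEVICE)
		return false;	/* clean-only, cannot clobber neighbours */
	return !IS_ALIGNED(size, ARCH_DMA_MINALIGN);
}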
Stefan Wahren June 11, 2024, 10:47 a.m. UTC | #9
Hi,

On 10.06.24 at 12:25, Robin Murphy wrote:
> On 2024-06-10 10:24 am, Phil Elwell wrote:
>> On Mon, 10 Jun 2024 at 10:20, Arnd Bergmann <arnd@arndb.de> wrote:
>>>
>>> On Mon, Jun 10, 2024, at 11:15, Laurent Pinchart wrote:
>>>> On Mon, Jun 10, 2024 at 11:00:12AM +0200, Arnd Bergmann wrote:
>>>>> On Mon, Jun 10, 2024, at 10:26, Phil Elwell wrote:
>>>>>> On Mon, 10 Jun 2024 at 07:00, Arnd Bergmann <arnd@arndb.de> wrote:
>>>>>>
>>>>>> Why is swiotlb involved at all? The DMA controller on BCM2837 can
>>>>>> access all RAM that is visible to the ARM cores.
>>>>>
>>>>> When a device is not cache-coherent and the buffer is not
>>>>> cache aligned, we now use swiotlb to avoid clobbering data
>>>>> in the same cache line during DMA synchronization.
>>>>>
>>>>> We used to rely on kmalloc() returning buffers that are
>>>>> cacheline aligned, but that was very expensive.
>>>>
>>>> Could we reject buffers provided by userspace that are not
>>>> cache-aligned ?
>>>
>>> My guess is that this will likely break existing applications,
>>> in which case we cannot.
>>>
>>> It's probably a good idea to take a look at what buffers
>>> are actually passed by userspace today. That would also
>>> help decide how we allocate bounce buffers if we have to.
>>> E.g. it's a big difference if the buffers are always
>>> within a few bytes, kilobytes or megabytes.
>>>
>>>       Arnd
>>
>> vchiq sends partial cache lines at the start and of reads (as seen
>> from the ARM host) out of band, so the only misaligned DMA transfers
>> should be from ARM to VPU. This should not require a bounce buffer.
>
> Hmm, indeed the dma_kmalloc_safe() takes into account that unaligned
> DMA_TO_DEVICE does not need bouncing, so it would suggest that
> something's off in what vchiq is asking for.
I'm available to debug this further, but I need more guidance here.

At least I extended the output for the error case:

-               WARN_ON(len == 0);
-               WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
-               WARN_ON(i && (addr & ~PAGE_MASK));
+               if (len == 0)
+                       pr_warn_once("%s: sg_dma_len() == 0\n", __func__);
+               else if (i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK))
+                       pr_warn_once("%s: following block not page aligned\n", __func__);
+               else if (i && (addr & ~PAGE_MASK)) {
+                       pr_warn_once("%s: block %u, DMA address %pad doesn't align with PAGE_MASK 0x%lx\n", __func__, i, &addr, PAGE_MASK);
+                       pr_warn_once("sg_dma_is_swiotlb: %d, dma_flags: %x\n", sg_dma_is_swiotlb(sg), sg->dma_flags);
+               }

Example result:

[   84.180527] create_pagelist: block 1, DMA address 0x00000000f5f74800 doesn't align with PAGE_MASK 0xfffffffffffff000
[   84.180553] sg_dma_is_swiotlb: 0, dma_flags: 0

Is this helpful?

>
> Thanks,
> Robin.
Arnd Bergmann June 11, 2024, 11:08 a.m. UTC | #10
On Tue, Jun 11, 2024, at 12:47, Stefan Wahren wrote:
> On 10.06.24 at 12:25, Robin Murphy wrote:
>> On 2024-06-10 10:24 am, Phil Elwell wrote:
>>> On Mon, 10 Jun 2024 at 10:20, Arnd Bergmann <arnd@arndb.de> wrote:
>>>>       Arnd
>>>
>>> vchiq sends partial cache lines at the start and of reads (as seen
>>> from the ARM host) out of band, so the only misaligned DMA transfers
>>> should be from ARM to VPU. This should not require a bounce buffer.
>>
>> Hmm, indeed the dma_kmalloc_safe() takes into account that unaligned
>> DMA_TO_DEVICE does not need bouncing, so it would suggest that
>> something's off in what vchiq is asking for.
> I'm available to debug this further, but i need more guidance here.
>
> At least i extend the output for the error case:
>
> -               WARN_ON(len == 0);
> -               WARN_ON(i && (i != (dma_buffers - 1)) && (len &
> ~PAGE_MASK));
> -               WARN_ON(i && (addr & ~PAGE_MASK));
> +               if (len == 0)
> +                       pr_warn_once("%s: sg_dma_len() == 0\n", __func__);
> +               else if (i && (i != (dma_buffers - 1)) && (len &
> ~PAGE_MASK))
> +                       pr_warn_once("%s: following block not page
> aligned\n", __func__);
> +               else if (i && (addr & ~PAGE_MASK)) {
> +                       pr_warn_once("%s: block %u, DMA address %pad
> doesn't align with PAGE_MASK 0x%lx\n", __func__, i, &addr, PAGE_MASK);
> +                       pr_warn_once("sg_dma_is_swiotlb: %d, dma_flags:
> %x\n", sg_dma_is_swiotlb(sg), sg->dma_flags);
> +               }
>
> Example result:
>
> [   84.180527] create_pagelist: block 1, DMA address 0x00000000f5f74800
> doesn't align with PAGE_MASK 0xfffffffffffff000
> [   84.180553] sg_dma_is_swiotlb: 0, dma_flags: 0
>
> Is this helpful?

It's interesting that this does not have the SG_DMA_SWIOTLB
flag set, as the theory so far was that an unaligned
user address is what caused this to bounce.

I think the most helpful bit of information that is
currently missing is the 'ubuf' and 'count' arguments
that got passed down from userspace into create_pagelist(),
to see what alignment they have in the failure case.

     Arnd
Stefan Wahren June 11, 2024, 11:37 a.m. UTC | #11
On 11.06.24 at 13:08, Arnd Bergmann wrote:
> On Tue, Jun 11, 2024, at 12:47, Stefan Wahren wrote:
>> On 10.06.24 at 12:25, Robin Murphy wrote:
>>> On 2024-06-10 10:24 am, Phil Elwell wrote:
>>>> On Mon, 10 Jun 2024 at 10:20, Arnd Bergmann <arnd@arndb.de> wrote:
>>>>>        Arnd
>>>> vchiq sends partial cache lines at the start and of reads (as seen
>>>> from the ARM host) out of band, so the only misaligned DMA transfers
>>>> should be from ARM to VPU. This should not require a bounce buffer.
>>> Hmm, indeed the dma_kmalloc_safe() takes into account that unaligned
>>> DMA_TO_DEVICE does not need bouncing, so it would suggest that
>>> something's off in what vchiq is asking for.
>> I'm available to debug this further, but i need more guidance here.
>>
>> At least i extend the output for the error case:
>>
>> -               WARN_ON(len == 0);
>> -               WARN_ON(i && (i != (dma_buffers - 1)) && (len &
>> ~PAGE_MASK));
>> -               WARN_ON(i && (addr & ~PAGE_MASK));
>> +               if (len == 0)
>> +                       pr_warn_once("%s: sg_dma_len() == 0\n", __func__);
>> +               else if (i && (i != (dma_buffers - 1)) && (len &
>> ~PAGE_MASK))
>> +                       pr_warn_once("%s: following block not page
>> aligned\n", __func__);
>> +               else if (i && (addr & ~PAGE_MASK)) {
>> +                       pr_warn_once("%s: block %u, DMA address %pad
>> doesn't align with PAGE_MASK 0x%lx\n", __func__, i, &addr, PAGE_MASK);
>> +                       pr_warn_once("sg_dma_is_swiotlb: %d, dma_flags:
>> %x\n", sg_dma_is_swiotlb(sg), sg->dma_flags);
>> +               }
>>
>> Example result:
>>
>> [   84.180527] create_pagelist: block 1, DMA address 0x00000000f5f74800
>> doesn't align with PAGE_MASK 0xfffffffffffff000
>> [   84.180553] sg_dma_is_swiotlb: 0, dma_flags: 0
>>
>> Is this helpful?
> It's interesting that this does not have the SG_DMA_SWIOTLB
> flag set, as the theory so far was that an unaligned
> user address is what caused this to bounce.
>
> I think the most helpful bit of information that is
> currently missing is the 'ubuf' and 'count' arguments
> that got passed down from userspace into create_pagelist(),
> to see what alignment they have in the failure case.
Here is my attempt:

         if (len == 0)
             pr_warn_once("%s: sg_dma_len() == 0\n", __func__);
         else if (i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK))
             pr_warn_once("%s: following block not page aligned\n", __func__);
         else if (i && (addr & ~PAGE_MASK)) {
             pr_warn_once("%s: block %u, DMA address %pad doesn't align with PAGE_MASK 0x%lx\n", __func__, i, &addr, PAGE_MASK);
             pr_warn_once("sg_dma_is_swiotlb: %d, dma_flags: %x\n", sg_dma_is_swiotlb(sg), sg->dma_flags);
             pr_warn_once("type = %s\n", (type == PAGELIST_WRITE) ? "PAGELIST_WRITE" : "PAGELIST_READ");
             if (buf)
                 pr_warn_once("buf = %p, count = %zu\n", buf, count);
             else
                 pr_warn_once("ubuf = %p, count = %zu\n", ubuf, count);
         }

Output:

[   66.184030] create_pagelist: block 1, DMA address 0x00000000f5fc7800 doesn't align with PAGE_MASK 0xfffffffffffff000
[   66.184056] sg_dma_is_swiotlb: 0, dma_flags: 0
[   66.184063] type = PAGELIST_READ
[   66.184066] ubuf = 00000000266a70a7, count = 0
>
>       Arnd
Arnd Bergmann June 11, 2024, 12:14 p.m. UTC | #12
On Tue, Jun 11, 2024, at 13:37, Stefan Wahren wrote:
> On 11.06.24 at 13:08, Arnd Bergmann wrote:
>> On Tue, Jun 11, 2024, at 12:47, Stefan Wahren wrote:

>
>          if (len == 0)
>              pr_warn_once("%s: sg_dma_len() == 0\n", __func__);
>          else if (i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK))
>              pr_warn_once("%s: following block not page aligned\n",
> __func__);
>          else if (i && (addr & ~PAGE_MASK)) {
>              pr_warn_once("%s: block %u, DMA address %pad doesn't align
> with PAGE_MASK 0x%lx\n", __func__, i, &addr, PAGE_MASK);
>              pr_warn_once("sg_dma_is_swiotlb: %d, dma_flags: %x\n",
> sg_dma_is_swiotlb(sg), sg->dma_flags);
>              pr_warn_once("type = %s\n", (type == PAGELIST_WRITE) ?
> "PAGELIST_WRITE" : "PAGELIST_READ");
>              if (buf)
>                  pr_warn_once("buf = %p, count = %zu\n", buf, count);
>              else
>                  pr_warn_once("ubuf = %p, count = %zu\n", ubuf, count);
>          }
>
> Output:
>
> [   66.184030] create_pagelist: block 1, DMA address 0x00000000f5fc7800
> doesn't align with PAGE_MASK 0xfffffffffffff000
> [   66.184056] sg_dma_is_swiotlb: 0, dma_flags: 0
> [   66.184063] type = PAGELIST_READ
> [   66.184066] ubuf = 00000000266a70a7, count = 0

From my reading of the code, it's not really meant to handle
count=0, so maybe the answer for that is to just return an
error (or possibly success) when there is no data attached to
the request from user space, something like

--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
@@ -328,6 +328,11 @@ static int vchiq_irq_queue_bulk_tx_rx(struct vchiq_instance *instance,
                userdata = args->userdata;
        }
 
+       if (!args->size) {
+               ret = 0;
+               goto out;
+       }
+
        status = vchiq_bulk_transfer(instance, args->handle, NULL, args->data, args->size,
                                     userdata, args->mode, dir);
Stefan Wahren June 11, 2024, 1:09 p.m. UTC | #13
Hi Arnd,

On 11.06.24 at 14:14, Arnd Bergmann wrote:
> On Tue, Jun 11, 2024, at 13:37, Stefan Wahren wrote:
>> On 11.06.24 at 13:08, Arnd Bergmann wrote:
>>> On Tue, Jun 11, 2024, at 12:47, Stefan Wahren wrote:
>>           if (len == 0)
>>               pr_warn_once("%s: sg_dma_len() == 0\n", __func__);
>>           else if (i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK))
>>               pr_warn_once("%s: following block not page aligned\n",
>> __func__);
>>           else if (i && (addr & ~PAGE_MASK)) {
>>               pr_warn_once("%s: block %u, DMA address %pad doesn't align
>> with PAGE_MASK 0x%lx\n", __func__, i, &addr, PAGE_MASK);
>>               pr_warn_once("sg_dma_is_swiotlb: %d, dma_flags: %x\n",
>> sg_dma_is_swiotlb(sg), sg->dma_flags);
>>               pr_warn_once("type = %s\n", (type == PAGELIST_WRITE) ?
>> "PAGELIST_WRITE" : "PAGELIST_READ");
>>               if (buf)
>>                   pr_warn_once("buf = %p, count = %zu\n", buf, count);
>>               else
>>                   pr_warn_once("ubuf = %p, count = %zu\n", ubuf, count);
>>           }
>>
>> Output:
>>
>> [   66.184030] create_pagelist: block 1, DMA address 0x00000000f5fc7800
>> doesn't align with PAGE_MASK 0xfffffffffffff000
>> [   66.184056] sg_dma_is_swiotlb: 0, dma_flags: 0
>> [   66.184063] type = PAGELIST_READ
>> [   66.184066] ubuf = 00000000266a70a7, count = 0
Sorry, my debug attempt for count was pointless. The value has always
been decremented to zero at this point, so your suggested change won't
have any effect.
>  From my reading of the code, it's not really meant to handle
> count=0, so maybe the answer for that is to just return an
> error (or possibly success) when there is data attached to
> the request from user space, something like
>
> --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
> +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_dev.c
> @@ -328,6 +328,11 @@ static int vchiq_irq_queue_bulk_tx_rx(struct vchiq_instance *instance,
>                  userdata = args->userdata;
>          }
>
> +       if (!args->size) {
> +               ret = 0;
> +               goto out;
> +       }
> +
>          status = vchiq_bulk_transfer(instance, args->handle, NULL, args->data, args->size,
>                                       userdata, args->mode, dir);
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Robin Murphy June 11, 2024, 1:35 p.m. UTC | #14
On 11/06/2024 12:37 pm, Stefan Wahren wrote:
> On 11.06.24 at 13:08, Arnd Bergmann wrote:
>> On Tue, Jun 11, 2024, at 12:47, Stefan Wahren wrote:
>>> On 10.06.24 at 12:25, Robin Murphy wrote:
>>>> On 2024-06-10 10:24 am, Phil Elwell wrote:
>>>>> On Mon, 10 Jun 2024 at 10:20, Arnd Bergmann <arnd@arndb.de> wrote:
>>>>>>        Arnd
>>>>> vchiq sends partial cache lines at the start and of reads (as seen
>>>>> from the ARM host) out of band, so the only misaligned DMA transfers
>>>>> should be from ARM to VPU. This should not require a bounce buffer.
>>>> Hmm, indeed the dma_kmalloc_safe() takes into account that unaligned
>>>> DMA_TO_DEVICE does not need bouncing, so it would suggest that
>>>> something's off in what vchiq is asking for.
>>> I'm available to debug this further, but i need more guidance here.
>>>
>>> At least i extend the output for the error case:
>>>
>>> -               WARN_ON(len == 0);
>>> -               WARN_ON(i && (i != (dma_buffers - 1)) && (len &
>>> ~PAGE_MASK));
>>> -               WARN_ON(i && (addr & ~PAGE_MASK));
>>> +               if (len == 0)
>>> +                       pr_warn_once("%s: sg_dma_len() == 0\n", 
>>> __func__);
>>> +               else if (i && (i != (dma_buffers - 1)) && (len &
>>> ~PAGE_MASK))
>>> +                       pr_warn_once("%s: following block not page
>>> aligned\n", __func__);
>>> +               else if (i && (addr & ~PAGE_MASK)) {
>>> +                       pr_warn_once("%s: block %u, DMA address %pad
>>> doesn't align with PAGE_MASK 0x%lx\n", __func__, i, &addr, PAGE_MASK);
>>> +                       pr_warn_once("sg_dma_is_swiotlb: %d, dma_flags:
>>> %x\n", sg_dma_is_swiotlb(sg), sg->dma_flags);
>>> +               }
>>>
>>> Example result:
>>>
>>> [   84.180527] create_pagelist: block 1, DMA address 0x00000000f5f74800
>>> doesn't align with PAGE_MASK 0xfffffffffffff000
>>> [   84.180553] sg_dma_is_swiotlb: 0, dma_flags: 0
>>>
>>> Is this helpful?
>> It's interesting that this does not have the SG_DMA_SWIOTLB
>> flag set, as the theory so far was that an unaligned
>> user address is what caused this to bounce.

dma-direct doesn't use that flag (at least for now; potentially it might 
offer a tiny micro-optimisation, but not enough to really matter). At 
this point, len and pagelistinfo->dma_dir (as passed to dma_map_sg()) 
are what would have defined the bounce condition...

>> I think the most helpful bit of information that is
>> currently missing is the 'ubuf' and 'count' arguments
>> that got passed down from userspace into create_pagelist(),
>> to see what alignment they have in the failure case.
> Here is my attempt:
> 
>          if (len == 0)
>              pr_warn_once("%s: sg_dma_len() == 0\n", __func__);
>          else if (i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK))
>              pr_warn_once("%s: following block not page aligned\n",
> __func__);
>          else if (i && (addr & ~PAGE_MASK)) {
>              pr_warn_once("%s: block %u, DMA address %pad doesn't align
> with PAGE_MASK 0x%lx\n", __func__, i, &addr, PAGE_MASK);
>              pr_warn_once("sg_dma_is_swiotlb: %d, dma_flags: %x\n",
> sg_dma_is_swiotlb(sg), sg->dma_flags);
>              pr_warn_once("type = %s\n", (type == PAGELIST_WRITE) ?
> "PAGELIST_WRITE" : "PAGELIST_READ");
>              if (buf)
>                  pr_warn_once("buf = %p, count = %zu\n", buf, count);
>              else
>                  pr_warn_once("ubuf = %p, count = %zu\n", ubuf, count);
>          }
> 
> Output:
> 
> [   66.184030] create_pagelist: block 1, DMA address 0x00000000f5fc7800
> doesn't align with PAGE_MASK 0xfffffffffffff000
> [   66.184056] sg_dma_is_swiotlb: 0, dma_flags: 0
> [   66.184063] type = PAGELIST_READ

...so from the dma_dir assignment, this would imply DMA_FROM_DEVICE
(indicating vchiq writing to RAM, the CPU reading from RAM), which with
the unaligned address below would indeed cause a bounce. However, that
appears to contradict what Phil said is supposed to happen :/

Thanks,
Robin

> [   66.184066] ubuf = 00000000266a70a7, count = 0
>>
>>       Arnd
>
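
For context, the direction used by create_pagelist() is derived from the pagelist type roughly as follows (paraphrased, not a verbatim quote of the driver), which is what makes the PAGELIST_READ above map as DMA_FROM_DEVICE:

	/* Paraphrased from create_pagelist(): a PAGELIST_WRITE sends data to
	 * the VPU (DMA_TO_DEVICE, never bounced); everything else is a read
	 * from the VPU and maps as DMA_FROM_DEVICE, the direction that
	 * dma_kmalloc_safe() does not exempt from bouncing. */
	pagelistinfo->dma_dir = (type == PAGELIST_WRITE) ?
				 DMA_TO_DEVICE : DMA_FROM_DEVICE;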

Patch

diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index 9fb8f657cc78..2beeb94f629e 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -192,7 +192,7 @@ cleanup_pagelistinfo(struct vchiq_instance *instance, struct vchiq_pagelist_info
  }

  static inline bool
-is_adjacent_block(u32 *addrs, u32 addr, unsigned int k)
+is_adjacent_block(u32 *addrs, dma_addr_t addr, unsigned int k)
  {
      u32 tmp;