diff mbox series

[rpmsg,v2,1/1] rpmsg: virtio_rpmsg_bus: fix unexpected huge vmap mappings

Message ID 1545812449-32455-1-git-send-email-fugang.duan@nxp.com (mailing list archive)
State New, archived
Headers show
Series [rpmsg,v2,1/1] rpmsg: virtio_rpmsg_bus: fix unexpected huge vmap mappings | expand

Commit Message

Andy Duan Dec. 26, 2018, 8:25 a.m. UTC
From: Fugang Duan <fugang.duan@nxp.com>

If rpmsg DMA memory is allocated from a per-device memory pool by calling
dma_alloc_coherent(), and the size is larger than 2MB and aligned to 2MB
(PMD_SIZE), then the kernel dumps a warning when vmalloc_to_page() is called.

Since the per-device DMA pool sets up its vmap mappings via __ioremap(),
__ioremap() might use a hugepage mapping, which in turn causes
vmalloc_to_page() to fail to return the correct page because no PTEs are
set up.

For example, when an 8MB per-device DMA memory pool is reserved, __ioremap()
uses a hugepage mapping:
 __ioremap
	ioremap_page_range
		ioremap_pud_range
			ioremap_pmd_range
				pmd_set_huge(pmd, phys_addr + addr, prot)

Commit 029c54b09599 ("mm/vmalloc.c: huge-vmap: fail gracefully on unexpected
huge vmap mapping") ensures that vmalloc_to_page() does not go off into the
weeds trying to dereference huge PUDs or PMDs as table entries:
rpmsg_sg_init ->
	vmalloc_to_page->
		WARN_ON_ONCE(pmd_bad(*pmd));
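
A simplified sketch of the guard that commit adds in vmalloc_to_page()
(paraphrased, not the exact upstream diff):

	/*
	 * A huge PUD/PMD entry is not a table entry and must not be
	 * dereferenced as one; warn once and bail out instead.
	 */
	if (WARN_ON_ONCE(pud_bad(*pud)))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (WARN_ON_ONCE(pmd_bad(*pmd)))
		return NULL;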

In general, dma_alloc_coherent() allocates memory from a CMA pool, DMA pool,
atomic pool, or the swiotlb slab pool, where the mapping from virtual to
physical address is linear, so the rpmsg scatterlist initialization can use
the PFN to find the page (as the patch below does) and avoid calling
vmalloc_to_page().

Kernel dump:
[    0.881722] WARNING: CPU: 0 PID: 1 at mm/vmalloc.c:301 vmalloc_to_page+0xbc/0xc8
[    0.889094] Modules linked in:
[    0.892139] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.14.78-05581-gc61a572 #206
[    0.899604] Hardware name: Freescale i.MX8QM MEK (DT)
[    0.904643] task: ffff8008f6c98000 task.stack: ffff000008068000
[    0.910549] PC is at vmalloc_to_page+0xbc/0xc8
[    0.914987] LR is at rpmsg_sg_init+0x70/0xcc
[    0.919238] pc : [<ffff0000081c80d4>] lr : [<ffff000008ac471c>] pstate: 40000045
[    0.926619] sp : ffff00000806b8b0
[    0.929923] x29: ffff00000806b8b0 x28: ffff00000961cdf0
[    0.935220] x27: ffff00000961cdf0 x26: 0000000000000000
[    0.940519] x25: 0000000000040000 x24: ffff00000961ce40
[    0.945819] x23: ffff00000f000000 x22: ffff00000961ce30
[    0.951118] x21: 0000000000000000 x20: ffff00000806b950
[    0.956417] x19: 0000000000000000 x18: 000000000000000e
[    0.961717] x17: 0000000000000001 x16: 0000000000000019
[    0.967016] x15: 0000000000000033 x14: 616d64202c303030
[    0.972316] x13: 3030306630303030 x12: 3066666666206176
[    0.977615] x11: 203a737265666675 x10: 62203334394c203a
[    0.982914] x9 : 000000000000009f x8 : ffff00000806b970
[    0.988214] x7 : 0000000000000000 x6 : ffff000009690712
[    0.993513] x5 : 0000000000000000 x4 : 0000000080000000
[    0.998812] x3 : 00e8000090800f0d x2 : ffff8008ffffd3c0
[    1.004112] x1 : 0000000000000000 x0 : ffff00000f000000
[    1.009416] Call trace:
[    1.011849] Exception stack(0xffff00000806b770 to 0xffff00000806b8b0)
[    1.018279] b760:                                   ffff00000f000000 0000000000000000
[    1.026094] b780: ffff8008ffffd3c0 00e8000090800f0d 0000000080000000 0000000000000000
[    1.033915] b7a0: ffff000009690712 0000000000000000 ffff00000806b970 000000000000009f
[    1.041731] b7c0: 62203334394c203a 203a737265666675 3066666666206176 3030306630303030
[    1.049550] b7e0: 616d64202c303030 0000000000000033 0000000000000019 0000000000000001
[    1.057368] b800: 000000000000000e 0000000000000000 ffff00000806b950 0000000000000000
[    1.065188] b820: ffff00000961ce30 ffff00000f000000 ffff00000961ce40 0000000000040000
[    1.073008] b840: 0000000000000000 ffff00000961cdf0 ffff00000961cdf0 ffff00000806b8b0
[    1.080825] b860: ffff000008ac471c ffff00000806b8b0 ffff0000081c80d4 0000000040000045
[    1.088646] b880: ffff0000092c8528 ffff00000806b890 ffffffffffffffff ffff000008ac4710
[    1.096461] b8a0: ffff00000806b8b0 ffff0000081c80d4
[    1.101327] [<ffff0000081c80d4>] vmalloc_to_page+0xbc/0xc8
[    1.106800] [<ffff000008ac4968>] rpmsg_probe+0x1f0/0x49c
[    1.112107] [<ffff00000859a9a0>] virtio_dev_probe+0x198/0x210
[    1.117839] [<ffff0000086a1c70>] driver_probe_device+0x220/0x2d4
[    1.123829] [<ffff0000086a1e90>] __device_attach_driver+0x98/0xc8
[    1.129913] [<ffff00000869fe7c>] bus_for_each_drv+0x54/0x94
[    1.135470] [<ffff0000086a1944>] __device_attach+0xc4/0x12c
[    1.141029] [<ffff0000086a1ed0>] device_initial_probe+0x10/0x18
[    1.146937] [<ffff0000086a0e48>] bus_probe_device+0x90/0x98
[    1.152501] [<ffff00000869ef88>] device_add+0x3f4/0x570
[    1.157709] [<ffff00000869f120>] device_register+0x1c/0x28
[    1.163182] [<ffff00000859a4f8>] register_virtio_device+0xb8/0x114
[    1.169353] [<ffff000008ac5e94>] imx_rpmsg_probe+0x3a0/0x5d0
[    1.175003] [<ffff0000086a3768>] platform_drv_probe+0x50/0xbc
[    1.180730] [<ffff0000086a1c70>] driver_probe_device+0x220/0x2d4
[    1.186725] [<ffff0000086a1dc8>] __driver_attach+0xa4/0xa8
[    1.192199] [<ffff00000869fdc4>] bus_for_each_dev+0x58/0x98
[    1.197759] [<ffff0000086a1598>] driver_attach+0x20/0x28
[    1.203058] [<ffff0000086a1114>] bus_add_driver+0x1c0/0x224
[    1.208619] [<ffff0000086a26ec>] driver_register+0x68/0x108
[    1.214178] [<ffff0000086a36ac>] __platform_driver_register+0x4c/0x54
[    1.220614] [<ffff0000093d14fc>] imx_rpmsg_init+0x1c/0x50
[    1.225999] [<ffff000008084144>] do_one_initcall+0x38/0x124
[    1.231560] [<ffff000009370d28>] kernel_init_freeable+0x18c/0x228
[    1.237640] [<ffff000008d51b60>] kernel_init+0x10/0x100
[    1.242849] [<ffff000008085348>] ret_from_fork+0x10/0x18
[    1.248154] ---[ end trace bcc95d4e07033434 ]---

v2:
 - use pfn_to_page(PHYS_PFN(x)) instead of phys_to_page(x) since the
   phys_to_page() interface is not available on all architectures.

Reviewed-by: Richard Zhu <hongxing.zhu@nxp.com>
Suggested-and-reviewed-by: Jason Liu <jason.hui.liu@nxp.com>
Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
---
 drivers/rpmsg/virtio_rpmsg_bus.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

Comments

Ard Biesheuvel Dec. 26, 2018, 12:27 p.m. UTC | #1
On Wed, 26 Dec 2018 at 09:25, Andy Duan <fugang.duan@nxp.com> wrote:
>
> From: Fugang Duan <fugang.duan@nxp.com>
>
> If rpmsg DMA memory is allocated from a per-device memory pool by calling
> dma_alloc_coherent(), and the size is larger than 2MB and aligned to 2MB
> (PMD_SIZE), then the kernel dumps a warning when vmalloc_to_page() is called.
>
> Since the per-device DMA pool sets up its vmap mappings via __ioremap(),
> __ioremap() might use a hugepage mapping, which in turn causes
> vmalloc_to_page() to fail to return the correct page because no PTEs are
> set up.

If there are legal uses for vmalloc_to_page() even if the region is
not mapped down to pages [which appears to be the case here], I'd
prefer to fix vmalloc_to_page() instead of adding this hack. Or
perhaps we need an sg_xxx helper that translates any virtual address
(vmalloc or otherwise) into a scatterlist entry?
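
Roughly lifting what rpmsg_sg_init() does today into common code, say
(the name and shape are purely illustrative; no such helper exists):

	static void sg_init_any(struct scatterlist *sg, void *addr,
				unsigned int len)
	{
		/* Pick the right page lookup for the mapping type. */
		struct page *page = is_vmalloc_addr(addr) ?
				    vmalloc_to_page(addr) : virt_to_page(addr);

		sg_init_table(sg, 1);
		sg_set_page(sg, page, len, offset_in_page(addr));
	}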


> [...]
Christoph Hellwig Dec. 26, 2018, 2:50 p.m. UTC | #2
On Wed, Dec 26, 2018 at 01:27:25PM +0100, Ard Biesheuvel wrote:
> If there are legal uses for vmalloc_to_page() even if the region is
> not mapped down to pages [which appears to be the case here], I'd
> prefer to fix vmalloc_to_page() instead of adding this hack. Or
> perhaps we need a sg_xxx helper that translates any virtual address
> (vmalloc or otherwise) into a scatterlist entry?

What rpmsg does is completely bogus and needs to be fixed ASAP.  The
virtual address returned from dma_alloc_coherent must not be passed to
virt_to_page or vmalloc_to_page, but only used as a kernel virtual
address.  It might not be backed by pages, or might create aliases that
must not be used with VIVT caches.

rpmsg needs to either stop trying to extract pages from
dma_alloc_coherent, or just replace its use of dma_alloc_coherent
with the normal page allocator and the streaming DMA API.
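
A rough sketch of the latter option in rpmsg_probe(), reusing names from
the existing code (untested, illustrative only; the free_pages label is
hypothetical):

	bufs_va = (void *)__get_free_pages(GFP_KERNEL,
					   get_order(total_buf_space));
	if (!bufs_va)
		goto vqs_del;

	/*
	 * Map the buffers for DMA instead of allocating coherent memory;
	 * the buffers then live in the kernel direct mapping, so
	 * virt_to_page() on them is valid.
	 */
	vrp->bufs_dma = dma_map_single(vdev->dev.parent, bufs_va,
				       total_buf_space, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(vdev->dev.parent, vrp->bufs_dma))
		goto free_pages;

(A streaming mapping would of course also need dma_sync_*() calls around
buffer use.)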
Andy Duan Dec. 27, 2018, 2:36 a.m. UTC | #3
From: Christoph Hellwig <hch@infradead.org> Sent: December 26, 2018 22:51
> On Wed, Dec 26, 2018 at 01:27:25PM +0100, Ard Biesheuvel wrote:
> > If there are legal uses for vmalloc_to_page() even if the region is
> > not mapped down to pages [which appears to be the case here], I'd
> > prefer to fix vmalloc_to_page() instead of adding this hack. Or
> > perhaps we need a sg_xxx helper that translates any virtual address
> > (vmalloc or otherwise) into a scatterlist entry?
> 
> What rpmsg does is completely bogus and needs to be fixed ASAP.  The
> virtual address returned from dma_alloc_coherent must not be passed to
> virt_to_page or vmalloc_to_page, but only use as a kernel virtual address.  It
> might not be backed by pages, or might create aliases that must not be used
> with VIVT caches.
> 
> rpmsg needs to either stop trying to extract pages from dma_alloc_coherent,
> or just replace its use of dma_alloc_coherent with the normal page allocator
> and the streaming DMA API.

Rpmsg is used to communicate with a remote CPU such as the M4; the allocated memory is shared between the Linux and M4 sides.
In general, the Linux side reserves a static memory region, a per-device DMA pool, as coherent memory for the rpmsg receive/transmit buffers.
For a static memory region, the normal page allocator cannot meet the requirement unless there is a protocol to tell the M4 about dynamically allocated rpmsg receive/transmit buffers.

To stop to extract pages from dma_alloc_coherent, the rpmsg bus implementation base on virtio that already use the scatterlist mechanism for vring memory. So for virtio driver like RPMSG bus, we have to extract pages from dma_alloc_coherent.

I don't think the patch is a hack: we already know the physical address of the coherent memory and just want the pages, and the pfn_to_page(PHYS_PFN(x)) interface is a very reasonable way to get the related pages.

If you insist on using the normal page allocator and the streaming DMA API in RPMSG, then we have to:
- add a new virtio quirk feature with the same function as "VIRTIO_F_IOMMU_PLATFORM" and register it for the RPMSG virtio driver; the RPMSG virtio bus driver then only needs to allocate contiguous pages;
- drop support for the static memory region for the M4.

Any ideas?

Regards,
Andy Duan
Christoph Hellwig Dec. 27, 2018, 12:19 p.m. UTC | #4
Hi Andy,

first, please do not write lines longer than 72 characters, as they turn
your mail into an unreadable mess without prior editing.

On Thu, Dec 27, 2018 at 02:36:53AM +0000, Andy Duan wrote:
> Rpmsg is used to communicate with a remote CPU such as the M4; the
> allocated memory is shared between the Linux and M4 sides. In general,
> the Linux side reserves a static memory region, a per-device DMA pool,
> as coherent memory for the rpmsg receive/transmit buffers. For a static
> memory region, the normal page allocator cannot meet the requirement
> unless there is a protocol to tell the M4 about dynamically allocated
> rpmsg receive/transmit buffers.

In that case you need an OF reserved memory node, like we use for the
"shared-dma-pool" coherent or contiguous allocations.  Currently we
have those two variants wired up to the DMA allocator, but they can
also be used directly by drivers.  To be honest I don't really like
drivers getting too intimate with the memory allocator, but I also
don't think that providing a little glue code to instantiate a CMA
pool for memory that can be used directly by the driver is much
of an issue.  Most of it could be reused from the existing code,
just with slightly lower level interfaces.

> To stop to extract pages from dma_alloc_coherent, the rpmsg bus
> implementation base on virtio that already use the scatterlist
> mechanism for vring memory. So for virtio driver like RPMSG bus,
> we have to extract pages from dma_alloc_coherent.

This sentence doesn't parse for me.

> I don't think the patch is a hack: we already know the physical
> address of the coherent memory and just want the pages, and the
> pfn_to_page(PHYS_PFN(x)) interface is a very reasonable way to get
> the related pages.

struct scatterlist doesn't (directly) refer to a physical address;
it refers to page structures, which encode a kernel virtual
address in the kernel direct mapping, and we intentionally do not
guarantee a return in the kernel direct mapping from the DMA
coherent allocator, as in many cases we have to either allocate
from a special pool, from a special address window, or remap
memory to mark it as uncached.  How that is done is an
implementation detail that is not exposed to drivers and may
change at any time.

> If you insist on using the normal page allocator and the streaming DMA
> API in RPMSG, then we have to:
> - add a new virtio quirk feature with the same function as
> "VIRTIO_F_IOMMU_PLATFORM",

You have to do that anyway.  The current !VIRTIO_F_IOMMU_PLATFORM
is completely broken for any virtio device that is not actually
virtualized but real hardware, and must not be used for real
hardware devices.
Andy Duan Dec. 28, 2018, 1:48 a.m. UTC | #5
From: Christoph Hellwig <hch@infradead.org> Sent: December 27, 2018 20:19
> On Thu, Dec 27, 2018 at 02:36:53AM +0000, Andy Duan wrote:
> > Rpmsg is used to communicate with a remote CPU such as the M4; the
> > allocated memory is shared between the Linux and M4 sides. In general,
> > the Linux side reserves a static memory region, a per-device DMA pool,
> > as coherent memory for the rpmsg receive/transmit buffers. For a
> > static memory region, the normal page allocator cannot meet the
> > requirement unless there is a protocol to tell the M4 about
> > dynamically allocated rpmsg receive/transmit buffers.
> 
> In that case you need an OF reserved memory node, like we use for the
> "shared-dma-pool" coherent or contiguous allocations.  Currently we have
> those two variants wired up to the DMA allocator, but they can also be
> used directly by drivers.  To be honest I don't really like drivers
> getting too intimate with the memory allocator, but I also don't think
> that providing a little glue code to instantiate a CMA pool for memory
> that can be used directly by the driver is much of an issue.  Most of it
> could be reused from the existing code, just with slightly lower level
> interfaces.
> 
> > To stop to extract pages from dma_alloc_coherent, the rpmsg bus
> > implementation base on virtio that already use the scatterlist
> > mechanism for vring memory. So for virtio driver like RPMSG bus, we
> > have to extract pages from dma_alloc_coherent.
> 
> This sentence doesn't parse for me.

Virtio supplies APIs that require scatterlist pages for the virtio in/out buffers:
int virtqueue_add_inbuf(struct virtqueue *vq,
                        struct scatterlist *sg, unsigned int num,
                        void *data,
                        gfp_t gfp)
int virtqueue_add_outbuf(struct virtqueue *vq,
                         struct scatterlist *sg, unsigned int num,
                         void *data,
                         gfp_t gfp)


> 
> > I don't think the patch is a hack: we already know the physical
> > address of the coherent memory and just want the pages, and the
> > pfn_to_page(PHYS_PFN(x)) interface is a very reasonable way to get
> > the related pages.
> 
> struct scatterlist doesn't (directly) refer to a physical address; it
> refers to page structures, which encode a kernel virtual address in the
> kernel direct mapping, and we intentionally do not guarantee a return in
> the kernel direct mapping from the DMA coherent allocator, as in many
> cases we have to either allocate from a special pool, from a special
> address window, or remap memory to mark it as uncached.  How that is
> done is an implementation detail that is not exposed to drivers and may
> change at any time.
> 
> > If you insist on using the normal page allocator and the streaming
> > DMA API in RPMSG, then we have to:
> > - add a new virtio quirk feature with the same function as
> > "VIRTIO_F_IOMMU_PLATFORM",
> 
> You have to do that anyway.  

I discussed this with our team; using the page allocator cannot meet our
requirement. i.MX8QM/QXP platforms have a partition feature that limits
the M4 to accessing only a fixed DDR memory region. Presumably other
platforms have similar limitations for secure use cases.

So it requires using OF reserved memory for "shared-dma-pool" coherent or contiguous allocations.

> The current !VIRTIO_F_IOMMU_PLATFORM is
> completely broken for any virtio device that is not actually virtualized
> but real hardware, and must not be used for real hardware devices.

Thank you for your comments
Andy Duan Jan. 10, 2019, 1:45 a.m. UTC | #6
From: Andy Duan Sent: December 28, 2018 9:48
> [...]
Do you have any other comments on the patch?
The current driver breaks remoteproc on the NXP i.MX8 platform. The patch is a bugfix for the virtio rpmsg bus; we hope it can enter the next and stable trees if there are no other comments.
Loic PALLARDY Jan. 10, 2019, 1:06 p.m. UTC | #7
Hi Andy,

> [...]
> Do you have any other comments on the patch?

I tried your patch on an ST platform and it compiles neither on kernel v5.0-rc1 nor on Bjorn's rpmsg-next:
dma_to_phys() is unknown because dma-direct.h is not included, and ‘struct virtproc_info’ has no member named ‘bufs_dev’.

Could you please send a new version fixing the compilation issues? I would like to test it on my platform and give you feedback.

Regards,
Loic

Christoph Hellwig Jan. 10, 2019, 2:07 p.m. UTC | #8
On Thu, Jan 10, 2019 at 01:45:20AM +0000, Andy Duan wrote:
> Do you have any other comments on the patch?
> The current driver breaks remoteproc on the NXP i.MX8 platform. The patch is a bugfix for the virtio rpmsg bus; we hope it can enter the next and stable trees if there are no other comments.

The answer remains that you CAN NOT call vmalloc_to_page or virt_to_page
on DMA coherent memory, and the driver has been broken ever since it
was merged.  We need to fix the root cause and not the symptom.
Andy Duan Jan. 11, 2019, 1:28 a.m. UTC | #9
From: Christoph Hellwig <hch@infradead.org> Sent: January 10, 2019 22:07
> On Thu, Jan 10, 2019 at 01:45:20AM +0000, Andy Duan wrote:
> > Do you have any other comments on the patch?
> > The current driver breaks remoteproc on the NXP i.MX8 platform. The
> > patch is a bugfix for the virtio rpmsg bus; we hope it can enter the
> > next and stable trees if there are no other comments.
> 
> The answer remains that you CAN NOT call vmalloc_to_page or virt_to_page
> on DMA coherent memory, and the driver has been broken ever since it was
> merged.  We need to fix the root cause and not the symptom.

The NXP i.MX8 platform requires that the M4 only access a fixed memory
region, so do you have any suggestion for fixing the issue while satisfying
that requirement? Or do you have a plan to fix the root cause?

Thanks.
Andy Duan Jan. 11, 2019, 1:56 a.m. UTC | #10
Hi Loic,

From: Loic PALLARDY <loic.pallardy@st.com> Sent: January 10, 2019 21:06
> Hi Andy,
> 
> [...]
> > Do you have any other comments on the patch?
> 
> I tried your patch on an ST platform and it compiles neither on kernel
> v5.0-rc1 nor on Bjorn's rpmsg-next:
> dma_to_phys() is unknown because dma-direct.h is not included, and
> ‘struct virtproc_info’ has no member named ‘bufs_dev’.
>
> Could you please send a new version fixing the compilation issues? I
> would like to test it on my platform and give you feedback.

phys_to_dma(), dma_to_phys() and dma_capable() are helpers published by
architecture code for use by swiotlb and xen-swiotlb only.  Drivers are
not supposed to use them directly, but should use the DMA API instead.

So the fix is not acceptable due to that arch limitation.  As Christoph
said, the root cause needs to be fixed.

Andy
Christoph Hellwig Jan. 14, 2019, 9:53 a.m. UTC | #11
On Fri, Jan 11, 2019 at 01:28:46AM +0000, Andy Duan wrote:
> The NXP i.MX8 platform requires that the M4 only access a fixed memory
> region, so do you have any suggestion for fixing the issue while
> satisfying that requirement? Or do you have a plan to fix the root cause?

I think the answer is to use RESERVEDMEM_OF_DECLARE without the DMA
coherent boilerplate code.

For the initial prototype just do it inside the driver, although I'd
like to eventually factor this out into common code, especially if my
proposal for more general availability of DMA_ATTR_NON_CONSISTENT goes
ahead.
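
A minimal sketch of that direction (the compatible string and function
names here are made up for illustration):

	#include <linux/of_reserved_mem.h>

	static int __init rpmsg_buf_rmem_init(struct reserved_mem *rmem)
	{
		/*
		 * rmem->base/rmem->size describe the fixed region the
		 * remote core may access; record them so the driver can
		 * map and manage the buffers itself, without the DMA
		 * coherent pool boilerplate.
		 */
		pr_info("rpmsg buffers at %pa, size %pa\n",
			&rmem->base, &rmem->size);
		return 0;
	}
	RESERVEDMEM_OF_DECLARE(rpmsg_buf, "vendor,rpmsg-buffers",
			       rpmsg_buf_rmem_init);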
Andy Duan Jan. 16, 2019, 3:38 a.m. UTC | #12
From: Christoph Hellwig <hch@infradead.org> Sent: Monday, January 14, 2019 5:53 PM
> On Fri, Jan 11, 2019 at 01:28:46AM +0000, Andy Duan wrote:
> > The NXP i.MX8 platform requires that the M4 only access a fixed
> > memory region, so do you have any suggestion for fixing the issue
> > while satisfying that requirement? Or do you have a plan to fix the
> > root cause?
> 
> I think the answer is to use RESERVEDMEM_OF_DECLARE without the DMA
> coherent boilerplate code.
If we use RESERVEDMEM_OF_DECLARE with the DMA contiguous code (CMA), it
can meet the requirement, but as you know, the CMA size alignment is 32MB,
while we only need 8MB of contiguous memory for rpmsg.

> 
> For the initial prototype just do it inside the driver, although I'd like to
> eventually factor this out into common code, especially if my proposal for
> more general availability of DMA_ATTR_NON_CONSISTENT goes ahead.
diff mbox series

Patch

diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 664f957..d548bd0 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -196,16 +196,17 @@  static int virtio_rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src,
  * location (in vmalloc or in kernel).
  */
 static void
-rpmsg_sg_init(struct scatterlist *sg, void *cpu_addr, unsigned int len)
+rpmsg_sg_init(struct virtproc_info *vrp, struct scatterlist *sg,
+	      void *cpu_addr, unsigned int len)
 {
-	if (is_vmalloc_addr(cpu_addr)) {
-		sg_init_table(sg, 1);
-		sg_set_page(sg, vmalloc_to_page(cpu_addr), len,
-			    offset_in_page(cpu_addr));
-	} else {
-		WARN_ON(!virt_addr_valid(cpu_addr));
-		sg_init_one(sg, cpu_addr, len);
-	}
+	unsigned int offset;
+	dma_addr_t dev_add = vrp->bufs_dma + (cpu_addr - vrp->rbufs);
+	struct page *page = pfn_to_page(PHYS_PFN(dma_to_phys(vrp->bufs_dev,
+					dev_add)));
+
+	offset = offset_in_page(cpu_addr);
+	sg_init_table(sg, 1);
+	sg_set_page(sg, page, len, offset);
 }
 
 /**
@@ -626,7 +627,7 @@  static int rpmsg_send_offchannel_raw(struct rpmsg_device *rpdev,
 			 msg, sizeof(*msg) + msg->len, true);
 #endif
 
-	rpmsg_sg_init(&sg, msg, sizeof(*msg) + len);
+	rpmsg_sg_init(vrp, &sg, msg, sizeof(*msg) + len);
 
 	mutex_lock(&vrp->tx_lock);
 
@@ -750,7 +751,7 @@  static int rpmsg_recv_single(struct virtproc_info *vrp, struct device *dev,
 		dev_warn(dev, "msg received with no recipient\n");
 
 	/* publish the real size of the buffer */
-	rpmsg_sg_init(&sg, msg, vrp->buf_size);
+	rpmsg_sg_init(vrp, &sg, msg, vrp->buf_size);
 
 	/* add the buffer back to the remote processor's virtqueue */
 	err = virtqueue_add_inbuf(vrp->rvq, &sg, 1, msg, GFP_KERNEL);
@@ -934,7 +935,7 @@  static int rpmsg_probe(struct virtio_device *vdev)
 		struct scatterlist sg;
 		void *cpu_addr = vrp->rbufs + i * vrp->buf_size;
 
-		rpmsg_sg_init(&sg, cpu_addr, vrp->buf_size);
+		rpmsg_sg_init(vrp, &sg, cpu_addr, vrp->buf_size);
 
 		err = virtqueue_add_inbuf(vrp->rvq, &sg, 1, cpu_addr,
 					  GFP_KERNEL);