diff mbox series

[RFC,v3,9/9] media: platform: Add Mediatek ISP P1 shared memory device

Message ID 20190611035344.29814-10-jungo.lin@mediatek.com (mailing list archive)
State RFC
Headers show
Series [RFC,v3,1/9] dt-bindings: mt8183: Added camera ISP Pass 1 | expand

Commit Message

Jungo Lin June 11, 2019, 3:53 a.m. UTC
The purpose of this child device is to provide shared
memory management for exchanging tuning data between co-processor
and the Pass 1 unit of the camera ISP system, including cache
buffer handling.

Signed-off-by: Jungo Lin <jungo.lin@mediatek.com>
---
This patch depends on "Add support for mt8183 SCP"[1].

[1] https://patchwork.kernel.org/cover/10972143/
---
 .../platform/mtk-isp/isp_50/cam/Makefile      |   1 +
 .../mtk-isp/isp_50/cam/mtk_cam-smem.c         | 304 ++++++++++++++++++
 .../mtk-isp/isp_50/cam/mtk_cam-smem.h         |  18 ++
 3 files changed, 323 insertions(+)
 create mode 100644 drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c
 create mode 100644 drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.h

Comments

Tomasz Figa July 1, 2019, 7:25 a.m. UTC | #1
Hi Jungo,

On Tue, Jun 11, 2019 at 11:53:44AM +0800, Jungo Lin wrote:
> The purpose of this child device is to provide shared
> memory management for exchanging tuning data between co-processor
> and the Pass 1 unit of the camera ISP system, including cache
> buffer handling.
> 

Looks like we haven't really progressed on getting this replaced with
something that doesn't require so much custom code. Let me propose something
better then.

We already have a reserved memory mode in DT. If it has a compatible string
of "shared-dma-pool", it would be registered in the coherent DMA framework
[1]. That would make it available for consumer devices to look-up.

Now if we add a "memory-region" property to the SCP device node and point it
to our reserved memory node, the SCP driver could look it up and hook to the
DMA mapping API using of_reserved_mem_device_init_by_idx[2].

That basically makes any dma_alloc_*(), dma_map_*(), etc. calls on the SCP
struct device use the coherent DMA ops, which operate on the assigned memory
pool. With that, the P1 driver could just directly use those calls to
manage the memory, without any custom code.

There is an example how this setup works in the s5p-mfc driver[3], but it
needs to be noted that it creates child nodes, because it can have more than
1 DMA port, which may need its own memory pool. In our case, we wouldn't
need child nodes and could just use the SCP device directly.

[1] https://elixir.bootlin.com/linux/v5.2-rc7/source/kernel/dma/coherent.c#L345
[2] https://elixir.bootlin.com/linux/v5.2-rc7/source/drivers/of/of_reserved_mem.c#L312
[3] https://elixir.bootlin.com/linux/v5.2-rc7/source/drivers/media/platform/s5p-mfc/s5p_mfc.c#L1075

Let me also post some specific comments below, in case we end up still
needing any of the code.

> Signed-off-by: Jungo Lin <jungo.lin@mediatek.com>
> ---
> This patch depends on "Add support for mt8183 SCP"[1].
> 
> [1] https://patchwork.kernel.org/cover/10972143/
> ---
>  .../platform/mtk-isp/isp_50/cam/Makefile      |   1 +
>  .../mtk-isp/isp_50/cam/mtk_cam-smem.c         | 304 ++++++++++++++++++
>  .../mtk-isp/isp_50/cam/mtk_cam-smem.h         |  18 ++
>  3 files changed, 323 insertions(+)
>  create mode 100644 drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c
>  create mode 100644 drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.h
> 
> diff --git a/drivers/media/platform/mtk-isp/isp_50/cam/Makefile b/drivers/media/platform/mtk-isp/isp_50/cam/Makefile
> index 95f0b1c8fa1c..d545ca6f09c5 100644
> --- a/drivers/media/platform/mtk-isp/isp_50/cam/Makefile
> +++ b/drivers/media/platform/mtk-isp/isp_50/cam/Makefile
> @@ -4,5 +4,6 @@ mtk-cam-isp-objs += mtk_cam-ctrl.o
>  mtk-cam-isp-objs += mtk_cam-v4l2-util.o
>  mtk-cam-isp-objs += mtk_cam.o
>  mtk-cam-isp-objs += mtk_cam-scp.o
> +mtk-cam-isp-objs += mtk_cam-smem.o
>  
>  obj-$(CONFIG_VIDEO_MEDIATEK_ISP_PASS1) += mtk-cam-isp.o
> \ No newline at end of file
> diff --git a/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c b/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c
> new file mode 100644
> index 000000000000..a9845668ce10
> --- /dev/null
> +++ b/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c
> @@ -0,0 +1,304 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//
> +// Copyright (c) 2018 MediaTek Inc.
> +
> +#include <asm/cacheflush.h>
> +#include <linux/device.h>
> +#include <linux/io.h>
> +#include <linux/iommu.h>
> +#include <linux/of.h>
> +#include <linux/of_fdt.h>
> +#include <linux/of_reserved_mem.h>
> +#include <linux/platform_device.h>
> +#include <linux/platform_data/mtk_scp.h>
> +#include <media/videobuf2-dma-contig.h>
> +
> +#include "mtk_cam-smem.h"
> +
> +static struct dma_map_ops smem_dma_ops;
> +
> +struct mtk_cam_smem_dev {
> +	struct device *dev;
> +	struct sg_table sgt;
> +	struct page **smem_pages;
> +	dma_addr_t smem_base;
> +	dma_addr_t smem_dma_base;
> +	int smem_size;
> +};
> +
> +struct dma_coherent_mem {
> +	void		*virt_base;
> +	dma_addr_t	device_base;
> +	unsigned long	pfn_base;
> +	int		size;
> +	int		flags;
> +	unsigned long	*bitmap;
> +	spinlock_t	spinlock; /* dma_coherent_mem attributes protection */
> +	bool		use_dev_dma_pfn_offset;
> +};
> +
> +dma_addr_t mtk_cam_smem_iova_to_scp_addr(struct device *dev,
> +					 dma_addr_t iova)
> +{
> +	struct iommu_domain *domain;
> +	dma_addr_t addr, limit;
> +	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
> +
> +	domain = iommu_get_domain_for_dev(dev);
> +	if (!domain) {
> +		dev_warn(dev, "No iommu group domain\n");
> +		return 0;
> +	}
> +
> +	addr = iommu_iova_to_phys(domain, iova);
> +	limit = smem_dev->smem_base + smem_dev->smem_size;
> +	if (addr < smem_dev->smem_base || addr >= limit) {
> +		dev_err(dev,
> +			"Unexpected scp_addr:%pad must >= %pad and < %pad)\n",
> +			&addr, &smem_dev->smem_base, &limit);
> +		return 0;
> +	}
> +	return addr;
> +}

This isn't correct. One could pass an IOVA that wasn't allocated for the SCP
and then the address wouldn't be valid, because it would point outside of
the address range allowed for SCP to access and also it would only point to
the first page backing the IOVA.

The correct approach would be to always carry SCP DMA address and IOVA
together in some kind of struct describing such buffers.

> +
> +static int mtk_cam_smem_get_sgtable(struct device *dev,
> +				    struct sg_table *sgt,
> +				    void *cpu_addr, dma_addr_t dma_addr,
> +				    size_t size, unsigned long attrs)
> +{
> +	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
> +	size_t pages_count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +	dma_addr_t scp_addr = mtk_cam_smem_iova_to_scp_addr(dev, dma_addr);
> +	u32 pages_start = (scp_addr - smem_dev->smem_base) >> PAGE_SHIFT;
> +
> +	dev_dbg(dev,
> +		"%s:page:%u va:%pK scp addr:%pad, aligned size:%zu pages:%zu\n",
> +		__func__, pages_start, cpu_addr, &scp_addr, size, pages_count);
> +
> +	return sg_alloc_table_from_pages(sgt,
> +		smem_dev->smem_pages + pages_start,
> +		pages_count, 0, size, GFP_KERNEL);
> +}

This should be just dma_get_sgtable_attrs(), in the approach I suggested at
the top.

> +
> +static void *mtk_cam_smem_get_cpu_addr(struct mtk_cam_smem_dev *smem_dev,
> +				       dma_addr_t addr)
> +{
> +	struct device *dev = smem_dev->dev;
> +	struct dma_coherent_mem *dma_mem = dev->dma_mem;
> +
> +	if (addr < smem_dev->smem_base ||
> +	    addr > smem_dev->smem_base + smem_dev->smem_size) {

This is off by one, should be >=.

Also, this wouldn't really guarantee the CPU access the caller is going to
do is valid, because it doesn't consider the access operation size.

Generally I'd suggest designing the code so that it doesn't have to convert
offset addresses between different address spaces.

> +		dev_err(dev, "Invalid scp_addr %pad from sg\n", &addr);
> +		return NULL;
> +	}
> +	return dma_mem->virt_base + (addr - smem_dev->smem_base);
> +}
> +
> +static void mtk_cam_smem_sync_sg_for_cpu(struct device *dev,
> +					 struct scatterlist *sgl, int nelems,
> +					 enum dma_data_direction dir)
> +{
> +	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
> +	dma_addr_t scp_addr = sg_phys(sgl);
> +	void *cpu_addr = mtk_cam_smem_get_cpu_addr(smem_dev, scp_addr);
> +
> +	dev_dbg(dev,
> +		"__dma_unmap_area:scp_addr:%pad,vaddr:%pK,size:%d,dir:%d\n",
> +		&scp_addr, cpu_addr, sgl->length, dir);
> +	__dma_unmap_area(cpu_addr, sgl->length, dir);

It's not allowed to use this function anywhere outside of the DMA API
internals. See the comment [4].

[4] https://elixir.bootlin.com/linux/v5.2-rc7/source/arch/arm64/include/asm/cacheflush.h#L112

> +}
> +
> +static void mtk_cam_smem_sync_sg_for_device(struct device *dev,
> +					    struct scatterlist *sgl,
> +					    int nelems,
> +					    enum dma_data_direction dir)
> +{
> +	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
> +	dma_addr_t scp_addr = sg_phys(sgl);
> +	void *cpu_addr = mtk_cam_smem_get_cpu_addr(smem_dev, scp_addr);
> +
> +	dev_dbg(dev,
> +		"__dma_map_area:scp_addr:%pad,vaddr:%pK,size:%d,dir:%d\n",
> +		&scp_addr, cpu_addr, sgl->length, dir);
> +	__dma_map_area(cpu_addr, sgl->length, dir);

Ditto.

> +}
> +
> +static void mtk_cam_smem_setup_dma_ops(struct device *dev,
> +				       struct dma_map_ops *smem_ops)
> +{
> +	memcpy((void *)smem_ops, dev->dma_ops, sizeof(*smem_ops));
> +	smem_ops->get_sgtable = mtk_cam_smem_get_sgtable;
> +	smem_ops->sync_sg_for_device = mtk_cam_smem_sync_sg_for_device;
> +	smem_ops->sync_sg_for_cpu = mtk_cam_smem_sync_sg_for_cpu;
> +	set_dma_ops(dev, smem_ops);
> +}
> +
> +static int mtk_cam_reserved_drm_sg_init(struct mtk_cam_smem_dev *smem_dev)
> +{
> +	u32 size_align, n_pages;
> +	struct device *dev = smem_dev->dev;
> +	struct sg_table *sgt = &smem_dev->sgt;
> +	struct page **pages;
> +	dma_addr_t dma_addr;
> +	unsigned int i;
> +	int ret;
> +
> +	smem_dev->smem_base = scp_get_reserve_mem_phys(SCP_ISP_MEM2_ID);
> +	smem_dev->smem_size = scp_get_reserve_mem_size(SCP_ISP_MEM2_ID);
> +	if (!smem_dev->smem_base || !smem_dev->smem_size)
> +		return -EPROBE_DEFER;
> +
> +	dev_info(dev, "%s dev:0x%pK base:%pad size:%u MiB\n",
> +		 __func__,
> +		 smem_dev->dev,
> +		 &smem_dev->smem_base,
> +		 (smem_dev->smem_size / SZ_1M));
> +
> +	size_align = PAGE_ALIGN(smem_dev->smem_size);
> +	n_pages = size_align >> PAGE_SHIFT;
> +
> +	pages = kmalloc_array(n_pages, sizeof(struct page *), GFP_KERNEL);
> +	if (!pages)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < n_pages; i++)
> +		pages[i] = phys_to_page(smem_dev->smem_base + i * PAGE_SIZE);
> +
> +	ret = sg_alloc_table_from_pages(sgt, pages, n_pages, 0,
> +					size_align, GFP_KERNEL);
> +	if (ret) {
> +		dev_err(dev, "failed to alloca sg table:%d\n", ret);
> +		goto fail_table_alloc;
> +	}
> +	sgt->nents = dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents,
> +				      DMA_BIDIRECTIONAL,
> +				      DMA_ATTR_SKIP_CPU_SYNC);
> +	if (!sgt->nents) {
> +		dev_err(dev, "failed to dma sg map\n");
> +		goto fail_map;
> +	}
> +
> +	dma_addr = sg_dma_address(sgt->sgl);
> +	ret = dma_declare_coherent_memory(dev, smem_dev->smem_base,
> +					  dma_addr, size_align,
> +					  DMA_MEMORY_EXCLUSIVE);
> +	if (ret) {
> +		dev_err(dev, "Unable to declare smem  memory:%d\n", ret);
> +		goto fail_map;
> +	}
> +
> +	dev_info(dev, "Coherent mem pa:%pad/%pad, size:%d\n",
> +		 &smem_dev->smem_base, &dma_addr, size_align);
> +
> +	smem_dev->smem_size = size_align;
> +	smem_dev->smem_pages = pages;
> +	smem_dev->smem_dma_base = dma_addr;
> +
> +	return 0;
> +
> +fail_map:
> +	sg_free_table(sgt);
> +fail_table_alloc:
> +	while (n_pages--)
> +		__free_page(pages[n_pages]);
> +	kfree(pages);
> +
> +	return -ENOMEM;
> +}
> +
> +/* DMA memory related helper functions */
> +static void mtk_cam_memdev_release(struct device *dev)
> +{
> +	vb2_dma_contig_clear_max_seg_size(dev);
> +}
> +
> +static struct device *mtk_cam_alloc_smem_dev(struct device *dev,
> +					     const char *name)
> +{
> +	struct device *child;
> +	int ret;
> +
> +	child = devm_kzalloc(dev, sizeof(*child), GFP_KERNEL);
> +	if (!child)
> +		return NULL;
> +
> +	child->parent = dev;
> +	child->iommu_group = dev->iommu_group;

This isn't something that can be set explicitly. It's an internal field of
the IOMMU subsystem.

> +	child->release = mtk_cam_memdev_release;
> +	dev_set_name(child, name);
> +	set_dma_ops(child, get_dma_ops(dev));
> +	child->dma_mask = dev->dma_mask;
> +	ret = dma_set_coherent_mask(child, DMA_BIT_MASK(32));
> +	if (ret)
> +		return NULL;
> +
> +	vb2_dma_contig_set_max_seg_size(child, DMA_BIT_MASK(32));
> +
> +	if (device_register(child)) {
> +		device_del(child);
> +		return NULL;
> +	}
> +
> +	return child;
> +}

We shouldn't need child devices, just one SCP device, as I mentioned above.

> +
> +static int mtk_cam_composer_dma_init(struct mtk_isp_p1_ctx *isp_ctx)
> +{
> +	struct isp_p1_device *p1_dev = p1_ctx_to_dev(isp_ctx);
> +	struct device *dev = &p1_dev->pdev->dev;
> +	u32 size;
> +	dma_addr_t addr;
> +
> +	isp_ctx->scp_mem_pa = scp_get_reserve_mem_phys(SCP_ISP_MEM_ID);
> +	size = PAGE_ALIGN(scp_get_reserve_mem_size(SCP_ISP_MEM_ID));
> +	if (!isp_ctx->scp_mem_pa || !size)
> +		return -EPROBE_DEFER;
> +
> +	dev_info(dev, "scp addr:%pad size:0x%x\n", &isp_ctx->scp_mem_pa, size);

This isn't something that deserves the "info" log level. Should be "dbg"
or removed.

Best regards,
Tomasz
Jungo Lin July 5, 2019, 3:33 a.m. UTC | #2
Hi Tomasz,

On Mon, 2019-07-01 at 16:25 +0900, Tomasz Figa wrote:
> Hi Jungo,
> 
> On Tue, Jun 11, 2019 at 11:53:44AM +0800, Jungo Lin wrote:
> > The purpose of this child device is to provide shared
> > memory management for exchanging tuning data between co-processor
> > and the Pass 1 unit of the camera ISP system, including cache
> > buffer handling.
> > 
> 
> Looks like we haven't really progressed on getting this replaced with
> something that doesn't require so much custom code. Let me propose something
> better then.
> 
> We already have a reserved memory mode in DT. If it has a compatible string
> of "shared-dma-pool", it would be registered in the coherent DMA framework
> [1]. That would make it available for consumer devices to look-up.
> 
> Now if we add a "memory-region" property to the SCP device node and point it
> to our reserved memory node, the SCP driver could look it up and hook to the
> DMA mapping API using of_reserved_mem_device_init_by_idx[2].
> 
> That basically makes any dma_alloc_*(), dma_map_*(), etc. calls on the SCP
> struct device use the coherent DMA ops, which operate on the assigned memory
> pool. With that, the P1 driver could just directly use those calls to
> manage the memory, without any custom code.
> 
> There is an example how this setup works in the s5p-mfc driver[3], but it
> needs to be noted that it creates child nodes, because it can have more than
> 1 DMA port, which may need its own memory pool. In our case, we wouldn't
> need child nodes and could just use the SCP device directly.
> 
> [1] https://elixir.bootlin.com/linux/v5.2-rc7/source/kernel/dma/coherent.c#L345
> [2] https://elixir.bootlin.com/linux/v5.2-rc7/source/drivers/of/of_reserved_mem.c#L312
> [3] https://elixir.bootlin.com/linux/v5.2-rc7/source/drivers/media/platform/s5p-mfc/s5p_mfc.c#L1075
> 
> Let me also post some specific comments below, in case we end up still
> needing any of the code.
> 

Thanks your suggestions.

After applying your suggestion in SCP device driver, we could remove
mtk_cam-smem.h/c. Currently, we use dma_alloc_coherent with SCP device
to get SCP address. We could touch the buffer with this SCP address in
SCP processor. 

After that, we use dma_map_page_attrs with P1 device which supports
IOMMU domain to get IOVA address. For this address, we will assign
it to our ISP HW device to proceed.

Below is the snippet for ISP P1 compose buffer initialization.

	ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
				 MAX_COMPOSER_SIZE, &addr, GFP_KERNEL);
	if (!ptr) {
		dev_err(dev, "failed to allocate compose memory\n");
		return -ENOMEM;
	}
	isp_ctx->scp_mem_pa = addr;
	dev_dbg(dev, "scp addr:%pad\n", &addr);

	/* get iova address */
	addr = dma_map_page_attrs(dev, phys_to_page(addr), 0,
				  MAX_COMPOSER_SIZE, DMA_BIDIRECTIONAL,
				  DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, addr)) {
		isp_ctx->scp_mem_pa = 0;
		dev_err(dev, "Failed to map scp iova\n");
		return -ENOMEM;
	}
	isp_ctx->scp_mem_iova = addr;

Moreover, we have another meta input buffer usage.
For this kind of buffer, it will be allocated by V4L2 framework
with dma_alloc_coherent with SCP device. In order to get IOVA,
we will add dma_map_page_attrs in vb2_ops' buf_init function.
In buf_cleanup function, we will call dma_unmap_page_attrs function.

Based on these current implementation, do you think it is correct?
If we got any wrong, please let us know. 

Btw, we also DMA_ATTR_NO_KERNEL_MAPPING DMA attribte to
avoid dma_sync_sg_for_device. Othewise, it will hit the KE.
Maybe we could not get the correct sg_table.
Do you think it is a bug and need to fix?

For this new implementation, it will apply ISP P1 & P2 drivers[1].

[1] https://patchwork.kernel.org/cover/10905221/

> > Signed-off-by: Jungo Lin <jungo.lin@mediatek.com>
> > ---
> > This patch depends on "Add support for mt8183 SCP"[1].
> > 
> > [1] https://patchwork.kernel.org/cover/10972143/
> > ---
> >  .../platform/mtk-isp/isp_50/cam/Makefile      |   1 +
> >  .../mtk-isp/isp_50/cam/mtk_cam-smem.c         | 304 ++++++++++++++++++
> >  .../mtk-isp/isp_50/cam/mtk_cam-smem.h         |  18 ++
> >  3 files changed, 323 insertions(+)
> >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c
> >  create mode 100644 drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.h
> > 
> > diff --git a/drivers/media/platform/mtk-isp/isp_50/cam/Makefile b/drivers/media/platform/mtk-isp/isp_50/cam/Makefile
> > index 95f0b1c8fa1c..d545ca6f09c5 100644
> > --- a/drivers/media/platform/mtk-isp/isp_50/cam/Makefile
> > +++ b/drivers/media/platform/mtk-isp/isp_50/cam/Makefile
> > @@ -4,5 +4,6 @@ mtk-cam-isp-objs += mtk_cam-ctrl.o
> >  mtk-cam-isp-objs += mtk_cam-v4l2-util.o
> >  mtk-cam-isp-objs += mtk_cam.o
> >  mtk-cam-isp-objs += mtk_cam-scp.o
> > +mtk-cam-isp-objs += mtk_cam-smem.o
> >  
> >  obj-$(CONFIG_VIDEO_MEDIATEK_ISP_PASS1) += mtk-cam-isp.o
> > \ No newline at end of file
> > diff --git a/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c b/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c
> > new file mode 100644
> > index 000000000000..a9845668ce10
> > --- /dev/null
> > +++ b/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c
> > @@ -0,0 +1,304 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +//
> > +// Copyright (c) 2018 MediaTek Inc.
> > +
> > +#include <asm/cacheflush.h>
> > +#include <linux/device.h>
> > +#include <linux/io.h>
> > +#include <linux/iommu.h>
> > +#include <linux/of.h>
> > +#include <linux/of_fdt.h>
> > +#include <linux/of_reserved_mem.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/platform_data/mtk_scp.h>
> > +#include <media/videobuf2-dma-contig.h>
> > +
> > +#include "mtk_cam-smem.h"
> > +
> > +static struct dma_map_ops smem_dma_ops;
> > +
> > +struct mtk_cam_smem_dev {
> > +	struct device *dev;
> > +	struct sg_table sgt;
> > +	struct page **smem_pages;
> > +	dma_addr_t smem_base;
> > +	dma_addr_t smem_dma_base;
> > +	int smem_size;
> > +};
> > +
> > +struct dma_coherent_mem {
> > +	void		*virt_base;
> > +	dma_addr_t	device_base;
> > +	unsigned long	pfn_base;
> > +	int		size;
> > +	int		flags;
> > +	unsigned long	*bitmap;
> > +	spinlock_t	spinlock; /* dma_coherent_mem attributes protection */
> > +	bool		use_dev_dma_pfn_offset;
> > +};
> > +
> > +dma_addr_t mtk_cam_smem_iova_to_scp_addr(struct device *dev,
> > +					 dma_addr_t iova)
> > +{
> > +	struct iommu_domain *domain;
> > +	dma_addr_t addr, limit;
> > +	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
> > +
> > +	domain = iommu_get_domain_for_dev(dev);
> > +	if (!domain) {
> > +		dev_warn(dev, "No iommu group domain\n");
> > +		return 0;
> > +	}
> > +
> > +	addr = iommu_iova_to_phys(domain, iova);
> > +	limit = smem_dev->smem_base + smem_dev->smem_size;
> > +	if (addr < smem_dev->smem_base || addr >= limit) {
> > +		dev_err(dev,
> > +			"Unexpected scp_addr:%pad must >= %pad and < %pad)\n",
> > +			&addr, &smem_dev->smem_base, &limit);
> > +		return 0;
> > +	}
> > +	return addr;
> > +}
> 
> This isn't correct. One could pass an IOVA that wasn't allocated for the SCP
> and then the address wouldn't be valid, because it would point outside of
> the address range allowed for SCP to access and also it would only point to
> the first page backing the IOVA.
> 
> The correct approach would be to always carry SCP DMA address and IOVA
> together in some kind of struct describing such buffers.
> 

We will remove this function in next patch & handle this in buf_init
function.

> > +
> > +static int mtk_cam_smem_get_sgtable(struct device *dev,
> > +				    struct sg_table *sgt,
> > +				    void *cpu_addr, dma_addr_t dma_addr,
> > +				    size_t size, unsigned long attrs)
> > +{
> > +	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
> > +	size_t pages_count = PAGE_ALIGN(size) >> PAGE_SHIFT;
> > +	dma_addr_t scp_addr = mtk_cam_smem_iova_to_scp_addr(dev, dma_addr);
> > +	u32 pages_start = (scp_addr - smem_dev->smem_base) >> PAGE_SHIFT;
> > +
> > +	dev_dbg(dev,
> > +		"%s:page:%u va:%pK scp addr:%pad, aligned size:%zu pages:%zu\n",
> > +		__func__, pages_start, cpu_addr, &scp_addr, size, pages_count);
> > +
> > +	return sg_alloc_table_from_pages(sgt,
> > +		smem_dev->smem_pages + pages_start,
> > +		pages_count, 0, size, GFP_KERNEL);
> > +}
> 
> This should be just dma_get_sgtable_attrs(), in the approach I suggested at
> the top.
> 

Yes, we will remove this in next patch.

> > +
> > +static void *mtk_cam_smem_get_cpu_addr(struct mtk_cam_smem_dev *smem_dev,
> > +				       dma_addr_t addr)
> > +{
> > +	struct device *dev = smem_dev->dev;
> > +	struct dma_coherent_mem *dma_mem = dev->dma_mem;
> > +
> > +	if (addr < smem_dev->smem_base ||
> > +	    addr > smem_dev->smem_base + smem_dev->smem_size) {
> 
> This is off by one, should be >=.
> 
> Also, this wouldn't really guarantee the CPU access the caller is going to
> do is valid, because it doesn't consider the access operation size.
> 
> Generally I'd suggest designing the code so that it doesn't have to convert
> offset addresses between different address spaces.
> 

Yes, we will remove this in next patch.

> > +		dev_err(dev, "Invalid scp_addr %pad from sg\n", &addr);
> > +		return NULL;
> > +	}
> > +	return dma_mem->virt_base + (addr - smem_dev->smem_base);
> > +}
> > +
> > +static void mtk_cam_smem_sync_sg_for_cpu(struct device *dev,
> > +					 struct scatterlist *sgl, int nelems,
> > +					 enum dma_data_direction dir)
> > +{
> > +	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
> > +	dma_addr_t scp_addr = sg_phys(sgl);
> > +	void *cpu_addr = mtk_cam_smem_get_cpu_addr(smem_dev, scp_addr);
> > +
> > +	dev_dbg(dev,
> > +		"__dma_unmap_area:scp_addr:%pad,vaddr:%pK,size:%d,dir:%d\n",
> > +		&scp_addr, cpu_addr, sgl->length, dir);
> > +	__dma_unmap_area(cpu_addr, sgl->length, dir);
> 
> It's not allowed to use this function anywhere outside of the DMA API
> internals. See the comment [4].
> 
> [4] https://elixir.bootlin.com/linux/v5.2-rc7/source/arch/arm64/include/asm/cacheflush.h#L112
> 

Ok, got it and remove this next patch.

> > +}
> > +
> > +static void mtk_cam_smem_sync_sg_for_device(struct device *dev,
> > +					    struct scatterlist *sgl,
> > +					    int nelems,
> > +					    enum dma_data_direction dir)
> > +{
> > +	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
> > +	dma_addr_t scp_addr = sg_phys(sgl);
> > +	void *cpu_addr = mtk_cam_smem_get_cpu_addr(smem_dev, scp_addr);
> > +
> > +	dev_dbg(dev,
> > +		"__dma_map_area:scp_addr:%pad,vaddr:%pK,size:%d,dir:%d\n",
> > +		&scp_addr, cpu_addr, sgl->length, dir);
> > +	__dma_map_area(cpu_addr, sgl->length, dir);
> 
> Ditto.
> 

Ok, got it and remove this next patch.

> > +}
> > +
> > +static void mtk_cam_smem_setup_dma_ops(struct device *dev,
> > +				       struct dma_map_ops *smem_ops)
> > +{
> > +	memcpy((void *)smem_ops, dev->dma_ops, sizeof(*smem_ops));
> > +	smem_ops->get_sgtable = mtk_cam_smem_get_sgtable;
> > +	smem_ops->sync_sg_for_device = mtk_cam_smem_sync_sg_for_device;
> > +	smem_ops->sync_sg_for_cpu = mtk_cam_smem_sync_sg_for_cpu;
> > +	set_dma_ops(dev, smem_ops);
> > +}
> > +
> > +static int mtk_cam_reserved_drm_sg_init(struct mtk_cam_smem_dev *smem_dev)
> > +{
> > +	u32 size_align, n_pages;
> > +	struct device *dev = smem_dev->dev;
> > +	struct sg_table *sgt = &smem_dev->sgt;
> > +	struct page **pages;
> > +	dma_addr_t dma_addr;
> > +	unsigned int i;
> > +	int ret;
> > +
> > +	smem_dev->smem_base = scp_get_reserve_mem_phys(SCP_ISP_MEM2_ID);
> > +	smem_dev->smem_size = scp_get_reserve_mem_size(SCP_ISP_MEM2_ID);
> > +	if (!smem_dev->smem_base || !smem_dev->smem_size)
> > +		return -EPROBE_DEFER;
> > +
> > +	dev_info(dev, "%s dev:0x%pK base:%pad size:%u MiB\n",
> > +		 __func__,
> > +		 smem_dev->dev,
> > +		 &smem_dev->smem_base,
> > +		 (smem_dev->smem_size / SZ_1M));
> > +
> > +	size_align = PAGE_ALIGN(smem_dev->smem_size);
> > +	n_pages = size_align >> PAGE_SHIFT;
> > +
> > +	pages = kmalloc_array(n_pages, sizeof(struct page *), GFP_KERNEL);
> > +	if (!pages)
> > +		return -ENOMEM;
> > +
> > +	for (i = 0; i < n_pages; i++)
> > +		pages[i] = phys_to_page(smem_dev->smem_base + i * PAGE_SIZE);
> > +
> > +	ret = sg_alloc_table_from_pages(sgt, pages, n_pages, 0,
> > +					size_align, GFP_KERNEL);
> > +	if (ret) {
> > +		dev_err(dev, "failed to alloca sg table:%d\n", ret);
> > +		goto fail_table_alloc;
> > +	}
> > +	sgt->nents = dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents,
> > +				      DMA_BIDIRECTIONAL,
> > +				      DMA_ATTR_SKIP_CPU_SYNC);
> > +	if (!sgt->nents) {
> > +		dev_err(dev, "failed to dma sg map\n");
> > +		goto fail_map;
> > +	}
> > +
> > +	dma_addr = sg_dma_address(sgt->sgl);
> > +	ret = dma_declare_coherent_memory(dev, smem_dev->smem_base,
> > +					  dma_addr, size_align,
> > +					  DMA_MEMORY_EXCLUSIVE);
> > +	if (ret) {
> > +		dev_err(dev, "Unable to declare smem  memory:%d\n", ret);
> > +		goto fail_map;
> > +	}
> > +
> > +	dev_info(dev, "Coherent mem pa:%pad/%pad, size:%d\n",
> > +		 &smem_dev->smem_base, &dma_addr, size_align);
> > +
> > +	smem_dev->smem_size = size_align;
> > +	smem_dev->smem_pages = pages;
> > +	smem_dev->smem_dma_base = dma_addr;
> > +
> > +	return 0;
> > +
> > +fail_map:
> > +	sg_free_table(sgt);
> > +fail_table_alloc:
> > +	while (n_pages--)
> > +		__free_page(pages[n_pages]);
> > +	kfree(pages);
> > +
> > +	return -ENOMEM;
> > +}
> > +
> > +/* DMA memory related helper functions */
> > +static void mtk_cam_memdev_release(struct device *dev)
> > +{
> > +	vb2_dma_contig_clear_max_seg_size(dev);
> > +}
> > +
> > +static struct device *mtk_cam_alloc_smem_dev(struct device *dev,
> > +					     const char *name)
> > +{
> > +	struct device *child;
> > +	int ret;
> > +
> > +	child = devm_kzalloc(dev, sizeof(*child), GFP_KERNEL);
> > +	if (!child)
> > +		return NULL;
> > +
> > +	child->parent = dev;
> > +	child->iommu_group = dev->iommu_group;
> 
> This isn't something that can be set explicitly. It's an internal field of
> the IOMMU subsystem.
> 
> > +	child->release = mtk_cam_memdev_release;
> > +	dev_set_name(child, name);
> > +	set_dma_ops(child, get_dma_ops(dev));
> > +	child->dma_mask = dev->dma_mask;
> > +	ret = dma_set_coherent_mask(child, DMA_BIT_MASK(32));
> > +	if (ret)
> > +		return NULL;
> > +
> > +	vb2_dma_contig_set_max_seg_size(child, DMA_BIT_MASK(32));
> > +
> > +	if (device_register(child)) {
> > +		device_del(child);
> > +		return NULL;
> > +	}
> > +
> > +	return child;
> > +}
> 
> We shouldn't need child devices, just one SCP device, as I mentioned above.
> 

Ok, got your point. Just keep one single SCP device for single reserved
memory range.

> > +
> > +static int mtk_cam_composer_dma_init(struct mtk_isp_p1_ctx *isp_ctx)
> > +{
> > +	struct isp_p1_device *p1_dev = p1_ctx_to_dev(isp_ctx);
> > +	struct device *dev = &p1_dev->pdev->dev;
> > +	u32 size;
> > +	dma_addr_t addr;
> > +
> > +	isp_ctx->scp_mem_pa = scp_get_reserve_mem_phys(SCP_ISP_MEM_ID);
> > +	size = PAGE_ALIGN(scp_get_reserve_mem_size(SCP_ISP_MEM_ID));
> > +	if (!isp_ctx->scp_mem_pa || !size)
> > +		return -EPROBE_DEFER;
> > +
> > +	dev_info(dev, "scp addr:%pad size:0x%x\n", &isp_ctx->scp_mem_pa, size);
> 
> This isn't something that deserves the "info" log level. Should be "dbg"
> or removed.
> 

Ok, we will change the log level from info to debug.

> Best regards,
> Tomasz
> 
> _______________________________________________
> Linux-mediatek mailing list
> Linux-mediatek@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-mediatek

Thanks for your valued comments.

Best regards,


Jungo
Tomasz Figa July 5, 2019, 4:22 a.m. UTC | #3
Hi Jungo,

On Fri, Jul 5, 2019 at 12:33 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Mon, 2019-07-01 at 16:25 +0900, Tomasz Figa wrote:
> > Hi Jungo,
> >
> > On Tue, Jun 11, 2019 at 11:53:44AM +0800, Jungo Lin wrote:
> > > The purpose of this child device is to provide shared
> > > memory management for exchanging tuning data between co-processor
> > > and the Pass 1 unit of the camera ISP system, including cache
> > > buffer handling.
> > >
> >
> > Looks like we haven't really progressed on getting this replaced with
> > something that doesn't require so much custom code. Let me propose something
> > better then.
> >
> > We already have a reserved memory mode in DT. If it has a compatible string
> > of "shared-dma-pool", it would be registered in the coherent DMA framework
> > [1]. That would make it available for consumer devices to look-up.
> >
> > Now if we add a "memory-region" property to the SCP device node and point it
> > to our reserved memory node, the SCP driver could look it up and hook to the
> > DMA mapping API using of_reserved_mem_device_init_by_idx[2].
> >
> > That basically makes any dma_alloc_*(), dma_map_*(), etc. calls on the SCP
> > struct device use the coherent DMA ops, which operate on the assigned memory
> > pool. With that, the P1 driver could just directly use those calls to
> > manage the memory, without any custom code.
> >
> > There is an example how this setup works in the s5p-mfc driver[3], but it
> > needs to be noted that it creates child nodes, because it can have more than
> > 1 DMA port, which may need its own memory pool. In our case, we wouldn't
> > need child nodes and could just use the SCP device directly.
> >
> > [1] https://elixir.bootlin.com/linux/v5.2-rc7/source/kernel/dma/coherent.c#L345
> > [2] https://elixir.bootlin.com/linux/v5.2-rc7/source/drivers/of/of_reserved_mem.c#L312
> > [3] https://elixir.bootlin.com/linux/v5.2-rc7/source/drivers/media/platform/s5p-mfc/s5p_mfc.c#L1075
> >
> > Let me also post some specific comments below, in case we end up still
> > needing any of the code.
> >
>
> Thanks your suggestions.
>
> After applying your suggestion in SCP device driver, we could remove
> mtk_cam-smem.h/c. Currently, we use dma_alloc_coherent with SCP device
> to get SCP address. We could touch the buffer with this SCP address in
> SCP processor.
>
> After that, we use dma_map_page_attrs with P1 device which supports
> IOMMU domain to get IOVA address. For this address, we will assign
> it to our ISP HW device to proceed.
>
> Below is the snippet for ISP P1 compose buffer initialization.
>
>         ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
>                                  MAX_COMPOSER_SIZE, &addr, GFP_KERNEL);
>         if (!ptr) {
>                 dev_err(dev, "failed to allocate compose memory\n");
>                 return -ENOMEM;
>         }
>         isp_ctx->scp_mem_pa = addr;

addr contains a DMA address, not a physical address. Could we call it
scp_mem_dma instead?

>         dev_dbg(dev, "scp addr:%pad\n", &addr);
>
>         /* get iova address */
>         addr = dma_map_page_attrs(dev, phys_to_page(addr), 0,

addr is a DMA address, so phys_to_page() can't be called on it. The
simplest thing here would be to use dma_map_single() with ptr as the
CPU address expected.

>                                   MAX_COMPOSER_SIZE, DMA_BIDIRECTIONAL,
>                                   DMA_ATTR_SKIP_CPU_SYNC);
>         if (dma_mapping_error(dev, addr)) {
>                 isp_ctx->scp_mem_pa = 0;

We also need to free the allocated memory.

>                 dev_err(dev, "Failed to map scp iova\n");
>                 return -ENOMEM;
>         }
>         isp_ctx->scp_mem_iova = addr;
>
> Moreover, we have another meta input buffer usage.
> For this kind of buffer, it will be allocated by V4L2 framework
> with dma_alloc_coherent with SCP device. In order to get IOVA,
> we will add dma_map_page_attrs in vb2_ops' buf_init function.
> In buf_cleanup function, we will call dma_unmap_page_attrs function.

As per above, we don't have access to the struct page we want to map.
We probably want to get the CPU VA using vb2_plane_vaddr() and call
dma_map_single() instead.

>
> Based on these current implementation, do you think it is correct?
> If we got any wrong, please let us know.
>
> Btw, we also DMA_ATTR_NO_KERNEL_MAPPING DMA attribte to
> avoid dma_sync_sg_for_device. Othewise, it will hit the KE.
> Maybe we could not get the correct sg_table.
> Do you think it is a bug and need to fix?

I think DMA_ATTR_NO_KERNEL_MAPPING is good to have for all the buffers
that don't need to be accessed from the kernel anyway, to avoid
unnecessary kernel mapping operations. However, for coherent memory
pool, it doesn't change anything, because the memory always has a
kernel mapping. We also need the kernel virtual address for
dma_map_single(). Also the flag doesn't eliminate the need to do the
sync, e.g. if the userspace accesses the buffer.

Could you give me more information about the failure you're seeing?
Where is the dma_sync_sg_for_device() called from? Where do you get
the sgtable from?

Best regards,
Tomasz
Jungo Lin July 5, 2019, 5:44 a.m. UTC | #4
Hi, Tomasz:
On Fri, 2019-07-05 at 13:22 +0900, Tomasz Figa wrote:
> Hi Jungo,
> 
> On Fri, Jul 5, 2019 at 12:33 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Mon, 2019-07-01 at 16:25 +0900, Tomasz Figa wrote:
> > > Hi Jungo,
> > >
> > > On Tue, Jun 11, 2019 at 11:53:44AM +0800, Jungo Lin wrote:
> > > > The purpose of this child device is to provide shared
> > > > memory management for exchanging tuning data between co-processor
> > > > and the Pass 1 unit of the camera ISP system, including cache
> > > > buffer handling.
> > > >
> > >
> > > Looks like we haven't really progressed on getting this replaced with
> > > something that doesn't require so much custom code. Let me propose something
> > > better then.
> > >
> > > We already have a reserved memory mode in DT. If it has a compatible string
> > > of "shared-dma-pool", it would be registered in the coherent DMA framework
> > > [1]. That would make it available for consumer devices to look-up.
> > >
> > > Now if we add a "memory-region" property to the SCP device node and point it
> > > to our reserved memory node, the SCP driver could look it up and hook to the
> > > DMA mapping API using of_reserved_mem_device_init_by_idx[2].
> > >
> > > That basically makes any dma_alloc_*(), dma_map_*(), etc. calls on the SCP
> > > struct device use the coherent DMA ops, which operate on the assigned memory
> > > pool. With that, the P1 driver could just directly use those calls to
> > > manage the memory, without any custom code.
> > >
> > > There is an example how this setup works in the s5p-mfc driver[3], but it
> > > needs to be noted that it creates child nodes, because it can have more than
> > > 1 DMA port, which may need its own memory pool. In our case, we wouldn't
> > > need child nodes and could just use the SCP device directly.
> > >
> > > [1] https://elixir.bootlin.com/linux/v5.2-rc7/source/kernel/dma/coherent.c#L345
> > > [2] https://elixir.bootlin.com/linux/v5.2-rc7/source/drivers/of/of_reserved_mem.c#L312
> > > [3] https://elixir.bootlin.com/linux/v5.2-rc7/source/drivers/media/platform/s5p-mfc/s5p_mfc.c#L1075
> > >
> > > Let me also post some specific comments below, in case we end up still
> > > needing any of the code.
> > >
> >
> > Thanks your suggestions.
> >
> > After applying your suggestion in SCP device driver, we could remove
> > mtk_cam-smem.h/c. Currently, we use dma_alloc_coherent with SCP device
> > to get SCP address. We could touch the buffer with this SCP address in
> > SCP processor.
> >
> > After that, we use dma_map_page_attrs with P1 device which supports
> > IOMMU domain to get IOVA address. For this address, we will assign
> > it to our ISP HW device to proceed.
> >
> > Below is the snippet for ISP P1 compose buffer initialization.
> >
> >         ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
> >                                  MAX_COMPOSER_SIZE, &addr, GFP_KERNEL);
> >         if (!ptr) {
> >                 dev_err(dev, "failed to allocate compose memory\n");
> >                 return -ENOMEM;
> >         }
> >         isp_ctx->scp_mem_pa = addr;
> 
> addr contains a DMA address, not a physical address. Could we call it
> scp_mem_dma instead?
> 

Ok, we will rename this.

> >         dev_dbg(dev, "scp addr:%pad\n", &addr);
> >
> >         /* get iova address */
> >         addr = dma_map_page_attrs(dev, phys_to_page(addr), 0,
> 
> addr is a DMA address, so phys_to_page() can't be called on it. The
> simplest thing here would be to use dma_map_single() with ptr as the
> CPU address expected.
> 

Got it. We will revise to use dma_map_single() with ptr.

> >                                   MAX_COMPOSER_SIZE, DMA_BIDIRECTIONAL,
> >                                   DMA_ATTR_SKIP_CPU_SYNC);
> >         if (dma_mapping_error(dev, addr)) {
> >                 isp_ctx->scp_mem_pa = 0;
> 
> We also need to free the allocated memory.
> 

Ok, we will add the dma_unmap_single to free the allocated memory.

> >                 dev_err(dev, "Failed to map scp iova\n");
> >                 return -ENOMEM;
> >         }
> >         isp_ctx->scp_mem_iova = addr;
> >
> > Moreover, we have another meta input buffer usage.
> > For this kind of buffer, it will be allocated by V4L2 framework
> > with dma_alloc_coherent with SCP device. In order to get IOVA,
> > we will add dma_map_page_attrs in vb2_ops' buf_init function.
> > In buf_cleanup function, we will call dma_unmap_page_attrs function.
> 
> As per above, we don't have access to the struct page we want to map.
> We probably want to get the CPU VA using vb2_plane_vaddr() and call
> dma_map_single() instead.
> 

Got it. We will revise this to use dma_map_single() with CPU VA which is
got from vb2_plane_vaddr() function.

> >
> > Based on these current implementation, do you think it is correct?
> > If we got any wrong, please let us know.
> >
> > Btw, we also DMA_ATTR_NO_KERNEL_MAPPING DMA attribte to
> > avoid dma_sync_sg_for_device. Othewise, it will hit the KE.
> > Maybe we could not get the correct sg_table.
> > Do you think it is a bug and need to fix?
> 
> I think DMA_ATTR_NO_KERNEL_MAPPING is good to have for all the buffers
> that don't need to be accessed from the kernel anyway, to avoid
> unnecessary kernel mapping operations. However, for coherent memory
> pool, it doesn't change anything, because the memory always has a
> kernel mapping. We also need the kernel virtual address for
> dma_map_single(). Also the flag doesn't eliminate the need to do the
> sync, e.g. if the userspace accesses the buffer.
> 
> Could you give me more information about the failure you're seeing?
> Where is the dma_sync_sg_for_device() called from? Where do you get
> the sgtable from?
> 
> Best regards,
> Tomasz

Sorry. I forgot provide one information related to this issue.
Here is the call stack of panic KE if we enable DMA_ATTR_NON_CONSISTENT
DMA flag. Maybe we should not enable this flag for coherent memory pool.

[Function]
vb2_dc_alloc

[Code]
	if (!(buf->attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
	    (buf->attrs & DMA_ATTR_NON_CONSISTENT))
		buf->dma_sgt = vb2_dc_get_base_sgt(buf);

[KE]
[   59.234326] pstate: 80000005 (Nzcv daif -PAN -UAO)
[   59.234935] pc : __clean_dcache_area_poc+0x20/0x38
[   59.235537] lr : __swiotlb_sync_sg_for_device+0x74/0x9c
[   59.249430] Call trace:
[   59.249742]  __clean_dcache_area_poc+0x20/0x38
[   59.250303]  vb2_dc_prepare+0x5c/0x6c
[   59.250763]  __buf_prepare+0x790/0x8a4
[   59.251234]  vb2_req_prepare+0x38/0x68
[   59.251707]  vb2_request_validate+0x40/0x9c
[   59.252235]  media_request_ioctl+0x124/0x2a4
[   59.252774]  __arm64_compat_sys_ioctl+0xf4/0x25c
[   59.253356]  el0_svc_common+0xa4/0x154
[   59.253828]  el0_svc_compat_handler+0x2c/0x38
[   59.254377]  el0_svc_compat+0x8/0x18
[   59.254827] Code: 9ac32042 8b010001 d1000443 8a230000 (d50b7a20)
[   59.255592] ---[ end trace eb37ebade032c2fc ]---
[   59.256173] Kernel panic - not syncing: Fatal exception

Thanks,

Jungo
Jungo Lin July 5, 2019, 7:59 a.m. UTC | #5
Hi Tomasz:

On Fri, 2019-07-05 at 13:22 +0900, Tomasz Figa wrote:
> Hi Jungo,
> 
> On Fri, Jul 5, 2019 at 12:33 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
> >
> > Hi Tomasz,

[snip]

> > After applying your suggestion in SCP device driver, we could remove
> > mtk_cam-smem.h/c. Currently, we use dma_alloc_coherent with SCP device
> > to get SCP address. We could touch the buffer with this SCP address in
> > SCP processor.
> >
> > After that, we use dma_map_page_attrs with P1 device which supports
> > IOMMU domain to get IOVA address. For this address, we will assign
> > it to our ISP HW device to proceed.
> >
> > Below is the snippet for ISP P1 compose buffer initialization.
> >
> >         ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
> >                                  MAX_COMPOSER_SIZE, &addr, GFP_KERNEL);
> >         if (!ptr) {
> >                 dev_err(dev, "failed to allocate compose memory\n");
> >                 return -ENOMEM;
> >         }
> >         isp_ctx->scp_mem_pa = addr;
> 
> addr contains a DMA address, not a physical address. Could we call it
> scp_mem_dma instead?
> 
> >         dev_dbg(dev, "scp addr:%pad\n", &addr);
> >
> >         /* get iova address */
> >         addr = dma_map_page_attrs(dev, phys_to_page(addr), 0,
> 
> addr is a DMA address, so phys_to_page() can't be called on it. The
> simplest thing here would be to use dma_map_single() with ptr as the
> CPU address expected.
> 

We have changed to use ma_map_single() with ptr, but encounter IOMMU
error. From the debug log of iommu_dma_map_page[3], we got
0x0000000054800000 instead of expected address: 0x0000000050800000[2].
There is a address offset(0x4000000). If we change to use
dma_map_page_attrs with phys_to_page(addr), the address is correct as we
expected[2]. Do you have any suggestion on this issue? Do we miss
something?

[1]
[    1.344786] __dma_alloc_from_coherent: 0x800000 PAGE_SHIFT:12
device_base:0x0000000050000000 dma:0x0000000050800000
virt_base:ffffff8014000000 va:ffffff8014800000

[    1.346890] mtk-cam 1a000000.camisp: scp addr:0x0000000050800000
va:ffffff8014800000

[    1.347864] iommu_dma_map_page:0x0000000054800000 offset:0
[    1.348562] mtk-cam 1a000000.camisp: iova addr:0x00000000fde00000

[2]
[    1.346738] __dma_alloc_from_coherent: 0x800000 PAGE_SHIFT:12
device_base:0x0000000050000000 dma:0x0000000050800000
virt_base:ffffff8014000000 va:ffffff8014800000
[    1.348841] mtk-cam 1a000000.camisp: scp addr:0x0000000050800000
va:ffffff8014800000
[    1.349816] iommu_dma_map_page:0x0000000050800000 offset:0
[    1.350514] mtk-cam 1a000000.camisp: iova addr:0x00000000fde00000


[3]
dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, int prot)
{
	phys_addr_t phys = page_to_phys(page);
	pr_err("iommu_dma_map_page:%pa offset:%lu\n", &phys, offset);

	return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot,
			iommu_get_dma_domain(dev));
}

[snip]

Best regards,

Jungo
Tomasz Figa July 23, 2019, 7:20 a.m. UTC | #6
Hi Jungo,

On Fri, Jul 5, 2019 at 4:59 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
>
> Hi Tomasz:
>
> On Fri, 2019-07-05 at 13:22 +0900, Tomasz Figa wrote:
> > Hi Jungo,
> >
> > On Fri, Jul 5, 2019 at 12:33 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
>
> [snip]
>
> > > After applying your suggestion in SCP device driver, we could remove
> > > mtk_cam-smem.h/c. Currently, we use dma_alloc_coherent with SCP device
> > > to get SCP address. We could touch the buffer with this SCP address in
> > > SCP processor.
> > >
> > > After that, we use dma_map_page_attrs with P1 device which supports
> > > IOMMU domain to get IOVA address. For this address, we will assign
> > > it to our ISP HW device to proceed.
> > >
> > > Below is the snippet for ISP P1 compose buffer initialization.
> > >
> > >         ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
> > >                                  MAX_COMPOSER_SIZE, &addr, GFP_KERNEL);
> > >         if (!ptr) {
> > >                 dev_err(dev, "failed to allocate compose memory\n");
> > >                 return -ENOMEM;
> > >         }
> > >         isp_ctx->scp_mem_pa = addr;
> >
> > addr contains a DMA address, not a physical address. Could we call it
> > scp_mem_dma instead?
> >
> > >         dev_dbg(dev, "scp addr:%pad\n", &addr);
> > >
> > >         /* get iova address */
> > >         addr = dma_map_page_attrs(dev, phys_to_page(addr), 0,
> >
> > addr is a DMA address, so phys_to_page() can't be called on it. The
> > simplest thing here would be to use dma_map_single() with ptr as the
> > CPU address expected.
> >
>
> We have changed to use ma_map_single() with ptr, but encounter IOMMU
> error. From the debug log of iommu_dma_map_page[3], we got
> 0x0000000054800000 instead of expected address: 0x0000000050800000[2].
> There is a address offset(0x4000000). If we change to use
> dma_map_page_attrs with phys_to_page(addr), the address is correct as we
> expected[2]. Do you have any suggestion on this issue? Do we miss
> something?

Sorry for the late reply. Could you show me the code changes you made
to use dma_map_single()? It would sound like the virtual address
passed to dma_map_single() isn't correct.

Best regards,
Tomasz

>
> [1]
> [    1.344786] __dma_alloc_from_coherent: 0x800000 PAGE_SHIFT:12
> device_base:0x0000000050000000 dma:0x0000000050800000
> virt_base:ffffff8014000000 va:ffffff8014800000
>
> [    1.346890] mtk-cam 1a000000.camisp: scp addr:0x0000000050800000
> va:ffffff8014800000
>
> [    1.347864] iommu_dma_map_page:0x0000000054800000 offset:0
> [    1.348562] mtk-cam 1a000000.camisp: iova addr:0x00000000fde00000
>
> [2]
> [    1.346738] __dma_alloc_from_coherent: 0x800000 PAGE_SHIFT:12
> device_base:0x0000000050000000 dma:0x0000000050800000
> virt_base:ffffff8014000000 va:ffffff8014800000
> [    1.348841] mtk-cam 1a000000.camisp: scp addr:0x0000000050800000
> va:ffffff8014800000
> [    1.349816] iommu_dma_map_page:0x0000000050800000 offset:0
> [    1.350514] mtk-cam 1a000000.camisp: iova addr:0x00000000fde00000
>
>
> [3]
> dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
>                 unsigned long offset, size_t size, int prot)
> {
>         phys_addr_t phys = page_to_phys(page);
>         pr_err("iommu_dma_map_page:%pa offset:%lu\n", &phys, offset);
>
>         return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot,
>                         iommu_get_dma_domain(dev));
> }
>
> [snip]
>
> Best regards,
>
> Jungo
>
Jungo Lin July 23, 2019, 8:21 a.m. UTC | #7
Hi, Tomasz:

On Tue, 2019-07-23 at 16:20 +0900, Tomasz Figa wrote:
> Hi Jungo,
> 
> On Fri, Jul 5, 2019 at 4:59 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
> >
> > Hi Tomasz:
> >
> > On Fri, 2019-07-05 at 13:22 +0900, Tomasz Figa wrote:
> > > Hi Jungo,
> > >
> > > On Fri, Jul 5, 2019 at 12:33 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> >
> > [snip]
> >
> > > > After applying your suggestion in SCP device driver, we could remove
> > > > mtk_cam-smem.h/c. Currently, we use dma_alloc_coherent with SCP device
> > > > to get SCP address. We could touch the buffer with this SCP address in
> > > > SCP processor.
> > > >
> > > > After that, we use dma_map_page_attrs with P1 device which supports
> > > > IOMMU domain to get IOVA address. For this address, we will assign
> > > > it to our ISP HW device to proceed.
> > > >
> > > > Below is the snippet for ISP P1 compose buffer initialization.
> > > >
> > > >         ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
> > > >                                  MAX_COMPOSER_SIZE, &addr, GFP_KERNEL);
> > > >         if (!ptr) {
> > > >                 dev_err(dev, "failed to allocate compose memory\n");
> > > >                 return -ENOMEM;
> > > >         }
> > > >         isp_ctx->scp_mem_pa = addr;
> > >
> > > addr contains a DMA address, not a physical address. Could we call it
> > > scp_mem_dma instead?
> > >
> > > >         dev_dbg(dev, "scp addr:%pad\n", &addr);
> > > >
> > > >         /* get iova address */
> > > >         addr = dma_map_page_attrs(dev, phys_to_page(addr), 0,
> > >
> > > addr is a DMA address, so phys_to_page() can't be called on it. The
> > > simplest thing here would be to use dma_map_single() with ptr as the
> > > CPU address expected.
> > >
> >
> > We have changed to use ma_map_single() with ptr, but encounter IOMMU
> > error. From the debug log of iommu_dma_map_page[3], we got
> > 0x0000000054800000 instead of expected address: 0x0000000050800000[2].
> > There is a address offset(0x4000000). If we change to use
> > dma_map_page_attrs with phys_to_page(addr), the address is correct as we
> > expected[2]. Do you have any suggestion on this issue? Do we miss
> > something?
> 
> Sorry for the late reply. Could you show me the code changes you made
> to use dma_map_single()? It would sound like the virtual address
> passed to dma_map_single() isn't correct.
> 
> Best regards,
> Tomasz
> 


Please check the below code snippet in today's testing.

	p1_dev->cam_dev.smem_dev = &p1_dev->scp_pdev->dev;
	ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
				 MTK_ISP_COMPOSER_MEM_SIZE, &addr, GFP_KERNEL);
	if (!ptr) {
		dev_err(dev, "failed to allocate compose memory\n");
		return -ENOMEM;
	}
	p1_dev->composer_scp_addr = addr;
	p1_dev->composer_virt_addr = ptr;
	dev_info(dev, "scp addr:%pad va:%pK\n", &addr, ptr);

	/* get iova address */
	addr = dma_map_single(dev, ptr, MTK_ISP_COMPOSER_MEM_SIZE,
DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, addr)) {
		dma_free_coherent(p1_dev->cam_dev.smem_dev,
				  MTK_ISP_COMPOSER_MEM_SIZE,
				  ptr, p1_dev->composer_scp_addr);
		dev_err(dev, "Failed to map scp iova\n");
		ret = -ENOMEM;
		goto fail_free_mem;
	}
	p1_dev->composer_iova = addr;
	dev_info(dev, "scp iova addr:%pad\n", &addr);

Moreover, below is extracted log[2].

We guess the virtual address which is returned by dma_alloc_coherent
function is not valid kernel logical address. It is actually returned by
memremap() in dma_init_coherent_memory(). Moreover, dma_map_single()
will call virt_to_page() function. For virt_to_page function, it
requires a logical address[1].

[1]https://www.oreilly.com/library/view/linux-device-drivers/0596005903/ch15.html

[2]
  322 [    1.238269] mtk-cam-p1 1a006000.camisp: scp
addr:0x0000000052000000 va:00000000a3adc471
  323 [    1.239582] mtk-cam-p1 1a006000.camisp: scp iova
addr:0x00000000fde00000
 7716 [    1.238963] mtk-cam-p1 1a006000.camisp: scp
addr:0x0000000052000000 va:0000000042ec580f
 7717 [    1.240276] mtk-cam-p1 1a006000.camisp: scp iova
addr:0x00000000fde00000
15088 [    1.239309] mtk-cam-p1 1a006000.camisp: scp
addr:0x0000000052000000 va:000000005e5b3462
15089 [    1.240626] mtk-cam-p1 1a006000.camisp: scp iova
addr:0x00000000fde00000

Best regards,

Jungo

> >
> > [1]
> > [    1.344786] __dma_alloc_from_coherent: 0x800000 PAGE_SHIFT:12
> > device_base:0x0000000050000000 dma:0x0000000050800000
> > virt_base:ffffff8014000000 va:ffffff8014800000
> >
> > [    1.346890] mtk-cam 1a000000.camisp: scp addr:0x0000000050800000
> > va:ffffff8014800000
> >
> > [    1.347864] iommu_dma_map_page:0x0000000054800000 offset:0
> > [    1.348562] mtk-cam 1a000000.camisp: iova addr:0x00000000fde00000
> >
> > [2]
> > [    1.346738] __dma_alloc_from_coherent: 0x800000 PAGE_SHIFT:12
> > device_base:0x0000000050000000 dma:0x0000000050800000
> > virt_base:ffffff8014000000 va:ffffff8014800000
> > [    1.348841] mtk-cam 1a000000.camisp: scp addr:0x0000000050800000
> > va:ffffff8014800000
> > [    1.349816] iommu_dma_map_page:0x0000000050800000 offset:0
> > [    1.350514] mtk-cam 1a000000.camisp: iova addr:0x00000000fde00000
> >
> >
> > [3]
> > dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
> >                 unsigned long offset, size_t size, int prot)
> > {
> >         phys_addr_t phys = page_to_phys(page);
> >         pr_err("iommu_dma_map_page:%pa offset:%lu\n", &phys, offset);
> >
> >         return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot,
> >                         iommu_get_dma_domain(dev));
> > }
> >
> > [snip]
> >
> > Best regards,
> >
> > Jungo
> >
> 
> _______________________________________________
> Linux-mediatek mailing list
> Linux-mediatek@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-mediatek
Tomasz Figa July 26, 2019, 5:15 a.m. UTC | #8
On Tue, Jul 23, 2019 at 5:22 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
>
> Hi, Tomasz:
>
> On Tue, 2019-07-23 at 16:20 +0900, Tomasz Figa wrote:
> > Hi Jungo,
> >
> > On Fri, Jul 5, 2019 at 4:59 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
> > >
> > > Hi Tomasz:
> > >
> > > On Fri, 2019-07-05 at 13:22 +0900, Tomasz Figa wrote:
> > > > Hi Jungo,
> > > >
> > > > On Fri, Jul 5, 2019 at 12:33 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > >
> > > [snip]
> > >
> > > > > After applying your suggestion in SCP device driver, we could remove
> > > > > mtk_cam-smem.h/c. Currently, we use dma_alloc_coherent with SCP device
> > > > > to get SCP address. We could touch the buffer with this SCP address in
> > > > > SCP processor.
> > > > >
> > > > > After that, we use dma_map_page_attrs with P1 device which supports
> > > > > IOMMU domain to get IOVA address. For this address, we will assign
> > > > > it to our ISP HW device to proceed.
> > > > >
> > > > > Below is the snippet for ISP P1 compose buffer initialization.
> > > > >
> > > > >         ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
> > > > >                                  MAX_COMPOSER_SIZE, &addr, GFP_KERNEL);
> > > > >         if (!ptr) {
> > > > >                 dev_err(dev, "failed to allocate compose memory\n");
> > > > >                 return -ENOMEM;
> > > > >         }
> > > > >         isp_ctx->scp_mem_pa = addr;
> > > >
> > > > addr contains a DMA address, not a physical address. Could we call it
> > > > scp_mem_dma instead?
> > > >
> > > > >         dev_dbg(dev, "scp addr:%pad\n", &addr);
> > > > >
> > > > >         /* get iova address */
> > > > >         addr = dma_map_page_attrs(dev, phys_to_page(addr), 0,
> > > >
> > > > addr is a DMA address, so phys_to_page() can't be called on it. The
> > > > simplest thing here would be to use dma_map_single() with ptr as the
> > > > CPU address expected.
> > > >
> > >
> > > We have changed to use ma_map_single() with ptr, but encounter IOMMU
> > > error. From the debug log of iommu_dma_map_page[3], we got
> > > 0x0000000054800000 instead of expected address: 0x0000000050800000[2].
> > > There is a address offset(0x4000000). If we change to use
> > > dma_map_page_attrs with phys_to_page(addr), the address is correct as we
> > > expected[2]. Do you have any suggestion on this issue? Do we miss
> > > something?
> >
> > Sorry for the late reply. Could you show me the code changes you made
> > to use dma_map_single()? It would sound like the virtual address
> > passed to dma_map_single() isn't correct.
> >
> > Best regards,
> > Tomasz
> >
>
>
> Please check the below code snippet in today's testing.
>
>         p1_dev->cam_dev.smem_dev = &p1_dev->scp_pdev->dev;
>         ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
>                                  MTK_ISP_COMPOSER_MEM_SIZE, &addr, GFP_KERNEL);
>         if (!ptr) {
>                 dev_err(dev, "failed to allocate compose memory\n");
>                 return -ENOMEM;
>         }
>         p1_dev->composer_scp_addr = addr;
>         p1_dev->composer_virt_addr = ptr;
>         dev_info(dev, "scp addr:%pad va:%pK\n", &addr, ptr);
>
>         /* get iova address */
>         addr = dma_map_single(dev, ptr, MTK_ISP_COMPOSER_MEM_SIZE,
> DMA_BIDIRECTIONAL);
>         if (dma_mapping_error(dev, addr)) {
>                 dma_free_coherent(p1_dev->cam_dev.smem_dev,
>                                   MTK_ISP_COMPOSER_MEM_SIZE,
>                                   ptr, p1_dev->composer_scp_addr);
>                 dev_err(dev, "Failed to map scp iova\n");
>                 ret = -ENOMEM;
>                 goto fail_free_mem;
>         }
>         p1_dev->composer_iova = addr;
>         dev_info(dev, "scp iova addr:%pad\n", &addr);
>
> Moreover, below is extracted log[2].
>
> We guess the virtual address which is returned by dma_alloc_coherent
> function is not valid kernel logical address. It is actually returned by
> memremap() in dma_init_coherent_memory(). Moreover, dma_map_single()
> will call virt_to_page() function. For virt_to_page function, it
> requires a logical address[1].
>
> [1]https://www.oreilly.com/library/view/linux-device-drivers/0596005903/ch15.html
>

Indeed virt_to_page() works only with kernel LOWMEM addresses. Whether
virt_to_page() is the right thing to do in dma_map_single() is a good
question, but let's assume it was implemented like this for a reason.

However, you also can't call phys_to_page() on the DMA addresses
returned by dma_alloc_*() either. It works just by luck, because SCP
DMA addresses and CPU physical addresses are numerically the same.

Could you try dma_get_sgtable() with the SCP struct device and then
dma_map_sg() with the P1 struct device?

Best regards,
Tomasz

> [2]
>   322 [    1.238269] mtk-cam-p1 1a006000.camisp: scp
> addr:0x0000000052000000 va:00000000a3adc471
>   323 [    1.239582] mtk-cam-p1 1a006000.camisp: scp iova
> addr:0x00000000fde00000
>  7716 [    1.238963] mtk-cam-p1 1a006000.camisp: scp
> addr:0x0000000052000000 va:0000000042ec580f
>  7717 [    1.240276] mtk-cam-p1 1a006000.camisp: scp iova
> addr:0x00000000fde00000
> 15088 [    1.239309] mtk-cam-p1 1a006000.camisp: scp
> addr:0x0000000052000000 va:000000005e5b3462
> 15089 [    1.240626] mtk-cam-p1 1a006000.camisp: scp iova
> addr:0x00000000fde00000
>
> Best regards,
>
> Jungo
>
> > >
> > > [1]
> > > [    1.344786] __dma_alloc_from_coherent: 0x800000 PAGE_SHIFT:12
> > > device_base:0x0000000050000000 dma:0x0000000050800000
> > > virt_base:ffffff8014000000 va:ffffff8014800000
> > >
> > > [    1.346890] mtk-cam 1a000000.camisp: scp addr:0x0000000050800000
> > > va:ffffff8014800000
> > >
> > > [    1.347864] iommu_dma_map_page:0x0000000054800000 offset:0
> > > [    1.348562] mtk-cam 1a000000.camisp: iova addr:0x00000000fde00000
> > >
> > > [2]
> > > [    1.346738] __dma_alloc_from_coherent: 0x800000 PAGE_SHIFT:12
> > > device_base:0x0000000050000000 dma:0x0000000050800000
> > > virt_base:ffffff8014000000 va:ffffff8014800000
> > > [    1.348841] mtk-cam 1a000000.camisp: scp addr:0x0000000050800000
> > > va:ffffff8014800000
> > > [    1.349816] iommu_dma_map_page:0x0000000050800000 offset:0
> > > [    1.350514] mtk-cam 1a000000.camisp: iova addr:0x00000000fde00000
> > >
> > >
> > > [3]
> > > dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
> > >                 unsigned long offset, size_t size, int prot)
> > > {
> > >         phys_addr_t phys = page_to_phys(page);
> > >         pr_err("iommu_dma_map_page:%pa offset:%lu\n", &phys, offset);
> > >
> > >         return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot,
> > >                         iommu_get_dma_domain(dev));
> > > }
> > >
> > > [snip]
> > >
> > > Best regards,
> > >
> > > Jungo
> > >
> >
> > _______________________________________________
> > Linux-mediatek mailing list
> > Linux-mediatek@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-mediatek
>
>
Christoph Hellwig July 26, 2019, 7:41 a.m. UTC | #9
On Fri, Jul 26, 2019 at 02:15:14PM +0900, Tomasz Figa wrote:
> Could you try dma_get_sgtable() with the SCP struct device and then
> dma_map_sg() with the P1 struct device?

Please don't do that.  dma_get_sgtable is a pretty broken API (see
the common near the arm implementation) and we should not add more
users of it.  If you want a piece of memory that can be mapped to
multiple devices allocate it using alloc_pages and then just map
it to each device.
Tomasz Figa July 26, 2019, 7:42 a.m. UTC | #10
On Fri, Jul 26, 2019 at 4:41 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Fri, Jul 26, 2019 at 02:15:14PM +0900, Tomasz Figa wrote:
> > Could you try dma_get_sgtable() with the SCP struct device and then
> > dma_map_sg() with the P1 struct device?
>
> Please don't do that.  dma_get_sgtable is a pretty broken API (see
> the common near the arm implementation) and we should not add more
> users of it.  If you want a piece of memory that can be mapped to
> multiple devices allocate it using alloc_pages and then just map
> it to each device.

Thanks for taking a look at this thread.

Unfortunately that wouldn't work. We have a specific reserved memory
pool that is the only memory area accessible to one of the devices.
Any idea how to handle this?

Best regards,
Tomasz
Robin Murphy July 26, 2019, 11:04 a.m. UTC | #11
On 26/07/2019 08:42, Tomasz Figa wrote:
> On Fri, Jul 26, 2019 at 4:41 PM Christoph Hellwig <hch@infradead.org> wrote:
>>
>> On Fri, Jul 26, 2019 at 02:15:14PM +0900, Tomasz Figa wrote:
>>> Could you try dma_get_sgtable() with the SCP struct device and then
>>> dma_map_sg() with the P1 struct device?
>>
>> Please don't do that.  dma_get_sgtable is a pretty broken API (see
>> the common near the arm implementation) and we should not add more
>> users of it.  If you want a piece of memory that can be mapped to
>> multiple devices allocate it using alloc_pages and then just map
>> it to each device.
> 
> Thanks for taking a look at this thread.
> 
> Unfortunately that wouldn't work. We have a specific reserved memory
> pool that is the only memory area accessible to one of the devices.
> Any idea how to handle this?

If it's reserved in the sense of being outside struct-page-backed 
"kernel memory", then provided you have a consistent CPU physical 
address it might be reasonable for other devices to access it via 
dma_map_resource().

Robin.
Jungo Lin July 26, 2019, 11:59 a.m. UTC | #12
Hi Robin:

On Fri, 2019-07-26 at 12:04 +0100, Robin Murphy wrote:
> On 26/07/2019 08:42, Tomasz Figa wrote:
> > On Fri, Jul 26, 2019 at 4:41 PM Christoph Hellwig <hch@infradead.org> wrote:
> >>
> >> On Fri, Jul 26, 2019 at 02:15:14PM +0900, Tomasz Figa wrote:
> >>> Could you try dma_get_sgtable() with the SCP struct device and then
> >>> dma_map_sg() with the P1 struct device?
> >>
> >> Please don't do that.  dma_get_sgtable is a pretty broken API (see
> >> the common near the arm implementation) and we should not add more
> >> users of it.  If you want a piece of memory that can be mapped to
> >> multiple devices allocate it using alloc_pages and then just map
> >> it to each device.
> > 
> > Thanks for taking a look at this thread.
> > 
> > Unfortunately that wouldn't work. We have a specific reserved memory
> > pool that is the only memory area accessible to one of the devices.
> > Any idea how to handle this?
> 
> If it's reserved in the sense of being outside struct-page-backed 
> "kernel memory", then provided you have a consistent CPU physical 
> address it might be reasonable for other devices to access it via 
> dma_map_resource().
> 
> Robin.

Thank you for your suggestion.

After revising to use dma_map_resource(), it is worked. Below is the
current implementation. Pleas kindly help us to check if there is any
misunderstanding.

#define MTK_ISP_COMPOSER_MEM_SIZE		0x200000

	/*
	 * Allocate coherent reserved memory for SCP firmware usage.
	 * The size of SCP composer's memory is fixed to 0x200000
	 * for the requirement of firmware.
	 */
	ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
				 MTK_ISP_COMPOSER_MEM_SIZE, &addr, GFP_KERNEL);
	if (!ptr) {
		dev_err(dev, "failed to allocate compose memory\n");
		return -ENOMEM;
	}
	p1_dev->composer_scp_addr = addr;
	p1_dev->composer_virt_addr = ptr;
	dev_dbg(dev, "scp addr:%pad va:%pK\n", &addr, ptr);

	/*
	 * This reserved memory is also be used by ISP P1 HW.
	 * Need to get iova address for ISP P1 DMA.
	 */
	addr = dma_map_resource(dev, addr, MTK_ISP_COMPOSER_MEM_SIZE,
				DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, addr)) {
		dev_err(dev, "Failed to map scp iova\n");
		ret = -ENOMEM;
		goto fail_free_mem;
	}
	p1_dev->composer_iova = addr;
	dev_info(dev, "scp iova addr:%pad\n", &addr);

Moreover, appropriate Tomasz & Christoph's help on this issue.

Best regards,

Jungo
Tomasz Figa July 26, 2019, 2:04 p.m. UTC | #13
On Fri, Jul 26, 2019 at 8:59 PM Jungo Lin <jungo.lin@mediatek.com> wrote:
>
> Hi Robin:
>
> On Fri, 2019-07-26 at 12:04 +0100, Robin Murphy wrote:
> > On 26/07/2019 08:42, Tomasz Figa wrote:
> > > On Fri, Jul 26, 2019 at 4:41 PM Christoph Hellwig <hch@infradead.org> wrote:
> > >>
> > >> On Fri, Jul 26, 2019 at 02:15:14PM +0900, Tomasz Figa wrote:
> > >>> Could you try dma_get_sgtable() with the SCP struct device and then
> > >>> dma_map_sg() with the P1 struct device?
> > >>
> > >> Please don't do that.  dma_get_sgtable is a pretty broken API (see
> > >> the common near the arm implementation) and we should not add more
> > >> users of it.  If you want a piece of memory that can be mapped to
> > >> multiple devices allocate it using alloc_pages and then just map
> > >> it to each device.
> > >
> > > Thanks for taking a look at this thread.
> > >
> > > Unfortunately that wouldn't work. We have a specific reserved memory
> > > pool that is the only memory area accessible to one of the devices.
> > > Any idea how to handle this?
> >
> > If it's reserved in the sense of being outside struct-page-backed
> > "kernel memory", then provided you have a consistent CPU physical
> > address it might be reasonable for other devices to access it via
> > dma_map_resource().
> >
> > Robin.
>
> Thank you for your suggestion.
>
> After revising to use dma_map_resource(), it is worked. Below is the
> current implementation. Pleas kindly help us to check if there is any
> misunderstanding.
>
> #define MTK_ISP_COMPOSER_MEM_SIZE               0x200000
>
>         /*
>          * Allocate coherent reserved memory for SCP firmware usage.
>          * The size of SCP composer's memory is fixed to 0x200000
>          * for the requirement of firmware.
>          */
>         ptr = dma_alloc_coherent(p1_dev->cam_dev.smem_dev,
>                                  MTK_ISP_COMPOSER_MEM_SIZE, &addr, GFP_KERNEL);
>         if (!ptr) {
>                 dev_err(dev, "failed to allocate compose memory\n");
>                 return -ENOMEM;
>         }
>         p1_dev->composer_scp_addr = addr;
>         p1_dev->composer_virt_addr = ptr;
>         dev_dbg(dev, "scp addr:%pad va:%pK\n", &addr, ptr);
>
>         /*
>          * This reserved memory is also be used by ISP P1 HW.
>          * Need to get iova address for ISP P1 DMA.
>          */
>         addr = dma_map_resource(dev, addr, MTK_ISP_COMPOSER_MEM_SIZE,
>                                 DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);

This is still incorrect, because addr is a DMA address, but the second
argument to dma_map_resource() is a physical address.

>         if (dma_mapping_error(dev, addr)) {
>                 dev_err(dev, "Failed to map scp iova\n");
>                 ret = -ENOMEM;
>                 goto fail_free_mem;
>         }
>         p1_dev->composer_iova = addr;
>         dev_info(dev, "scp iova addr:%pad\n", &addr);
>
> Moreover, appropriate Tomasz & Christoph's help on this issue.

Robin, the memory is specified using the reserved-memory DT binding
and managed by the coherent DMA pool framework. We can allocate from
it using dma_alloc_coherent(), which gives us a DMA address, not CPU
physial address (although in practice on this platform they are equal
numerically).

Best regards,
Tomasz
diff mbox series

Patch

diff --git a/drivers/media/platform/mtk-isp/isp_50/cam/Makefile b/drivers/media/platform/mtk-isp/isp_50/cam/Makefile
index 95f0b1c8fa1c..d545ca6f09c5 100644
--- a/drivers/media/platform/mtk-isp/isp_50/cam/Makefile
+++ b/drivers/media/platform/mtk-isp/isp_50/cam/Makefile
@@ -4,5 +4,6 @@  mtk-cam-isp-objs += mtk_cam-ctrl.o
 mtk-cam-isp-objs += mtk_cam-v4l2-util.o
 mtk-cam-isp-objs += mtk_cam.o
 mtk-cam-isp-objs += mtk_cam-scp.o
+mtk-cam-isp-objs += mtk_cam-smem.o
 
 obj-$(CONFIG_VIDEO_MEDIATEK_ISP_PASS1) += mtk-cam-isp.o
\ No newline at end of file
diff --git a/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c b/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c
new file mode 100644
index 000000000000..a9845668ce10
--- /dev/null
+++ b/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.c
@@ -0,0 +1,304 @@ 
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2018 MediaTek Inc.
+
+#include <asm/cacheflush.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/mtk_scp.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "mtk_cam-smem.h"
+
+static struct dma_map_ops smem_dma_ops;
+
+struct mtk_cam_smem_dev {
+	struct device *dev;
+	struct sg_table sgt;
+	struct page **smem_pages;
+	dma_addr_t smem_base;
+	dma_addr_t smem_dma_base;
+	int smem_size;
+};
+
+struct dma_coherent_mem {
+	void		*virt_base;
+	dma_addr_t	device_base;
+	unsigned long	pfn_base;
+	int		size;
+	int		flags;
+	unsigned long	*bitmap;
+	spinlock_t	spinlock; /* dma_coherent_mem attributes protection */
+	bool		use_dev_dma_pfn_offset;
+};
+
+dma_addr_t mtk_cam_smem_iova_to_scp_addr(struct device *dev,
+					 dma_addr_t iova)
+{
+	struct iommu_domain *domain;
+	dma_addr_t addr, limit;
+	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
+
+	domain = iommu_get_domain_for_dev(dev);
+	if (!domain) {
+		dev_warn(dev, "No iommu group domain\n");
+		return 0;
+	}
+
+	addr = iommu_iova_to_phys(domain, iova);
+	limit = smem_dev->smem_base + smem_dev->smem_size;
+	if (addr < smem_dev->smem_base || addr >= limit) {
+		dev_err(dev,
+			"Unexpected scp_addr:%pad must >= %pad and < %pad)\n",
+			&addr, &smem_dev->smem_base, &limit);
+		return 0;
+	}
+	return addr;
+}
+
+static int mtk_cam_smem_get_sgtable(struct device *dev,
+				    struct sg_table *sgt,
+				    void *cpu_addr, dma_addr_t dma_addr,
+				    size_t size, unsigned long attrs)
+{
+	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
+	size_t pages_count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	dma_addr_t scp_addr = mtk_cam_smem_iova_to_scp_addr(dev, dma_addr);
+	u32 pages_start = (scp_addr - smem_dev->smem_base) >> PAGE_SHIFT;
+
+	dev_dbg(dev,
+		"%s:page:%u va:%pK scp addr:%pad, aligned size:%zu pages:%zu\n",
+		__func__, pages_start, cpu_addr, &scp_addr, size, pages_count);
+
+	return sg_alloc_table_from_pages(sgt,
+		smem_dev->smem_pages + pages_start,
+		pages_count, 0, size, GFP_KERNEL);
+}
+
+static void *mtk_cam_smem_get_cpu_addr(struct mtk_cam_smem_dev *smem_dev,
+				       dma_addr_t addr)
+{
+	struct device *dev = smem_dev->dev;
+	struct dma_coherent_mem *dma_mem = dev->dma_mem;
+
+	if (addr < smem_dev->smem_base ||
+	    addr > smem_dev->smem_base + smem_dev->smem_size) {
+		dev_err(dev, "Invalid scp_addr %pad from sg\n", &addr);
+		return NULL;
+	}
+	return dma_mem->virt_base + (addr - smem_dev->smem_base);
+}
+
+static void mtk_cam_smem_sync_sg_for_cpu(struct device *dev,
+					 struct scatterlist *sgl, int nelems,
+					 enum dma_data_direction dir)
+{
+	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
+	dma_addr_t scp_addr = sg_phys(sgl);
+	void *cpu_addr = mtk_cam_smem_get_cpu_addr(smem_dev, scp_addr);
+
+	dev_dbg(dev,
+		"__dma_unmap_area:scp_addr:%pad,vaddr:%pK,size:%d,dir:%d\n",
+		&scp_addr, cpu_addr, sgl->length, dir);
+	__dma_unmap_area(cpu_addr, sgl->length, dir);
+}
+
+static void mtk_cam_smem_sync_sg_for_device(struct device *dev,
+					    struct scatterlist *sgl,
+					    int nelems,
+					    enum dma_data_direction dir)
+{
+	struct mtk_cam_smem_dev *smem_dev = dev_get_drvdata(dev);
+	dma_addr_t scp_addr = sg_phys(sgl);
+	void *cpu_addr = mtk_cam_smem_get_cpu_addr(smem_dev, scp_addr);
+
+	dev_dbg(dev,
+		"__dma_map_area:scp_addr:%pad,vaddr:%pK,size:%d,dir:%d\n",
+		&scp_addr, cpu_addr, sgl->length, dir);
+	__dma_map_area(cpu_addr, sgl->length, dir);
+}
+
+static void mtk_cam_smem_setup_dma_ops(struct device *dev,
+				       struct dma_map_ops *smem_ops)
+{
+	memcpy((void *)smem_ops, dev->dma_ops, sizeof(*smem_ops));
+	smem_ops->get_sgtable = mtk_cam_smem_get_sgtable;
+	smem_ops->sync_sg_for_device = mtk_cam_smem_sync_sg_for_device;
+	smem_ops->sync_sg_for_cpu = mtk_cam_smem_sync_sg_for_cpu;
+	set_dma_ops(dev, smem_ops);
+}
+
+static int mtk_cam_reserved_drm_sg_init(struct mtk_cam_smem_dev *smem_dev)
+{
+	u32 size_align, n_pages;
+	struct device *dev = smem_dev->dev;
+	struct sg_table *sgt = &smem_dev->sgt;
+	struct page **pages;
+	dma_addr_t dma_addr;
+	unsigned int i;
+	int ret;
+
+	smem_dev->smem_base = scp_get_reserve_mem_phys(SCP_ISP_MEM2_ID);
+	smem_dev->smem_size = scp_get_reserve_mem_size(SCP_ISP_MEM2_ID);
+	if (!smem_dev->smem_base || !smem_dev->smem_size)
+		return -EPROBE_DEFER;
+
+	dev_info(dev, "%s dev:0x%pK base:%pad size:%u MiB\n",
+		 __func__,
+		 smem_dev->dev,
+		 &smem_dev->smem_base,
+		 (smem_dev->smem_size / SZ_1M));
+
+	size_align = PAGE_ALIGN(smem_dev->smem_size);
+	n_pages = size_align >> PAGE_SHIFT;
+
+	pages = kmalloc_array(n_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	for (i = 0; i < n_pages; i++)
+		pages[i] = phys_to_page(smem_dev->smem_base + i * PAGE_SIZE);
+
+	ret = sg_alloc_table_from_pages(sgt, pages, n_pages, 0,
+					size_align, GFP_KERNEL);
+	if (ret) {
+		dev_err(dev, "failed to alloca sg table:%d\n", ret);
+		goto fail_table_alloc;
+	}
+	sgt->nents = dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents,
+				      DMA_BIDIRECTIONAL,
+				      DMA_ATTR_SKIP_CPU_SYNC);
+	if (!sgt->nents) {
+		dev_err(dev, "failed to dma sg map\n");
+		goto fail_map;
+	}
+
+	dma_addr = sg_dma_address(sgt->sgl);
+	ret = dma_declare_coherent_memory(dev, smem_dev->smem_base,
+					  dma_addr, size_align,
+					  DMA_MEMORY_EXCLUSIVE);
+	if (ret) {
+		dev_err(dev, "Unable to declare smem  memory:%d\n", ret);
+		goto fail_map;
+	}
+
+	dev_info(dev, "Coherent mem pa:%pad/%pad, size:%d\n",
+		 &smem_dev->smem_base, &dma_addr, size_align);
+
+	smem_dev->smem_size = size_align;
+	smem_dev->smem_pages = pages;
+	smem_dev->smem_dma_base = dma_addr;
+
+	return 0;
+
+fail_map:
+	sg_free_table(sgt);
+fail_table_alloc:
+	while (n_pages--)
+		__free_page(pages[n_pages]);
+	kfree(pages);
+
+	return -ENOMEM;
+}
+
+/* DMA memory related helper functions */
+static void mtk_cam_memdev_release(struct device *dev)
+{
+	vb2_dma_contig_clear_max_seg_size(dev);
+}
+
+static struct device *mtk_cam_alloc_smem_dev(struct device *dev,
+					     const char *name)
+{
+	struct device *child;
+	int ret;
+
+	child = devm_kzalloc(dev, sizeof(*child), GFP_KERNEL);
+	if (!child)
+		return NULL;
+
+	child->parent = dev;
+	child->iommu_group = dev->iommu_group;
+	child->release = mtk_cam_memdev_release;
+	dev_set_name(child, name);
+	set_dma_ops(child, get_dma_ops(dev));
+	child->dma_mask = dev->dma_mask;
+	ret = dma_set_coherent_mask(child, DMA_BIT_MASK(32));
+	if (ret)
+		return NULL;
+
+	vb2_dma_contig_set_max_seg_size(child, DMA_BIT_MASK(32));
+
+	if (device_register(child)) {
+		device_del(child);
+		return NULL;
+	}
+
+	return child;
+}
+
+static int mtk_cam_composer_dma_init(struct mtk_isp_p1_ctx *isp_ctx)
+{
+	struct isp_p1_device *p1_dev = p1_ctx_to_dev(isp_ctx);
+	struct device *dev = &p1_dev->pdev->dev;
+	u32 size;
+	dma_addr_t addr;
+
+	isp_ctx->scp_mem_pa = scp_get_reserve_mem_phys(SCP_ISP_MEM_ID);
+	size = PAGE_ALIGN(scp_get_reserve_mem_size(SCP_ISP_MEM_ID));
+	if (!isp_ctx->scp_mem_pa || !size)
+		return -EPROBE_DEFER;
+
+	dev_info(dev, "scp addr:%pad size:0x%x\n", &isp_ctx->scp_mem_pa, size);
+
+	/* get iova address */
+	addr = dma_map_page_attrs(dev, phys_to_page(isp_ctx->scp_mem_pa), 0,
+				  size, DMA_BIDIRECTIONAL,
+				  DMA_ATTR_SKIP_CPU_SYNC);
+	if (dma_mapping_error(dev, addr)) {
+		isp_ctx->scp_mem_pa = 0;
+		dev_err(dev, "Failed to map scp iova\n");
+		return -ENOMEM;
+	}
+
+	isp_ctx->scp_mem_iova = addr;
+
+	return 0;
+}
+
+int mtk_cam_reserved_memory_init(struct isp_p1_device *p1_dev)
+{
+	struct device *dev = &p1_dev->pdev->dev;
+	struct mtk_cam_smem_dev *smem_dev;
+	int ret;
+
+	ret = mtk_cam_composer_dma_init(&p1_dev->isp_ctx);
+	if (ret)
+		return ret;
+
+	/* Allocate context */
+	smem_dev = devm_kzalloc(dev, sizeof(*smem_dev), GFP_KERNEL);
+	if (!smem_dev)
+		return -ENOMEM;
+
+	smem_dev->dev = mtk_cam_alloc_smem_dev(dev, "cam-smem");
+	if (!smem_dev->dev) {
+		dev_err(dev, "failed to alloc smem device\n");
+		return -ENODEV;
+	}
+	dev_set_drvdata(smem_dev->dev, smem_dev);
+	p1_dev->cam_dev.smem_dev = smem_dev->dev;
+
+	ret = mtk_cam_reserved_drm_sg_init(smem_dev);
+	if (ret)
+		return ret;
+
+	mtk_cam_smem_setup_dma_ops(smem_dev->dev, &smem_dma_ops);
+
+	return 0;
+}
diff --git a/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.h b/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.h
new file mode 100644
index 000000000000..981d47178e99
--- /dev/null
+++ b/drivers/media/platform/mtk-isp/isp_50/cam/mtk_cam-smem.h
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 MediaTek Inc.
+ */
+
+#ifndef __MTK_CAM_ISP_SMEM_H
+#define __MTK_CAM_ISP_SMEM_H
+
+#include <linux/dma-mapping.h>
+
+#include "mtk_cam.h"
+
+int mtk_cam_reserved_memory_init(struct isp_p1_device *p1_dev);
+dma_addr_t mtk_cam_smem_iova_to_scp_addr(struct device *smem_dev,
+					 dma_addr_t iova);
+
+#endif
+