diff mbox series

[v2] remoteproc: imx_dsp_rproc: add custom memory copy implementation for i.MX DSP Cores

Message ID 20230131170436.31280-1-iuliana.prodan@oss.nxp.com (mailing list archive)
State Superseded
Headers show
Series [v2] remoteproc: imx_dsp_rproc: add custom memory copy implementation for i.MX DSP Cores | expand

Commit Message

Iuliana Prodan (OSS) Jan. 31, 2023, 5:04 p.m. UTC
From: Iuliana Prodan <iuliana.prodan@nxp.com>

The IRAM is part of the HiFi DSP.
According to the hardware specification, only 32-bit writes are allowed;
otherwise we get a kernel panic.

Therefore, add custom memory copy and memset functions to deal with
the above restriction.

Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>

---
Changes since v1
- added missing check for cases when the memory slot is bigger than the file size
- added a custom memset function
- removed the is_iomem flag since it is not used here
- updated the custom memcpy function to avoid reading past the end of the source
---
 drivers/remoteproc/imx_dsp_rproc.c | 181 ++++++++++++++++++++++++++++-
 1 file changed, 180 insertions(+), 1 deletion(-)

Comments

Peng Fan (OSS) Feb. 1, 2023, 12:27 a.m. UTC | #1
On 2/1/2023 1:04 AM, Iuliana Prodan (OSS) wrote:
> From: Iuliana Prodan <iuliana.prodan@nxp.com>
> 
> The IRAM is part of the HiFi DSP.
> According to hardware specification only 32-bits write are allowed
> otherwise we get a Kernel panic.
> 
> Therefore add a custom memory copy and memset functions to deal with
> the above restriction.

Which platform has this limitation? This driver has been landed for
quite some time, is there any specific condition to trigger the issue?

Regards,
Peng.


> 
> Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>
> 
> ---
> Changes since v1
> - added missing check for cases when the memory slot is bigger than the file size;
> - added a custom memset function
> - removed is_iomem flag since is not used here
> - updated custom memcpy function to avoid reading after end of source
> ---
>   drivers/remoteproc/imx_dsp_rproc.c | 181 ++++++++++++++++++++++++++++-
>   1 file changed, 180 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/remoteproc/imx_dsp_rproc.c b/drivers/remoteproc/imx_dsp_rproc.c
> index e4b1e962d56ad..d0dcc0820fadd 100644
> --- a/drivers/remoteproc/imx_dsp_rproc.c
> +++ b/drivers/remoteproc/imx_dsp_rproc.c
> @@ -715,6 +715,185 @@ static void imx_dsp_rproc_kick(struct rproc *rproc, int vqid)
>   		dev_err(dev, "%s: failed (%d, err:%d)\n", __func__, vqid, err);
>   }
>   
> +/*
> + * Custom memory copy implementation for i.MX DSP Cores
> + *
> + * The IRAM is part of the HiFi DSP.
> + * According to hw specs only 32-bits writes are allowed.
> + */
> +static int imx_dsp_rproc_memcpy(void *dest, const void *src, size_t size)
> +{
> +	const u8 *src_byte = src;
> +	u32 affected_mask;
> +	u32 tmp;
> +	int i, q, r;
> +
> +	/* destination must be 32bit aligned */
> +	if (!IS_ALIGNED((u64)dest, 4))
> +		return -EINVAL;
> +
> +	q = size / 4;
> +	r = size % 4;
> +
> +	/* __iowrite32_copy use 32bit size values so divide by 4 */
> +	__iowrite32_copy(dest, src, q);
> +
> +	if (r) {
> +		affected_mask = (1 << (8 * r)) - 1;
> +
> +		/* first read the 32bit data of dest, then change affected
> +		 * bytes, and write back to dest.
> +		 * For unaffected bytes, it should not be changed
> +		 */
> +		tmp = ioread32(dest + q * 4);
> +		tmp &= ~affected_mask;
> +
> +		/* avoid reading after end of source */
> +		for (i = 0; i < r; i++)
> +			tmp |= (src_byte[q * 4 + i] << (8 * i));
> +
> +		iowrite32(tmp, dest + q * 4);
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Custom memset implementation for i.MX DSP Cores
> + *
> + * The IRAM is part of the HiFi DSP.
> + * According to hw specs only 32-bits writes are allowed.
> + */
> +static int imx_dsp_rproc_memset(void *addr, u8 value, size_t size)
> +{
> +	u32 affected_mask;
> +	u32 tmp_val = value;
> +	u32 *tmp_dst = addr;
> +	u32 tmp;
> +	int q, r;
> +
> +	/* destination must be 32bit aligned */
> +	if (!IS_ALIGNED((u64)addr, 4))
> +		return -EINVAL;
> +
> +	tmp_val |= tmp_val << 8;
> +	tmp_val |= tmp_val << 16;
> +
> +	q = size / 4;
> +	r = size % 4;
> +
> +	while (q--)
> +		iowrite32(tmp_val, tmp_dst++);
> +
> +	if (r) {
> +		affected_mask = (1 << (8 * r)) - 1;
> +
> +		/* first read the 32bit data of addr, then change affected
> +		 * bytes, and write back to addr.
> +		 * For unaffected bytes, it should not be changed
> +		 */
> +		tmp = ioread32(tmp_dst);
> +		tmp &= ~affected_mask;
> +
> +		tmp |= (tmp_val & affected_mask);
> +		iowrite32(tmp, tmp_dst);
> +	}
> +
> +	return 0;
> +}
> +/**
> + * imx_dsp_rproc_elf_load_segments() - load firmware segments to memory
> + * @rproc: remote processor which will be booted using these fw segments
> + * @fw: the ELF firmware image
> + *
> + * This function loads the firmware segments to memory, where the remote
> + * processor expects them.
> + *
> + * Return: 0 on success and an appropriate error code otherwise
> + */
> +static int imx_dsp_rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw)
> +{
> +	struct device *dev = &rproc->dev;
> +	const void *ehdr, *phdr;
> +	int i, ret = 0;
> +	u16 phnum;
> +	const u8 *elf_data = fw->data;
> +	u8 class = fw_elf_get_class(fw);
> +	u32 elf_phdr_get_size = elf_size_of_phdr(class);
> +
> +	ehdr = elf_data;
> +	phnum = elf_hdr_get_e_phnum(class, ehdr);
> +	phdr = elf_data + elf_hdr_get_e_phoff(class, ehdr);
> +
> +	/* go through the available ELF segments */
> +	for (i = 0; i < phnum; i++, phdr += elf_phdr_get_size) {
> +		u64 da = elf_phdr_get_p_paddr(class, phdr);
> +		u64 memsz = elf_phdr_get_p_memsz(class, phdr);
> +		u64 filesz = elf_phdr_get_p_filesz(class, phdr);
> +		u64 offset = elf_phdr_get_p_offset(class, phdr);
> +		u32 type = elf_phdr_get_p_type(class, phdr);
> +		void *ptr;
> +
> +		if (type != PT_LOAD || !memsz)
> +			continue;
> +
> +		dev_dbg(dev, "phdr: type %d da 0x%llx memsz 0x%llx filesz 0x%llx\n",
> +			type, da, memsz, filesz);
> +
> +		if (filesz > memsz) {
> +			dev_err(dev, "bad phdr filesz 0x%llx memsz 0x%llx\n",
> +				filesz, memsz);
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		if (offset + filesz > fw->size) {
> +			dev_err(dev, "truncated fw: need 0x%llx avail 0x%zx\n",
> +				offset + filesz, fw->size);
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		if (!rproc_u64_fit_in_size_t(memsz)) {
> +			dev_err(dev, "size (%llx) does not fit in size_t type\n",
> +				memsz);
> +			ret = -EOVERFLOW;
> +			break;
> +		}
> +
> +		/* grab the kernel address for this device address */
> +		ptr = rproc_da_to_va(rproc, da, memsz, NULL);
> +		if (!ptr) {
> +			dev_err(dev, "bad phdr da 0x%llx mem 0x%llx\n", da,
> +				memsz);
> +			ret = -EINVAL;
> +			break;
> +		}
> +
> +		/* put the segment where the remote processor expects it */
> +		if (filesz) {
> +			ret = imx_dsp_rproc_memcpy(ptr, elf_data + offset, filesz);
> +			if (ret) {
> +				dev_err(dev, "memory copy failed for da 0x%llx memsz 0x%llx\n",
> +					da, memsz);
> +				break;
> +			}
> +		}
> +
> +		/* zero out remaining memory for this segment */
> +		if (memsz > filesz) {
> +			ret = imx_dsp_rproc_memset(ptr + filesz, 0, memsz - filesz);
> +			if (ret) {
> +				dev_err(dev, "memset failed for da 0x%llx memsz 0x%llx\n",
> +					da, memsz);
> +				break;
> +			}
> +		}
> +	}
> +
> +	return ret;
> +}
> +
>   static int imx_dsp_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
>   {
>   	if (rproc_elf_load_rsc_table(rproc, fw))
> @@ -729,7 +908,7 @@ static const struct rproc_ops imx_dsp_rproc_ops = {
>   	.start		= imx_dsp_rproc_start,
>   	.stop		= imx_dsp_rproc_stop,
>   	.kick		= imx_dsp_rproc_kick,
> -	.load		= rproc_elf_load_segments,
> +	.load		= imx_dsp_rproc_elf_load_segments,
>   	.parse_fw	= imx_dsp_rproc_parse_fw,
>   	.sanity_check	= rproc_elf_sanity_check,
>   	.get_boot_addr	= rproc_elf_get_boot_addr,
Iuliana Prodan Feb. 1, 2023, 8:41 a.m. UTC | #2
Hi Peng,

On 2/1/2023 2:27 AM, Peng Fan wrote:
>
>
> On 2/1/2023 1:04 AM, Iuliana Prodan (OSS) wrote:
>> From: Iuliana Prodan <iuliana.prodan@nxp.com>
>>
>> The IRAM is part of the HiFi DSP.
>> According to hardware specification only 32-bits write are allowed
>> otherwise we get a Kernel panic.
>>
>> Therefore add a custom memory copy and memset functions to deal with
>> the above restriction.
>
> Which platform has this limitation? This driver has been landed for
> quite some time, is there any specific condition to trigger the issue?
>
> Regards,
> Peng.
>
Any platform with HiFi DSP.

As I explained for the previous version, until now it was used in a
limited scenario and the firmware was correctly built to respect the
write restriction, i.e. every IRAM section size was a multiple of 4 bytes.

Now I tried a simple hello_world sample from Zephyr, compiled with
GCC, and crashed the kernel when loading it on the HiFi4 DSP.

Thanks,

Iulia
kernel test robot Feb. 4, 2023, 7:23 a.m. UTC | #3
Hi Iuliana,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on remoteproc/rproc-next]
[also build test WARNING on linus/master v6.2-rc6]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Iuliana-Prodan-OSS/remoteproc-imx_dsp_rproc-add-custom-memory-copy-implementation-for-i-MX-DSP-Cores/20230201-011011
base:   git://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux.git rproc-next
patch link:    https://lore.kernel.org/r/20230131170436.31280-1-iuliana.prodan%40oss.nxp.com
patch subject: [PATCH v2] remoteproc: imx_dsp_rproc: add custom memory copy implementation for i.MX DSP Cores
config: arm-allyesconfig (https://download.01.org/0day-ci/archive/20230204/202302041520.m9CY8p6U-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/65bca8739891378a92cf6a5774e2ad72630a4276
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Iuliana-Prodan-OSS/remoteproc-imx_dsp_rproc-add-custom-memory-copy-implementation-for-i-MX-DSP-Cores/20230201-011011
        git checkout 65bca8739891378a92cf6a5774e2ad72630a4276
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/remoteproc/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:15,
                    from include/linux/clk.h:13,
                    from drivers/remoteproc/imx_dsp_rproc.c:6:
   drivers/remoteproc/imx_dsp_rproc.c: In function 'imx_dsp_rproc_memcpy':
>> drivers/remoteproc/imx_dsp_rproc.c:732:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     732 |         if (!IS_ALIGNED((u64)dest, 4))
         |                         ^
   include/linux/align.h:13:44: note: in definition of macro 'IS_ALIGNED'
      13 | #define IS_ALIGNED(x, a)                (((x) & ((typeof(x))(a) - 1)) == 0)
         |                                            ^
>> drivers/remoteproc/imx_dsp_rproc.c:732:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     732 |         if (!IS_ALIGNED((u64)dest, 4))
         |                         ^
   include/linux/align.h:13:58: note: in definition of macro 'IS_ALIGNED'
      13 | #define IS_ALIGNED(x, a)                (((x) & ((typeof(x))(a) - 1)) == 0)
         |                                                          ^
   drivers/remoteproc/imx_dsp_rproc.c: In function 'imx_dsp_rproc_memset':
   drivers/remoteproc/imx_dsp_rproc.c:776:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     776 |         if (!IS_ALIGNED((u64)addr, 4))
         |                         ^
   include/linux/align.h:13:44: note: in definition of macro 'IS_ALIGNED'
      13 | #define IS_ALIGNED(x, a)                (((x) & ((typeof(x))(a) - 1)) == 0)
         |                                            ^
   drivers/remoteproc/imx_dsp_rproc.c:776:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     776 |         if (!IS_ALIGNED((u64)addr, 4))
         |                         ^
   include/linux/align.h:13:58: note: in definition of macro 'IS_ALIGNED'
      13 | #define IS_ALIGNED(x, a)                (((x) & ((typeof(x))(a) - 1)) == 0)
         |                                                          ^


vim +732 drivers/remoteproc/imx_dsp_rproc.c

   717	
   718	/*
   719	 * Custom memory copy implementation for i.MX DSP Cores
   720	 *
   721	 * The IRAM is part of the HiFi DSP.
   722	 * According to hw specs only 32-bits writes are allowed.
   723	 */
   724	static int imx_dsp_rproc_memcpy(void *dest, const void *src, size_t size)
   725	{
   726		const u8 *src_byte = src;
   727		u32 affected_mask;
   728		u32 tmp;
   729		int i, q, r;
   730	
   731		/* destination must be 32bit aligned */
 > 732		if (!IS_ALIGNED((u64)dest, 4))
   733			return -EINVAL;
   734	
   735		q = size / 4;
   736		r = size % 4;
   737	
   738		/* __iowrite32_copy use 32bit size values so divide by 4 */
   739		__iowrite32_copy(dest, src, q);
   740	
   741		if (r) {
   742			affected_mask = (1 << (8 * r)) - 1;
   743	
   744			/* first read the 32bit data of dest, then change affected
   745			 * bytes, and write back to dest.
   746			 * For unaffected bytes, it should not be changed
   747			 */
   748			tmp = ioread32(dest + q * 4);
   749			tmp &= ~affected_mask;
   750	
   751			/* avoid reading after end of source */
   752			for (i = 0; i < r; i++)
   753				tmp |= (src_byte[q * 4 + i] << (8 * i));
   754	
   755			iowrite32(tmp, dest + q * 4);
   756		}
   757	
   758		return 0;
   759	}
   760
diff mbox series

Patch

diff --git a/drivers/remoteproc/imx_dsp_rproc.c b/drivers/remoteproc/imx_dsp_rproc.c
index e4b1e962d56ad..d0dcc0820fadd 100644
--- a/drivers/remoteproc/imx_dsp_rproc.c
+++ b/drivers/remoteproc/imx_dsp_rproc.c
@@ -715,6 +715,185 @@  static void imx_dsp_rproc_kick(struct rproc *rproc, int vqid)
 		dev_err(dev, "%s: failed (%d, err:%d)\n", __func__, vqid, err);
 }
 
+/*
+ * imx_dsp_rproc_memcpy() - copy into DSP memory using only 32-bit writes
+ * @dest: destination in DSP memory, must be 32-bit aligned
+ * @src: source buffer; never read beyond @size bytes
+ * @size: number of bytes to copy
+ *
+ * The IRAM is part of the HiFi DSP; per hw specs only 32-bit writes are
+ * allowed. A 1-3 byte tail is merged into the last word (read-modify-write).
+ *
+ * Return: 0 on success, -EINVAL if @dest is not 32-bit aligned
+ */
+static int imx_dsp_rproc_memcpy(void *dest, const void *src, size_t size)
+{
+	const u8 *src_byte = src;
+	size_t q = size / 4;	/* whole 32-bit words */
+	size_t r = size % 4;	/* trailing bytes */
+	u32 affected_mask, tmp;
+	size_t i;
+
+	/* unsigned long matches pointer width; a u64 cast warns on 32-bit */
+	if (!IS_ALIGNED((unsigned long)dest, 4))
+		return -EINVAL;
+
+	/* __iowrite32_copy counts in 32-bit units, hence size / 4 */
+	__iowrite32_copy(dest, src, q);
+
+	if (r) {
+		affected_mask = (1U << (8 * r)) - 1;
+
+		/* keep the bytes of the last word that lie beyond @size */
+		tmp = ioread32(dest + q * 4);
+		tmp &= ~affected_mask;
+
+		/* gather the tail byte by byte: no read past end of source */
+		for (i = 0; i < r; i++)
+			tmp |= (u32)src_byte[q * 4 + i] << (8 * i);
+
+		iowrite32(tmp, dest + q * 4);
+	}
+
+	return 0;
+}
+
+/*
+ * imx_dsp_rproc_memset() - fill DSP memory using only 32-bit writes
+ * @addr: start of the area in DSP memory, must be 32-bit aligned
+ * @value: byte value to store
+ * @size: number of bytes to fill
+ *
+ * The IRAM is part of the HiFi DSP; per hw specs only 32-bit writes are
+ * allowed. A 1-3 byte tail is merged into the last word (read-modify-write).
+ *
+ * Return: 0 on success, -EINVAL if @addr is not 32-bit aligned
+ */
+static int imx_dsp_rproc_memset(void *addr, u8 value, size_t size)
+{
+	size_t q = size / 4;	/* whole 32-bit words */
+	size_t r = size % 4;	/* trailing bytes */
+	u32 affected_mask, tmp;
+	u32 tmp_val = value;
+	u32 *tmp_dst = addr;
+
+	/* unsigned long matches pointer width; a u64 cast warns on 32-bit */
+	if (!IS_ALIGNED((unsigned long)addr, 4))
+		return -EINVAL;
+
+	/* replicate the byte into all four byte lanes of the word */
+	tmp_val |= tmp_val << 8;
+	tmp_val |= tmp_val << 16;
+
+	while (q--)
+		iowrite32(tmp_val, tmp_dst++);
+
+	if (r) {
+		affected_mask = (1U << (8 * r)) - 1;
+
+		/* keep the bytes of the last word that lie beyond @size */
+		tmp = ioread32(tmp_dst);
+		tmp &= ~affected_mask;
+		tmp |= (tmp_val & affected_mask);
+		iowrite32(tmp, tmp_dst);
+	}
+
+	return 0;
+}
+
+/**
+ * imx_dsp_rproc_elf_load_segments() - load firmware segments to memory
+ * @rproc: remote processor which will be booted using these fw segments
+ * @fw: the ELF firmware image
+ *
+ * This function loads the PT_LOAD segments to memory, where the remote
+ * processor expects them, through the 32-bit-only copy and memset helpers.
+ *
+ * Return: 0 on success and an appropriate error code otherwise
+ */
+static int imx_dsp_rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw)
+{
+	struct device *dev = &rproc->dev;
+	const u8 *elf_data = fw->data;
+	const void *ehdr = elf_data;
+	u8 class = fw_elf_get_class(fw);
+	u16 phnum = elf_hdr_get_e_phnum(class, ehdr);
+	u32 phdr_size = elf_size_of_phdr(class);
+	const void *phdr = elf_data + elf_hdr_get_e_phoff(class, ehdr);
+	int i, ret = 0;
+
+	/* go through the available ELF segments */
+	for (i = 0; i < phnum; i++, phdr += phdr_size) {
+		u64 da = elf_phdr_get_p_paddr(class, phdr);
+		u64 memsz = elf_phdr_get_p_memsz(class, phdr);
+		u64 filesz = elf_phdr_get_p_filesz(class, phdr);
+		u64 offset = elf_phdr_get_p_offset(class, phdr);
+		u32 type = elf_phdr_get_p_type(class, phdr);
+		/* filesz rounded down to a 32-bit word boundary */
+		u64 zero_from = filesz - filesz % 4;
+		void *ptr;
+
+		if (type != PT_LOAD || !memsz)
+			continue;
+
+		dev_dbg(dev, "phdr: type %d da 0x%llx memsz 0x%llx filesz 0x%llx\n",
+			type, da, memsz, filesz);
+
+		if (filesz > memsz) {
+			dev_err(dev, "bad phdr filesz 0x%llx memsz 0x%llx\n", filesz, memsz);
+			ret = -EINVAL;
+			break;
+		}
+
+		/* compare without adding: offset + filesz could wrap around */
+		if (offset > fw->size || filesz > fw->size - offset) {
+			dev_err(dev, "truncated fw: need 0x%llx avail 0x%zx\n",
+				offset + filesz, fw->size);
+			ret = -EINVAL;
+			break;
+		}
+
+		if (!rproc_u64_fit_in_size_t(memsz)) {
+			dev_err(dev, "size (%llx) does not fit in size_t type\n", memsz);
+			ret = -EOVERFLOW;
+			break;
+		}
+
+		/* grab the kernel address for this device address */
+		ptr = rproc_da_to_va(rproc, da, memsz, NULL);
+		if (!ptr) {
+			dev_err(dev, "bad phdr da 0x%llx mem 0x%llx\n", da, memsz);
+			ret = -EINVAL;
+			break;
+		}
+
+		/* Zero the rest of the segment first, from zero_from on: the
+		 * memset helper rejects an unaligned start like ptr + filesz,
+		 * and the copy below merges the file's tail into that word.
+		 */
+		if (memsz > filesz) {
+			ret = imx_dsp_rproc_memset(ptr + zero_from, 0, memsz - zero_from);
+			if (ret) {
+				dev_err(dev, "memset failed for da 0x%llx memsz 0x%llx\n",
+					da, memsz);
+				break;
+			}
+		}
+
+		/* put the segment where the remote processor expects it */
+		if (filesz) {
+			ret = imx_dsp_rproc_memcpy(ptr, elf_data + offset, filesz);
+			if (ret) {
+				dev_err(dev, "memory copy failed for da 0x%llx memsz 0x%llx\n",
+					da, memsz);
+				break;
+			}
+		}
+	}
+
+	return ret;
+}
+
 static int imx_dsp_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
 {
 	if (rproc_elf_load_rsc_table(rproc, fw))
@@ -729,7 +908,7 @@  static const struct rproc_ops imx_dsp_rproc_ops = {
 	.start		= imx_dsp_rproc_start,
 	.stop		= imx_dsp_rproc_stop,
 	.kick		= imx_dsp_rproc_kick,
-	.load		= rproc_elf_load_segments,
+	.load		= imx_dsp_rproc_elf_load_segments,
 	.parse_fw	= imx_dsp_rproc_parse_fw,
 	.sanity_check	= rproc_elf_sanity_check,
 	.get_boot_addr	= rproc_elf_get_boot_addr,