diff mbox series

[v1,3/5] soc: qcom: memory_dump: Add memory dump driver

Message ID 1698052857-6918-4-git-send-email-quic_zhenhuah@quicinc.com (mailing list archive)
State Changes Requested
Headers show
Series soc/arm64: qcom: add initial version of memory dump | expand

Commit Message

Zhenhua Huang Oct. 23, 2023, 9:20 a.m. UTC
Qualcomm memory dump driver initializes system memory dump table.
Firmware dumps system cache, internal memory, peripheral registers
to DDR as per this table after system crashes and warm resets. The
driver reserves memory, populates ids and sizes for firmware dumping
according to the configuration.

Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
---
 MAINTAINERS                    |   7 +
 drivers/soc/qcom/Kconfig       |  11 +
 drivers/soc/qcom/Makefile      |   1 +
 drivers/soc/qcom/memory_dump.c | 540 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 559 insertions(+)
 create mode 100644 drivers/soc/qcom/memory_dump.c

Comments

Krzysztof Kozlowski Oct. 23, 2023, 9:31 a.m. UTC | #1
On 23/10/2023 11:20, Zhenhua Huang wrote:
> Qualcomm memory dump driver initializes system memory dump table.
> Firmware dumps system cache, internal memory, peripheral registers
> to DDR as per this table after system crashes and warm resets. The
> driver reserves memory, populates ids and sizes for firmware dumping
> according to the configuration.
> 
> Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
> ---

...

> +
> +/* populate allocated region */
> +static int __init mem_dump_populate_mem(struct device *dev,
> +					struct page *start_page,
> +					size_t total_size)
> +{
> +	struct qcom_memory_dump *memdump = dev_get_drvdata(dev);
> +	struct qcom_dump_entry dump_entry;
> +	struct qcom_dump_data *dump_data;
> +	struct xbc_node *linked_list;
> +	phys_addr_t phys_end_addr;
> +	phys_addr_t phys_addr;
> +	const char *size_p;
> +	void *dump_vaddr;
> +	const char *id_p;
> +	int ret = 0;
> +	int size;
> +	int id;
> +
> +	phys_addr = page_to_phys(start_page);
> +	phys_end_addr = phys_addr + total_size;
> +	dump_vaddr = page_to_virt(start_page);
> +
> +	ret = mem_dump_register_data_table(dev, dump_vaddr, phys_addr);
> +	if (ret) {
> +		dev_err_probe(dev, ret, "Mem Dump table set up is failed\n");
> +		return ret;

That's not the syntax. Syntax is return dev_err_probe

> +	}
> +
> +	ret = qcom_init_memdump_imem_area(dev, total_size);
> +	if (ret)
> +		return ret;
> +
> +	/* Apply two tables: QCOM_DUMP_TYPE_TABLE and QCOM_DUMP_TYPE_DATA */
> +	mem_dump_apply_offset(&dump_vaddr, &phys_addr,
> +			      sizeof(struct qcom_dump_table) * 2);
> +
> +	/* Both "id" and "size" must be present */
> +	xbc_node_for_each_subkey(memdump->mem_dump_node, linked_list) {
> +		const char *name = xbc_node_get_data(linked_list);
> +
> +		if (!name)
> +			continue;
> +
> +		id_p = xbc_node_find_value(linked_list, "id", NULL);
> +		size_p = xbc_node_find_value(linked_list, "size", NULL);
> +
> +		if (id_p && size_p) {
> +			ret = kstrtoint(id_p, 0, &id);
> +			if (ret)
> +				continue;
> +
> +			ret = kstrtoint(size_p, 0, &size);
> +
> +			if (ret)
> +				continue;
> +
> +		/*
> +		 * Physical layout: starting from two qcom_dump_data.
> +		 * Following are respective dump meta data and reserved regions.
> +		 * Qcom_dump_data is populated by the driver, fw parse it
> +		 * and dump respective info into dump mem.
> +		 * Illustrate the layout:
> +		 *
> +		 *   +------------------------+------------------------+
> +		 *   | qcom_dump_table(TABLE) | qcom_dump_table(DATA)  |
> +		 *   +------------------------+------------------------+
> +		 *   +-------------+----------+-------------+----------+
> +		 *   |qcom_dump_data| dump mem|qcom_dump_data| dump mem |
> +		 *   +-------------+----------+-------------+----------+
> +		 *   +-------------+----------+-------------+----------+
> +		 *   |qcom_dump_data| dump mem|qcom_dump_data| dump mem |
> +		 *   +-------------+----------+-------------+----------+
> +		 *   ...
> +		 */

You have wrong indentation here.

> +			dump_data = dump_vaddr;
> +			dump_data->addr = phys_addr + QCOM_DUMP_DATA_SIZE;
> +			dump_data->len = size;
> +			dump_entry.id = id;
> +			strscpy(dump_data->name, name,
> +				sizeof(dump_data->name));
> +			dump_entry.addr = phys_addr;
> +			ret = mem_dump_data_register(dev, QCOM_DUMP_TABLE_LINUX,
> +						     &dump_entry);
> +			if (ret) {
> +				dev_err_probe(dev, ret, "Dump data setup failed, id = %d\n",
> +					      id);

Syntax is return dev_err_probe

> +				return ret;
> +			}
> +
> +			mem_dump_apply_offset(&dump_vaddr, &phys_addr,
> +					      size + QCOM_DUMP_DATA_SIZE);
> +			if (phys_addr > phys_end_addr) {
> +				dev_err_probe(dev, -ENOMEM, "Exceeding allocated region\n");

ENOMEM? Does not look right then.

> +				return -ENOMEM;
> +			}
> +		} else {
> +			continue;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static int __init mem_dump_probe(struct platform_device *pdev)
> +{
> +	struct qcom_memory_dump *memdump;
> +	struct device *dev = &pdev->dev;
> +	struct page *page;
> +	size_t total_size;
> +	int ret = 0;
> +
> +	memdump = devm_kzalloc(dev, sizeof(struct qcom_memory_dump),
> +			       GFP_KERNEL);
> +	if (!memdump)
> +		return -ENOMEM;
> +
> +	dev_set_drvdata(dev, memdump);
> +
> +	/* check and initiate CMA region */
> +	ret = mem_dump_reserve_mem(dev);
> +	if (ret)
> +		return ret;
> +
> +	/* allocate and populate */
> +	page = mem_dump_alloc_mem(dev, &total_size);
> +	if (IS_ERR(page)) {
> +		ret = PTR_ERR(page);
> +		dev_err_probe(dev, ret, "mem dump alloc failed\n");

No, the syntax is:
ret = dev_err_probe

But why do you print messgaes for memory allocations?

> +		goto release;
> +	}
> +
> +	ret = mem_dump_populate_mem(dev, page, total_size);
> +	if (!ret)
> +		dev_info(dev, "Mem dump region populated successfully\n");

Drop simple probe success messages. Not needed.

> +	else
> +		goto free;
> +
> +	return 0;
> +
> +free:
> +	cma_release(dev_get_cma_area(dev), page, (total_size / PAGE_SIZE));
> +
> +release:
> +	of_reserved_mem_device_release(dev);

Where is cleanup on unbind?

> +	return ret;
> +}
> +
> +static const struct of_device_id mem_dump_match_table[] = {
> +	{.compatible = "qcom,mem-dump",},
> +	{}
> +};
> +
Best regards,
Krzysztof
Zhenhua Huang Oct. 23, 2023, 11:43 a.m. UTC | #2
On 2023/10/23 17:31, Krzysztof Kozlowski wrote:
> On 23/10/2023 11:20, Zhenhua Huang wrote:
>> Qualcomm memory dump driver initializes system memory dump table.
>> Firmware dumps system cache, internal memory, peripheral registers
>> to DDR as per this table after system crashes and warm resets. The
>> driver reserves memory, populates ids and sizes for firmware dumping
>> according to the configuration.
>>
>> Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
>> ---
> 
> ...
> 
>> +
>> +/* populate allocated region */
>> +static int __init mem_dump_populate_mem(struct device *dev,
>> +					struct page *start_page,
>> +					size_t total_size)
>> +{
>> +	struct qcom_memory_dump *memdump = dev_get_drvdata(dev);
>> +	struct qcom_dump_entry dump_entry;
>> +	struct qcom_dump_data *dump_data;
>> +	struct xbc_node *linked_list;
>> +	phys_addr_t phys_end_addr;
>> +	phys_addr_t phys_addr;
>> +	const char *size_p;
>> +	void *dump_vaddr;
>> +	const char *id_p;
>> +	int ret = 0;
>> +	int size;
>> +	int id;
>> +
>> +	phys_addr = page_to_phys(start_page);
>> +	phys_end_addr = phys_addr + total_size;
>> +	dump_vaddr = page_to_virt(start_page);
>> +
>> +	ret = mem_dump_register_data_table(dev, dump_vaddr, phys_addr);
>> +	if (ret) {
>> +		dev_err_probe(dev, ret, "Mem Dump table set up is failed\n");
>> +		return ret;
> 
> That's not the syntax. Syntax is return dev_err_probe
> 

Got it, Thanks.

>> +	}
>> +
>> +	ret = qcom_init_memdump_imem_area(dev, total_size);
>> +	if (ret)
>> +		return ret;
>> +
>> +	/* Apply two tables: QCOM_DUMP_TYPE_TABLE and QCOM_DUMP_TYPE_DATA */
>> +	mem_dump_apply_offset(&dump_vaddr, &phys_addr,
>> +			      sizeof(struct qcom_dump_table) * 2);
>> +
>> +	/* Both "id" and "size" must be present */
>> +	xbc_node_for_each_subkey(memdump->mem_dump_node, linked_list) {
>> +		const char *name = xbc_node_get_data(linked_list);
>> +
>> +		if (!name)
>> +			continue;
>> +
>> +		id_p = xbc_node_find_value(linked_list, "id", NULL);
>> +		size_p = xbc_node_find_value(linked_list, "size", NULL);
>> +
>> +		if (id_p && size_p) {
>> +			ret = kstrtoint(id_p, 0, &id);
>> +			if (ret)
>> +				continue;
>> +
>> +			ret = kstrtoint(size_p, 0, &size);
>> +
>> +			if (ret)
>> +				continue;
>> +
>> +		/*
>> +		 * Physical layout: starting from two qcom_dump_data.
>> +		 * Following are respective dump meta data and reserved regions.
>> +		 * Qcom_dump_data is populated by the driver, fw parse it
>> +		 * and dump respective info into dump mem.
>> +		 * Illustrate the layout:
>> +		 *
>> +		 *   +------------------------+------------------------+
>> +		 *   | qcom_dump_table(TABLE) | qcom_dump_table(DATA)  |
>> +		 *   +------------------------+------------------------+
>> +		 *   +-------------+----------+-------------+----------+
>> +		 *   |qcom_dump_data| dump mem|qcom_dump_data| dump mem |
>> +		 *   +-------------+----------+-------------+----------+
>> +		 *   +-------------+----------+-------------+----------+
>> +		 *   |qcom_dump_data| dump mem|qcom_dump_data| dump mem |
>> +		 *   +-------------+----------+-------------+----------+
>> +		 *   ...
>> +		 */
> 
> You have wrong indentation here.

Thanks for catching.

> 
>> +			dump_data = dump_vaddr;
>> +			dump_data->addr = phys_addr + QCOM_DUMP_DATA_SIZE;
>> +			dump_data->len = size;
>> +			dump_entry.id = id;
>> +			strscpy(dump_data->name, name,
>> +				sizeof(dump_data->name));
>> +			dump_entry.addr = phys_addr;
>> +			ret = mem_dump_data_register(dev, QCOM_DUMP_TABLE_LINUX,
>> +						     &dump_entry);
>> +			if (ret) {
>> +				dev_err_probe(dev, ret, "Dump data setup failed, id = %d\n",
>> +					      id);
> 
> Syntax is return dev_err_probe
> 
>> +				return ret;
>> +			}
>> +
>> +			mem_dump_apply_offset(&dump_vaddr, &phys_addr,
>> +					      size + QCOM_DUMP_DATA_SIZE);
>> +			if (phys_addr > phys_end_addr) {
>> +				dev_err_probe(dev, -ENOMEM, "Exceeding allocated region\n");
> 
> ENOMEM? Does not look right then.

ENOMEM means the memory allocated not enough? any suggestion? Thanks.

> 
>> +				return -ENOMEM;
>> +			}
>> +		} else {
>> +			continue;
>> +		}
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +static int __init mem_dump_probe(struct platform_device *pdev)
>> +{
>> +	struct qcom_memory_dump *memdump;
>> +	struct device *dev = &pdev->dev;
>> +	struct page *page;
>> +	size_t total_size;
>> +	int ret = 0;
>> +
>> +	memdump = devm_kzalloc(dev, sizeof(struct qcom_memory_dump),
>> +			       GFP_KERNEL);
>> +	if (!memdump)
>> +		return -ENOMEM;
>> +
>> +	dev_set_drvdata(dev, memdump);
>> +
>> +	/* check and initiate CMA region */
>> +	ret = mem_dump_reserve_mem(dev);
>> +	if (ret)
>> +		return ret;
>> +
>> +	/* allocate and populate */
>> +	page = mem_dump_alloc_mem(dev, &total_size);
>> +	if (IS_ERR(page)) {
>> +		ret = PTR_ERR(page);
>> +		dev_err_probe(dev, ret, "mem dump alloc failed\n");
> 
> No, the syntax is:
> ret = dev_err_probe
> 
> But why do you print messgaes for memory allocations?

Do you think it's useless because mm codes should print error as well if 
failure ?

> 
>> +		goto release;
>> +	}
>> +
>> +	ret = mem_dump_populate_mem(dev, page, total_size);
>> +	if (!ret)
>> +		dev_info(dev, "Mem dump region populated successfully\n");
> 
> Drop simple probe success messages. Not needed.

OK

> 
>> +	else
>> +		goto free;
>> +
>> +	return 0;
>> +
>> +free:
>> +	cma_release(dev_get_cma_area(dev), page, (total_size / PAGE_SIZE));
>> +
>> +release:
>> +	of_reserved_mem_device_release(dev);
> 
> Where is cleanup on unbind?
> 

Will add, Thanks for pointing out.

>> +	return ret;
>> +}
>> +
>> +static const struct of_device_id mem_dump_match_table[] = {
>> +	{.compatible = "qcom,mem-dump",},
>> +	{}
>> +};
>> +
> Best regards,
> Krzysztof
> 

Thanks,
Zhenhua
Krzysztof Kozlowski Oct. 23, 2023, 11:46 a.m. UTC | #3
On 23/10/2023 13:43, Zhenhua Huang wrote:
>>> +
>>> +			mem_dump_apply_offset(&dump_vaddr, &phys_addr,
>>> +					      size + QCOM_DUMP_DATA_SIZE);
>>> +			if (phys_addr > phys_end_addr) {
>>> +				dev_err_probe(dev, -ENOMEM, "Exceeding allocated region\n");
>>
>> ENOMEM? Does not look right then.
> 
> ENOMEM means the memory allocated not enough? any suggestion? Thanks.

The error code is okay, but we rarely need to print error messages for
memory allocation failures. Core prints it already.

> 
>>
>>> +				return -ENOMEM;
>>> +			}
>>> +		} else {
>>> +			continue;
>>> +		}
>>> +	}
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +static int __init mem_dump_probe(struct platform_device *pdev)
>>> +{
>>> +	struct qcom_memory_dump *memdump;
>>> +	struct device *dev = &pdev->dev;
>>> +	struct page *page;
>>> +	size_t total_size;
>>> +	int ret = 0;
>>> +
>>> +	memdump = devm_kzalloc(dev, sizeof(struct qcom_memory_dump),
>>> +			       GFP_KERNEL);
>>> +	if (!memdump)
>>> +		return -ENOMEM;
>>> +
>>> +	dev_set_drvdata(dev, memdump);
>>> +
>>> +	/* check and initiate CMA region */
>>> +	ret = mem_dump_reserve_mem(dev);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	/* allocate and populate */
>>> +	page = mem_dump_alloc_mem(dev, &total_size);
>>> +	if (IS_ERR(page)) {
>>> +		ret = PTR_ERR(page);
>>> +		dev_err_probe(dev, ret, "mem dump alloc failed\n");
>>
>> No, the syntax is:
>> ret = dev_err_probe
>>
>> But why do you print messgaes for memory allocations?
> 
> Do you think it's useless because mm codes should print error as well if 
> failure ?

We fixed this in kernel long, long, long time ago so we have even
coccicheck scripts to find misuses.



Best regards,
Krzysztof
Zhenhua Huang Oct. 23, 2023, 12:19 p.m. UTC | #4
On 2023/10/23 19:46, Krzysztof Kozlowski wrote:
> On 23/10/2023 13:43, Zhenhua Huang wrote:
>>>> +
>>>> +			mem_dump_apply_offset(&dump_vaddr, &phys_addr,
>>>> +					      size + QCOM_DUMP_DATA_SIZE);
>>>> +			if (phys_addr > phys_end_addr) {
>>>> +				dev_err_probe(dev, -ENOMEM, "Exceeding allocated region\n");
>>>
>>> ENOMEM? Does not look right then.
>>
>> ENOMEM means the memory allocated not enough? any suggestion? Thanks.
> 
> The error code is okay, but we rarely need to print error messages for
> memory allocation failures. Core prints it already.

It's not same as below case. Allocation is successful here, while the 
driver used more than allocated size.

> 
>>
>>>
>>>> +				return -ENOMEM;
>>>> +			}
>>>> +		} else {
>>>> +			continue;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	return ret;
>>>> +}
>>>> +
>>>> +static int __init mem_dump_probe(struct platform_device *pdev)
>>>> +{
>>>> +	struct qcom_memory_dump *memdump;
>>>> +	struct device *dev = &pdev->dev;
>>>> +	struct page *page;
>>>> +	size_t total_size;
>>>> +	int ret = 0;
>>>> +
>>>> +	memdump = devm_kzalloc(dev, sizeof(struct qcom_memory_dump),
>>>> +			       GFP_KERNEL);
>>>> +	if (!memdump)
>>>> +		return -ENOMEM;
>>>> +
>>>> +	dev_set_drvdata(dev, memdump);
>>>> +
>>>> +	/* check and initiate CMA region */
>>>> +	ret = mem_dump_reserve_mem(dev);
>>>> +	if (ret)
>>>> +		return ret;
>>>> +
>>>> +	/* allocate and populate */
>>>> +	page = mem_dump_alloc_mem(dev, &total_size);
>>>> +	if (IS_ERR(page)) {
>>>> +		ret = PTR_ERR(page);
>>>> +		dev_err_probe(dev, ret, "mem dump alloc failed\n");
>>>
>>> No, the syntax is:
>>> ret = dev_err_probe
>>>
>>> But why do you print messgaes for memory allocations?
>>
>> Do you think it's useless because mm codes should print error as well if
>> failure ?
> 
> We fixed this in kernel long, long, long time ago so we have even
> coccicheck scripts to find misuses.
> 

Thanks, yeah.. I know mm codes already have for example warn_alloc() to 
print error messages. Thanks for pointing out.

> 
> 
> Best regards,
> Krzysztof
> 

Thanks,
Zhenhua
Krzysztof Kozlowski Oct. 23, 2023, 12:53 p.m. UTC | #5
On 23/10/2023 14:19, Zhenhua Huang wrote:
> 
> 
> On 2023/10/23 19:46, Krzysztof Kozlowski wrote:
>> On 23/10/2023 13:43, Zhenhua Huang wrote:
>>>>> +
>>>>> +			mem_dump_apply_offset(&dump_vaddr, &phys_addr,
>>>>> +					      size + QCOM_DUMP_DATA_SIZE);
>>>>> +			if (phys_addr > phys_end_addr) {
>>>>> +				dev_err_probe(dev, -ENOMEM, "Exceeding allocated region\n");
>>>>
>>>> ENOMEM? Does not look right then.
>>>
>>> ENOMEM means the memory allocated not enough? any suggestion? Thanks.
>>
>> The error code is okay, but we rarely need to print error messages for
>> memory allocation failures. Core prints it already.
> 
> It's not same as below case. Allocation is successful here, while the 
> driver used more than allocated size.

$ man errno
ENOMEM means: Not enough space/cannot allocate memory


Best regards,
Krzysztof
Konrad Dybcio Oct. 23, 2023, 1:56 p.m. UTC | #6
On 23.10.2023 11:20, Zhenhua Huang wrote:
> Qualcomm memory dump driver initializes system memory dump table.
> Firmware dumps system cache, internal memory, peripheral registers
> to DDR as per this table after system crashes and warm resets. The
> driver reserves memory, populates ids and sizes for firmware dumping
> according to the configuration.
> 
> Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
> ---
[...]


> +#define MAX_NUM_ENTRIES		0x150
The number of entries makes more sense as a dec number

> +#define QCOM_DUMP_MAKE_VERSION(major, minor)	(((major) << 20) | (minor))
> +#define QCOM_DUMP_TABLE_VERSION		QCOM_DUMP_MAKE_VERSION(2, 0)
I feel like doing this:

#define QCOM_DUMP_TABLE_VERSION(major, minor)	((major << 20) | (minor))

...

someval = QCOM_DUMP_TABLE_VERSION(2, 0)

would make more sense, since v2.0 seems to be the only supported target..

[...]

> +			if (phys_addr > phys_end_addr) {
> +				dev_err_probe(dev, -ENOMEM, "Exceeding allocated region\n");
> +				return -ENOMEM;
> +			}
> +		} else {
> +			continue;
You can check for the inverse and bail out early, saving yourself
a lot of tabs

[...]

> +MODULE_DESCRIPTION("Memory Dump Driver");
Missing some mention of it being QC specific

Konrad
Zhenhua Huang Oct. 24, 2023, 7:36 a.m. UTC | #7
Thanks Konrad.

On 2023/10/23 21:56, Konrad Dybcio wrote:
> On 23.10.2023 11:20, Zhenhua Huang wrote:
>> Qualcomm memory dump driver initializes system memory dump table.
>> Firmware dumps system cache, internal memory, peripheral registers
>> to DDR as per this table after system crashes and warm resets. The
>> driver reserves memory, populates ids and sizes for firmware dumping
>> according to the configuration.
>>
>> Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
>> ---
> [...]
> 
> 
>> +#define MAX_NUM_ENTRIES		0x150
> The number of entries makes more sense as a dec number

Done

> 
>> +#define QCOM_DUMP_MAKE_VERSION(major, minor)	(((major) << 20) | (minor))
>> +#define QCOM_DUMP_TABLE_VERSION		QCOM_DUMP_MAKE_VERSION(2, 0)
> I feel like doing this:
> 
> #define QCOM_DUMP_TABLE_VERSION(major, minor)	((major << 20) | (minor))
> 
> ...
> 
> someval = QCOM_DUMP_TABLE_VERSION(2, 0)
> 
> would make more sense, since v2.0 seems to be the only supported target..
> 

Done

> [...]
> 
>> +			if (phys_addr > phys_end_addr) {
>> +				dev_err_probe(dev, -ENOMEM, "Exceeding allocated region\n");
>> +				return -ENOMEM;
>> +			}
>> +		} else {
>> +			continue;
> You can check for the inverse and bail out early, saving yourself
> a lot of tabs

Good suggestion. Thanks.

> 
> [...]
> 
>> +MODULE_DESCRIPTION("Memory Dump Driver");
> Missing some mention of it being QC specific

Add Qualcomm tag. Thanks.

> 
> Konrad
Zhenhua Huang Oct. 24, 2023, 10:57 a.m. UTC | #8
On 2023/10/23 20:53, Krzysztof Kozlowski wrote:
>> It's not same as below case. Allocation is successful here, while the
>> driver used more than allocated size.
> $ man errno
> ENOMEM means: Not enough space/cannot allocate memory

I think "Not enough space" should be applicable here?

Thanks,
Zhenhua
Krzysztof Kozlowski Oct. 24, 2023, 1:58 p.m. UTC | #9
On 24/10/2023 12:57, Zhenhua Huang wrote:
> 
> 
> On 2023/10/23 20:53, Krzysztof Kozlowski wrote:
>>> It's not same as below case. Allocation is successful here, while the
>>> driver used more than allocated size.
>> $ man errno
>> ENOMEM means: Not enough space/cannot allocate memory
> 
> I think "Not enough space" should be applicable here?

To me: not. This is some configuration problem, not lack of mmaped
address space or lack of free pages. It's true that NOMEM is also used
for limits (e.g. "The process's maximum number of mappings would have
been exceeded.", "The process's RLIMIT_DATA limit, described in
getrlimit(2), would have been exceeded."), but I am not sure whether
this fits this case.

Best regards,
Krzysztof
kernel test robot Oct. 26, 2023, 6:58 p.m. UTC | #10
Hi Zhenhua,

kernel test robot noticed the following build warnings:

[auto build test WARNING on robh/for-next]
[also build test WARNING on arm64/for-next/core linus/master v6.6-rc7 next-20231026]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhenhua-Huang/dt-bindings-soc-qcom-Add-memory_dump-driver-bindings/20231023-172245
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git for-next
patch link:    https://lore.kernel.org/r/1698052857-6918-4-git-send-email-quic_zhenhuah%40quicinc.com
patch subject: [PATCH v1 3/5] soc: qcom: memory_dump: Add memory dump driver
reproduce: (https://download.01.org/0day-ci/archive/20231027/202310270221.jqcwE137-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310270221.jqcwE137-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> Warning: MAINTAINERS references a file that doesn't exist: Documentation/devicetree/bindings/soc/qcom/qcom,memory_dump.yaml
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 90f1328..096e0f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17792,6 +17792,13 @@  S:	Maintained
 F:	Documentation/devicetree/bindings/regulator/vqmmc-ipq4019-regulator.yaml
 F:	drivers/regulator/vqmmc-ipq4019-regulator.c
 
+QUALCOMM MEMORY DUMP DRIVER
+M:	Zhenhua Huang <quic_zhenhuah@quicinc.com>
+L:	linux-arm-msm@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/soc/qcom/qcom,memory_dump.yaml
+F:	drivers/soc/qcom/memory_dump.c
+
 QUALCOMM NAND CONTROLLER DRIVER
 M:	Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
 L:	linux-mtd@lists.infradead.org
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 7153488..1842f4e 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -74,6 +74,17 @@  config QCOM_KRYO_L2_ACCESSORS
 	bool
 	depends on (ARCH_QCOM || COMPILE_TEST) && ARM64
 
+config QCOM_MEMORY_DUMP
+       bool "QCOM Memory Dump Support"
+       depends on ARCH_QCOM || COMPILE_TEST
+       select BOOT_CONFIG
+       help
+         Qualcomm memory dump driver initializes system memory dump table.
+         Firmware dumps system cache, internal memory, peripheral registers to
+         DDR as per this table after system crash and warm reset.
+         The driver allocates reserved memory and populates ids, sizes for
+         firmware to dump according to configuration.
+
 config QCOM_MDT_LOADER
 	tristate
 	select QCOM_SCM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index bbca2e1..988880c 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -28,6 +28,7 @@  obj-$(CONFIG_QCOM_STATS)	+= qcom_stats.o
 obj-$(CONFIG_QCOM_WCNSS_CTRL) += wcnss_ctrl.o
 obj-$(CONFIG_QCOM_APR) += apr.o
 obj-$(CONFIG_QCOM_LLCC) += llcc-qcom.o
+obj-$(CONFIG_QCOM_MEMORY_DUMP) += memory_dump.o
 obj-$(CONFIG_QCOM_KRYO_L2_ACCESSORS) +=	kryo-l2-accessors.o
 obj-$(CONFIG_QCOM_ICC_BWMON)	+= icc-bwmon.o
 qcom_ice-objs			+= ice.o
diff --git a/drivers/soc/qcom/memory_dump.c b/drivers/soc/qcom/memory_dump.c
new file mode 100644
index 0000000..57cd897
--- /dev/null
+++ b/drivers/soc/qcom/memory_dump.c
@@ -0,0 +1,540 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ *
+ * Qualcomm memory dump driver dynamically reserves memory and provides
+ * hints(id and size) of debugging information based on specified
+ * protocols with firmware into pre-allocated memory. Firmware then does the
+ * real data capture. The debugging information includes cache contents,
+ * internal memory, registers.
+ * After crash and warm reboot, firmware scans ids, sizes and stores contents
+ * into reserved memory accordingly. Firmware then enters into full dump mode
+ * which dumps whole DDR to host through USB.
+ *
+ */
+#include <asm/barrier.h>
+#include <linux/bootconfig.h>
+#include <linux/cma.h>
+#include <linux/device.h>
+#include <linux/dma-map-ops.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/firmware/qcom/qcom_scm.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#define MAX_NUM_ENTRIES		0x150
+#define QCOM_DUMP_MAKE_VERSION(major, minor)	(((major) << 20) | (minor))
+#define QCOM_DUMP_TABLE_VERSION		QCOM_DUMP_MAKE_VERSION(2, 0)
+#define QCOM_DUMP_DATA_SIZE sizeof(struct qcom_dump_data)
+
+enum qcom_dump_table_ids {
+	QCOM_DUMP_TABLE_LINUX,
+	QCOM_DUMP_TABLE_MAX = MAX_NUM_ENTRIES,
+};
+
+enum qcom_dump_type {
+	QCOM_DUMP_TYPE_DATA,
+	QCOM_DUMP_TYPE_TABLE,
+};
+
+/*
+ * +----------+         1st level
+ * |IMEM      |------+-----------------+
+ * +----------+      | qcom_dump_table |
+ *                   |---------------- |
+ *                   | version         |
+ *                   | num_entryies    |
+ *                   | ..              |
+ *                   |---------------- |
+ *             +-----|qcom_dump_entry  |
+ *             |     |qcom_dump_entry  |
+ *             |     |  ...            |
+ *             |     +-----------------+
+ *             |
+ *             |
+ *             |        2nd level
+ *             |     +-----------------+
+ *             ------| qcom_dump_table |
+ *                   |---------------- |
+ *                   | version         |
+ *                   | num_entryies    |
+ *                   | ..              |
+ *                   |---------------- |     +-------------+     +----------+
+ *                   |qcom_dump_entry  |-----|qcom_dump_data|----| data     |
+ *                   |qcom_dump_entry  |     +-------------+     +----------+
+ *                   |  ...            |---- +-------------+     +----------+
+ *                   +-----------------+     |qcom_dump_data|----| data     |
+ *                                           +-------------+     +----------+
+ *
+ * Structures can not be packed due to protocols with firmware.
+ */
+struct qcom_dump_data {
+	__le32 version;
+	__le32 magic;
+	char name[32];
+	__le64 addr;
+	__le64 len;
+	__le32 reserved;
+};
+
+struct qcom_dump_entry {
+	__le32 id;
+	char name[32];
+	__le32 type;
+	__le64 addr;
+};
+
+struct qcom_dump_table {
+	__le32 version;
+	__le32 num_entries;
+	struct qcom_dump_entry entries[MAX_NUM_ENTRIES];
+};
+
+struct qcom_memory_dump {
+	u64 table_phys;
+	struct qcom_dump_table *table;
+	struct xbc_node *mem_dump_node;
+	/* Cached 2nd level table */
+	struct qcom_dump_table *cached_2nd_table;
+};
+
+static void __init mem_dump_entry_set(struct device *dev,
+				      struct qcom_dump_entry *entry,
+				      u32 id,
+				      u32 type, uint64_t addr)
+{
+	entry->id = id;
+	entry->type = type;
+	entry->addr = addr;
+}
+
+/* 1st level table register */
+static int __init mem_dump_table_register(struct device *dev,
+					  struct qcom_dump_entry *entry)
+{
+	struct qcom_memory_dump *memdump = dev_get_drvdata(dev);
+	struct qcom_dump_entry *last_entry;
+	struct qcom_dump_table *table = memdump->table;
+
+	if (!table || table->num_entries >= MAX_NUM_ENTRIES)
+		return -EINVAL;
+
+	last_entry = &table->entries[table->num_entries];
+	mem_dump_entry_set(dev, last_entry, entry->id,
+			   QCOM_DUMP_TYPE_TABLE, entry->addr);
+	table->num_entries++;
+
+	return 0;
+}
+
+/* Get 2nd level table */
+static struct qcom_dump_table * __init
+mem_dump_get_table(struct device *dev,
+		   enum qcom_dump_table_ids id)
+{
+	struct qcom_memory_dump *memdump = dev_get_drvdata(dev);
+	struct qcom_dump_table *table = memdump->table;
+	unsigned long offset;
+	int i;
+
+	if (memdump->cached_2nd_table)
+		return memdump->cached_2nd_table;
+
+	if (!table) {
+		dev_err(dev, "Mem dump base table does not exist\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	for (i = 0; i < MAX_NUM_ENTRIES; i++) {
+		if (table->entries[i].id == id)
+			break;
+	}
+
+	if (i == MAX_NUM_ENTRIES || !table->entries[i].addr) {
+		dev_err(dev, "Mem dump base table entry %d invalid\n", id);
+		return ERR_PTR(-EINVAL);
+	}
+
+	offset = table->entries[i].addr - memdump->table_phys;
+
+	/* Get the table pointer. Phy and virt addr has same offset */
+	table = (void *)memdump->table + offset;
+	/* Cache it for next time visit */
+	memdump->cached_2nd_table = table;
+
+	return table;
+}
+
+/* register in 2nd level table */
+static int __init mem_dump_data_register(struct device *dev,
+					 enum qcom_dump_table_ids id,
+					 struct qcom_dump_entry *entry)
+{
+	struct qcom_dump_entry *last_entry;
+	struct qcom_dump_table *table;
+
+	/* Get 2nd level table */
+	table = mem_dump_get_table(dev, id);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if (!table || table->num_entries >= MAX_NUM_ENTRIES)
+		return -EINVAL;
+
+	last_entry = &table->entries[table->num_entries];
+	mem_dump_entry_set(dev, last_entry, entry->id, QCOM_DUMP_TYPE_DATA,
+			   entry->addr);
+	table->num_entries++;
+
+	return 0;
+}
+
+static int __init qcom_init_memdump_imem_area(struct device *dev, size_t size)
+{
+	struct qcom_memory_dump *memdump = dev_get_drvdata(dev);
+	void __iomem *table_offset;
+	void __iomem *table_base;
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL,
+				     "qcom,qcom-imem-mem-dump-table");
+	if (!np) {
+		dev_err_probe(dev, -ENODEV,
+			      "Mem dump base table DT node does not exist\n");
+		return -ENODEV;
+	}
+
+	table_base = devm_of_iomap(dev, np, 0, NULL);
+	if (!table_base) {
+		dev_err_probe(dev, -ENOMEM,
+			      "Mem dump base table imem offset mapping failed\n");
+		return -ENOMEM;
+	}
+
+	np = of_find_compatible_node(NULL, NULL,
+				     "qcom,qcom-imem-mem-dump-table-size");
+	if (!np) {
+		dev_err_probe(dev, -ENODEV,
+			      "Mem dump base table size DT node does not exist\n");
+		devm_iounmap(dev, table_base);
+		return -ENODEV;
+	}
+
+	table_offset = devm_of_iomap(dev, np, 0, NULL);
+	if (!table_offset) {
+		dev_err_probe(dev, -ENOMEM,
+			      "Mem dump base table size imem offset mapping failed\n");
+		devm_iounmap(dev, table_base);
+		return -ENOMEM;
+	}
+
+	memcpy_toio(table_base, &memdump->table_phys,
+		    sizeof(memdump->table_phys));
+	memcpy_toio(table_offset,
+		    &size, sizeof(size_t));
+
+	/* Ensure write to table_base is complete before unmapping */
+	mb();
+	dev_dbg(dev, "QCOM Memory Dump base table set up in IMEM\n");
+
+	devm_iounmap(dev, table_base);
+	devm_iounmap(dev, table_offset);
+	return 0;
+}
+
+/* Helper function for applying both vaddr and phys addr */
+static void __init mem_dump_apply_offset(void **dump_vaddr,
+					 phys_addr_t *phys_addr, size_t offset)
+{
+	*dump_vaddr += offset;
+	*phys_addr += offset;
+}
+
+/* Populate 1st level: QCOM_DUMP_TABLE_LINUX */
+static int __init mem_dump_register_data_table(struct device *dev,
+					       void *dump_vaddr,
+					       phys_addr_t phys_addr)
+{
+	struct qcom_memory_dump *memdump = dev_get_drvdata(dev);
+	struct qcom_dump_table *table;
+	struct qcom_dump_entry entry;
+	int ret;
+
+	memdump->table = dump_vaddr;
+	memdump->table->version = QCOM_DUMP_TABLE_VERSION;
+	memdump->table_phys = phys_addr;
+	mem_dump_apply_offset(&dump_vaddr, &phys_addr, sizeof(*table));
+
+	table = dump_vaddr;
+	table->version = QCOM_DUMP_TABLE_VERSION;
+	entry.id = QCOM_DUMP_TABLE_LINUX;
+	entry.addr = phys_addr;
+	ret = mem_dump_table_register(dev, &entry);
+	if (ret) {
+		dev_err(dev, "Mem dump apps data table register failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __init mem_dump_reserve_mem(struct device *dev)
+{
+	int ret;
+
+	if (of_property_present(dev->of_node, "memory-region")) {
+		ret = of_reserved_mem_device_init_by_idx(dev,
+							 dev->of_node, 0);
+		if (ret)
+			dev_err_probe(dev, ret,
+				      "Failed to initialize reserved mem\n");
+		return ret;
+	}
+
+	/* Using default CMA region is fallback choice */
+	dev_dbg(dev, "Using default CMA region\n");
+	return 0;
+}
+
+static struct page * __init
+mem_dump_alloc_mem(struct device *dev, size_t *total_size)
+{
+	struct qcom_memory_dump *memdump = dev_get_drvdata(dev);
+	struct xbc_node *linked_list;
+	int num_of_nodes = 0;
+	struct page *page;
+	const char *size_p;
+	const char *id_p;
+	int ret = 0;
+	int size;
+	int id;
+
+	memdump->mem_dump_node = xbc_find_node("memory_dump_config");
+	if (!memdump->mem_dump_node) {
+		dev_err(dev, "xbc config not found\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	*total_size = sizeof(struct qcom_dump_table) * 2;
+
+	xbc_node_for_each_subkey(memdump->mem_dump_node, linked_list) {
+		const char *name = xbc_node_get_data(linked_list);
+
+		if (!name)
+			continue;
+
+		id_p = xbc_node_find_value(linked_list, "id", NULL);
+		size_p = xbc_node_find_value(linked_list, "size", NULL);
+
+		if (id_p && size_p) {
+			ret = kstrtoint(id_p, 0, &id);
+			if (ret)
+				continue;
+
+			ret = kstrtoint(size_p, 0, &size);
+			if (ret)
+				continue;
+
+			if (check_add_overflow(*total_size, size, total_size))
+				return ERR_PTR(-EOVERFLOW);
+
+			num_of_nodes++;
+		} else {
+			continue;
+		}
+	}
+
+	if (!num_of_nodes)
+		return ERR_PTR(-EINVAL);
+
+	if (check_add_overflow(*total_size,
+			       (QCOM_DUMP_DATA_SIZE * num_of_nodes),
+			       total_size))
+		return ERR_PTR(-EOVERFLOW);
+
+	/* Align total_size */
+	if (*total_size > ALIGN(*total_size, PAGE_SIZE))
+		return ERR_PTR(-EOVERFLOW);
+	*total_size = ALIGN(*total_size, PAGE_SIZE);
+
+	/*
+	 * Physical continuous buffer.
+	 */
+	page = cma_alloc(dev_get_cma_area(dev), (*total_size / PAGE_SIZE),
+			 0, false);
+	if (page)
+		memset(page_address(page), 0, *total_size);
+	else
+		return ERR_PTR(-ENOMEM);
+
+	return page;
+}
+
+/* populate allocated region */
+static int __init mem_dump_populate_mem(struct device *dev,
+					struct page *start_page,
+					size_t total_size)
+{
+	struct qcom_memory_dump *memdump = dev_get_drvdata(dev);
+	struct qcom_dump_entry dump_entry;
+	struct qcom_dump_data *dump_data;
+	struct xbc_node *linked_list;
+	phys_addr_t phys_end_addr;
+	phys_addr_t phys_addr;
+	const char *size_p;
+	void *dump_vaddr;
+	const char *id_p;
+	int ret = 0;
+	int size;
+	int id;
+
+	phys_addr = page_to_phys(start_page);
+	phys_end_addr = phys_addr + total_size;
+	dump_vaddr = page_to_virt(start_page);
+
+	ret = mem_dump_register_data_table(dev, dump_vaddr, phys_addr);
+	if (ret) {
+		dev_err_probe(dev, ret, "Mem Dump table set up is failed\n");
+		return ret;
+	}
+
+	ret = qcom_init_memdump_imem_area(dev, total_size);
+	if (ret)
+		return ret;
+
+	/* Apply two tables: QCOM_DUMP_TYPE_TABLE and QCOM_DUMP_TYPE_DATA */
+	mem_dump_apply_offset(&dump_vaddr, &phys_addr,
+			      sizeof(struct qcom_dump_table) * 2);
+
+	/* Both "id" and "size" must be present */
+	xbc_node_for_each_subkey(memdump->mem_dump_node, linked_list) {
+		const char *name = xbc_node_get_data(linked_list);
+
+		if (!name)
+			continue;
+
+		id_p = xbc_node_find_value(linked_list, "id", NULL);
+		size_p = xbc_node_find_value(linked_list, "size", NULL);
+
+		if (id_p && size_p) {
+			ret = kstrtoint(id_p, 0, &id);
+			if (ret)
+				continue;
+
+			ret = kstrtoint(size_p, 0, &size);
+
+			if (ret)
+				continue;
+
+		/*
+		 * Physical layout: starting from two qcom_dump_data.
+		 * Following are respective dump meta data and reserved regions.
+		 * Qcom_dump_data is populated by the driver, fw parse it
+		 * and dump respective info into dump mem.
+		 * Illustrate the layout:
+		 *
+		 *   +------------------------+------------------------+
+		 *   | qcom_dump_table(TABLE) | qcom_dump_table(DATA)  |
+		 *   +------------------------+------------------------+
+		 *   +-------------+----------+-------------+----------+
+		 *   |qcom_dump_data| dump mem|qcom_dump_data| dump mem |
+		 *   +-------------+----------+-------------+----------+
+		 *   +-------------+----------+-------------+----------+
+		 *   |qcom_dump_data| dump mem|qcom_dump_data| dump mem |
+		 *   +-------------+----------+-------------+----------+
+		 *   ...
+		 */
+			dump_data = dump_vaddr;
+			dump_data->addr = phys_addr + QCOM_DUMP_DATA_SIZE;
+			dump_data->len = size;
+			dump_entry.id = id;
+			strscpy(dump_data->name, name,
+				sizeof(dump_data->name));
+			dump_entry.addr = phys_addr;
+			ret = mem_dump_data_register(dev, QCOM_DUMP_TABLE_LINUX,
+						     &dump_entry);
+			if (ret) {
+				dev_err_probe(dev, ret, "Dump data setup failed, id = %d\n",
+					      id);
+				return ret;
+			}
+
+			mem_dump_apply_offset(&dump_vaddr, &phys_addr,
+					      size + QCOM_DUMP_DATA_SIZE);
+			if (phys_addr > phys_end_addr) {
+				dev_err_probe(dev, -ENOMEM, "Exceeding allocated region\n");
+				return -ENOMEM;
+			}
+		} else {
+			continue;
+		}
+	}
+
+	return ret;
+}
+
+static int __init mem_dump_probe(struct platform_device *pdev)
+{
+	struct qcom_memory_dump *memdump;
+	struct device *dev = &pdev->dev;
+	struct page *page;
+	size_t total_size;
+	int ret = 0;
+
+	memdump = devm_kzalloc(dev, sizeof(struct qcom_memory_dump),
+			       GFP_KERNEL);
+	if (!memdump)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, memdump);
+
+	/* check and initiate CMA region */
+	ret = mem_dump_reserve_mem(dev);
+	if (ret)
+		return ret;
+
+	/* allocate and populate */
+	page = mem_dump_alloc_mem(dev, &total_size);
+	if (IS_ERR(page)) {
+		ret = PTR_ERR(page);
+		dev_err_probe(dev, ret, "mem dump alloc failed\n");
+		goto release;
+	}
+
+	ret = mem_dump_populate_mem(dev, page, total_size);
+	if (!ret)
+		dev_info(dev, "Mem dump region populated successfully\n");
+	else
+		goto free;
+
+	return 0;
+
+free:
+	cma_release(dev_get_cma_area(dev), page, (total_size / PAGE_SIZE));
+
+release:
+	of_reserved_mem_device_release(dev);
+	return ret;
+}
+
+static const struct of_device_id mem_dump_match_table[] = {
+	{.compatible = "qcom,mem-dump",},
+	{}
+};
+
+static struct platform_driver mem_dump_driver = {
+	.driver = {
+		.name = "qcom_mem_dump",
+		.of_match_table = mem_dump_match_table,
+	},
+};
+module_platform_driver_probe(mem_dump_driver, mem_dump_probe);
+
+MODULE_DESCRIPTION("Memory Dump Driver");
+MODULE_LICENSE("GPL");