diff mbox series

[v2,1/2] remoteproc: Add character device interface

Message ID 1585699438-14394-1-git-send-email-rishabhb@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series [v2,1/2] remoteproc: Add character device interface | expand

Commit Message

Rishabh Bhatnagar April 1, 2020, 12:03 a.m. UTC
Add the character device interface for userspace applications.
This interface can be used in order to boot up and shutdown
remote subsystems. Currently there is only a sysfs interface
which the userspace clients can use. If a usersapce application
crashes after booting the remote processor does not get any
indication about the crash. It might still assume that the
application is running. For example modem uses remotefs service
to fetch data from disk/flash memory. If the remotefs service
crashes, modem keeps on requesting data which might lead to a
crash. Adding a character device interface makes the remote
processor tightly coupled with the user space application.
A crash of the application leads to a close on the file descriptors
therefore shutting down the remoteproc.

Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
---
 drivers/remoteproc/Kconfig               |   9 +++
 drivers/remoteproc/Makefile              |   1 +
 drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
 drivers/remoteproc/remoteproc_internal.h |  22 +++++++
 include/linux/remoteproc.h               |   2 +
 5 files changed, 134 insertions(+)
 create mode 100644 drivers/remoteproc/remoteproc_cdev.c

Comments

Clément Leger April 1, 2020, 7:12 a.m. UTC | #1
Hi Rishabh,

----- On 1 Apr, 2020, at 02:03, Rishabh Bhatnagar rishabhb@codeaurora.org wrote:

> Add the character device interface for userspace applications.
> This interface can be used in order to boot up and shutdown
> remote subsystems. Currently there is only a sysfs interface
> which the userspace clients can use. If a usersapce application

Typo: usersapce -> userspace.

> crashes after booting the remote processor does not get any
> indication about the crash. It might still assume that the
> application is running. For example modem uses remotefs service
> to fetch data from disk/flash memory. If the remotefs service
> crashes, modem keeps on requesting data which might lead to a
> crash. Adding a character device interface makes the remote
> processor tightly coupled with the user space application.

Should probably use userspace for coherency.

> A crash of the application leads to a close on the file descriptors
> therefore shutting down the remoteproc.
> 
> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> ---
> drivers/remoteproc/Kconfig               |   9 +++
> drivers/remoteproc/Makefile              |   1 +
> drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
> drivers/remoteproc/remoteproc_internal.h |  22 +++++++
> include/linux/remoteproc.h               |   2 +
> 5 files changed, 134 insertions(+)
> create mode 100644 drivers/remoteproc/remoteproc_cdev.c
> 
> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
> index de3862c..6374b79 100644
> --- a/drivers/remoteproc/Kconfig
> +++ b/drivers/remoteproc/Kconfig
> @@ -14,6 +14,15 @@ config REMOTEPROC
> 
> if REMOTEPROC
> 
> +config REMOTEPROC_CDEV
> +	bool "Remoteproc character device interface"
> +	help
> +	  Say y here to have a character device interface for Remoteproc

Not sure about the capital Remoteproc 'R' here, but maybe others can
comment.

> +	  framework. Userspace can boot/shutdown remote processors through
> +	  this interface.
> +
> +	  It's safe to say N if you don't want to use this interface.
> +
> config IMX_REMOTEPROC
> 	tristate "IMX6/7 remoteproc support"
> 	depends on ARCH_MXC
> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
> index e30a1b1..b7d4f77 100644
> --- a/drivers/remoteproc/Makefile
> +++ b/drivers/remoteproc/Makefile
> @@ -9,6 +9,7 @@ remoteproc-y				+= remoteproc_debugfs.o
> remoteproc-y				+= remoteproc_sysfs.o
> remoteproc-y				+= remoteproc_virtio.o
> remoteproc-y				+= remoteproc_elf_loader.o
> +obj-$(CONFIG_REMOTEPROC_CDEV)		+= remoteproc_cdev.o
> obj-$(CONFIG_IMX_REMOTEPROC)		+= imx_rproc.o
> obj-$(CONFIG_MTK_SCP)			+= mtk_scp.o mtk_scp_ipi.o
> obj-$(CONFIG_OMAP_REMOTEPROC)		+= omap_remoteproc.o
> diff --git a/drivers/remoteproc/remoteproc_cdev.c
> b/drivers/remoteproc/remoteproc_cdev.c
> new file mode 100644
> index 0000000..8182bd1
> --- /dev/null
> +++ b/drivers/remoteproc/remoteproc_cdev.c
> @@ -0,0 +1,100 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Character device interface driver for Remoteproc framework.
> + *
> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
> + */
> +
> +#include <linux/cdev.h>
> +#include <linux/fs.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/remoteproc.h>
> +
> +#include "remoteproc_internal.h"
> +
> +#define NUM_RPROC_DEVICES	64
> +static dev_t rproc_cdev;
> +static DEFINE_IDA(cdev_minor_ida);
> +
> +static int rproc_cdev_open(struct inode *inode, struct file *file)
> +{
> +	struct rproc *rproc;
> +
> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> +
> +	if (!rproc)
> +		return -EINVAL;
> +
> +	if (rproc->state == RPROC_RUNNING)
> +		return -EBUSY;
> +
> +	return rproc_boot(rproc);
> +}
> +
> +static int rproc_cdev_release(struct inode *inode, struct file *file)
> +{
> +	struct rproc *rproc;
> +
> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> +
> +	if (!rproc || rproc->state != RPROC_RUNNING)

rproc state != RUNNING can probably happen if the remoteproc
crashed I think. However, you'll return EINVAL, though I'm not
sure about what the user could do in such cases, maybe returning
another more meaningful value could be useful.

> +		return -EINVAL;
> +
> +	rproc_shutdown(rproc);
> +
> +	return 0;
> +}
> +
> +static const struct file_operations rproc_fops = {
> +	.open = rproc_cdev_open,
> +	.release = rproc_cdev_release,
> +};
> +
> +int rproc_char_device_add(struct rproc *rproc)
> +{
> +	int ret, minor;
> +	dev_t cdevt;
> +
> +	minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
> +			       GFP_KERNEL);
> +	if (minor < 0) {
> +		dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
> +			__func__, minor);
> +		return -ENODEV;
> +	}
> +
> +	cdev_init(&rproc->char_dev, &rproc_fops);
> +	rproc->char_dev.owner = THIS_MODULE;
> +
> +	cdevt = MKDEV(MAJOR(rproc_cdev), minor);
> +	ret = cdev_add(&rproc->char_dev, cdevt, 1);
> +	if (ret < 0)
> +		ida_simple_remove(&cdev_minor_ida, minor);
> +
> +	rproc->dev.devt = cdevt;
> +	return ret;
> +}
> +
> +void rproc_char_device_remove(struct rproc *rproc)
> +{
> +	__unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
> +			    "rproc");
> +	ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
> +}
> +
> +void __init rproc_init_cdev(void)
> +{
> +	int ret;
> +
> +	ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
> +	if (ret < 0) {
> +		pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
> +		return;
> +	}
> +}
> +
> +void __exit rproc_exit_cdev(void)
> +{
> +	__unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");
> +}
> diff --git a/drivers/remoteproc/remoteproc_internal.h
> b/drivers/remoteproc/remoteproc_internal.h
> index 493ef92..28d61a1 100644
> --- a/drivers/remoteproc/remoteproc_internal.h
> +++ b/drivers/remoteproc/remoteproc_internal.h
> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char *name,
> struct rproc *rproc,
> int rproc_init_sysfs(void);
> void rproc_exit_sysfs(void);
> 
> +#ifdef CONFIG_REMOTEPROC_CDEV
> +void rproc_init_cdev(void);
> +void rproc_exit_cdev(void);
> +int rproc_char_device_add(struct rproc *rproc);
> +void rproc_char_device_remove(struct rproc *rproc);
> +#else
> +static inline void rproc_init_cdev(void)
> +{
> +}
> +static inline void rproc_exit_cdev(void)
> +{
> +}
> +static inline int rproc_char_device_add(struct rproc *rproc)
> +{
> +	return 0;
> +}
> +static inline void  rproc_char_device_remove(struct rproc *rproc)
> +{
> +}
> +#endif
> +
> void rproc_free_vring(struct rproc_vring *rvring);
> int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
> 
> @@ -63,6 +84,7 @@ struct resource_table *rproc_elf_find_loaded_rsc_table(struct
> rproc *rproc,
> struct rproc_mem_entry *
> rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
> 
> +

Useless newline ?

> static inline
> int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
> {
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 16ad666..c4ca796 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -37,6 +37,7 @@
> 
> #include <linux/types.h>
> #include <linux/mutex.h>
> +#include <linux/cdev.h>
> #include <linux/virtio.h>
> #include <linux/completion.h>
> #include <linux/idr.h>
> @@ -514,6 +515,7 @@ struct rproc {
> 	bool auto_boot;
> 	struct list_head dump_segments;
> 	int nb_vdev;
> +	struct cdev char_dev;
> };
> 
> /**
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
Bjorn Andersson April 1, 2020, 8:08 p.m. UTC | #2
On Tue 31 Mar 17:03 PDT 2020, Rishabh Bhatnagar wrote:

> Add the character device interface for userspace applications.
> This interface can be used in order to boot up and shutdown
> remote subsystems. Currently there is only a sysfs interface
> which the userspace clients can use. If a usersapce application
> crashes after booting the remote processor does not get any
> indication about the crash. It might still assume that the
> application is running. For example modem uses remotefs service
> to fetch data from disk/flash memory. If the remotefs service
> crashes, modem keeps on requesting data which might lead to a
> crash. Adding a character device interface makes the remote
> processor tightly coupled with the user space application.
> A crash of the application leads to a close on the file descriptors
> therefore shutting down the remoteproc.
> 
> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> ---
>  drivers/remoteproc/Kconfig               |   9 +++
>  drivers/remoteproc/Makefile              |   1 +
>  drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>  include/linux/remoteproc.h               |   2 +
>  5 files changed, 134 insertions(+)
>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
> 
> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
> index de3862c..6374b79 100644
> --- a/drivers/remoteproc/Kconfig
> +++ b/drivers/remoteproc/Kconfig
> @@ -14,6 +14,15 @@ config REMOTEPROC
>  
>  if REMOTEPROC
>  
> +config REMOTEPROC_CDEV
> +	bool "Remoteproc character device interface"
> +	help
> +	  Say y here to have a character device interface for Remoteproc
> +	  framework. Userspace can boot/shutdown remote processors through
> +	  this interface.
> +
> +	  It's safe to say N if you don't want to use this interface.
> +
>  config IMX_REMOTEPROC
>  	tristate "IMX6/7 remoteproc support"
>  	depends on ARCH_MXC
> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
> index e30a1b1..b7d4f77 100644
> --- a/drivers/remoteproc/Makefile
> +++ b/drivers/remoteproc/Makefile
> @@ -9,6 +9,7 @@ remoteproc-y				+= remoteproc_debugfs.o
>  remoteproc-y				+= remoteproc_sysfs.o
>  remoteproc-y				+= remoteproc_virtio.o
>  remoteproc-y				+= remoteproc_elf_loader.o
> +obj-$(CONFIG_REMOTEPROC_CDEV)		+= remoteproc_cdev.o
>  obj-$(CONFIG_IMX_REMOTEPROC)		+= imx_rproc.o
>  obj-$(CONFIG_MTK_SCP)			+= mtk_scp.o mtk_scp_ipi.o
>  obj-$(CONFIG_OMAP_REMOTEPROC)		+= omap_remoteproc.o
> diff --git a/drivers/remoteproc/remoteproc_cdev.c b/drivers/remoteproc/remoteproc_cdev.c
> new file mode 100644
> index 0000000..8182bd1
> --- /dev/null
> +++ b/drivers/remoteproc/remoteproc_cdev.c
> @@ -0,0 +1,100 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Character device interface driver for Remoteproc framework.
> + *
> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
> + */
> +
> +#include <linux/cdev.h>
> +#include <linux/fs.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/remoteproc.h>
> +
> +#include "remoteproc_internal.h"
> +
> +#define NUM_RPROC_DEVICES	64
> +static dev_t rproc_cdev;
> +static DEFINE_IDA(cdev_minor_ida);
> +
> +static int rproc_cdev_open(struct inode *inode, struct file *file)
> +{
> +	struct rproc *rproc;
> +
> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);

I would prefer this to be a on a single line
	struct rproc *rproc = container_of(..);

> +
> +	if (!rproc)

I don't think this is possible.

> +		return -EINVAL;
> +
> +	if (rproc->state == RPROC_RUNNING)
> +		return -EBUSY;
> +

This is racy, it's possible that state wasn't RUNNING, but at the time
we're entering rproc_boot() it is - or as Clement correctly points out,
the core might be in CRASHED state.

> +	return rproc_boot(rproc);

Feels like we would want a variant of rproc_boot() that doesn't do
refcounting... Maybe a rproc_boot_exclusive() that takes the mutex and
then fails if the core is refcounted already?

> +}
> +
> +static int rproc_cdev_release(struct inode *inode, struct file *file)
> +{
> +	struct rproc *rproc;
> +
> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> +
> +	if (!rproc || rproc->state != RPROC_RUNNING)
> +		return -EINVAL;

rproc can't be NULL here and the core might be in CRASHED state here, in
which case we still want to abort that and shut down the core...

Note that in the event of calling close() the return value is discarded
on the way to userspace and in the event that the process is killed we
still expect the remoteproc to be shut down (if the refcount hit 0). 

> +
> +	rproc_shutdown(rproc);
> +
> +	return 0;
> +}
> +
> +static const struct file_operations rproc_fops = {
> +	.open = rproc_cdev_open,
> +	.release = rproc_cdev_release,
> +};
> +
> +int rproc_char_device_add(struct rproc *rproc)
> +{
> +	int ret, minor;
> +	dev_t cdevt;
> +
> +	minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,

Can't you use rproc->index as minor?

> +			       GFP_KERNEL);
> +	if (minor < 0) {
> +		dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
> +			__func__, minor);
> +		return -ENODEV;
> +	}
> +
> +	cdev_init(&rproc->char_dev, &rproc_fops);
> +	rproc->char_dev.owner = THIS_MODULE;
> +
> +	cdevt = MKDEV(MAJOR(rproc_cdev), minor);
> +	ret = cdev_add(&rproc->char_dev, cdevt, 1);
> +	if (ret < 0)
> +		ida_simple_remove(&cdev_minor_ida, minor);
> +
> +	rproc->dev.devt = cdevt;
> +	return ret;
> +}
> +
> +void rproc_char_device_remove(struct rproc *rproc)
> +{
> +	__unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
> +			    "rproc");

Shouldn't this be cdev_del()?

> +	ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
> +}
> +
> +void __init rproc_init_cdev(void)
> +{
> +	int ret;
> +
> +	ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
> +	if (ret < 0) {
> +		pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
> +		return;

Drop this return, and hence the {}

> +	}
> +}
> +
> +void __exit rproc_exit_cdev(void)
> +{
> +	__unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");

unregister_chrdev_region();

> +}
> diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
> index 493ef92..28d61a1 100644
> --- a/drivers/remoteproc/remoteproc_internal.h
> +++ b/drivers/remoteproc/remoteproc_internal.h
> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char *name, struct rproc *rproc,
>  int rproc_init_sysfs(void);
>  void rproc_exit_sysfs(void);
>  
> +#ifdef CONFIG_REMOTEPROC_CDEV
> +void rproc_init_cdev(void);
> +void rproc_exit_cdev(void);
> +int rproc_char_device_add(struct rproc *rproc);
> +void rproc_char_device_remove(struct rproc *rproc);
> +#else
> +static inline void rproc_init_cdev(void)
> +{
> +}
> +static inline void rproc_exit_cdev(void)
> +{
> +}
> +static inline int rproc_char_device_add(struct rproc *rproc)
> +{
> +	return 0;
> +}
> +static inline void  rproc_char_device_remove(struct rproc *rproc)
> +{
> +}
> +#endif
> +
>  void rproc_free_vring(struct rproc_vring *rvring);
>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>  
> @@ -63,6 +84,7 @@ struct resource_table *rproc_elf_find_loaded_rsc_table(struct rproc *rproc,
>  struct rproc_mem_entry *
>  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
>  
> +
>  static inline
>  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
>  {
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 16ad666..c4ca796 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -37,6 +37,7 @@
>  
>  #include <linux/types.h>
>  #include <linux/mutex.h>
> +#include <linux/cdev.h>
>  #include <linux/virtio.h>
>  #include <linux/completion.h>
>  #include <linux/idr.h>
> @@ -514,6 +515,7 @@ struct rproc {
>  	bool auto_boot;
>  	struct list_head dump_segments;
>  	int nb_vdev;
> +	struct cdev char_dev;
>  };
>  
>  /**
> -- 
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
Rishabh Bhatnagar April 1, 2020, 11:53 p.m. UTC | #3
On 2020-04-01 13:08, Bjorn Andersson wrote:
> On Tue 31 Mar 17:03 PDT 2020, Rishabh Bhatnagar wrote:
> 
>> Add the character device interface for userspace applications.
>> This interface can be used in order to boot up and shutdown
>> remote subsystems. Currently there is only a sysfs interface
>> which the userspace clients can use. If a usersapce application
>> crashes after booting the remote processor does not get any
>> indication about the crash. It might still assume that the
>> application is running. For example modem uses remotefs service
>> to fetch data from disk/flash memory. If the remotefs service
>> crashes, modem keeps on requesting data which might lead to a
>> crash. Adding a character device interface makes the remote
>> processor tightly coupled with the user space application.
>> A crash of the application leads to a close on the file descriptors
>> therefore shutting down the remoteproc.
>> 
>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>> ---
>>  drivers/remoteproc/Kconfig               |   9 +++
>>  drivers/remoteproc/Makefile              |   1 +
>>  drivers/remoteproc/remoteproc_cdev.c     | 100 
>> +++++++++++++++++++++++++++++++
>>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>>  include/linux/remoteproc.h               |   2 +
>>  5 files changed, 134 insertions(+)
>>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
>> 
>> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
>> index de3862c..6374b79 100644
>> --- a/drivers/remoteproc/Kconfig
>> +++ b/drivers/remoteproc/Kconfig
>> @@ -14,6 +14,15 @@ config REMOTEPROC
>> 
>>  if REMOTEPROC
>> 
>> +config REMOTEPROC_CDEV
>> +	bool "Remoteproc character device interface"
>> +	help
>> +	  Say y here to have a character device interface for Remoteproc
>> +	  framework. Userspace can boot/shutdown remote processors through
>> +	  this interface.
>> +
>> +	  It's safe to say N if you don't want to use this interface.
>> +
>>  config IMX_REMOTEPROC
>>  	tristate "IMX6/7 remoteproc support"
>>  	depends on ARCH_MXC
>> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
>> index e30a1b1..b7d4f77 100644
>> --- a/drivers/remoteproc/Makefile
>> +++ b/drivers/remoteproc/Makefile
>> @@ -9,6 +9,7 @@ remoteproc-y				+= remoteproc_debugfs.o
>>  remoteproc-y				+= remoteproc_sysfs.o
>>  remoteproc-y				+= remoteproc_virtio.o
>>  remoteproc-y				+= remoteproc_elf_loader.o
>> +obj-$(CONFIG_REMOTEPROC_CDEV)		+= remoteproc_cdev.o
>>  obj-$(CONFIG_IMX_REMOTEPROC)		+= imx_rproc.o
>>  obj-$(CONFIG_MTK_SCP)			+= mtk_scp.o mtk_scp_ipi.o
>>  obj-$(CONFIG_OMAP_REMOTEPROC)		+= omap_remoteproc.o
>> diff --git a/drivers/remoteproc/remoteproc_cdev.c 
>> b/drivers/remoteproc/remoteproc_cdev.c
>> new file mode 100644
>> index 0000000..8182bd1
>> --- /dev/null
>> +++ b/drivers/remoteproc/remoteproc_cdev.c
>> @@ -0,0 +1,100 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Character device interface driver for Remoteproc framework.
>> + *
>> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
>> + */
>> +
>> +#include <linux/cdev.h>
>> +#include <linux/fs.h>
>> +#include <linux/module.h>
>> +#include <linux/mutex.h>
>> +#include <linux/remoteproc.h>
>> +
>> +#include "remoteproc_internal.h"
>> +
>> +#define NUM_RPROC_DEVICES	64
>> +static dev_t rproc_cdev;
>> +static DEFINE_IDA(cdev_minor_ida);
>> +
>> +static int rproc_cdev_open(struct inode *inode, struct file *file)
>> +{
>> +	struct rproc *rproc;
>> +
>> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> 
> I would prefer this to be a on a single line
> 	struct rproc *rproc = container_of(..);
> 
>> +
>> +	if (!rproc)
> 
> I don't think this is possible.
> 
>> +		return -EINVAL;
>> +
>> +	if (rproc->state == RPROC_RUNNING)
>> +		return -EBUSY;
>> +
> 
> This is racy, it's possible that state wasn't RUNNING, but at the time
> we're entering rproc_boot() it is - or as Clement correctly points out,
> the core might be in CRASHED state.
> 
I can check for OFFLINE state and only then call rproc_boot.
But we would need to check inside rproc_boot if the state was OFFLINE as 
well.
Same issue we might need to fix for sysfs interface?
>> +	return rproc_boot(rproc);
> 
> Feels like we would want a variant of rproc_boot() that doesn't do
> refcounting... Maybe a rproc_boot_exclusive() that takes the mutex and
> then fails if the core is refcounted already?
> 
>> +}
>> +
>> +static int rproc_cdev_release(struct inode *inode, struct file *file)
>> +{
>> +	struct rproc *rproc;
>> +
>> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>> +
>> +	if (!rproc || rproc->state != RPROC_RUNNING)
>> +		return -EINVAL;
> 
> rproc can't be NULL here and the core might be in CRASHED state here, 
> in
> which case we still want to abort that and shut down the core...
> 
> Note that in the event of calling close() the return value is discarded
> on the way to userspace and in the event that the process is killed we
> still expect the remoteproc to be shut down (if the refcount hit 0).
> 
>> +
>> +	rproc_shutdown(rproc);
>> +
>> +	return 0;
>> +}
>> +
>> +static const struct file_operations rproc_fops = {
>> +	.open = rproc_cdev_open,
>> +	.release = rproc_cdev_release,
>> +};
>> +
>> +int rproc_char_device_add(struct rproc *rproc)
>> +{
>> +	int ret, minor;
>> +	dev_t cdevt;
>> +
>> +	minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
> 
> Can't you use rproc->index as minor?
> 
>> +			       GFP_KERNEL);
>> +	if (minor < 0) {
>> +		dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
>> +			__func__, minor);
>> +		return -ENODEV;
>> +	}
>> +
>> +	cdev_init(&rproc->char_dev, &rproc_fops);
>> +	rproc->char_dev.owner = THIS_MODULE;
>> +
>> +	cdevt = MKDEV(MAJOR(rproc_cdev), minor);
>> +	ret = cdev_add(&rproc->char_dev, cdevt, 1);
>> +	if (ret < 0)
>> +		ida_simple_remove(&cdev_minor_ida, minor);
>> +
>> +	rproc->dev.devt = cdevt;
>> +	return ret;
>> +}
>> +
>> +void rproc_char_device_remove(struct rproc *rproc)
>> +{
>> +	__unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 
>> 1,
>> +			    "rproc");
> 
> Shouldn't this be cdev_del()?
_unregist_chrdev should delete the cdev internally.
> 
>> +	ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
>> +}
>> +
>> +void __init rproc_init_cdev(void)
>> +{
>> +	int ret;
>> +
>> +	ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, 
>> "rproc");
>> +	if (ret < 0) {
>> +		pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
>> +		return;
> 
> Drop this return, and hence the {}
> 
>> +	}
>> +}
>> +
>> +void __exit rproc_exit_cdev(void)
>> +{
>> +	__unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, 
>> "rproc");
> 
> unregister_chrdev_region();
> 
we might want to delete character devices as well, which is done by 
__unregister_chrdev
>> +}
>> diff --git a/drivers/remoteproc/remoteproc_internal.h 
>> b/drivers/remoteproc/remoteproc_internal.h
>> index 493ef92..28d61a1 100644
>> --- a/drivers/remoteproc/remoteproc_internal.h
>> +++ b/drivers/remoteproc/remoteproc_internal.h
>> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char 
>> *name, struct rproc *rproc,
>>  int rproc_init_sysfs(void);
>>  void rproc_exit_sysfs(void);
>> 
>> +#ifdef CONFIG_REMOTEPROC_CDEV
>> +void rproc_init_cdev(void);
>> +void rproc_exit_cdev(void);
>> +int rproc_char_device_add(struct rproc *rproc);
>> +void rproc_char_device_remove(struct rproc *rproc);
>> +#else
>> +static inline void rproc_init_cdev(void)
>> +{
>> +}
>> +static inline void rproc_exit_cdev(void)
>> +{
>> +}
>> +static inline int rproc_char_device_add(struct rproc *rproc)
>> +{
>> +	return 0;
>> +}
>> +static inline void  rproc_char_device_remove(struct rproc *rproc)
>> +{
>> +}
>> +#endif
>> +
>>  void rproc_free_vring(struct rproc_vring *rvring);
>>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>> 
>> @@ -63,6 +84,7 @@ struct resource_table 
>> *rproc_elf_find_loaded_rsc_table(struct rproc *rproc,
>>  struct rproc_mem_entry *
>>  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, 
>> ...);
>> 
>> +
>>  static inline
>>  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware 
>> *fw)
>>  {
>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>> index 16ad666..c4ca796 100644
>> --- a/include/linux/remoteproc.h
>> +++ b/include/linux/remoteproc.h
>> @@ -37,6 +37,7 @@
>> 
>>  #include <linux/types.h>
>>  #include <linux/mutex.h>
>> +#include <linux/cdev.h>
>>  #include <linux/virtio.h>
>>  #include <linux/completion.h>
>>  #include <linux/idr.h>
>> @@ -514,6 +515,7 @@ struct rproc {
>>  	bool auto_boot;
>>  	struct list_head dump_segments;
>>  	int nb_vdev;
>> +	struct cdev char_dev;
>>  };
>> 
>>  /**
>> --
>> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora 
>> Forum,
>> a Linux Foundation Collaborative Project
Arnaud POULIQUEN April 2, 2020, 5:28 p.m. UTC | #4
Hi 

On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
> Add the character device interface for userspace applications.
> This interface can be used in order to boot up and shutdown
> remote subsystems. Currently there is only a sysfs interface
> which the userspace clients can use. If a usersapce application
> crashes after booting the remote processor does not get any
> indication about the crash. It might still assume that the
> application is running. For example modem uses remotefs service
> to fetch data from disk/flash memory. If the remotefs service
> crashes, modem keeps on requesting data which might lead to a
> crash. Adding a character device interface makes the remote
> processor tightly coupled with the user space application.
> A crash of the application leads to a close on the file descriptors
> therefore shutting down the remoteproc.

Sorry I'm late in the discussion, I hope I've gone through the whole
discussion so I don't reopen a closed point...

Something here is not crystal clear to me so I'd rather share it...

I suppose that you the automatic restart of the application is not possible to
stop and restart the remote processor...
 
Why this use case can not be solved by a process monitor or a service
in userland that detects the application crash and stop the remote firmware using
the sysfs interface?

I just want to be sure that there is no alternative to this, because having two ways
for application to shutdown the firmware seems to me confusing...

What about the opposite service, mean inform the application that the remote
processor is crashed?
Do you identify such need? or the "auto" crash recovery is sufficient?

Thanks,
Arnaud
> 
> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> ---
>  drivers/remoteproc/Kconfig               |   9 +++
>  drivers/remoteproc/Makefile              |   1 +
>  drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>  include/linux/remoteproc.h               |   2 +
>  5 files changed, 134 insertions(+)
>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
> 
> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
> index de3862c..6374b79 100644
> --- a/drivers/remoteproc/Kconfig
> +++ b/drivers/remoteproc/Kconfig
> @@ -14,6 +14,15 @@ config REMOTEPROC
>  
>  if REMOTEPROC
>  
> +config REMOTEPROC_CDEV
> +	bool "Remoteproc character device interface"
> +	help
> +	  Say y here to have a character device interface for Remoteproc
> +	  framework. Userspace can boot/shutdown remote processors through
> +	  this interface.
> +
> +	  It's safe to say N if you don't want to use this interface.
> +
>  config IMX_REMOTEPROC
>  	tristate "IMX6/7 remoteproc support"
>  	depends on ARCH_MXC
> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
> index e30a1b1..b7d4f77 100644
> --- a/drivers/remoteproc/Makefile
> +++ b/drivers/remoteproc/Makefile
> @@ -9,6 +9,7 @@ remoteproc-y				+= remoteproc_debugfs.o
>  remoteproc-y				+= remoteproc_sysfs.o
>  remoteproc-y				+= remoteproc_virtio.o
>  remoteproc-y				+= remoteproc_elf_loader.o
> +obj-$(CONFIG_REMOTEPROC_CDEV)		+= remoteproc_cdev.o
>  obj-$(CONFIG_IMX_REMOTEPROC)		+= imx_rproc.o
>  obj-$(CONFIG_MTK_SCP)			+= mtk_scp.o mtk_scp_ipi.o
>  obj-$(CONFIG_OMAP_REMOTEPROC)		+= omap_remoteproc.o
> diff --git a/drivers/remoteproc/remoteproc_cdev.c b/drivers/remoteproc/remoteproc_cdev.c
> new file mode 100644
> index 0000000..8182bd1
> --- /dev/null
> +++ b/drivers/remoteproc/remoteproc_cdev.c
> @@ -0,0 +1,100 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Character device interface driver for Remoteproc framework.
> + *
> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
> + */
> +
> +#include <linux/cdev.h>
> +#include <linux/fs.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/remoteproc.h>
> +
> +#include "remoteproc_internal.h"
> +
> +#define NUM_RPROC_DEVICES	64
> +static dev_t rproc_cdev;
> +static DEFINE_IDA(cdev_minor_ida);
> +
> +static int rproc_cdev_open(struct inode *inode, struct file *file)
> +{
> +	struct rproc *rproc;
> +
> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> +
> +	if (!rproc)
> +		return -EINVAL;
> +
> +	if (rproc->state == RPROC_RUNNING)
> +		return -EBUSY;
> +
> +	return rproc_boot(rproc);
> +}
> +
> +static int rproc_cdev_release(struct inode *inode, struct file *file)
> +{
> +	struct rproc *rproc;
> +
> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> +
> +	if (!rproc || rproc->state != RPROC_RUNNING)
> +		return -EINVAL;
> +
> +	rproc_shutdown(rproc);
> +
> +	return 0;
> +}
> +
> +static const struct file_operations rproc_fops = {
> +	.open = rproc_cdev_open,
> +	.release = rproc_cdev_release,
> +};
> +
> +int rproc_char_device_add(struct rproc *rproc)
> +{
> +	int ret, minor;
> +	dev_t cdevt;
> +
> +	minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
> +			       GFP_KERNEL);
> +	if (minor < 0) {
> +		dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
> +			__func__, minor);
> +		return -ENODEV;
> +	}
> +
> +	cdev_init(&rproc->char_dev, &rproc_fops);
> +	rproc->char_dev.owner = THIS_MODULE;
> +
> +	cdevt = MKDEV(MAJOR(rproc_cdev), minor);
> +	ret = cdev_add(&rproc->char_dev, cdevt, 1);
> +	if (ret < 0)
> +		ida_simple_remove(&cdev_minor_ida, minor);
> +
> +	rproc->dev.devt = cdevt;
> +	return ret;
> +}
> +
> +void rproc_char_device_remove(struct rproc *rproc)
> +{
> +	__unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
> +			    "rproc");
> +	ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
> +}
> +
> +void __init rproc_init_cdev(void)
> +{
> +	int ret;
> +
> +	ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
> +	if (ret < 0) {
> +		pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
> +		return;
> +	}
> +}
> +
> +void __exit rproc_exit_cdev(void)
> +{
> +	__unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");
> +}
> diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
> index 493ef92..28d61a1 100644
> --- a/drivers/remoteproc/remoteproc_internal.h
> +++ b/drivers/remoteproc/remoteproc_internal.h
> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char *name, struct rproc *rproc,
>  int rproc_init_sysfs(void);
>  void rproc_exit_sysfs(void);
>  
> +#ifdef CONFIG_REMOTEPROC_CDEV
> +void rproc_init_cdev(void);
> +void rproc_exit_cdev(void);
> +int rproc_char_device_add(struct rproc *rproc);
> +void rproc_char_device_remove(struct rproc *rproc);
> +#else
> +static inline void rproc_init_cdev(void)
> +{
> +}
> +static inline void rproc_exit_cdev(void)
> +{
> +}
> +static inline int rproc_char_device_add(struct rproc *rproc)
> +{
> +	return 0;
> +}
> +static inline void  rproc_char_device_remove(struct rproc *rproc)
> +{
> +}
> +#endif
> +
>  void rproc_free_vring(struct rproc_vring *rvring);
>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>  
> @@ -63,6 +84,7 @@ struct resource_table *rproc_elf_find_loaded_rsc_table(struct rproc *rproc,
>  struct rproc_mem_entry *
>  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
>  
> +
>  static inline
>  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
>  {
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 16ad666..c4ca796 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -37,6 +37,7 @@
>  
>  #include <linux/types.h>
>  #include <linux/mutex.h>
> +#include <linux/cdev.h>
>  #include <linux/virtio.h>
>  #include <linux/completion.h>
>  #include <linux/idr.h>
> @@ -514,6 +515,7 @@ struct rproc {
>  	bool auto_boot;
>  	struct list_head dump_segments;
>  	int nb_vdev;
> +	struct cdev char_dev;
>  };
>  
>  /**
>
Rishabh Bhatnagar April 3, 2020, 7:13 p.m. UTC | #5
On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
> Hi
> 
> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
>> Add the character device interface for userspace applications.
>> This interface can be used in order to boot up and shutdown
>> remote subsystems. Currently there is only a sysfs interface
>> which the userspace clients can use. If a usersapce application
>> crashes after booting the remote processor does not get any
>> indication about the crash. It might still assume that the
>> application is running. For example modem uses remotefs service
>> to fetch data from disk/flash memory. If the remotefs service
>> crashes, modem keeps on requesting data which might lead to a
>> crash. Adding a character device interface makes the remote
>> processor tightly coupled with the user space application.
>> A crash of the application leads to a close on the file descriptors
>> therefore shutting down the remoteproc.
> 
> Sorry I'm late in the discussion, I hope I've gone through the whole
> discussion so I don't reopen a closed point...
> 
> Something here is not crystal clear to me so I'd rather share it...
> 
> I suppose that you the automatic restart of the application is not 
> possible to
> stop and restart the remote processor...
Yes correct, while we wait for the application to restart we might 
observe a
fatal crash.
> 
> Why this use case can not be solved by a process monitor or a service
> in userland that detects the application crash and stop the remote
> firmware using
> the sysfs interface?
> 
What happens in the case where the process monitor itself crashes? This 
is
actually the approach we follow in our downstream code. We have a 
central entity
in userspace that controls bootup/shutdown of some remote processors 
based on the
votes from userspace clients. We have observed cases where this entity
itself crashes and remote processors are left hanging.
> I just want to be sure that there is no alternative to this, because
> having two ways
> for application to shutdown the firmware seems to me confusing...
Does making this interface optional/configurable helps?
> 
> What about the opposite service, mean inform the application that the 
> remote
> processor is crashed?
> Do you identify such need? or the "auto" crash recovery is sufficient?
Auto recovery works perfectly for us. Although there is a mechanism in
place using QMI(Qualcomm MSM interface) that can notify clients about 
remote
processor crash.
> 
> Thanks,
> Arnaud
>> 
>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>> ---
>>  drivers/remoteproc/Kconfig               |   9 +++
>>  drivers/remoteproc/Makefile              |   1 +
>>  drivers/remoteproc/remoteproc_cdev.c     | 100 
>> +++++++++++++++++++++++++++++++
>>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>>  include/linux/remoteproc.h               |   2 +
>>  5 files changed, 134 insertions(+)
>>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
>> 
>> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
>> index de3862c..6374b79 100644
>> --- a/drivers/remoteproc/Kconfig
>> +++ b/drivers/remoteproc/Kconfig
>> @@ -14,6 +14,15 @@ config REMOTEPROC
>> 
>>  if REMOTEPROC
>> 
>> +config REMOTEPROC_CDEV
>> +	bool "Remoteproc character device interface"
>> +	help
>> +	  Say y here to have a character device interface for Remoteproc
>> +	  framework. Userspace can boot/shutdown remote processors through
>> +	  this interface.
>> +
>> +	  It's safe to say N if you don't want to use this interface.
>> +
>>  config IMX_REMOTEPROC
>>  	tristate "IMX6/7 remoteproc support"
>>  	depends on ARCH_MXC
>> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
>> index e30a1b1..b7d4f77 100644
>> --- a/drivers/remoteproc/Makefile
>> +++ b/drivers/remoteproc/Makefile
>> @@ -9,6 +9,7 @@ remoteproc-y				+= remoteproc_debugfs.o
>>  remoteproc-y				+= remoteproc_sysfs.o
>>  remoteproc-y				+= remoteproc_virtio.o
>>  remoteproc-y				+= remoteproc_elf_loader.o
>> +obj-$(CONFIG_REMOTEPROC_CDEV)		+= remoteproc_cdev.o
>>  obj-$(CONFIG_IMX_REMOTEPROC)		+= imx_rproc.o
>>  obj-$(CONFIG_MTK_SCP)			+= mtk_scp.o mtk_scp_ipi.o
>>  obj-$(CONFIG_OMAP_REMOTEPROC)		+= omap_remoteproc.o
>> diff --git a/drivers/remoteproc/remoteproc_cdev.c 
>> b/drivers/remoteproc/remoteproc_cdev.c
>> new file mode 100644
>> index 0000000..8182bd1
>> --- /dev/null
>> +++ b/drivers/remoteproc/remoteproc_cdev.c
>> @@ -0,0 +1,100 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Character device interface driver for Remoteproc framework.
>> + *
>> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
>> + */
>> +
>> +#include <linux/cdev.h>
>> +#include <linux/fs.h>
>> +#include <linux/module.h>
>> +#include <linux/mutex.h>
>> +#include <linux/remoteproc.h>
>> +
>> +#include "remoteproc_internal.h"
>> +
>> +#define NUM_RPROC_DEVICES	64
>> +static dev_t rproc_cdev;
>> +static DEFINE_IDA(cdev_minor_ida);
>> +
>> +static int rproc_cdev_open(struct inode *inode, struct file *file)
>> +{
>> +	struct rproc *rproc;
>> +
>> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>> +
>> +	if (!rproc)
>> +		return -EINVAL;
>> +
>> +	if (rproc->state == RPROC_RUNNING)
>> +		return -EBUSY;
>> +
>> +	return rproc_boot(rproc);
>> +}
>> +
>> +static int rproc_cdev_release(struct inode *inode, struct file *file)
>> +{
>> +	struct rproc *rproc;
>> +
>> +	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>> +
>> +	if (!rproc || rproc->state != RPROC_RUNNING)
>> +		return -EINVAL;
>> +
>> +	rproc_shutdown(rproc);
>> +
>> +	return 0;
>> +}
>> +
>> +static const struct file_operations rproc_fops = {
>> +	.open = rproc_cdev_open,
>> +	.release = rproc_cdev_release,
>> +};
>> +
>> +int rproc_char_device_add(struct rproc *rproc)
>> +{
>> +	int ret, minor;
>> +	dev_t cdevt;
>> +
>> +	minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
>> +			       GFP_KERNEL);
>> +	if (minor < 0) {
>> +		dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
>> +			__func__, minor);
>> +		return -ENODEV;
>> +	}
>> +
>> +	cdev_init(&rproc->char_dev, &rproc_fops);
>> +	rproc->char_dev.owner = THIS_MODULE;
>> +
>> +	cdevt = MKDEV(MAJOR(rproc_cdev), minor);
>> +	ret = cdev_add(&rproc->char_dev, cdevt, 1);
>> +	if (ret < 0)
>> +		ida_simple_remove(&cdev_minor_ida, minor);
>> +
>> +	rproc->dev.devt = cdevt;
>> +	return ret;
>> +}
>> +
>> +void rproc_char_device_remove(struct rproc *rproc)
>> +{
>> +	__unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 
>> 1,
>> +			    "rproc");
>> +	ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
>> +}
>> +
>> +void __init rproc_init_cdev(void)
>> +{
>> +	int ret;
>> +
>> +	ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, 
>> "rproc");
>> +	if (ret < 0) {
>> +		pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
>> +		return;
>> +	}
>> +}
>> +
>> +void __exit rproc_exit_cdev(void)
>> +{
>> +	__unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, 
>> "rproc");
>> +}
>> diff --git a/drivers/remoteproc/remoteproc_internal.h 
>> b/drivers/remoteproc/remoteproc_internal.h
>> index 493ef92..28d61a1 100644
>> --- a/drivers/remoteproc/remoteproc_internal.h
>> +++ b/drivers/remoteproc/remoteproc_internal.h
>> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char 
>> *name, struct rproc *rproc,
>>  int rproc_init_sysfs(void);
>>  void rproc_exit_sysfs(void);
>> 
>> +#ifdef CONFIG_REMOTEPROC_CDEV
>> +void rproc_init_cdev(void);
>> +void rproc_exit_cdev(void);
>> +int rproc_char_device_add(struct rproc *rproc);
>> +void rproc_char_device_remove(struct rproc *rproc);
>> +#else
>> +static inline void rproc_init_cdev(void)
>> +{
>> +}
>> +static inline void rproc_exit_cdev(void)
>> +{
>> +}
>> +static inline int rproc_char_device_add(struct rproc *rproc)
>> +{
>> +	return 0;
>> +}
>> +static inline void  rproc_char_device_remove(struct rproc *rproc)
>> +{
>> +}
>> +#endif
>> +
>>  void rproc_free_vring(struct rproc_vring *rvring);
>>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>> 
>> @@ -63,6 +84,7 @@ struct resource_table 
>> *rproc_elf_find_loaded_rsc_table(struct rproc *rproc,
>>  struct rproc_mem_entry *
>>  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, 
>> ...);
>> 
>> +
>>  static inline
>>  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware 
>> *fw)
>>  {
>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>> index 16ad666..c4ca796 100644
>> --- a/include/linux/remoteproc.h
>> +++ b/include/linux/remoteproc.h
>> @@ -37,6 +37,7 @@
>> 
>>  #include <linux/types.h>
>>  #include <linux/mutex.h>
>> +#include <linux/cdev.h>
>>  #include <linux/virtio.h>
>>  #include <linux/completion.h>
>>  #include <linux/idr.h>
>> @@ -514,6 +515,7 @@ struct rproc {
>>  	bool auto_boot;
>>  	struct list_head dump_segments;
>>  	int nb_vdev;
>> +	struct cdev char_dev;
>>  };
>> 
>>  /**
>>
Mathieu Poirier April 3, 2020, 8:30 p.m. UTC | #6
On Thu, 2 Apr 2020 at 11:28, Arnaud POULIQUEN <arnaud.pouliquen@st.com> wrote:
>
> Hi
>
> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
> > Add the character device interface for userspace applications.
> > This interface can be used in order to boot up and shutdown
> > remote subsystems. Currently there is only a sysfs interface
> > which the userspace clients can use. If a usersapce application
> > crashes after booting the remote processor does not get any
> > indication about the crash. It might still assume that the
> > application is running. For example modem uses remotefs service
> > to fetch data from disk/flash memory. If the remotefs service
> > crashes, modem keeps on requesting data which might lead to a
> > crash. Adding a character device interface makes the remote
> > processor tightly coupled with the user space application.
> > A crash of the application leads to a close on the file descriptors
> > therefore shutting down the remoteproc.
>
> Sorry I'm late in the discussion, I hope I've gone through the whole
> discussion so I don't reopen a closed point...
>
> Something here is not crystal clear to me so I'd rather share it...
>
> I suppose that you the automatic restart of the application is not possible to
> stop and restart the remote processor...
>
> Why this use case can not be solved by a process monitor or a service
> in userland that detects the application crash and stop the remote firmware using
> the sysfs interface?

I also looked for a better way to do things...  The conclusion I came
to is that it may take too long between the application crash and the
monitoring service to stop the firmware via sysfs.

>
> I just want to be sure that there is no alternative to this, because having two ways
> for application to shutdown the firmware seems to me confusing...
>
> What about the opposite service, mean inform the application that the remote
> processor is crashed?

Also a valid point - the problem with asynchronous notification
schemes is the possibility to get out of sync.  I would also like to
find a better way...

> Do you identify such need? or the "auto" crash recovery is sufficient?
>
> Thanks,
> Arnaud
> >
> > Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> > ---
> >  drivers/remoteproc/Kconfig               |   9 +++
> >  drivers/remoteproc/Makefile              |   1 +
> >  drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
> >  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
> >  include/linux/remoteproc.h               |   2 +
> >  5 files changed, 134 insertions(+)
> >  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
> >
> > diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
> > index de3862c..6374b79 100644
> > --- a/drivers/remoteproc/Kconfig
> > +++ b/drivers/remoteproc/Kconfig
> > @@ -14,6 +14,15 @@ config REMOTEPROC
> >
> >  if REMOTEPROC
> >
> > +config REMOTEPROC_CDEV
> > +     bool "Remoteproc character device interface"
> > +     help
> > +       Say y here to have a character device interface for Remoteproc
> > +       framework. Userspace can boot/shutdown remote processors through
> > +       this interface.
> > +
> > +       It's safe to say N if you don't want to use this interface.
> > +
> >  config IMX_REMOTEPROC
> >       tristate "IMX6/7 remoteproc support"
> >       depends on ARCH_MXC
> > diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
> > index e30a1b1..b7d4f77 100644
> > --- a/drivers/remoteproc/Makefile
> > +++ b/drivers/remoteproc/Makefile
> > @@ -9,6 +9,7 @@ remoteproc-y                          += remoteproc_debugfs.o
> >  remoteproc-y                         += remoteproc_sysfs.o
> >  remoteproc-y                         += remoteproc_virtio.o
> >  remoteproc-y                         += remoteproc_elf_loader.o
> > +obj-$(CONFIG_REMOTEPROC_CDEV)                += remoteproc_cdev.o
> >  obj-$(CONFIG_IMX_REMOTEPROC)         += imx_rproc.o
> >  obj-$(CONFIG_MTK_SCP)                        += mtk_scp.o mtk_scp_ipi.o
> >  obj-$(CONFIG_OMAP_REMOTEPROC)                += omap_remoteproc.o
> > diff --git a/drivers/remoteproc/remoteproc_cdev.c b/drivers/remoteproc/remoteproc_cdev.c
> > new file mode 100644
> > index 0000000..8182bd1
> > --- /dev/null
> > +++ b/drivers/remoteproc/remoteproc_cdev.c
> > @@ -0,0 +1,100 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Character device interface driver for Remoteproc framework.
> > + *
> > + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
> > + */
> > +
> > +#include <linux/cdev.h>
> > +#include <linux/fs.h>
> > +#include <linux/module.h>
> > +#include <linux/mutex.h>
> > +#include <linux/remoteproc.h>
> > +
> > +#include "remoteproc_internal.h"
> > +
> > +#define NUM_RPROC_DEVICES    64
> > +static dev_t rproc_cdev;
> > +static DEFINE_IDA(cdev_minor_ida);
> > +
> > +static int rproc_cdev_open(struct inode *inode, struct file *file)
> > +{
> > +     struct rproc *rproc;
> > +
> > +     rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> > +
> > +     if (!rproc)
> > +             return -EINVAL;
> > +
> > +     if (rproc->state == RPROC_RUNNING)
> > +             return -EBUSY;
> > +
> > +     return rproc_boot(rproc);
> > +}
> > +
> > +static int rproc_cdev_release(struct inode *inode, struct file *file)
> > +{
> > +     struct rproc *rproc;
> > +
> > +     rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> > +
> > +     if (!rproc || rproc->state != RPROC_RUNNING)
> > +             return -EINVAL;
> > +
> > +     rproc_shutdown(rproc);
> > +
> > +     return 0;
> > +}
> > +
> > +static const struct file_operations rproc_fops = {
> > +     .open = rproc_cdev_open,
> > +     .release = rproc_cdev_release,
> > +};
> > +
> > +int rproc_char_device_add(struct rproc *rproc)
> > +{
> > +     int ret, minor;
> > +     dev_t cdevt;
> > +
> > +     minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
> > +                            GFP_KERNEL);
> > +     if (minor < 0) {
> > +             dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
> > +                     __func__, minor);
> > +             return -ENODEV;
> > +     }
> > +
> > +     cdev_init(&rproc->char_dev, &rproc_fops);
> > +     rproc->char_dev.owner = THIS_MODULE;
> > +
> > +     cdevt = MKDEV(MAJOR(rproc_cdev), minor);
> > +     ret = cdev_add(&rproc->char_dev, cdevt, 1);
> > +     if (ret < 0)
> > +             ida_simple_remove(&cdev_minor_ida, minor);
> > +
> > +     rproc->dev.devt = cdevt;
> > +     return ret;
> > +}
> > +
> > +void rproc_char_device_remove(struct rproc *rproc)
> > +{
> > +     __unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
> > +                         "rproc");
> > +     ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
> > +}
> > +
> > +void __init rproc_init_cdev(void)
> > +{
> > +     int ret;
> > +
> > +     ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
> > +     if (ret < 0) {
> > +             pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
> > +             return;
> > +     }
> > +}
> > +
> > +void __exit rproc_exit_cdev(void)
> > +{
> > +     __unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");
> > +}
> > diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
> > index 493ef92..28d61a1 100644
> > --- a/drivers/remoteproc/remoteproc_internal.h
> > +++ b/drivers/remoteproc/remoteproc_internal.h
> > @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char *name, struct rproc *rproc,
> >  int rproc_init_sysfs(void);
> >  void rproc_exit_sysfs(void);
> >
> > +#ifdef CONFIG_REMOTEPROC_CDEV
> > +void rproc_init_cdev(void);
> > +void rproc_exit_cdev(void);
> > +int rproc_char_device_add(struct rproc *rproc);
> > +void rproc_char_device_remove(struct rproc *rproc);
> > +#else
> > +static inline void rproc_init_cdev(void)
> > +{
> > +}
> > +static inline void rproc_exit_cdev(void)
> > +{
> > +}
> > +static inline int rproc_char_device_add(struct rproc *rproc)
> > +{
> > +     return 0;
> > +}
> > +static inline void  rproc_char_device_remove(struct rproc *rproc)
> > +{
> > +}
> > +#endif
> > +
> >  void rproc_free_vring(struct rproc_vring *rvring);
> >  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
> >
> > @@ -63,6 +84,7 @@ struct resource_table *rproc_elf_find_loaded_rsc_table(struct rproc *rproc,
> >  struct rproc_mem_entry *
> >  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
> >
> > +
> >  static inline
> >  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
> >  {
> > diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> > index 16ad666..c4ca796 100644
> > --- a/include/linux/remoteproc.h
> > +++ b/include/linux/remoteproc.h
> > @@ -37,6 +37,7 @@
> >
> >  #include <linux/types.h>
> >  #include <linux/mutex.h>
> > +#include <linux/cdev.h>
> >  #include <linux/virtio.h>
> >  #include <linux/completion.h>
> >  #include <linux/idr.h>
> > @@ -514,6 +515,7 @@ struct rproc {
> >       bool auto_boot;
> >       struct list_head dump_segments;
> >       int nb_vdev;
> > +     struct cdev char_dev;
> >  };
> >
> >  /**
> >
Arnaud POULIQUEN April 6, 2020, 9:01 a.m. UTC | #7
On 4/3/20 9:13 PM, rishabhb@codeaurora.org wrote:
> On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
>> Hi
>>
>> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
>>> Add the character device interface for userspace applications.
>>> This interface can be used in order to boot up and shutdown
>>> remote subsystems. Currently there is only a sysfs interface
>>> which the userspace clients can use. If a usersapce application
>>> crashes after booting the remote processor does not get any
>>> indication about the crash. It might still assume that the
>>> application is running. For example modem uses remotefs service
>>> to fetch data from disk/flash memory. If the remotefs service
>>> crashes, modem keeps on requesting data which might lead to a
>>> crash. Adding a character device interface makes the remote
>>> processor tightly coupled with the user space application.
>>> A crash of the application leads to a close on the file descriptors
>>> therefore shutting down the remoteproc.
>>
>> Sorry I'm late in the discussion, I hope I've gone through the whole
>> discussion so I don't reopen a closed point...
>>
>> Something here is not crystal clear to me so I'd rather share it...
>>
>> I suppose that you the automatic restart of the application is not possible to
>> stop and restart the remote processor...
> Yes correct, while we wait for the application to restart we might observe a
> fatal crash.
>>
>> Why this use case can not be solved by a process monitor or a service
>> in userland that detects the application crash and stop the remote
>> firmware using
>> the sysfs interface?
>>
> What happens in the case where the process monitor itself crashes? This is
> actually the approach we follow in our downstream code. We have a central entity
> in userspace that controls bootup/shutdown of some remote processors based on the
> votes from userspace clients. We have observed cases where this entity
> itself crashes and remote processors are left hanging.

Your description makes me feel like this patch is only a workaround of something that
should be fixed in the userland, even if i understand that hanging is one of the most
critical problem and have to be fixed.
For instance, how to handle several applications that interact with the remote processor
( e.g. rpmsg service applications) how to stop and restart everything. Using the char
device would probaly resolve only a part of the issue...

I'm not aware about your environment and i'm not a userland expert. But what i still not
understand why a parent process can not do the job...
I just test a simple script on my side that treat the kill -9 of an application
("cat" in my case).

#start the remote firmware
cp  $1 /lib/firmware/
echo $1> /sys/class/remoteproc/remoteproc0/firmware
echo start >/sys/class/remoteproc/remoteproc0/state
#your binary
cat /dev/kmsg
# stop the remote firmware in case of crash (and potentially some other apps)
echo stop >/sys/class/remoteproc/remoteproc0/state

Anyway, it's just my feeling, let other people give their feedback.

>> I just want to be sure that there is no alternative to this, because
>> having two ways
>> for application to shutdown the firmware seems to me confusing...
> Does making this interface optional/configurable helps?
>>
>> What about the opposite service, mean inform the application that the remote
>> processor is crashed?
>> Do you identify such need? or the "auto" crash recovery is sufficient?
> Auto recovery works perfectly for us. Although there is a mechanism in
> place using QMI(Qualcomm MSM interface) that can notify clients about remote
> processor crash.

Thanks for the information.

Regards
Arnaud

>>
>> Thanks,
>> Arnaud
>>>
>>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>>> ---
>>>  drivers/remoteproc/Kconfig               |   9 +++
>>>  drivers/remoteproc/Makefile              |   1 +
>>>  drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
>>>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>>>  include/linux/remoteproc.h               |   2 +
>>>  5 files changed, 134 insertions(+)
>>>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
>>>
>>> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
>>> index de3862c..6374b79 100644
>>> --- a/drivers/remoteproc/Kconfig
>>> +++ b/drivers/remoteproc/Kconfig
>>> @@ -14,6 +14,15 @@ config REMOTEPROC
>>>
>>>  if REMOTEPROC
>>>
>>> +config REMOTEPROC_CDEV
>>> +    bool "Remoteproc character device interface"
>>> +    help
>>> +      Say y here to have a character device interface for Remoteproc
>>> +      framework. Userspace can boot/shutdown remote processors through
>>> +      this interface.
>>> +
>>> +      It's safe to say N if you don't want to use this interface.
>>> +
>>>  config IMX_REMOTEPROC
>>>      tristate "IMX6/7 remoteproc support"
>>>      depends on ARCH_MXC
>>> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
>>> index e30a1b1..b7d4f77 100644
>>> --- a/drivers/remoteproc/Makefile
>>> +++ b/drivers/remoteproc/Makefile
>>> @@ -9,6 +9,7 @@ remoteproc-y                += remoteproc_debugfs.o
>>>  remoteproc-y                += remoteproc_sysfs.o
>>>  remoteproc-y                += remoteproc_virtio.o
>>>  remoteproc-y                += remoteproc_elf_loader.o
>>> +obj-$(CONFIG_REMOTEPROC_CDEV)        += remoteproc_cdev.o
>>>  obj-$(CONFIG_IMX_REMOTEPROC)        += imx_rproc.o
>>>  obj-$(CONFIG_MTK_SCP)            += mtk_scp.o mtk_scp_ipi.o
>>>  obj-$(CONFIG_OMAP_REMOTEPROC)        += omap_remoteproc.o
>>> diff --git a/drivers/remoteproc/remoteproc_cdev.c b/drivers/remoteproc/remoteproc_cdev.c
>>> new file mode 100644
>>> index 0000000..8182bd1
>>> --- /dev/null
>>> +++ b/drivers/remoteproc/remoteproc_cdev.c
>>> @@ -0,0 +1,100 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +/*
>>> + * Character device interface driver for Remoteproc framework.
>>> + *
>>> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
>>> + */
>>> +
>>> +#include <linux/cdev.h>
>>> +#include <linux/fs.h>
>>> +#include <linux/module.h>
>>> +#include <linux/mutex.h>
>>> +#include <linux/remoteproc.h>
>>> +
>>> +#include "remoteproc_internal.h"
>>> +
>>> +#define NUM_RPROC_DEVICES    64
>>> +static dev_t rproc_cdev;
>>> +static DEFINE_IDA(cdev_minor_ida);
>>> +
>>> +static int rproc_cdev_open(struct inode *inode, struct file *file)
>>> +{
>>> +    struct rproc *rproc;
>>> +
>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>> +
>>> +    if (!rproc)
>>> +        return -EINVAL;
>>> +
>>> +    if (rproc->state == RPROC_RUNNING)
>>> +        return -EBUSY;
>>> +
>>> +    return rproc_boot(rproc);
>>> +}
>>> +
>>> +static int rproc_cdev_release(struct inode *inode, struct file *file)
>>> +{
>>> +    struct rproc *rproc;
>>> +
>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>> +
>>> +    if (!rproc || rproc->state != RPROC_RUNNING)
>>> +        return -EINVAL;
>>> +
>>> +    rproc_shutdown(rproc);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static const struct file_operations rproc_fops = {
>>> +    .open = rproc_cdev_open,
>>> +    .release = rproc_cdev_release,
>>> +};
>>> +
>>> +int rproc_char_device_add(struct rproc *rproc)
>>> +{
>>> +    int ret, minor;
>>> +    dev_t cdevt;
>>> +
>>> +    minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
>>> +                   GFP_KERNEL);
>>> +    if (minor < 0) {
>>> +        dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
>>> +            __func__, minor);
>>> +        return -ENODEV;
>>> +    }
>>> +
>>> +    cdev_init(&rproc->char_dev, &rproc_fops);
>>> +    rproc->char_dev.owner = THIS_MODULE;
>>> +
>>> +    cdevt = MKDEV(MAJOR(rproc_cdev), minor);
>>> +    ret = cdev_add(&rproc->char_dev, cdevt, 1);
>>> +    if (ret < 0)
>>> +        ida_simple_remove(&cdev_minor_ida, minor);
>>> +
>>> +    rproc->dev.devt = cdevt;
>>> +    return ret;
>>> +}
>>> +
>>> +void rproc_char_device_remove(struct rproc *rproc)
>>> +{
>>> +    __unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
>>> +                "rproc");
>>> +    ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
>>> +}
>>> +
>>> +void __init rproc_init_cdev(void)
>>> +{
>>> +    int ret;
>>> +
>>> +    ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
>>> +    if (ret < 0) {
>>> +        pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
>>> +        return;
>>> +    }
>>> +}
>>> +
>>> +void __exit rproc_exit_cdev(void)
>>> +{
>>> +    __unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");
>>> +}
>>> diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
>>> index 493ef92..28d61a1 100644
>>> --- a/drivers/remoteproc/remoteproc_internal.h
>>> +++ b/drivers/remoteproc/remoteproc_internal.h
>>> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char *name, struct rproc *rproc,
>>>  int rproc_init_sysfs(void);
>>>  void rproc_exit_sysfs(void);
>>>
>>> +#ifdef CONFIG_REMOTEPROC_CDEV
>>> +void rproc_init_cdev(void);
>>> +void rproc_exit_cdev(void);
>>> +int rproc_char_device_add(struct rproc *rproc);
>>> +void rproc_char_device_remove(struct rproc *rproc);
>>> +#else
>>> +static inline void rproc_init_cdev(void)
>>> +{
>>> +}
>>> +static inline void rproc_exit_cdev(void)
>>> +{
>>> +}
>>> +static inline int rproc_char_device_add(struct rproc *rproc)
>>> +{
>>> +    return 0;
>>> +}
>>> +static inline void  rproc_char_device_remove(struct rproc *rproc)
>>> +{
>>> +}
>>> +#endif
>>> +
>>>  void rproc_free_vring(struct rproc_vring *rvring);
>>>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>>>
>>> @@ -63,6 +84,7 @@ struct resource_table *rproc_elf_find_loaded_rsc_table(struct rproc *rproc,
>>>  struct rproc_mem_entry *
>>>  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
>>>
>>> +
>>>  static inline
>>>  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
>>>  {
>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>> index 16ad666..c4ca796 100644
>>> --- a/include/linux/remoteproc.h
>>> +++ b/include/linux/remoteproc.h
>>> @@ -37,6 +37,7 @@
>>>
>>>  #include <linux/types.h>
>>>  #include <linux/mutex.h>
>>> +#include <linux/cdev.h>
>>>  #include <linux/virtio.h>
>>>  #include <linux/completion.h>
>>>  #include <linux/idr.h>
>>> @@ -514,6 +515,7 @@ struct rproc {
>>>      bool auto_boot;
>>>      struct list_head dump_segments;
>>>      int nb_vdev;
>>> +    struct cdev char_dev;
>>>  };
>>>
>>>  /**
>>>
Clément Leger April 6, 2020, 12:06 p.m. UTC | #8
Hi Arnaud,

----- On 6 Apr, 2020, at 11:01, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:

> On 4/3/20 9:13 PM, rishabhb@codeaurora.org wrote:
>> On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
>>> Hi
>>>
>>> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
>>>> Add the character device interface for userspace applications.
>>>> This interface can be used in order to boot up and shutdown
>>>> remote subsystems. Currently there is only a sysfs interface
>>>> which the userspace clients can use. If a usersapce application
>>>> crashes after booting the remote processor does not get any
>>>> indication about the crash. It might still assume that the
>>>> application is running. For example modem uses remotefs service
>>>> to fetch data from disk/flash memory. If the remotefs service
>>>> crashes, modem keeps on requesting data which might lead to a
>>>> crash. Adding a character device interface makes the remote
>>>> processor tightly coupled with the user space application.
>>>> A crash of the application leads to a close on the file descriptors
>>>> therefore shutting down the remoteproc.
>>>
>>> Sorry I'm late in the discussion, I hope I've gone through the whole
>>> discussion so I don't reopen a closed point...
>>>
>>> Something here is not crystal clear to me so I'd rather share it...
>>>
>>> I suppose that you the automatic restart of the application is not possible to
>>> stop and restart the remote processor...
>> Yes correct, while we wait for the application to restart we might observe a
>> fatal crash.
>>>
>>> Why this use case can not be solved by a process monitor or a service
>>> in userland that detects the application crash and stop the remote
>>> firmware using
>>> the sysfs interface?
>>>
>> What happens in the case where the process monitor itself crashes? This is
>> actually the approach we follow in our downstream code. We have a central entity
>> in userspace that controls bootup/shutdown of some remote processors based on
>> the
>> votes from userspace clients. We have observed cases where this entity
>> itself crashes and remote processors are left hanging.
> 
> Your description makes me feel like this patch is only a workaround of something
> that
> should be fixed in the userland, even if i understand that hanging is one of the
> most
> critical problem and have to be fixed.
> For instance, how to handle several applications that interact with the remote
> processor
> ( e.g. rpmsg service applications) how to stop and restart everything. Using the
> char
> device would probaly resolve only a part of the issue...
> 
> I'm not aware about your environment and i'm not a userland expert. But what i
> still not
> understand why a parent process can not do the job...
> I just test a simple script on my side that treat the kill -9 of an application
> ("cat" in my case).

This is not entirely true, if the parent process is killed with a SIGKILL, then
the process will not be able to handle anything and the remoteproc will still
be running.

What I understood from Rishabh patch is a way to allow a single process handling
the rproc state. We have the same kind of need and currently, if the
user application crashes, then the rproc is still running (which happens).

> 
> #start the remote firmware
> cp  $1 /lib/firmware/
> echo $1> /sys/class/remoteproc/remoteproc0/firmware
> echo start >/sys/class/remoteproc/remoteproc0/state
> #your binary
> cat /dev/kmsg
> # stop the remote firmware in case of crash (and potentially some other apps)
> echo stop >/sys/class/remoteproc/remoteproc0/state
> 

This is not really "production proof" and what happens if the application is
responsible of setting the firmware which might be jitted ? 
And if the script receives the SIGKILL, then we are back to the same problem.

I really think, this is a step forward an easier and reliable use of the remoteproc
on userland to guarantee a coherent rproc state even if host application
crashes.

Regards,

Clément

> Anyway, it's just my feeling, let other people give their feedback.
> 
>>> I just want to be sure that there is no alternative to this, because
>>> having two ways
>>> for application to shutdown the firmware seems to me confusing...
>> Does making this interface optional/configurable helps?
>>>
>>> What about the opposite service, mean inform the application that the remote
>>> processor is crashed?
>>> Do you identify such need? or the "auto" crash recovery is sufficient?
>> Auto recovery works perfectly for us. Although there is a mechanism in
>> place using QMI(Qualcomm MSM interface) that can notify clients about remote
>> processor crash.
> 
> Thanks for the information.
> 
> Regards
> Arnaud
> 
>>>
>>> Thanks,
>>> Arnaud
>>>>
>>>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>>>> ---
>>>>  drivers/remoteproc/Kconfig               |   9 +++
>>>>  drivers/remoteproc/Makefile              |   1 +
>>>>  drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
>>>>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>>>>  include/linux/remoteproc.h               |   2 +
>>>>  5 files changed, 134 insertions(+)
>>>>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
>>>>
>>>> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
>>>> index de3862c..6374b79 100644
>>>> --- a/drivers/remoteproc/Kconfig
>>>> +++ b/drivers/remoteproc/Kconfig
>>>> @@ -14,6 +14,15 @@ config REMOTEPROC
>>>>
>>>>  if REMOTEPROC
>>>>
>>>> +config REMOTEPROC_CDEV
>>>> +    bool "Remoteproc character device interface"
>>>> +    help
>>>> +      Say y here to have a character device interface for Remoteproc
>>>> +      framework. Userspace can boot/shutdown remote processors through
>>>> +      this interface.
>>>> +
>>>> +      It's safe to say N if you don't want to use this interface.
>>>> +
>>>>  config IMX_REMOTEPROC
>>>>      tristate "IMX6/7 remoteproc support"
>>>>      depends on ARCH_MXC
>>>> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
>>>> index e30a1b1..b7d4f77 100644
>>>> --- a/drivers/remoteproc/Makefile
>>>> +++ b/drivers/remoteproc/Makefile
>>>> @@ -9,6 +9,7 @@ remoteproc-y                += remoteproc_debugfs.o
>>>>  remoteproc-y                += remoteproc_sysfs.o
>>>>  remoteproc-y                += remoteproc_virtio.o
>>>>  remoteproc-y                += remoteproc_elf_loader.o
>>>> +obj-$(CONFIG_REMOTEPROC_CDEV)        += remoteproc_cdev.o
>>>>  obj-$(CONFIG_IMX_REMOTEPROC)        += imx_rproc.o
>>>>  obj-$(CONFIG_MTK_SCP)            += mtk_scp.o mtk_scp_ipi.o
>>>>  obj-$(CONFIG_OMAP_REMOTEPROC)        += omap_remoteproc.o
>>>> diff --git a/drivers/remoteproc/remoteproc_cdev.c
>>>> b/drivers/remoteproc/remoteproc_cdev.c
>>>> new file mode 100644
>>>> index 0000000..8182bd1
>>>> --- /dev/null
>>>> +++ b/drivers/remoteproc/remoteproc_cdev.c
>>>> @@ -0,0 +1,100 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> +/*
>>>> + * Character device interface driver for Remoteproc framework.
>>>> + *
>>>> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
>>>> + */
>>>> +
>>>> +#include <linux/cdev.h>
>>>> +#include <linux/fs.h>
>>>> +#include <linux/module.h>
>>>> +#include <linux/mutex.h>
>>>> +#include <linux/remoteproc.h>
>>>> +
>>>> +#include "remoteproc_internal.h"
>>>> +
>>>> +#define NUM_RPROC_DEVICES    64
>>>> +static dev_t rproc_cdev;
>>>> +static DEFINE_IDA(cdev_minor_ida);
>>>> +
>>>> +static int rproc_cdev_open(struct inode *inode, struct file *file)
>>>> +{
>>>> +    struct rproc *rproc;
>>>> +
>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>> +
>>>> +    if (!rproc)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (rproc->state == RPROC_RUNNING)
>>>> +        return -EBUSY;
>>>> +
>>>> +    return rproc_boot(rproc);
>>>> +}
>>>> +
>>>> +static int rproc_cdev_release(struct inode *inode, struct file *file)
>>>> +{
>>>> +    struct rproc *rproc;
>>>> +
>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>> +
>>>> +    if (!rproc || rproc->state != RPROC_RUNNING)
>>>> +        return -EINVAL;
>>>> +
>>>> +    rproc_shutdown(rproc);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static const struct file_operations rproc_fops = {
>>>> +    .open = rproc_cdev_open,
>>>> +    .release = rproc_cdev_release,
>>>> +};
>>>> +
>>>> +int rproc_char_device_add(struct rproc *rproc)
>>>> +{
>>>> +    int ret, minor;
>>>> +    dev_t cdevt;
>>>> +
>>>> +    minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
>>>> +                   GFP_KERNEL);
>>>> +    if (minor < 0) {
>>>> +        dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
>>>> +            __func__, minor);
>>>> +        return -ENODEV;
>>>> +    }
>>>> +
>>>> +    cdev_init(&rproc->char_dev, &rproc_fops);
>>>> +    rproc->char_dev.owner = THIS_MODULE;
>>>> +
>>>> +    cdevt = MKDEV(MAJOR(rproc_cdev), minor);
>>>> +    ret = cdev_add(&rproc->char_dev, cdevt, 1);
>>>> +    if (ret < 0)
>>>> +        ida_simple_remove(&cdev_minor_ida, minor);
>>>> +
>>>> +    rproc->dev.devt = cdevt;
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +void rproc_char_device_remove(struct rproc *rproc)
>>>> +{
>>>> +    __unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
>>>> +                "rproc");
>>>> +    ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
>>>> +}
>>>> +
>>>> +void __init rproc_init_cdev(void)
>>>> +{
>>>> +    int ret;
>>>> +
>>>> +    ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
>>>> +    if (ret < 0) {
>>>> +        pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
>>>> +        return;
>>>> +    }
>>>> +}
>>>> +
>>>> +void __exit rproc_exit_cdev(void)
>>>> +{
>>>> +    __unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");
>>>> +}
>>>> diff --git a/drivers/remoteproc/remoteproc_internal.h
>>>> b/drivers/remoteproc/remoteproc_internal.h
>>>> index 493ef92..28d61a1 100644
>>>> --- a/drivers/remoteproc/remoteproc_internal.h
>>>> +++ b/drivers/remoteproc/remoteproc_internal.h
>>>> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char *name,
>>>> struct rproc *rproc,
>>>>  int rproc_init_sysfs(void);
>>>>  void rproc_exit_sysfs(void);
>>>>
>>>> +#ifdef CONFIG_REMOTEPROC_CDEV
>>>> +void rproc_init_cdev(void);
>>>> +void rproc_exit_cdev(void);
>>>> +int rproc_char_device_add(struct rproc *rproc);
>>>> +void rproc_char_device_remove(struct rproc *rproc);
>>>> +#else
>>>> +static inline void rproc_init_cdev(void)
>>>> +{
>>>> +}
>>>> +static inline void rproc_exit_cdev(void)
>>>> +{
>>>> +}
>>>> +static inline int rproc_char_device_add(struct rproc *rproc)
>>>> +{
>>>> +    return 0;
>>>> +}
>>>> +static inline void  rproc_char_device_remove(struct rproc *rproc)
>>>> +{
>>>> +}
>>>> +#endif
>>>> +
>>>>  void rproc_free_vring(struct rproc_vring *rvring);
>>>>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>>>>
>>>> @@ -63,6 +84,7 @@ struct resource_table *rproc_elf_find_loaded_rsc_table(struct
>>>> rproc *rproc,
>>>>  struct rproc_mem_entry *
>>>>  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
>>>>
>>>> +
>>>>  static inline
>>>>  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
>>>>  {
>>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>>> index 16ad666..c4ca796 100644
>>>> --- a/include/linux/remoteproc.h
>>>> +++ b/include/linux/remoteproc.h
>>>> @@ -37,6 +37,7 @@
>>>>
>>>>  #include <linux/types.h>
>>>>  #include <linux/mutex.h>
>>>> +#include <linux/cdev.h>
>>>>  #include <linux/virtio.h>
>>>>  #include <linux/completion.h>
>>>>  #include <linux/idr.h>
>>>> @@ -514,6 +515,7 @@ struct rproc {
>>>>      bool auto_boot;
>>>>      struct list_head dump_segments;
>>>>      int nb_vdev;
>>>> +    struct cdev char_dev;
>>>>  };
>>>>
>>>>  /**
Arnaud POULIQUEN April 6, 2020, 2:17 p.m. UTC | #9
Hi Clément,

On 4/6/20 2:06 PM, Clément Leger wrote:
> Hi Arnaud,
> 
> ----- On 6 Apr, 2020, at 11:01, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:
> 
>> On 4/3/20 9:13 PM, rishabhb@codeaurora.org wrote:
>>> On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
>>>> Hi
>>>>
>>>> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
>>>>> Add the character device interface for userspace applications.
>>>>> This interface can be used in order to boot up and shutdown
>>>>> remote subsystems. Currently there is only a sysfs interface
>>>>> which the userspace clients can use. If a usersapce application
>>>>> crashes after booting the remote processor does not get any
>>>>> indication about the crash. It might still assume that the
>>>>> application is running. For example modem uses remotefs service
>>>>> to fetch data from disk/flash memory. If the remotefs service
>>>>> crashes, modem keeps on requesting data which might lead to a
>>>>> crash. Adding a character device interface makes the remote
>>>>> processor tightly coupled with the user space application.
>>>>> A crash of the application leads to a close on the file descriptors
>>>>> therefore shutting down the remoteproc.
>>>>
>>>> Sorry I'm late in the discussion, I hope I've gone through the whole
>>>> discussion so I don't reopen a closed point...
>>>>
>>>> Something here is not crystal clear to me so I'd rather share it...
>>>>
>>>> I suppose that you the automatic restart of the application is not possible to
>>>> stop and restart the remote processor...
>>> Yes correct, while we wait for the application to restart we might observe a
>>> fatal crash.
>>>>
>>>> Why this use case can not be solved by a process monitor or a service
>>>> in userland that detects the application crash and stop the remote
>>>> firmware using
>>>> the sysfs interface?
>>>>
>>> What happens in the case where the process monitor itself crashes? This is
>>> actually the approach we follow in our downstream code. We have a central entity
>>> in userspace that controls bootup/shutdown of some remote processors based on
>>> the
>>> votes from userspace clients. We have observed cases where this entity
>>> itself crashes and remote processors are left hanging.
>>
>> Your description makes me feel like this patch is only a workaround of something
>> that
>> should be fixed in the userland, even if i understand that hanging is one of the
>> most
>> critical problem and have to be fixed.
>> For instance, how to handle several applications that interact with the remote
>> processor
>> ( e.g. rpmsg service applications) how to stop and restart everything. Using the
>> char
>> device would probaly resolve only a part of the issue...
>>
>> I'm not aware about your environment and i'm not a userland expert. But what i
>> still not
>> understand why a parent process can not do the job...
>> I just test a simple script on my side that treat the kill -9 of an application
>> ("cat" in my case).
> 
> This is not entirely true, if the parent process is killed with a SIGKILL, then
> the process will not be able to handle anything and the remoteproc will still
> be running.
> 
> What I understood from Rishabh patch is a way to allow a single process handling
> the rproc state. We have the same kind of need and currently, if the
> user application crashes, then the rproc is still running (which happens).
> 
>>
>> #start the remote firmware
>> cp  $1 /lib/firmware/
>> echo $1> /sys/class/remoteproc/remoteproc0/firmware
>> echo start >/sys/class/remoteproc/remoteproc0/state
>> #your binary
>> cat /dev/kmsg
>> # stop the remote firmware in case of crash (and potentially some other apps)
>> echo stop >/sys/class/remoteproc/remoteproc0/state
>>
> 
> This is not really "production proof" and what happens if the application is
> responsible of setting the firmware which might be jitted ? 
> And if the script receives the SIGKILL, then we are back to the same problem.
Yes this is just a basic example, not an implementation which would depend on the
environment. i'm just trying here to  put forward a multi-process solution...and
that I'm not an userland expert :).  

> 
> I really think, this is a step forward an easier and reliable use of the remoteproc
> on userland to guarantee a coherent rproc state even if host application
> crashes.

I can see 3 ways of handling an application crash:
- just shutdown the firmware
=> can be done through char device
- stop some other related processes and/or generate a remote proc crash dump for debug
=> /sysfs and/or debugfs
- do nothing as you want a silence application reboot and re-attach to the running firmware
=> use sysfs

I'm challenging the solution because splitting the API seems to me not a good solution.
Now i wonder how it works for the other applications that are relying on some other
kernel frameworks...
Perhaps the answer is that these frameworks don't use sysfs but char device. 
That would means that the sysfs solution is not the more adapted solution and
perhaps we should migrate to a char device.
But in this case, i think that it should implement the whole API and be exclusive with
the syfs legacy API (so no sysfs or sysfs in read-only).

Regards,
Arnaud

> 
> Regards,
> 
> Clément
> 
>> Anyway, it's just my feeling, let other people give their feedback.
>>
>>>> I just want to be sure that there is no alternative to this, because
>>>> having two ways
>>>> for application to shutdown the firmware seems to me confusing...
>>> Does making this interface optional/configurable helps?
>>>>
>>>> What about the opposite service, mean inform the application that the remote
>>>> processor is crashed?
>>>> Do you identify such need? or the "auto" crash recovery is sufficient?
>>> Auto recovery works perfectly for us. Although there is a mechanism in
>>> place using QMI(Qualcomm MSM interface) that can notify clients about remote
>>> processor crash.
>>
>> Thanks for the information.
>>
>> Regards
>> Arnaud
>>
>>>>
>>>> Thanks,
>>>> Arnaud
>>>>>
>>>>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>>>>> ---
>>>>>  drivers/remoteproc/Kconfig               |   9 +++
>>>>>  drivers/remoteproc/Makefile              |   1 +
>>>>>  drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
>>>>>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>>>>>  include/linux/remoteproc.h               |   2 +
>>>>>  5 files changed, 134 insertions(+)
>>>>>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
>>>>>
>>>>> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
>>>>> index de3862c..6374b79 100644
>>>>> --- a/drivers/remoteproc/Kconfig
>>>>> +++ b/drivers/remoteproc/Kconfig
>>>>> @@ -14,6 +14,15 @@ config REMOTEPROC
>>>>>
>>>>>  if REMOTEPROC
>>>>>
>>>>> +config REMOTEPROC_CDEV
>>>>> +    bool "Remoteproc character device interface"
>>>>> +    help
>>>>> +      Say y here to have a character device interface for Remoteproc
>>>>> +      framework. Userspace can boot/shutdown remote processors through
>>>>> +      this interface.
>>>>> +
>>>>> +      It's safe to say N if you don't want to use this interface.
>>>>> +
>>>>>  config IMX_REMOTEPROC
>>>>>      tristate "IMX6/7 remoteproc support"
>>>>>      depends on ARCH_MXC
>>>>> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
>>>>> index e30a1b1..b7d4f77 100644
>>>>> --- a/drivers/remoteproc/Makefile
>>>>> +++ b/drivers/remoteproc/Makefile
>>>>> @@ -9,6 +9,7 @@ remoteproc-y                += remoteproc_debugfs.o
>>>>>  remoteproc-y                += remoteproc_sysfs.o
>>>>>  remoteproc-y                += remoteproc_virtio.o
>>>>>  remoteproc-y                += remoteproc_elf_loader.o
>>>>> +obj-$(CONFIG_REMOTEPROC_CDEV)        += remoteproc_cdev.o
>>>>>  obj-$(CONFIG_IMX_REMOTEPROC)        += imx_rproc.o
>>>>>  obj-$(CONFIG_MTK_SCP)            += mtk_scp.o mtk_scp_ipi.o
>>>>>  obj-$(CONFIG_OMAP_REMOTEPROC)        += omap_remoteproc.o
>>>>> diff --git a/drivers/remoteproc/remoteproc_cdev.c
>>>>> b/drivers/remoteproc/remoteproc_cdev.c
>>>>> new file mode 100644
>>>>> index 0000000..8182bd1
>>>>> --- /dev/null
>>>>> +++ b/drivers/remoteproc/remoteproc_cdev.c
>>>>> @@ -0,0 +1,100 @@
>>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>>> +/*
>>>>> + * Character device interface driver for Remoteproc framework.
>>>>> + *
>>>>> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
>>>>> + */
>>>>> +
>>>>> +#include <linux/cdev.h>
>>>>> +#include <linux/fs.h>
>>>>> +#include <linux/module.h>
>>>>> +#include <linux/mutex.h>
>>>>> +#include <linux/remoteproc.h>
>>>>> +
>>>>> +#include "remoteproc_internal.h"
>>>>> +
>>>>> +#define NUM_RPROC_DEVICES    64
>>>>> +static dev_t rproc_cdev;
>>>>> +static DEFINE_IDA(cdev_minor_ida);
>>>>> +
>>>>> +static int rproc_cdev_open(struct inode *inode, struct file *file)
>>>>> +{
>>>>> +    struct rproc *rproc;
>>>>> +
>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>>> +
>>>>> +    if (!rproc)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    if (rproc->state == RPROC_RUNNING)
>>>>> +        return -EBUSY;
>>>>> +
>>>>> +    return rproc_boot(rproc);
>>>>> +}
>>>>> +
>>>>> +static int rproc_cdev_release(struct inode *inode, struct file *file)
>>>>> +{
>>>>> +    struct rproc *rproc;
>>>>> +
>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>>> +
>>>>> +    if (!rproc || rproc->state != RPROC_RUNNING)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    rproc_shutdown(rproc);
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static const struct file_operations rproc_fops = {
>>>>> +    .open = rproc_cdev_open,
>>>>> +    .release = rproc_cdev_release,
>>>>> +};
>>>>> +
>>>>> +int rproc_char_device_add(struct rproc *rproc)
>>>>> +{
>>>>> +    int ret, minor;
>>>>> +    dev_t cdevt;
>>>>> +
>>>>> +    minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
>>>>> +                   GFP_KERNEL);
>>>>> +    if (minor < 0) {
>>>>> +        dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
>>>>> +            __func__, minor);
>>>>> +        return -ENODEV;
>>>>> +    }
>>>>> +
>>>>> +    cdev_init(&rproc->char_dev, &rproc_fops);
>>>>> +    rproc->char_dev.owner = THIS_MODULE;
>>>>> +
>>>>> +    cdevt = MKDEV(MAJOR(rproc_cdev), minor);
>>>>> +    ret = cdev_add(&rproc->char_dev, cdevt, 1);
>>>>> +    if (ret < 0)
>>>>> +        ida_simple_remove(&cdev_minor_ida, minor);
>>>>> +
>>>>> +    rproc->dev.devt = cdevt;
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +void rproc_char_device_remove(struct rproc *rproc)
>>>>> +{
>>>>> +    __unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
>>>>> +                "rproc");
>>>>> +    ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
>>>>> +}
>>>>> +
>>>>> +void __init rproc_init_cdev(void)
>>>>> +{
>>>>> +    int ret;
>>>>> +
>>>>> +    ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
>>>>> +    if (ret < 0) {
>>>>> +        pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
>>>>> +        return;
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +void __exit rproc_exit_cdev(void)
>>>>> +{
>>>>> +    __unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");
>>>>> +}
>>>>> diff --git a/drivers/remoteproc/remoteproc_internal.h
>>>>> b/drivers/remoteproc/remoteproc_internal.h
>>>>> index 493ef92..28d61a1 100644
>>>>> --- a/drivers/remoteproc/remoteproc_internal.h
>>>>> +++ b/drivers/remoteproc/remoteproc_internal.h
>>>>> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char *name,
>>>>> struct rproc *rproc,
>>>>>  int rproc_init_sysfs(void);
>>>>>  void rproc_exit_sysfs(void);
>>>>>
>>>>> +#ifdef CONFIG_REMOTEPROC_CDEV
>>>>> +void rproc_init_cdev(void);
>>>>> +void rproc_exit_cdev(void);
>>>>> +int rproc_char_device_add(struct rproc *rproc);
>>>>> +void rproc_char_device_remove(struct rproc *rproc);
>>>>> +#else
>>>>> +static inline void rproc_init_cdev(void)
>>>>> +{
>>>>> +}
>>>>> +static inline void rproc_exit_cdev(void)
>>>>> +{
>>>>> +}
>>>>> +static inline int rproc_char_device_add(struct rproc *rproc)
>>>>> +{
>>>>> +    return 0;
>>>>> +}
>>>>> +static inline void  rproc_char_device_remove(struct rproc *rproc)
>>>>> +{
>>>>> +}
>>>>> +#endif
>>>>> +
>>>>>  void rproc_free_vring(struct rproc_vring *rvring);
>>>>>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>>>>>
>>>>> @@ -63,6 +84,7 @@ struct resource_table *rproc_elf_find_loaded_rsc_table(struct
>>>>> rproc *rproc,
>>>>>  struct rproc_mem_entry *
>>>>>  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
>>>>>
>>>>> +
>>>>>  static inline
>>>>>  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
>>>>>  {
>>>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>>>> index 16ad666..c4ca796 100644
>>>>> --- a/include/linux/remoteproc.h
>>>>> +++ b/include/linux/remoteproc.h
>>>>> @@ -37,6 +37,7 @@
>>>>>
>>>>>  #include <linux/types.h>
>>>>>  #include <linux/mutex.h>
>>>>> +#include <linux/cdev.h>
>>>>>  #include <linux/virtio.h>
>>>>>  #include <linux/completion.h>
>>>>>  #include <linux/idr.h>
>>>>> @@ -514,6 +515,7 @@ struct rproc {
>>>>>      bool auto_boot;
>>>>>      struct list_head dump_segments;
>>>>>      int nb_vdev;
>>>>> +    struct cdev char_dev;
>>>>>  };
>>>>>
>>>>>  /**
Clément Leger April 6, 2020, 3:58 p.m. UTC | #10
Hi Arnaud,

----- On 6 Apr, 2020, at 16:17, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:

> Hi Clément,
> 
> On 4/6/20 2:06 PM, Clément Leger wrote:
>> Hi Arnaud,
>> 
>> ----- On 6 Apr, 2020, at 11:01, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:
>> 
>>> On 4/3/20 9:13 PM, rishabhb@codeaurora.org wrote:
>>>> On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
>>>>> Hi
>>>>>
>>>>> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
>>>>>> Add the character device interface for userspace applications.
>>>>>> This interface can be used in order to boot up and shutdown
>>>>>> remote subsystems. Currently there is only a sysfs interface
>>>>>> which the userspace clients can use. If a usersapce application
>>>>>> crashes after booting the remote processor does not get any
>>>>>> indication about the crash. It might still assume that the
>>>>>> application is running. For example modem uses remotefs service
>>>>>> to fetch data from disk/flash memory. If the remotefs service
>>>>>> crashes, modem keeps on requesting data which might lead to a
>>>>>> crash. Adding a character device interface makes the remote
>>>>>> processor tightly coupled with the user space application.
>>>>>> A crash of the application leads to a close on the file descriptors
>>>>>> therefore shutting down the remoteproc.
>>>>>
>>>>> Sorry I'm late in the discussion, I hope I've gone through the whole
>>>>> discussion so I don't reopen a closed point...
>>>>>
>>>>> Something here is not crystal clear to me so I'd rather share it...
>>>>>
>>>>> I suppose that you the automatic restart of the application is not possible to
>>>>> stop and restart the remote processor...
>>>> Yes correct, while we wait for the application to restart we might observe a
>>>> fatal crash.
>>>>>
>>>>> Why this use case can not be solved by a process monitor or a service
>>>>> in userland that detects the application crash and stop the remote
>>>>> firmware using
>>>>> the sysfs interface?
>>>>>
>>>> What happens in the case where the process monitor itself crashes? This is
>>>> actually the approach we follow in our downstream code. We have a central entity
>>>> in userspace that controls bootup/shutdown of some remote processors based on
>>>> the
>>>> votes from userspace clients. We have observed cases where this entity
>>>> itself crashes and remote processors are left hanging.
>>>
>>> Your description makes me feel like this patch is only a workaround of something
>>> that
>>> should be fixed in the userland, even if i understand that hanging is one of the
>>> most
>>> critical problem and have to be fixed.
>>> For instance, how to handle several applications that interact with the remote
>>> processor
>>> ( e.g. rpmsg service applications) how to stop and restart everything. Using the
>>> char
>>> device would probaly resolve only a part of the issue...
>>>
>>> I'm not aware about your environment and i'm not a userland expert. But what i
>>> still not
>>> understand why a parent process can not do the job...
>>> I just test a simple script on my side that treat the kill -9 of an application
>>> ("cat" in my case).
>> 
>> This is not entirely true, if the parent process is killed with a SIGKILL, then
>> the process will not be able to handle anything and the remoteproc will still
>> be running.
>> 
>> What I understood from Rishabh patch is a way to allow a single process handling
>> the rproc state. We have the same kind of need and currently, if the
>> user application crashes, then the rproc is still running (which happens).
>> 
>>>
>>> #start the remote firmware
>>> cp  $1 /lib/firmware/
>>> echo $1> /sys/class/remoteproc/remoteproc0/firmware
>>> echo start >/sys/class/remoteproc/remoteproc0/state
>>> #your binary
>>> cat /dev/kmsg
>>> # stop the remote firmware in case of crash (and potentially some other apps)
>>> echo stop >/sys/class/remoteproc/remoteproc0/state
>>>
>> 
>> This is not really "production proof" and what happens if the application is
>> responsible of setting the firmware which might be jitted ?
>> And if the script receives the SIGKILL, then we are back to the same problem.
> Yes this is just a basic example, not an implementation which would depend on
> the
> environment. i'm just trying here to  put forward a multi-process solution...and
> that I'm not an userland expert :).
> 
>> 
>> I really think, this is a step forward an easier and reliable use of the
>> remoteproc
>> on userland to guarantee a coherent rproc state even if host application
>> crashes.
> 
> I can see 3 ways of handling an application crash:
> - just shutdown the firmware
> => can be done through char device
> - stop some other related processes and/or generate a remote proc crash dump for
> debug
> => /sysfs and/or debugfs
> - do nothing as you want a silence application reboot and re-attach to the
> running firmware
> => use sysfs
> 
> I'm challenging the solution because splitting the API seems to me not a good
> solution.

Completely ok with that, we have to fully understand the targeted usecase to
avoid implemented a flawed interface.

> Now i wonder how it works for the other applications that are relying on some
> other
> kernel frameworks...

For some other device, there is a chardev. The watchdog for intance uses a
/dev/watchdog. Regarding the gpio, it seems they are also using a chardev
and the sysfs interface is deprecated.

> Perhaps the answer is that these frameworks don't use sysfs but char device.
> That would means that the sysfs solution is not the more adapted solution and
> perhaps we should migrate to a char device.
> But in this case, i think that it should implement the whole API and be
> exclusive with
> the syfs legacy API (so no sysfs or sysfs in read-only).

I agree with that, if another interface must be defined, then it should 
implement everything that is supported right now with the sysfs.

Regards,

Clément

> 
> Regards,
> Arnaud
> 
>> 
>> Regards,
>> 
>> Clément
>> 
>>> Anyway, it's just my feeling, let other people give their feedback.
>>>
>>>>> I just want to be sure that there is no alternative to this, because
>>>>> having two ways
>>>>> for application to shutdown the firmware seems to me confusing...
>>>> Does making this interface optional/configurable helps?
>>>>>
>>>>> What about the opposite service, mean inform the application that the remote
>>>>> processor is crashed?
>>>>> Do you identify such need? or the "auto" crash recovery is sufficient?
>>>> Auto recovery works perfectly for us. Although there is a mechanism in
>>>> place using QMI(Qualcomm MSM interface) that can notify clients about remote
>>>> processor crash.
>>>
>>> Thanks for the information.
>>>
>>> Regards
>>> Arnaud
>>>
>>>>>
>>>>> Thanks,
>>>>> Arnaud
>>>>>>
>>>>>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>>>>>> ---
>>>>>>  drivers/remoteproc/Kconfig               |   9 +++
>>>>>>  drivers/remoteproc/Makefile              |   1 +
>>>>>>  drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
>>>>>>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>>>>>>  include/linux/remoteproc.h               |   2 +
>>>>>>  5 files changed, 134 insertions(+)
>>>>>>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
>>>>>>
>>>>>> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
>>>>>> index de3862c..6374b79 100644
>>>>>> --- a/drivers/remoteproc/Kconfig
>>>>>> +++ b/drivers/remoteproc/Kconfig
>>>>>> @@ -14,6 +14,15 @@ config REMOTEPROC
>>>>>>
>>>>>>  if REMOTEPROC
>>>>>>
>>>>>> +config REMOTEPROC_CDEV
>>>>>> +    bool "Remoteproc character device interface"
>>>>>> +    help
>>>>>> +      Say y here to have a character device interface for Remoteproc
>>>>>> +      framework. Userspace can boot/shutdown remote processors through
>>>>>> +      this interface.
>>>>>> +
>>>>>> +      It's safe to say N if you don't want to use this interface.
>>>>>> +
>>>>>>  config IMX_REMOTEPROC
>>>>>>      tristate "IMX6/7 remoteproc support"
>>>>>>      depends on ARCH_MXC
>>>>>> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
>>>>>> index e30a1b1..b7d4f77 100644
>>>>>> --- a/drivers/remoteproc/Makefile
>>>>>> +++ b/drivers/remoteproc/Makefile
>>>>>> @@ -9,6 +9,7 @@ remoteproc-y                += remoteproc_debugfs.o
>>>>>>  remoteproc-y                += remoteproc_sysfs.o
>>>>>>  remoteproc-y                += remoteproc_virtio.o
>>>>>>  remoteproc-y                += remoteproc_elf_loader.o
>>>>>> +obj-$(CONFIG_REMOTEPROC_CDEV)        += remoteproc_cdev.o
>>>>>>  obj-$(CONFIG_IMX_REMOTEPROC)        += imx_rproc.o
>>>>>>  obj-$(CONFIG_MTK_SCP)            += mtk_scp.o mtk_scp_ipi.o
>>>>>>  obj-$(CONFIG_OMAP_REMOTEPROC)        += omap_remoteproc.o
>>>>>> diff --git a/drivers/remoteproc/remoteproc_cdev.c
>>>>>> b/drivers/remoteproc/remoteproc_cdev.c
>>>>>> new file mode 100644
>>>>>> index 0000000..8182bd1
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/remoteproc/remoteproc_cdev.c
>>>>>> @@ -0,0 +1,100 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>>>> +/*
>>>>>> + * Character device interface driver for Remoteproc framework.
>>>>>> + *
>>>>>> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
>>>>>> + */
>>>>>> +
>>>>>> +#include <linux/cdev.h>
>>>>>> +#include <linux/fs.h>
>>>>>> +#include <linux/module.h>
>>>>>> +#include <linux/mutex.h>
>>>>>> +#include <linux/remoteproc.h>
>>>>>> +
>>>>>> +#include "remoteproc_internal.h"
>>>>>> +
>>>>>> +#define NUM_RPROC_DEVICES    64
>>>>>> +static dev_t rproc_cdev;
>>>>>> +static DEFINE_IDA(cdev_minor_ida);
>>>>>> +
>>>>>> +static int rproc_cdev_open(struct inode *inode, struct file *file)
>>>>>> +{
>>>>>> +    struct rproc *rproc;
>>>>>> +
>>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>>>> +
>>>>>> +    if (!rproc)
>>>>>> +        return -EINVAL;
>>>>>> +
>>>>>> +    if (rproc->state == RPROC_RUNNING)
>>>>>> +        return -EBUSY;
>>>>>> +
>>>>>> +    return rproc_boot(rproc);
>>>>>> +}
>>>>>> +
>>>>>> +static int rproc_cdev_release(struct inode *inode, struct file *file)
>>>>>> +{
>>>>>> +    struct rproc *rproc;
>>>>>> +
>>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>>>> +
>>>>>> +    if (!rproc || rproc->state != RPROC_RUNNING)
>>>>>> +        return -EINVAL;
>>>>>> +
>>>>>> +    rproc_shutdown(rproc);
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static const struct file_operations rproc_fops = {
>>>>>> +    .open = rproc_cdev_open,
>>>>>> +    .release = rproc_cdev_release,
>>>>>> +};
>>>>>> +
>>>>>> +int rproc_char_device_add(struct rproc *rproc)
>>>>>> +{
>>>>>> +    int ret, minor;
>>>>>> +    dev_t cdevt;
>>>>>> +
>>>>>> +    minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
>>>>>> +                   GFP_KERNEL);
>>>>>> +    if (minor < 0) {
>>>>>> +        dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
>>>>>> +            __func__, minor);
>>>>>> +        return -ENODEV;
>>>>>> +    }
>>>>>> +
>>>>>> +    cdev_init(&rproc->char_dev, &rproc_fops);
>>>>>> +    rproc->char_dev.owner = THIS_MODULE;
>>>>>> +
>>>>>> +    cdevt = MKDEV(MAJOR(rproc_cdev), minor);
>>>>>> +    ret = cdev_add(&rproc->char_dev, cdevt, 1);
>>>>>> +    if (ret < 0)
>>>>>> +        ida_simple_remove(&cdev_minor_ida, minor);
>>>>>> +
>>>>>> +    rproc->dev.devt = cdevt;
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +void rproc_char_device_remove(struct rproc *rproc)
>>>>>> +{
>>>>>> +    __unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
>>>>>> +                "rproc");
>>>>>> +    ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
>>>>>> +}
>>>>>> +
>>>>>> +void __init rproc_init_cdev(void)
>>>>>> +{
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
>>>>>> +    if (ret < 0) {
>>>>>> +        pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
>>>>>> +        return;
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +void __exit rproc_exit_cdev(void)
>>>>>> +{
>>>>>> +    __unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");
>>>>>> +}
>>>>>> diff --git a/drivers/remoteproc/remoteproc_internal.h
>>>>>> b/drivers/remoteproc/remoteproc_internal.h
>>>>>> index 493ef92..28d61a1 100644
>>>>>> --- a/drivers/remoteproc/remoteproc_internal.h
>>>>>> +++ b/drivers/remoteproc/remoteproc_internal.h
>>>>>> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char *name,
>>>>>> struct rproc *rproc,
>>>>>>  int rproc_init_sysfs(void);
>>>>>>  void rproc_exit_sysfs(void);
>>>>>>
>>>>>> +#ifdef CONFIG_REMOTEPROC_CDEV
>>>>>> +void rproc_init_cdev(void);
>>>>>> +void rproc_exit_cdev(void);
>>>>>> +int rproc_char_device_add(struct rproc *rproc);
>>>>>> +void rproc_char_device_remove(struct rproc *rproc);
>>>>>> +#else
>>>>>> +static inline void rproc_init_cdev(void)
>>>>>> +{
>>>>>> +}
>>>>>> +static inline void rproc_exit_cdev(void)
>>>>>> +{
>>>>>> +}
>>>>>> +static inline int rproc_char_device_add(struct rproc *rproc)
>>>>>> +{
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +static inline void  rproc_char_device_remove(struct rproc *rproc)
>>>>>> +{
>>>>>> +}
>>>>>> +#endif
>>>>>> +
>>>>>>  void rproc_free_vring(struct rproc_vring *rvring);
>>>>>>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>>>>>>
>>>>>> @@ -63,6 +84,7 @@ struct resource_table *rproc_elf_find_loaded_rsc_table(struct
>>>>>> rproc *rproc,
>>>>>>  struct rproc_mem_entry *
>>>>>>  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
>>>>>>
>>>>>> +
>>>>>>  static inline
>>>>>>  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
>>>>>>  {
>>>>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>>>>> index 16ad666..c4ca796 100644
>>>>>> --- a/include/linux/remoteproc.h
>>>>>> +++ b/include/linux/remoteproc.h
>>>>>> @@ -37,6 +37,7 @@
>>>>>>
>>>>>>  #include <linux/types.h>
>>>>>>  #include <linux/mutex.h>
>>>>>> +#include <linux/cdev.h>
>>>>>>  #include <linux/virtio.h>
>>>>>>  #include <linux/completion.h>
>>>>>>  #include <linux/idr.h>
>>>>>> @@ -514,6 +515,7 @@ struct rproc {
>>>>>>      bool auto_boot;
>>>>>>      struct list_head dump_segments;
>>>>>>      int nb_vdev;
>>>>>> +    struct cdev char_dev;
>>>>>>  };
>>>>>>
> >>>>>  /**
Rishabh Bhatnagar April 8, 2020, 6:34 p.m. UTC | #11
On 2020-04-06 08:58, Clément Leger wrote:
> Hi Arnaud,
> 
> ----- On 6 Apr, 2020, at 16:17, Arnaud Pouliquen 
> arnaud.pouliquen@st.com wrote:
> 
>> Hi Clément,
>> 
>> On 4/6/20 2:06 PM, Clément Leger wrote:
>>> Hi Arnaud,
>>> 
>>> ----- On 6 Apr, 2020, at 11:01, Arnaud Pouliquen 
>>> arnaud.pouliquen@st.com wrote:
>>> 
>>>> On 4/3/20 9:13 PM, rishabhb@codeaurora.org wrote:
>>>>> On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
>>>>>> Hi
>>>>>> 
>>>>>> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
>>>>>>> Add the character device interface for userspace applications.
>>>>>>> This interface can be used in order to boot up and shutdown
>>>>>>> remote subsystems. Currently there is only a sysfs interface
>>>>>>> which the userspace clients can use. If a usersapce application
>>>>>>> crashes after booting the remote processor does not get any
>>>>>>> indication about the crash. It might still assume that the
>>>>>>> application is running. For example modem uses remotefs service
>>>>>>> to fetch data from disk/flash memory. If the remotefs service
>>>>>>> crashes, modem keeps on requesting data which might lead to a
>>>>>>> crash. Adding a character device interface makes the remote
>>>>>>> processor tightly coupled with the user space application.
>>>>>>> A crash of the application leads to a close on the file 
>>>>>>> descriptors
>>>>>>> therefore shutting down the remoteproc.
>>>>>> 
>>>>>> Sorry I'm late in the discussion, I hope I've gone through the 
>>>>>> whole
>>>>>> discussion so I don't reopen a closed point...
>>>>>> 
>>>>>> Something here is not crystal clear to me so I'd rather share 
>>>>>> it...
>>>>>> 
>>>>>> I suppose that you the automatic restart of the application is not 
>>>>>> possible to
>>>>>> stop and restart the remote processor...
>>>>> Yes correct, while we wait for the application to restart we might 
>>>>> observe a
>>>>> fatal crash.
>>>>>> 
>>>>>> Why this use case can not be solved by a process monitor or a 
>>>>>> service
>>>>>> in userland that detects the application crash and stop the remote
>>>>>> firmware using
>>>>>> the sysfs interface?
>>>>>> 
>>>>> What happens in the case where the process monitor itself crashes? 
>>>>> This is
>>>>> actually the approach we follow in our downstream code. We have a 
>>>>> central entity
>>>>> in userspace that controls bootup/shutdown of some remote 
>>>>> processors based on
>>>>> the
>>>>> votes from userspace clients. We have observed cases where this 
>>>>> entity
>>>>> itself crashes and remote processors are left hanging.
>>>> 
>>>> Your description makes me feel like this patch is only a workaround 
>>>> of something
>>>> that
>>>> should be fixed in the userland, even if i understand that hanging 
>>>> is one of the
>>>> most
>>>> critical problem and have to be fixed.
>>>> For instance, how to handle several applications that interact with 
>>>> the remote
>>>> processor
>>>> ( e.g. rpmsg service applications) how to stop and restart 
>>>> everything. Using the
>>>> char
>>>> device would probaly resolve only a part of the issue...
>>>> 
>>>> I'm not aware about your environment and i'm not a userland expert. 
>>>> But what i
>>>> still not
>>>> understand why a parent process can not do the job...
>>>> I just test a simple script on my side that treat the kill -9 of an 
>>>> application
>>>> ("cat" in my case).
>>> 
>>> This is not entirely true, if the parent process is killed with a 
>>> SIGKILL, then
>>> the process will not be able to handle anything and the remoteproc 
>>> will still
>>> be running.
>>> 
>>> What I understood from Rishabh patch is a way to allow a single 
>>> process handling
>>> the rproc state. We have the same kind of need and currently, if the
>>> user application crashes, then the rproc is still running (which 
>>> happens).
>>> 
>>>> 
>>>> #start the remote firmware
>>>> cp  $1 /lib/firmware/
>>>> echo $1> /sys/class/remoteproc/remoteproc0/firmware
>>>> echo start >/sys/class/remoteproc/remoteproc0/state
>>>> #your binary
>>>> cat /dev/kmsg
>>>> # stop the remote firmware in case of crash (and potentially some 
>>>> other apps)
>>>> echo stop >/sys/class/remoteproc/remoteproc0/state
>>>> 
>>> 
>>> This is not really "production proof" and what happens if the 
>>> application is
>>> responsible of setting the firmware which might be jitted ?
>>> And if the script receives the SIGKILL, then we are back to the same 
>>> problem.
>> Yes this is just a basic example, not an implementation which would 
>> depend on
>> the
>> environment. i'm just trying here to  put forward a multi-process 
>> solution...and
>> that I'm not an userland expert :).
>> 
>>> 
>>> I really think, this is a step forward an easier and reliable use of 
>>> the
>>> remoteproc
>>> on userland to guarantee a coherent rproc state even if host 
>>> application
>>> crashes.
Yes what we want is simple mechanism where a single userspace process 
can boot/
shutdown the remote processor in all scenarios. Adding more processes to 
monitor
the already existing process might have 2 issues. One is there might be 
a delay
between the application crash and process monitor getting to know about 
it and taking
action. This might prove to be fatal in our case. Second, possibly the 
monitor can hang
or get killed and is not deterministic.
>> 
>> I can see 3 ways of handling an application crash:
>> - just shutdown the firmware
>> => can be done through char device
>> - stop some other related processes and/or generate a remote proc 
>> crash dump for
>> debug
>> => /sysfs and/or debugfs
>> - do nothing as you want a silence application reboot and re-attach to 
>> the
>> running firmware
>> => use sysfs
>> 
>> I'm challenging the solution because splitting the API seems to me not 
>> a good
>> solution.
> 
> Completely ok with that, we have to fully understand the targeted 
> usecase to
> avoid implemented a flawed interface.
> 
>> Now i wonder how it works for the other applications that are relying 
>> on some
>> other
>> kernel frameworks...
> 
> For some other device, there is a chardev. The watchdog for intance 
> uses a
> /dev/watchdog. Regarding the gpio, it seems they are also using a 
> chardev
> and the sysfs interface is deprecated.
> 
>> Perhaps the answer is that these frameworks don't use sysfs but char 
>> device.
>> That would means that the sysfs solution is not the more adapted 
>> solution and
>> perhaps we should migrate to a char device.
>> But in this case, i think that it should implement the whole API and 
>> be
>> exclusive with
>> the syfs legacy API (so no sysfs or sysfs in read-only).
> 
> I agree with that, if another interface must be defined, then it should
> implement everything that is supported right now with the sysfs.
> 
The other fields that sysfs exposes right now are firmware_name, 
name(rproc name),
state. The targeted usecase was that these are configuration parameters 
specific
to the remoteproc and should stay in the sysfs interface. Whereas char 
device
should provide direct access to remoteproc device.
It would make sense to use this interface in conjunction with sysfs
interface, where you use /dev/remoteproc0 to boot/shutdown the remote 
processor
sysfs entries to fine tune the parameters.
Adding ioctls to implement all sysfs functionality seems like overkill 
to me. Let
me know what you guys think.
> 
> Clément
> 
>> 
>> Regards,
>> Arnaud
>> 
>>> 
>>> Regards,
>>> 
>>> Clément
>>> 
>>>> Anyway, it's just my feeling, let other people give their feedback.
>>>> 
>>>>>> I just want to be sure that there is no alternative to this, 
>>>>>> because
>>>>>> having two ways
>>>>>> for application to shutdown the firmware seems to me confusing...
>>>>> Does making this interface optional/configurable helps?
>>>>>> 
>>>>>> What about the opposite service, mean inform the application that 
>>>>>> the remote
>>>>>> processor is crashed?
>>>>>> Do you identify such need? or the "auto" crash recovery is 
>>>>>> sufficient?
>>>>> Auto recovery works perfectly for us. Although there is a mechanism 
>>>>> in
>>>>> place using QMI(Qualcomm MSM interface) that can notify clients 
>>>>> about remote
>>>>> processor crash.
>>>> 
>>>> Thanks for the information.
>>>> 
>>>> Regards
>>>> Arnaud
>>>> 
>>>>>> 
>>>>>> Thanks,
>>>>>> Arnaud
>>>>>>> 
>>>>>>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>>>>>>> ---
>>>>>>>  drivers/remoteproc/Kconfig               |   9 +++
>>>>>>>  drivers/remoteproc/Makefile              |   1 +
>>>>>>>  drivers/remoteproc/remoteproc_cdev.c     | 100 
>>>>>>> +++++++++++++++++++++++++++++++
>>>>>>>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>>>>>>>  include/linux/remoteproc.h               |   2 +
>>>>>>>  5 files changed, 134 insertions(+)
>>>>>>>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
>>>>>>> 
>>>>>>> diff --git a/drivers/remoteproc/Kconfig 
>>>>>>> b/drivers/remoteproc/Kconfig
>>>>>>> index de3862c..6374b79 100644
>>>>>>> --- a/drivers/remoteproc/Kconfig
>>>>>>> +++ b/drivers/remoteproc/Kconfig
>>>>>>> @@ -14,6 +14,15 @@ config REMOTEPROC
>>>>>>> 
>>>>>>>  if REMOTEPROC
>>>>>>> 
>>>>>>> +config REMOTEPROC_CDEV
>>>>>>> +    bool "Remoteproc character device interface"
>>>>>>> +    help
>>>>>>> +      Say y here to have a character device interface for 
>>>>>>> Remoteproc
>>>>>>> +      framework. Userspace can boot/shutdown remote processors 
>>>>>>> through
>>>>>>> +      this interface.
>>>>>>> +
>>>>>>> +      It's safe to say N if you don't want to use this 
>>>>>>> interface.
>>>>>>> +
>>>>>>>  config IMX_REMOTEPROC
>>>>>>>      tristate "IMX6/7 remoteproc support"
>>>>>>>      depends on ARCH_MXC
>>>>>>> diff --git a/drivers/remoteproc/Makefile 
>>>>>>> b/drivers/remoteproc/Makefile
>>>>>>> index e30a1b1..b7d4f77 100644
>>>>>>> --- a/drivers/remoteproc/Makefile
>>>>>>> +++ b/drivers/remoteproc/Makefile
>>>>>>> @@ -9,6 +9,7 @@ remoteproc-y                += 
>>>>>>> remoteproc_debugfs.o
>>>>>>>  remoteproc-y                += remoteproc_sysfs.o
>>>>>>>  remoteproc-y                += remoteproc_virtio.o
>>>>>>>  remoteproc-y                += remoteproc_elf_loader.o
>>>>>>> +obj-$(CONFIG_REMOTEPROC_CDEV)        += remoteproc_cdev.o
>>>>>>>  obj-$(CONFIG_IMX_REMOTEPROC)        += imx_rproc.o
>>>>>>>  obj-$(CONFIG_MTK_SCP)            += mtk_scp.o mtk_scp_ipi.o
>>>>>>>  obj-$(CONFIG_OMAP_REMOTEPROC)        += omap_remoteproc.o
>>>>>>> diff --git a/drivers/remoteproc/remoteproc_cdev.c
>>>>>>> b/drivers/remoteproc/remoteproc_cdev.c
>>>>>>> new file mode 100644
>>>>>>> index 0000000..8182bd1
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/remoteproc/remoteproc_cdev.c
>>>>>>> @@ -0,0 +1,100 @@
>>>>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>>>>> +/*
>>>>>>> + * Character device interface driver for Remoteproc framework.
>>>>>>> + *
>>>>>>> + * Copyright (c) 2020, The Linux Foundation. All rights 
>>>>>>> reserved.
>>>>>>> + */
>>>>>>> +
>>>>>>> +#include <linux/cdev.h>
>>>>>>> +#include <linux/fs.h>
>>>>>>> +#include <linux/module.h>
>>>>>>> +#include <linux/mutex.h>
>>>>>>> +#include <linux/remoteproc.h>
>>>>>>> +
>>>>>>> +#include "remoteproc_internal.h"
>>>>>>> +
>>>>>>> +#define NUM_RPROC_DEVICES    64
>>>>>>> +static dev_t rproc_cdev;
>>>>>>> +static DEFINE_IDA(cdev_minor_ida);
>>>>>>> +
>>>>>>> +static int rproc_cdev_open(struct inode *inode, struct file 
>>>>>>> *file)
>>>>>>> +{
>>>>>>> +    struct rproc *rproc;
>>>>>>> +
>>>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>>>>> +
>>>>>>> +    if (!rproc)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    if (rproc->state == RPROC_RUNNING)
>>>>>>> +        return -EBUSY;
>>>>>>> +
>>>>>>> +    return rproc_boot(rproc);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int rproc_cdev_release(struct inode *inode, struct file 
>>>>>>> *file)
>>>>>>> +{
>>>>>>> +    struct rproc *rproc;
>>>>>>> +
>>>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>>>>> +
>>>>>>> +    if (!rproc || rproc->state != RPROC_RUNNING)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    rproc_shutdown(rproc);
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static const struct file_operations rproc_fops = {
>>>>>>> +    .open = rproc_cdev_open,
>>>>>>> +    .release = rproc_cdev_release,
>>>>>>> +};
>>>>>>> +
>>>>>>> +int rproc_char_device_add(struct rproc *rproc)
>>>>>>> +{
>>>>>>> +    int ret, minor;
>>>>>>> +    dev_t cdevt;
>>>>>>> +
>>>>>>> +    minor = ida_simple_get(&cdev_minor_ida, 0, 
>>>>>>> NUM_RPROC_DEVICES,
>>>>>>> +                   GFP_KERNEL);
>>>>>>> +    if (minor < 0) {
>>>>>>> +        dev_err(&rproc->dev, "%s: No more minor numbers left! 
>>>>>>> rc:%d\n",
>>>>>>> +            __func__, minor);
>>>>>>> +        return -ENODEV;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    cdev_init(&rproc->char_dev, &rproc_fops);
>>>>>>> +    rproc->char_dev.owner = THIS_MODULE;
>>>>>>> +
>>>>>>> +    cdevt = MKDEV(MAJOR(rproc_cdev), minor);
>>>>>>> +    ret = cdev_add(&rproc->char_dev, cdevt, 1);
>>>>>>> +    if (ret < 0)
>>>>>>> +        ida_simple_remove(&cdev_minor_ida, minor);
>>>>>>> +
>>>>>>> +    rproc->dev.devt = cdevt;
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +void rproc_char_device_remove(struct rproc *rproc)
>>>>>>> +{
>>>>>>> +    __unregister_chrdev(MAJOR(rproc->dev.devt), 
>>>>>>> MINOR(rproc->dev.devt), 1,
>>>>>>> +                "rproc");
>>>>>>> +    ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
>>>>>>> +}
>>>>>>> +
>>>>>>> +void __init rproc_init_cdev(void)
>>>>>>> +{
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, 
>>>>>>> "rproc");
>>>>>>> +    if (ret < 0) {
>>>>>>> +        pr_err("Failed to alloc rproc_cdev region, err %d\n", 
>>>>>>> ret);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>> +void __exit rproc_exit_cdev(void)
>>>>>>> +{
>>>>>>> +    __unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, 
>>>>>>> "rproc");
>>>>>>> +}
>>>>>>> diff --git a/drivers/remoteproc/remoteproc_internal.h
>>>>>>> b/drivers/remoteproc/remoteproc_internal.h
>>>>>>> index 493ef92..28d61a1 100644
>>>>>>> --- a/drivers/remoteproc/remoteproc_internal.h
>>>>>>> +++ b/drivers/remoteproc/remoteproc_internal.h
>>>>>>> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const 
>>>>>>> char *name,
>>>>>>> struct rproc *rproc,
>>>>>>>  int rproc_init_sysfs(void);
>>>>>>>  void rproc_exit_sysfs(void);
>>>>>>> 
>>>>>>> +#ifdef CONFIG_REMOTEPROC_CDEV
>>>>>>> +void rproc_init_cdev(void);
>>>>>>> +void rproc_exit_cdev(void);
>>>>>>> +int rproc_char_device_add(struct rproc *rproc);
>>>>>>> +void rproc_char_device_remove(struct rproc *rproc);
>>>>>>> +#else
>>>>>>> +static inline void rproc_init_cdev(void)
>>>>>>> +{
>>>>>>> +}
>>>>>>> +static inline void rproc_exit_cdev(void)
>>>>>>> +{
>>>>>>> +}
>>>>>>> +static inline int rproc_char_device_add(struct rproc *rproc)
>>>>>>> +{
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +static inline void  rproc_char_device_remove(struct rproc 
>>>>>>> *rproc)
>>>>>>> +{
>>>>>>> +}
>>>>>>> +#endif
>>>>>>> +
>>>>>>>  void rproc_free_vring(struct rproc_vring *rvring);
>>>>>>>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>>>>>>> 
>>>>>>> @@ -63,6 +84,7 @@ struct resource_table 
>>>>>>> *rproc_elf_find_loaded_rsc_table(struct
>>>>>>> rproc *rproc,
>>>>>>>  struct rproc_mem_entry *
>>>>>>>  rproc_find_carveout_by_name(struct rproc *rproc, const char 
>>>>>>> *name, ...);
>>>>>>> 
>>>>>>> +
>>>>>>>  static inline
>>>>>>>  int rproc_fw_sanity_check(struct rproc *rproc, const struct 
>>>>>>> firmware *fw)
>>>>>>>  {
>>>>>>> diff --git a/include/linux/remoteproc.h 
>>>>>>> b/include/linux/remoteproc.h
>>>>>>> index 16ad666..c4ca796 100644
>>>>>>> --- a/include/linux/remoteproc.h
>>>>>>> +++ b/include/linux/remoteproc.h
>>>>>>> @@ -37,6 +37,7 @@
>>>>>>> 
>>>>>>>  #include <linux/types.h>
>>>>>>>  #include <linux/mutex.h>
>>>>>>> +#include <linux/cdev.h>
>>>>>>>  #include <linux/virtio.h>
>>>>>>>  #include <linux/completion.h>
>>>>>>>  #include <linux/idr.h>
>>>>>>> @@ -514,6 +515,7 @@ struct rproc {
>>>>>>>      bool auto_boot;
>>>>>>>      struct list_head dump_segments;
>>>>>>>      int nb_vdev;
>>>>>>> +    struct cdev char_dev;
>>>>>>>  };
>>>>>>> 
>> >>>>>  /**
Arnaud POULIQUEN April 9, 2020, 8:29 a.m. UTC | #12
On 4/8/20 8:34 PM, rishabhb@codeaurora.org wrote:
> On 2020-04-06 08:58, Clément Leger wrote:
>> Hi Arnaud,
>>
>> ----- On 6 Apr, 2020, at 16:17, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:
>>
>>> Hi Clément,
>>>
>>> On 4/6/20 2:06 PM, Clément Leger wrote:
>>>> Hi Arnaud,
>>>>
>>>> ----- On 6 Apr, 2020, at 11:01, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:
>>>>
>>>>> On 4/3/20 9:13 PM, rishabhb@codeaurora.org wrote:
>>>>>> On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
>>>>>>> Hi
>>>>>>>
>>>>>>> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
>>>>>>>> Add the character device interface for userspace applications.
>>>>>>>> This interface can be used in order to boot up and shutdown
>>>>>>>> remote subsystems. Currently there is only a sysfs interface
>>>>>>>> which the userspace clients can use. If a usersapce application
>>>>>>>> crashes after booting the remote processor does not get any
>>>>>>>> indication about the crash. It might still assume that the
>>>>>>>> application is running. For example modem uses remotefs service
>>>>>>>> to fetch data from disk/flash memory. If the remotefs service
>>>>>>>> crashes, modem keeps on requesting data which might lead to a
>>>>>>>> crash. Adding a character device interface makes the remote
>>>>>>>> processor tightly coupled with the user space application.
>>>>>>>> A crash of the application leads to a close on the file descriptors
>>>>>>>> therefore shutting down the remoteproc.
>>>>>>>
>>>>>>> Sorry I'm late in the discussion, I hope I've gone through the whole
>>>>>>> discussion so I don't reopen a closed point...
>>>>>>>
>>>>>>> Something here is not crystal clear to me so I'd rather share it...
>>>>>>>
>>>>>>> I suppose that you the automatic restart of the application is not possible to
>>>>>>> stop and restart the remote processor...
>>>>>> Yes correct, while we wait for the application to restart we might observe a
>>>>>> fatal crash.
>>>>>>>
>>>>>>> Why this use case can not be solved by a process monitor or a service
>>>>>>> in userland that detects the application crash and stop the remote
>>>>>>> firmware using
>>>>>>> the sysfs interface?
>>>>>>>
>>>>>> What happens in the case where the process monitor itself crashes? This is
>>>>>> actually the approach we follow in our downstream code. We have a central entity
>>>>>> in userspace that controls bootup/shutdown of some remote processors based on
>>>>>> the
>>>>>> votes from userspace clients. We have observed cases where this entity
>>>>>> itself crashes and remote processors are left hanging.
>>>>>
>>>>> Your description makes me feel like this patch is only a workaround of something
>>>>> that
>>>>> should be fixed in the userland, even if i understand that hanging is one of the
>>>>> most
>>>>> critical problem and have to be fixed.
>>>>> For instance, how to handle several applications that interact with the remote
>>>>> processor
>>>>> ( e.g. rpmsg service applications) how to stop and restart everything. Using the
>>>>> char
>>>>> device would probaly resolve only a part of the issue...
>>>>>
>>>>> I'm not aware about your environment and i'm not a userland expert. But what i
>>>>> still not
>>>>> understand why a parent process can not do the job...
>>>>> I just test a simple script on my side that treat the kill -9 of an application
>>>>> ("cat" in my case).
>>>>
>>>> This is not entirely true, if the parent process is killed with a SIGKILL, then
>>>> the process will not be able to handle anything and the remoteproc will still
>>>> be running.
>>>>
>>>> What I understood from Rishabh patch is a way to allow a single process handling
>>>> the rproc state. We have the same kind of need and currently, if the
>>>> user application crashes, then the rproc is still running (which happens).
>>>>
>>>>>
>>>>> #start the remote firmware
>>>>> cp  $1 /lib/firmware/
>>>>> echo $1> /sys/class/remoteproc/remoteproc0/firmware
>>>>> echo start >/sys/class/remoteproc/remoteproc0/state
>>>>> #your binary
>>>>> cat /dev/kmsg
>>>>> # stop the remote firmware in case of crash (and potentially some other apps)
>>>>> echo stop >/sys/class/remoteproc/remoteproc0/state
>>>>>
>>>>
>>>> This is not really "production proof" and what happens if the application is
>>>> responsible of setting the firmware which might be jitted ?
>>>> And if the script receives the SIGKILL, then we are back to the same problem.
>>> Yes this is just a basic example, not an implementation which would depend on
>>> the
>>> environment. i'm just trying here to  put forward a multi-process solution...and
>>> that I'm not an userland expert :).
>>>
>>>>
>>>> I really think, this is a step forward an easier and reliable use of the
>>>> remoteproc
>>>> on userland to guarantee a coherent rproc state even if host application
>>>> crashes.
> Yes what we want is simple mechanism where a single userspace process can boot/
> shutdown the remote processor in all scenarios. Adding more processes to monitor
> the already existing process might have 2 issues. One is there might be a delay
> between the application crash and process monitor getting to know about it and taking
> action. This might prove to be fatal in our case. Second, possibly the monitor can hang
> or get killed and is not deterministic.
>>>
>>> I can see 3 ways of handling an application crash:
>>> - just shutdown the firmware
>>> => can be done through char device
>>> - stop some other related processes and/or generate a remote proc crash dump for
>>> debug
>>> => /sysfs and/or debugfs
>>> - do nothing as you want a silence application reboot and re-attach to the
>>> running firmware
>>> => use sysfs
>>>
>>> I'm challenging the solution because splitting the API seems to me not a good
>>> solution.
>>
>> Completely ok with that, we have to fully understand the targeted usecase to
>> avoid implemented a flawed interface.
>>
>>> Now i wonder how it works for the other applications that are relying on some
>>> other
>>> kernel frameworks...
>>
>> For some other device, there is a chardev. The watchdog for intance uses a
>> /dev/watchdog. Regarding the gpio, it seems they are also using a chardev
>> and the sysfs interface is deprecated.
>>
>>> Perhaps the answer is that these frameworks don't use sysfs but char device.
>>> That would means that the sysfs solution is not the more adapted solution and
>>> perhaps we should migrate to a char device.
>>> But in this case, i think that it should implement the whole API and be
>>> exclusive with
>>> the syfs legacy API (so no sysfs or sysfs in read-only).
>>
>> I agree with that, if another interface must be defined, then it should
>> implement everything that is supported right now with the sysfs.
>>
> The other fields that sysfs exposes right now are firmware_name, name(rproc name),
> state. The targeted usecase was that these are configuration parameters specific
> to the remoteproc and should stay in the sysfs interface. Whereas char device
> should provide direct access to remoteproc device.
> It would make sense to use this interface in conjunction with sysfs
> interface, where you use /dev/remoteproc0 to boot/shutdown the remote processor
> sysfs entries to fine tune the parameters.
> Adding ioctls to implement all sysfs functionality seems like overkill to me. Let
> me know what you guys think.

In my opinion if we open the possibility of accessing to remoteproc through a char
device, we should move torwards a solution that would replace the sysfs. 
In this case sysfs fields could be read-only, and the char devices would have to
support the different shutdown modes. This means that the auto shutdown should be
configurable (IOCTL?).
But I assume that the minimum could be to only disable write access on the "state" field
and handle the "auto shutdown" as an option of the char device.

Anyway it only my opinion, let Bjorn an Mathieu comment and decide :)

Regards,
Arnaud

>>
>> Clément
>>
>>>
>>> Regards,
>>> Arnaud
>>>
>>>>
>>>> Regards,
>>>>
>>>> Clément
>>>>
>>>>> Anyway, it's just my feeling, let other people give their feedback.
>>>>>
>>>>>>> I just want to be sure that there is no alternative to this, because
>>>>>>> having two ways
>>>>>>> for application to shutdown the firmware seems to me confusing...
>>>>>> Does making this interface optional/configurable helps?
>>>>>>>
>>>>>>> What about the opposite service, mean inform the application that the remote
>>>>>>> processor is crashed?
>>>>>>> Do you identify such need? or the "auto" crash recovery is sufficient?
>>>>>> Auto recovery works perfectly for us. Although there is a mechanism in
>>>>>> place using QMI(Qualcomm MSM interface) that can notify clients about remote
>>>>>> processor crash.
>>>>>
>>>>> Thanks for the information.
>>>>>
>>>>> Regards
>>>>> Arnaud
>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Arnaud
>>>>>>>>
>>>>>>>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
>>>>>>>> ---
>>>>>>>>  drivers/remoteproc/Kconfig               |   9 +++
>>>>>>>>  drivers/remoteproc/Makefile              |   1 +
>>>>>>>>  drivers/remoteproc/remoteproc_cdev.c     | 100 +++++++++++++++++++++++++++++++
>>>>>>>>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
>>>>>>>>  include/linux/remoteproc.h               |   2 +
>>>>>>>>  5 files changed, 134 insertions(+)
>>>>>>>>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
>>>>>>>>
>>>>>>>> diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
>>>>>>>> index de3862c..6374b79 100644
>>>>>>>> --- a/drivers/remoteproc/Kconfig
>>>>>>>> +++ b/drivers/remoteproc/Kconfig
>>>>>>>> @@ -14,6 +14,15 @@ config REMOTEPROC
>>>>>>>>
>>>>>>>>  if REMOTEPROC
>>>>>>>>
>>>>>>>> +config REMOTEPROC_CDEV
>>>>>>>> +    bool "Remoteproc character device interface"
>>>>>>>> +    help
>>>>>>>> +      Say y here to have a character device interface for Remoteproc
>>>>>>>> +      framework. Userspace can boot/shutdown remote processors through
>>>>>>>> +      this interface.
>>>>>>>> +
>>>>>>>> +      It's safe to say N if you don't want to use this interface.
>>>>>>>> +
>>>>>>>>  config IMX_REMOTEPROC
>>>>>>>>      tristate "IMX6/7 remoteproc support"
>>>>>>>>      depends on ARCH_MXC
>>>>>>>> diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
>>>>>>>> index e30a1b1..b7d4f77 100644
>>>>>>>> --- a/drivers/remoteproc/Makefile
>>>>>>>> +++ b/drivers/remoteproc/Makefile
>>>>>>>> @@ -9,6 +9,7 @@ remoteproc-y                += remoteproc_debugfs.o
>>>>>>>>  remoteproc-y                += remoteproc_sysfs.o
>>>>>>>>  remoteproc-y                += remoteproc_virtio.o
>>>>>>>>  remoteproc-y                += remoteproc_elf_loader.o
>>>>>>>> +obj-$(CONFIG_REMOTEPROC_CDEV)        += remoteproc_cdev.o
>>>>>>>>  obj-$(CONFIG_IMX_REMOTEPROC)        += imx_rproc.o
>>>>>>>>  obj-$(CONFIG_MTK_SCP)            += mtk_scp.o mtk_scp_ipi.o
>>>>>>>>  obj-$(CONFIG_OMAP_REMOTEPROC)        += omap_remoteproc.o
>>>>>>>> diff --git a/drivers/remoteproc/remoteproc_cdev.c
>>>>>>>> b/drivers/remoteproc/remoteproc_cdev.c
>>>>>>>> new file mode 100644
>>>>>>>> index 0000000..8182bd1
>>>>>>>> --- /dev/null
>>>>>>>> +++ b/drivers/remoteproc/remoteproc_cdev.c
>>>>>>>> @@ -0,0 +1,100 @@
>>>>>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>>>>>> +/*
>>>>>>>> + * Character device interface driver for Remoteproc framework.
>>>>>>>> + *
>>>>>>>> + * Copyright (c) 2020, The Linux Foundation. All rights reserved.
>>>>>>>> + */
>>>>>>>> +
>>>>>>>> +#include <linux/cdev.h>
>>>>>>>> +#include <linux/fs.h>
>>>>>>>> +#include <linux/module.h>
>>>>>>>> +#include <linux/mutex.h>
>>>>>>>> +#include <linux/remoteproc.h>
>>>>>>>> +
>>>>>>>> +#include "remoteproc_internal.h"
>>>>>>>> +
>>>>>>>> +#define NUM_RPROC_DEVICES    64
>>>>>>>> +static dev_t rproc_cdev;
>>>>>>>> +static DEFINE_IDA(cdev_minor_ida);
>>>>>>>> +
>>>>>>>> +static int rproc_cdev_open(struct inode *inode, struct file *file)
>>>>>>>> +{
>>>>>>>> +    struct rproc *rproc;
>>>>>>>> +
>>>>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>>>>>> +
>>>>>>>> +    if (!rproc)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    if (rproc->state == RPROC_RUNNING)
>>>>>>>> +        return -EBUSY;
>>>>>>>> +
>>>>>>>> +    return rproc_boot(rproc);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static int rproc_cdev_release(struct inode *inode, struct file *file)
>>>>>>>> +{
>>>>>>>> +    struct rproc *rproc;
>>>>>>>> +
>>>>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
>>>>>>>> +
>>>>>>>> +    if (!rproc || rproc->state != RPROC_RUNNING)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    rproc_shutdown(rproc);
>>>>>>>> +
>>>>>>>> +    return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static const struct file_operations rproc_fops = {
>>>>>>>> +    .open = rproc_cdev_open,
>>>>>>>> +    .release = rproc_cdev_release,
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +int rproc_char_device_add(struct rproc *rproc)
>>>>>>>> +{
>>>>>>>> +    int ret, minor;
>>>>>>>> +    dev_t cdevt;
>>>>>>>> +
>>>>>>>> +    minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
>>>>>>>> +                   GFP_KERNEL);
>>>>>>>> +    if (minor < 0) {
>>>>>>>> +        dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
>>>>>>>> +            __func__, minor);
>>>>>>>> +        return -ENODEV;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    cdev_init(&rproc->char_dev, &rproc_fops);
>>>>>>>> +    rproc->char_dev.owner = THIS_MODULE;
>>>>>>>> +
>>>>>>>> +    cdevt = MKDEV(MAJOR(rproc_cdev), minor);
>>>>>>>> +    ret = cdev_add(&rproc->char_dev, cdevt, 1);
>>>>>>>> +    if (ret < 0)
>>>>>>>> +        ida_simple_remove(&cdev_minor_ida, minor);
>>>>>>>> +
>>>>>>>> +    rproc->dev.devt = cdevt;
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void rproc_char_device_remove(struct rproc *rproc)
>>>>>>>> +{
>>>>>>>> +    __unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
>>>>>>>> +                "rproc");
>>>>>>>> +    ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void __init rproc_init_cdev(void)
>>>>>>>> +{
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
>>>>>>>> +    if (ret < 0) {
>>>>>>>> +        pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
>>>>>>>> +        return;
>>>>>>>> +    }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +void __exit rproc_exit_cdev(void)
>>>>>>>> +{
>>>>>>>> +    __unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");
>>>>>>>> +}
>>>>>>>> diff --git a/drivers/remoteproc/remoteproc_internal.h
>>>>>>>> b/drivers/remoteproc/remoteproc_internal.h
>>>>>>>> index 493ef92..28d61a1 100644
>>>>>>>> --- a/drivers/remoteproc/remoteproc_internal.h
>>>>>>>> +++ b/drivers/remoteproc/remoteproc_internal.h
>>>>>>>> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const char *name,
>>>>>>>> struct rproc *rproc,
>>>>>>>>  int rproc_init_sysfs(void);
>>>>>>>>  void rproc_exit_sysfs(void);
>>>>>>>>
>>>>>>>> +#ifdef CONFIG_REMOTEPROC_CDEV
>>>>>>>> +void rproc_init_cdev(void);
>>>>>>>> +void rproc_exit_cdev(void);
>>>>>>>> +int rproc_char_device_add(struct rproc *rproc);
>>>>>>>> +void rproc_char_device_remove(struct rproc *rproc);
>>>>>>>> +#else
>>>>>>>> +static inline void rproc_init_cdev(void)
>>>>>>>> +{
>>>>>>>> +}
>>>>>>>> +static inline void rproc_exit_cdev(void)
>>>>>>>> +{
>>>>>>>> +}
>>>>>>>> +static inline int rproc_char_device_add(struct rproc *rproc)
>>>>>>>> +{
>>>>>>>> +    return 0;
>>>>>>>> +}
>>>>>>>> +static inline void  rproc_char_device_remove(struct rproc *rproc)
>>>>>>>> +{
>>>>>>>> +}
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>>  void rproc_free_vring(struct rproc_vring *rvring);
>>>>>>>>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
>>>>>>>>
>>>>>>>> @@ -63,6 +84,7 @@ struct resource_table *rproc_elf_find_loaded_rsc_table(struct
>>>>>>>> rproc *rproc,
>>>>>>>>  struct rproc_mem_entry *
>>>>>>>>  rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
>>>>>>>>
>>>>>>>> +
>>>>>>>>  static inline
>>>>>>>>  int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
>>>>>>>>  {
>>>>>>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>>>>>>> index 16ad666..c4ca796 100644
>>>>>>>> --- a/include/linux/remoteproc.h
>>>>>>>> +++ b/include/linux/remoteproc.h
>>>>>>>> @@ -37,6 +37,7 @@
>>>>>>>>
>>>>>>>>  #include <linux/types.h>
>>>>>>>>  #include <linux/mutex.h>
>>>>>>>> +#include <linux/cdev.h>
>>>>>>>>  #include <linux/virtio.h>
>>>>>>>>  #include <linux/completion.h>
>>>>>>>>  #include <linux/idr.h>
>>>>>>>> @@ -514,6 +515,7 @@ struct rproc {
>>>>>>>>      bool auto_boot;
>>>>>>>>      struct list_head dump_segments;
>>>>>>>>      int nb_vdev;
>>>>>>>> +    struct cdev char_dev;
>>>>>>>>  };
>>>>>>>>
>>> >>>>>  /**
Mathieu Poirier April 9, 2020, 5:14 p.m. UTC | #13
On Wed, 8 Apr 2020 at 12:35, <rishabhb@codeaurora.org> wrote:
>
> On 2020-04-06 08:58, Clément Leger wrote:
> > Hi Arnaud,
> >
> > ----- On 6 Apr, 2020, at 16:17, Arnaud Pouliquen
> > arnaud.pouliquen@st.com wrote:
> >
> >> Hi Clément,
> >>
> >> On 4/6/20 2:06 PM, Clément Leger wrote:
> >>> Hi Arnaud,
> >>>
> >>> ----- On 6 Apr, 2020, at 11:01, Arnaud Pouliquen
> >>> arnaud.pouliquen@st.com wrote:
> >>>
> >>>> On 4/3/20 9:13 PM, rishabhb@codeaurora.org wrote:
> >>>>> On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
> >>>>>> Hi
> >>>>>>
> >>>>>> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
> >>>>>>> Add the character device interface for userspace applications.
> >>>>>>> This interface can be used in order to boot up and shutdown
> >>>>>>> remote subsystems. Currently there is only a sysfs interface
> >>>>>>> which the userspace clients can use. If a usersapce application
> >>>>>>> crashes after booting the remote processor does not get any
> >>>>>>> indication about the crash. It might still assume that the
> >>>>>>> application is running. For example modem uses remotefs service
> >>>>>>> to fetch data from disk/flash memory. If the remotefs service
> >>>>>>> crashes, modem keeps on requesting data which might lead to a
> >>>>>>> crash. Adding a character device interface makes the remote
> >>>>>>> processor tightly coupled with the user space application.
> >>>>>>> A crash of the application leads to a close on the file
> >>>>>>> descriptors
> >>>>>>> therefore shutting down the remoteproc.
> >>>>>>
> >>>>>> Sorry I'm late in the discussion, I hope I've gone through the
> >>>>>> whole
> >>>>>> discussion so I don't reopen a closed point...
> >>>>>>
> >>>>>> Something here is not crystal clear to me so I'd rather share
> >>>>>> it...
> >>>>>>
> >>>>>> I suppose that you the automatic restart of the application is not
> >>>>>> possible to
> >>>>>> stop and restart the remote processor...
> >>>>> Yes correct, while we wait for the application to restart we might
> >>>>> observe a
> >>>>> fatal crash.
> >>>>>>
> >>>>>> Why this use case can not be solved by a process monitor or a
> >>>>>> service
> >>>>>> in userland that detects the application crash and stop the remote
> >>>>>> firmware using
> >>>>>> the sysfs interface?
> >>>>>>
> >>>>> What happens in the case where the process monitor itself crashes?
> >>>>> This is
> >>>>> actually the approach we follow in our downstream code. We have a
> >>>>> central entity
> >>>>> in userspace that controls bootup/shutdown of some remote
> >>>>> processors based on
> >>>>> the
> >>>>> votes from userspace clients. We have observed cases where this
> >>>>> entity
> >>>>> itself crashes and remote processors are left hanging.
> >>>>
> >>>> Your description makes me feel like this patch is only a workaround
> >>>> of something
> >>>> that
> >>>> should be fixed in the userland, even if i understand that hanging
> >>>> is one of the
> >>>> most
> >>>> critical problem and have to be fixed.
> >>>> For instance, how to handle several applications that interact with
> >>>> the remote
> >>>> processor
> >>>> ( e.g. rpmsg service applications) how to stop and restart
> >>>> everything. Using the
> >>>> char
> >>>> device would probaly resolve only a part of the issue...
> >>>>
> >>>> I'm not aware about your environment and i'm not a userland expert.
> >>>> But what i
> >>>> still not
> >>>> understand why a parent process can not do the job...
> >>>> I just test a simple script on my side that treat the kill -9 of an
> >>>> application
> >>>> ("cat" in my case).
> >>>
> >>> This is not entirely true, if the parent process is killed with a
> >>> SIGKILL, then
> >>> the process will not be able to handle anything and the remoteproc
> >>> will still
> >>> be running.
> >>>
> >>> What I understood from Rishabh patch is a way to allow a single
> >>> process handling
> >>> the rproc state. We have the same kind of need and currently, if the
> >>> user application crashes, then the rproc is still running (which
> >>> happens).
> >>>
> >>>>
> >>>> #start the remote firmware
> >>>> cp  $1 /lib/firmware/
> >>>> echo $1> /sys/class/remoteproc/remoteproc0/firmware
> >>>> echo start >/sys/class/remoteproc/remoteproc0/state
> >>>> #your binary
> >>>> cat /dev/kmsg
> >>>> # stop the remote firmware in case of crash (and potentially some
> >>>> other apps)
> >>>> echo stop >/sys/class/remoteproc/remoteproc0/state
> >>>>
> >>>
> >>> This is not really "production proof" and what happens if the
> >>> application is
> >>> responsible of setting the firmware which might be jitted ?
> >>> And if the script receives the SIGKILL, then we are back to the same
> >>> problem.
> >> Yes this is just a basic example, not an implementation which would
> >> depend on
> >> the
> >> environment. i'm just trying here to  put forward a multi-process
> >> solution...and
> >> that I'm not an userland expert :).
> >>
> >>>
> >>> I really think, this is a step forward an easier and reliable use of
> >>> the
> >>> remoteproc
> >>> on userland to guarantee a coherent rproc state even if host
> >>> application
> >>> crashes.
> Yes what we want is simple mechanism where a single userspace process
> can boot/
> shutdown the remote processor in all scenarios. Adding more processes to
> monitor
> the already existing process might have 2 issues. One is there might be
> a delay
> between the application crash and process monitor getting to know about
> it and taking
> action. This might prove to be fatal in our case. Second, possibly the
> monitor can hang
> or get killed and is not deterministic.
> >>
> >> I can see 3 ways of handling an application crash:
> >> - just shutdown the firmware
> >> => can be done through char device
> >> - stop some other related processes and/or generate a remote proc
> >> crash dump for
> >> debug
> >> => /sysfs and/or debugfs
> >> - do nothing as you want a silence application reboot and re-attach to
> >> the
> >> running firmware
> >> => use sysfs
> >>
> >> I'm challenging the solution because splitting the API seems to me not
> >> a good
> >> solution.
> >
> > Completely ok with that, we have to fully understand the targeted
> > usecase to
> > avoid implemented a flawed interface.
> >
> >> Now i wonder how it works for the other applications that are relying
> >> on some
> >> other
> >> kernel frameworks...
> >
> > For some other device, there is a chardev. The watchdog for intance
> > uses a
> > /dev/watchdog. Regarding the gpio, it seems they are also using a
> > chardev
> > and the sysfs interface is deprecated.
> >
> >> Perhaps the answer is that these frameworks don't use sysfs but char
> >> device.
> >> That would means that the sysfs solution is not the more adapted
> >> solution and
> >> perhaps we should migrate to a char device.
> >> But in this case, i think that it should implement the whole API and
> >> be
> >> exclusive with
> >> the syfs legacy API (so no sysfs or sysfs in read-only).
> >
> > I agree with that, if another interface must be defined, then it should
> > implement everything that is supported right now with the sysfs.
> >
> The other fields that sysfs exposes right now are firmware_name,
> name(rproc name),
> state. The targeted usecase was that these are configuration parameters
> specific
> to the remoteproc and should stay in the sysfs interface. Whereas char
> device
> should provide direct access to remoteproc device.
> It would make sense to use this interface in conjunction with sysfs
> interface, where you use /dev/remoteproc0 to boot/shutdown the remote
> processor
> sysfs entries to fine tune the parameters.
> Adding ioctls to implement all sysfs functionality seems like overkill
> to me. Let
> me know what you guys think.

Hi Rishabh,

The sysfs interface is not going away - it is part of the API and that
can't be changed anymore.  I suggest the new character device
interface strictly implement what is currently needed and leave what
is not for future implementation.

> >
> > Clément
> >
> >>
> >> Regards,
> >> Arnaud
> >>
> >>>
> >>> Regards,
> >>>
> >>> Clément
> >>>
> >>>> Anyway, it's just my feeling, let other people give their feedback.
> >>>>
> >>>>>> I just want to be sure that there is no alternative to this,
> >>>>>> because
> >>>>>> having two ways
> >>>>>> for application to shutdown the firmware seems to me confusing...
> >>>>> Does making this interface optional/configurable helps?
> >>>>>>
> >>>>>> What about the opposite service, mean inform the application that
> >>>>>> the remote
> >>>>>> processor is crashed?
> >>>>>> Do you identify such need? or the "auto" crash recovery is
> >>>>>> sufficient?
> >>>>> Auto recovery works perfectly for us. Although there is a mechanism
> >>>>> in
> >>>>> place using QMI(Qualcomm MSM interface) that can notify clients
> >>>>> about remote
> >>>>> processor crash.
> >>>>
> >>>> Thanks for the information.
> >>>>
> >>>> Regards
> >>>> Arnaud
> >>>>
> >>>>>>
> >>>>>> Thanks,
> >>>>>> Arnaud
> >>>>>>>
> >>>>>>> Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
> >>>>>>> ---
> >>>>>>>  drivers/remoteproc/Kconfig               |   9 +++
> >>>>>>>  drivers/remoteproc/Makefile              |   1 +
> >>>>>>>  drivers/remoteproc/remoteproc_cdev.c     | 100
> >>>>>>> +++++++++++++++++++++++++++++++
> >>>>>>>  drivers/remoteproc/remoteproc_internal.h |  22 +++++++
> >>>>>>>  include/linux/remoteproc.h               |   2 +
> >>>>>>>  5 files changed, 134 insertions(+)
> >>>>>>>  create mode 100644 drivers/remoteproc/remoteproc_cdev.c
> >>>>>>>
> >>>>>>> diff --git a/drivers/remoteproc/Kconfig
> >>>>>>> b/drivers/remoteproc/Kconfig
> >>>>>>> index de3862c..6374b79 100644
> >>>>>>> --- a/drivers/remoteproc/Kconfig
> >>>>>>> +++ b/drivers/remoteproc/Kconfig
> >>>>>>> @@ -14,6 +14,15 @@ config REMOTEPROC
> >>>>>>>
> >>>>>>>  if REMOTEPROC
> >>>>>>>
> >>>>>>> +config REMOTEPROC_CDEV
> >>>>>>> +    bool "Remoteproc character device interface"
> >>>>>>> +    help
> >>>>>>> +      Say y here to have a character device interface for
> >>>>>>> Remoteproc
> >>>>>>> +      framework. Userspace can boot/shutdown remote processors
> >>>>>>> through
> >>>>>>> +      this interface.
> >>>>>>> +
> >>>>>>> +      It's safe to say N if you don't want to use this
> >>>>>>> interface.
> >>>>>>> +
> >>>>>>>  config IMX_REMOTEPROC
> >>>>>>>      tristate "IMX6/7 remoteproc support"
> >>>>>>>      depends on ARCH_MXC
> >>>>>>> diff --git a/drivers/remoteproc/Makefile
> >>>>>>> b/drivers/remoteproc/Makefile
> >>>>>>> index e30a1b1..b7d4f77 100644
> >>>>>>> --- a/drivers/remoteproc/Makefile
> >>>>>>> +++ b/drivers/remoteproc/Makefile
> >>>>>>> @@ -9,6 +9,7 @@ remoteproc-y                +=
> >>>>>>> remoteproc_debugfs.o
> >>>>>>>  remoteproc-y                += remoteproc_sysfs.o
> >>>>>>>  remoteproc-y                += remoteproc_virtio.o
> >>>>>>>  remoteproc-y                += remoteproc_elf_loader.o
> >>>>>>> +obj-$(CONFIG_REMOTEPROC_CDEV)        += remoteproc_cdev.o
> >>>>>>>  obj-$(CONFIG_IMX_REMOTEPROC)        += imx_rproc.o
> >>>>>>>  obj-$(CONFIG_MTK_SCP)            += mtk_scp.o mtk_scp_ipi.o
> >>>>>>>  obj-$(CONFIG_OMAP_REMOTEPROC)        += omap_remoteproc.o
> >>>>>>> diff --git a/drivers/remoteproc/remoteproc_cdev.c
> >>>>>>> b/drivers/remoteproc/remoteproc_cdev.c
> >>>>>>> new file mode 100644
> >>>>>>> index 0000000..8182bd1
> >>>>>>> --- /dev/null
> >>>>>>> +++ b/drivers/remoteproc/remoteproc_cdev.c
> >>>>>>> @@ -0,0 +1,100 @@
> >>>>>>> +// SPDX-License-Identifier: GPL-2.0-only
> >>>>>>> +/*
> >>>>>>> + * Character device interface driver for Remoteproc framework.
> >>>>>>> + *
> >>>>>>> + * Copyright (c) 2020, The Linux Foundation. All rights
> >>>>>>> reserved.
> >>>>>>> + */
> >>>>>>> +
> >>>>>>> +#include <linux/cdev.h>
> >>>>>>> +#include <linux/fs.h>
> >>>>>>> +#include <linux/module.h>
> >>>>>>> +#include <linux/mutex.h>
> >>>>>>> +#include <linux/remoteproc.h>
> >>>>>>> +
> >>>>>>> +#include "remoteproc_internal.h"
> >>>>>>> +
> >>>>>>> +#define NUM_RPROC_DEVICES    64
> >>>>>>> +static dev_t rproc_cdev;
> >>>>>>> +static DEFINE_IDA(cdev_minor_ida);
> >>>>>>> +
> >>>>>>> +static int rproc_cdev_open(struct inode *inode, struct file
> >>>>>>> *file)
> >>>>>>> +{
> >>>>>>> +    struct rproc *rproc;
> >>>>>>> +
> >>>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> >>>>>>> +
> >>>>>>> +    if (!rproc)
> >>>>>>> +        return -EINVAL;
> >>>>>>> +
> >>>>>>> +    if (rproc->state == RPROC_RUNNING)
> >>>>>>> +        return -EBUSY;
> >>>>>>> +
> >>>>>>> +    return rproc_boot(rproc);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static int rproc_cdev_release(struct inode *inode, struct file
> >>>>>>> *file)
> >>>>>>> +{
> >>>>>>> +    struct rproc *rproc;
> >>>>>>> +
> >>>>>>> +    rproc = container_of(inode->i_cdev, struct rproc, char_dev);
> >>>>>>> +
> >>>>>>> +    if (!rproc || rproc->state != RPROC_RUNNING)
> >>>>>>> +        return -EINVAL;
> >>>>>>> +
> >>>>>>> +    rproc_shutdown(rproc);
> >>>>>>> +
> >>>>>>> +    return 0;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static const struct file_operations rproc_fops = {
> >>>>>>> +    .open = rproc_cdev_open,
> >>>>>>> +    .release = rproc_cdev_release,
> >>>>>>> +};
> >>>>>>> +
> >>>>>>> +int rproc_char_device_add(struct rproc *rproc)
> >>>>>>> +{
> >>>>>>> +    int ret, minor;
> >>>>>>> +    dev_t cdevt;
> >>>>>>> +
> >>>>>>> +    minor = ida_simple_get(&cdev_minor_ida, 0,
> >>>>>>> NUM_RPROC_DEVICES,
> >>>>>>> +                   GFP_KERNEL);
> >>>>>>> +    if (minor < 0) {
> >>>>>>> +        dev_err(&rproc->dev, "%s: No more minor numbers left!
> >>>>>>> rc:%d\n",
> >>>>>>> +            __func__, minor);
> >>>>>>> +        return -ENODEV;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    cdev_init(&rproc->char_dev, &rproc_fops);
> >>>>>>> +    rproc->char_dev.owner = THIS_MODULE;
> >>>>>>> +
> >>>>>>> +    cdevt = MKDEV(MAJOR(rproc_cdev), minor);
> >>>>>>> +    ret = cdev_add(&rproc->char_dev, cdevt, 1);
> >>>>>>> +    if (ret < 0)
> >>>>>>> +        ida_simple_remove(&cdev_minor_ida, minor);
> >>>>>>> +
> >>>>>>> +    rproc->dev.devt = cdevt;
> >>>>>>> +    return ret;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +void rproc_char_device_remove(struct rproc *rproc)
> >>>>>>> +{
> >>>>>>> +    __unregister_chrdev(MAJOR(rproc->dev.devt),
> >>>>>>> MINOR(rproc->dev.devt), 1,
> >>>>>>> +                "rproc");
> >>>>>>> +    ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +void __init rproc_init_cdev(void)
> >>>>>>> +{
> >>>>>>> +    int ret;
> >>>>>>> +
> >>>>>>> +    ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES,
> >>>>>>> "rproc");
> >>>>>>> +    if (ret < 0) {
> >>>>>>> +        pr_err("Failed to alloc rproc_cdev region, err %d\n",
> >>>>>>> ret);
> >>>>>>> +        return;
> >>>>>>> +    }
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +void __exit rproc_exit_cdev(void)
> >>>>>>> +{
> >>>>>>> +    __unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES,
> >>>>>>> "rproc");
> >>>>>>> +}
> >>>>>>> diff --git a/drivers/remoteproc/remoteproc_internal.h
> >>>>>>> b/drivers/remoteproc/remoteproc_internal.h
> >>>>>>> index 493ef92..28d61a1 100644
> >>>>>>> --- a/drivers/remoteproc/remoteproc_internal.h
> >>>>>>> +++ b/drivers/remoteproc/remoteproc_internal.h
> >>>>>>> @@ -47,6 +47,27 @@ struct dentry *rproc_create_trace_file(const
> >>>>>>> char *name,
> >>>>>>> struct rproc *rproc,
> >>>>>>>  int rproc_init_sysfs(void);
> >>>>>>>  void rproc_exit_sysfs(void);
> >>>>>>>
> >>>>>>> +#ifdef CONFIG_REMOTEPROC_CDEV
> >>>>>>> +void rproc_init_cdev(void);
> >>>>>>> +void rproc_exit_cdev(void);
> >>>>>>> +int rproc_char_device_add(struct rproc *rproc);
> >>>>>>> +void rproc_char_device_remove(struct rproc *rproc);
> >>>>>>> +#else
> >>>>>>> +static inline void rproc_init_cdev(void)
> >>>>>>> +{
> >>>>>>> +}
> >>>>>>> +static inline void rproc_exit_cdev(void)
> >>>>>>> +{
> >>>>>>> +}
> >>>>>>> +static inline int rproc_char_device_add(struct rproc *rproc)
> >>>>>>> +{
> >>>>>>> +    return 0;
> >>>>>>> +}
> >>>>>>> +static inline void  rproc_char_device_remove(struct rproc
> >>>>>>> *rproc)
> >>>>>>> +{
> >>>>>>> +}
> >>>>>>> +#endif
> >>>>>>> +
> >>>>>>>  void rproc_free_vring(struct rproc_vring *rvring);
> >>>>>>>  int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
> >>>>>>>
> >>>>>>> @@ -63,6 +84,7 @@ struct resource_table
> >>>>>>> *rproc_elf_find_loaded_rsc_table(struct
> >>>>>>> rproc *rproc,
> >>>>>>>  struct rproc_mem_entry *
> >>>>>>>  rproc_find_carveout_by_name(struct rproc *rproc, const char
> >>>>>>> *name, ...);
> >>>>>>>
> >>>>>>> +
> >>>>>>>  static inline
> >>>>>>>  int rproc_fw_sanity_check(struct rproc *rproc, const struct
> >>>>>>> firmware *fw)
> >>>>>>>  {
> >>>>>>> diff --git a/include/linux/remoteproc.h
> >>>>>>> b/include/linux/remoteproc.h
> >>>>>>> index 16ad666..c4ca796 100644
> >>>>>>> --- a/include/linux/remoteproc.h
> >>>>>>> +++ b/include/linux/remoteproc.h
> >>>>>>> @@ -37,6 +37,7 @@
> >>>>>>>
> >>>>>>>  #include <linux/types.h>
> >>>>>>>  #include <linux/mutex.h>
> >>>>>>> +#include <linux/cdev.h>
> >>>>>>>  #include <linux/virtio.h>
> >>>>>>>  #include <linux/completion.h>
> >>>>>>>  #include <linux/idr.h>
> >>>>>>> @@ -514,6 +515,7 @@ struct rproc {
> >>>>>>>      bool auto_boot;
> >>>>>>>      struct list_head dump_segments;
> >>>>>>>      int nb_vdev;
> >>>>>>> +    struct cdev char_dev;
> >>>>>>>  };
> >>>>>>>
> >> >>>>>  /**
Bjorn Andersson April 10, 2020, 2 a.m. UTC | #14
On Thu 09 Apr 01:29 PDT 2020, Arnaud POULIQUEN wrote:

> 
> On 4/8/20 8:34 PM, rishabhb@codeaurora.org wrote:
> > On 2020-04-06 08:58, Clément Leger wrote:
> >> Hi Arnaud,
> >>
> >> ----- On 6 Apr, 2020, at 16:17, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:
> >>
> >>> Hi Clément,
> >>>
> >>> On 4/6/20 2:06 PM, Clément Leger wrote:
> >>>> Hi Arnaud,
> >>>>
> >>>> ----- On 6 Apr, 2020, at 11:01, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:
> >>>>
> >>>>> On 4/3/20 9:13 PM, rishabhb@codeaurora.org wrote:
> >>>>>> On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
> >>>>>>> Hi
> >>>>>>>
> >>>>>>> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
> >>>>>>>> Add the character device interface for userspace applications.
> >>>>>>>> This interface can be used in order to boot up and shutdown
> >>>>>>>> remote subsystems. Currently there is only a sysfs interface
> >>>>>>>> which the userspace clients can use. If a usersapce application
> >>>>>>>> crashes after booting the remote processor does not get any
> >>>>>>>> indication about the crash. It might still assume that the
> >>>>>>>> application is running. For example modem uses remotefs service
> >>>>>>>> to fetch data from disk/flash memory. If the remotefs service
> >>>>>>>> crashes, modem keeps on requesting data which might lead to a
> >>>>>>>> crash. Adding a character device interface makes the remote
> >>>>>>>> processor tightly coupled with the user space application.
> >>>>>>>> A crash of the application leads to a close on the file descriptors
> >>>>>>>> therefore shutting down the remoteproc.
> >>>>>>>
> >>>>>>> Sorry I'm late in the discussion, I hope I've gone through the whole
> >>>>>>> discussion so I don't reopen a closed point...
> >>>>>>>
> >>>>>>> Something here is not crystal clear to me so I'd rather share it...
> >>>>>>>
> >>>>>>> I suppose that you the automatic restart of the application is not possible to
> >>>>>>> stop and restart the remote processor...
> >>>>>> Yes correct, while we wait for the application to restart we might observe a
> >>>>>> fatal crash.
> >>>>>>>
> >>>>>>> Why this use case can not be solved by a process monitor or a service
> >>>>>>> in userland that detects the application crash and stop the remote
> >>>>>>> firmware using
> >>>>>>> the sysfs interface?
> >>>>>>>
> >>>>>> What happens in the case where the process monitor itself crashes? This is
> >>>>>> actually the approach we follow in our downstream code. We have a central entity
> >>>>>> in userspace that controls bootup/shutdown of some remote processors based on
> >>>>>> the
> >>>>>> votes from userspace clients. We have observed cases where this entity
> >>>>>> itself crashes and remote processors are left hanging.
> >>>>>
> >>>>> Your description makes me feel like this patch is only a workaround of something
> >>>>> that
> >>>>> should be fixed in the userland, even if i understand that hanging is one of the
> >>>>> most
> >>>>> critical problem and have to be fixed.
> >>>>> For instance, how to handle several applications that interact with the remote
> >>>>> processor
> >>>>> ( e.g. rpmsg service applications) how to stop and restart everything. Using the
> >>>>> char
> >>>>> device would probaly resolve only a part of the issue...
> >>>>>
> >>>>> I'm not aware about your environment and i'm not a userland expert. But what i
> >>>>> still not
> >>>>> understand why a parent process can not do the job...
> >>>>> I just test a simple script on my side that treat the kill -9 of an application
> >>>>> ("cat" in my case).
> >>>>
> >>>> This is not entirely true, if the parent process is killed with a SIGKILL, then
> >>>> the process will not be able to handle anything and the remoteproc will still
> >>>> be running.
> >>>>
> >>>> What I understood from Rishabh patch is a way to allow a single process handling
> >>>> the rproc state. We have the same kind of need and currently, if the
> >>>> user application crashes, then the rproc is still running (which happens).
> >>>>
> >>>>>
> >>>>> #start the remote firmware
> >>>>> cp  $1 /lib/firmware/
> >>>>> echo $1> /sys/class/remoteproc/remoteproc0/firmware
> >>>>> echo start >/sys/class/remoteproc/remoteproc0/state
> >>>>> #your binary
> >>>>> cat /dev/kmsg
> >>>>> # stop the remote firmware in case of crash (and potentially some other apps)
> >>>>> echo stop >/sys/class/remoteproc/remoteproc0/state
> >>>>>
> >>>>
> >>>> This is not really "production proof" and what happens if the application is
> >>>> responsible of setting the firmware which might be jitted ?
> >>>> And if the script receives the SIGKILL, then we are back to the same problem.
> >>> Yes this is just a basic example, not an implementation which would depend on
> >>> the
> >>> environment. i'm just trying here to  put forward a multi-process solution...and
> >>> that I'm not an userland expert :).
> >>>
> >>>>
> >>>> I really think, this is a step forward an easier and reliable use of the
> >>>> remoteproc
> >>>> on userland to guarantee a coherent rproc state even if host application
> >>>> crashes.
> > Yes what we want is simple mechanism where a single userspace process can boot/
> > shutdown the remote processor in all scenarios. Adding more processes to monitor
> > the already existing process might have 2 issues. One is there might be a delay
> > between the application crash and process monitor getting to know about it and taking
> > action. This might prove to be fatal in our case. Second, possibly the monitor can hang
> > or get killed and is not deterministic.
> >>>
> >>> I can see 3 ways of handling an application crash:
> >>> - just shutdown the firmware
> >>> => can be done through char device
> >>> - stop some other related processes and/or generate a remote proc crash dump for
> >>> debug
> >>> => /sysfs and/or debugfs
> >>> - do nothing as you want a silence application reboot and re-attach to the
> >>> running firmware
> >>> => use sysfs
> >>>
> >>> I'm challenging the solution because splitting the API seems to me not a good
> >>> solution.
> >>
> >> Completely ok with that, we have to fully understand the targeted usecase to
> >> avoid implemented a flawed interface.
> >>
> >>> Now i wonder how it works for the other applications that are relying on some
> >>> other
> >>> kernel frameworks...
> >>
> >> For some other device, there is a chardev. The watchdog for intance uses a
> >> /dev/watchdog. Regarding the gpio, it seems they are also using a chardev
> >> and the sysfs interface is deprecated.
> >>
> >>> Perhaps the answer is that these frameworks don't use sysfs but char device.
> >>> That would means that the sysfs solution is not the more adapted solution and
> >>> perhaps we should migrate to a char device.
> >>> But in this case, i think that it should implement the whole API and be
> >>> exclusive with
> >>> the syfs legacy API (so no sysfs or sysfs in read-only).
> >>
> >> I agree with that, if another interface must be defined, then it should
> >> implement everything that is supported right now with the sysfs.
> >>
> > The other fields that sysfs exposes right now are firmware_name, name(rproc name),
> > state. The targeted usecase was that these are configuration parameters specific
> > to the remoteproc and should stay in the sysfs interface. Whereas char device
> > should provide direct access to remoteproc device.
> > It would make sense to use this interface in conjunction with sysfs
> > interface, where you use /dev/remoteproc0 to boot/shutdown the remote processor
> > sysfs entries to fine tune the parameters.
> > Adding ioctls to implement all sysfs functionality seems like overkill to me. Let
> > me know what you guys think.
> 
> In my opinion if we open the possibility of accessing to remoteproc through a char
> device, we should move torwards a solution that would replace the sysfs. 
> In this case sysfs fields could be read-only, and the char devices would have to
> support the different shutdown modes. This means that the auto shutdown should be
> configurable (IOCTL?).
> But I assume that the minimum could be to only disable write access on the "state" field
> and handle the "auto shutdown" as an option of the char device.
> 
> Anyway it only my opinion, let Bjorn an Mathieu comment and decide :)
> 

I have two concerns with migrating things to a character device:

1) Operating an ioctl based interface requires specialized tooling. E.g.
today whatever your installation is you can change firmware name and
start a core with the user space tools you have. Further more allowing
you to change firmware name before powering on the core would imply that
both of these would have to be ioctls (we can't just boot the core on
open) - so we would end up duplicating all parts of the sysfs interface
in ioctls.

2) Reducing the functionality of the sysfs interface would break
compatibility with existing user space, which is verboten. So we would
at best duplicate the existing features.


That said, per the same argument I don't really like duplicating the
state controls in a character device (i.e. Rishabh's patch), but this
really does provide something that we can't do with the current
interface.

Regards,
Bjorn
Arnaud POULIQUEN April 10, 2020, 2:51 p.m. UTC | #15
On 4/10/20 4:00 AM, Bjorn Andersson wrote:
> On Thu 09 Apr 01:29 PDT 2020, Arnaud POULIQUEN wrote:
> 
>>
>> On 4/8/20 8:34 PM, rishabhb@codeaurora.org wrote:
>>> On 2020-04-06 08:58, Clément Leger wrote:
>>>> Hi Arnaud,
>>>>
>>>> ----- On 6 Apr, 2020, at 16:17, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:
>>>>
>>>>> Hi Clément,
>>>>>
>>>>> On 4/6/20 2:06 PM, Clément Leger wrote:
>>>>>> Hi Arnaud,
>>>>>>
>>>>>> ----- On 6 Apr, 2020, at 11:01, Arnaud Pouliquen arnaud.pouliquen@st.com wrote:
>>>>>>
>>>>>>> On 4/3/20 9:13 PM, rishabhb@codeaurora.org wrote:
>>>>>>>> On 2020-04-02 10:28, Arnaud POULIQUEN wrote:
>>>>>>>>> Hi
>>>>>>>>>
>>>>>>>>> On 4/1/20 2:03 AM, Rishabh Bhatnagar wrote:
>>>>>>>>>> Add the character device interface for userspace applications.
>>>>>>>>>> This interface can be used in order to boot up and shutdown
>>>>>>>>>> remote subsystems. Currently there is only a sysfs interface
>>>>>>>>>> which the userspace clients can use. If a usersapce application
>>>>>>>>>> crashes after booting the remote processor does not get any
>>>>>>>>>> indication about the crash. It might still assume that the
>>>>>>>>>> application is running. For example modem uses remotefs service
>>>>>>>>>> to fetch data from disk/flash memory. If the remotefs service
>>>>>>>>>> crashes, modem keeps on requesting data which might lead to a
>>>>>>>>>> crash. Adding a character device interface makes the remote
>>>>>>>>>> processor tightly coupled with the user space application.
>>>>>>>>>> A crash of the application leads to a close on the file descriptors
>>>>>>>>>> therefore shutting down the remoteproc.
>>>>>>>>>
>>>>>>>>> Sorry I'm late in the discussion, I hope I've gone through the whole
>>>>>>>>> discussion so I don't reopen a closed point...
>>>>>>>>>
>>>>>>>>> Something here is not crystal clear to me so I'd rather share it...
>>>>>>>>>
>>>>>>>>> I suppose that you the automatic restart of the application is not possible to
>>>>>>>>> stop and restart the remote processor...
>>>>>>>> Yes correct, while we wait for the application to restart we might observe a
>>>>>>>> fatal crash.
>>>>>>>>>
>>>>>>>>> Why this use case can not be solved by a process monitor or a service
>>>>>>>>> in userland that detects the application crash and stop the remote
>>>>>>>>> firmware using
>>>>>>>>> the sysfs interface?
>>>>>>>>>
>>>>>>>> What happens in the case where the process monitor itself crashes? This is
>>>>>>>> actually the approach we follow in our downstream code. We have a central entity
>>>>>>>> in userspace that controls bootup/shutdown of some remote processors based on
>>>>>>>> the
>>>>>>>> votes from userspace clients. We have observed cases where this entity
>>>>>>>> itself crashes and remote processors are left hanging.
>>>>>>>
>>>>>>> Your description makes me feel like this patch is only a workaround of something
>>>>>>> that
>>>>>>> should be fixed in the userland, even if i understand that hanging is one of the
>>>>>>> most
>>>>>>> critical problem and have to be fixed.
>>>>>>> For instance, how to handle several applications that interact with the remote
>>>>>>> processor
>>>>>>> ( e.g. rpmsg service applications) how to stop and restart everything. Using the
>>>>>>> char
>>>>>>> device would probaly resolve only a part of the issue...
>>>>>>>
>>>>>>> I'm not aware about your environment and i'm not a userland expert. But what i
>>>>>>> still not
>>>>>>> understand why a parent process can not do the job...
>>>>>>> I just test a simple script on my side that treat the kill -9 of an application
>>>>>>> ("cat" in my case).
>>>>>>
>>>>>> This is not entirely true, if the parent process is killed with a SIGKILL, then
>>>>>> the process will not be able to handle anything and the remoteproc will still
>>>>>> be running.
>>>>>>
>>>>>> What I understood from Rishabh patch is a way to allow a single process handling
>>>>>> the rproc state. We have the same kind of need and currently, if the
>>>>>> user application crashes, then the rproc is still running (which happens).
>>>>>>
>>>>>>>
>>>>>>> #start the remote firmware
>>>>>>> cp  $1 /lib/firmware/
>>>>>>> echo $1> /sys/class/remoteproc/remoteproc0/firmware
>>>>>>> echo start >/sys/class/remoteproc/remoteproc0/state
>>>>>>> #your binary
>>>>>>> cat /dev/kmsg
>>>>>>> # stop the remote firmware in case of crash (and potentially some other apps)
>>>>>>> echo stop >/sys/class/remoteproc/remoteproc0/state
>>>>>>>
>>>>>>
>>>>>> This is not really "production proof" and what happens if the application is
>>>>>> responsible of setting the firmware which might be jitted ?
>>>>>> And if the script receives the SIGKILL, then we are back to the same problem.
>>>>> Yes this is just a basic example, not an implementation which would depend on
>>>>> the
>>>>> environment. i'm just trying here to  put forward a multi-process solution...and
>>>>> that I'm not an userland expert :).
>>>>>
>>>>>>
>>>>>> I really think, this is a step forward an easier and reliable use of the
>>>>>> remoteproc
>>>>>> on userland to guarantee a coherent rproc state even if host application
>>>>>> crashes.
>>> Yes what we want is simple mechanism where a single userspace process can boot/
>>> shutdown the remote processor in all scenarios. Adding more processes to monitor
>>> the already existing process might have 2 issues. One is there might be a delay
>>> between the application crash and process monitor getting to know about it and taking
>>> action. This might prove to be fatal in our case. Second, possibly the monitor can hang
>>> or get killed and is not deterministic.
>>>>>
>>>>> I can see 3 ways of handling an application crash:
>>>>> - just shutdown the firmware
>>>>> => can be done through char device
>>>>> - stop some other related processes and/or generate a remote proc crash dump for
>>>>> debug
>>>>> => /sysfs and/or debugfs
>>>>> - do nothing as you want a silence application reboot and re-attach to the
>>>>> running firmware
>>>>> => use sysfs
>>>>>
>>>>> I'm challenging the solution because splitting the API seems to me not a good
>>>>> solution.
>>>>
>>>> Completely ok with that, we have to fully understand the targeted usecase to
>>>> avoid implemented a flawed interface.
>>>>
>>>>> Now i wonder how it works for the other applications that are relying on some
>>>>> other
>>>>> kernel frameworks...
>>>>
>>>> For some other device, there is a chardev. The watchdog for intance uses a
>>>> /dev/watchdog. Regarding the gpio, it seems they are also using a chardev
>>>> and the sysfs interface is deprecated.
>>>>
>>>>> Perhaps the answer is that these frameworks don't use sysfs but char device.
>>>>> That would means that the sysfs solution is not the more adapted solution and
>>>>> perhaps we should migrate to a char device.
>>>>> But in this case, i think that it should implement the whole API and be
>>>>> exclusive with
>>>>> the syfs legacy API (so no sysfs or sysfs in read-only).
>>>>
>>>> I agree with that, if another interface must be defined, then it should
>>>> implement everything that is supported right now with the sysfs.
>>>>
>>> The other fields that sysfs exposes right now are firmware_name, name(rproc name),
>>> state. The targeted usecase was that these are configuration parameters specific
>>> to the remoteproc and should stay in the sysfs interface. Whereas char device
>>> should provide direct access to remoteproc device.
>>> It would make sense to use this interface in conjunction with sysfs
>>> interface, where you use /dev/remoteproc0 to boot/shutdown the remote processor
>>> sysfs entries to fine tune the parameters.
>>> Adding ioctls to implement all sysfs functionality seems like overkill to me. Let
>>> me know what you guys think.
>>
>> In my opinion if we open the possibility of accessing to remoteproc through a char
>> device, we should move torwards a solution that would replace the sysfs. 
>> In this case sysfs fields could be read-only, and the char devices would have to
>> support the different shutdown modes. This means that the auto shutdown should be
>> configurable (IOCTL?).
>> But I assume that the minimum could be to only disable write access on the "state" field
>> and handle the "auto shutdown" as an option of the char device.
>>
>> Anyway it only my opinion, let Bjorn an Mathieu comment and decide :)
>>
> 
> I have two concerns with migrating things to a character device:
> 
> 1) Operating an ioctl based interface requires specialized tooling. E.g.
> today whatever your installation is you can change firmware name and
> start a core with the user space tools you have. Further more allowing
> you to change firmware name before powering on the core would imply that
> both of these would have to be ioctls (we can't just boot the core on
> open) - so we would end up duplicating all parts of the sysfs interface
> in ioctls.
> 
> 2) Reducing the functionality of the sysfs interface would break
> compatibility with existing user space, which is verboten. So we would
> at best duplicate the existing features.

Agree with your concerns.
But could we consider that an application using this char dev, is no more
compatible with existing interface? 
And it would make sense that it requests exclusivity on
the remote firmware lice cycle. In this case the sysfs state should be
read only to protect from an unexpected shutdown.

> 
> 
> That said, per the same argument I don't really like duplicating the
> state controls in a character device (i.e. Rishabh's patch), but this
> really does provide something that we can't do with the current
> interface.

Yes, that why it could be nice to make it scalable to integrate
future extension that can not be implemented using sysfs.

One of my main concern is that forcing the boot on open and shutdown on
close limits the usage of this device for some other future purposes. 
Coming back in the future to a generic solution will be not possible
as case of impact on the open close behaviour.
Using ioctls would avoid this limitation...
Ioctl could be used to enable the boot/shutdown on open/close

Then this char device could be also used to generate an event for application
on crash.
In this case the char device could be used only for the event, and sysfs
state would be used to manage the remoteproc firmware. So sysfs state should
be in read write access...

Concerning the firmware name, today sysfs interface imposes only one firmware
what's happen if more that one firmware loading is requested (for instance 
a core firmware with of some processing or rpc modules...).
In this case the sysfs firmware field could not be used.

These examples make me think that sysfs could be to manage through
remoteproc_cdev instead of directly calling core function.
This would help to define dynamic rules for the sysfs access. 

To sum up, nothing against opening interface with the char device.
I'm just wondering if this device should not be designed to allow some future
extensions of the API, and not only for this use case.  

Regards
Arnaud

> 
> Regards,
> Bjorn
>
diff mbox series

Patch

diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index de3862c..6374b79 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -14,6 +14,15 @@  config REMOTEPROC
 
 if REMOTEPROC
 
+config REMOTEPROC_CDEV
+	bool "Remoteproc character device interface"
+	help
+	  Say y here to have a character device interface for Remoteproc
+	  framework. Userspace can boot/shutdown remote processors through
+	  this interface.
+
+	  It's safe to say N if you don't want to use this interface.
+
 config IMX_REMOTEPROC
 	tristate "IMX6/7 remoteproc support"
 	depends on ARCH_MXC
diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
index e30a1b1..b7d4f77 100644
--- a/drivers/remoteproc/Makefile
+++ b/drivers/remoteproc/Makefile
@@ -9,6 +9,7 @@  remoteproc-y				+= remoteproc_debugfs.o
 remoteproc-y				+= remoteproc_sysfs.o
 remoteproc-y				+= remoteproc_virtio.o
 remoteproc-y				+= remoteproc_elf_loader.o
+obj-$(CONFIG_REMOTEPROC_CDEV)		+= remoteproc_cdev.o
 obj-$(CONFIG_IMX_REMOTEPROC)		+= imx_rproc.o
 obj-$(CONFIG_MTK_SCP)			+= mtk_scp.o mtk_scp_ipi.o
 obj-$(CONFIG_OMAP_REMOTEPROC)		+= omap_remoteproc.o
diff --git a/drivers/remoteproc/remoteproc_cdev.c b/drivers/remoteproc/remoteproc_cdev.c
new file mode 100644
index 0000000..8182bd1
--- /dev/null
+++ b/drivers/remoteproc/remoteproc_cdev.c
@@ -0,0 +1,100 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Character device interface driver for Remoteproc framework.
+ *
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/remoteproc.h>
+
+#include "remoteproc_internal.h"
+
+#define NUM_RPROC_DEVICES	64
+static dev_t rproc_cdev;
+static DEFINE_IDA(cdev_minor_ida);
+
+static int rproc_cdev_open(struct inode *inode, struct file *file)
+{
+	struct rproc *rproc;
+
+	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
+
+	if (!rproc)
+		return -EINVAL;
+
+	if (rproc->state == RPROC_RUNNING)
+		return -EBUSY;
+
+	return rproc_boot(rproc);
+}
+
+static int rproc_cdev_release(struct inode *inode, struct file *file)
+{
+	struct rproc *rproc;
+
+	rproc = container_of(inode->i_cdev, struct rproc, char_dev);
+
+	if (!rproc || rproc->state != RPROC_RUNNING)
+		return -EINVAL;
+
+	rproc_shutdown(rproc);
+
+	return 0;
+}
+
+static const struct file_operations rproc_fops = {
+	.open = rproc_cdev_open,
+	.release = rproc_cdev_release,
+};
+
+int rproc_char_device_add(struct rproc *rproc)
+{
+	int ret, minor;
+	dev_t cdevt;
+
+	minor = ida_simple_get(&cdev_minor_ida, 0, NUM_RPROC_DEVICES,
+			       GFP_KERNEL);
+	if (minor < 0) {
+		dev_err(&rproc->dev, "%s: No more minor numbers left! rc:%d\n",
+			__func__, minor);
+		return -ENODEV;
+	}
+
+	cdev_init(&rproc->char_dev, &rproc_fops);
+	rproc->char_dev.owner = THIS_MODULE;
+
+	cdevt = MKDEV(MAJOR(rproc_cdev), minor);
+	ret = cdev_add(&rproc->char_dev, cdevt, 1);
+	if (ret < 0)
+		ida_simple_remove(&cdev_minor_ida, minor);
+
+	rproc->dev.devt = cdevt;
+	return ret;
+}
+
+void rproc_char_device_remove(struct rproc *rproc)
+{
+	__unregister_chrdev(MAJOR(rproc->dev.devt), MINOR(rproc->dev.devt), 1,
+			    "rproc");
+	ida_simple_remove(&cdev_minor_ida, MINOR(rproc->dev.devt));
+}
+
+void __init rproc_init_cdev(void)
+{
+	int ret;
+
+	ret = alloc_chrdev_region(&rproc_cdev, 0, NUM_RPROC_DEVICES, "rproc");
+	if (ret < 0) {
+		pr_err("Failed to alloc rproc_cdev region, err %d\n", ret);
+		return;
+	}
+}
+
+void __exit rproc_exit_cdev(void)
+{
+	__unregister_chrdev(MAJOR(rproc_cdev), 0, NUM_RPROC_DEVICES, "rproc");
+}
diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
index 493ef92..28d61a1 100644
--- a/drivers/remoteproc/remoteproc_internal.h
+++ b/drivers/remoteproc/remoteproc_internal.h
@@ -47,6 +47,27 @@  struct dentry *rproc_create_trace_file(const char *name, struct rproc *rproc,
 int rproc_init_sysfs(void);
 void rproc_exit_sysfs(void);
 
+#ifdef CONFIG_REMOTEPROC_CDEV
+void rproc_init_cdev(void);
+void rproc_exit_cdev(void);
+int rproc_char_device_add(struct rproc *rproc);
+void rproc_char_device_remove(struct rproc *rproc);
+#else
+static inline void rproc_init_cdev(void)
+{
+}
+static inline void rproc_exit_cdev(void)
+{
+}
+static inline int rproc_char_device_add(struct rproc *rproc)
+{
+	return 0;
+}
+static inline void  rproc_char_device_remove(struct rproc *rproc)
+{
+}
+#endif
+
 void rproc_free_vring(struct rproc_vring *rvring);
 int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
 
@@ -63,6 +84,7 @@  struct resource_table *rproc_elf_find_loaded_rsc_table(struct rproc *rproc,
 struct rproc_mem_entry *
 rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
 
+
 static inline
 int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
 {
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 16ad666..c4ca796 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -37,6 +37,7 @@ 
 
 #include <linux/types.h>
 #include <linux/mutex.h>
+#include <linux/cdev.h>
 #include <linux/virtio.h>
 #include <linux/completion.h>
 #include <linux/idr.h>
@@ -514,6 +515,7 @@  struct rproc {
 	bool auto_boot;
 	struct list_head dump_segments;
 	int nb_vdev;
+	struct cdev char_dev;
 };
 
 /**