diff mbox series

[net-next,v4,10/12] ASoC: SOF: Introduce descriptors for SOF client

Message ID 20200520070227.3392100-11-jeffrey.t.kirsher@intel.com (mailing list archive)
State Changes Requested
Headers show
Series 100GbE Intel Wired LAN Driver Updates 2020-05-19 | expand

Commit Message

Kirsher, Jeffrey T May 20, 2020, 7:02 a.m. UTC
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>

A client in the SOF (Sound Open Firmware) context is a
device that needs to communicate with the DSP via IPC
messages. The SOF core is responsible for serializing the
IPC messages to the DSP from the different clients. One
example of an SOF client would be an IPC test client that
floods the DSP with test IPC messages to validate if the
serialization works as expected. Multi-client support will
also add the ability to split the existing audio cards
into multiple ones, so as to e.g. deal with HDMI with a
dedicated client instead of adding HDMI to all cards.

This patch introduces descriptors for SOF client driver
and SOF client device along with APIs for registering
and unregistering a SOF client driver, sending IPCs from
a client device and accessing the SOF core debugfs root entry.

Along with this, add a couple of new members to struct
snd_sof_dev that will be used for maintaining the list of
clients.

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Fred Oh <fred.oh@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 sound/soc/sof/Kconfig      | 20 +++++++++
 sound/soc/sof/Makefile     |  1 +
 sound/soc/sof/core.c       |  2 +
 sound/soc/sof/sof-client.c | 91 ++++++++++++++++++++++++++++++++++++++
 sound/soc/sof/sof-client.h | 84 +++++++++++++++++++++++++++++++++++
 sound/soc/sof/sof-priv.h   |  6 +++
 6 files changed, 204 insertions(+)
 create mode 100644 sound/soc/sof/sof-client.c
 create mode 100644 sound/soc/sof/sof-client.h

Comments

Greg KH May 20, 2020, 7:20 a.m. UTC | #1
On Wed, May 20, 2020 at 12:02:25AM -0700, Jeff Kirsher wrote:
> From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
> 
> A client in the SOF (Sound Open Firmware) context is a
> device that needs to communicate with the DSP via IPC
> messages. The SOF core is responsible for serializing the
> IPC messages to the DSP from the different clients. One
> example of an SOF client would be an IPC test client that
> floods the DSP with test IPC messages to validate if the
> serialization works as expected. Multi-client support will
> also add the ability to split the existing audio cards
> into multiple ones, so as to e.g. to deal with HDMI with a
> dedicated client instead of adding HDMI to all cards.
> 
> This patch introduces descriptors for SOF client driver
> and SOF client device along with APIs for registering
> and unregistering a SOF client driver, sending IPCs from
> a client device and accessing the SOF core debugfs root entry.
> 
> Along with this, add a couple of new members to struct
> snd_sof_dev that will be used for maintaining the list of
> clients.

Here is where you are first using a virtual bus driver, and yet, no
mention of that at all in the changelog.  Why?

Why are virtual devices/busses even needed here?

greg k-h
Jason Gunthorpe May 20, 2020, 12:54 p.m. UTC | #2
On Wed, May 20, 2020 at 12:02:25AM -0700, Jeff Kirsher wrote:
> From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
> 
> A client in the SOF (Sound Open Firmware) context is a
> device that needs to communicate with the DSP via IPC
> messages. The SOF core is responsible for serializing the
> IPC messages to the DSP from the different clients. One
> example of an SOF client would be an IPC test client that
> floods the DSP with test IPC messages to validate if the
> serialization works as expected. Multi-client support will
> also add the ability to split the existing audio cards
> into multiple ones, so as to e.g. to deal with HDMI with a
> dedicated client instead of adding HDMI to all cards.
> 
> This patch introduces descriptors for SOF client driver
> and SOF client device along with APIs for registering
> and unregistering a SOF client driver, sending IPCs from
> a client device and accessing the SOF core debugfs root entry.
> 
> Along with this, add a couple of new members to struct
> snd_sof_dev that will be used for maintaining the list of
> clients.

If you want to use sound as the rationale for virtual bus then drop the
networking stuff and present a complete device/driver pairing based on
this sound stuff instead.

> +int sof_client_dev_register(struct snd_sof_dev *sdev,
> +			    const char *name)
> +{
> +	struct sof_client_dev *cdev;
> +	struct virtbus_device *vdev;
> +	unsigned long time, timeout;
> +	int ret;
> +
> +	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
> +	if (!cdev)
> +		return -ENOMEM;
> +
> +	cdev->sdev = sdev;
> +	init_completion(&cdev->probe_complete);
> +	vdev = &cdev->vdev;
> +	vdev->match_name = name;
> +	vdev->dev.parent = sdev->dev;
> +	vdev->release = sof_client_virtdev_release;
> +
> +	/*
> +	 * Register virtbus device for the client.
> +	 * The error path in virtbus_register_device() calls put_device(),
> +	 * which will free cdev in the release callback.
> +	 */
> +	ret = virtbus_register_device(vdev);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* make sure the probe is complete before updating client list */
> +	timeout = msecs_to_jiffies(SOF_CLIENT_PROBE_TIMEOUT_MS);
> +	time = wait_for_completion_timeout(&cdev->probe_complete, timeout);

This seems bonkers - the whole point of something like virtual bus is
to avoid madness like this.

> +	if (!time) {
> +		dev_err(sdev->dev, "error: probe of virtbus dev %s timed out\n",
> +			name);
> +		virtbus_unregister_device(vdev);

Unregister does kfree? In general I've found that to be a bad idea,
many drivers need to free up resources after unregistering from their
subsystem.

> +#define virtbus_dev_to_sof_client_dev(virtbus_dev) \
> +	container_of(virtbus_dev, struct sof_client_dev, vdev)

Use static inline

Jason
Jason Gunthorpe May 20, 2020, 12:57 p.m. UTC | #3
On Wed, May 20, 2020 at 09:54:37AM -0300, Jason Gunthorpe wrote:
> > +	if (!time) {
> > +		dev_err(sdev->dev, "error: probe of virtbus dev %s timed out\n",
> > +			name);
> > +		virtbus_unregister_device(vdev);
> 
> Unregister does kfree? In general I've found that to be a bad idea,
> many drivers need to free up resources after unregistering from their
> subsystem.

oops, never mind, this is the driver side it makes some sense - but
I'm not sure you should call it during error unwind anyhow. See above
about the wait being kind of bonkers..

Jason
Ranjani Sridharan May 21, 2020, 9:11 p.m. UTC | #4
On Wed, 2020-05-20 at 09:54 -0300, Jason Gunthorpe wrote:
> On Wed, May 20, 2020 at 12:02:25AM -0700, Jeff Kirsher wrote:
> > From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
> > 
> > A client in the SOF (Sound Open Firmware) context is a
> > device that needs to communicate with the DSP via IPC
> > messages. The SOF core is responsible for serializing the
> > IPC messages to the DSP from the different clients. One
> > example of an SOF client would be an IPC test client that
> > floods the DSP with test IPC messages to validate if the
> > serialization works as expected. Multi-client support will
> > also add the ability to split the existing audio cards
> > into multiple ones, so as to e.g. to deal with HDMI with a
> > dedicated client instead of adding HDMI to all cards.
> > 
> > This patch introduces descriptors for SOF client driver
> > and SOF client device along with APIs for registering
> > and unregistering a SOF client driver, sending IPCs from
> > a client device and accessing the SOF core debugfs root entry.
> > 
> > Along with this, add a couple of new members to struct
> > snd_sof_dev that will be used for maintaining the list of
> > clients.
> 
> If you want to use sound as the rational for virtual bus then drop
> the
> networking stuff and present a complete device/driver pairing based
> on
> this sound stuff instead.
> 
> > +int sof_client_dev_register(struct snd_sof_dev *sdev,
> > +			    const char *name)
> > +{
> > +	struct sof_client_dev *cdev;
> > +	struct virtbus_device *vdev;
> > +	unsigned long time, timeout;
> > +	int ret;
> > +
> > +	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
> > +	if (!cdev)
> > +		return -ENOMEM;
> > +
> > +	cdev->sdev = sdev;
> > +	init_completion(&cdev->probe_complete);
> > +	vdev = &cdev->vdev;
> > +	vdev->match_name = name;
> > +	vdev->dev.parent = sdev->dev;
> > +	vdev->release = sof_client_virtdev_release;
> > +
> > +	/*
> > +	 * Register virtbus device for the client.
> > +	 * The error path in virtbus_register_device() calls
> > put_device(),
> > +	 * which will free cdev in the release callback.
> > +	 */
> > +	ret = virtbus_register_device(vdev);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	/* make sure the probe is complete before updating client list
> > */
> > +	timeout = msecs_to_jiffies(SOF_CLIENT_PROBE_TIMEOUT_MS);
> > +	time = wait_for_completion_timeout(&cdev->probe_complete,
> > timeout);
> 
> This seems bonkers - the whole point of something like virtual bus is
> to avoid madness like this.

Thanks for your review, Jason. The idea of the timed wait here is to
make the registration of the virtbus devices synchronous so that the
SOF core device has knowledge of all the clients that have been able to
probe successfully. This part is domain-specific and it works very well
in the audio driver case.

Could you please elaborate on why you think this is a bad idea?

Thanks,
Ranjani
Jason Gunthorpe May 21, 2020, 11:34 p.m. UTC | #5
On Thu, May 21, 2020 at 02:11:37PM -0700, Ranjani Sridharan wrote:
> On Wed, 2020-05-20 at 09:54 -0300, Jason Gunthorpe wrote:
> > On Wed, May 20, 2020 at 12:02:25AM -0700, Jeff Kirsher wrote:
> > > From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
> > > 
> > > A client in the SOF (Sound Open Firmware) context is a
> > > device that needs to communicate with the DSP via IPC
> > > messages. The SOF core is responsible for serializing the
> > > IPC messages to the DSP from the different clients. One
> > > example of an SOF client would be an IPC test client that
> > > floods the DSP with test IPC messages to validate if the
> > > serialization works as expected. Multi-client support will
> > > also add the ability to split the existing audio cards
> > > into multiple ones, so as to e.g. to deal with HDMI with a
> > > dedicated client instead of adding HDMI to all cards.
> > > 
> > > This patch introduces descriptors for SOF client driver
> > > and SOF client device along with APIs for registering
> > > and unregistering a SOF client driver, sending IPCs from
> > > a client device and accessing the SOF core debugfs root entry.
> > > 
> > > Along with this, add a couple of new members to struct
> > > snd_sof_dev that will be used for maintaining the list of
> > > clients.
> > 
> > If you want to use sound as the rational for virtual bus then drop
> > the
> > networking stuff and present a complete device/driver pairing based
> > on
> > this sound stuff instead.
> > 
> > > +int sof_client_dev_register(struct snd_sof_dev *sdev,
> > > +			    const char *name)
> > > +{
> > > +	struct sof_client_dev *cdev;
> > > +	struct virtbus_device *vdev;
> > > +	unsigned long time, timeout;
> > > +	int ret;
> > > +
> > > +	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
> > > +	if (!cdev)
> > > +		return -ENOMEM;
> > > +
> > > +	cdev->sdev = sdev;
> > > +	init_completion(&cdev->probe_complete);
> > > +	vdev = &cdev->vdev;
> > > +	vdev->match_name = name;
> > > +	vdev->dev.parent = sdev->dev;
> > > +	vdev->release = sof_client_virtdev_release;
> > > +
> > > +	/*
> > > +	 * Register virtbus device for the client.
> > > +	 * The error path in virtbus_register_device() calls
> > > put_device(),
> > > +	 * which will free cdev in the release callback.
> > > +	 */
> > > +	ret = virtbus_register_device(vdev);
> > > +	if (ret < 0)
> > > +		return ret;
> > > +
> > > +	/* make sure the probe is complete before updating client list
> > > */
> > > +	timeout = msecs_to_jiffies(SOF_CLIENT_PROBE_TIMEOUT_MS);
> > > +	time = wait_for_completion_timeout(&cdev->probe_complete,
> > > timeout);
> > 
> > This seems bonkers - the whole point of something like virtual bus is
> > to avoid madness like this.
> 
> Thanks for your review, Jason. The idea of the times wait here is to
> make the registration of the virtbus devices synchronous so that the
> SOF core device has knowledge of all the clients that have been able to
> probe successfully. This part is domain-specific and it works very well
> in the audio driver case.

This needs to be hot-plug safe. What if the module for this driver is
not available until later in boot? What if the user unplugs the
driver? What if the kernel runs probing single threaded?

It is really unlikely you can both have the requirement that things be
synchronous and also be doing all the other lifetime details properly..

Jason
Pierre-Louis Bossart May 22, 2020, 2:29 p.m. UTC | #6
>>>> +	ret = virtbus_register_device(vdev);
>>>> +	if (ret < 0)
>>>> +		return ret;
>>>> +
>>>> +	/* make sure the probe is complete before updating client list
>>>> */
>>>> +	timeout = msecs_to_jiffies(SOF_CLIENT_PROBE_TIMEOUT_MS);
>>>> +	time = wait_for_completion_timeout(&cdev->probe_complete,
>>>> timeout);
>>>
>>> This seems bonkers - the whole point of something like virtual bus is
>>> to avoid madness like this.
>>
>> Thanks for your review, Jason. The idea of the times wait here is to
>> make the registration of the virtbus devices synchronous so that the
>> SOF core device has knowledge of all the clients that have been able to
>> probe successfully. This part is domain-specific and it works very well
>> in the audio driver case.
> 
> This need to be hot plug safe. What if the module for this driver is
> not available until later in boot? What if the user unplugs the
> driver? What if the kernel runs probing single threaded?
> 
> It is really unlikely you can both have the requirement that things be
> synchronous and also be doing all the other lifetime details properly..

Can you suggest an alternate solution then?

The complete/wait_for_completion is a simple mechanism to tell that the 
action requested by the parent is done. Absent that, we can end-up in a 
situation where the probe may fail, or the requested module does not 
exist, and the parent knows nothing about the failure - so the system is 
in a zombie state and users are frustrated. It's not great either, is it?

This is not a hypothetical case, we've had this recurring problem when 
a PCI device creates an audio card represented as a platform device. 
When the card registration fails, typically due to configuration issues, 
the PCI probe still completes. That's really confusing and the source of 
lots of support questions. If we use these virtual bus extensions to 
stop abusing platform devices, it'd be really nice to make those 
unreported probe failures go away.
Jason Gunthorpe May 22, 2020, 2:55 p.m. UTC | #7
On Fri, May 22, 2020 at 09:29:57AM -0500, Pierre-Louis Bossart wrote:
> 
> > > > > +	ret = virtbus_register_device(vdev);
> > > > > +	if (ret < 0)
> > > > > +		return ret;
> > > > > +
> > > > > +	/* make sure the probe is complete before updating client list
> > > > > */
> > > > > +	timeout = msecs_to_jiffies(SOF_CLIENT_PROBE_TIMEOUT_MS);
> > > > > +	time = wait_for_completion_timeout(&cdev->probe_complete,
> > > > > timeout);
> > > > 
> > > > This seems bonkers - the whole point of something like virtual bus is
> > > > to avoid madness like this.
> > > 
> > > Thanks for your review, Jason. The idea of the times wait here is to
> > > make the registration of the virtbus devices synchronous so that the
> > > SOF core device has knowledge of all the clients that have been able to
> > > probe successfully. This part is domain-specific and it works very well
> > > in the audio driver case.
> > 
> > This need to be hot plug safe. What if the module for this driver is
> > not available until later in boot? What if the user unplugs the
> > driver? What if the kernel runs probing single threaded?
> > 
> > It is really unlikely you can both have the requirement that things be
> > synchronous and also be doing all the other lifetime details properly..
> 
> Can you suggest an alternate solution then?

I don't even know what problem you are trying to solve.

> The complete/wait_for_completion is a simple mechanism to tell that the
> action requested by the parent is done. Absent that, we can end-up in a
> situation where the probe may fail, or the requested module does not exist,
> and the parent knows nothing about the failure - so the system is in a
> zombie state and users are frustrated. It's not great either, is it?

Maybe not great, but at least it is consistent with all the lifetime
models and the operation of the driver core.

> This is not an hypothetical case, we've had this recurring problem when a
> PCI device creates an audio card represented as a platform device. When the
> card registration fails, typically due to configuration issues, the PCI
> probe still completes. That's really confusing and the source of lots of
> support questions. If we use these virtual bus extensions to stpo abusing
> platform devices, it'd be really nice to make those unreported probe
> failures go away.

I think you need to address this in some other way that is hot plug
safe.

Surely you can make this failure visible to users in some other way?

Jason
Pierre-Louis Bossart May 22, 2020, 3:33 p.m. UTC | #8
On 5/22/20 9:55 AM, Jason Gunthorpe wrote:
> On Fri, May 22, 2020 at 09:29:57AM -0500, Pierre-Louis Bossart wrote:
>>
>>>>>> +	ret = virtbus_register_device(vdev);
>>>>>> +	if (ret < 0)
>>>>>> +		return ret;
>>>>>> +
>>>>>> +	/* make sure the probe is complete before updating client list
>>>>>> */
>>>>>> +	timeout = msecs_to_jiffies(SOF_CLIENT_PROBE_TIMEOUT_MS);
>>>>>> +	time = wait_for_completion_timeout(&cdev->probe_complete,
>>>>>> timeout);
>>>>>
>>>>> This seems bonkers - the whole point of something like virtual bus is
>>>>> to avoid madness like this.
>>>>
>>>> Thanks for your review, Jason. The idea of the times wait here is to
>>>> make the registration of the virtbus devices synchronous so that the
>>>> SOF core device has knowledge of all the clients that have been able to
>>>> probe successfully. This part is domain-specific and it works very well
>>>> in the audio driver case.
>>>
>>> This need to be hot plug safe. What if the module for this driver is
>>> not available until later in boot? What if the user unplugs the
>>> driver? What if the kernel runs probing single threaded?
>>>
>>> It is really unlikely you can both have the requirement that things be
>>> synchronous and also be doing all the other lifetime details properly..
>>
>> Can you suggest an alternate solution then?
> 
> I don't even know what problem you are trying to solve.
> 
>> The complete/wait_for_completion is a simple mechanism to tell that the
>> action requested by the parent is done. Absent that, we can end-up in a
>> situation where the probe may fail, or the requested module does not exist,
>> and the parent knows nothing about the failure - so the system is in a
>> zombie state and users are frustrated. It's not great either, is it?
> 
> Maybe not great, but at least it is consistent with all the lifetime
> models and the operation of the driver core.

I agree your comments are valid ones, I just don't have a solution to be 
fully compliant with these models and report failures of the driver 
probe for a child device due to configuration issues (bad audio 
topology, etc).

My understanding is that errors on probe are explicitly not handled in 
the driver core, see e.g. comments such as:

/*
  * Ignore errors returned by ->probe so that the next driver can try
  * its luck.
  */
https://elixir.bootlin.com/linux/latest/source/drivers/base/dd.c#L636

If somehow we could request the error to be reported then probably we 
wouldn't need this complete/wait_for_completion mechanism as a custom 
notification.

>> This is not an hypothetical case, we've had this recurring problem when a
>> PCI device creates an audio card represented as a platform device. When the
>> card registration fails, typically due to configuration issues, the PCI
>> probe still completes. That's really confusing and the source of lots of
>> support questions. If we use these virtual bus extensions to stpo abusing
>> platform devices, it'd be really nice to make those unreported probe
>> failures go away.
> 
> I think you need to address this in some other way that is hot plug
> safe.
> 
> Surely you can make this failure visible to users in some other way?

Not at the moment, no. there are no failures reported in dmesg, and the 
user does not see any card created. This is a silent error.

This is probably domain-specific btw, the use of complete() is only part 
of the SOF core where we extended the virtual bus to support SOF 
clients. This is not a requirement in general for virtual bus users. We 
are not forcing anyone to rely on this complete/wait_for_completion, and 
if someone has a better idea to help us report probe failures we are all 
ears.
Jason Gunthorpe May 22, 2020, 5:10 p.m. UTC | #9
On Fri, May 22, 2020 at 10:33:20AM -0500, Pierre-Louis Bossart wrote:

> > Maybe not great, but at least it is consistent with all the lifetime
> > models and the operation of the driver core.
> 
> I agree your comments are valid ones, I just don't have a solution to be
> fully compliant with these models and report failures of the driver probe
> for a child device due to configuration issues (bad audio topology, etc).


> My understanding is that errors on probe are explicitly not handled in the
> driver core, see e.g. comments such as:

Yes, but that doesn't really apply here...
 
> /*
>  * Ignore errors returned by ->probe so that the next driver can try
>  * its luck.
>  */
> https://elixir.bootlin.com/linux/latest/source/drivers/base/dd.c#L636
> 
> If somehow we could request the error to be reported then probably we
> wouldn't need this complete/wait_for_completion mechanism as a custom
> notification.

That is the same issue as the completion, a driver should not be
making assumptions about ordering like this. For instance what if the
current driver is in the initrd and the 2nd driver is in a module in
the filesystem? It will not probe until the system boots more
completely. 

This is all stuff that is supposed to work properly.

> Not at the moment, no. there are no failures reported in dmesg, and
> the user does not see any card created. This is a silent error.

Creating a partial non-functional card until all the parts are loaded
seems like the right way to surface an error like this.

Or don't break the driver up in this manner if all the parts are really
required just for it to function - quite strange place to get into.

What happens if the user unplugs this sub driver once things start
running?

Jason
Pierre-Louis Bossart May 22, 2020, 6:35 p.m. UTC | #10
On 5/22/20 12:10 PM, Jason Gunthorpe wrote:
> On Fri, May 22, 2020 at 10:33:20AM -0500, Pierre-Louis Bossart wrote:
> 
>>> Maybe not great, but at least it is consistent with all the lifetime
>>> models and the operation of the driver core.
>>
>> I agree your comments are valid ones, I just don't have a solution to be
>> fully compliant with these models and report failures of the driver probe
>> for a child device due to configuration issues (bad audio topology, etc).
> 
> 
>> My understanding is that errors on probe are explicitly not handled in the
>> driver core, see e.g. comments such as:
> 
> Yes, but that doesn't really apply here...
>   
>> /*
>>   * Ignore errors returned by ->probe so that the next driver can try
>>   * its luck.
>>   */
>> https://elixir.bootlin.com/linux/latest/source/drivers/base/dd.c#L636
>>
>> If somehow we could request the error to be reported then probably we
>> wouldn't need this complete/wait_for_completion mechanism as a custom
>> notification.
> 
> That is the same issue as the completion, a driver should not be
> making assumptions about ordering like this. For instance what if the
> current driver is in the initrd and the 2nd driver is in a module in
> the filesystem? It will not probe until the system boots more
> completely.
> 
> This is all stuff that is supposed to work properly.
> 
>> Not at the moment, no. there are no failures reported in dmesg, and
>> the user does not see any card created. This is a silent error.
> 
> Creating a partial non-function card until all the parts are loaded
> seems like the right way to surface an error like this.
> 
> Or don't break the driver up in this manner if all the parts are really
> required just for it to function - quite strange place to get into.

This is not about having all the parts available - that's handled 
already with deferred probe - but an error happening during card 
registration. In that case the ALSA/ASoC core throws an error and we 
cannot report it back to the parent.

> What happens if the user unplugs this sub driver once things start
> running?

refcounting in the ALSA core prevents that from happening usually.
Jason Gunthorpe May 22, 2020, 6:40 p.m. UTC | #11
On Fri, May 22, 2020 at 01:35:54PM -0500, Pierre-Louis Bossart wrote:
> 
> 
> On 5/22/20 12:10 PM, Jason Gunthorpe wrote:
> > On Fri, May 22, 2020 at 10:33:20AM -0500, Pierre-Louis Bossart wrote:
> > 
> > > > Maybe not great, but at least it is consistent with all the lifetime
> > > > models and the operation of the driver core.
> > > 
> > > I agree your comments are valid ones, I just don't have a solution to be
> > > fully compliant with these models and report failures of the driver probe
> > > for a child device due to configuration issues (bad audio topology, etc).
> > 
> > 
> > > My understanding is that errors on probe are explicitly not handled in the
> > > driver core, see e.g. comments such as:
> > 
> > Yes, but that doesn't really apply here...
> > > /*
> > >   * Ignore errors returned by ->probe so that the next driver can try
> > >   * its luck.
> > >   */
> > > https://elixir.bootlin.com/linux/latest/source/drivers/base/dd.c#L636
> > > 
> > > If somehow we could request the error to be reported then probably we
> > > wouldn't need this complete/wait_for_completion mechanism as a custom
> > > notification.
> > 
> > That is the same issue as the completion, a driver should not be
> > making assumptions about ordering like this. For instance what if the
> > current driver is in the initrd and the 2nd driver is in a module in
> > the filesystem? It will not probe until the system boots more
> > completely.
> > 
> > This is all stuff that is supposed to work properly.
> > 
> > > Not at the moment, no. there are no failures reported in dmesg, and
> > > the user does not see any card created. This is a silent error.
> > 
> > Creating a partial non-function card until all the parts are loaded
> > seems like the right way to surface an error like this.
> > 
> > Or don't break the driver up in this manner if all the parts are really
> > required just for it to function - quite strange place to get into.
> 
> This is not about having all the parts available - that's handled already
> with deferred probe - but an error happening during card registration. In
> that case the ALSA/ASoC core throws an error and we cannot report it back to
> the parent.

The whole point of the virtual bus stuff was to split up a
multi-functional PCI device into parts. If all the parts are required
to be working to make the device work, why are you using virtual bus
here?

> > What happens if the user unplugs this sub driver once things start
> > running?
> 
> refcounting in the ALSA core prevents that from happening usually.

So user triggered unplug of driver that attaches here just hangs
forever? That isn't OK either.

Jason
Pierre-Louis Bossart May 22, 2020, 6:48 p.m. UTC | #12
On 5/22/20 1:40 PM, Jason Gunthorpe wrote:
> On Fri, May 22, 2020 at 01:35:54PM -0500, Pierre-Louis Bossart wrote:
>>
>>
>> On 5/22/20 12:10 PM, Jason Gunthorpe wrote:
>>> On Fri, May 22, 2020 at 10:33:20AM -0500, Pierre-Louis Bossart wrote:
>>>
>>>>> Maybe not great, but at least it is consistent with all the lifetime
>>>>> models and the operation of the driver core.
>>>>
>>>> I agree your comments are valid ones, I just don't have a solution to be
>>>> fully compliant with these models and report failures of the driver probe
>>>> for a child device due to configuration issues (bad audio topology, etc).
>>>
>>>
>>>> My understanding is that errors on probe are explicitly not handled in the
>>>> driver core, see e.g. comments such as:
>>>
>>> Yes, but that doesn't really apply here...
>>>> /*
>>>>    * Ignore errors returned by ->probe so that the next driver can try
>>>>    * its luck.
>>>>    */
>>>> https://elixir.bootlin.com/linux/latest/source/drivers/base/dd.c#L636
>>>>
>>>> If somehow we could request the error to be reported then probably we
>>>> wouldn't need this complete/wait_for_completion mechanism as a custom
>>>> notification.
>>>
>>> That is the same issue as the completion, a driver should not be
>>> making assumptions about ordering like this. For instance what if the
>>> current driver is in the initrd and the 2nd driver is in a module in
>>> the filesystem? It will not probe until the system boots more
>>> completely.
>>>
>>> This is all stuff that is supposed to work properly.
>>>
>>>> Not at the moment, no. there are no failures reported in dmesg, and
>>>> the user does not see any card created. This is a silent error.
>>>
>>> Creating a partial non-function card until all the parts are loaded
>>> seems like the right way to surface an error like this.
>>>
>>> Or don't break the driver up in this manner if all the parts are really
>>> required just for it to function - quite strange place to get into.
>>
>> This is not about having all the parts available - that's handled already
>> with deferred probe - but an error happening during card registration. In
>> that case the ALSA/ASoC core throws an error and we cannot report it back to
>> the parent.
> 
> The whole point of the virtual bus stuff was to split up a
> multi-functional PCI device into parts. If all the parts are required
> to be working to make the device work, why are you using virtual bus
> here?

It's the other way around: how does the core know that one part isn't 
functional.

There is nothing in what we said that requires that all parts are fully 
functional. All we stated is that when *one* part isn't fully functional 
we know about it.

>>> What happens if the user unplugs this sub driver once things start
>>> running?
>>
>> refcounting in the ALSA core prevents that from happening usually.
> 
> So user triggered unplug of driver that attaches here just hangs
> forever? That isn't OK either.

No, you'd get a 'module in use' error if I am not mistaken.
Jason Gunthorpe May 22, 2020, 7:44 p.m. UTC | #13
On Fri, May 22, 2020 at 01:48:00PM -0500, Pierre-Louis Bossart wrote:
> 
> 
> On 5/22/20 1:40 PM, Jason Gunthorpe wrote:
> > On Fri, May 22, 2020 at 01:35:54PM -0500, Pierre-Louis Bossart wrote:
> > > 
> > > 
> > > On 5/22/20 12:10 PM, Jason Gunthorpe wrote:
> > > > On Fri, May 22, 2020 at 10:33:20AM -0500, Pierre-Louis Bossart wrote:
> > > > 
> > > > > > Maybe not great, but at least it is consistent with all the lifetime
> > > > > > models and the operation of the driver core.
> > > > > 
> > > > > I agree your comments are valid ones, I just don't have a solution to be
> > > > > fully compliant with these models and report failures of the driver probe
> > > > > for a child device due to configuration issues (bad audio topology, etc).
> > > > 
> > > > 
> > > > > My understanding is that errors on probe are explicitly not handled in the
> > > > > driver core, see e.g. comments such as:
> > > > 
> > > > Yes, but that doesn't really apply here...
> > > > > /*
> > > > >    * Ignore errors returned by ->probe so that the next driver can try
> > > > >    * its luck.
> > > > >    */
> > > > > https://elixir.bootlin.com/linux/latest/source/drivers/base/dd.c#L636
> > > > > 
> > > > > If somehow we could request the error to be reported then probably we
> > > > > wouldn't need this complete/wait_for_completion mechanism as a custom
> > > > > notification.
> > > > 
> > > > That is the same issue as the completion, a driver should not be
> > > > making assumptions about ordering like this. For instance what if the
> > > > current driver is in the initrd and the 2nd driver is in a module in
> > > > the filesystem? It will not probe until the system boots more
> > > > completely.
> > > > 
> > > > This is all stuff that is supposed to work properly.
> > > > 
> > > > > Not at the moment, no. there are no failures reported in dmesg, and
> > > > > the user does not see any card created. This is a silent error.
> > > > 
> > > > Creating a partial non-function card until all the parts are loaded
> > > > seems like the right way to surface an error like this.
> > > > 
> > > > Or don't break the driver up in this manner if all the parts are really
> > > > required just for it to function - quite strange place to get into.
> > > 
> > > This is not about having all the parts available - that's handled already
> > > with deferred probe - but an error happening during card registration. In
> > > that case the ALSA/ASoC core throws an error and we cannot report it back to
> > > the parent.
> > 
> > The whole point of the virtual bus stuff was to split up a
> > multi-functional PCI device into parts. If all the parts are required
> > to be working to make the device work, why are you using virtual bus
> > here?
> 
> It's the other way around: how does the core know that one part isn't
> functional?

> There is nothing in what we said that requires that all parts are fully
> functional. All we stated is that when *one* part isn't fully functional we
> know about it.

Maybe if you can present some diagram or something, because I really
can't understand what ASoC is trying to do with virtual bus here.

> > > > What happens if the user unplugs this sub driver once things start
> > > > running?
> > > 
> > > refcounting in the ALSA core prevents that from happening usually.
> > 
> > So user triggered unplug of driver that attaches here just hangs
> > forever? That isn't OK either.
> 
> No, you'd get a 'module in use' error if I am not mistaken.

You can disconnect drivers without unloading modules. It is a common
misconception. You should never, ever, rely on module ref counting for
anything more than keeping function pointers in memory.

Jason
Pierre-Louis Bossart May 22, 2020, 9:05 p.m. UTC | #14
> Maybe if you can present some diagram or something, because I really
> can't understand what ASoC is trying to do with virtual bus here.

Instead of having a 1:1 mapping between the PCI device and a monolithic
card, we want to split the sound card into multiple orthogonal parts such as:

PCI device
   - local devices (mic/speakers)
   - hdmi devices
   - presence detection/sensing
   - probe/tuning interfaces
   - debug/tests

Initially we wanted to use platform devices but Greg suggested this API 
is abused. We don't have a platform/firmware based enumeration, nor a 
physical bus for each of these subparts, so the virtual bus was suggested.

Does this help?
Greg KH May 23, 2020, 6:23 a.m. UTC | #15
On Fri, May 22, 2020 at 09:29:57AM -0500, Pierre-Louis Bossart wrote:
> This is not a hypothetical case; we've had this recurring problem when a
> PCI device creates an audio card represented as a platform device. When the
> card registration fails, typically due to configuration issues, the PCI
> probe still completes.

Then fix that problem there.  The audio card should not be being created
as a platform device, as that is not what it is.  And even if it was,
the probe should not complete, it should clean up after itself and error
out.

That's not a driver core issue, sounds like a subsystem error handling
issue that needs to be resolved.

thanks,

greg k-h
Pierre-Louis Bossart May 23, 2020, 7:41 p.m. UTC | #16
On 5/23/20 1:23 AM, Greg KH wrote:
> On Fri, May 22, 2020 at 09:29:57AM -0500, Pierre-Louis Bossart wrote:
>> This is not an hypothetical case, we've had this recurring problem when a
>> PCI device creates an audio card represented as a platform device. When the
>> card registration fails, typically due to configuration issues, the PCI
>> probe still completes.
> 
> Then fix that problem there.  The audio card should not be being created
> as a platform device, as that is not what it is.  And even if it was,
> the probe should not complete, it should clean up after itself and error
> out.

Did you mean 'the PCI probe should not complete and error out'?

If yes, that's yet another problem... During the PCI probe, we start a 
workqueue and return success to avoid blocking everything. And only 
'later' do we actually create the card. So that's two levels of probe 
that cannot report a failure. I didn't come up with this design, IIRC 
this is due to audio-DRM dependencies and it's been used for 10+ years.

> 
> That's not a driver core issue, sounds like a subsystem error handling
> issue that needs to be resolved.
> 
> thanks,
> 
> greg k-h
>
Greg KH May 24, 2020, 6:35 a.m. UTC | #17
On Sat, May 23, 2020 at 02:41:51PM -0500, Pierre-Louis Bossart wrote:
> 
> 
> On 5/23/20 1:23 AM, Greg KH wrote:
> > On Fri, May 22, 2020 at 09:29:57AM -0500, Pierre-Louis Bossart wrote:
> > > This is not an hypothetical case, we've had this recurring problem when a
> > > PCI device creates an audio card represented as a platform device. When the
> > > card registration fails, typically due to configuration issues, the PCI
> > > probe still completes.
> > 
> > Then fix that problem there.  The audio card should not be being created
> > as a platform device, as that is not what it is.  And even if it was,
> > the probe should not complete, it should clean up after itself and error
> > out.
> 
> Did you mean 'the PCI probe should not complete and error out'?

Yes.

> If yes, that's yet another problem... During the PCI probe, we start a
> workqueue and return success to avoid blocking everything.

That's crazy.

> And only 'later' do we actually create the card. So that's two levels
> of probe that cannot report a failure. I didn't come up with this
> design, IIRC this is due to audio-DRM dependencies and it's been used
> for 10+ years.

Then if the probe function fails, it needs to unwind everything itself
and unregister the device with the PCI subsystem so that things work
properly.  If it does not do that today, that's a bug.

What kind of crazy dependencies cause this type of "requirement"?

thanks,

greg k-h
Jason Gunthorpe May 25, 2020, 4:55 p.m. UTC | #18
On Sat, May 23, 2020 at 02:41:51PM -0500, Pierre-Louis Bossart wrote:

> If yes, that's yet another problem... During the PCI probe, we start a
> workqueue and return success to avoid blocking everything. And only 'later'
> do we actually create the card. So that's two levels of probe that cannot
> report a failure. I didn't come up with this design, IIRC this is due to
> audio-DRM dependencies and it's been used for 10+ years.

I think there are more tools now than 10 years ago, maybe it is time
to revisit designs like this - clearly something is really wrong with
it based on your explanations.

Jason
Pierre-Louis Bossart May 26, 2020, 1:15 p.m. UTC | #19
On 5/24/20 1:35 AM, Greg KH wrote:
> On Sat, May 23, 2020 at 02:41:51PM -0500, Pierre-Louis Bossart wrote:
>>
>>
>> On 5/23/20 1:23 AM, Greg KH wrote:
>>> On Fri, May 22, 2020 at 09:29:57AM -0500, Pierre-Louis Bossart wrote:
>>>> This is not an hypothetical case, we've had this recurring problem when a
>>>> PCI device creates an audio card represented as a platform device. When the
>>>> card registration fails, typically due to configuration issues, the PCI
>>>> probe still completes.
>>>
>>> Then fix that problem there.  The audio card should not be being created
>>> as a platform device, as that is not what it is.  And even if it was,
>>> the probe should not complete, it should clean up after itself and error
>>> out.
>>
>> Did you mean 'the PCI probe should not complete and error out'?
> 
> Yes.
> 
>> If yes, that's yet another problem... During the PCI probe, we start a
>> workqueue and return success to avoid blocking everything.
> 
> That's crazy.
> 
>> And only 'later' do we actually create the card. So that's two levels
>> of probe that cannot report a failure. I didn't come up with this
>> design, IIRC this is due to audio-DRM dependencies and it's been used
>> for 10+ years.
> 
> Then if the probe function fails, it needs to unwind everything itself
> and unregister the device with the PCI subsystem so that things work
> properly.  If it does not do that today, that's a bug.
> 
> What kind of crazy dependencies cause this type of "requirement"?

I think it is related to the request_module("i915") in 
snd_hdac_i915_init(), and possibly other firmware download.

Adding Takashi for more details.
Takashi Iwai May 26, 2020, 1:37 p.m. UTC | #20
On Tue, 26 May 2020 15:15:26 +0200,
Pierre-Louis Bossart wrote:
> 
> 
> 
> On 5/24/20 1:35 AM, Greg KH wrote:
> > On Sat, May 23, 2020 at 02:41:51PM -0500, Pierre-Louis Bossart wrote:
> >>
> >>
> >> On 5/23/20 1:23 AM, Greg KH wrote:
> >>> On Fri, May 22, 2020 at 09:29:57AM -0500, Pierre-Louis Bossart wrote:
> >>>> This is not an hypothetical case, we've had this recurring problem when a
> >>>> PCI device creates an audio card represented as a platform device. When the
> >>>> card registration fails, typically due to configuration issues, the PCI
> >>>> probe still completes.
> >>>
> >>> Then fix that problem there.  The audio card should not be being created
> >>> as a platform device, as that is not what it is.  And even if it was,
> >>> the probe should not complete, it should clean up after itself and error
> >>> out.
> >>
> >> Did you mean 'the PCI probe should not complete and error out'?
> >
> > Yes.
> >
> >> If yes, that's yet another problem... During the PCI probe, we start a
> >> workqueue and return success to avoid blocking everything.
> >
> > That's crazy.
> >
> >> And only 'later' do we actually create the card. So that's two levels
> >> of probe that cannot report a failure. I didn't come up with this
> >> design, IIRC this is due to audio-DRM dependencies and it's been used
> >> for 10+ years.
> >
> > Then if the probe function fails, it needs to unwind everything itself
> > and unregister the device with the PCI subsystem so that things work
> > properly.  If it does not do that today, that's a bug.
> >
> > What kind of crazy dependencies cause this type of "requirement"?
> 
> I think it is related to the request_module("i915") in
> snd_hdac_i915_init(), and possibly other firmware download.
> 
> Adding Takashi for more details.

Right, there are a few levels of complexity there.  The HD-audio
PCI controller driver, for example, is initialized in an async way
with a work.  It loads the firmware files with
request_firmware_nowait() and also binds itself as a component master
with the DRM graphics driver via component framework.

Currently it has no way to unwind the PCI binding itself at the error
path, though.  In theory it should be possible to unregister the PCI
from the driver itself in the work context, but it failed in the
earlier experiments, hence the driver sets itself in a disabled state
instead.  Maybe worth to try again.

But, to be noted, all belonging sub-devices aren't instantiated but
deleted at the error path.  Only the main PCI binding is kept in a
disabled state just as a place holder until it's unbound explicitly.


Takashi
Greg KH May 27, 2020, 7:17 a.m. UTC | #21
On Tue, May 26, 2020 at 03:37:36PM +0200, Takashi Iwai wrote:
> On Tue, 26 May 2020 15:15:26 +0200,
> Pierre-Louis Bossart wrote:
> > 
> > 
> > 
> > On 5/24/20 1:35 AM, Greg KH wrote:
> > > On Sat, May 23, 2020 at 02:41:51PM -0500, Pierre-Louis Bossart wrote:
> > >>
> > >>
> > >> On 5/23/20 1:23 AM, Greg KH wrote:
> > >>> On Fri, May 22, 2020 at 09:29:57AM -0500, Pierre-Louis Bossart wrote:
> > >>>> This is not an hypothetical case, we've had this recurring problem when a
> > >>>> PCI device creates an audio card represented as a platform device. When the
> > >>>> card registration fails, typically due to configuration issues, the PCI
> > >>>> probe still completes.
> > >>>
> > >>> Then fix that problem there.  The audio card should not be being created
> > >>> as a platform device, as that is not what it is.  And even if it was,
> > >>> the probe should not complete, it should clean up after itself and error
> > >>> out.
> > >>
> > >> Did you mean 'the PCI probe should not complete and error out'?
> > >
> > > Yes.
> > >
> > >> If yes, that's yet another problem... During the PCI probe, we start a
> > >> workqueue and return success to avoid blocking everything.
> > >
> > > That's crazy.
> > >
> > >> And only 'later' do we actually create the card. So that's two levels
> > >> of probe that cannot report a failure. I didn't come up with this
> > >> design, IIRC this is due to audio-DRM dependencies and it's been used
> > >> for 10+ years.
> > >
> > > Then if the probe function fails, it needs to unwind everything itself
> > > and unregister the device with the PCI subsystem so that things work
> > > properly.  If it does not do that today, that's a bug.
> > >
> > > What kind of crazy dependencies cause this type of "requirement"?
> > 
> > I think it is related to the request_module("i915") in
> > snd_hdac_i915_init(), and possibly other firmware download.
> > 
> > Adding Takashi for more details.
> 
> Right, there are a few levels of complexity there.  The HD-audio
> PCI controller driver, for example, is initialized in an async way
> with a work.  It loads the firmware files with
> request_firmware_nowait() and also binds itself as a component master
> with the DRM graphics driver via component framework.
> 
> Currently it has no way to unwind the PCI binding itself at the error
> path, though.  In theory it should be possible to unregister the PCI
> from the driver itself in the work context, but it failed in the
> earlier experiments, hence the driver sets itself in a disabled state
> instead.  Maybe worth to try again.
> 
> But, to be noted, all belonging sub-devices aren't instantiated but
> deleted at the error path.  Only the main PCI binding is kept in a
> disabled state just as a place holder until it's unbound explicitly.

Ok, that's good to hear.  But platform devices should never be showing
up as a child of a PCI device.  In the "near future" when we get the
virtual bus code merged, we can convert any existing users like this to
the new code.

thanks,

greg k-h
Pierre-Louis Bossart May 27, 2020, 2:05 p.m. UTC | #22
>>>>> If yes, that's yet another problem... During the PCI probe, we start a
>>>>> workqueue and return success to avoid blocking everything.
>>>>
>>>> That's crazy.
>>>>
>>>>> And only 'later' do we actually create the card. So that's two levels
>>>>> of probe that cannot report a failure. I didn't come up with this
>>>>> design, IIRC this is due to audio-DRM dependencies and it's been used
>>>>> for 10+ years.
>>>>
>>>> Then if the probe function fails, it needs to unwind everything itself
>>>> and unregister the device with the PCI subsystem so that things work
>>>> properly.  If it does not do that today, that's a bug.
>>>>
>>>> What kind of crazy dependencies cause this type of "requirement"?
>>>
>>> I think it is related to the request_module("i915") in
>>> snd_hdac_i915_init(), and possibly other firmware download.
>>>
>>> Adding Takashi for more details.
>>
>> Right, there are a few levels of complexity there.  The HD-audio
>> PCI controller driver, for example, is initialized in an async way
>> with a work.  It loads the firmware files with
>> request_firmware_nowait() and also binds itself as a component master
>> with the DRM graphics driver via component framework.
>>
>> Currently it has no way to unwind the PCI binding itself at the error
>> path, though.  In theory it should be possible to unregister the PCI
>> from the driver itself in the work context, but it failed in the
>> earlier experiments, hence the driver sets itself in a disabled state
>> instead.  Maybe worth to try again.
>>
>> But, to be noted, all belonging sub-devices aren't instantiated but
>> deleted at the error path.  Only the main PCI binding is kept in a
>> disabled state just as a place holder until it's unbound explicitly.
> 
> Ok, that's good to hear.  But platform devices should never be showing
> up as a child of a PCI device.  In the "near future" when we get the
> virtual bus code merged, we can convert any existing users like this to
> the new code.

yes that's the plan. It'll be however more than a 1:1 replacement, i.e. 
we want to use this opportunity to split existing cards into separate 
ones when it makes sense to do so. There's really no rationale for 
having code to deal with HDMI in each machine driver when we could have 
a single driver for HDMI. That's really what drove us to suggest this 
patchset based on the virtual bus: removal of platform devices + 
repartition.
Mark Brown June 29, 2020, 5:36 p.m. UTC | #23
On Wed, May 20, 2020 at 12:02:25AM -0700, Jeff Kirsher wrote:
> From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
> 
> A client in the SOF (Sound Open Firmware) context is a
> device that needs to communicate with the DSP via IPC

Please send patches to the maintainers for the code you would like to
change.  :(

> +config SND_SOC_SOF_CLIENT
> +	tristate
> +	select VIRTUAL_BUS
> +	help
> +	  This option is not user-selectable but automagically handled by
> +	  'select' statements at a higher level

VIRTUAL_BUS is both user visible and selected here (not that the select
will do much since this option is not user visible) - which is it?

> +config SND_SOC_SOF_CLIENT_SUPPORT
> +	bool "SOF enable clients"
> +	depends on SND_SOC_SOF
> +	help
> +	  This adds client support with Sound Open Firmware.
> +	  The SOF driver adds the capability to separate out the debug
> +	  functionality for IPC tests, probes etc. into separate client
> +	  devices. This option would also allow adding client devices
> +	  based on DSP FW capabilities and ACPI/OF device information.
> +	  Say Y if you want to enable clients with SOF.
> +	  If unsure select "N".

I'm not sure that's going to mean much to users, nor can I see why you'd
ever have a SOF system without this enabled?

> +	/*
> +	 * Register virtbus device for the client.
> +	 * The error path in virtbus_register_device() calls put_device(),
> +	 * which will free cdev in the release callback.
> +	 */
> +	ret = virtbus_register_device(vdev);
> +	if (ret < 0)
> +		return ret;

> +	/* make sure the probe is complete before updating client list */
> +	timeout = msecs_to_jiffies(SOF_CLIENT_PROBE_TIMEOUT_MS);
> +	time = wait_for_completion_timeout(&cdev->probe_complete, timeout);
> +	if (!time) {
> +		dev_err(sdev->dev, "error: probe of virtbus dev %s timed out\n",
> +			name);
> +		virtbus_unregister_device(vdev);
> +		return -ETIMEDOUT;
> +	}

This seems wrong - what happens if the driver isn't loaded yet for
example?  Why does the device registration care what's going on with
driver binding at all?
Mark Brown June 29, 2020, 8:21 p.m. UTC | #24
On Sat, May 23, 2020 at 08:23:51AM +0200, Greg KH wrote:

> Then fix that problem there.  The audio card should not be being created
> as a platform device, as that is not what it is.  And even if it was,
> the probe should not complete, it should clean up after itself and error
> out.

To be clear ASoC sound cards are physical devices which exist in the
real world.

> That's not a driver core issue, sounds like a subsystem error handling
> issue that needs to be resolved.

It's not a subsystem issue, it's an issue with the half baked support
for enumerating modern audio hardware on ACPI systems.  Unfortunately
we have to enumerate hardware based on having data tables instantiated
via DMI information for the system which doesn't work well with a
generic kernel like Linux, on Windows they're per-machine custom
drivers.  There is some effort at putting some of the data into ACPI
tables on newer systems which is helping a lot but it's partial.
Mark Brown June 29, 2020, 8:33 p.m. UTC | #25
On Wed, May 27, 2020 at 09:17:33AM +0200, Greg KH wrote:

> Ok, that's good to hear.  But platform devices should never be showing
> up as a child of a PCI device.  In the "near future" when we get the
> virtual bus code merged, we can convert any existing users like this to
> the new code.

What are we supposed to do with things like PCI attached FPGAs and ASICs
in that case?  They can have host visible devices with physical
resources like MMIO ranges and interrupts without those being split up
neatly as PCI subfunctions - the original use case for MFD was such
ASICs, there's a few PCI drivers in there now.  Adding support for those
into virtual bus would make it even more of a cut'n'paste of the
platform bus than it already is.
Mark Brown June 29, 2020, 8:59 p.m. UTC | #26
On Fri, May 22, 2020 at 10:33:20AM -0500, Pierre-Louis Bossart wrote:
> On 5/22/20 9:55 AM, Jason Gunthorpe wrote:

> > Maybe not great, but at least it is consistent with all the lifetime
> > models and the operation of the driver core.

> I agree your comments are valid ones, I just don't have a solution to be
> fully compliant with these models and report failures of the driver probe
> for a child device due to configuration issues (bad audio topology, etc).

> My understanding is that errors on probe are explicitly not handled in the
> driver core, see e.g. comments such as:

It's just not an error for a child device to not instantiate, we don't
even know if the driver is loaded yet.  The parent really should not
care if the child is there or not.

> > > PCI device creates an audio card represented as a platform device. When the
> > > card registration fails, typically due to configuration issues, the PCI
> > > probe still completes. That's really confusing and the source of lots of
> > > support questions. If we use these virtual bus extensions to stpo abusing
> > > platform devices, it'd be really nice to make those unreported probe
> > > failures go away.

> > I think you need to address this in some other way that is hot plug
> > safe.

> > Surely you can make this failure visible to users in some other way?

> Not at the moment, no. there are no failures reported in dmesg, and the user
> does not see any card created. This is a silent error.

If we're failing to do something we should report it.  This includes
deferred probe failures.
Jason Gunthorpe June 29, 2020, 10:59 p.m. UTC | #27
On Mon, Jun 29, 2020 at 09:33:17PM +0100, Mark Brown wrote:
> On Wed, May 27, 2020 at 09:17:33AM +0200, Greg KH wrote:
> 
> > Ok, that's good to hear.  But platform devices should never be showing
> > up as a child of a PCI device.  In the "near future" when we get the
> > virtual bus code merged, we can convert any existing users like this to
> > the new code.
> 
> What are we supposed to do with things like PCI attached FPGAs and ASICs
> in that case?  They can have host visible devices with physical
> resources like MMIO ranges and interrupts without those being split up
> neatly as PCI subfunctions - the original use case for MFD was such
> ASICs, there's a few PCI drivers in there now. 

Greg has been pretty clear that MFD shouldn't have been used on top of
PCI drivers.

In a sense virtual bus is pretty much MFD v2.

Jason
Kirsher, Jeffrey T June 29, 2020, 11:13 p.m. UTC | #28
> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Monday, June 29, 2020 16:00
> To: Mark Brown <broonie@kernel.org>
> Cc: Greg KH <gregkh@linuxfoundation.org>; Takashi Iwai <tiwai@suse.de>;
> Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>; Ranjani Sridharan
> <ranjani.sridharan@linux.intel.com>; Kirsher, Jeffrey T
> <jeffrey.t.kirsher@intel.com>; davem@davemloft.net; netdev@vger.kernel.org;
> linux-rdma@vger.kernel.org; nhorman@redhat.com; sassmann@redhat.com;
> Fred Oh <fred.oh@linux.intel.com>
> Subject: Re: [net-next v4 10/12] ASoC: SOF: Introduce descriptors for SOF
> client
> 
> On Mon, Jun 29, 2020 at 09:33:17PM +0100, Mark Brown wrote:
> > On Wed, May 27, 2020 at 09:17:33AM +0200, Greg KH wrote:
> >
> > > Ok, that's good to hear.  But platform devices should never be
> > > showing up as a child of a PCI device.  In the "near future" when we
> > > get the virtual bus code merged, we can convert any existing users
> > > like this to the new code.
> >
> > What are we supposed to do with things like PCI attached FPGAs and
> > ASICs in that case?  They can have host visible devices with physical
> > resources like MMIO ranges and interrupts without those being split up
> > neatly as PCI subfunctions - the original use case for MFD was such
> > ASICs, there's a few PCI drivers in there now.
> 
> Greg has been pretty clear that MFD shouldn't have been used on top of PCI
> drivers.
> 
> In a sense virtual bus is pretty much MFD v2.
 
With the big distinction that MFD uses Platform bus/devices, which is why we could not
use MFD as a solution, and virtbus does not.
Mark Brown June 30, 2020, 10:31 a.m. UTC | #29
On Mon, Jun 29, 2020 at 07:59:59PM -0300, Jason Gunthorpe wrote:
> On Mon, Jun 29, 2020 at 09:33:17PM +0100, Mark Brown wrote:
> > On Wed, May 27, 2020 at 09:17:33AM +0200, Greg KH wrote:

> > > Ok, that's good to hear.  But platform devices should never be showing
> > > up as a child of a PCI device.  In the "near future" when we get the
> > > virtual bus code merged, we can convert any existing users like this to
> > > the new code.

> > What are we supposed to do with things like PCI attached FPGAs and ASICs
> > in that case?  They can have host visible devices with physical
> > resources like MMIO ranges and interrupts without those being split up
> > neatly as PCI subfunctions - the original use case for MFD was such
> > ASICs, there's a few PCI drivers in there now. 

> Greg has been pretty clear that MFD shouldn't have been used on top of
> PCI drivers.

The proposed bus lacks resource handling, an equivalent of
platform_get_resource() and friends for example, which would be needed
for use with physical devices.  Both that and the name suggest that it's
for virtual devices.

> In a sense virtual bus is pretty much MFD v2.

Copying in Lee since I'm not sure he's aware of this, it's quite a
recent thing...  MFD is a layer above AFAICT, it's not a bus but rather
a combination of helpers for registering subdevices and a place for
drivers for core functionality of devices which have multiple features.

The reason the MFDs use platform devices is that they end up having to
have all the features of platform devices - originally people were
making virtual buses for them but the code duplication is real so
everyone (including Greg) decided to just use what was there already.
Jason Gunthorpe June 30, 2020, 11:32 a.m. UTC | #30
On Tue, Jun 30, 2020 at 11:31:41AM +0100, Mark Brown wrote:
> On Mon, Jun 29, 2020 at 07:59:59PM -0300, Jason Gunthorpe wrote:
> > On Mon, Jun 29, 2020 at 09:33:17PM +0100, Mark Brown wrote:
> > > On Wed, May 27, 2020 at 09:17:33AM +0200, Greg KH wrote:
> 
> > > > Ok, that's good to hear.  But platform devices should never be showing
> > > > up as a child of a PCI device.  In the "near future" when we get the
> > > > virtual bus code merged, we can convert any existing users like this to
> > > > the new code.
> 
> > > What are we supposed to do with things like PCI attached FPGAs and ASICs
> > > in that case?  They can have host visible devices with physical
> > > resources like MMIO ranges and interrupts without those being split up
> > > neatly as PCI subfunctions - the original use case for MFD was such
> > > ASICs, there's a few PCI drivers in there now. 
> 
> > Greg has been pretty clear that MFD shouldn't have been used on top of
> > PCI drivers.
> 
> The proposed bus lacks resource handling, an equivalent of
> platform_get_resource() and friends for example, which would be needed
> for use with physical devices.  Both that and the name suggest that it's
> for virtual devices.

Resource handling is only useful if the HW has a hard distinction
between it's functional blocks. This scheme is intended for devices
where that doesn't exist. The driver that attaches to the PCI device
and creates the virtual devices is supposed to provide SW abstractions
for the other drivers to sit on.
 
I'm not sure why we are calling it virtual bus.

> The reason the MFDs use platform devices is that they end up having to
> have all the features of platform devices - originally people were
> making virtual buses for them but the code duplication is real so
> everyone (including Greg) decided to just use what was there already.

Maybe Greg will explain why he didn't like the earlier version of that
stuff that used MFD

Jason
Mark Brown June 30, 2020, 2:16 p.m. UTC | #31
On Tue, Jun 30, 2020 at 08:32:45AM -0300, Jason Gunthorpe wrote:
> On Tue, Jun 30, 2020 at 11:31:41AM +0100, Mark Brown wrote:
> > On Mon, Jun 29, 2020 at 07:59:59PM -0300, Jason Gunthorpe wrote:

> > > > What are we supposed to do with things like PCI attached FPGAs and ASICs
> > > > in that case?  They can have host visible devices with physical
> > > > resources like MMIO ranges and interrupts without those being split up
> > > > neatly as PCI subfunctions - the original use case for MFD was such
> > > > ASICs, there's a few PCI drivers in there now. 

> > > Greg has been pretty clear that MFD shouldn't have been used on top of
> > > PCI drivers.

> > The proposed bus lacks resource handling, an equivalent of
> > platform_get_resource() and friends for example, which would be needed
> > for use with physical devices.  Both that and the name suggest that it's
> > for virtual devices.

> Resource handling is only useful if the HW has a hard distinction
> between it's functional blocks. This scheme is intended for devices
> where that doesn't exist. The driver that attaches to the PCI device
> and creates the virtual devices is supposed to provide SW abstractions
> for the other drivers to sit on.

> I'm not sure why we are calling it virtual bus.

The abstraction that the PCI based MFDs (and FPGAs will be similar,
they're just dynamic MFDs to a good approximation) need is to pass
through MMIO regions, interrupts and so on which is exactly what the
platform bus offers.  The hardware is basically someone taking a bunch
of IPs and shoving them behind the MMIO/interrupt regions of a PCI
device.

> > The reason the MFDs use platform devices is that they end up having to
> > have all the features of platform devices - originally people were
> > making virtual buses for them but the code duplication is real so
> > everyone (including Greg) decided to just use what was there already.

> Maybe Greg will explain why he didn't like the earlier version of that
> stuff that used MFD

AFAICT Greg is mostly concerned about the MFDs that aren't memory
mapped, though some of them do use the resource API to pass interrupts
through.
Ranjani Sridharan June 30, 2020, 5:24 p.m. UTC | #32
On Tue, 2020-06-30 at 08:32 -0300, Jason Gunthorpe wrote:
> On Tue, Jun 30, 2020 at 11:31:41AM +0100, Mark Brown wrote:
> > On Mon, Jun 29, 2020 at 07:59:59PM -0300, Jason Gunthorpe wrote:
> > > On Mon, Jun 29, 2020 at 09:33:17PM +0100, Mark Brown wrote:
> > > > On Wed, May 27, 2020 at 09:17:33AM +0200, Greg KH wrote:
> > > > > Ok, that's good to hear.  But platform devices should never
> > > > > be showing
> > > > > up as a child of a PCI device.  In the "near future" when we
> > > > > get the
> > > > > virtual bus code merged, we can convert any existing users
> > > > > like this to
> > > > > the new code.
> > > > What are we supposed to do with things like PCI attached FPGAs
> > > > and ASICs
> > > > in that case?  They can have host visible devices with physical
> > > > resources like MMIO ranges and interrupts without those being
> > > > split up
> > > > neatly as PCI subfunctions - the original use case for MFD was
> > > > such
> > > > ASICs, there's a few PCI drivers in there now. 
> > > Greg has been pretty clear that MFD shouldn't have been used on
> > > top of
> > > PCI drivers.
> > 
> > The proposed bus lacks resource handling, an equivalent of
> > platform_get_resource() and friends for example, which would be
> > needed
> > for use with physical devices.  Both that and the name suggest that
> > it's
> > for virtual devices.
> 
> Resource handling is only useful if the HW has a hard distinction
> between it's functional blocks. This scheme is intended for devices
> where that doesn't exist. The driver that attaches to the PCI device
> and creates the virtual devices is supposed to provide SW
> abstractions
> for the other drivers to sit on.
>  
> I'm not sure why we are calling it virtual bus.
Hi Jason,

We're addressing the naming in the next version as well. We've had
several people reject the name virtual bus and we've narrowed in on
"ancillary bus" for the new name suggesting that we have the core
device that is attached to the primary bus and one or more sub-devices
that are attached to the ancillary bus. Please let us know what you
think of it.

Thanks,
Ranjani
Jason Gunthorpe June 30, 2020, 5:27 p.m. UTC | #33
On Tue, Jun 30, 2020 at 10:24:04AM -0700, Ranjani Sridharan wrote:
> On Tue, 2020-06-30 at 08:32 -0300, Jason Gunthorpe wrote:
> > On Tue, Jun 30, 2020 at 11:31:41AM +0100, Mark Brown wrote:
> > > On Mon, Jun 29, 2020 at 07:59:59PM -0300, Jason Gunthorpe wrote:
> > > > On Mon, Jun 29, 2020 at 09:33:17PM +0100, Mark Brown wrote:
> > > > > On Wed, May 27, 2020 at 09:17:33AM +0200, Greg KH wrote:
> > > > > > Ok, that's good to hear.  But platform devices should never
> > > > > > be showing
> > > > > > up as a child of a PCI device.  In the "near future" when we
> > > > > > get the
> > > > > > virtual bus code merged, we can convert any existing users
> > > > > > like this to
> > > > > > the new code.
> > > > > What are we supposed to do with things like PCI attached FPGAs
> > > > > and ASICs
> > > > > in that case?  They can have host visible devices with physical
> > > > > resources like MMIO ranges and interrupts without those being
> > > > > split up
> > > > > neatly as PCI subfunctions - the original use case for MFD was
> > > > > such
> > > > > ASICs, there's a few PCI drivers in there now. 
> > > > Greg has been pretty clear that MFD shouldn't have been used on
> > > > top of
> > > > PCI drivers.
> > > 
> > > The proposed bus lacks resource handling, an equivalent of
> > > platform_get_resource() and friends for example, which would be
> > > needed
> > > for use with physical devices.  Both that and the name suggest that
> > > it's
> > > for virtual devices.
> > 
> > Resource handling is only useful if the HW has a hard distinction
> > between it's functional blocks. This scheme is intended for devices
> > where that doesn't exist. The driver that attaches to the PCI device
> > and creates the virtual devices is supposed to provide SW
> > abstractions
> > for the other drivers to sit on.
> >  
> > I'm not sure why we are calling it virtual bus.
> Hi Jason,
> 
> We're addressing the naming in the next version as well. We've had
> several people reject the name virtual bus and we've narrowed in on
> "ancillary bus" for the new name suggesting that we have the core
> device that is attached to the primary bus and one or more sub-devices
> that are attached to the ancillary bus. Please let us know what you
> think of it.

It is sufficiently vague

I wonder if SW_MFD might be more apt though? Based on Mark's remarks
current MFD is 'hw' MFD where the created platform_devices expect a
MMIO pass through, while this is an MFD with a device-specific SW
interfacing layer.

MFD really is the best name for this kind of functionality,
understanding how it is different from the current MFD might also help
justify why it exists and give it a name.

Jason
Greg KH July 1, 2020, 6:59 a.m. UTC | #34
On Tue, Jun 30, 2020 at 10:24:04AM -0700, Ranjani Sridharan wrote:
> On Tue, 2020-06-30 at 08:32 -0300, Jason Gunthorpe wrote:
> > On Tue, Jun 30, 2020 at 11:31:41AM +0100, Mark Brown wrote:
> > > On Mon, Jun 29, 2020 at 07:59:59PM -0300, Jason Gunthorpe wrote:
> > > > On Mon, Jun 29, 2020 at 09:33:17PM +0100, Mark Brown wrote:
> > > > > On Wed, May 27, 2020 at 09:17:33AM +0200, Greg KH wrote:
> > > > > > Ok, that's good to hear.  But platform devices should never
> > > > > > be showing
> > > > > > up as a child of a PCI device.  In the "near future" when we
> > > > > > get the
> > > > > > virtual bus code merged, we can convert any existing users
> > > > > > like this to
> > > > > > the new code.
> > > > > What are we supposed to do with things like PCI attached FPGAs
> > > > > and ASICs
> > > > > in that case?  They can have host visible devices with physical
> > > > > resources like MMIO ranges and interrupts without those being
> > > > > split up
> > > > > neatly as PCI subfunctions - the original use case for MFD was
> > > > > such
> > > > > ASICs, there's a few PCI drivers in there now. 
> > > > Greg has been pretty clear that MFD shouldn't have been used on
> > > > top of
> > > > PCI drivers.
> > > 
> > > The proposed bus lacks resource handling, an equivalent of
> > > platform_get_resource() and friends for example, which would be
> > > needed
> > > for use with physical devices.  Both that and the name suggest that
> > > it's
> > > for virtual devices.
> > 
> > Resource handling is only useful if the HW has a hard distinction
> > between it's functional blocks. This scheme is intended for devices
> > where that doesn't exist. The driver that attaches to the PCI device
> > and creates the virtual devices is supposed to provide SW
> > abstractions
> > for the other drivers to sit on.
> >  
> > I'm not sure why we are calling it virtual bus.
> Hi Jason,
> 
> We're addressing the naming in the next version as well. We've had
> several people reject the name virtual bus and we've narrowed in on
> "ancillary bus" for the new name suggesting that we have the core
> device that is attached to the primary bus and one or more sub-devices
> that are attached to the ancillary bus. Please let us know what you
> think of it.

I'm thinking that the primary person who keeps asking you to create this
"virtual bus" was not upset about that name, nor consulted, so why are
you changing this?  :(

Right now this feels like the old technique of "keep throwing crap at a
maintainer until they get so sick of it that they do the work
themselves..."

greg k-h
Mark Brown July 1, 2020, 9:50 a.m. UTC | #35
On Tue, Jun 30, 2020 at 02:27:10PM -0300, Jason Gunthorpe wrote:

> I wonder if SW_MFD might me more apt though? Based on Mark's remarks
> current MFD is 'hw' MFD where the created platform_devices expect a
> MMIO pass through, while this is a MFD a device-specific SW
> interfacing layer.

Another part of this is that there's not a clean cut over between MMIO
and not using any hardware resources at all - for example a device might
be connected over I2C but use resources to distribute interrupts to
subdevices.

> MFD really is the best name for this kind of functionality,
> understanding how it is different from the current MFD might also help
> justify why it exists and give it a name.

Right, it's not clear what we're doing here.
Jason Gunthorpe July 1, 2020, 11:32 p.m. UTC | #36
On Wed, Jul 01, 2020 at 10:50:49AM +0100, Mark Brown wrote:
> On Tue, Jun 30, 2020 at 02:27:10PM -0300, Jason Gunthorpe wrote:
> 
> > I wonder if SW_MFD might me more apt though? Based on Mark's remarks
> > current MFD is 'hw' MFD where the created platform_devices expect a
> > MMIO pass through, while this is a MFD a device-specific SW
> > interfacing layer.
> 
> Another part of this is that there's not a clean cut over between MMIO
> and not using any hardware resources at all - for example a device might
> be connected over I2C but use resources to distribute interrupts to
> subdevices.

How does the subdevice do anything if it only received an interrupt?

That sounds rather more like virtual bus's use case..

Jason
Mark Brown July 2, 2020, 11:15 a.m. UTC | #37
On Wed, Jul 01, 2020 at 08:32:50PM -0300, Jason Gunthorpe wrote:
> On Wed, Jul 01, 2020 at 10:50:49AM +0100, Mark Brown wrote:

> > Another part of this is that there's not a clean cut over between MMIO
> > and not using any hardware resources at all - for example a device might
> > be connected over I2C but use resources to distribute interrupts to
> > subdevices.

> How does the subdevice do anything if it only received an interrupt?

Via some bus that isn't memory mapped like I2C or SPI.

> That sounds rather more like virtual bus's use case..

These are very much physical devices often with distinct IPs in distinct
address ranges and so on, it's just that those addresses happen not to
be on buses it is sensible to memory map.
Jason Gunthorpe July 2, 2020, 12:11 p.m. UTC | #38
On Thu, Jul 02, 2020 at 12:15:22PM +0100, Mark Brown wrote:
> On Wed, Jul 01, 2020 at 08:32:50PM -0300, Jason Gunthorpe wrote:
> > On Wed, Jul 01, 2020 at 10:50:49AM +0100, Mark Brown wrote:
> 
> > > Another part of this is that there's not a clean cut over between MMIO
> > > and not using any hardware resources at all - for example a device might
> > > be connected over I2C but use resources to distribute interrupts to
> > > subdevices.
> 
> > How does the subdevice do anything if it only received an interrupt?
> 
> Via some bus that isn't memory mapped like I2C or SPI.
> 
> > That sounds rather more like virtual bus's use case..
> 
> These are very much physical devices often with distinct IPs in distinct
> address ranges and so on, it's just that those addresses happen not to
> be on buses it is sensible to memory map.

But platform bus is all about memmory mapping, so how does the
subdevice learn the address range and properly share the underlying
transport?

Jason
Mark Brown July 2, 2020, 12:20 p.m. UTC | #39
On Thu, Jul 02, 2020 at 09:11:47AM -0300, Jason Gunthorpe wrote:
> On Thu, Jul 02, 2020 at 12:15:22PM +0100, Mark Brown wrote:

> > These are very much physical devices often with distinct IPs in distinct
> > address ranges and so on, it's just that those addresses happen not to
> > be on buses it is sensible to memory map.

> But platform bus is all about memmory mapping, so how does the
> subdevice learn the address range and properly share the underlying
> transport?

Hard coding, some out of band mechanism or using an unparented register
region (the resource the code is fine, it's not going to actually look
at the registers described).
Ranjani Sridharan July 2, 2020, 1:43 p.m. UTC | #40
On Wed, 2020-07-01 at 08:59 +0200, Greg KH wrote:
> On Tue, Jun 30, 2020 at 10:24:04AM -0700, Ranjani Sridharan wrote:
> > On Tue, 2020-06-30 at 08:32 -0300, Jason Gunthorpe wrote:
> > > On Tue, Jun 30, 2020 at 11:31:41AM +0100, Mark Brown wrote:
> > > > On Mon, Jun 29, 2020 at 07:59:59PM -0300, Jason Gunthorpe
> > > > wrote:
> > > > > On Mon, Jun 29, 2020 at 09:33:17PM +0100, Mark Brown wrote:
> > > > > > On Wed, May 27, 2020 at 09:17:33AM +0200, Greg KH wrote:
> > > > > > > Ok, that's good to hear.  But platform devices should
> > > > > > > never
> > > > > > > be showing
> > > > > > > up as a child of a PCI device.  In the "near future" when
> > > > > > > we
> > > > > > > get the
> > > > > > > virtual bus code merged, we can convert any existing
> > > > > > > users
> > > > > > > like this to
> > > > > > > the new code.
> > > > > > What are we supposed to do with things like PCI attached
> > > > > > FPGAs
> > > > > > and ASICs
> > > > > > in that case?  They can have host visible devices with
> > > > > > physical
> > > > > > resources like MMIO ranges and interrupts without those
> > > > > > being
> > > > > > split up
> > > > > > neatly as PCI subfunctions - the original use case for MFD
> > > > > > was
> > > > > > such
> > > > > > ASICs, there's a few PCI drivers in there now. 
> > > > > Greg has been pretty clear that MFD shouldn't have been used
> > > > > on
> > > > > top of
> > > > > PCI drivers.
> > > > 
> > > > The proposed bus lacks resource handling, an equivalent of
> > > > platform_get_resource() and friends for example, which would be
> > > > needed
> > > > for use with physical devices.  Both that and the name suggest
> > > > that
> > > > it's
> > > > for virtual devices.
> > > 
> > > Resource handling is only useful if the HW has a hard distinction
> > > between it's functional blocks. This scheme is intended for
> > > devices
> > > where that doesn't exist. The driver that attaches to the PCI
> > > device
> > > and creates the virtual devices is supposed to provide SW
> > > abstractions
> > > for the other drivers to sit on.
> > >  
> > > I'm not sure why we are calling it virtual bus.
> > Hi Jason,
> > 
> > We're addressing the naming in the next version as well. We've had
> > several people reject the name virtual bus and we've narrowed in on
> > "ancillary bus" for the new name suggesting that we have the core
> > device that is attached to the primary bus and one or more sub-
> > devices
> > that are attached to the ancillary bus. Please let us know what you
> > think of it.
> 
> I'm thinking that the primary person who keeps asking you to create
> this
> "virtual bus" was not upset about that name, nor consulted, so why
> are
> you changing this?  :(
> 
> Right now this feels like the old technique of "keep throwing crap at
> a
> maintainer until they get so sick of it that they do the work
> themselves..."

Hi Greg,

It wasn't our intention to frustrate you with the name change but in the
last exchange you had specifically asked for signed-off-by's from other
Intel developers. In that process, one of the recent feedback from some
of them was about the name being misleading and confusing.

If you feel strongly about the keeping name "virtual bus", please let
us know and we can circle back with them again.

Thanks,
Ranjani
Dan Williams July 6, 2020, 11:02 p.m. UTC | #41
On Thu, Jul 2, 2020 at 6:44 AM Ranjani Sridharan
<ranjani.sridharan@linux.intel.com> wrote:
[..]
> > > Hi Jason,
> > >
> > > We're addressing the naming in the next version as well. We've had
> > > several people reject the name virtual bus and we've narrowed in on
> > > "ancillary bus" for the new name suggesting that we have the core
> > > device that is attached to the primary bus and one or more sub-
> > > devices
> > > that are attached to the ancillary bus. Please let us know what you
> > > think of it.
> >
> > I'm thinking that the primary person who keeps asking you to create
> > this
> > "virtual bus" was not upset about that name, nor consulted, so why
> > are
> > you changing this?  :(
> >
> > Right now this feels like the old technique of "keep throwing crap at
> > a
> > maintainer until they get so sick of it that they do the work
> > themselves..."
>
> Hi Greg,
>
> It wasnt our intention to frustrate you with the name change but in the
> last exchange you had specifically asked for signed-off-by's from other
> Intel developers. In that process, one of the recent feedback from some
> of them was about the name being misleading and confusing.
>
> If you feel strongly about the keeping name "virtual bus", please let
> us know and we can circle back with them again.

Hey Greg,

Feel free to blame me for the naming thrash it was part of my internal
review feedback trying to crispen the definition of this facility. I
was expecting the next revision to come with the internal reviewed-by
and an explanation of all the items that were changed during that
review.

Ranjani, is the next rev ready to go out with the review items
identified? Let's just proceed with the current direction of the
review tags that Greg asked for, name changes and all, and iterate the
next details on the list with the new patches in hand.
Greg KH July 7, 2020, 2:16 p.m. UTC | #42
On Mon, Jul 06, 2020 at 04:02:57PM -0700, Dan Williams wrote:
> On Thu, Jul 2, 2020 at 6:44 AM Ranjani Sridharan
> <ranjani.sridharan@linux.intel.com> wrote:
> [..]
> > > > Hi Jason,
> > > >
> > > > We're addressing the naming in the next version as well. We've had
> > > > several people reject the name virtual bus and we've narrowed in on
> > > > "ancillary bus" for the new name suggesting that we have the core
> > > > device that is attached to the primary bus and one or more sub-
> > > > devices
> > > > that are attached to the ancillary bus. Please let us know what you
> > > > think of it.
> > >
> > > I'm thinking that the primary person who keeps asking you to create
> > > this
> > > "virtual bus" was not upset about that name, nor consulted, so why
> > > are
> > > you changing this?  :(
> > >
> > > Right now this feels like the old technique of "keep throwing crap at
> > > a
> > > maintainer until they get so sick of it that they do the work
> > > themselves..."
> >
> > Hi Greg,
> >
> > It wasnt our intention to frustrate you with the name change but in the
> > last exchange you had specifically asked for signed-off-by's from other
> > Intel developers. In that process, one of the recent feedback from some
> > of them was about the name being misleading and confusing.
> >
> > If you feel strongly about the keeping name "virtual bus", please let
> > us know and we can circle back with them again.
> 
> Hey Greg,
> 
> Feel free to blame me for the naming thrash it was part of my internal
> review feedback trying to crispen the definition of this facility. I
> was expecting the next revision to come with the internal reviewed-by
> and an explanation of all the items that were changed during that
> review.

That would have been nice to see, instead of it "leaking" like this :(

thanks,

greg k-h
diff mbox series

Patch

diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig
index 4dda4b62509f..609989daf85b 100644
--- a/sound/soc/sof/Kconfig
+++ b/sound/soc/sof/Kconfig
@@ -50,6 +50,25 @@  config SND_SOC_SOF_DEBUG_PROBES
 	  Say Y if you want to enable probes.
 	  If unsure, select "N".
 
+config SND_SOC_SOF_CLIENT
+	tristate
+	select VIRTUAL_BUS
+	help
+	  This option is not user-selectable but automagically handled by
+	  'select' statements at a higher level
+
+config SND_SOC_SOF_CLIENT_SUPPORT
+	bool "SOF enable clients"
+	depends on SND_SOC_SOF
+	help
+	  This adds support for clients with Sound Open Firmware.
+	  The SOF driver adds the capability to separate out the debug
+	  functionality for IPC tests, probes etc. into separate client
+	  devices. This option would also allow adding client devices
+	  based on DSP FW capabilities and ACPI/OF device information.
+	  Say Y if you want to enable clients with SOF.
+	  If unsure, select "N".
+
 config SND_SOC_SOF_DEVELOPER_SUPPORT
 	bool "SOF developer options support"
 	depends on EXPERT
@@ -186,6 +205,7 @@  endif ## SND_SOC_SOF_DEVELOPER_SUPPORT
 
 config SND_SOC_SOF
 	tristate
+	select SND_SOC_SOF_CLIENT if SND_SOC_SOF_CLIENT_SUPPORT
 	select SND_SOC_TOPOLOGY
 	select SND_SOC_SOF_NOCODEC if SND_SOC_SOF_NOCODEC_SUPPORT
 	help
diff --git a/sound/soc/sof/Makefile b/sound/soc/sof/Makefile
index 8eca2f85c90e..c819124c05bb 100644
--- a/sound/soc/sof/Makefile
+++ b/sound/soc/sof/Makefile
@@ -2,6 +2,7 @@ 
 
 snd-sof-objs := core.o ops.o loader.o ipc.o pcm.o pm.o debug.o topology.o\
 		control.o trace.o utils.o sof-audio.o
+snd-sof-$(CONFIG_SND_SOC_SOF_CLIENT) += sof-client.o
 snd-sof-$(CONFIG_SND_SOC_SOF_DEBUG_PROBES) += probe.o compress.o
 
 snd-sof-pci-objs := sof-pci-dev.o
diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index 91acfae7935c..fdfed157e6c0 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -313,8 +313,10 @@  int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data)
 	INIT_LIST_HEAD(&sdev->widget_list);
 	INIT_LIST_HEAD(&sdev->dai_list);
 	INIT_LIST_HEAD(&sdev->route_list);
+	INIT_LIST_HEAD(&sdev->client_list);
 	spin_lock_init(&sdev->ipc_lock);
 	spin_lock_init(&sdev->hw_lock);
+	mutex_init(&sdev->client_mutex);
 
 	if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE))
 		INIT_WORK(&sdev->probe_work, sof_probe_work);
diff --git a/sound/soc/sof/sof-client.c b/sound/soc/sof/sof-client.c
new file mode 100644
index 000000000000..b46080aa062e
--- /dev/null
+++ b/sound/soc/sof/sof-client.c
@@ -0,0 +1,91 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright(c) 2020 Intel Corporation. All rights reserved.
+//
+// Author: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
+//
+
+#include <linux/completion.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/virtual_bus.h>
+#include "sof-client.h"
+#include "sof-priv.h"
+
+static void sof_client_virtdev_release(struct virtbus_device *vdev)
+{
+	struct sof_client_dev *cdev = virtbus_dev_to_sof_client_dev(vdev);
+
+	kfree(cdev);
+}
+
+int sof_client_dev_register(struct snd_sof_dev *sdev,
+			    const char *name)
+{
+	struct sof_client_dev *cdev;
+	struct virtbus_device *vdev;
+	unsigned long time, timeout;
+	int ret;
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return -ENOMEM;
+
+	cdev->sdev = sdev;
+	init_completion(&cdev->probe_complete);
+	vdev = &cdev->vdev;
+	vdev->match_name = name;
+	vdev->dev.parent = sdev->dev;
+	vdev->release = sof_client_virtdev_release;
+
+	/*
+	 * Register virtbus device for the client.
+	 * The error path in virtbus_register_device() calls put_device(),
+	 * which will free cdev in the release callback.
+	 */
+	ret = virtbus_register_device(vdev);
+	if (ret < 0)
+		return ret;
+
+	/* make sure the probe is complete before updating client list */
+	timeout = msecs_to_jiffies(SOF_CLIENT_PROBE_TIMEOUT_MS);
+	time = wait_for_completion_timeout(&cdev->probe_complete, timeout);
+	if (!time) {
+		dev_err(sdev->dev, "error: probe of virtbus dev %s timed out\n",
+			name);
+		virtbus_unregister_device(vdev);
+		return -ETIMEDOUT;
+	}
+
+	/* add to list of SOF client devices */
+	mutex_lock(&sdev->client_mutex);
+	list_add(&cdev->list, &sdev->client_list);
+	mutex_unlock(&sdev->client_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(sof_client_dev_register, SND_SOC_SOF_CLIENT);
+
+int sof_client_ipc_tx_message(struct sof_client_dev *cdev, u32 header,
+			      void *msg_data, size_t msg_bytes,
+			      void *reply_data, size_t reply_bytes)
+{
+	return sof_ipc_tx_message(cdev->sdev->ipc, header, msg_data, msg_bytes,
+				  reply_data, reply_bytes);
+}
+EXPORT_SYMBOL_NS_GPL(sof_client_ipc_tx_message, SND_SOC_SOF_CLIENT);
+
+struct dentry *sof_client_get_debugfs_root(struct sof_client_dev *cdev)
+{
+	return cdev->sdev->debugfs_root;
+}
+EXPORT_SYMBOL_NS_GPL(sof_client_get_debugfs_root, SND_SOC_SOF_CLIENT);
+
+MODULE_AUTHOR("Ranjani Sridharan <ranjani.sridharan@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/sof/sof-client.h b/sound/soc/sof/sof-client.h
new file mode 100644
index 000000000000..fdc4b1511ffc
--- /dev/null
+++ b/sound/soc/sof/sof-client.h
@@ -0,0 +1,84 @@ 
+/* SPDX-License-Identifier: (GPL-2.0-only) */
+
+#ifndef __SOUND_SOC_SOF_CLIENT_H
+#define __SOUND_SOC_SOF_CLIENT_H
+
+#include <linux/completion.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/device/driver.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/virtual_bus.h>
+
+#define SOF_CLIENT_PROBE_TIMEOUT_MS 2000
+
+struct snd_sof_dev;
+
+enum sof_client_type {
+	SOF_CLIENT_AUDIO,
+	SOF_CLIENT_IPC,
+};
+
+/* SOF client device */
+struct sof_client_dev {
+	struct virtbus_device vdev;
+	struct snd_sof_dev *sdev;
+	struct list_head list;	/* item in SOF core client drv list */
+	struct completion probe_complete;
+	void *data;
+};
+
+/* client-specific ops, all optional */
+struct sof_client_ops {
+	int (*client_ipc_rx)(struct sof_client_dev *cdev, u32 msg_cmd);
+};
+
+struct sof_client_drv {
+	const char *name;
+	enum sof_client_type type;
+	const struct sof_client_ops ops;
+	struct virtbus_driver virtbus_drv;
+};
+
+#define virtbus_dev_to_sof_client_dev(virtbus_dev) \
+	container_of(virtbus_dev, struct sof_client_dev, vdev)
+
+static inline int sof_client_drv_register(struct sof_client_drv *drv)
+{
+	return virtbus_register_driver(&drv->virtbus_drv);
+}
+
+static inline void sof_client_drv_unregister(struct sof_client_drv *drv)
+{
+	virtbus_unregister_driver(&drv->virtbus_drv);
+}
+
+int sof_client_dev_register(struct snd_sof_dev *sdev,
+			    const char *name);
+
+static inline void sof_client_dev_unregister(struct sof_client_dev *cdev)
+{
+	virtbus_unregister_device(&cdev->vdev);
+}
+
+int sof_client_ipc_tx_message(struct sof_client_dev *cdev, u32 header,
+			      void *msg_data, size_t msg_bytes,
+			      void *reply_data, size_t reply_bytes);
+
+struct dentry *sof_client_get_debugfs_root(struct sof_client_dev *cdev);
+
+/**
+ * module_sof_client_driver() - Helper macro for registering an SOF Client
+ * driver
+ * @__sof_client_driver: SOF client driver struct
+ *
+ * Helper macro for SOF client drivers which do not do anything special in
+ * module init/exit. This eliminates a lot of boilerplate. Each module may only
+ * use this macro once, and calling it replaces module_init() and module_exit()
+ */
+#define module_sof_client_driver(__sof_client_driver) \
+	module_driver(__sof_client_driver, sof_client_drv_register, \
+			sof_client_drv_unregister)
+
+#endif
diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h
index a4b297c842df..9da7f6f45362 100644
--- a/sound/soc/sof/sof-priv.h
+++ b/sound/soc/sof/sof-priv.h
@@ -438,6 +438,12 @@  struct snd_sof_dev {
 
 	bool msi_enabled;
 
+	/* list of client devices */
+	struct list_head client_list;
+
+	/* mutex to protect client list */
+	struct mutex client_mutex;
+
 	void *private;			/* core does not touch this */
 };