diff mbox

[2/4] ALSA: dice: postpone card registration

Message ID 1450911310-15507-3-git-send-email-o-takashi@sakamocchi.jp (mailing list archive)
State New, archived
Headers show

Commit Message

Takashi Sakamoto Dec. 23, 2015, 10:55 p.m. UTC
Some models based on ASIC for Dice II series (STD, CP) change their
hardware configurations after appearing on IEEE 1394 bus. This is due to
interactions of boot loader (RedBoot), firmwares (eCos) and vendor's
configurations. This causes current ALSA dice driver to get wrong
information about the hardware's capability because its probe function
runs just after detecting unit of the model.

As long as I investigated, it takes a bit time (less than 1 second) to load
the firmware after bootstrap. Just after loaded, the driver can get
information about the unit. Then the hardware is initialized according to
vendor's configurations. After, the got information becomes wrong.
Between bootstrap, firmware loading and post configuration, some bus resets
are observed.

This commit offloads most processing of probe function into workqueue and
schedules the workqueue after successive bus resets. This has an effect to
get correct hardware information and avoid involvement to bus reset storm.

For code simplicity, this change effects all of Dice-based models, i.e.
Dice II, Dice Jr., Dice Mini and Dice III. Due to the same reason, sound
card instance is kept till card free function even if some errors are
detected in the workqueue.

I use a loose strategy to manage a race condition between the workqueue and
the bus reset. This is due to a specification of dice transaction. When bus
reset occurs, registered address for the transaction is cleared. Drivers
must re-register their own address again. While, this operation is
required for the workqueue because the workqueue includes to wait for the
transaction. This commit uses no lock primitives for the race condition in
bus reset handler. Instead, checking 'registered' member of
'struct snd_dice' avoid executing the workqueue several times.

Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 sound/firewire/dice/dice.c | 111 +++++++++++++++++++++++++++++++++------------
 sound/firewire/dice/dice.h |   3 ++
 2 files changed, 86 insertions(+), 28 deletions(-)

Comments

Takashi Sakamoto Dec. 24, 2015, 5:04 a.m. UTC | #1
Hi,

I had a mistake in this patch...

On Dec 24 2015 07:55, Takashi Sakamoto wrote:
> Some models based on ASIC for Dice II series (STD, CP) change their
> hardware configurations after appearing on IEEE 1394 bus. This is due to
> interactions of boot loader (RedBoot), firmwares (eCos) and vendor's
> configurations. This causes current ALSA dice driver to get wrong
> information about the hardware's capability because its probe function
> runs just after detecting unit of the model.
>
> As long as I investigated, it takes a bit time (less than 1 second) to load
> the firmware after bootstrap. Just after loaded, the driver can get
> information about the unit. Then the hardware is initialized according to
> vendor's configurations. After, the got information becomes wrong.
> Between bootstrap, firmware loading and post configuration, some bus resets
> are observed.
>
> This commit offloads most processing of probe function into workqueue and
> schedules the workqueue after successive bus resets. This has an effect to
> get correct hardware information and avoid involvement to bus reset storm.
>
> For code simplicity, this change effects all of Dice-based models, i.e.
> Dice II, Dice Jr., Dice Mini and Dice III. Due to the same reason, sound
> card instance is kept till card free function even if some errors are
> detected in the workqueue.
>
> I use a loose strategy to manage a race condition between the workqueue and
> the bus reset. This is due to a specification of dice transaction. When bus
> reset occurs, registered address for the transaction is cleared. Drivers
> must re-register their own address again. While, this operation is
> required for the workqueue because the workqueue includes to wait for the
> transaction. This commit uses no lock primitives for the race condition in
> bus reset handler. Instead, checking 'registered' member of
> 'struct snd_dice' avoid executing the workqueue several times.
>
> Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
> ---
>   sound/firewire/dice/dice.c | 111 +++++++++++++++++++++++++++++++++------------
>   sound/firewire/dice/dice.h |   3 ++
>   2 files changed, 86 insertions(+), 28 deletions(-)
>
> diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
> index 26271cc..adb8c87 100644
> --- a/sound/firewire/dice/dice.c
> +++ b/sound/firewire/dice/dice.c
> @@ -18,6 +18,8 @@ MODULE_LICENSE("GPL v2");
>   #define WEISS_CATEGORY_ID	0x00
>   #define LOUD_CATEGORY_ID	0x10
>
> +#define PROBE_DELAY_MS		(2 * MSEC_PER_SEC)
> +
>   static int check_dice_category(struct fw_unit *unit)
>   {
>   	struct fw_device *device = fw_parent_device(unit);
> @@ -185,6 +187,9 @@ static void dice_card_free(struct snd_card *card)
>   {
>   	struct snd_dice *dice = card->private_data;
>
> +	/* The workqueue for registration uses the memory block. */
> +	cancel_work_sync(&dice->dwork.work);
> +
>   	snd_dice_stream_destroy_duplex(dice);
>   	snd_dice_transaction_destroy(dice);
>   	fw_unit_put(dice->unit);
> @@ -192,6 +197,66 @@ static void dice_card_free(struct snd_card *card)
>   	mutex_destroy(&dice->mutex);
>   }
>
> +static void do_registration(struct work_struct *work)
> +{
> +	struct snd_dice *dice = container_of(work, struct snd_dice, dwork.work);
> +	int err;
> +
> +	mutex_lock(&dice->mutex);
> +
> +	if (dice->card->shutdown || dice->registered)
> +		goto end;
> +
> +	err = snd_dice_transaction_init(dice);
> +	if (err < 0)
> +		goto end;
> +
> +	err = dice_read_params(dice);
> +	if (err < 0)
> +		goto end;
> +
> +	dice_card_strings(dice);
> +
> +	err = snd_dice_create_pcm(dice);
> +	if (err < 0)
> +		goto end;
> +
> +	err = snd_dice_create_midi(dice);
> +	if (err < 0)
> +		goto end;
> +
> +	err = snd_card_register(dice->card);
> +	if (err < 0)
> +		goto end;
> +
> +	dice->registered = true;
> +end:
> +	mutex_unlock(&dice->mutex);
> +
> +	/*
> +	 * It's a difficult work to manage a race condition between workqueue,
> +	 * unit event handlers and processes. The memory block for this card
> +	 * is released as the same way that usual sound cards are going to be
> +	 * released.
> +	 */
> +}
> +
> +static inline void schedule_registration(struct snd_dice *dice)
> +{
> +	struct fw_card *fw_card = fw_parent_device(dice->unit)->card;
> +	u64 now, delay;
> +
> +	now = get_jiffies_64();
> +	delay = fw_card->reset_jiffies + msecs_to_jiffies(PROBE_DELAY_MS);
> +
> +	if (time_after64(delay, now))
> +		delay -= now;
> +	else
> +		delay = 0;
> +
> +	schedule_delayed_work(&dice->dwork, delay);
> +}
> +
>   static int dice_probe(struct fw_unit *unit, const struct ieee1394_device_id *id)
>   {
>   	struct snd_card *card;
> @@ -205,29 +270,20 @@ static int dice_probe(struct fw_unit *unit, const struct ieee1394_device_id *id)
>   	err = snd_card_new(&unit->device, -1, NULL, THIS_MODULE,
>   			   sizeof(*dice), &card);
>   	if (err < 0)
> -		goto end;
> +		return err;
>
>   	dice = card->private_data;
>   	dice->card = card;
>   	dice->unit = fw_unit_get(unit);
>   	card->private_free = dice_card_free;
> +	dev_set_drvdata(&unit->device, dice);
>
>   	spin_lock_init(&dice->lock);
>   	mutex_init(&dice->mutex);
>   	init_completion(&dice->clock_accepted);
>   	init_waitqueue_head(&dice->hwdep_wait);
>
> -	err = snd_dice_transaction_init(dice);
> -	if (err < 0)
> -		goto error;
> -
> -	err = dice_read_params(dice);
> -	if (err < 0)
> -		goto error;
> -
> -	dice_card_strings(dice);
> -
> -	err = snd_dice_create_pcm(dice);
> +	err = snd_dice_stream_init_duplex(dice);
>   	if (err < 0)
>   		goto error;
>
> @@ -237,23 +293,11 @@ static int dice_probe(struct fw_unit *unit, const struct ieee1394_device_id *id)
>
>   	snd_dice_create_proc(dice);
>
> -	err = snd_dice_create_midi(dice);
> -	if (err < 0)
> -		goto error;
> -
> -	err = snd_dice_stream_init_duplex(dice);
> -	if (err < 0)
> -		goto error;
> +	/* Register this sound card later. */
> +	INIT_DEFERRABLE_WORK(&dice->dwork, do_registration);
> +	schedule_registration(dice);
>
> -	err = snd_card_register(card);
> -	if (err < 0) {
> -		snd_dice_stream_destroy_duplex(dice);
> -		goto error;
> -	}
> -
> -	dev_set_drvdata(&unit->device, dice);
> -end:
> -	return err;
> +	return 0;
>   error:
>   	snd_card_free(card);
>   	return err;
> @@ -263,14 +307,25 @@ static void dice_remove(struct fw_unit *unit)
>   {
>   	struct snd_dice *dice = dev_get_drvdata(&unit->device);
>
> +	/* For a race condition of struct snd_card.shutdown. */
> +	mutex_lock(&dice->mutex);
> +
>   	/* No need to wait for releasing card object in this context. */
>   	snd_card_free_when_closed(dice->card);
> +
> +	mutex_unlock(&dice->mutex);
>   }
>
>   static void dice_bus_reset(struct fw_unit *unit)
>   {
>   	struct snd_dice *dice = dev_get_drvdata(&unit->device);
>
> +	/* Postpone a workqueue for deferred registration. */
> +	if (!dice->registered) {
> +		schedule_registration(dice);
> +		return;
> +	}
> +

This 'return' is needless. The v1 patchset includes the same mistake.

>   	/* The handler address register becomes initialized. */
>   	snd_dice_transaction_reinit(dice);
>
> diff --git a/sound/firewire/dice/dice.h b/sound/firewire/dice/dice.h
> index 101550ac..3d5ebeb 100644
> --- a/sound/firewire/dice/dice.h
> +++ b/sound/firewire/dice/dice.h
> @@ -45,6 +45,9 @@ struct snd_dice {
>   	spinlock_t lock;
>   	struct mutex mutex;
>
> +	bool registered;
> +	struct delayed_work dwork;
> +
>   	/* Offsets for sub-addresses */
>   	unsigned int global_offset;
>   	unsigned int rx_offset;

I'll post new patchset later. Sorry to reviewers...


Regards

Takashi Sakamoto
Stefan Richter Dec. 24, 2015, 3:12 p.m. UTC | #2
On Dec 24 Takashi Sakamoto wrote:
[...]
> Between bootstrap, firmware loading and post configuration, some bus resets
> are observed.
> 
> This commit offloads most processing of probe function into workqueue and
> schedules the workqueue after successive bus resets. This has an effect to
> get correct hardware information and avoid involvement to bus reset storm.
> 
> For code simplicity, this change effects all of Dice-based models, i.e.
> Dice II, Dice Jr., Dice Mini and Dice III. Due to the same reason, sound
> card instance is kept till card free function even if some errors are
> detected in the workqueue.
[...]
> --- a/sound/firewire/dice/dice.c
> +++ b/sound/firewire/dice/dice.c
> @@ -18,6 +18,8 @@ MODULE_LICENSE("GPL v2");
>  #define WEISS_CATEGORY_ID	0x00
>  #define LOUD_CATEGORY_ID	0x10
>  
> +#define PROBE_DELAY_MS		(2 * MSEC_PER_SEC)
> +
>  static int check_dice_category(struct fw_unit *unit)
>  {
>  	struct fw_device *device = fw_parent_device(unit);
> @@ -185,6 +187,9 @@ static void dice_card_free(struct snd_card *card)
>  {
>  	struct snd_dice *dice = card->private_data;
>  
> +	/* The workqueue for registration uses the memory block. */
> +	cancel_work_sync(&dice->dwork.work);

According to the kerneldoc comment at cancel_work_sync, this should
actually be cancel_delayed_work_sync(&dice->dwork);.

> +
>  	snd_dice_stream_destroy_duplex(dice);
>  	snd_dice_transaction_destroy(dice);
>  	fw_unit_put(dice->unit);
> @@ -192,6 +197,66 @@ static void dice_card_free(struct snd_card *card)
>  	mutex_destroy(&dice->mutex);
>  }
>  
> +static void do_registration(struct work_struct *work)
> +{
> +	struct snd_dice *dice = container_of(work, struct snd_dice, dwork.work);
> +	int err;
> +
> +	mutex_lock(&dice->mutex);

So the worker needs to obtain &dice->mutex.  But...

[...]
> @@ -263,14 +307,25 @@ static void dice_remove(struct fw_unit *unit)
>  {
>  	struct snd_dice *dice = dev_get_drvdata(&unit->device);
>  
> +	/* For a race condition of struct snd_card.shutdown. */
> +	mutex_lock(&dice->mutex);
> +
>  	/* No need to wait for releasing card object in this context. */
>  	snd_card_free_when_closed(dice->card);
> +
> +	mutex_unlock(&dice->mutex);
>  }

...if I read snd_card_free_when_closed() and its surrounding code
correctly, then dice_card_free() is called from within this, which can
cause a deadlock.  Right?

If so, then it seems to me that cancel_delayed_work_sync(&dice->dwork)
should be moved out of dice_card_free() and be put at the beginning of
dice_remove().
Stefan Richter Dec. 24, 2015, 7:10 p.m. UTC | #3
On Dec 24 Stefan Richter wrote:
> On Dec 24 Takashi Sakamoto wrote:
> [...]
> > Between bootstrap, firmware loading and post configuration, some bus resets
> > are observed.
> > 
> > This commit offloads most processing of probe function into workqueue and
> > schedules the workqueue after successive bus resets. This has an effect to
> > get correct hardware information and avoid involvement to bus reset storm.
> > 
> > For code simplicity, this change effects all of Dice-based models, i.e.
> > Dice II, Dice Jr., Dice Mini and Dice III. Due to the same reason, sound
> > card instance is kept till card free function even if some errors are
> > detected in the workqueue.  
> [...]
> > --- a/sound/firewire/dice/dice.c
> > +++ b/sound/firewire/dice/dice.c
> > @@ -18,6 +18,8 @@ MODULE_LICENSE("GPL v2");
> >  #define WEISS_CATEGORY_ID	0x00
> >  #define LOUD_CATEGORY_ID	0x10
> >  
> > +#define PROBE_DELAY_MS		(2 * MSEC_PER_SEC)
> > +
> >  static int check_dice_category(struct fw_unit *unit)
> >  {
> >  	struct fw_device *device = fw_parent_device(unit);
> > @@ -185,6 +187,9 @@ static void dice_card_free(struct snd_card *card)
> >  {
> >  	struct snd_dice *dice = card->private_data;
> >  
> > +	/* The workqueue for registration uses the memory block. */
> > +	cancel_work_sync(&dice->dwork.work);  
> 
> According to the kerneldoc comment at cancel_work_sync, this should
> actually be cancel_delayed_work_sync(&dice->dwork);.
> 
> > +
> >  	snd_dice_stream_destroy_duplex(dice);
> >  	snd_dice_transaction_destroy(dice);
> >  	fw_unit_put(dice->unit);
> > @@ -192,6 +197,66 @@ static void dice_card_free(struct snd_card *card)
> >  	mutex_destroy(&dice->mutex);
> >  }

Another comment to dice_card_free() which is aside from this patch:

You are calling mutex_destroy(&dice->mutex) here.  But if I am right in my
understanding that dice_card_free() is called from dice_remove(), then
this is wrong because dice_card_free() still holding the mutex.

I am not sure what the proper fix is.  Perhaps just never perform
mutex_destroy().

> > +static void do_registration(struct work_struct *work)
> > +{
> > +	struct snd_dice *dice = container_of(work, struct snd_dice, dwork.work);
> > +	int err;
> > +
> > +	mutex_lock(&dice->mutex);  
> 
> So the worker needs to obtain &dice->mutex.  But...
> 
> [...]
> > @@ -263,14 +307,25 @@ static void dice_remove(struct fw_unit *unit)
> >  {
> >  	struct snd_dice *dice = dev_get_drvdata(&unit->device);
> >  
> > +	/* For a race condition of struct snd_card.shutdown. */
> > +	mutex_lock(&dice->mutex);
> > +
> >  	/* No need to wait for releasing card object in this context. */
> >  	snd_card_free_when_closed(dice->card);
> > +
> > +	mutex_unlock(&dice->mutex);
> >  }  
> 
> ...if I read snd_card_free_when_closed() and its surrounding code
> correctly, then dice_card_free() is called from within this, which can
> cause a deadlock.  Right?
> 
> If so, then it seems to me that cancel_delayed_work_sync(&dice->dwork)
> should be moved out of dice_card_free() and be put at the beginning of
> dice_remove().
Stefan Richter Dec. 24, 2015, 8:51 p.m. UTC | #4
On Dec 24 Takashi Sakamoto wrote:
> --- a/sound/firewire/dice/dice.c
> +++ b/sound/firewire/dice/dice.c
[...]
>  static void dice_bus_reset(struct fw_unit *unit)
>  {
>  	struct snd_dice *dice = dev_get_drvdata(&unit->device);
>  
> +	/* Postpone a workqueue for deferred registration. */
> +	if (!dice->registered) {
> +		schedule_registration(dice);
> +		return;
> +	}
> +
>  	/* The handler address register becomes initialized. */
>  	snd_dice_transaction_reinit(dice);
>  

In previous versions of the patch, you used
	schedule_delayed_work(&dice->dwork, delay);
in dice_probe() and
	mod_delayed_work(dice->dwork.wq, &dice->dwork, delay);
in dice_bus_reset().

Now you are using schedule_delayed_work() in both.  This means that
dice_bus_reset() is now unable to further defer the work.  Is this
intentional?

By the way, in drivers/firewire/core-cdev.c, I used a somewhat different
workqueue scheduling scheme.  Problem:
  - The first 1000 ms after a bus reset are to be used for re-allocations
    of previously allocated isochronous resources.  Attempts for new iso
    resource allocations shall be deferred until after 1000 ms after the
    latest bus reset.
My solution:
  - The work which is to perform allocations/ reallocations/ deallocations
    is scheduled immediately.  But the worker function (iso_resource_work)
    reschedules itself if it notices that (1.) its job is to allocate and
    (2.) the last bus reset was less than 1 s ago.

I used the same principle in drivers/firewire/core-card.c.  Problem:
  - If system software wants to reset the bus, it shall wait at least
    2000 ms until after the last bus reset.  The reason is to allow for
    various bus reset handling protocols to be performed first (e.g.
    isochronous resource allocations, re-logins and the likes).
My solution:
  - br_work() reschedules itself if it detects that the last bus reset was
    less than 2 s ago.

Admittedly there is a small remaining window after the worker looked at
card->reset_jiffies and before it performs its real work, and another bus
rest could happen in this window.  I suppose if I wanted to close even
this window, I would have to couple card->reset_jiffies with a
bus generation counter and then make the remaining work depended on this
bus generation.

Back to your patch:  I am not sure how strictly you want to guarantee the
delay between last reset and do_registration()'s execution.  Maybe it
would be beneficial to put the check for card->reset_jiffies and self-
rescheduling into do_registration(), similar to the two examples from
firewire-core, or maybe that's not really necessary for your purposes.
Stefan Richter Dec. 24, 2015, 9:04 p.m. UTC | #5
On Dec 24 Stefan Richter wrote:
> I am not sure how strictly you want to guarantee the
> delay between last reset and do_registration()'s execution.  Maybe it
> would be beneficial to put the check for card->reset_jiffies and self-
> rescheduling into do_registration(), similar to the two examples from
> firewire-core, or maybe that's not really necessary for your purposes.

Or as I wrote in the other thread:
Maybe you can live with less precision and simply use a fixed relative
delay, disregarding card->reset_jiffies altogether.
Takashi Sakamoto Dec. 25, 2015, 12:04 a.m. UTC | #6
On Dec 25 2015 04:10, Stefan Richter wrote:
> On Dec 24 Stefan Richter wrote:
>> On Dec 24 Takashi Sakamoto wrote:
>> [...]
>>> Between bootstrap, firmware loading and post configuration, some bus resets
>>> are observed.
>>>
>>> This commit offloads most processing of probe function into workqueue and
>>> schedules the workqueue after successive bus resets. This has an effect to
>>> get correct hardware information and avoid involvement to bus reset storm.
>>>
>>> For code simplicity, this change effects all of Dice-based models, i.e.
>>> Dice II, Dice Jr., Dice Mini and Dice III. Due to the same reason, sound
>>> card instance is kept till card free function even if some errors are
>>> detected in the workqueue.  
>> [...]
>>> --- a/sound/firewire/dice/dice.c
>>> +++ b/sound/firewire/dice/dice.c
>>> @@ -18,6 +18,8 @@ MODULE_LICENSE("GPL v2");
>>>  #define WEISS_CATEGORY_ID	0x00
>>>  #define LOUD_CATEGORY_ID	0x10
>>>  
>>> +#define PROBE_DELAY_MS		(2 * MSEC_PER_SEC)
>>> +
>>>  static int check_dice_category(struct fw_unit *unit)
>>>  {
>>>  	struct fw_device *device = fw_parent_device(unit);
>>> @@ -185,6 +187,9 @@ static void dice_card_free(struct snd_card *card)
>>>  {
>>>  	struct snd_dice *dice = card->private_data;
>>>  
>>> +	/* The workqueue for registration uses the memory block. */
>>> +	cancel_work_sync(&dice->dwork.work);  
>>
>> According to the kerneldoc comment at cancel_work_sync, this should
>> actually be cancel_delayed_work_sync(&dice->dwork);.
>>
>>> +
>>>  	snd_dice_stream_destroy_duplex(dice);
>>>  	snd_dice_transaction_destroy(dice);
>>>  	fw_unit_put(dice->unit);
>>> @@ -192,6 +197,66 @@ static void dice_card_free(struct snd_card *card)
>>>  	mutex_destroy(&dice->mutex);
>>>  }
> 
> Another comment to dice_card_free() which is aside from this patch:
> 
> You are calling mutex_destroy(&dice->mutex) here.  But if I am right in my
> understanding that dice_card_free() is called from dice_remove(), then
> this is wrong because dice_card_free() still holding the mutex.
> 
> I am not sure what the proper fix is.  Perhaps just never perform
> mutex_destroy().

Indeed.

I decide to move the cancel_work_sync() to .remove callback.
(actually, it should be cancel_delayed_work_sync().) After .remove
callback, .update callback is not called anymore therefore no works are
schedulled for the registration.

Then, the mutex_lock()/mutex_unlock() is not need for this purpose. When
sound card is released, the work is confirmed to finished and
unschedulled anymore because .remove is called before.

>>> +static void do_registration(struct work_struct *work)
>>> +{
>>> +	struct snd_dice *dice = container_of(work, struct snd_dice, dwork.work);
>>> +	int err;
>>> +
>>> +	mutex_lock(&dice->mutex);  
>>
>> So the worker needs to obtain &dice->mutex.  But...
>>
>> [...]
>>> @@ -263,14 +307,25 @@ static void dice_remove(struct fw_unit *unit)
>>>  {
>>>  	struct snd_dice *dice = dev_get_drvdata(&unit->device);
>>>  
>>> +	/* For a race condition of struct snd_card.shutdown. */
>>> +	mutex_lock(&dice->mutex);
>>> +
>>>  	/* No need to wait for releasing card object in this context. */
>>>  	snd_card_free_when_closed(dice->card);
>>> +
>>> +	mutex_unlock(&dice->mutex);
>>>  }  
>>
>> ...if I read snd_card_free_when_closed() and its surrounding code
>> correctly, then dice_card_free() is called from within this, which can
>> cause a deadlock.  Right?
>>
>> If so, then it seems to me that cancel_delayed_work_sync(&dice->dwork)
>> should be moved out of dice_card_free() and be put at the beginning of
>> dice_remove().


Thanks

Takashi Sakamoto
Takashi Sakamoto Dec. 25, 2015, 12:21 a.m. UTC | #7
On De 25 2015 05:51, Stefan Richter wrote:
> On Dec 24 Takashi Sakamoto wrote:
>> --- a/sound/firewire/dice/dice.c
>> +++ b/sound/firewire/dice/dice.c
> [...]
>>  static void dice_bus_reset(struct fw_unit *unit)
>>  {
>>  	struct snd_dice *dice = dev_get_drvdata(&unit->device);
>>  
>> +	/* Postpone a workqueue for deferred registration. */
>> +	if (!dice->registered) {
>> +		schedule_registration(dice);
>> +		return;
>> +	}
>> +
>>  	/* The handler address register becomes initialized. */
>>  	snd_dice_transaction_reinit(dice);
>>  
> 
> In previous versions of the patch, you used
> 	schedule_delayed_work(&dice->dwork, delay);
> in dice_probe() and
> 	mod_delayed_work(dice->dwork.wq, &dice->dwork, delay);
> in dice_bus_reset().
> 
> Now you are using schedule_delayed_work() in both.  This means that
> dice_bus_reset() is now unable to further defer the work.  Is this
> intentional?

No intention, just my mistake. Originally, I had an intention to use
'mod_delayed_work()' here.
https://www.kernel.org/doc/htmldocs/device-drivers/API-mod-delayed-work.html

> By the way, in drivers/firewire/core-cdev.c, I used a somewhat different
> workqueue scheduling scheme.  Problem:
>   - The first 1000 ms after a bus reset are to be used for re-allocations
>     of previously allocated isochronous resources.  Attempts for new iso
>     resource allocations shall be deferred until after 1000 ms after the
>     latest bus reset.
> My solution:
>   - The work which is to perform allocations/ reallocations/ deallocations
>     is scheduled immediately.  But the worker function (iso_resource_work)
>     reschedules itself if it notices that (1.) its job is to allocate and
>     (2.) the last bus reset was less than 1 s ago.
> 
> I used the same principle in drivers/firewire/core-card.c.  Problem:
>   - If system software wants to reset the bus, it shall wait at least
>     2000 ms until after the last bus reset.  The reason is to allow for
>     various bus reset handling protocols to be performed first (e.g.
>     isochronous resource allocations, re-logins and the likes).
> My solution:
>   - br_work() reschedules itself if it detects that the last bus reset was
>     less than 2 s ago.
> 
> Admittedly there is a small remaining window after the worker looked at
> card->reset_jiffies and before it performs its real work, and another bus
> rest could happen in this window.  I suppose if I wanted to close even
> this window, I would have to couple card->reset_jiffies with a
> bus generation counter and then make the remaining work depended on this
> bus generation.
> 
> Back to your patch:  I am not sure how strictly you want to guarantee the
> delay between last reset and do_registration()'s execution.  Maybe it
> would be beneficial to put the check for card->reset_jiffies and self-
> rescheduling into do_registration(), similar to the two examples from
> firewire-core, or maybe that's not really necessary for your purposes.

On Dec 25 2015 06:04, Stefan Richter wrote:
> Or as I wrote in the other thread:
> Maybe you can live with less precision and simply use a fixed relative
> delay, disregarding card->reset_jiffies altogether.

For the case that users execute insmod/rmmod by their own. In this case,
no need to be delay for the registration work.



Thanks

Takashi Sakamoto
diff mbox

Patch

diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
index 26271cc..adb8c87 100644
--- a/sound/firewire/dice/dice.c
+++ b/sound/firewire/dice/dice.c
@@ -18,6 +18,8 @@  MODULE_LICENSE("GPL v2");
 #define WEISS_CATEGORY_ID	0x00
 #define LOUD_CATEGORY_ID	0x10
 
+#define PROBE_DELAY_MS		(2 * MSEC_PER_SEC)
+
 static int check_dice_category(struct fw_unit *unit)
 {
 	struct fw_device *device = fw_parent_device(unit);
@@ -185,6 +187,9 @@  static void dice_card_free(struct snd_card *card)
 {
 	struct snd_dice *dice = card->private_data;
 
+	/* The workqueue for registration uses the memory block. */
+	cancel_work_sync(&dice->dwork.work);
+
 	snd_dice_stream_destroy_duplex(dice);
 	snd_dice_transaction_destroy(dice);
 	fw_unit_put(dice->unit);
@@ -192,6 +197,66 @@  static void dice_card_free(struct snd_card *card)
 	mutex_destroy(&dice->mutex);
 }
 
+static void do_registration(struct work_struct *work)
+{
+	struct snd_dice *dice = container_of(work, struct snd_dice, dwork.work);
+	int err;
+
+	mutex_lock(&dice->mutex);
+
+	if (dice->card->shutdown || dice->registered)
+		goto end;
+
+	err = snd_dice_transaction_init(dice);
+	if (err < 0)
+		goto end;
+
+	err = dice_read_params(dice);
+	if (err < 0)
+		goto end;
+
+	dice_card_strings(dice);
+
+	err = snd_dice_create_pcm(dice);
+	if (err < 0)
+		goto end;
+
+	err = snd_dice_create_midi(dice);
+	if (err < 0)
+		goto end;
+
+	err = snd_card_register(dice->card);
+	if (err < 0)
+		goto end;
+
+	dice->registered = true;
+end:
+	mutex_unlock(&dice->mutex);
+
+	/*
+	 * It's a difficult work to manage a race condition between workqueue,
+	 * unit event handlers and processes. The memory block for this card
+	 * is released as the same way that usual sound cards are going to be
+	 * released.
+	 */
+}
+
+static inline void schedule_registration(struct snd_dice *dice)
+{
+	struct fw_card *fw_card = fw_parent_device(dice->unit)->card;
+	u64 now, delay;
+
+	now = get_jiffies_64();
+	delay = fw_card->reset_jiffies + msecs_to_jiffies(PROBE_DELAY_MS);
+
+	if (time_after64(delay, now))
+		delay -= now;
+	else
+		delay = 0;
+
+	schedule_delayed_work(&dice->dwork, delay);
+}
+
 static int dice_probe(struct fw_unit *unit, const struct ieee1394_device_id *id)
 {
 	struct snd_card *card;
@@ -205,29 +270,20 @@  static int dice_probe(struct fw_unit *unit, const struct ieee1394_device_id *id)
 	err = snd_card_new(&unit->device, -1, NULL, THIS_MODULE,
 			   sizeof(*dice), &card);
 	if (err < 0)
-		goto end;
+		return err;
 
 	dice = card->private_data;
 	dice->card = card;
 	dice->unit = fw_unit_get(unit);
 	card->private_free = dice_card_free;
+	dev_set_drvdata(&unit->device, dice);
 
 	spin_lock_init(&dice->lock);
 	mutex_init(&dice->mutex);
 	init_completion(&dice->clock_accepted);
 	init_waitqueue_head(&dice->hwdep_wait);
 
-	err = snd_dice_transaction_init(dice);
-	if (err < 0)
-		goto error;
-
-	err = dice_read_params(dice);
-	if (err < 0)
-		goto error;
-
-	dice_card_strings(dice);
-
-	err = snd_dice_create_pcm(dice);
+	err = snd_dice_stream_init_duplex(dice);
 	if (err < 0)
 		goto error;
 
@@ -237,23 +293,11 @@  static int dice_probe(struct fw_unit *unit, const struct ieee1394_device_id *id)
 
 	snd_dice_create_proc(dice);
 
-	err = snd_dice_create_midi(dice);
-	if (err < 0)
-		goto error;
-
-	err = snd_dice_stream_init_duplex(dice);
-	if (err < 0)
-		goto error;
+	/* Register this sound card later. */
+	INIT_DEFERRABLE_WORK(&dice->dwork, do_registration);
+	schedule_registration(dice);
 
-	err = snd_card_register(card);
-	if (err < 0) {
-		snd_dice_stream_destroy_duplex(dice);
-		goto error;
-	}
-
-	dev_set_drvdata(&unit->device, dice);
-end:
-	return err;
+	return 0;
 error:
 	snd_card_free(card);
 	return err;
@@ -263,14 +307,25 @@  static void dice_remove(struct fw_unit *unit)
 {
 	struct snd_dice *dice = dev_get_drvdata(&unit->device);
 
+	/* For a race condition of struct snd_card.shutdown. */
+	mutex_lock(&dice->mutex);
+
 	/* No need to wait for releasing card object in this context. */
 	snd_card_free_when_closed(dice->card);
+
+	mutex_unlock(&dice->mutex);
 }
 
 static void dice_bus_reset(struct fw_unit *unit)
 {
 	struct snd_dice *dice = dev_get_drvdata(&unit->device);
 
+	/* Postpone a workqueue for deferred registration. */
+	if (!dice->registered) {
+		schedule_registration(dice);
+		return;
+	}
+
 	/* The handler address register becomes initialized. */
 	snd_dice_transaction_reinit(dice);
 
diff --git a/sound/firewire/dice/dice.h b/sound/firewire/dice/dice.h
index 101550ac..3d5ebeb 100644
--- a/sound/firewire/dice/dice.h
+++ b/sound/firewire/dice/dice.h
@@ -45,6 +45,9 @@  struct snd_dice {
 	spinlock_t lock;
 	struct mutex mutex;
 
+	bool registered;
+	struct delayed_work dwork;
+
 	/* Offsets for sub-addresses */
 	unsigned int global_offset;
 	unsigned int rx_offset;