diff mbox series

pinctrl: Add lock to ensure the state atomization

Message ID 20231201152931.31161-1-quic_aiquny@quicinc.com (mailing list archive)
State Superseded
Headers show
Series pinctrl: Add lock to ensure the state atomization | expand

Commit Message

Aiqun Yu (Maria) Dec. 1, 2023, 3:29 p.m. UTC
Currently pinctrl_select_state is an export symbol and don't have
effective re-entrance protect design. And possible of pinctrl state
changed during pinctrl_commit_state handling. Add per pinctrl lock to
ensure the old state and new state transition atomization.
Move dev error print message right before old_state pinctrl_select_state
and out of lock protection to avoid console related driver call
pinctrl_select_state recursively.

Fixes: 4198a9b57106 ("pinctrl: avoid reload of p state in list iteration")
Signed-off-by: Maria Yu <quic_aiquny@quicinc.com>
---
 drivers/pinctrl/core.c | 11 +++++++++--
 drivers/pinctrl/core.h |  2 ++
 2 files changed, 11 insertions(+), 2 deletions(-)


base-commit: 994d5c58e50e91bb02c7be4a91d5186292a895c8

Comments

Bjorn Andersson Dec. 1, 2023, 8:39 p.m. UTC | #1
On Fri, Dec 01, 2023 at 11:29:31PM +0800, Maria Yu wrote:
> Currently pinctrl_select_state is an export symbol and don't have
> effective re-entrance protect design. And possible of pinctrl state
> changed during pinctrl_commit_state handling. Add per pinctrl lock to
> ensure the old state and new state transition atomization.
> Move dev error print message right before old_state pinctrl_select_state
> and out of lock protection to avoid console related driver call
> pinctrl_select_state recursively.

I'm uncertain about the validity of having client code call this api in
a racy manner. I'm likely just missing something here... It would be
nice if this scenario was described in a little bit more detail.

The recursive error print sounds like a distinct problem of its own,
that warrants being introduced in a patch of its own. But as with the
other part, I'm not able to spot a code path in the upstream kernel
where this hppens, so please properly describe the scenario where
touching the console would result back in another pinctrl_select_state().

Thanks,
Bjorn

> 
> Fixes: 4198a9b57106 ("pinctrl: avoid reload of p state in list iteration")
> Signed-off-by: Maria Yu <quic_aiquny@quicinc.com>
> ---
>  drivers/pinctrl/core.c | 11 +++++++++--
>  drivers/pinctrl/core.h |  2 ++
>  2 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
> index f2977eb65522..a19c286bf82e 100644
> --- a/drivers/pinctrl/core.c
> +++ b/drivers/pinctrl/core.c
> @@ -1066,6 +1066,7 @@ static struct pinctrl *create_pinctrl(struct device *dev,
>  	p->dev = dev;
>  	INIT_LIST_HEAD(&p->states);
>  	INIT_LIST_HEAD(&p->dt_maps);
> +	spin_lock_init(&p->lock);
>  
>  	ret = pinctrl_dt_to_map(p, pctldev);
>  	if (ret < 0) {
> @@ -1262,9 +1263,12 @@ static void pinctrl_link_add(struct pinctrl_dev *pctldev,
>  static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
>  {
>  	struct pinctrl_setting *setting, *setting2;
> -	struct pinctrl_state *old_state = READ_ONCE(p->state);
> +	struct pinctrl_state *old_state;
>  	int ret;
> +	unsigned long flags;
>  
> +	spin_lock_irqsave(&p->lock, flags);
> +	old_state = p->state;
>  	if (old_state) {
>  		/*
>  		 * For each pinmux setting in the old state, forget SW's record
> @@ -1329,11 +1333,11 @@ static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
>  	}
>  
>  	p->state = state;
> +	spin_unlock_irqrestore(&p->lock, flags);
>  
>  	return 0;
>  
>  unapply_new_state:
> -	dev_err(p->dev, "Error applying setting, reverse things back\n");
>  
>  	list_for_each_entry(setting2, &state->settings, node) {
>  		if (&setting2->node == &setting->node)
> @@ -1349,6 +1353,9 @@ static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
>  			pinmux_disable_setting(setting2);
>  	}
>  
> +	spin_unlock_irqrestore(&p->lock, flags);
> +
> +	dev_err(p->dev, "Error applying setting, reverse things back\n");
>  	/* There's no infinite recursive loop here because p->state is NULL */
>  	if (old_state)
>  		pinctrl_select_state(p, old_state);
> diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h
> index 530370443c19..86fc41393f7b 100644
> --- a/drivers/pinctrl/core.h
> +++ b/drivers/pinctrl/core.h
> @@ -12,6 +12,7 @@
>  #include <linux/list.h>
>  #include <linux/mutex.h>
>  #include <linux/radix-tree.h>
> +#include <linux/spinlock.h>
>  #include <linux/types.h>
>  
>  #include <linux/pinctrl/machine.h>
> @@ -91,6 +92,7 @@ struct pinctrl {
>  	struct pinctrl_state *state;
>  	struct list_head dt_maps;
>  	struct kref users;
> +	spinlock_t lock;
>  };
>  
>  /**
> 
> base-commit: 994d5c58e50e91bb02c7be4a91d5186292a895c8
> -- 
> 2.17.1
> 
>
Aiqun Yu (Maria) Dec. 4, 2023, 9:57 a.m. UTC | #2
On 12/2/2023 4:39 AM, Bjorn Andersson wrote:
> On Fri, Dec 01, 2023 at 11:29:31PM +0800, Maria Yu wrote:
>> Currently pinctrl_select_state is an export symbol and don't have
>> effective re-entrance protect design. And possible of pinctrl state
>> changed during pinctrl_commit_state handling. Add per pinctrl lock to
>> ensure the old state and new state transition atomization.
>> Move dev error print message right before old_state pinctrl_select_state
>> and out of lock protection to avoid console related driver call
>> pinctrl_select_state recursively.
> 
> I'm uncertain about the validity of having client code call this api in
> a racy manner. I'm likely just missing something here... It would be
> nice if this scenario was described in a little bit more detail.
Hi Bjorn,

we've got a customer dump that the real racy happened, and the system 
frequently have printk message like:
   "not freeing pin xx (xxx) as part of deactivating group xxx - it is
already used for some other setting".
Finally the system crashed after the flood log.

We've inform the customer to check their own client code which called 
this api, to have proper lock to avoid racy of per dev 
pinctrl_select_state call from customer driver end.
For example:
LOCK;
pinctrl_select_state();
gpio pulling;
udelay();
check state;
other hardware behaviors;
UNLOCK;

While it is still unnecessary the volatile re-load of p->state for the 
interation and so I upstream a patch like link[2].

while during the merge discussion, upstream maintainer suggest to have 
the lock issue fixed, instead of only READ_ONCE for the interation.
I think it is also make sense since although current in-tree driver have 
take care of each pinctrl_select_state call, since it is a export 
symbole and we've see the similar issue continuously (a year back ago 
also we've seen similar issue before[3]).

The whole serials discussion can be found link here:
[1] 
https://lore.kernel.org/lkml/e011b3e9-7c09-4214-8e9c-90e12c38bbaa@quicinc.com/
[2] 
https://lore.kernel.org/lkml/20231115102824.23727-1-quic_aiquny@quicinc.com/
[3] 
https://lore.kernel.org/lkml/20221027065408.36977-1-quic_aiquny@quicinc.com/

> 
> The recursive error print sounds like a distinct problem of its own,
> that warrants being introduced in a patch of its own. But as with the
> other part, I'm not able to spot a code path in the upstream kernel
> where this hppens, so please properly describe the scenario where
> touching the console would result back in another pinctrl_select_state().
For this part, I am thinking about a spin lock is introduced and have 
the error log out of the lock will be safer.
The current patch disable irq during the lock, and some console driver 
rely on interrupt to get tx dma/fifo ready.
Also console driver will be a pinctrl client, so avoid unnecessary 
recursive in theory.
Just incase some out of tree concole driver was able to use the 
pinctrl_select_state in console write related APIs as well.
> 
> Thanks,
> Bjorn
> 
>>
>> Fixes: 4198a9b57106 ("pinctrl: avoid reload of p state in list iteration")
>> Signed-off-by: Maria Yu <quic_aiquny@quicinc.com>
>> ---
>>   drivers/pinctrl/core.c | 11 +++++++++--
>>   drivers/pinctrl/core.h |  2 ++
>>   2 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
>> index f2977eb65522..a19c286bf82e 100644
>> --- a/drivers/pinctrl/core.c
>> +++ b/drivers/pinctrl/core.c
>> @@ -1066,6 +1066,7 @@ static struct pinctrl *create_pinctrl(struct device *dev,
>>   	p->dev = dev;
>>   	INIT_LIST_HEAD(&p->states);
>>   	INIT_LIST_HEAD(&p->dt_maps);
>> +	spin_lock_init(&p->lock);
>>   
>>   	ret = pinctrl_dt_to_map(p, pctldev);
>>   	if (ret < 0) {
>> @@ -1262,9 +1263,12 @@ static void pinctrl_link_add(struct pinctrl_dev *pctldev,
>>   static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
>>   {
>>   	struct pinctrl_setting *setting, *setting2;
>> -	struct pinctrl_state *old_state = READ_ONCE(p->state);
>> +	struct pinctrl_state *old_state;
>>   	int ret;
>> +	unsigned long flags;
>>   
>> +	spin_lock_irqsave(&p->lock, flags);
>> +	old_state = p->state;
>>   	if (old_state) {
>>   		/*
>>   		 * For each pinmux setting in the old state, forget SW's record
>> @@ -1329,11 +1333,11 @@ static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
>>   	}
>>   
>>   	p->state = state;
>> +	spin_unlock_irqrestore(&p->lock, flags);
>>   
>>   	return 0;
>>   
>>   unapply_new_state:
>> -	dev_err(p->dev, "Error applying setting, reverse things back\n");
>>   
>>   	list_for_each_entry(setting2, &state->settings, node) {
>>   		if (&setting2->node == &setting->node)
>> @@ -1349,6 +1353,9 @@ static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
>>   			pinmux_disable_setting(setting2);
>>   	}
>>   
>> +	spin_unlock_irqrestore(&p->lock, flags);
>> +
>> +	dev_err(p->dev, "Error applying setting, reverse things back\n");
>>   	/* There's no infinite recursive loop here because p->state is NULL */
>>   	if (old_state)
>>   		pinctrl_select_state(p, old_state);
>> diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h
>> index 530370443c19..86fc41393f7b 100644
>> --- a/drivers/pinctrl/core.h
>> +++ b/drivers/pinctrl/core.h
>> @@ -12,6 +12,7 @@
>>   #include <linux/list.h>
>>   #include <linux/mutex.h>
>>   #include <linux/radix-tree.h>
>> +#include <linux/spinlock.h>
>>   #include <linux/types.h>
>>   
>>   #include <linux/pinctrl/machine.h>
>> @@ -91,6 +92,7 @@ struct pinctrl {
>>   	struct pinctrl_state *state;
>>   	struct list_head dt_maps;
>>   	struct kref users;
>> +	spinlock_t lock;
>>   };
>>   
>>   /**
>>
>> base-commit: 994d5c58e50e91bb02c7be4a91d5186292a895c8
>> -- 
>> 2.17.1
>>
>>
Bjorn Andersson Dec. 4, 2023, 4:46 p.m. UTC | #3
On Mon, Dec 04, 2023 at 05:57:42PM +0800, Aiqun(Maria) Yu wrote:
> On 12/2/2023 4:39 AM, Bjorn Andersson wrote:
> > On Fri, Dec 01, 2023 at 11:29:31PM +0800, Maria Yu wrote:
> > > Currently pinctrl_select_state is an export symbol and don't have
> > > effective re-entrance protect design. And possible of pinctrl state
> > > changed during pinctrl_commit_state handling. Add per pinctrl lock to
> > > ensure the old state and new state transition atomization.
> > > Move dev error print message right before old_state pinctrl_select_state
> > > and out of lock protection to avoid console related driver call
> > > pinctrl_select_state recursively.
> > 
> > I'm uncertain about the validity of having client code call this api in
> > a racy manner. I'm likely just missing something here... It would be
> > nice if this scenario was described in a little bit more detail.
> Hi Bjorn,
> 
> we've got a customer dump that the real racy happened, and the system
> frequently have printk message like:
>   "not freeing pin xx (xxx) as part of deactivating group xxx - it is
> already used for some other setting".
> Finally the system crashed after the flood log.
> 

Sounds like we have a valid issue, but let's make sure that we
describe the problem on its technical grounds in the commit that is
upstreamed - if nothing else, so that others can determine if the
solution matches their bug reports.

> We've inform the customer to check their own client code which called this
> api, to have proper lock to avoid racy of per dev pinctrl_select_state call
> from customer driver end.
> For example:
> LOCK;
> pinctrl_select_state();

Placing a lock inside pinctrl_select_state() will not make this whole
sequence atomic, so if the client driver needs to know that the state
remains from here until the "other hardware behaviors" below, something
more is needed.

Perhaps I'm misunderstanding what you're saying though?

> gpio pulling;
> udelay();
> check state;
> other hardware behaviors;
> UNLOCK;
> 
> While it is still unnecessary the volatile re-load of p->state for the
> interation and so I upstream a patch like link[2].
> 
> while during the merge discussion, upstream maintainer suggest to have the
> lock issue fixed, instead of only READ_ONCE for the interation.
> I think it is also make sense since although current in-tree driver have
> take care of each pinctrl_select_state call, since it is a export symbole
> and we've see the similar issue continuously (a year back ago also we've
> seen similar issue before[3]).
> 

I think you're correcting a real problem, in that two contexts calling
pinctrl_select_state() seems to be able to cause non-deterministic
results. But is the motivation "pinctrl_select_state() is an
EXPORT_SYMBOL, so let's make it thread safe", or is the motivation
"during async probing of devices it's possible to end up in
pinctrl_select_state() from multiple contexts simultaneously, so make it
thread safe"?

> The whole serials discussion can be found link here:
> [1] https://lore.kernel.org/lkml/e011b3e9-7c09-4214-8e9c-90e12c38bbaa@quicinc.com/
> [2]
> https://lore.kernel.org/lkml/20231115102824.23727-1-quic_aiquny@quicinc.com/
> [3]
> https://lore.kernel.org/lkml/20221027065408.36977-1-quic_aiquny@quicinc.com/
> 
> > 
> > The recursive error print sounds like a distinct problem of its own,
> > that warrants being introduced in a patch of its own. But as with the
> > other part, I'm not able to spot a code path in the upstream kernel
> > where this hppens, so please properly describe the scenario where
> > touching the console would result back in another pinctrl_select_state().
> For this part, I am thinking about a spin lock is introduced and have the
> error log out of the lock will be safer.
> The current patch disable irq during the lock, and some console driver rely
> on interrupt to get tx dma/fifo ready.

Logging outside the region with interrupts disabled make total sense,
I'm definitely in favor of this.

> Also console driver will be a pinctrl client, so avoid unnecessary recursive
> in theory.

I don't think a console driver should pinctrl_select_state() from its
tx path, that's why I'm asking.

But perhaps I'm missing some scenario, if so please describe this in the
commit message.

> Just incase some out of tree concole driver was able to use the
> pinctrl_select_state in console write related APIs as well.

If there is a valid usage pattern I think we should consider that, but
we do not care about out-of-tree drivers misusing/abusing framework
APIs.

Regards,
Bjorn
Aiqun Yu (Maria) Dec. 5, 2023, 7:46 a.m. UTC | #4
On 12/5/2023 12:46 AM, Bjorn Andersson wrote:
> On Mon, Dec 04, 2023 at 05:57:42PM +0800, Aiqun(Maria) Yu wrote:
>> On 12/2/2023 4:39 AM, Bjorn Andersson wrote:
>>> On Fri, Dec 01, 2023 at 11:29:31PM +0800, Maria Yu wrote:
>>>> Currently pinctrl_select_state is an export symbol and don't have
>>>> effective re-entrance protect design. And possible of pinctrl state
>>>> changed during pinctrl_commit_state handling. Add per pinctrl lock to
>>>> ensure the old state and new state transition atomization.
>>>> Move dev error print message right before old_state pinctrl_select_state
>>>> and out of lock protection to avoid console related driver call
>>>> pinctrl_select_state recursively.
>>>
>>> I'm uncertain about the validity of having client code call this api in
>>> a racy manner. I'm likely just missing something here... It would be
>>> nice if this scenario was described in a little bit more detail.
>> Hi Bjorn,
>>
>> we've got a customer dump that the real racy happened, and the system
>> frequently have printk message like:
>>    "not freeing pin xx (xxx) as part of deactivating group xxx - it is
>> already used for some other setting".
>> Finally the system crashed after the flood log.
>>
> 
> Sounds like we have a valid issue, but let's make sure that we
> describe the problem on its technical grounds in the commit that is
> upstreamed - if nothing else, so that others can determine if the
> solution matches their bug reports.
> 
>> We've inform the customer to check their own client code which called this
>> api, to have proper lock to avoid racy of per dev pinctrl_select_state call
>> from customer driver end.
>> For example:
>> LOCK;
>> pinctrl_select_state();
> 
> Placing a lock inside pinctrl_select_state() will not make this whole
> sequence atomic, so if the client driver needs to know that the state
> remains from here until the "other hardware behaviors" below, something
> more is needed.
Agree.
This is only an example to enforcing from the clients which call this 
API. while apparently not all clients ensured this kind of lock safe 
when to call pinctrl_select_state, so that's why placing a lock inside 
pinctrl_select_state to ensure atomic of per dev pinctrl at least.
> 
> Perhaps I'm misunderstanding what you're saying though?

> 
>> gpio pulling;
>> udelay();
>> check state;
>> other hardware behaviors;
>> UNLOCK;
>>
>> While it is still unnecessary the volatile re-load of p->state for the
>> interation and so I upstream a patch like link[2].
>>
>> while during the merge discussion, upstream maintainer suggest to have the
>> lock issue fixed, instead of only READ_ONCE for the interation.
>> I think it is also make sense since although current in-tree driver have
>> take care of each pinctrl_select_state call, since it is a export symbole
>> and we've see the similar issue continuously (a year back ago also we've
>> seen similar issue before[3]).
>>
> 
> I think you're correcting a real problem, in that two contexts calling
> pinctrl_select_state() seems to be able to cause non-deterministic
> results. But is the motivation "pinctrl_select_state() is an
> EXPORT_SYMBOL, so let's make it thread safe", or is the motivation
EXPORT_SYMBOL pinctrl_select_state, let's make it thread safe is a 
motivation here. As above reason explained.
> "during async probing of devices it's possible to end up in
> pinctrl_select_state() from multiple contexts simultaneously, so make it
> thread safe"?
> 
>> The whole serials discussion can be found link here:
>> [1] https://lore.kernel.org/lkml/e011b3e9-7c09-4214-8e9c-90e12c38bbaa@quicinc.com/
>> [2]
>> https://lore.kernel.org/lkml/20231115102824.23727-1-quic_aiquny@quicinc.com/
>> [3]
>> https://lore.kernel.org/lkml/20221027065408.36977-1-quic_aiquny@quicinc.com/
>>
>>>
>>> The recursive error print sounds like a distinct problem of its own,
>>> that warrants being introduced in a patch of its own. But as with the
>>> other part, I'm not able to spot a code path in the upstream kernel
>>> where this hppens, so please properly describe the scenario where
>>> touching the console would result back in another pinctrl_select_state().
>> For this part, I am thinking about a spin lock is introduced and have the
>> error log out of the lock will be safer.
>> The current patch disable irq during the lock, and some console driver rely
>> on interrupt to get tx dma/fifo ready.
> 
> Logging outside the region with interrupts disabled make total sense,
> I'm definitely in favor of this.
Fair enough.
> 
>> Also console driver will be a pinctrl client, so avoid unnecessary recursive
>> in theory.
> 
> I don't think a console driver should pinctrl_select_state() from its
> tx path, that's why I'm asking.
> 
Got you. I haven't seen in-tree console driver have tx path call 
pinctrl_select_state. Will re-vise the commit comments accordingly.
> But perhaps I'm missing some scenario, if so please describe this in the
> commit message.
> 

>> Just incase some out of tree concole driver was able to use the
>> pinctrl_select_state in console write related APIs as well.
> 
> If there is a valid usage pattern I think we should consider that, but
> we do not care about out-of-tree drivers misusing/abusing framework
> APIs.
Fair.
> 
> Regards,
> Bjorn
diff mbox series

Patch

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index f2977eb65522..a19c286bf82e 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1066,6 +1066,7 @@  static struct pinctrl *create_pinctrl(struct device *dev,
 	p->dev = dev;
 	INIT_LIST_HEAD(&p->states);
 	INIT_LIST_HEAD(&p->dt_maps);
+	spin_lock_init(&p->lock);
 
 	ret = pinctrl_dt_to_map(p, pctldev);
 	if (ret < 0) {
@@ -1262,9 +1263,12 @@  static void pinctrl_link_add(struct pinctrl_dev *pctldev,
 static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
 {
 	struct pinctrl_setting *setting, *setting2;
-	struct pinctrl_state *old_state = READ_ONCE(p->state);
+	struct pinctrl_state *old_state;
 	int ret;
+	unsigned long flags;
 
+	spin_lock_irqsave(&p->lock, flags);
+	old_state = p->state;
 	if (old_state) {
 		/*
 		 * For each pinmux setting in the old state, forget SW's record
@@ -1329,11 +1333,11 @@  static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
 	}
 
 	p->state = state;
+	spin_unlock_irqrestore(&p->lock, flags);
 
 	return 0;
 
 unapply_new_state:
-	dev_err(p->dev, "Error applying setting, reverse things back\n");
 
 	list_for_each_entry(setting2, &state->settings, node) {
 		if (&setting2->node == &setting->node)
@@ -1349,6 +1353,9 @@  static int pinctrl_commit_state(struct pinctrl *p, struct pinctrl_state *state)
 			pinmux_disable_setting(setting2);
 	}
 
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	dev_err(p->dev, "Error applying setting, reverse things back\n");
 	/* There's no infinite recursive loop here because p->state is NULL */
 	if (old_state)
 		pinctrl_select_state(p, old_state);
diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h
index 530370443c19..86fc41393f7b 100644
--- a/drivers/pinctrl/core.h
+++ b/drivers/pinctrl/core.h
@@ -12,6 +12,7 @@ 
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 #include <linux/pinctrl/machine.h>
@@ -91,6 +92,7 @@  struct pinctrl {
 	struct pinctrl_state *state;
 	struct list_head dt_maps;
 	struct kref users;
+	spinlock_t lock;
 };
 
 /**