diff mbox series

[v10,54/55] Input: atmel_mxt_ts: Implement synchronization during various operation

Message ID 20200331105051.58896-55-jiada_wang@mentor.com (mailing list archive)
State Superseded
Headers show
Series atmel_mxt_ts misc | expand

Commit Message

Wang, Jiada March 31, 2020, 10:50 a.m. UTC
From: Sanjeev Chugh <sanjeev_chugh@mentor.com>

There could be scope of race conditions when sysfs is being handled
and at the same time, device removal is occurring. For example,
we don't want the device removal to begin if the Atmel device
cfg update is going on or firmware update is going on. In such
cases, wait for device update to be completed before the removal
continues.

    Thread                                          Thread 2:
=========================                       =========================
mxt_update_fw_store()                           mxt_remove()
mutex_lock(&data->lock)                         ...
mxt_initialize()                                //Tries to acquire lock
  request_firmware_nowait()                     mutex_lock(&data->lock)
...                                             ==>waits for lock()
...                                             .
...                                             .
mutex_unlock(&data->lock)                       .
                                                //Gets lock and proceeds
                                                mxt_free_input_device();
                                                ...
                                                mutex_unlock(&data->lock)
                                                //Frees atmel driver data
                                                kfree(data)

If the request_firmware_nowait() completes after the driver removal,
and callback is triggered. But kernel crashes since the module is
already removed.

This commit adds state machine to serialize such scenarios.

Signed-off-by: Sanjeev Chugh <sanjeev_chugh@mentor.com>
Signed-off-by: Bhuvanesh Surachari <bhuvanesh_surachari@mentor.com>
Signed-off-by: Jiada Wang <jiada_wang@mentor.com>
---
 drivers/input/touchscreen/atmel_mxt_ts.c | 223 ++++++++++++++++++++---
 1 file changed, 198 insertions(+), 25 deletions(-)

Comments

Dmitry Osipenko April 1, 2020, 4:04 p.m. UTC | #1
31.03.2020 13:50, Jiada Wang пишет:
> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
> 
> There could be scope of race conditions when sysfs is being handled
> and at the same time, device removal is occurring. For example,
> we don't want the device removal to begin if the Atmel device
> cfg update is going on or firmware update is going on. In such
> cases, wait for device update to be completed before the removal
> continues.
> 
>     Thread                                          Thread 2:
> =========================                       =========================
> mxt_update_fw_store()                           mxt_remove()
> mutex_lock(&data->lock)                         ...
> mxt_initialize()                                //Tries to acquire lock
>   request_firmware_nowait()                     mutex_lock(&data->lock)
> ...                                             ==>waits for lock()
> ...                                             .
> ...                                             .
> mutex_unlock(&data->lock)                       .
>                                                 //Gets lock and proceeds
>                                                 mxt_free_input_device();
>                                                 ...
>                                                 mutex_unlock(&data->lock)
>                                                 //Frees atmel driver data
>                                                 kfree(data)
> 
> If the request_firmware_nowait() completes after the driver removal,
> and callback is triggered. But kernel crashes since the module is
> already removed.
> 
> This commit adds state machine to serialize such scenarios.

Won't it be easier to bump driver's module use-count by __module_get()
while firmware is updating? Or remove sysfs during of mxt_remove()?
Wang, Jiada April 2, 2020, 11:50 a.m. UTC | #2
Hi Dmitry

On 2020/04/02 1:04, Dmitry Osipenko wrote:
> 31.03.2020 13:50, Jiada Wang пишет:
>> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
>>
>> There could be scope of race conditions when sysfs is being handled
>> and at the same time, device removal is occurring. For example,
>> we don't want the device removal to begin if the Atmel device
>> cfg update is going on or firmware update is going on. In such
>> cases, wait for device update to be completed before the removal
>> continues.
>>
>>      Thread                                          Thread 2:
>> =========================                       =========================
>> mxt_update_fw_store()                           mxt_remove()
>> mutex_lock(&data->lock)                         ...
>> mxt_initialize()                                //Tries to acquire lock
>>    request_firmware_nowait()                     mutex_lock(&data->lock)
>> ...                                             ==>waits for lock()
>> ...                                             .
>> ...                                             .
>> mutex_unlock(&data->lock)                       .
>>                                                  //Gets lock and proceeds
>>                                                  mxt_free_input_device();
>>                                                  ...
>>                                                  mutex_unlock(&data->lock)
>>                                                  //Frees atmel driver data
>>                                                  kfree(data)
>>
>> If the request_firmware_nowait() completes after the driver removal,
>> and callback is triggered. But kernel crashes since the module is
>> already removed.
>>
>> This commit adds state machine to serialize such scenarios.
> 
> Won't it be easier to bump driver's module use-count by __module_get()
> while firmware is updating? Or remove sysfs during of mxt_remove()? >

thanks for your inspiration, I will replace state machine with module 
use-count.

Thanks,
Jiada
Dmitry Osipenko April 2, 2020, 1:24 p.m. UTC | #3
02.04.2020 14:50, Wang, Jiada пишет:
> Hi Dmitry
> 
> On 2020/04/02 1:04, Dmitry Osipenko wrote:
>> 31.03.2020 13:50, Jiada Wang пишет:
>>> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
>>>
>>> There could be scope of race conditions when sysfs is being handled
>>> and at the same time, device removal is occurring. For example,
>>> we don't want the device removal to begin if the Atmel device
>>> cfg update is going on or firmware update is going on. In such
>>> cases, wait for device update to be completed before the removal
>>> continues.
>>>
>>>      Thread                                          Thread 2:
>>> =========================                      
>>> =========================
>>> mxt_update_fw_store()                           mxt_remove()
>>> mutex_lock(&data->lock)                         ...
>>> mxt_initialize()                                //Tries to acquire lock
>>>    request_firmware_nowait()                     mutex_lock(&data->lock)
>>> ...                                             ==>waits for lock()
>>> ...                                             .
>>> ...                                             .
>>> mutex_unlock(&data->lock)                       .
>>>                                                  //Gets lock and
>>> proceeds
>>>                                                 
>>> mxt_free_input_device();
>>>                                                  ...
>>>                                                 
>>> mutex_unlock(&data->lock)
>>>                                                  //Frees atmel driver
>>> data
>>>                                                  kfree(data)
>>>
>>> If the request_firmware_nowait() completes after the driver removal,
>>> and callback is triggered. But kernel crashes since the module is
>>> already removed.
>>>
>>> This commit adds state machine to serialize such scenarios.
>>
>> Won't it be easier to bump driver's module use-count by __module_get()
>> while firmware is updating? Or remove sysfs during of mxt_remove()? >
> 
> thanks for your inspiration, I will replace state machine with module
> use-count.

I'm actually now thinking that the suggestion about the module-count
wasn't very correct because this won't really help in regards to
mxt_update_fw_store() / mxt_remove() racing.

I see that mxt_remove() already invokes the mxt_sysfs_remove(), which
should block until mxt_update_fw_store() is completed, shouldn't it?

I guess the kfree(data) isn't the real cause of the problem and
something like this should help:

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c
b/drivers/input/touchscreen/atmel_mxt_ts.c
index b2edf51e1595..4e66106feeb9 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -4254,6 +4254,7 @@ static void mxt_sysfs_remove(struct mxt_data *data)
 	struct i2c_client *client = data->client;

 	sysfs_remove_group(&client->dev.kobj, &mxt_attr_group);
+	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
 }

 static void mxt_reset_slots(struct mxt_data *data)
@@ -4649,31 +4650,19 @@ static int mxt_remove(struct i2c_client *client)
 {
 	struct mxt_data *data = i2c_get_clientdata(client);

-	mutex_lock(&data->lock);
-	if (data->e_state == MXT_STATE_UPDATING_CONFIG_ASYNC ||
-	    data->e_state == MXT_STATE_UPDATING_CONFIG) {
-		data->e_state = MXT_STATE_GOING_AWAY;
-		mutex_unlock(&data->lock);
-		mxt_wait_for_completion(data, &data->update_cfg_completion,
-					MXT_CONFIG_TIMEOUT);
-	} else {
-		data->e_state = MXT_STATE_GOING_AWAY;
-		mutex_unlock(&data->lock);
-	}
+	mxt_sysfs_remove(data);

-	disable_irq(data->irq);
-	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
 	if (data->reset_gpio) {
 		sysfs_remove_link(&client->dev.kobj, "reset");
 		gpiod_unexport(data->reset_gpio);
 	}
+
 	mxt_debug_msg_remove(data);
-	mxt_sysfs_remove(data);
 	mxt_free_input_device(data);
 	mxt_free_object_table(data);

	if (debug_state)
		cancel_delayed_work_sync(&data->watchdog_work);
+	disable_irq(data->irq);

 	return 0;
 }
Dmitry Osipenko April 2, 2020, 1:44 p.m. UTC | #4
02.04.2020 16:24, Dmitry Osipenko пишет:
> 02.04.2020 14:50, Wang, Jiada пишет:
>> Hi Dmitry
>>
>> On 2020/04/02 1:04, Dmitry Osipenko wrote:
>>> 31.03.2020 13:50, Jiada Wang пишет:
>>>> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
>>>>
>>>> There could be scope of race conditions when sysfs is being handled
>>>> and at the same time, device removal is occurring. For example,
>>>> we don't want the device removal to begin if the Atmel device
>>>> cfg update is going on or firmware update is going on. In such
>>>> cases, wait for device update to be completed before the removal
>>>> continues.
>>>>
>>>>      Thread                                          Thread 2:
>>>> =========================                      
>>>> =========================
>>>> mxt_update_fw_store()                           mxt_remove()
>>>> mutex_lock(&data->lock)                         ...
>>>> mxt_initialize()                                //Tries to acquire lock
>>>>    request_firmware_nowait()                     mutex_lock(&data->lock)
>>>> ...                                             ==>waits for lock()
>>>> ...                                             .
>>>> ...                                             .
>>>> mutex_unlock(&data->lock)                       .
>>>>                                                  //Gets lock and
>>>> proceeds
>>>>                                                 
>>>> mxt_free_input_device();
>>>>                                                  ...
>>>>                                                 
>>>> mutex_unlock(&data->lock)
>>>>                                                  //Frees atmel driver
>>>> data
>>>>                                                  kfree(data)
>>>>
>>>> If the request_firmware_nowait() completes after the driver removal,
>>>> and callback is triggered. But kernel crashes since the module is
>>>> already removed.
>>>>
>>>> This commit adds state machine to serialize such scenarios.
>>>
>>> Won't it be easier to bump driver's module use-count by __module_get()
>>> while firmware is updating? Or remove sysfs during of mxt_remove()? >
>>
>> thanks for your inspiration, I will replace state machine with module
>> use-count.
> 
> I'm actually now thinking that the suggestion about the module-count
> wasn't very correct because this won't really help in regards to
> mxt_update_fw_store() / mxt_remove() racing.
> 
> I see that mxt_remove() already invokes the mxt_sysfs_remove(), which
> should block until mxt_update_fw_store() is completed, shouldn't it?
> 
> I guess the kfree(data) isn't the real cause of the problem and
> something like this should help:
> 
> diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c
> b/drivers/input/touchscreen/atmel_mxt_ts.c
> index b2edf51e1595..4e66106feeb9 100644
> --- a/drivers/input/touchscreen/atmel_mxt_ts.c
> +++ b/drivers/input/touchscreen/atmel_mxt_ts.c
> @@ -4254,6 +4254,7 @@ static void mxt_sysfs_remove(struct mxt_data *data)
>  	struct i2c_client *client = data->client;
> 
>  	sysfs_remove_group(&client->dev.kobj, &mxt_attr_group);
> +	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>  }
> 
>  static void mxt_reset_slots(struct mxt_data *data)
> @@ -4649,31 +4650,19 @@ static int mxt_remove(struct i2c_client *client)
>  {
>  	struct mxt_data *data = i2c_get_clientdata(client);
> 
> -	mutex_lock(&data->lock);
> -	if (data->e_state == MXT_STATE_UPDATING_CONFIG_ASYNC ||
> -	    data->e_state == MXT_STATE_UPDATING_CONFIG) {
> -		data->e_state = MXT_STATE_GOING_AWAY;
> -		mutex_unlock(&data->lock);
> -		mxt_wait_for_completion(data, &data->update_cfg_completion,
> -					MXT_CONFIG_TIMEOUT);
> -	} else {
> -		data->e_state = MXT_STATE_GOING_AWAY;
> -		mutex_unlock(&data->lock);
> -	}
> +	mxt_sysfs_remove(data);
> 
> -	disable_irq(data->irq);
> -	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>  	if (data->reset_gpio) {
>  		sysfs_remove_link(&client->dev.kobj, "reset");
>  		gpiod_unexport(data->reset_gpio);
>  	}
> +
>  	mxt_debug_msg_remove(data);
> -	mxt_sysfs_remove(data);
>  	mxt_free_input_device(data);
>  	mxt_free_object_table(data);
> 
> 	if (debug_state)
> 		cancel_delayed_work_sync(&data->watchdog_work);
> +	disable_irq(data->irq);
> 
>  	return 0;
>  }
> 

I'm also wondering why dev_attr_update_fw needs a separate
attribute_group, couldn't it be moved into mxt_attrs[]?
Dmitry Osipenko April 2, 2020, 2 p.m. UTC | #5
02.04.2020 16:24, Dmitry Osipenko пишет:
> 02.04.2020 14:50, Wang, Jiada пишет:
>> Hi Dmitry
>>
>> On 2020/04/02 1:04, Dmitry Osipenko wrote:
>>> 31.03.2020 13:50, Jiada Wang пишет:
>>>> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
>>>>
>>>> There could be scope of race conditions when sysfs is being handled
>>>> and at the same time, device removal is occurring. For example,
>>>> we don't want the device removal to begin if the Atmel device
>>>> cfg update is going on or firmware update is going on. In such
>>>> cases, wait for device update to be completed before the removal
>>>> continues.
>>>>
>>>>      Thread                                          Thread 2:
>>>> =========================                      
>>>> =========================
>>>> mxt_update_fw_store()                           mxt_remove()
>>>> mutex_lock(&data->lock)                         ...
>>>> mxt_initialize()                                //Tries to acquire lock
>>>>    request_firmware_nowait()                     mutex_lock(&data->lock)
>>>> ...                                             ==>waits for lock()
>>>> ...                                             .
>>>> ...                                             .
>>>> mutex_unlock(&data->lock)                       .
>>>>                                                  //Gets lock and
>>>> proceeds
>>>>                                                 
>>>> mxt_free_input_device();
>>>>                                                  ...
>>>>                                                 
>>>> mutex_unlock(&data->lock)
>>>>                                                  //Frees atmel driver
>>>> data
>>>>                                                  kfree(data)
>>>>
>>>> If the request_firmware_nowait() completes after the driver removal,
>>>> and callback is triggered. But kernel crashes since the module is
>>>> already removed.
>>>>
>>>> This commit adds state machine to serialize such scenarios.
>>>
>>> Won't it be easier to bump driver's module use-count by __module_get()
>>> while firmware is updating? Or remove sysfs during of mxt_remove()? >
>>
>> thanks for your inspiration, I will replace state machine with module
>> use-count.
> 
> I'm actually now thinking that the suggestion about the module-count
> wasn't very correct because this won't really help in regards to
> mxt_update_fw_store() / mxt_remove() racing.
> 
> I see that mxt_remove() already invokes the mxt_sysfs_remove(), which
> should block until mxt_update_fw_store() is completed, shouldn't it?
> 
> I guess the kfree(data) isn't the real cause of the problem and
> something like this should help:
> 
> diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c
> b/drivers/input/touchscreen/atmel_mxt_ts.c
> index b2edf51e1595..4e66106feeb9 100644
> --- a/drivers/input/touchscreen/atmel_mxt_ts.c
> +++ b/drivers/input/touchscreen/atmel_mxt_ts.c
> @@ -4254,6 +4254,7 @@ static void mxt_sysfs_remove(struct mxt_data *data)
>  	struct i2c_client *client = data->client;
> 
>  	sysfs_remove_group(&client->dev.kobj, &mxt_attr_group);
> +	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>  }
> 
>  static void mxt_reset_slots(struct mxt_data *data)
> @@ -4649,31 +4650,19 @@ static int mxt_remove(struct i2c_client *client)
>  {
>  	struct mxt_data *data = i2c_get_clientdata(client);
> 
> -	mutex_lock(&data->lock);
> -	if (data->e_state == MXT_STATE_UPDATING_CONFIG_ASYNC ||
> -	    data->e_state == MXT_STATE_UPDATING_CONFIG) {
> -		data->e_state = MXT_STATE_GOING_AWAY;
> -		mutex_unlock(&data->lock);
> -		mxt_wait_for_completion(data, &data->update_cfg_completion,
> -					MXT_CONFIG_TIMEOUT);
> -	} else {
> -		data->e_state = MXT_STATE_GOING_AWAY;
> -		mutex_unlock(&data->lock);
> -	}
> +	mxt_sysfs_remove(data);
> 
> -	disable_irq(data->irq);
> -	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>  	if (data->reset_gpio) {
>  		sysfs_remove_link(&client->dev.kobj, "reset");
>  		gpiod_unexport(data->reset_gpio);
>  	}
> +
>  	mxt_debug_msg_remove(data);
> -	mxt_sysfs_remove(data);
>  	mxt_free_input_device(data);
>  	mxt_free_object_table(data);
> 
> 	if (debug_state)
> 		cancel_delayed_work_sync(&data->watchdog_work);
> +	disable_irq(data->irq);
> 
>  	return 0;
>  }
> 

I'm looking at this again and the original tear-down order of the
mxt_remove() looks okay, so no need to change it.

Reading the commit message, it says that request_firmware_nowait() races
with kfree(data), but that can't happen because the data is
resource-managed and request_firmware_nowait() bumps device's use-count.

https://elixir.bootlin.com/linux/v5.6.2/source/drivers/base/firmware_loader/main.c#L1043

I think this patch was ported from some very old kernel version and it's
simply not applicable to the upstream anymore.
Dmitry Osipenko April 2, 2020, 2:07 p.m. UTC | #6
02.04.2020 17:00, Dmitry Osipenko пишет:
> 02.04.2020 16:24, Dmitry Osipenko пишет:
>> 02.04.2020 14:50, Wang, Jiada пишет:
>>> Hi Dmitry
>>>
>>> On 2020/04/02 1:04, Dmitry Osipenko wrote:
>>>> 31.03.2020 13:50, Jiada Wang пишет:
>>>>> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
>>>>>
>>>>> There could be scope of race conditions when sysfs is being handled
>>>>> and at the same time, device removal is occurring. For example,
>>>>> we don't want the device removal to begin if the Atmel device
>>>>> cfg update is going on or firmware update is going on. In such
>>>>> cases, wait for device update to be completed before the removal
>>>>> continues.
>>>>>
>>>>>      Thread                                          Thread 2:
>>>>> =========================                      
>>>>> =========================
>>>>> mxt_update_fw_store()                           mxt_remove()
>>>>> mutex_lock(&data->lock)                         ...
>>>>> mxt_initialize()                                //Tries to acquire lock
>>>>>    request_firmware_nowait()                     mutex_lock(&data->lock)
>>>>> ...                                             ==>waits for lock()
>>>>> ...                                             .
>>>>> ...                                             .
>>>>> mutex_unlock(&data->lock)                       .
>>>>>                                                  //Gets lock and
>>>>> proceeds
>>>>>                                                 
>>>>> mxt_free_input_device();
>>>>>                                                  ...
>>>>>                                                 
>>>>> mutex_unlock(&data->lock)
>>>>>                                                  //Frees atmel driver
>>>>> data
>>>>>                                                  kfree(data)
>>>>>
>>>>> If the request_firmware_nowait() completes after the driver removal,
>>>>> and callback is triggered. But kernel crashes since the module is
>>>>> already removed.
>>>>>
>>>>> This commit adds state machine to serialize such scenarios.
>>>>
>>>> Won't it be easier to bump driver's module use-count by __module_get()
>>>> while firmware is updating? Or remove sysfs during of mxt_remove()? >
>>>
>>> thanks for your inspiration, I will replace state machine with module
>>> use-count.
>>
>> I'm actually now thinking that the suggestion about the module-count
>> wasn't very correct because this won't really help in regards to
>> mxt_update_fw_store() / mxt_remove() racing.
>>
>> I see that mxt_remove() already invokes the mxt_sysfs_remove(), which
>> should block until mxt_update_fw_store() is completed, shouldn't it?
>>
>> I guess the kfree(data) isn't the real cause of the problem and
>> something like this should help:
>>
>> diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c
>> b/drivers/input/touchscreen/atmel_mxt_ts.c
>> index b2edf51e1595..4e66106feeb9 100644
>> --- a/drivers/input/touchscreen/atmel_mxt_ts.c
>> +++ b/drivers/input/touchscreen/atmel_mxt_ts.c
>> @@ -4254,6 +4254,7 @@ static void mxt_sysfs_remove(struct mxt_data *data)
>>  	struct i2c_client *client = data->client;
>>
>>  	sysfs_remove_group(&client->dev.kobj, &mxt_attr_group);
>> +	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>>  }
>>
>>  static void mxt_reset_slots(struct mxt_data *data)
>> @@ -4649,31 +4650,19 @@ static int mxt_remove(struct i2c_client *client)
>>  {
>>  	struct mxt_data *data = i2c_get_clientdata(client);
>>
>> -	mutex_lock(&data->lock);
>> -	if (data->e_state == MXT_STATE_UPDATING_CONFIG_ASYNC ||
>> -	    data->e_state == MXT_STATE_UPDATING_CONFIG) {
>> -		data->e_state = MXT_STATE_GOING_AWAY;
>> -		mutex_unlock(&data->lock);
>> -		mxt_wait_for_completion(data, &data->update_cfg_completion,
>> -					MXT_CONFIG_TIMEOUT);
>> -	} else {
>> -		data->e_state = MXT_STATE_GOING_AWAY;
>> -		mutex_unlock(&data->lock);
>> -	}
>> +	mxt_sysfs_remove(data);
>>
>> -	disable_irq(data->irq);
>> -	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>>  	if (data->reset_gpio) {
>>  		sysfs_remove_link(&client->dev.kobj, "reset");
>>  		gpiod_unexport(data->reset_gpio);
>>  	}
>> +
>>  	mxt_debug_msg_remove(data);
>> -	mxt_sysfs_remove(data);
>>  	mxt_free_input_device(data);
>>  	mxt_free_object_table(data);
>>
>> 	if (debug_state)
>> 		cancel_delayed_work_sync(&data->watchdog_work);
>> +	disable_irq(data->irq);
>>
>>  	return 0;
>>  }
>>
> 
> I'm looking at this again and the original tear-down order of the
> mxt_remove() looks okay, so no need to change it.
> 
> Reading the commit message, it says that request_firmware_nowait() races
> with kfree(data), but that can't happen because the data is
> resource-managed and request_firmware_nowait() bumps device's use-count.
> 
> https://elixir.bootlin.com/linux/v5.6.2/source/drivers/base/firmware_loader/main.c#L1043
> 
> I think this patch was ported from some very old kernel version and it's
> simply not applicable to the upstream anymore.
> 

Moreover, request_firmware_nowait() uses try_module_get(), hence
driver's module can't be unloaded until firmware is loaded. This patch
is wrong, please drop it.
Wang, Jiada April 3, 2020, 8:39 a.m. UTC | #7
Hi Dmitry

On 2020/04/02 22:24, Dmitry Osipenko wrote:
> 02.04.2020 14:50, Wang, Jiada пишет:
>> Hi Dmitry
>>
>> On 2020/04/02 1:04, Dmitry Osipenko wrote:
>>> 31.03.2020 13:50, Jiada Wang пишет:
>>>> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
>>>>
>>>> There could be scope of race conditions when sysfs is being handled
>>>> and at the same time, device removal is occurring. For example,
>>>> we don't want the device removal to begin if the Atmel device
>>>> cfg update is going on or firmware update is going on. In such
>>>> cases, wait for device update to be completed before the removal
>>>> continues.
>>>>
>>>>       Thread                                          Thread 2:
>>>> =========================
>>>> =========================
>>>> mxt_update_fw_store()                           mxt_remove()
>>>> mutex_lock(&data->lock)                         ...
>>>> mxt_initialize()                                //Tries to acquire lock
>>>>     request_firmware_nowait()                     mutex_lock(&data->lock)
>>>> ...                                             ==>waits for lock()
>>>> ...                                             .
>>>> ...                                             .
>>>> mutex_unlock(&data->lock)                       .
>>>>                                                   //Gets lock and
>>>> proceeds
>>>>                                                  
>>>> mxt_free_input_device();
>>>>                                                   ...
>>>>                                                  
>>>> mutex_unlock(&data->lock)
>>>>                                                   //Frees atmel driver
>>>> data
>>>>                                                   kfree(data)
>>>>
>>>> If the request_firmware_nowait() completes after the driver removal,
>>>> and callback is triggered. But kernel crashes since the module is
>>>> already removed.
>>>>
>>>> This commit adds state machine to serialize such scenarios.
>>>
>>> Won't it be easier to bump driver's module use-count by __module_get()
>>> while firmware is updating? Or remove sysfs during of mxt_remove()? >
>>
>> thanks for your inspiration, I will replace state machine with module
>> use-count.
> 
> I'm actually now thinking that the suggestion about the module-count
> wasn't very correct because this won't really help in regards to
> mxt_update_fw_store() / mxt_remove() racing.
> 
> I see that mxt_remove() already invokes the mxt_sysfs_remove(), which
> should block until mxt_update_fw_store() is completed, shouldn't it?

Yes, you are correct,
this commit isn't addressing the real root cause

> 
> I guess the kfree(data) isn't the real cause of the problem and
> something like this should help:
> 
> diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c
> b/drivers/input/touchscreen/atmel_mxt_ts.c
> index b2edf51e1595..4e66106feeb9 100644
> --- a/drivers/input/touchscreen/atmel_mxt_ts.c
> +++ b/drivers/input/touchscreen/atmel_mxt_ts.c
> @@ -4254,6 +4254,7 @@ static void mxt_sysfs_remove(struct mxt_data *data)
>   	struct i2c_client *client = data->client;
> 
>   	sysfs_remove_group(&client->dev.kobj, &mxt_attr_group);
> +	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>   }
> 
>   static void mxt_reset_slots(struct mxt_data *data)
> @@ -4649,31 +4650,19 @@ static int mxt_remove(struct i2c_client *client)
>   {
>   	struct mxt_data *data = i2c_get_clientdata(client);
> 
> -	mutex_lock(&data->lock);
> -	if (data->e_state == MXT_STATE_UPDATING_CONFIG_ASYNC ||
> -	    data->e_state == MXT_STATE_UPDATING_CONFIG) {
> -		data->e_state = MXT_STATE_GOING_AWAY;
> -		mutex_unlock(&data->lock);
> -		mxt_wait_for_completion(data, &data->update_cfg_completion,
> -					MXT_CONFIG_TIMEOUT);
> -	} else {
> -		data->e_state = MXT_STATE_GOING_AWAY;
> -		mutex_unlock(&data->lock);
> -	}
> +	mxt_sysfs_remove(data);
> 
> -	disable_irq(data->irq);
> -	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>   	if (data->reset_gpio) {
>   		sysfs_remove_link(&client->dev.kobj, "reset");
>   		gpiod_unexport(data->reset_gpio);
>   	}
> +
>   	mxt_debug_msg_remove(data);
> -	mxt_sysfs_remove(data);
>   	mxt_free_input_device(data);
>   	mxt_free_object_table(data);
> 
> 	if (debug_state)
> 		cancel_delayed_work_sync(&data->watchdog_work);
> +	disable_irq(data->irq);
> 
>   	return 0;
>   }
> 
yes, I confirmed, the root cause is because irq is disabled while 
firmware is being updated, this cause update of firmware can't proceed.
by move disable irq after sysfs entry removal can fix the issue

Thanks,
Jiada
Dmitry Osipenko April 4, 2020, 9:38 p.m. UTC | #8
03.04.2020 11:39, Wang, Jiada пишет:
> Hi Dmitry
> 
> On 2020/04/02 22:24, Dmitry Osipenko wrote:
>> 02.04.2020 14:50, Wang, Jiada пишет:
>>> Hi Dmitry
>>>
>>> On 2020/04/02 1:04, Dmitry Osipenko wrote:
>>>> 31.03.2020 13:50, Jiada Wang пишет:
>>>>> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
>>>>>
>>>>> There could be scope of race conditions when sysfs is being handled
>>>>> and at the same time, device removal is occurring. For example,
>>>>> we don't want the device removal to begin if the Atmel device
>>>>> cfg update is going on or firmware update is going on. In such
>>>>> cases, wait for device update to be completed before the removal
>>>>> continues.
>>>>>
>>>>>       Thread                                          Thread 2:
>>>>> =========================
>>>>> =========================
>>>>> mxt_update_fw_store()                           mxt_remove()
>>>>> mutex_lock(&data->lock)                         ...
>>>>> mxt_initialize()                                //Tries to acquire
>>>>> lock
>>>>>     request_firmware_nowait()                    
>>>>> mutex_lock(&data->lock)
>>>>> ...                                             ==>waits for lock()
>>>>> ...                                             .
>>>>> ...                                             .
>>>>> mutex_unlock(&data->lock)                       .
>>>>>                                                   //Gets lock and
>>>>> proceeds
>>>>>                                                 
>>>>> mxt_free_input_device();
>>>>>                                                   ...
>>>>>                                                 
>>>>> mutex_unlock(&data->lock)
>>>>>                                                   //Frees atmel driver
>>>>> data
>>>>>                                                   kfree(data)
>>>>>
>>>>> If the request_firmware_nowait() completes after the driver removal,
>>>>> and callback is triggered. But kernel crashes since the module is
>>>>> already removed.
>>>>>
>>>>> This commit adds state machine to serialize such scenarios.
>>>>
>>>> Won't it be easier to bump driver's module use-count by __module_get()
>>>> while firmware is updating? Or remove sysfs during of mxt_remove()? >
>>>
>>> thanks for your inspiration, I will replace state machine with module
>>> use-count.
>>
>> I'm actually now thinking that the suggestion about the module-count
>> wasn't very correct because this won't really help in regards to
>> mxt_update_fw_store() / mxt_remove() racing.
>>
>> I see that mxt_remove() already invokes the mxt_sysfs_remove(), which
>> should block until mxt_update_fw_store() is completed, shouldn't it?
> 
> Yes, you are correct,
> this commit isn't addressing the real root cause
> 
>>
>> I guess the kfree(data) isn't the real cause of the problem and
>> something like this should help:
>>
>> diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c
>> b/drivers/input/touchscreen/atmel_mxt_ts.c
>> index b2edf51e1595..4e66106feeb9 100644
>> --- a/drivers/input/touchscreen/atmel_mxt_ts.c
>> +++ b/drivers/input/touchscreen/atmel_mxt_ts.c
>> @@ -4254,6 +4254,7 @@ static void mxt_sysfs_remove(struct mxt_data *data)
>>       struct i2c_client *client = data->client;
>>
>>       sysfs_remove_group(&client->dev.kobj, &mxt_attr_group);
>> +    sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>>   }
>>
>>   static void mxt_reset_slots(struct mxt_data *data)
>> @@ -4649,31 +4650,19 @@ static int mxt_remove(struct i2c_client *client)
>>   {
>>       struct mxt_data *data = i2c_get_clientdata(client);
>>
>> -    mutex_lock(&data->lock);
>> -    if (data->e_state == MXT_STATE_UPDATING_CONFIG_ASYNC ||
>> -        data->e_state == MXT_STATE_UPDATING_CONFIG) {
>> -        data->e_state = MXT_STATE_GOING_AWAY;
>> -        mutex_unlock(&data->lock);
>> -        mxt_wait_for_completion(data, &data->update_cfg_completion,
>> -                    MXT_CONFIG_TIMEOUT);
>> -    } else {
>> -        data->e_state = MXT_STATE_GOING_AWAY;
>> -        mutex_unlock(&data->lock);
>> -    }
>> +    mxt_sysfs_remove(data);
>>
>> -    disable_irq(data->irq);
>> -    sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>>       if (data->reset_gpio) {
>>           sysfs_remove_link(&client->dev.kobj, "reset");
>>           gpiod_unexport(data->reset_gpio);
>>       }
>> +
>>       mxt_debug_msg_remove(data);
>> -    mxt_sysfs_remove(data);
>>       mxt_free_input_device(data);
>>       mxt_free_object_table(data);
>>
>>     if (debug_state)
>>         cancel_delayed_work_sync(&data->watchdog_work);
>> +    disable_irq(data->irq);
>>
>>       return 0;
>>   }
>>
> yes, I confirmed, the root cause is because irq is disabled while
> firmware is being updated, this cause update of firmware can't proceed.
> by move disable irq after sysfs entry removal can fix the issue

Okay :)
Wang, Jiada April 6, 2020, 4:07 a.m. UTC | #9
Hi Dmitrij

On 2020/04/02 22:44, Dmitry Osipenko wrote:
> 02.04.2020 16:24, Dmitry Osipenko пишет:
>> 02.04.2020 14:50, Wang, Jiada пишет:
>>> Hi Dmitry
>>>
>>> On 2020/04/02 1:04, Dmitry Osipenko wrote:
>>>> 31.03.2020 13:50, Jiada Wang пишет:
>>>>> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
>>>>>
>>>>> There could be scope of race conditions when sysfs is being handled
>>>>> and at the same time, device removal is occurring. For example,
>>>>> we don't want the device removal to begin if the Atmel device
>>>>> cfg update is going on or firmware update is going on. In such
>>>>> cases, wait for device update to be completed before the removal
>>>>> continues.
>>>>>
>>>>>       Thread                                          Thread 2:
>>>>> =========================
>>>>> =========================
>>>>> mxt_update_fw_store()                           mxt_remove()
>>>>> mutex_lock(&data->lock)                         ...
>>>>> mxt_initialize()                                //Tries to acquire lock
>>>>>     request_firmware_nowait()                     mutex_lock(&data->lock)
>>>>> ...                                             ==>waits for lock()
>>>>> ...                                             .
>>>>> ...                                             .
>>>>> mutex_unlock(&data->lock)                       .
>>>>>                                                   //Gets lock and
>>>>> proceeds
>>>>>                                                  
>>>>> mxt_free_input_device();
>>>>>                                                   ...
>>>>>                                                  
>>>>> mutex_unlock(&data->lock)
>>>>>                                                   //Frees atmel driver
>>>>> data
>>>>>                                                   kfree(data)
>>>>>
>>>>> If the request_firmware_nowait() completes after the driver removal,
>>>>> and callback is triggered. But kernel crashes since the module is
>>>>> already removed.
>>>>>
>>>>> This commit adds state machine to serialize such scenarios.
>>>>
>>>> Won't it be easier to bump driver's module use-count by __module_get()
>>>> while firmware is updating? Or remove sysfs during of mxt_remove()? >
>>>
>>> thanks for your inspiration, I will replace state machine with module
>>> use-count.
>>
>> I'm actually now thinking that the suggestion about the module-count
>> wasn't very correct because this won't really help in regards to
>> mxt_update_fw_store() / mxt_remove() racing.
>>
>> I see that mxt_remove() already invokes the mxt_sysfs_remove(), which
>> should block until mxt_update_fw_store() is completed, shouldn't it?
>>
>> I guess the kfree(data) isn't the real cause of the problem and
>> something like this should help:
>>
>> diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c
>> b/drivers/input/touchscreen/atmel_mxt_ts.c
>> index b2edf51e1595..4e66106feeb9 100644
>> --- a/drivers/input/touchscreen/atmel_mxt_ts.c
>> +++ b/drivers/input/touchscreen/atmel_mxt_ts.c
>> @@ -4254,6 +4254,7 @@ static void mxt_sysfs_remove(struct mxt_data *data)
>>   	struct i2c_client *client = data->client;
>>
>>   	sysfs_remove_group(&client->dev.kobj, &mxt_attr_group);
>> +	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>>   }
>>
>>   static void mxt_reset_slots(struct mxt_data *data)
>> @@ -4649,31 +4650,19 @@ static int mxt_remove(struct i2c_client *client)
>>   {
>>   	struct mxt_data *data = i2c_get_clientdata(client);
>>
>> -	mutex_lock(&data->lock);
>> -	if (data->e_state == MXT_STATE_UPDATING_CONFIG_ASYNC ||
>> -	    data->e_state == MXT_STATE_UPDATING_CONFIG) {
>> -		data->e_state = MXT_STATE_GOING_AWAY;
>> -		mutex_unlock(&data->lock);
>> -		mxt_wait_for_completion(data, &data->update_cfg_completion,
>> -					MXT_CONFIG_TIMEOUT);
>> -	} else {
>> -		data->e_state = MXT_STATE_GOING_AWAY;
>> -		mutex_unlock(&data->lock);
>> -	}
>> +	mxt_sysfs_remove(data);
>>
>> -	disable_irq(data->irq);
>> -	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>>   	if (data->reset_gpio) {
>>   		sysfs_remove_link(&client->dev.kobj, "reset");
>>   		gpiod_unexport(data->reset_gpio);
>>   	}
>> +
>>   	mxt_debug_msg_remove(data);
>> -	mxt_sysfs_remove(data);
>>   	mxt_free_input_device(data);
>>   	mxt_free_object_table(data);
>>
>> 	if (debug_state)
>> 		cancel_delayed_work_sync(&data->watchdog_work);
>> +	disable_irq(data->irq);
>>
>>   	return 0;
>>   }
>>
> 
> I'm also wondering why dev_attr_update_fw needs a separate
> attribute_group, couldn't it be moved into mxt_attrs[]
Separate sysfs into different groups are done by commit
"Input: atmel_mxt_ts - rework sysfs init/remove"
I think the main purpose,
is to remove other sysfs entries when firmware is being updated.

Thanks,
Jiada
Wang, Jiada April 6, 2020, 4:18 a.m. UTC | #10
Hi Dmitry

On 2020/04/02 23:00, Dmitry Osipenko wrote:
> 02.04.2020 16:24, Dmitry Osipenko пишет:
>> 02.04.2020 14:50, Wang, Jiada пишет:
>>> Hi Dmitry
>>>
>>> On 2020/04/02 1:04, Dmitry Osipenko wrote:
>>>> 31.03.2020 13:50, Jiada Wang пишет:
>>>>> From: Sanjeev Chugh <sanjeev_chugh@mentor.com>
>>>>>
>>>>> There could be scope of race conditions when sysfs is being handled
>>>>> and at the same time, device removal is occurring. For example,
>>>>> we don't want the device removal to begin if the Atmel device
>>>>> cfg update is going on or firmware update is going on. In such
>>>>> cases, wait for device update to be completed before the removal
>>>>> continues.
>>>>>
>>>>>       Thread                                          Thread 2:
>>>>> =========================
>>>>> =========================
>>>>> mxt_update_fw_store()                           mxt_remove()
>>>>> mutex_lock(&data->lock)                         ...
>>>>> mxt_initialize()                                //Tries to acquire lock
>>>>>     request_firmware_nowait()                     mutex_lock(&data->lock)
>>>>> ...                                             ==>waits for lock()
>>>>> ...                                             .
>>>>> ...                                             .
>>>>> mutex_unlock(&data->lock)                       .
>>>>>                                                   //Gets lock and
>>>>> proceeds
>>>>>                                                  
>>>>> mxt_free_input_device();
>>>>>                                                   ...
>>>>>                                                  
>>>>> mutex_unlock(&data->lock)
>>>>>                                                   //Frees atmel driver
>>>>> data
>>>>>                                                   kfree(data)
>>>>>
>>>>> If the request_firmware_nowait() completes after the driver removal,
>>>>> and callback is triggered. But kernel crashes since the module is
>>>>> already removed.
>>>>>
>>>>> This commit adds state machine to serialize such scenarios.
>>>>
>>>> Won't it be easier to bump driver's module use-count by __module_get()
>>>> while firmware is updating? Or remove sysfs during of mxt_remove()? >
>>>
>>> thanks for your inspiration, I will replace state machine with module
>>> use-count.
>>
>> I'm actually now thinking that the suggestion about the module-count
>> wasn't very correct because this won't really help in regards to
>> mxt_update_fw_store() / mxt_remove() racing.
>>
>> I see that mxt_remove() already invokes the mxt_sysfs_remove(), which
>> should block until mxt_update_fw_store() is completed, shouldn't it?
>>
>> I guess the kfree(data) isn't the real cause of the problem and
>> something like this should help:
>>
>> diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c
>> b/drivers/input/touchscreen/atmel_mxt_ts.c
>> index b2edf51e1595..4e66106feeb9 100644
>> --- a/drivers/input/touchscreen/atmel_mxt_ts.c
>> +++ b/drivers/input/touchscreen/atmel_mxt_ts.c
>> @@ -4254,6 +4254,7 @@ static void mxt_sysfs_remove(struct mxt_data *data)
>>   	struct i2c_client *client = data->client;
>>
>>   	sysfs_remove_group(&client->dev.kobj, &mxt_attr_group);
>> +	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>>   }
>>
>>   static void mxt_reset_slots(struct mxt_data *data)
>> @@ -4649,31 +4650,19 @@ static int mxt_remove(struct i2c_client *client)
>>   {
>>   	struct mxt_data *data = i2c_get_clientdata(client);
>>
>> -	mutex_lock(&data->lock);
>> -	if (data->e_state == MXT_STATE_UPDATING_CONFIG_ASYNC ||
>> -	    data->e_state == MXT_STATE_UPDATING_CONFIG) {
>> -		data->e_state = MXT_STATE_GOING_AWAY;
>> -		mutex_unlock(&data->lock);
>> -		mxt_wait_for_completion(data, &data->update_cfg_completion,
>> -					MXT_CONFIG_TIMEOUT);
>> -	} else {
>> -		data->e_state = MXT_STATE_GOING_AWAY;
>> -		mutex_unlock(&data->lock);
>> -	}
>> +	mxt_sysfs_remove(data);
>>
>> -	disable_irq(data->irq);
>> -	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
>>   	if (data->reset_gpio) {
>>   		sysfs_remove_link(&client->dev.kobj, "reset");
>>   		gpiod_unexport(data->reset_gpio);
>>   	}
>> +
>>   	mxt_debug_msg_remove(data);
>> -	mxt_sysfs_remove(data);
>>   	mxt_free_input_device(data);
>>   	mxt_free_object_table(data);
>>
>> 	if (debug_state)
>> 		cancel_delayed_work_sync(&data->watchdog_work);
>> +	disable_irq(data->irq);
>>
>>   	return 0;
>>   }
>>
> 
> I'm looking at this again and the original tear-down order of the
> mxt_remove() looks okay, so no need to change it.
> 
> Reading the commit message, it says that request_firmware_nowait() races
> with kfree(data), but that can't happen because the data is
> resource-managed and request_firmware_nowait() bumps device's use-count.
> 
> https://elixir.bootlin.com/linux/v5.6.2/source/drivers/base/firmware_loader/main.c#L1043
> 
> I think this patch was ported from some very old kernel version and it's
> simply not applicable to the upstream anymore.
> 

I had some test,
and confirmed you are right,
this commit is no longer applicable to upstream,
but as discussed in another patch,
disable_irq() need to be moved after remove of mxt_fw_attr_group.
I will add this change in a new commit.

Thanks,
Jiada
Dmitry Osipenko April 7, 2020, 3 p.m. UTC | #11
06.04.2020 07:18, Wang, Jiada пишет:
...
> I had some test,
> and confirmed you are right,
> this commit is no longer applicable to upstream,
> but as discussed in another patch,
> disable_irq() need to be moved after remove of mxt_fw_attr_group.
> I will add this change in a new commit.

Sounds good.
diff mbox series

Patch

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 10d6726de9c4..d244d97d134e 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -223,6 +223,7 @@  enum t100_type {
 #define MXT_POWERON_DELAY	150	/* msec */
 #define MXT_BOOTLOADER_WAIT	36E5	/* 1 minute */
 #define MXT_WATCHDOG_TIMEOUT	1000	/* msec */
+#define MXT_CONFIG_TIMEOUT	36E5	/* msec */
 
 /* Command to unlock bootloader */
 #define MXT_UNLOCK_CMD_MSB	0xaa
@@ -249,6 +250,20 @@  enum t100_type {
 #define MXT_DEBUG_STATE		false
 static bool debug_state = MXT_DEBUG_STATE;
 
+enum device_state {
+	MXT_STATE_READY,
+	MXT_STATE_UPDATING_CONFIG,
+	MXT_STATE_UPDATING_CONFIG_ASYNC,
+	MXT_STATE_START,
+	MXT_STATE_STOP,
+	MXT_STATE_GOING_AWAY
+};
+
+enum mxt_cmd {
+	UPDATE_CFG,
+	UPDATE_FW
+};
+
 struct mxt_info {
 	u8 family_id;
 	u8 variant_id;
@@ -427,11 +442,15 @@  struct mxt_data {
 	/* Indicates whether device is in suspend */
 	bool suspended;
 
-	/* Indicates whether device is updating configuration */
-	bool updating_config;
+	struct mutex lock;
 
 	unsigned int mtu;
 	bool t25_status;
+
+	/* State handling for probe/remove, open/close and config update */
+	enum device_state e_state;
+
+	struct completion update_cfg_completion;
 };
 
 struct mxt_vb2_buffer {
@@ -1638,6 +1657,7 @@  static irqreturn_t mxt_interrupt(int irq, void *dev_id)
 	struct mxt_data *data = dev_id;
 	int ret;
 
+	mutex_lock(&data->lock);
 	data->mxt_status.intp_triggered = true;
 
 	if (data->in_bootloader) {
@@ -1665,6 +1685,8 @@  static irqreturn_t mxt_interrupt(int irq, void *dev_id)
 
 exit:
 	data->mxt_status.intp_triggered = false;
+	mutex_unlock(&data->lock);
+
 	return ret;
 }
 
@@ -2247,6 +2269,8 @@  static void mxt_free_object_table(struct mxt_data *data)
 	video_unregister_device(&data->dbg.vdev);
 	v4l2_device_unregister(&data->dbg.v4l2);
 #endif
+	mutex_lock(&data->lock);
+
 	data->object_table = NULL;
 	kfree(data->info);
 	data->info = NULL;
@@ -2276,6 +2300,8 @@  static void mxt_free_object_table(struct mxt_data *data)
 	data->T100_reportid_min = 0;
 	data->T100_reportid_max = 0;
 	data->max_reportid = 0;
+
+	mutex_unlock(&data->lock);
 }
 
 static int mxt_parse_object_table(struct mxt_data *data,
@@ -2957,8 +2983,15 @@  static int mxt_configure_objects(struct mxt_data *data,
 
 static void mxt_config_cb(const struct firmware *cfg, void *ctx)
 {
+	struct mxt_data *data = ctx;
+
 	mxt_configure_objects(ctx, cfg);
 	release_firmware(cfg);
+	complete(&data->update_cfg_completion);
+	mutex_lock(&data->lock);
+	if (data->e_state != MXT_STATE_GOING_AWAY)
+		data->e_state = MXT_STATE_READY;
+	mutex_unlock(&data->lock);
 }
 
 static int mxt_bootloader_status(struct mxt_data *data)
@@ -2994,9 +3027,12 @@  static void mxt_watchdog_work(struct work_struct *work)
 	u16 info_buf;
 	int ret;
 
+	mutex_lock(&data->lock);
 	if (data->suspended || data->in_bootloader ||
-	    data->mxt_status.intp_triggered)
+	    data->mxt_status.intp_triggered) {
+		mutex_unlock(&data->lock);
 		goto sched_work;
+	}
 
 	ret = __mxt_read_reg(data->client, 0, sizeof(info_buf), &info_buf);
 
@@ -3066,6 +3102,15 @@  static int mxt_initialize(struct mxt_data *data)
 		goto err_free_sysfs;
 
 	if (data->cfg_name) {
+		mutex_lock(&data->lock);
+		if (data->e_state != MXT_STATE_GOING_AWAY) {
+			data->e_state = MXT_STATE_UPDATING_CONFIG_ASYNC;
+		} else {
+			mutex_unlock(&data->lock);
+			return -EBUSY;
+		}
+		reinit_completion(&data->update_cfg_completion);
+		mutex_unlock(&data->lock);
 		error = request_firmware_nowait(THIS_MODULE, true,
 						data->cfg_name,
 						&client->dev,
@@ -3845,30 +3890,58 @@  static int mxt_update_file_name(struct device *dev, char **file_name,
 	return 0;
 }
 
+static int mxt_process_operation(struct mxt_data *data,
+				 enum mxt_cmd cmd,
+				 void *cmd_data);
+
 static ssize_t update_fw_store(struct device *dev,
 			       struct device_attribute *attr,
 			       const char *buf, size_t count)
 {
 	struct mxt_data *data = dev_get_drvdata(dev);
+	char *filename = NULL;
+	int ret;
+
+	ret = mxt_update_file_name(dev, &filename, buf, count);
+	if (ret)
+		goto out;
+
+	ret = mxt_process_operation(data, UPDATE_FW, filename);
+	kfree(filename);
+
+	if (ret)
+		goto out;
+
+	return count;
+out:
+	return ret;
+}
+
+static int mxt_fw_update(struct mxt_data *data,
+			 const char *filename)
+{
+	struct device *dev = &data->client->dev;
+	unsigned int len = 0;
 	int error;
 
-	error = mxt_update_file_name(dev, &data->fw_name, buf, count);
+	len = strlen(filename);
+	error = mxt_update_file_name(dev, &data->fw_name, filename, len);
 	if (error)
 		return error;
 
 	error = mxt_load_fw(dev);
 	if (error) {
 		dev_err(dev, "The firmware update failed(%d)\n", error);
-		count = error;
-	} else {
-		dev_info(dev, "The firmware update succeeded\n");
-
-		error = mxt_initialize(data);
-		if (error)
-			return error;
+		return error;
 	}
 
-	return count;
+	error = mxt_initialize(data);
+	if (error)
+		return error;
+
+	dev_info(dev, "The firmware update succeeded\n");
+
+	return error;
 }
 
 static ssize_t update_cfg_store(struct device *dev,
@@ -3876,14 +3949,38 @@  static ssize_t update_cfg_store(struct device *dev,
 				const char *buf, size_t count)
 {
 	struct mxt_data *data = dev_get_drvdata(dev);
+	char *filename = NULL;
+	int ret;
+
+	ret = mxt_update_file_name(dev, &filename, buf, count);
+	if (ret)
+		goto out;
+
+	ret = mxt_process_operation(data, UPDATE_CFG, filename);
+	kfree(filename);
+
+	if (ret)
+		goto out;
+
+	return count;
+out:
+	return ret;
+}
+
+static int mxt_cfg_update(struct mxt_data *data,
+			  char *filename)
+{
+	struct device *dev = &data->client->dev;
 	const struct firmware *cfg;
+	unsigned int len = 0;
 	int ret;
 
-	ret = mxt_update_file_name(dev, &data->cfg_name, buf, count);
+	len = strlen(filename);
+	ret = mxt_update_file_name(dev, &data->cfg_name, filename, len);
 	if (ret)
 		return ret;
 
-	ret = request_firmware(&cfg, data->cfg_name, dev);
+	ret = request_firmware(&cfg, data->cfg_name, &data->client->dev);
 	if (ret < 0) {
 		dev_err(dev, "Failure to request config file %s\n",
 			data->cfg_name);
@@ -3891,8 +3988,6 @@  static ssize_t update_cfg_store(struct device *dev,
 		goto out;
 	}
 
-	data->updating_config = true;
-
 	mxt_free_input_device(data);
 
 	if (data->suspended) {
@@ -3908,15 +4003,8 @@  static ssize_t update_cfg_store(struct device *dev,
 	}
 
 	ret = mxt_configure_objects(data, cfg);
-	if (ret)
-		goto release;
-
-	ret = count;
-
-release:
 	release_firmware(cfg);
 out:
-	data->updating_config = false;
 	return ret;
 }
 
@@ -4188,8 +4276,17 @@  static int mxt_start(struct mxt_data *data)
 {
 	int ret = 0;
 
-	if (!data->suspended || data->in_bootloader)
+	mutex_lock(&data->lock);
+	if (!data->suspended) {
+		mutex_unlock(&data->lock);
 		return 0;
+	}
+	if (data->in_bootloader || data->e_state != MXT_STATE_READY) {
+		mutex_unlock(&data->lock);
+		return -EBUSY;
+	}
+	data->e_state = MXT_STATE_START;
+	mutex_unlock(&data->lock);
 
 	switch (data->suspend_mode) {
 	case MXT_SUSPEND_T9_CTRL:
@@ -4233,8 +4330,12 @@  static int mxt_start(struct mxt_data *data)
 		ret = mxt_acquire_irq(data);
 	}
 
+	mutex_lock(&data->lock);
 	if (!ret)
 		data->suspended = false;
+	if (data->e_state != MXT_STATE_GOING_AWAY)
+		data->e_state = MXT_STATE_READY;
+	mutex_unlock(&data->lock);
 
 	return ret;
 }
@@ -4243,8 +4344,19 @@  static int mxt_stop(struct mxt_data *data)
 {
 	int ret;
 
-	if (data->suspended || data->in_bootloader || data->updating_config)
+	mutex_lock(&data->lock);
+	if (data->suspended || data->e_state == MXT_STATE_UPDATING_CONFIG) {
+		mutex_unlock(&data->lock);
 		return 0;
+	}
+	if ((data->e_state != MXT_STATE_READY &&
+	     data->e_state != MXT_STATE_GOING_AWAY)) {
+		mutex_unlock(&data->lock);
+		return -EBUSY;
+	}
+	if (data->e_state != MXT_STATE_GOING_AWAY)
+		data->e_state = MXT_STATE_STOP;
+	mutex_unlock(&data->lock);
 
 	switch (data->suspend_mode) {
 	case MXT_SUSPEND_T9_CTRL:
@@ -4274,8 +4386,15 @@  static int mxt_stop(struct mxt_data *data)
 		break;
 	}
 
+	mutex_lock(&data->lock);
 	data->suspended = true;
+
+	if (data->e_state != MXT_STATE_GOING_AWAY)
+		data->e_state = MXT_STATE_READY;
+	mutex_unlock(&data->lock);
+
 	return 0;
+
 }
 
 static int mxt_input_open(struct input_dev *dev)
@@ -4430,12 +4549,15 @@  static int mxt_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	snprintf(data->phys, sizeof(data->phys), "i2c-%u-%04x/input0",
 		 client->adapter->nr, client->addr);
 
+	mutex_init(&data->lock);
+
 	data->client = client;
 	i2c_set_clientdata(client, data);
 
 	init_completion(&data->chg_completion);
 	init_completion(&data->reset_completion);
 	init_completion(&data->crc_completion);
+	init_completion(&data->update_cfg_completion);
 	mutex_init(&data->debug_msg_lock);
 
 	data->suspend_mode = dmi_check_system(chromebook_T9_suspend_dmi) ?
@@ -4527,6 +4649,18 @@  static int mxt_remove(struct i2c_client *client)
 {
 	struct mxt_data *data = i2c_get_clientdata(client);
 
+	mutex_lock(&data->lock);
+	if (data->e_state == MXT_STATE_UPDATING_CONFIG_ASYNC ||
+	    data->e_state == MXT_STATE_UPDATING_CONFIG) {
+		data->e_state = MXT_STATE_GOING_AWAY;
+		mutex_unlock(&data->lock);
+		mxt_wait_for_completion(data, &data->update_cfg_completion,
+					MXT_CONFIG_TIMEOUT);
+	} else {
+		data->e_state = MXT_STATE_GOING_AWAY;
+		mutex_unlock(&data->lock);
+	}
+
 	disable_irq(data->irq);
 	sysfs_remove_group(&client->dev.kobj, &mxt_fw_attr_group);
 	if (data->reset_gpio) {
@@ -4587,6 +4721,45 @@  static int __maybe_unused mxt_resume(struct device *dev)
 	return ret;
 }
 
+static int mxt_process_operation(struct mxt_data *data,
+				 enum mxt_cmd cmd,
+				 void *cmd_data)
+{
+	int ret = 0;
+
+	mutex_lock(&data->lock);
+	if (data->e_state != MXT_STATE_READY) {
+		mutex_unlock(&data->lock);
+		dev_err(&data->client->dev, "Atmel touch device is shutting down\n");
+		return -EBUSY;
+	}
+	data->e_state = MXT_STATE_UPDATING_CONFIG;
+	reinit_completion(&data->update_cfg_completion);
+	mutex_unlock(&data->lock);
+
+	switch (cmd) {
+	case UPDATE_CFG:
+	case UPDATE_FW:
+		if (cmd == UPDATE_CFG)
+			ret = mxt_cfg_update(data, (char *)cmd_data);
+		else
+			ret = mxt_fw_update(data, (char *)cmd_data);
+		break;
+
+	default:
+		break;
+	}
+	mutex_lock(&data->lock);
+	if (data->e_state != MXT_STATE_UPDATING_CONFIG_ASYNC) {
+		complete(&data->update_cfg_completion);
+		if (data->e_state != MXT_STATE_GOING_AWAY)
+			data->e_state = MXT_STATE_READY;
+	}
+	mutex_unlock(&data->lock);
+
+	return ret;
+}
+
 static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume);
 
 static const struct of_device_id mxt_of_match[] = {