diff mbox series

[v9] usb: gadget: u_serial: Add null pointer check in gs_read_complete & gs_write_complete

Message ID TYUPR06MB6217DE28012FFEC5E808DD64D2962@TYUPR06MB6217.apcprd06.prod.outlook.com (mailing list archive)
State New
Headers show
Series [v9] usb: gadget: u_serial: Add null pointer check in gs_read_complete & gs_write_complete | expand

Commit Message

胡连勤 Aug. 29, 2024, 11:54 a.m. UTC
From: Lianqin Hu <hulianqin@vivo.com>

Considering that in some extreme cases, when the unbind operation
is being executed, gserial_disconnect has already cleared gser->ioport,
triggering a gadget reconfiguration at this time and gs_read_complete
gets called afterwards, which results in accessing null pointer,
add a null pointer check to prevent this situation.

Added a static spinlock to prevent gser->ioport from becoming
null after the newly added check.

Unable to handle kernel NULL pointer dereference at
virtual address 00000000000001a8
pc : gs_read_complete+0x58/0x240
lr : usb_gadget_giveback_request+0x40/0x160
sp : ffffffc00f1539c0
x29: ffffffc00f1539c0 x28: ffffff8002a30000 x27: 0000000000000000
x26: ffffff8002a30000 x25: 0000000000000000 x24: ffffff8002a30000
x23: ffffff8002ff9a70 x22: ffffff898e7a7b00 x21: ffffff803c9af9d8
x20: ffffff898e7a7b00 x19: 00000000000001a8 x18: ffffffc0099fd098
x17: 0000000000001000 x16: 0000000080000000 x15: 0000000ac1200000
x14: 0000000000000003 x13: 000000000000d5e8 x12: 0000000355c314ac
x11: 0000000000000015 x10: 0000000000000012 x9 : 0000000000000008
x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffffff887cd12000
x5 : 0000000000000002 x4 : ffffffc00f9b07f0 x3 : ffffffc00f1538d0
x2 : 0000000000000001 x1 : 0000000000000000 x0 : 00000000000001a8
Call trace:
gs_read_complete+0x58/0x240
usb_gadget_giveback_request+0x40/0x160
dwc3_remove_requests+0x170/0x484
dwc3_ep0_out_start+0xb0/0x1d4
__dwc3_gadget_start+0x25c/0x720
kretprobe_trampoline.cfi_jt+0x0/0x8
kretprobe_trampoline.cfi_jt+0x0/0x8
udc_bind_to_driver+0x1d8/0x300
usb_gadget_probe_driver+0xa8/0x1dc
gadget_dev_desc_UDC_store+0x13c/0x188
configfs_write_iter+0x160/0x1f4
vfs_write+0x2d0/0x40c
ksys_write+0x7c/0xf0
__arm64_sys_write+0x20/0x30
invoke_syscall+0x60/0x150
el0_svc_common+0x8c/0xf8
do_el0_svc+0x28/0xa0
el0_svc+0x24/0x84
el0t_64_sync_handler+0x88/0xec
el0t_64_sync+0x1b4/0x1b8
Code: aa1f03e1 aa1303e0 52800022 2a0103e8 (88e87e62)
---[ end trace 938847327a739172 ]---
Kernel panic - not syncing: Oops: Fatal exception

Fixes: c1dca562be8a ("usb gadget: split out serial core")
Cc: stable@vger.kernel.org
Suggested-by: Michael Nazzareno Trimarchi <michael@amarulasolutions.com>
Suggested-by: Prashanth K <quic_prashk@quicinc.com>
Signed-off-by: Lianqin Hu <hulianqin@vivo.com>
---
v9: Add gadget reconfiguration description in commit message.
v8: Updated patch submission description as suggested in v7 discussion.
v7: Remove code comments.
v6: Update the commit text.
v5: Add the Fixes tag.
v4: CC stable kernel.
v3: Add serial_port_lock protection when checking port pointer.
v2: Optimize code comments.
v1: Delete log printing.

 drivers/usb/gadget/function/u_serial.c | 31 +++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

Comments

胡连勤 Sept. 2, 2024, 2:38 p.m. UTC | #1
Ping......

>From: Lianqin Hu <hulianqin@vivo.com>
>
>Considering that in some extreme cases, when the unbind operation is being executed, gserial_disconnect has already cleared gser->ioport, triggering a gadget reconfiguration at this time and gs_read_complete gets called afterwards, which results in accessing null pointer, add a null pointer check to prevent this situation.
>
>Added a static spinlock to prevent gser->ioport from becoming null after the newly added check.
>
>Unable to handle kernel NULL pointer dereference at virtual address 00000000000001a8 pc : gs_read_complete+0x58/0x240 lr : usb_gadget_giveback_request+0x40/0x160
>sp : ffffffc00f1539c0
>x29: ffffffc00f1539c0 x28: ffffff8002a30000 x27: 0000000000000000
>x26: ffffff8002a30000 x25: 0000000000000000 x24: ffffff8002a30000
>x23: ffffff8002ff9a70 x22: ffffff898e7a7b00 x21: ffffff803c9af9d8
>x20: ffffff898e7a7b00 x19: 00000000000001a8 x18: ffffffc0099fd098
>x17: 0000000000001000 x16: 0000000080000000 x15: 0000000ac1200000
>x14: 0000000000000003 x13: 000000000000d5e8 x12: 0000000355c314ac
>x11: 0000000000000015 x10: 0000000000000012 x9 : 0000000000000008
>x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffffff887cd12000
>x5 : 0000000000000002 x4 : ffffffc00f9b07f0 x3 : ffffffc00f1538d0
>x2 : 0000000000000001 x1 : 0000000000000000 x0 : 00000000000001a8 Call trace:
>gs_read_complete+0x58/0x240
>usb_gadget_giveback_request+0x40/0x160
>dwc3_remove_requests+0x170/0x484
>dwc3_ep0_out_start+0xb0/0x1d4
>__dwc3_gadget_start+0x25c/0x720
>kretprobe_trampoline.cfi_jt+0x0/0x8
>kretprobe_trampoline.cfi_jt+0x0/0x8
>udc_bind_to_driver+0x1d8/0x300
>usb_gadget_probe_driver+0xa8/0x1dc
>gadget_dev_desc_UDC_store+0x13c/0x188
>configfs_write_iter+0x160/0x1f4
>vfs_write+0x2d0/0x40c
>ksys_write+0x7c/0xf0
>__arm64_sys_write+0x20/0x30
>invoke_syscall+0x60/0x150
>el0_svc_common+0x8c/0xf8
>do_el0_svc+0x28/0xa0
>el0_svc+0x24/0x84
>el0t_64_sync_handler+0x88/0xec
>el0t_64_sync+0x1b4/0x1b8
>Code: aa1f03e1 aa1303e0 52800022 2a0103e8 (88e87e62) ---[ end trace 938847327a739172 ]--- Kernel panic - not syncing: Oops: Fatal exception
>
>Fixes: c1dca562be8a ("usb gadget: split out serial core")
>Cc: stable@vger.kernel.org
>Suggested-by: Michael Nazzareno Trimarchi <michael@amarulasolutions.com>
>Suggested-by: Prashanth K <quic_prashk@quicinc.com>
>Signed-off-by: Lianqin Hu <hulianqin@vivo.com>
>---
>v9: Add gadget reconfiguration description in commit message.
>v8: Updated patch submission description as suggested in v7 discussion.
>v7: Remove code comments.
>v6: Update the commit text.
>v5: Add the Fixes tag.
>v4: CC stable kernel.
>v3: Add serial_port_lock protection when checking port pointer.
>v2: Optimize code comments.
>v1: Delete log printing.
>
> drivers/usb/gadget/function/u_serial.c | 31 +++++++++++++++++++++-----
> 1 file changed, 26 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
>index b394105e55d6..66d918523b3e 100644
>--- a/drivers/usb/gadget/function/u_serial.c
>+++ b/drivers/usb/gadget/function/u_serial.c
>@@ -452,20 +452,41 @@ static void gs_rx_push(struct work_struct *work)
> 
> static void gs_read_complete(struct usb_ep *ep, struct usb_request *req)  {
>-	struct gs_port	*port = ep->driver_data;
>+	struct gs_port	*port;
>+	unsigned long  flags;
>+
>+	spin_lock_irqsave(&serial_port_lock, flags);
>+	port = ep->driver_data;
>+
>+	if (!port) {
>+		spin_unlock_irqrestore(&serial_port_lock, flags);
>+		return;
>+	}
> 
>-	/* Queue all received data until the tty layer is ready for it. */
> 	spin_lock(&port->port_lock);
>+	spin_unlock(&serial_port_lock);
>+
>+	/* Queue all received data until the tty layer is ready for it. */
> 	list_add_tail(&req->list, &port->read_queue);
> 	schedule_delayed_work(&port->push, 0);
>-	spin_unlock(&port->port_lock);
>+	spin_unlock_irqrestore(&port->port_lock, flags);
> }
> 
> static void gs_write_complete(struct usb_ep *ep, struct usb_request *req)  {
>-	struct gs_port	*port = ep->driver_data;
>+	struct gs_port	*port;
>+	unsigned long  flags;
>+
>+	spin_lock_irqsave(&serial_port_lock, flags);
>+	port = ep->driver_data;
>+
>+	if (!port) {
>+		spin_unlock_irqrestore(&serial_port_lock, flags);
>+		return;
>+	}
> 
> 	spin_lock(&port->port_lock);
>+	spin_unlock(&serial_port_lock);
> 	list_add(&req->list, &port->write_pool);
> 	port->write_started--;
> 
>@@ -486,7 +507,7 @@ static void gs_write_complete(struct usb_ep *ep, struct usb_request *req)
> 		break;
> 	}
> 
>-	spin_unlock(&port->port_lock);
>+	spin_unlock_irqrestore(&port->port_lock, flags);
> }
> 
> static void gs_free_requests(struct usb_ep *ep, struct list_head *head,
>--
>2.39.0
Krzysztof Kozlowski Sept. 2, 2024, 4:08 p.m. UTC | #2
On 02/09/2024 16:38, 胡连勤 wrote:
> Ping......
> 

You ping after four days? As whom do you treat us? I already complained
about @vivo.com in several threads. That's one more to the list of very
poor interactions from @vivo.com.

READ submitting-patches very carefully and then get your process fixed.

Best regards,
Krzysztof
Krzysztof Kozlowski Sept. 2, 2024, 4:16 p.m. UTC | #3
On 29/08/2024 13:54, 胡连勤 wrote:
> From: Lianqin Hu <hulianqin@vivo.com>
> 
> Considering that in some extreme cases, when the unbind operation
> is being executed, gserial_disconnect has already cleared gser->ioport,
> triggering a gadget reconfiguration at this time and gs_read_complete
> gets called afterwards, which results in accessing null pointer,
> add a null pointer check to prevent this situation.
> 

...

>  
>  static void gs_write_complete(struct usb_ep *ep, struct usb_request *req)
>  {
> -	struct gs_port	*port = ep->driver_data;
> +	struct gs_port	*port;
> +	unsigned long  flags;
> +
> +	spin_lock_irqsave(&serial_port_lock, flags);
> +	port = ep->driver_data;
> +
> +	if (!port) {
> +		spin_unlock_irqrestore(&serial_port_lock, flags);
> +		return;
> +	}
>  
>  	spin_lock(&port->port_lock);
> +	spin_unlock(&serial_port_lock);

You pinged us for this after 4 days. This is damn v9 and still
unresolved comments from previous review.

Explain, how did you resolve Greg's comment about this unintuitive code:
https://lore.kernel.org/all/2024082251-grief-profanity-b0da@gregkh/

Pattern of immediacy, rush and impatience was used in one of latest big
messes (just google about harassing open source maintainers by some
random contributors).

I suggest go back to drawing board and improve the code instead of
making it spaghetti without explanation, even though we asked for that
explanation.


Best regards,
Krzysztof
胡连勤 Sept. 3, 2024, 2:23 a.m. UTC | #4
Hello linux community expert:

>You ping after four days? As whom do you treat us? I already complained about @vivo.com in several threads. That's one more to the list of very poor interactions from @vivo.com.

>READ submitting-patches very carefully and then get your process fixed.

We sincerely apologize. We have always respected the maintainers of the Linux community and had no intention of offending them.

Perhaps we were anxious and hoped that the patch could be merged earlier (to solve the problems encountered in production as soon as possible), 
which inadvertently offended the community maintainers. We apologize again for this.

Thanks
Greg Kroah-Hartman Sept. 3, 2024, 4:58 a.m. UTC | #5
On Tue, Sep 03, 2024 at 02:23:09AM +0000, 胡连勤 wrote:
> Hello linux community expert:
> 
> >You ping after four days? As whom do you treat us? I already complained about @vivo.com in several threads. That's one more to the list of very poor interactions from @vivo.com.
> 
> >READ submitting-patches very carefully and then get your process fixed.
> 
> We sincerely apologize. We have always respected the maintainers of the Linux community and had no intention of offending them.
> 
> Perhaps we were anxious and hoped that the patch could be merged earlier (to solve the problems encountered in production as soon as possible), 
> which inadvertently offended the community maintainers. We apologize again for this.

If you wish to have patches reviewed and merged quickly, please help out
in the review and testing of other changes that have been submitted
before yours, to help cut down the workload of those of us reviewing
these changes.

thanks,

greg k-h
Greg Kroah-Hartman Sept. 3, 2024, 6:26 a.m. UTC | #6
On Thu, Aug 29, 2024 at 11:54:39AM +0000, 胡连勤 wrote:
> From: Lianqin Hu <hulianqin@vivo.com>
> 
> Considering that in some extreme cases, when the unbind operation
> is being executed, gserial_disconnect has already cleared gser->ioport,
> triggering a gadget reconfiguration at this time and gs_read_complete
> gets called afterwards, which results in accessing null pointer,
> add a null pointer check to prevent this situation.
> 
> Added a static spinlock to prevent gser->ioport from becoming
> null after the newly added check.

In looking at this further, shouldn't we just be moving around where we
call usb_ep_disable() in gserial_disconnect()?

Can't we shut down the endpoints _BEFORE_ we set the port structures to
NULL?  After the endpoints are stopped, then we know that there is no
outstanding i/o possible and then we can clean up properly?

Not to say that your change doesn't fix the race condition here, but
cleaning up things in the proper order might be the better way as then
there can not be any race conditions at all?

Have you tried that?

thanks,

greg k-h
胡连勤 Sept. 3, 2024, 11:49 a.m. UTC | #7
Hello linux community expert:

>If you wish to have patches reviewed and merged quickly, please help out in the review and testing of other changes that have been submitted before yours, to help cut down the workload of those of us reviewing these changes.
 We usually do the related pressure tests after Patch. For example, this patch we have a USB mode switching pressure test, and the ADB ROOT & UNROOT pressure test.
  
Thanks
胡连勤 Sept. 3, 2024, 11:58 a.m. UTC | #8
Hello linux community expert:

>> From: Lianqin Hu <hulianqin@vivo.com>
>> 
>> Considering that in some extreme cases, when the unbind operation is 
>> being executed, gserial_disconnect has already cleared gser->ioport, 
>> triggering a gadget reconfiguration at this time and gs_read_complete 
>> gets called afterwards, which results in accessing null pointer, add a 
>> null pointer check to prevent this situation.
>> 
>> Added a static spinlock to prevent gser->ioport from becoming null 
>> after the newly added check.

>In looking at this further, shouldn't we just be moving around where we call usb_ep_disable() in gserial_disconnect()?

>Can't we shut down the endpoints _BEFORE_ we set the port structures to NULL?  After the endpoints are stopped, then we know that there is no outstanding i/o possible and then we can clean up properly?

>Not to say that your change doesn't fix the race condition here, but cleaning up things in the proper order might be the better way as then there can not be any race conditions at all?

>Have you tried that?

Thank you for your guidance, we studied and tried.

Thanks
Greg Kroah-Hartman Sept. 3, 2024, 12:05 p.m. UTC | #9
On Tue, Sep 03, 2024 at 11:49:25AM +0000, 胡连勤 wrote:
> Hello linux community expert:
> 
> >If you wish to have patches reviewed and merged quickly, please help out in the review and testing of other changes that have been submitted before yours, to help cut down the workload of those of us reviewing these changes.
>  We usually do the related pressure tests after Patch. For example, this patch we have a USB mode switching pressure test, and the ADB ROOT & UNROOT pressure test.

I'm not asking about your testing, I'm asking for you to help out with
the patch review of the other patches on the mailing list for the USB
subsystem, specifically for the usb gadget code which you are now
familiar with.

We always need help with that,

thanks,

greg k-h
Greg Kroah-Hartman Sept. 3, 2024, 12:06 p.m. UTC | #10
On Tue, Sep 03, 2024 at 11:58:37AM +0000, 胡连勤 wrote:
> Hello linux community expert:
> 
> >> From: Lianqin Hu <hulianqin@vivo.com>
> >> 
> >> Considering that in some extreme cases, when the unbind operation is 
> >> being executed, gserial_disconnect has already cleared gser->ioport, 
> >> triggering a gadget reconfiguration at this time and gs_read_complete 
> >> gets called afterwards, which results in accessing null pointer, add a 
> >> null pointer check to prevent this situation.
> >> 
> >> Added a static spinlock to prevent gser->ioport from becoming null 
> >> after the newly added check.
> 
> >In looking at this further, shouldn't we just be moving around where we call usb_ep_disable() in gserial_disconnect()?
> 
> >Can't we shut down the endpoints _BEFORE_ we set the port structures to NULL?  After the endpoints are stopped, then we know that there is no outstanding i/o possible and then we can clean up properly?
> 
> >Not to say that your change doesn't fix the race condition here, but cleaning up things in the proper order might be the better way as then there can not be any race conditions at all?
> 
> >Have you tried that?
> 
> Thank you for your guidance, we studied and tried.

I'm confused, you tried what and what happened?

greg k-h
diff mbox series

Patch

diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index b394105e55d6..66d918523b3e 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -452,20 +452,41 @@  static void gs_rx_push(struct work_struct *work)
 
 static void gs_read_complete(struct usb_ep *ep, struct usb_request *req)
 {
-	struct gs_port	*port = ep->driver_data;
+	struct gs_port	*port;
+	unsigned long  flags;
+
+	spin_lock_irqsave(&serial_port_lock, flags);
+	port = ep->driver_data;
+
+	if (!port) {
+		spin_unlock_irqrestore(&serial_port_lock, flags);
+		return;
+	}
 
-	/* Queue all received data until the tty layer is ready for it. */
 	spin_lock(&port->port_lock);
+	spin_unlock(&serial_port_lock);
+
+	/* Queue all received data until the tty layer is ready for it. */
 	list_add_tail(&req->list, &port->read_queue);
 	schedule_delayed_work(&port->push, 0);
-	spin_unlock(&port->port_lock);
+	spin_unlock_irqrestore(&port->port_lock, flags);
 }
 
 static void gs_write_complete(struct usb_ep *ep, struct usb_request *req)
 {
-	struct gs_port	*port = ep->driver_data;
+	struct gs_port	*port;
+	unsigned long  flags;
+
+	spin_lock_irqsave(&serial_port_lock, flags);
+	port = ep->driver_data;
+
+	if (!port) {
+		spin_unlock_irqrestore(&serial_port_lock, flags);
+		return;
+	}
 
 	spin_lock(&port->port_lock);
+	spin_unlock(&serial_port_lock);
 	list_add(&req->list, &port->write_pool);
 	port->write_started--;
 
@@ -486,7 +507,7 @@  static void gs_write_complete(struct usb_ep *ep, struct usb_request *req)
 		break;
 	}
 
-	spin_unlock(&port->port_lock);
+	spin_unlock_irqrestore(&port->port_lock, flags);
 }
 
 static void gs_free_requests(struct usb_ep *ep, struct list_head *head,