diff mbox series

[net-next,V2] ptp: fix corrupted list in ptp_open

Message ID tencent_24C96E7894D0EBA2EDD2CFB87BB66EC02D0A@qq.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next,V2] ptp: fix corrupted list in ptp_open | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1344 this patch: 1344
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 1369 this patch: 1369
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1369 this patch: 1369
netdev/checkpatch fail ERROR: trailing whitespace
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Edward Adam Davis Oct. 30, 2023, 9:07 p.m. UTC
There is no lock protection when writing ptp->tsevqs in ptp_open(),
ptp_release(), which can cause data corruption, use mutex lock to avoid this 
issue.

Moreover, ptp_release() should not be used to release the queue in ptp_read(),
and it should be deleted together.

Reported-and-tested-by: syzbot+df3f3ef31f60781fa911@syzkaller.appspotmail.com
Fixes: 8f5de6fb2453 ("ptp: support multiple timestamp event readers")
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
---
 drivers/ptp/ptp_chardev.c | 11 +++++++++--
 drivers/ptp/ptp_clock.c   |  3 +++
 drivers/ptp/ptp_private.h |  1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

Comments

Martin Habets Oct. 31, 2023, 9:28 a.m. UTC | #1
Please use a separate mail thread for a new patch revision.
See the section "Resending after review" in
Documentation/process/maintainer-netdev.rst.

Martin

On Tue, Oct 31, 2023 at 05:07:08AM +0800, Edward Adam Davis wrote:
> There is no lock protection when writing ptp->tsevqs in ptp_open(),
> ptp_release(), which can cause data corruption, use mutex lock to avoid this 
> issue.
> 
> Moreover, ptp_release() should not be used to release the queue in ptp_read(),
> and it should be deleted together.
> 
> Reported-and-tested-by: syzbot+df3f3ef31f60781fa911@syzkaller.appspotmail.com
> Fixes: 8f5de6fb2453 ("ptp: support multiple timestamp event readers")
> Signed-off-by: Edward Adam Davis <eadavis@qq.com>
> ---
>  drivers/ptp/ptp_chardev.c | 11 +++++++++--
>  drivers/ptp/ptp_clock.c   |  3 +++
>  drivers/ptp/ptp_private.h |  1 +
>  3 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
> index 282cd7d24077..e31551d2697d 100644
> --- a/drivers/ptp/ptp_chardev.c
> +++ b/drivers/ptp/ptp_chardev.c
> @@ -109,6 +109,9 @@ int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode)
>  	struct timestamp_event_queue *queue;
>  	char debugfsname[32];
>  
> +	if (mutex_lock_interruptible(&ptp->tsevq_mux)) 
> +		return -ERESTARTSYS;
> +
>  	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
>  	if (!queue)
>  		return -EINVAL;
> @@ -132,15 +135,20 @@ int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode)
>  	debugfs_create_u32_array("mask", 0444, queue->debugfs_instance,
>  				 &queue->dfs_bitmap);
>  
> +	mutex_unlock(&ptp->tsevq_mux);
>  	return 0;
>  }
>  
>  int ptp_release(struct posix_clock_context *pccontext)
>  {
>  	struct timestamp_event_queue *queue = pccontext->private_clkdata;
> +	struct ptp_clock *ptp =
> +		container_of(pccontext->clk, struct ptp_clock, clock);
>  	unsigned long flags;
>  
>  	if (queue) {
> +		if (mutex_lock_interruptible(&ptp->tsevq_mux)) 
> +			return -ERESTARTSYS;
>  		debugfs_remove(queue->debugfs_instance);
>  		pccontext->private_clkdata = NULL;
>  		spin_lock_irqsave(&queue->lock, flags);
> @@ -148,6 +156,7 @@ int ptp_release(struct posix_clock_context *pccontext)
>  		spin_unlock_irqrestore(&queue->lock, flags);
>  		bitmap_free(queue->mask);
>  		kfree(queue);
> +		mutex_unlock(&ptp->tsevq_mux);
>  	}
>  	return 0;
>  }
> @@ -585,7 +594,5 @@ ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags,
>  free_event:
>  	kfree(event);
>  exit:
> -	if (result < 0)
> -		ptp_release(pccontext);
>  	return result;
>  }
> diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
> index 3d1b0a97301c..7930db6ec18d 100644
> --- a/drivers/ptp/ptp_clock.c
> +++ b/drivers/ptp/ptp_clock.c
> @@ -176,6 +176,7 @@ static void ptp_clock_release(struct device *dev)
>  
>  	ptp_cleanup_pin_groups(ptp);
>  	kfree(ptp->vclock_index);
> +	mutex_destroy(&ptp->tsevq_mux);
>  	mutex_destroy(&ptp->pincfg_mux);
>  	mutex_destroy(&ptp->n_vclocks_mux);
>  	/* Delete first entry */
> @@ -247,6 +248,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
>  	if (!queue)
>  		goto no_memory_queue;
>  	list_add_tail(&queue->qlist, &ptp->tsevqs);
> +	mutex_init(&ptp->tsevq_mux);
>  	queue->mask = bitmap_alloc(PTP_MAX_CHANNELS, GFP_KERNEL);
>  	if (!queue->mask)
>  		goto no_memory_bitmap;
> @@ -356,6 +358,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
>  	if (ptp->kworker)
>  		kthread_destroy_worker(ptp->kworker);
>  kworker_err:
> +	mutex_destroy(&ptp->tsevq_mux);
>  	mutex_destroy(&ptp->pincfg_mux);
>  	mutex_destroy(&ptp->n_vclocks_mux);
>  	bitmap_free(queue->mask);
> diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
> index 52f87e394aa6..1525bd2059ba 100644
> --- a/drivers/ptp/ptp_private.h
> +++ b/drivers/ptp/ptp_private.h
> @@ -44,6 +44,7 @@ struct ptp_clock {
>  	struct pps_device *pps_source;
>  	long dialed_frequency; /* remembers the frequency adjustment */
>  	struct list_head tsevqs; /* timestamp fifo list */
> +	struct mutex tsevq_mux; /* one process at a time reading the fifo */
>  	struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
>  	wait_queue_head_t tsev_wq;
>  	int defunct; /* tells readers to go away when clock is being removed */
> -- 
> 2.25.1
>
Richard Cochran Nov. 2, 2023, 12:12 a.m. UTC | #2
On Tue, Oct 31, 2023 at 05:07:08AM +0800, Edward Adam Davis wrote:
> There is no lock protection when writing ptp->tsevqs in ptp_open(),
> ptp_release(), which can cause data corruption,

Really?  How?

> use mutex lock to avoid this 
> issue.
> 
> Moreover, ptp_release() should not be used to release the queue in ptp_read(),
> and it should be deleted together.
> 
> Reported-and-tested-by: syzbot+df3f3ef31f60781fa911@syzkaller.appspotmail.com
> Fixes: 8f5de6fb2453 ("ptp: support multiple timestamp event readers")
> Signed-off-by: Edward Adam Davis <eadavis@qq.com>
> ---
>  drivers/ptp/ptp_chardev.c | 11 +++++++++--
>  drivers/ptp/ptp_clock.c   |  3 +++
>  drivers/ptp/ptp_private.h |  1 +
>  3 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
> index 282cd7d24077..e31551d2697d 100644
> --- a/drivers/ptp/ptp_chardev.c
> +++ b/drivers/ptp/ptp_chardev.c
> @@ -109,6 +109,9 @@ int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode)
>  	struct timestamp_event_queue *queue;
>  	char debugfsname[32];
>  
> +	if (mutex_lock_interruptible(&ptp->tsevq_mux)) 
> +		return -ERESTARTSYS;
> +

This mutex is not needed.

Please don't ignore review comments.

Thanks,
Richard
Edward Adam Davis Nov. 2, 2023, 11:16 a.m. UTC | #3
On Wed, 1 Nov 2023 17:12:53 -0700 Richard Cochran wrote:
>> There is no lock protection when writing ptp->tsevqs in ptp_open(),
>> ptp_release(), which can cause data corruption,
>
>Really?  How?
Let me show the corruption that occurs in ptp_open() and ptp_release(), 

1. Corruption that appears in ptp_open(),
Link: https://syzkaller.appspot.com/bug?extid=df3f3ef31f60781fa911

list_add corruption. prev->next should be next (ffff88814a1325e8), but was ffff888078d25048. (prev=ffff888078d21048).
------------[ cut here ]------------
kernel BUG at lib/list_debug.c:32!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 7237 Comm: syz-executor182 Not tainted 6.6.0-rc6-next-20231020-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023
RIP: 0010:__list_add_valid_or_report+0xb6/0x100 lib/list_debug.c:32
Code: e8 2f a5 3a fd 0f 0b 48 89 d9 48 c7 c7 40 9d e9 8a e8 1e a5 3a fd 0f 0b 48 89 f1 48 c7 c7 c0 9d e9 8a 48 89 de e8 0a a5 3a fd <0f> 0b 48 89 f2 48 89 d9 48 89 ee 48 c7 c7 40 9e e9 8a e8 f3 a4 3a
RSP: 0018:ffffc90009b3f898 EFLAGS: 00010286
RAX: 0000000000000075 RBX: ffff88814a1325e8 RCX: ffffffff816bb8d9
RDX: 0000000000000000 RSI: ffffffff816c4d42 RDI: 0000000000000005
RBP: ffff88807c7a9048 R08: 0000000000000005 R09: 0000000000000000
R10: 0000000080000000 R11: 0000000000000001 R12: ffff88814a132000
R13: ffffc90009b3f900 R14: ffff888078d21048 R15: ffff88807c7a9048
FS:  0000555556c00380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007ffef0aa1138 CR3: 000000007d17e000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
 __list_add_valid include/linux/list.h:88 [inline]
 __list_add include/linux/list.h:150 [inline]
 list_add_tail include/linux/list.h:183 [inline]
 ptp_open+0x1c5/0x4f0 drivers/ptp/ptp_chardev.c:122
 posix_clock_open+0x17e/0x240 kernel/time/posix-clock.c:134
 chrdev_open+0x26d/0x6e0 fs/char_dev.c:414
 do_dentry_open+0x8d4/0x18d0 fs/open.c:948
 do_open fs/namei.c:3621 [inline]
 path_openat+0x1d36/0x2cd0 fs/namei.c:3778
 do_filp_open+0x1dc/0x430 fs/namei.c:3808
 do_sys_openat2+0x176/0x1e0 fs/open.c:1440
 do_sys_open fs/open.c:1455 [inline]
 __do_sys_openat fs/open.c:1471 [inline]
 __se_sys_openat fs/open.c:1466 [inline]
 __x64_sys_openat+0x175/0x210 fs/open.c:1466
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x3f/0x110 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0x6b
RIP: 0033:0x7fc6c2099ae9
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffef0aa1238 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc6c2099ae9
RDX: 0000000000000000 RSI: 0000000020000300 RDI: ffffffffffffff9c
RBP: 00000000000f4240 R08: 0000000000000000 R09: 00000000000000a0
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000000130fc
R13: 00007ffef0aa124c R14: 00007ffef0aa1260 R15: 00007ffef0aa1250
 </TASK>

2. Corruption that appears in ptp_open(),
Link: https://syzkaller.appspot.com/x/log.txt?x=169a58d1680000

list_del corruption. prev->next should be ffff8880280e5048, but was ffff888025dc1048. (prev=ffff88814adb1048)
------------[ cut here ]------------
kernel BUG at lib/list_debug.c:62!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 13142 Comm: syz-executor.2 Not tainted 6.6.0-rc6-next-20231018-syzkaller-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023
RIP: 0010:__list_del_entry_valid_or_report+0x11f/0x1b0
Code: 8f e9 8a e8 c3 d3 3a fd 0f 0b 48 89 ca 48 c7 c7 e0 8f e9 8a e8 b2 d3 3a fd 0f 0b 48 89 c2 48 c7 c7 40 90 e9 8a e8 a1 d3 3a fd <0f> 0b 48 89 d1 48 c7 c7 c0 90 e9 8a 48 89 c2 e8 8d d3 3a fd 0f 0b
RSP: 0018:ffffc90003167e08 EFLAGS: 00010086
RAX: 000000000000006d RBX: ffff8880280e4000 RCX: ffffffff816b9cd9
RDX: 0000000000000000 RSI: ffffffff816c3142 RDI: 0000000000000005
RBP: ffff888023b7c480 R08: 0000000000000005 R09: 0000000000000000
R10: 0000000080000001 R11: 0000000000000001 R12: 0000000000000293
R13: ffff8880280e5008 R14: ffff8880280e5048 R15: ffff8880280e5050
FS:  00005555557e3480(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f350fd98000 CR3: 000000002427a000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
 ptp_release+0xca/0x2a0
 posix_clock_release+0xa4/0x160
 __fput+0x270/0xbb0
 __fput_sync+0x47/0x50
 __x64_sys_close+0x87/0xf0
 do_syscall_64+0x3f/0x110
 entry_SYSCALL_64_after_hwframe+0x63/0x6b 


The above two logs can clearly indicate that there is corruption when 
executing the operation of writing ptp->tsevqs in ptp_open() and ptp_release().
>
>> use mutex lock to avoid this
>> issue.
>>
>> Moreover, ptp_release() should not be used to release the queue in ptp_read(),
>> and it should be deleted together.
>>
>> Reported-and-tested-by: syzbot+df3f3ef31f60781fa911@syzkaller.appspotmail.com
>> Fixes: 8f5de6fb2453 ("ptp: support multiple timestamp event readers")
>> Signed-off-by: Edward Adam Davis <eadavis@qq.com>
>> ---
>>  drivers/ptp/ptp_chardev.c | 11 +++++++++--
>>  drivers/ptp/ptp_clock.c   |  3 +++
>>  drivers/ptp/ptp_private.h |  1 +
>>  3 files changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
>> index 282cd7d24077..e31551d2697d 100644
>> --- a/drivers/ptp/ptp_chardev.c
>> +++ b/drivers/ptp/ptp_chardev.c
>> @@ -109,6 +109,9 @@ int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode)
>>  	struct timestamp_event_queue *queue;
>>  	char debugfsname[32];
>>
>> +	if (mutex_lock_interruptible(&ptp->tsevq_mux))
>> +		return -ERESTARTSYS;
>> +
>
>This mutex is not needed.
>
>Please don't ignore review comments.

Thanks,
edward
Richard Cochran Nov. 3, 2023, 11:15 p.m. UTC | #4
On Thu, Nov 02, 2023 at 07:16:17PM +0800, Edward Adam Davis wrote:
> The above two logs can clearly indicate that there is corruption when 
> executing the operation of writing ptp->tsevqs in ptp_open() and ptp_release().

So just remove the bogus call to ptp_release.

Thanks,
Richard
diff mbox series

Patch

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 282cd7d24077..e31551d2697d 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -109,6 +109,9 @@  int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode)
 	struct timestamp_event_queue *queue;
 	char debugfsname[32];
 
+	if (mutex_lock_interruptible(&ptp->tsevq_mux)) 
+		return -ERESTARTSYS;
+
 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
 	if (!queue)
 		return -EINVAL;
@@ -132,15 +135,20 @@  int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode)
 	debugfs_create_u32_array("mask", 0444, queue->debugfs_instance,
 				 &queue->dfs_bitmap);
 
+	mutex_unlock(&ptp->tsevq_mux);
 	return 0;
 }
 
 int ptp_release(struct posix_clock_context *pccontext)
 {
 	struct timestamp_event_queue *queue = pccontext->private_clkdata;
+	struct ptp_clock *ptp =
+		container_of(pccontext->clk, struct ptp_clock, clock);
 	unsigned long flags;
 
 	if (queue) {
+		if (mutex_lock_interruptible(&ptp->tsevq_mux)) 
+			return -ERESTARTSYS;
 		debugfs_remove(queue->debugfs_instance);
 		pccontext->private_clkdata = NULL;
 		spin_lock_irqsave(&queue->lock, flags);
@@ -148,6 +156,7 @@  int ptp_release(struct posix_clock_context *pccontext)
 		spin_unlock_irqrestore(&queue->lock, flags);
 		bitmap_free(queue->mask);
 		kfree(queue);
+		mutex_unlock(&ptp->tsevq_mux);
 	}
 	return 0;
 }
@@ -585,7 +594,5 @@  ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags,
 free_event:
 	kfree(event);
 exit:
-	if (result < 0)
-		ptp_release(pccontext);
 	return result;
 }
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 3d1b0a97301c..7930db6ec18d 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -176,6 +176,7 @@  static void ptp_clock_release(struct device *dev)
 
 	ptp_cleanup_pin_groups(ptp);
 	kfree(ptp->vclock_index);
+	mutex_destroy(&ptp->tsevq_mux);
 	mutex_destroy(&ptp->pincfg_mux);
 	mutex_destroy(&ptp->n_vclocks_mux);
 	/* Delete first entry */
@@ -247,6 +248,7 @@  struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	if (!queue)
 		goto no_memory_queue;
 	list_add_tail(&queue->qlist, &ptp->tsevqs);
+	mutex_init(&ptp->tsevq_mux);
 	queue->mask = bitmap_alloc(PTP_MAX_CHANNELS, GFP_KERNEL);
 	if (!queue->mask)
 		goto no_memory_bitmap;
@@ -356,6 +358,7 @@  struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	if (ptp->kworker)
 		kthread_destroy_worker(ptp->kworker);
 kworker_err:
+	mutex_destroy(&ptp->tsevq_mux);
 	mutex_destroy(&ptp->pincfg_mux);
 	mutex_destroy(&ptp->n_vclocks_mux);
 	bitmap_free(queue->mask);
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index 52f87e394aa6..1525bd2059ba 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -44,6 +44,7 @@  struct ptp_clock {
 	struct pps_device *pps_source;
 	long dialed_frequency; /* remembers the frequency adjustment */
 	struct list_head tsevqs; /* timestamp fifo list */
+	struct mutex tsevq_mux; /* one process at a time reading the fifo */
 	struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
 	wait_queue_head_t tsev_wq;
 	int defunct; /* tells readers to go away when clock is being removed */