Message ID | 20230912220217.2008895-2-reibax@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next,v2,1/3] ptp: Replace timestamp event queue with linked list | expand |
Xabier Marquiegui <reibax@gmail.com> writes: > Use linked lists to create one event queue per open file. This enables > simultaneous readers for timestamp event queues. > > Signed-off-by: Xabier Marquiegui <reibax@gmail.com> > Suggested-by: Richard Cochran <richardcochran@gmail.com> > --- > v2: > - fix ptp_poll() return value > - Style changes to comform to checkpatch strict suggestions > - more coherent ptp_read error exit routines > v1: https://lore.kernel.org/netdev/20230906104754.1324412-3-reibax@gmail.com/ > > drivers/ptp/ptp_chardev.c | 100 +++++++++++++++++++++++++++++--------- > drivers/ptp/ptp_clock.c | 6 +-- > drivers/ptp/ptp_private.h | 4 +- > drivers/ptp/ptp_sysfs.c | 4 -- > 4 files changed, 82 insertions(+), 32 deletions(-) > > diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c > index 197edf1179f1..c9da0f27d204 100644 > --- a/drivers/ptp/ptp_chardev.c > +++ b/drivers/ptp/ptp_chardev.c > @@ -103,9 +103,39 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, > > int ptp_open(struct posix_clock *pc, fmode_t fmode) > { > + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); > + struct timestamp_event_queue *queue; > + > + queue = kzalloc(sizeof(*queue), GFP_KERNEL); > + if (!queue) > + return -EINVAL; > + queue->reader_pid = task_pid_nr(current); Using the pid of the task will break when using some form of file descriptor passing. e.g. Task A opened the chardev, called the ioctl() with some mask and then passed the fd to Task B, that's actually going to use the fd. Is this something that we want to support? Am I missing something? > + list_add_tail(&queue->qlist, &ptp->tsevqs); > + > return 0; > } > > +int ptp_release(struct posix_clock *pc) > +{ > + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); > + struct list_head *pos, *n; > + struct timestamp_event_queue *element; > + int found = -1; > + pid_t reader_pid = task_pid_nr(current); > + > + list_for_each_safe(pos, n, &ptp->tsevqs) { > + element = list_entry(pos, struct timestamp_event_queue, qlist); > + if (element->reader_pid == reader_pid) { > + list_del(pos); > + kfree(element); > + found = 0; > + return found; > + } > + } > + > + return found; > +} > + > long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) > { > struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); > @@ -435,14 +465,24 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) > __poll_t ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait) > { > struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); > + pid_t reader_pid = task_pid_nr(current); > struct timestamp_event_queue *queue; > + struct list_head *pos, *n; > + bool found = false; > > poll_wait(fp, &ptp->tsev_wq, wait); > > - /* Extract only the first element in the queue list > - * TODO: Identify the relevant queue > - */ > - queue = list_entry(&ptp->tsevqs, struct timestamp_event_queue, qlist); > + /* Extract only the desired element in the queue list */ > + list_for_each_safe(pos, n, &ptp->tsevqs) { > + queue = list_entry(pos, struct timestamp_event_queue, qlist); > + if (queue->reader_pid == reader_pid) { > + found = true; > + break; > + } > + } > + > + if (!found) > + return EPOLLERR; > > return queue_cnt(queue) ? EPOLLIN : 0; > } > @@ -453,44 +493,54 @@ ssize_t ptp_read(struct posix_clock *pc, > uint rdflags, char __user *buf, size_t cnt) > { > struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); > + pid_t reader_pid = task_pid_nr(current); > struct timestamp_event_queue *queue; > struct ptp_extts_event *event; > + struct list_head *pos, *n; > unsigned long flags; > + bool found = false; > size_t qcnt, i; > int result; > > - /* Extract only the first element in the queue list > - * TODO: Identify the relevant queue > - */ > - queue = list_first_entry(&ptp->tsevqs, struct timestamp_event_queue, > - qlist); > + /* Extract only the desired element in the queue list */ > + list_for_each_safe(pos, n, &ptp->tsevqs) { > + queue = list_entry(pos, struct timestamp_event_queue, qlist); > + if (queue->reader_pid == reader_pid) { > + found = true; > + break; > + } > + } > > - if (cnt % sizeof(struct ptp_extts_event) != 0) > - return -EINVAL; > + if (!found) { > + result = -EINVAL; > + goto exit; > + } > + > + if (cnt % sizeof(struct ptp_extts_event) != 0) { > + result = -EINVAL; > + goto exit; > + } > > if (cnt > EXTTS_BUFSIZE) > cnt = EXTTS_BUFSIZE; > > cnt = cnt / sizeof(struct ptp_extts_event); > > - if (mutex_lock_interruptible(&ptp->tsevq_mux)) > - return -ERESTARTSYS; > - > if (wait_event_interruptible(ptp->tsev_wq, > ptp->defunct || queue_cnt(queue))) { > - mutex_unlock(&ptp->tsevq_mux); > - return -ERESTARTSYS; > + result = -ERESTARTSYS; > + goto exit; > } > > if (ptp->defunct) { > - mutex_unlock(&ptp->tsevq_mux); > - return -ENODEV; > + result = -ENODEV; > + goto exit; > } > > event = kmalloc(EXTTS_BUFSIZE, GFP_KERNEL); > if (!event) { > - mutex_unlock(&ptp->tsevq_mux); > - return -ENOMEM; > + result = -ENOMEM; > + goto exit; > } > > spin_lock_irqsave(&queue->lock, flags); > @@ -509,12 +559,16 @@ ssize_t ptp_read(struct posix_clock *pc, > > cnt = cnt * sizeof(struct ptp_extts_event); > > - mutex_unlock(&ptp->tsevq_mux); > - > result = cnt; > - if (copy_to_user(buf, event, cnt)) > + if (copy_to_user(buf, event, cnt)) { > result = -EFAULT; > + goto free_event; > + } > > +free_event: > kfree(event); > +exit: > + if (result < 0) > + ptp_release(pc); > return result; > } > diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c > index 7ac04a282ec5..d52fc23e20a8 100644 > --- a/drivers/ptp/ptp_clock.c > +++ b/drivers/ptp/ptp_clock.c > @@ -162,6 +162,7 @@ static struct posix_clock_operations ptp_clock_ops = { > .clock_settime = ptp_clock_settime, > .ioctl = ptp_ioctl, > .open = ptp_open, > + .release = ptp_release, > .poll = ptp_poll, > .read = ptp_read, > }; > @@ -184,7 +185,6 @@ static void ptp_clock_release(struct device *dev) > > ptp_cleanup_pin_groups(ptp); > kfree(ptp->vclock_index); > - mutex_destroy(&ptp->tsevq_mux); > mutex_destroy(&ptp->pincfg_mux); > mutex_destroy(&ptp->n_vclocks_mux); > ptp_clean_queue_list(ptp); > @@ -246,10 +246,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, > queue = kzalloc(sizeof(*queue), GFP_KERNEL); > if (!queue) > goto no_memory_queue; > + queue->reader_pid = 0; > spin_lock_init(&queue->lock); > list_add_tail(&queue->qlist, &ptp->tsevqs); > - /* TODO - Transform or delete this mutex */ > - mutex_init(&ptp->tsevq_mux); > mutex_init(&ptp->pincfg_mux); > mutex_init(&ptp->n_vclocks_mux); > init_waitqueue_head(&ptp->tsev_wq); > @@ -350,7 +349,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, > if (ptp->kworker) > kthread_destroy_worker(ptp->kworker); > kworker_err: > - mutex_destroy(&ptp->tsevq_mux); > mutex_destroy(&ptp->pincfg_mux); > mutex_destroy(&ptp->n_vclocks_mux); > ptp_clean_queue_list(ptp); > diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h > index 314c21c39f6a..046d1482bcee 100644 > --- a/drivers/ptp/ptp_private.h > +++ b/drivers/ptp/ptp_private.h > @@ -27,6 +27,7 @@ struct timestamp_event_queue { > int tail; > spinlock_t lock; > struct list_head qlist; > + pid_t reader_pid; > }; > > struct ptp_clock { > @@ -38,7 +39,6 @@ struct ptp_clock { > struct pps_device *pps_source; > long dialed_frequency; /* remembers the frequency adjustment */ > struct list_head tsevqs; /* timestamp fifo list */ > - struct mutex tsevq_mux; /* one process at a time reading the fifo */ > struct mutex pincfg_mux; /* protect concurrent info->pin_config access */ > wait_queue_head_t tsev_wq; > int defunct; /* tells readers to go away when clock is being removed */ > @@ -124,6 +124,8 @@ long ptp_ioctl(struct posix_clock *pc, > > int ptp_open(struct posix_clock *pc, fmode_t fmode); > > +int ptp_release(struct posix_clock *pc); > + > ssize_t ptp_read(struct posix_clock *pc, > uint flags, char __user *buf, size_t cnt); > > diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c > index 2675f383cd0a..512b0164ef18 100644 > --- a/drivers/ptp/ptp_sysfs.c > +++ b/drivers/ptp/ptp_sysfs.c > @@ -87,9 +87,6 @@ static ssize_t extts_fifo_show(struct device *dev, > > memset(&event, 0, sizeof(event)); > > - if (mutex_lock_interruptible(&ptp->tsevq_mux)) > - return -ERESTARTSYS; > - > spin_lock_irqsave(&queue->lock, flags); > qcnt = queue_cnt(queue); > if (qcnt) { > @@ -104,7 +101,6 @@ static ssize_t extts_fifo_show(struct device *dev, > cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n", > event.index, event.t.sec, event.t.nsec); > out: > - mutex_unlock(&ptp->tsevq_mux); > return cnt; > } > static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL); > -- > 2.34.1 > > Cheers,
> Using the pid of the task will break when using some form of file > descriptor passing. e.g. Task A opened the chardev, called the ioctl() > with some mask and then passed the fd to Task B, that's actually going > to use the fd. > > Is this something that we want to support? Am I missing something? Thank you very much for your valuable comments Vinicius, Let me try to confirm if I understand what you are observing here. Let's say we have a process with two threads. One of them opens the device file to do IOCTL operations, and the other one opens the device file to read timestamp events. Since both have the same PID, all the operations (read,release...) would fail because I designed them under the assumption that only one open operation would happen per PID. This is indeed not as safe as it should be and I should try to improve it. I am already looking at alternatives, but maybe someone can give me a hint. Do you have any suggestions on what I could do to have file operations (read, release...) determine which open instance they belong to? Cheers.
Xabier Marquiegui <xabier.marquiegui@gmail.com> writes: >> Using the pid of the task will break when using some form of file >> descriptor passing. e.g. Task A opened the chardev, called the ioctl() >> with some mask and then passed the fd to Task B, that's actually going >> to use the fd. >> >> Is this something that we want to support? Am I missing something? > > Thank you very much for your valuable comments Vinicius, > > Let me try to confirm if I understand what you are observing here. > > Let's say we have a process with two threads. One of them opens the device > file to do IOCTL operations, and the other one opens the device file to > read timestamp events. Since both have the same PID, all the operations > (read,release...) would fail because I designed them under the assumption > that only one open operation would happen per PID. > I was initially thinking about another scenario: when Process A passes the open file descriptor via SCM_RIGHTS to Process B. But your scenario also shows the limitations of the current approach. > This is indeed not as safe as it should be and I should try to improve it. > > I am already looking at alternatives, but maybe someone can give me a hint. > Do you have any suggestions on what I could do to have file operations > (read, release...) determine which open instance they belong to? Taking a quick look, it seems that you would have to change 'struct posix_clock_file_operations' to also pass around the 'struct file' of the file being used. That way we can track each user/"open()". And if one program decides that it needs to have have multiple fds with different masks, and so different queues, it should just work. What do you think? Cheers,
> Taking a quick look, it seems that you would have to change 'struct > posix_clock_file_operations' to also pass around the 'struct file' of > the file being used. > > That way we can track each user/"open()". And if one program decides > that it needs to have have multiple fds with different masks, and so > different queues, it should just work. > > What do you think? Thank you for the suggestion Vinicius. That was my initial approach, but I couldn't make it work. Maybe I'm missing something. I searched struct file for some variable that would help identifying the open/read/release instance a call belongs to, but couldn't find any. After that, I tried printing the struct file *fp pointer on every open call. Unless I did something wrong I would say that it points to the same address every time for a specific device file. My understanding is that if it was different for each time open gets called, it would help us identify the user of the rest of file operation calls, but not if it's the same. It is true that I did this verification on kernel 6.1.38 and not the latest, but I don't think that specific aspect has changed. I will keep looking for options, but any help would be appreciated. Thanks. -- Xabier.
On Wed, Sep 13, 2023 at 02:25:48PM -0700, Vinicius Costa Gomes wrote: > Taking a quick look, it seems that you would have to change 'struct > posix_clock_file_operations' to also pass around the 'struct file' of > the file being used. And let drivers compare struct file pointers from different consumers? > That way we can track each user/"open()". And if one program decides > that it needs to have have multiple fds with different masks, and so > different queues, it should just work. > > What do you think? See posix-clock.c : posix_clock_open() When the file is opened, the fp->private_data is used to track the posix_clock that was registered as a character device by the ptp clock instance. That character device may be opened multiple times, each time there is a unique fp, but fp->private_data points to the same ptp clock instance. So the information of which fp is being read() is lost. Looks like you will have to re-work posix-clock.c to allow drivers to provide their own "private" data populated during posix_clock_operations::open() Needs thought... Thanks, Richard
Hi Richard, Richard Cochran <richardcochran@gmail.com> writes: > On Wed, Sep 13, 2023 at 02:25:48PM -0700, Vinicius Costa Gomes wrote: > >> Taking a quick look, it seems that you would have to change 'struct >> posix_clock_file_operations' to also pass around the 'struct file' of >> the file being used. > > And let drivers compare struct file pointers from different consumers? > That was the first idea, not very good I admit. What you said below sounds better, more "final product". >> That way we can track each user/"open()". And if one program decides >> that it needs to have have multiple fds with different masks, and so >> different queues, it should just work. >> >> What do you think? > > See posix-clock.c : posix_clock_open() > > When the file is opened, the fp->private_data is used to track the > posix_clock that was registered as a character device by the ptp > clock instance. > > That character device may be opened multiple times, each time there is > a unique fp, but fp->private_data points to the same ptp clock instance. > > So the information of which fp is being read() is lost. yeah, that's the core issue. > > Looks like you will have to re-work posix-clock.c to allow drivers to > provide their own "private" data populated during posix_clock_operations::open() > > Needs thought... > Cheers,
Hello, kernel test robot noticed "kernel_BUG_at_lib/list_debug.c" on: commit: bb1445038f83a91450bc74ccc5a6f2b702233584 ("[PATCH net-next v2 2/3] ptp: support multiple timestamp event readers") url: https://github.com/intel-lab-lkp/linux/commits/Xabier-Marquiegui/ptp-support-multiple-timestamp-event-readers/20230913-060341 base: https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git 8fc8911b66962c6ff4345e7000930a4bcc54ae5a patch link: https://lore.kernel.org/all/20230912220217.2008895-2-reibax@gmail.com/ patch subject: [PATCH net-next v2 2/3] ptp: support multiple timestamp event readers in testcase: stress-ng version: stress-ng-x86_64-0.15.04-1_20230912 with following parameters: nr_threads: 100% disk: 1HDD testtime: 60s class: device test: dev cpufreq_governor: performance compiler: gcc-12 test machine: 64 threads 2 sockets Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz (Ice Lake) with 256G memory (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202309192210.a6367ad9-oliver.sang@intel.com The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20230919/202309192210.a6367ad9-oliver.sang@intel.com [ 42.745852][ T4208] list_del corruption. next->prev should be ff11000108593010, but was ff11002089351010. (next=ff1100011152b3f0) [ 42.745864][ T4208] ------------[ cut here ]------------ [ 42.745864][ T4208] kernel BUG at lib/list_debug.c:65! [ 42.745870][ T4208] invalid opcode: 0000 [#1] SMP NOPTI [ 42.745873][ T4208] CPU: 14 PID: 4208 Comm: stress-ng-dev Not tainted 6.5.0-12685-gbb1445038f83 #1 [ 42.745875][ T4208] Hardware name: Inspur NF5180M6/NF5180M6, BIOS 06.00.04 04/12/2022 [ 42.745876][ T4208] RIP: 0010:__list_del_entry_valid_or_report+0xbe/0xf0 [ 42.745884][ T4208] Code: 0b 48 89 fe 48 89 c2 48 c7 c7 78 c0 71 82 e8 99 53 a3 ff 0f 0b 48 89 d1 48 c7 c7 c8 c0 71 82 48 89 f2 48 89 c6 e8 82 53 a3 ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 66 2e 0f 1f 84 00 00 00 00 00 [ 42.745885][ T4208] RSP: 0018:ffa000000ef47ec0 EFLAGS: 00010246 [ 42.745886][ T4208] RAX: 000000000000006d RBX: ff11000108593010 RCX: 0000000000000000 [ 42.745887][ T4208] RDX: 0000000000000000 RSI: ff11001fffd9c6c0 RDI: ff11001fffd9c6c0 [ 42.745888][ T4208] RBP: 0000000000000000 R08: 80000000ffff8c59 R09: 0000000000ffff10 [ 42.745889][ T4208] R10: 000000000000000f R11: 000000000000000f R12: ff1100011152b000 [ 42.745889][ T4208] R13: ff1100208a28ad20 R14: ff11000108fe40c0 R15: 0000000000000000 [ 42.745890][ T4208] FS: 00007f28c2aa0700(0000) GS:ff11001fffd80000(0000) knlGS:0000000000000000 [ 42.745891][ T4208] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 42.745892][ T4208] CR2: 00007f28c2a9dbf0 CR3: 00000001dd39e003 CR4: 0000000000771ee0 [ 42.745893][ T4208] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 42.745893][ T4208] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 42.745894][ T4208] PKRU: 55555554 [ 42.745894][ T4208] Call Trace: [ 42.745896][ T4208] <TASK> [ 42.745898][ T4208] ? die+0x36/0xb0 [ 42.745901][ T4208] ? do_trap+0xda/0x130 [ 42.745904][ T4208] ? __list_del_entry_valid_or_report+0xbe/0xf0 [ 42.745906][ T4208] ? do_error_trap+0x65/0xb0 [ 42.745908][ T4208] ? __list_del_entry_valid_or_report+0xbe/0xf0 [ 42.745910][ T4208] ? exc_invalid_op+0x50/0x70 [ 42.745915][ T4208] ? __list_del_entry_valid_or_report+0xbe/0xf0 [ 42.745917][ T4208] ? asm_exc_invalid_op+0x1a/0x20 [ 42.745920][ T4208] ? __list_del_entry_valid_or_report+0xbe/0xf0 [ 42.745922][ T4208] ptp_release+0x4c/0xb0 [ 42.745928][ T4208] posix_clock_release+0x28/0x70 [ 42.745933][ T4208] __fput+0xea/0x2b0 [ 42.745938][ T4208] __x64_sys_close+0x3d/0xb0 [ 42.745940][ T4208] do_syscall_64+0x3b/0xb0 [ 42.745942][ T4208] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 42.745947][ T4208] RIP: 0033:0x7f28c52acc2b [ 42.745949][ T4208] Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 a3 4d f9 ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 e1 4d f9 ff 8b 44 [ 42.745950][ T4208] RSP: 002b:00007f28c2a9ebf0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 [ 42.745951][ T4208] RAX: ffffffffffffffda RBX: 00005629082e0920 RCX: 00007f28c52acc2b [ 42.745952][ T4208] RDX: 0000000000000062 RSI: 00005629096837d6 RDI: 0000000000000007 [ 42.745952][ T4208] RBP: 0000000000000007 R08: 0000000000000000 R09: 0000000000000000 [ 42.745953][ T4208] R10: 0000000000000001 R11: 0000000000000293 R12: 00005629096837a0 [ 42.745953][ T4208] R13: 00000000ffffffff R14: 00005629096837d0 R15: 00007fff201baa20 [ 42.745954][ T4208] </TASK> [ 42.745955][ T4208] Modules linked in: rfkill uhid vfio_iommu_type1 vfio vhost_net tun vhost_vsock vmw_vsock_virtio_transport_common vsock vhost vhost_iotlb intel_rapl_msr intel_rapl_common btrfs x86_pkg_temp_thermal blake2b_generic xor ipmi_ssif coretemp kvm_intel kvm irqbypass raid6_pq crct10dif_pclmul libcrc32c crc32_pclmul sd_mod crc32c_intel sg ghash_clmulni_intel nvme sha512_ssse3 nvme_core rapl ahci ast t10_pi intel_cstate libahci acpi_ipmi drm_shmem_helper mei_me crc64_rocksoft_generic intel_uncore i2c_i801 dax_hmem ioatdma crc64_rocksoft megaraid_sas ipmi_si crc64 libata drm_kms_helper mei joydev i2c_smbus dca intel_pch_thermal wmi ipmi_devintf ipmi_msghandler acpi_power_meter drm fuse ip_tables [ 42.745984][ T4208] ---[ end trace 0000000000000000 ]--- [ 42.747395][ T4218] list_add corruption. prev->next should be next (ff1100011152b3f0), but was ff1100029b963010. (prev=ff11004063643010). [ 42.747406][ T4218] ------------[ cut here ]------------ [ 42.747407][ T4218] kernel BUG at lib/list_debug.c:32! [ 42.747413][ T4218] invalid opcode: 0000 [#2] SMP NOPTI [ 42.747416][ T4218] CPU: 61 PID: 4218 Comm: stress-ng-dev Tainted: G D 6.5.0-12685-gbb1445038f83 #1 [ 42.747418][ T4218] Hardware name: Inspur NF5180M6/NF5180M6, BIOS 06.00.04 04/12/2022 [ 42.747420][ T4218] RIP: 0010:__list_add_valid_or_report+0x78/0xb0 [ 42.747427][ T4218] Code: a3 ff 0f 0b 48 89 c1 48 c7 c7 c0 be 71 82 e8 9f 54 a3 ff 0f 0b 48 89 d1 48 89 c6 4c 89 c2 48 c7 c7 18 bf 71 82 e8 88 54 a3 ff <0f> 0b 48 89 f2 48 89 c1 48 89 fe 48 c7 c7 70 bf 71 82 e8 71 54 a3 [ 42.747429][ T4218] RSP: 0018:ffa000000fdffc28 EFLAGS: 00010246 [ 42.747431][ T4218] RAX: 0000000000000075 RBX: ff11004063640000 RCX: 0000000000000000 [ 42.747433][ T4218] RDX: 0000000000000000 RSI: ff11003fc335c6c0 RDI: ff11003fc335c6c0 [ 42.747434][ T4218] RBP: ff1100011152b000 R08: 80000000ffff8c89 R09: 0000000000ffff10 [ 42.747436][ T4218] R10: 000000000000000f R11: 000000000000000f R12: ff11004063643010 [ 42.747437][ T4218] R13: ff11004063641010 R14: ff1100011152b3f0 R15: 0000000000000000 [ 42.747439][ T4218] FS: 00007f28c129d700(0000) GS:ff11003fc3340000(0000) knlGS:0000000000000000 [ 42.747441][ T4218] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 42.747443][ T4218] CR2: 00007f28c129abf0 CR3: 00000001dd39e001 CR4: 0000000000771ee0 [ 42.747445][ T4218] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 42.747446][ T4218] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 42.747447][ T4218] PKRU: 55555554 [ 42.747448][ T4218] Call Trace: [ 42.747450][ T4218] <TASK> [ 42.747452][ T4218] ? die+0x36/0xb0 [ 42.747456][ T4218] ? do_trap+0xda/0x130 [ 42.747458][ T4218] ? __list_add_valid_or_report+0x78/0xb0 [ 42.747461][ T4218] ? do_error_trap+0x65/0xb0 [ 42.747464][ T4218] ? __list_add_valid_or_report+0x78/0xb0 [ 42.747466][ T4218] ? exc_invalid_op+0x50/0x70 [ 42.747471][ T4218] ? __list_add_valid_or_report+0x78/0xb0 [ 42.747474][ T4218] ? asm_exc_invalid_op+0x1a/0x20 [ 42.747477][ T4218] ? __list_add_valid_or_report+0x78/0xb0 [ 42.747479][ T4218] ? __list_add_valid_or_report+0x78/0xb0 [ 42.747482][ T4218] ptp_open+0x6a/0xb0 [ 42.747487][ T4218] posix_clock_open+0x47/0xb0 [ 42.747491][ T4218] chrdev_open+0xf7/0x270 [ 42.747495][ T4218] ? __pfx_chrdev_open+0x10/0x10 [ 42.747497][ T4218] do_dentry_open+0x200/0x4f0 [ 42.747499][ T4218] do_open+0x291/0x430 [ 42.747503][ T4218] ? open_last_lookups+0x8b/0x430 [ 42.747505][ T4218] path_openat+0x130/0x2f0 [ 42.747508][ T4218] do_filp_open+0xb3/0x170 [ 42.747512][ T4218] do_sys_openat2+0xab/0xf0 [ 42.747515][ T4218] __x64_sys_openat+0x6e/0xb0 [ 42.747517][ T4218] do_syscall_64+0x3b/0xb0 [ 42.747521][ T4218] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 42.747525][ T4218] RIP: 0033:0x7f28c52ac1a4 [ 42.747528][ T4218] Code: 84 00 00 00 00 00 44 89 54 24 0c e8 36 58 f9 ff 44 8b 54 24 0c 44 89 e2 48 89 ee 41 89 c0 bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 34 44 89 c7 89 44 24 0c e8 68 58 f9 ff 8b 44 [ 42.747529][ T4218] RSP: 002b:00007f28c129bac0 EFLAGS: 00000293 ORIG_RAX: 0000000000000101 [ 42.747531][ T4218] RAX: ffffffffffffffda RBX: 000000000ee6b280 RCX: 00007f28c52ac1a4 [ 42.747533][ T4218] RDX: 0000000000040802 RSI: 00005629096837d0 RDI: 00000000ffffff9c [ 42.747534][ T4218] RBP: 00005629096837d0 R08: 0000000000000000 R09: 00007f28ac000b60 [ 42.747536][ T4218] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000040802 [ 42.747537][ T4218] R13: 0000000000040802 R14: 0000000000000000 R15: 00007fff201baa20 [ 42.747539][ T4218] </TASK> [ 42.747539][ T4218] Modules linked in: rfkill uhid vfio_iommu_type1 vfio vhost_net tun vhost_vsock vmw_vsock_virtio_transport_common vsock vhost vhost_iotlb intel_rapl_msr intel_rapl_common btrfs x86_pkg_temp_thermal blake2b_generic xor ipmi_ssif coretemp kvm_intel kvm irqbypass raid6_pq crct10dif_pclmul libcrc32c crc32_pclmul sd_mod crc32c_intel sg ghash_clmulni_intel nvme sha512_ssse3 nvme_core rapl ahci ast t10_pi intel_cstate libahci acpi_ipmi drm_shmem_helper mei_me crc64_rocksoft_generic intel_uncore i2c_i801 dax_hmem ioatdma crc64_rocksoft megaraid_sas ipmi_si crc64 libata drm_kms_helper mei joydev i2c_smbus dca intel_pch_thermal wmi ipmi_devintf ipmi_msghandler acpi_power_meter drm fuse ip_tables [ 42.747587][ T4218] ---[ end trace 0000000000000000 ]--- [ 42.766845][ T4216] list_add corruption. prev->next should be next (ff1100011152b3f0), but was ff110002610fb010. (prev=ff1100026116b010). [ 42.784805][ T4210] PM: hibernation: Marking nosave pages: [mem 0x00000000-0x00000fff] [ 42.784808][ T4210] PM: hibernation: Marking nosave pages: [mem 0x0003e000-0x0003efff] [ 42.784810][ T4210] PM: hibernation: Marking nosave pages: [mem 0x000a0000-0x000fffff] [ 42.784813][ T4210] PM: hibernation: Marking nosave pages: [mem 0x6702e000-0x6c1fefff] [ 42.785002][ T4210] PM: hibernation: Marking nosave pages: [mem 0x6f800000-0xffffffff] [ 42.785751][ T988] [ 42.785855][ T4210] PM: hibernation: Basic memory bitmaps created [ 42.803384][ T4210] PM: hibernation: Basic memory bitmaps freed [ 42.803908][ T4210] random: crng reseeded on system resumption [ 42.819198][ T4208] RIP: 0010:__list_del_entry_valid_or_report+0xbe/0xf0 [ 42.819207][ T4208] Code: 0b 48 89 fe 48 89 c2 48 c7 c7 78 c0 71 82 e8 99 53 a3 ff 0f 0b 48 89 d1 48 c7 c7 c8 c0 71 82 48 89 f2 48 89 c6 e8 82 53 a3 ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 66 2e 0f 1f 84 00 00 00 00 00 [ 42.819209][ T4208] RSP: 0018:ffa000000ef47ec0 EFLAGS: 00010246 [ 42.819213][ T4208] RAX: 000000000000006d RBX: ff11000108593010 RCX: 0000000000000000 [ 42.819215][ T4208] RDX: 0000000000000000 RSI: ff11001fffd9c6c0 RDI: ff11001fffd9c6c0 [ 42.819217][ T4208] RBP: 0000000000000000 R08: 80000000ffff8c59 R09: 0000000000ffff10 [ 42.819219][ T4208] R10: 000000000000000f R11: 000000000000000f R12: ff1100011152b000 [ 42.819221][ T4208] R13: ff1100208a28ad20 R14: ff11000108fe40c0 R15: 0000000000000000 [ 42.819222][ T4208] FS: 00007f28c2aa0700(0000) GS:ff11001fffd80000(0000) knlGS:0000000000000000 [ 42.819224][ T4208] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 42.819226][ T4208] CR2: 00007f28c2a9dbf0 CR3: 00000001dd39e003 CR4: 0000000000771ee0 [ 42.819227][ T4208] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 42.819229][ T4208] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 42.819230][ T4208] PKRU: 55555554 [ 42.819232][ T4208] Kernel panic - not syncing: Fatal exception [ 44.547139][ T4208] Kernel Offset: disabled
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 197edf1179f1..c9da0f27d204 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -103,9 +103,39 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, int ptp_open(struct posix_clock *pc, fmode_t fmode) { + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct timestamp_event_queue *queue; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -EINVAL; + queue->reader_pid = task_pid_nr(current); + list_add_tail(&queue->qlist, &ptp->tsevqs); + return 0; } +int ptp_release(struct posix_clock *pc) +{ + struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct list_head *pos, *n; + struct timestamp_event_queue *element; + int found = -1; + pid_t reader_pid = task_pid_nr(current); + + list_for_each_safe(pos, n, &ptp->tsevqs) { + element = list_entry(pos, struct timestamp_event_queue, qlist); + if (element->reader_pid == reader_pid) { + list_del(pos); + kfree(element); + found = 0; + return found; + } + } + + return found; +} + long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); @@ -435,14 +465,24 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) __poll_t ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + pid_t reader_pid = task_pid_nr(current); struct timestamp_event_queue *queue; + struct list_head *pos, *n; + bool found = false; poll_wait(fp, &ptp->tsev_wq, wait); - /* Extract only the first element in the queue list - * TODO: Identify the relevant queue - */ - queue = list_entry(&ptp->tsevqs, struct timestamp_event_queue, qlist); + /* Extract only the desired element in the queue list */ + list_for_each_safe(pos, n, &ptp->tsevqs) { + queue = list_entry(pos, struct timestamp_event_queue, qlist); + if (queue->reader_pid == reader_pid) { + found = true; + break; + } + } + + if (!found) + return EPOLLERR; return queue_cnt(queue) ? EPOLLIN : 0; } @@ -453,44 +493,54 @@ ssize_t ptp_read(struct posix_clock *pc, uint rdflags, char __user *buf, size_t cnt) { struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + pid_t reader_pid = task_pid_nr(current); struct timestamp_event_queue *queue; struct ptp_extts_event *event; + struct list_head *pos, *n; unsigned long flags; + bool found = false; size_t qcnt, i; int result; - /* Extract only the first element in the queue list - * TODO: Identify the relevant queue - */ - queue = list_first_entry(&ptp->tsevqs, struct timestamp_event_queue, - qlist); + /* Extract only the desired element in the queue list */ + list_for_each_safe(pos, n, &ptp->tsevqs) { + queue = list_entry(pos, struct timestamp_event_queue, qlist); + if (queue->reader_pid == reader_pid) { + found = true; + break; + } + } - if (cnt % sizeof(struct ptp_extts_event) != 0) - return -EINVAL; + if (!found) { + result = -EINVAL; + goto exit; + } + + if (cnt % sizeof(struct ptp_extts_event) != 0) { + result = -EINVAL; + goto exit; + } if (cnt > EXTTS_BUFSIZE) cnt = EXTTS_BUFSIZE; cnt = cnt / sizeof(struct ptp_extts_event); - if (mutex_lock_interruptible(&ptp->tsevq_mux)) - return -ERESTARTSYS; - if (wait_event_interruptible(ptp->tsev_wq, ptp->defunct || queue_cnt(queue))) { - mutex_unlock(&ptp->tsevq_mux); - return -ERESTARTSYS; + result = -ERESTARTSYS; + goto exit; } if (ptp->defunct) { - mutex_unlock(&ptp->tsevq_mux); - return -ENODEV; + result = -ENODEV; + goto exit; } event = kmalloc(EXTTS_BUFSIZE, GFP_KERNEL); if (!event) { - mutex_unlock(&ptp->tsevq_mux); - return -ENOMEM; + result = -ENOMEM; + goto exit; } spin_lock_irqsave(&queue->lock, flags); @@ -509,12 +559,16 @@ ssize_t ptp_read(struct posix_clock *pc, cnt = cnt * sizeof(struct ptp_extts_event); - mutex_unlock(&ptp->tsevq_mux); - result = cnt; - if (copy_to_user(buf, event, cnt)) + if (copy_to_user(buf, event, cnt)) { result = -EFAULT; + goto free_event; + } +free_event: kfree(event); +exit: + if (result < 0) + ptp_release(pc); return result; } diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 7ac04a282ec5..d52fc23e20a8 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -162,6 +162,7 @@ static struct posix_clock_operations ptp_clock_ops = { .clock_settime = ptp_clock_settime, .ioctl = ptp_ioctl, .open = ptp_open, + .release = ptp_release, .poll = ptp_poll, .read = ptp_read, }; @@ -184,7 +185,6 @@ static void ptp_clock_release(struct device *dev) ptp_cleanup_pin_groups(ptp); kfree(ptp->vclock_index); - mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); mutex_destroy(&ptp->n_vclocks_mux); ptp_clean_queue_list(ptp); @@ -246,10 +246,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, queue = kzalloc(sizeof(*queue), GFP_KERNEL); if (!queue) goto no_memory_queue; + queue->reader_pid = 0; spin_lock_init(&queue->lock); list_add_tail(&queue->qlist, &ptp->tsevqs); - /* TODO - Transform or delete this mutex */ - mutex_init(&ptp->tsevq_mux); mutex_init(&ptp->pincfg_mux); mutex_init(&ptp->n_vclocks_mux); init_waitqueue_head(&ptp->tsev_wq); @@ -350,7 +349,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, if (ptp->kworker) kthread_destroy_worker(ptp->kworker); kworker_err: - mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); mutex_destroy(&ptp->n_vclocks_mux); ptp_clean_queue_list(ptp); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 314c21c39f6a..046d1482bcee 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -27,6 +27,7 @@ struct timestamp_event_queue { int tail; spinlock_t lock; struct list_head qlist; + pid_t reader_pid; }; struct ptp_clock { @@ -38,7 +39,6 @@ struct ptp_clock { struct pps_device *pps_source; long dialed_frequency; /* remembers the frequency adjustment */ struct list_head tsevqs; /* timestamp fifo list */ - struct mutex tsevq_mux; /* one process at a time reading the fifo */ struct mutex pincfg_mux; /* protect concurrent info->pin_config access */ wait_queue_head_t tsev_wq; int defunct; /* tells readers to go away when clock is being removed */ @@ -124,6 +124,8 @@ long ptp_ioctl(struct posix_clock *pc, int ptp_open(struct posix_clock *pc, fmode_t fmode); +int ptp_release(struct posix_clock *pc); + ssize_t ptp_read(struct posix_clock *pc, uint flags, char __user *buf, size_t cnt); diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 2675f383cd0a..512b0164ef18 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -87,9 +87,6 @@ static ssize_t extts_fifo_show(struct device *dev, memset(&event, 0, sizeof(event)); - if (mutex_lock_interruptible(&ptp->tsevq_mux)) - return -ERESTARTSYS; - spin_lock_irqsave(&queue->lock, flags); qcnt = queue_cnt(queue); if (qcnt) { @@ -104,7 +101,6 @@ static ssize_t extts_fifo_show(struct device *dev, cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n", event.index, event.t.sec, event.t.nsec); out: - mutex_unlock(&ptp->tsevq_mux); return cnt; } static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL);
Use linked lists to create one event queue per open file. This enables simultaneous readers for timestamp event queues. Signed-off-by: Xabier Marquiegui <reibax@gmail.com> Suggested-by: Richard Cochran <richardcochran@gmail.com> --- v2: - fix ptp_poll() return value - Style changes to comform to checkpatch strict suggestions - more coherent ptp_read error exit routines v1: https://lore.kernel.org/netdev/20230906104754.1324412-3-reibax@gmail.com/ drivers/ptp/ptp_chardev.c | 100 +++++++++++++++++++++++++++++--------- drivers/ptp/ptp_clock.c | 6 +-- drivers/ptp/ptp_private.h | 4 +- drivers/ptp/ptp_sysfs.c | 4 -- 4 files changed, 82 insertions(+), 32 deletions(-)