diff mbox series

[net-next,v2,2/3] ptp: support multiple timestamp event readers

Message ID 20230912220217.2008895-2-reibax@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v2,1/3] ptp: Replace timestamp event queue with linked list | expand

Checks

Context Check Description
netdev/series_format warning Series does not have a cover letter
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1340 this patch: 1340
netdev/cc_maintainers success CCed 2 of 2 maintainers
netdev/build_clang success Errors and warnings before: 1363 this patch: 1363
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1363 this patch: 1363
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 226 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xabier Marquiegui Sept. 12, 2023, 10:02 p.m. UTC
Use linked lists to create one event queue per open file. This enables
simultaneous readers for timestamp event queues.

Signed-off-by: Xabier Marquiegui <reibax@gmail.com>
Suggested-by: Richard Cochran <richardcochran@gmail.com>
---
v2:
  - fix ptp_poll() return value
  - Style changes to comform to checkpatch strict suggestions
  - more coherent ptp_read error exit routines
v1: https://lore.kernel.org/netdev/20230906104754.1324412-3-reibax@gmail.com/

 drivers/ptp/ptp_chardev.c | 100 +++++++++++++++++++++++++++++---------
 drivers/ptp/ptp_clock.c   |   6 +--
 drivers/ptp/ptp_private.h |   4 +-
 drivers/ptp/ptp_sysfs.c   |   4 --
 4 files changed, 82 insertions(+), 32 deletions(-)

Comments

Vinicius Costa Gomes Sept. 13, 2023, 12:10 a.m. UTC | #1
Xabier Marquiegui <reibax@gmail.com> writes:

> Use linked lists to create one event queue per open file. This enables
> simultaneous readers for timestamp event queues.
>
> Signed-off-by: Xabier Marquiegui <reibax@gmail.com>
> Suggested-by: Richard Cochran <richardcochran@gmail.com>
> ---
> v2:
>   - fix ptp_poll() return value
>   - Style changes to comform to checkpatch strict suggestions
>   - more coherent ptp_read error exit routines
> v1: https://lore.kernel.org/netdev/20230906104754.1324412-3-reibax@gmail.com/
>
>  drivers/ptp/ptp_chardev.c | 100 +++++++++++++++++++++++++++++---------
>  drivers/ptp/ptp_clock.c   |   6 +--
>  drivers/ptp/ptp_private.h |   4 +-
>  drivers/ptp/ptp_sysfs.c   |   4 --
>  4 files changed, 82 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
> index 197edf1179f1..c9da0f27d204 100644
> --- a/drivers/ptp/ptp_chardev.c
> +++ b/drivers/ptp/ptp_chardev.c
> @@ -103,9 +103,39 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
>  
>  int ptp_open(struct posix_clock *pc, fmode_t fmode)
>  {
> +	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
> +	struct timestamp_event_queue *queue;
> +
> +	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
> +	if (!queue)
> +		return -EINVAL;
> +	queue->reader_pid = task_pid_nr(current);

Using the pid of the task will break when using some form of file
descriptor passing. e.g. Task A opened the chardev, called the ioctl()
with some mask and then passed the fd to Task B, that's actually going
to use the fd.

Is this something that we want to support? Am I missing something?

> +	list_add_tail(&queue->qlist, &ptp->tsevqs);
> +
>  	return 0;
>  }
>  
> +int ptp_release(struct posix_clock *pc)
> +{
> +	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
> +	struct list_head *pos, *n;
> +	struct timestamp_event_queue *element;
> +	int found = -1;
> +	pid_t reader_pid = task_pid_nr(current);
> +
> +	list_for_each_safe(pos, n, &ptp->tsevqs) {
> +		element = list_entry(pos, struct timestamp_event_queue, qlist);
> +		if (element->reader_pid == reader_pid) {
> +			list_del(pos);
> +			kfree(element);
> +			found = 0;
> +			return found;
> +		}
> +	}
> +
> +	return found;
> +}
> +
>  long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  {
>  	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
> @@ -435,14 +465,24 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
>  __poll_t ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait)
>  {
>  	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
> +	pid_t reader_pid = task_pid_nr(current);
>  	struct timestamp_event_queue *queue;
> +	struct list_head *pos, *n;
> +	bool found = false;
>  
>  	poll_wait(fp, &ptp->tsev_wq, wait);
>  
> -	/* Extract only the first element in the queue list
> -	 * TODO: Identify the relevant queue
> -	 */
> -	queue = list_entry(&ptp->tsevqs, struct timestamp_event_queue, qlist);
> +	/* Extract only the desired element in the queue list */
> +	list_for_each_safe(pos, n, &ptp->tsevqs) {
> +		queue = list_entry(pos, struct timestamp_event_queue, qlist);
> +		if (queue->reader_pid == reader_pid) {
> +			found = true;
> +			break;
> +		}
> +	}
> +
> +	if (!found)
> +		return EPOLLERR;
>  
>  	return queue_cnt(queue) ? EPOLLIN : 0;
>  }
> @@ -453,44 +493,54 @@ ssize_t ptp_read(struct posix_clock *pc,
>  		 uint rdflags, char __user *buf, size_t cnt)
>  {
>  	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
> +	pid_t reader_pid = task_pid_nr(current);
>  	struct timestamp_event_queue *queue;
>  	struct ptp_extts_event *event;
> +	struct list_head *pos, *n;
>  	unsigned long flags;
> +	bool found = false;
>  	size_t qcnt, i;
>  	int result;
>  
> -	/* Extract only the first element in the queue list
> -	 * TODO: Identify the relevant queue
> -	 */
> -	queue = list_first_entry(&ptp->tsevqs, struct timestamp_event_queue,
> -				 qlist);
> +	/* Extract only the desired element in the queue list */
> +	list_for_each_safe(pos, n, &ptp->tsevqs) {
> +		queue = list_entry(pos, struct timestamp_event_queue, qlist);
> +		if (queue->reader_pid == reader_pid) {
> +			found = true;
> +			break;
> +		}
> +	}
>  
> -	if (cnt % sizeof(struct ptp_extts_event) != 0)
> -		return -EINVAL;
> +	if (!found) {
> +		result = -EINVAL;
> +		goto exit;
> +	}
> +
> +	if (cnt % sizeof(struct ptp_extts_event) != 0) {
> +		result = -EINVAL;
> +		goto exit;
> +	}
>  
>  	if (cnt > EXTTS_BUFSIZE)
>  		cnt = EXTTS_BUFSIZE;
>  
>  	cnt = cnt / sizeof(struct ptp_extts_event);
>  
> -	if (mutex_lock_interruptible(&ptp->tsevq_mux))
> -		return -ERESTARTSYS;
> -
>  	if (wait_event_interruptible(ptp->tsev_wq,
>  				     ptp->defunct || queue_cnt(queue))) {
> -		mutex_unlock(&ptp->tsevq_mux);
> -		return -ERESTARTSYS;
> +		result = -ERESTARTSYS;
> +		goto exit;
>  	}
>  
>  	if (ptp->defunct) {
> -		mutex_unlock(&ptp->tsevq_mux);
> -		return -ENODEV;
> +		result = -ENODEV;
> +		goto exit;
>  	}
>  
>  	event = kmalloc(EXTTS_BUFSIZE, GFP_KERNEL);
>  	if (!event) {
> -		mutex_unlock(&ptp->tsevq_mux);
> -		return -ENOMEM;
> +		result = -ENOMEM;
> +		goto exit;
>  	}
>  
>  	spin_lock_irqsave(&queue->lock, flags);
> @@ -509,12 +559,16 @@ ssize_t ptp_read(struct posix_clock *pc,
>  
>  	cnt = cnt * sizeof(struct ptp_extts_event);
>  
> -	mutex_unlock(&ptp->tsevq_mux);
> -
>  	result = cnt;
> -	if (copy_to_user(buf, event, cnt))
> +	if (copy_to_user(buf, event, cnt)) {
>  		result = -EFAULT;
> +		goto free_event;
> +	}
>  
> +free_event:
>  	kfree(event);
> +exit:
> +	if (result < 0)
> +		ptp_release(pc);
>  	return result;
>  }
> diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
> index 7ac04a282ec5..d52fc23e20a8 100644
> --- a/drivers/ptp/ptp_clock.c
> +++ b/drivers/ptp/ptp_clock.c
> @@ -162,6 +162,7 @@ static struct posix_clock_operations ptp_clock_ops = {
>  	.clock_settime	= ptp_clock_settime,
>  	.ioctl		= ptp_ioctl,
>  	.open		= ptp_open,
> +	.release	= ptp_release,
>  	.poll		= ptp_poll,
>  	.read		= ptp_read,
>  };
> @@ -184,7 +185,6 @@ static void ptp_clock_release(struct device *dev)
>  
>  	ptp_cleanup_pin_groups(ptp);
>  	kfree(ptp->vclock_index);
> -	mutex_destroy(&ptp->tsevq_mux);
>  	mutex_destroy(&ptp->pincfg_mux);
>  	mutex_destroy(&ptp->n_vclocks_mux);
>  	ptp_clean_queue_list(ptp);
> @@ -246,10 +246,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
>  	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
>  	if (!queue)
>  		goto no_memory_queue;
> +	queue->reader_pid = 0;
>  	spin_lock_init(&queue->lock);
>  	list_add_tail(&queue->qlist, &ptp->tsevqs);
> -	/* TODO - Transform or delete this mutex */
> -	mutex_init(&ptp->tsevq_mux);
>  	mutex_init(&ptp->pincfg_mux);
>  	mutex_init(&ptp->n_vclocks_mux);
>  	init_waitqueue_head(&ptp->tsev_wq);
> @@ -350,7 +349,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
>  	if (ptp->kworker)
>  		kthread_destroy_worker(ptp->kworker);
>  kworker_err:
> -	mutex_destroy(&ptp->tsevq_mux);
>  	mutex_destroy(&ptp->pincfg_mux);
>  	mutex_destroy(&ptp->n_vclocks_mux);
>  	ptp_clean_queue_list(ptp);
> diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
> index 314c21c39f6a..046d1482bcee 100644
> --- a/drivers/ptp/ptp_private.h
> +++ b/drivers/ptp/ptp_private.h
> @@ -27,6 +27,7 @@ struct timestamp_event_queue {
>  	int tail;
>  	spinlock_t lock;
>  	struct list_head qlist;
> +	pid_t reader_pid;
>  };
>  
>  struct ptp_clock {
> @@ -38,7 +39,6 @@ struct ptp_clock {
>  	struct pps_device *pps_source;
>  	long dialed_frequency; /* remembers the frequency adjustment */
>  	struct list_head tsevqs; /* timestamp fifo list */
> -	struct mutex tsevq_mux; /* one process at a time reading the fifo */
>  	struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
>  	wait_queue_head_t tsev_wq;
>  	int defunct; /* tells readers to go away when clock is being removed */
> @@ -124,6 +124,8 @@ long ptp_ioctl(struct posix_clock *pc,
>  
>  int ptp_open(struct posix_clock *pc, fmode_t fmode);
>  
> +int ptp_release(struct posix_clock *pc);
> +
>  ssize_t ptp_read(struct posix_clock *pc,
>  		 uint flags, char __user *buf, size_t cnt);
>  
> diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
> index 2675f383cd0a..512b0164ef18 100644
> --- a/drivers/ptp/ptp_sysfs.c
> +++ b/drivers/ptp/ptp_sysfs.c
> @@ -87,9 +87,6 @@ static ssize_t extts_fifo_show(struct device *dev,
>  
>  	memset(&event, 0, sizeof(event));
>  
> -	if (mutex_lock_interruptible(&ptp->tsevq_mux))
> -		return -ERESTARTSYS;
> -
>  	spin_lock_irqsave(&queue->lock, flags);
>  	qcnt = queue_cnt(queue);
>  	if (qcnt) {
> @@ -104,7 +101,6 @@ static ssize_t extts_fifo_show(struct device *dev,
>  	cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n",
>  		       event.index, event.t.sec, event.t.nsec);
>  out:
> -	mutex_unlock(&ptp->tsevq_mux);
>  	return cnt;
>  }
>  static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL);
> -- 
> 2.34.1
>
>

Cheers,
Xabier Marquiegui Sept. 13, 2023, 8:57 a.m. UTC | #2
> Using the pid of the task will break when using some form of file
> descriptor passing. e.g. Task A opened the chardev, called the ioctl()
> with some mask and then passed the fd to Task B, that's actually going
> to use the fd.
> 
> Is this something that we want to support? Am I missing something?

Thank you very much for your valuable comments Vinicius,

Let me try to confirm if I understand what you are observing here.

Let's say we have a process with two threads. One of them opens the device
file to do IOCTL operations, and the other one opens the device file to
read timestamp events. Since both have the same PID, all the operations
(read,release...) would fail because I designed them under the assumption
that only one open operation would happen per PID.

This is indeed not as safe as it should be and I should try to improve it.

I am already looking at alternatives, but maybe someone can give me a hint.
Do you have any suggestions on what I could do to have file operations 
(read, release...) determine which open instance they belong to?

Cheers.
Vinicius Costa Gomes Sept. 13, 2023, 9:25 p.m. UTC | #3
Xabier Marquiegui <xabier.marquiegui@gmail.com> writes:

>> Using the pid of the task will break when using some form of file
>> descriptor passing. e.g. Task A opened the chardev, called the ioctl()
>> with some mask and then passed the fd to Task B, that's actually going
>> to use the fd.
>> 
>> Is this something that we want to support? Am I missing something?
>
> Thank you very much for your valuable comments Vinicius,
>
> Let me try to confirm if I understand what you are observing here.
>
> Let's say we have a process with two threads. One of them opens the device
> file to do IOCTL operations, and the other one opens the device file to
> read timestamp events. Since both have the same PID, all the operations
> (read,release...) would fail because I designed them under the assumption
> that only one open operation would happen per PID.
>

I was initially thinking about another scenario: when Process A passes
the open file descriptor via SCM_RIGHTS to Process B.

But your scenario also shows the limitations of the current approach.

> This is indeed not as safe as it should be and I should try to improve it.
>
> I am already looking at alternatives, but maybe someone can give me a hint.
> Do you have any suggestions on what I could do to have file operations 
> (read, release...) determine which open instance they belong to?

Taking a quick look, it seems that you would have to change 'struct
posix_clock_file_operations' to also pass around the 'struct file' of
the file being used.

That way we can track each user/"open()". And if one program decides
that it needs to have have multiple fds with different masks, and so
different queues, it should just work.

What do you think?


Cheers,
Xabier Marquiegui Sept. 14, 2023, 10:02 a.m. UTC | #4
> Taking a quick look, it seems that you would have to change 'struct
> posix_clock_file_operations' to also pass around the 'struct file' of
> the file being used.
> 
> That way we can track each user/"open()". And if one program decides
> that it needs to have have multiple fds with different masks, and so
> different queues, it should just work.
> 
> What do you think?

Thank you for the suggestion Vinicius. That was my initial approach, but I
couldn't make it work. Maybe I'm missing something. I searched struct file
for some variable that would help identifying the open/read/release instance
a call belongs to, but couldn't find any.

After that, I tried printing the struct file *fp pointer on every open call.
Unless I did something wrong I would say that it points to the same address
every time for a specific device file. My understanding is that if it was
different for each time open gets called, it would help us identify the user
of the rest of file operation calls, but not if it's the same. It is true that
I did this verification on kernel 6.1.38 and not the latest, but I don't think
that specific aspect has changed.

I will keep looking for options, but any help would be appreciated.

Thanks.
--
Xabier.
Richard Cochran Sept. 14, 2023, 3:09 p.m. UTC | #5
On Wed, Sep 13, 2023 at 02:25:48PM -0700, Vinicius Costa Gomes wrote:

> Taking a quick look, it seems that you would have to change 'struct
> posix_clock_file_operations' to also pass around the 'struct file' of
> the file being used.

And let drivers compare struct file pointers from different consumers?

> That way we can track each user/"open()". And if one program decides
> that it needs to have have multiple fds with different masks, and so
> different queues, it should just work.
> 
> What do you think?

See posix-clock.c : posix_clock_open()

When the file is opened, the fp->private_data is used to track the
posix_clock that was registered as a character device by the ptp
clock instance.

That character device may be opened multiple times, each time there is
a unique fp, but fp->private_data points to the same ptp clock instance.

So the information of which fp is being read() is lost.

Looks like you will have to re-work posix-clock.c to allow drivers to
provide their own "private" data populated during posix_clock_operations::open()

Needs thought...

Thanks,
Richard
Vinicius Costa Gomes Sept. 14, 2023, 5:12 p.m. UTC | #6
Hi Richard,

Richard Cochran <richardcochran@gmail.com> writes:

> On Wed, Sep 13, 2023 at 02:25:48PM -0700, Vinicius Costa Gomes wrote:
>
>> Taking a quick look, it seems that you would have to change 'struct
>> posix_clock_file_operations' to also pass around the 'struct file' of
>> the file being used.
>
> And let drivers compare struct file pointers from different consumers?
>

That was the first idea, not very good I admit.

What you said below sounds better, more "final product".

>> That way we can track each user/"open()". And if one program decides
>> that it needs to have have multiple fds with different masks, and so
>> different queues, it should just work.
>> 
>> What do you think?
>
> See posix-clock.c : posix_clock_open()
>
> When the file is opened, the fp->private_data is used to track the
> posix_clock that was registered as a character device by the ptp
> clock instance.
>
> That character device may be opened multiple times, each time there is
> a unique fp, but fp->private_data points to the same ptp clock instance.
>
> So the information of which fp is being read() is lost.

yeah, that's the core issue.

>
> Looks like you will have to re-work posix-clock.c to allow drivers to
> provide their own "private" data populated during posix_clock_operations::open()
>
> Needs thought...
>


Cheers,
kernel test robot Sept. 19, 2023, 3:15 p.m. UTC | #7
Hello,

kernel test robot noticed "kernel_BUG_at_lib/list_debug.c" on:

commit: bb1445038f83a91450bc74ccc5a6f2b702233584 ("[PATCH net-next v2 2/3] ptp: support multiple timestamp event readers")
url: https://github.com/intel-lab-lkp/linux/commits/Xabier-Marquiegui/ptp-support-multiple-timestamp-event-readers/20230913-060341
base: https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git 8fc8911b66962c6ff4345e7000930a4bcc54ae5a
patch link: https://lore.kernel.org/all/20230912220217.2008895-2-reibax@gmail.com/
patch subject: [PATCH net-next v2 2/3] ptp: support multiple timestamp event readers

in testcase: stress-ng
version: stress-ng-x86_64-0.15.04-1_20230912
with following parameters:

	nr_threads: 100%
	disk: 1HDD
	testtime: 60s
	class: device
	test: dev
	cpufreq_governor: performance



compiler: gcc-12
test machine: 64 threads 2 sockets Intel(R) Xeon(R) Gold 6346 CPU @ 3.10GHz (Ice Lake) with 256G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)




If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202309192210.a6367ad9-oliver.sang@intel.com



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20230919/202309192210.a6367ad9-oliver.sang@intel.com


[   42.745852][ T4208] list_del corruption. next->prev should be ff11000108593010, but was ff11002089351010. (next=ff1100011152b3f0)
[   42.745864][ T4208] ------------[ cut here ]------------
[   42.745864][ T4208] kernel BUG at lib/list_debug.c:65!
[   42.745870][ T4208] invalid opcode: 0000 [#1] SMP NOPTI
[   42.745873][ T4208] CPU: 14 PID: 4208 Comm: stress-ng-dev Not tainted 6.5.0-12685-gbb1445038f83 #1
[   42.745875][ T4208] Hardware name: Inspur NF5180M6/NF5180M6, BIOS 06.00.04 04/12/2022
[   42.745876][ T4208] RIP: 0010:__list_del_entry_valid_or_report+0xbe/0xf0
[   42.745884][ T4208] Code: 0b 48 89 fe 48 89 c2 48 c7 c7 78 c0 71 82 e8 99 53 a3 ff 0f 0b 48 89 d1 48 c7 c7 c8 c0 71 82 48 89 f2 48 89 c6 e8 82 53 a3 ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 66 2e 0f 1f 84 00 00 00 00 00
[   42.745885][ T4208] RSP: 0018:ffa000000ef47ec0 EFLAGS: 00010246
[   42.745886][ T4208] RAX: 000000000000006d RBX: ff11000108593010 RCX: 0000000000000000
[   42.745887][ T4208] RDX: 0000000000000000 RSI: ff11001fffd9c6c0 RDI: ff11001fffd9c6c0
[   42.745888][ T4208] RBP: 0000000000000000 R08: 80000000ffff8c59 R09: 0000000000ffff10
[   42.745889][ T4208] R10: 000000000000000f R11: 000000000000000f R12: ff1100011152b000
[   42.745889][ T4208] R13: ff1100208a28ad20 R14: ff11000108fe40c0 R15: 0000000000000000
[   42.745890][ T4208] FS:  00007f28c2aa0700(0000) GS:ff11001fffd80000(0000) knlGS:0000000000000000
[   42.745891][ T4208] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   42.745892][ T4208] CR2: 00007f28c2a9dbf0 CR3: 00000001dd39e003 CR4: 0000000000771ee0
[   42.745893][ T4208] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   42.745893][ T4208] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   42.745894][ T4208] PKRU: 55555554
[   42.745894][ T4208] Call Trace:
[   42.745896][ T4208]  <TASK>
[   42.745898][ T4208]  ? die+0x36/0xb0
[   42.745901][ T4208]  ? do_trap+0xda/0x130
[   42.745904][ T4208]  ? __list_del_entry_valid_or_report+0xbe/0xf0
[   42.745906][ T4208]  ? do_error_trap+0x65/0xb0
[   42.745908][ T4208]  ? __list_del_entry_valid_or_report+0xbe/0xf0
[   42.745910][ T4208]  ? exc_invalid_op+0x50/0x70
[   42.745915][ T4208]  ? __list_del_entry_valid_or_report+0xbe/0xf0
[   42.745917][ T4208]  ? asm_exc_invalid_op+0x1a/0x20
[   42.745920][ T4208]  ? __list_del_entry_valid_or_report+0xbe/0xf0
[   42.745922][ T4208]  ptp_release+0x4c/0xb0
[   42.745928][ T4208]  posix_clock_release+0x28/0x70
[   42.745933][ T4208]  __fput+0xea/0x2b0
[   42.745938][ T4208]  __x64_sys_close+0x3d/0xb0
[   42.745940][ T4208]  do_syscall_64+0x3b/0xb0
[   42.745942][ T4208]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
[   42.745947][ T4208] RIP: 0033:0x7f28c52acc2b
[   42.745949][ T4208] Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 a3 4d f9 ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 e1 4d f9 ff 8b 44
[   42.745950][ T4208] RSP: 002b:00007f28c2a9ebf0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003
[   42.745951][ T4208] RAX: ffffffffffffffda RBX: 00005629082e0920 RCX: 00007f28c52acc2b
[   42.745952][ T4208] RDX: 0000000000000062 RSI: 00005629096837d6 RDI: 0000000000000007
[   42.745952][ T4208] RBP: 0000000000000007 R08: 0000000000000000 R09: 0000000000000000
[   42.745953][ T4208] R10: 0000000000000001 R11: 0000000000000293 R12: 00005629096837a0
[   42.745953][ T4208] R13: 00000000ffffffff R14: 00005629096837d0 R15: 00007fff201baa20
[   42.745954][ T4208]  </TASK>
[   42.745955][ T4208] Modules linked in: rfkill uhid vfio_iommu_type1 vfio vhost_net tun vhost_vsock vmw_vsock_virtio_transport_common vsock vhost vhost_iotlb intel_rapl_msr intel_rapl_common btrfs x86_pkg_temp_thermal blake2b_generic xor ipmi_ssif coretemp kvm_intel kvm irqbypass raid6_pq crct10dif_pclmul libcrc32c crc32_pclmul sd_mod crc32c_intel sg ghash_clmulni_intel nvme sha512_ssse3 nvme_core rapl ahci ast t10_pi intel_cstate libahci acpi_ipmi drm_shmem_helper mei_me crc64_rocksoft_generic intel_uncore i2c_i801 dax_hmem ioatdma crc64_rocksoft megaraid_sas ipmi_si crc64 libata drm_kms_helper mei joydev i2c_smbus dca intel_pch_thermal wmi ipmi_devintf ipmi_msghandler acpi_power_meter drm fuse ip_tables
[   42.745984][ T4208] ---[ end trace 0000000000000000 ]---
[   42.747395][ T4218] list_add corruption. prev->next should be next (ff1100011152b3f0), but was ff1100029b963010. (prev=ff11004063643010).
[   42.747406][ T4218] ------------[ cut here ]------------
[   42.747407][ T4218] kernel BUG at lib/list_debug.c:32!
[   42.747413][ T4218] invalid opcode: 0000 [#2] SMP NOPTI
[   42.747416][ T4218] CPU: 61 PID: 4218 Comm: stress-ng-dev Tainted: G      D            6.5.0-12685-gbb1445038f83 #1
[   42.747418][ T4218] Hardware name: Inspur NF5180M6/NF5180M6, BIOS 06.00.04 04/12/2022
[   42.747420][ T4218] RIP: 0010:__list_add_valid_or_report+0x78/0xb0
[   42.747427][ T4218] Code: a3 ff 0f 0b 48 89 c1 48 c7 c7 c0 be 71 82 e8 9f 54 a3 ff 0f 0b 48 89 d1 48 89 c6 4c 89 c2 48 c7 c7 18 bf 71 82 e8 88 54 a3 ff <0f> 0b 48 89 f2 48 89 c1 48 89 fe 48 c7 c7 70 bf 71 82 e8 71 54 a3
[   42.747429][ T4218] RSP: 0018:ffa000000fdffc28 EFLAGS: 00010246
[   42.747431][ T4218] RAX: 0000000000000075 RBX: ff11004063640000 RCX: 0000000000000000
[   42.747433][ T4218] RDX: 0000000000000000 RSI: ff11003fc335c6c0 RDI: ff11003fc335c6c0
[   42.747434][ T4218] RBP: ff1100011152b000 R08: 80000000ffff8c89 R09: 0000000000ffff10
[   42.747436][ T4218] R10: 000000000000000f R11: 000000000000000f R12: ff11004063643010
[   42.747437][ T4218] R13: ff11004063641010 R14: ff1100011152b3f0 R15: 0000000000000000
[   42.747439][ T4218] FS:  00007f28c129d700(0000) GS:ff11003fc3340000(0000) knlGS:0000000000000000
[   42.747441][ T4218] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   42.747443][ T4218] CR2: 00007f28c129abf0 CR3: 00000001dd39e001 CR4: 0000000000771ee0
[   42.747445][ T4218] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   42.747446][ T4218] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   42.747447][ T4218] PKRU: 55555554
[   42.747448][ T4218] Call Trace:
[   42.747450][ T4218]  <TASK>
[   42.747452][ T4218]  ? die+0x36/0xb0
[   42.747456][ T4218]  ? do_trap+0xda/0x130
[   42.747458][ T4218]  ? __list_add_valid_or_report+0x78/0xb0
[   42.747461][ T4218]  ? do_error_trap+0x65/0xb0
[   42.747464][ T4218]  ? __list_add_valid_or_report+0x78/0xb0
[   42.747466][ T4218]  ? exc_invalid_op+0x50/0x70
[   42.747471][ T4218]  ? __list_add_valid_or_report+0x78/0xb0
[   42.747474][ T4218]  ? asm_exc_invalid_op+0x1a/0x20
[   42.747477][ T4218]  ? __list_add_valid_or_report+0x78/0xb0
[   42.747479][ T4218]  ? __list_add_valid_or_report+0x78/0xb0
[   42.747482][ T4218]  ptp_open+0x6a/0xb0
[   42.747487][ T4218]  posix_clock_open+0x47/0xb0
[   42.747491][ T4218]  chrdev_open+0xf7/0x270
[   42.747495][ T4218]  ? __pfx_chrdev_open+0x10/0x10
[   42.747497][ T4218]  do_dentry_open+0x200/0x4f0
[   42.747499][ T4218]  do_open+0x291/0x430
[   42.747503][ T4218]  ? open_last_lookups+0x8b/0x430
[   42.747505][ T4218]  path_openat+0x130/0x2f0
[   42.747508][ T4218]  do_filp_open+0xb3/0x170
[   42.747512][ T4218]  do_sys_openat2+0xab/0xf0
[   42.747515][ T4218]  __x64_sys_openat+0x6e/0xb0
[   42.747517][ T4218]  do_syscall_64+0x3b/0xb0
[   42.747521][ T4218]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
[   42.747525][ T4218] RIP: 0033:0x7f28c52ac1a4
[   42.747528][ T4218] Code: 84 00 00 00 00 00 44 89 54 24 0c e8 36 58 f9 ff 44 8b 54 24 0c 44 89 e2 48 89 ee 41 89 c0 bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 34 44 89 c7 89 44 24 0c e8 68 58 f9 ff 8b 44
[   42.747529][ T4218] RSP: 002b:00007f28c129bac0 EFLAGS: 00000293 ORIG_RAX: 0000000000000101
[   42.747531][ T4218] RAX: ffffffffffffffda RBX: 000000000ee6b280 RCX: 00007f28c52ac1a4
[   42.747533][ T4218] RDX: 0000000000040802 RSI: 00005629096837d0 RDI: 00000000ffffff9c
[   42.747534][ T4218] RBP: 00005629096837d0 R08: 0000000000000000 R09: 00007f28ac000b60
[   42.747536][ T4218] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000040802
[   42.747537][ T4218] R13: 0000000000040802 R14: 0000000000000000 R15: 00007fff201baa20
[   42.747539][ T4218]  </TASK>
[   42.747539][ T4218] Modules linked in: rfkill uhid vfio_iommu_type1 vfio vhost_net tun vhost_vsock vmw_vsock_virtio_transport_common vsock vhost vhost_iotlb intel_rapl_msr intel_rapl_common btrfs x86_pkg_temp_thermal blake2b_generic xor ipmi_ssif coretemp kvm_intel kvm irqbypass raid6_pq crct10dif_pclmul libcrc32c crc32_pclmul sd_mod crc32c_intel sg ghash_clmulni_intel nvme sha512_ssse3 nvme_core rapl ahci ast t10_pi intel_cstate libahci acpi_ipmi drm_shmem_helper mei_me crc64_rocksoft_generic intel_uncore i2c_i801 dax_hmem ioatdma crc64_rocksoft megaraid_sas ipmi_si crc64 libata drm_kms_helper mei joydev i2c_smbus dca intel_pch_thermal wmi ipmi_devintf ipmi_msghandler acpi_power_meter drm fuse ip_tables
[   42.747587][ T4218] ---[ end trace 0000000000000000 ]---
[   42.766845][ T4216] list_add corruption. prev->next should be next (ff1100011152b3f0), but was ff110002610fb010. (prev=ff1100026116b010).
[   42.784805][ T4210] PM: hibernation: Marking nosave pages: [mem 0x00000000-0x00000fff]
[   42.784808][ T4210] PM: hibernation: Marking nosave pages: [mem 0x0003e000-0x0003efff]
[   42.784810][ T4210] PM: hibernation: Marking nosave pages: [mem 0x000a0000-0x000fffff]
[   42.784813][ T4210] PM: hibernation: Marking nosave pages: [mem 0x6702e000-0x6c1fefff]
[   42.785002][ T4210] PM: hibernation: Marking nosave pages: [mem 0x6f800000-0xffffffff]
[   42.785751][  T988] 
[   42.785855][ T4210] PM: hibernation: Basic memory bitmaps created
[   42.803384][ T4210] PM: hibernation: Basic memory bitmaps freed
[   42.803908][ T4210] random: crng reseeded on system resumption
[   42.819198][ T4208] RIP: 0010:__list_del_entry_valid_or_report+0xbe/0xf0
[   42.819207][ T4208] Code: 0b 48 89 fe 48 89 c2 48 c7 c7 78 c0 71 82 e8 99 53 a3 ff 0f 0b 48 89 d1 48 c7 c7 c8 c0 71 82 48 89 f2 48 89 c6 e8 82 53 a3 ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 66 2e 0f 1f 84 00 00 00 00 00
[   42.819209][ T4208] RSP: 0018:ffa000000ef47ec0 EFLAGS: 00010246
[   42.819213][ T4208] RAX: 000000000000006d RBX: ff11000108593010 RCX: 0000000000000000
[   42.819215][ T4208] RDX: 0000000000000000 RSI: ff11001fffd9c6c0 RDI: ff11001fffd9c6c0
[   42.819217][ T4208] RBP: 0000000000000000 R08: 80000000ffff8c59 R09: 0000000000ffff10
[   42.819219][ T4208] R10: 000000000000000f R11: 000000000000000f R12: ff1100011152b000
[   42.819221][ T4208] R13: ff1100208a28ad20 R14: ff11000108fe40c0 R15: 0000000000000000
[   42.819222][ T4208] FS:  00007f28c2aa0700(0000) GS:ff11001fffd80000(0000) knlGS:0000000000000000
[   42.819224][ T4208] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   42.819226][ T4208] CR2: 00007f28c2a9dbf0 CR3: 00000001dd39e003 CR4: 0000000000771ee0
[   42.819227][ T4208] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   42.819229][ T4208] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   42.819230][ T4208] PKRU: 55555554
[   42.819232][ T4208] Kernel panic - not syncing: Fatal exception
[   44.547139][ T4208] Kernel Offset: disabled
diff mbox series

Patch

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 197edf1179f1..c9da0f27d204 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -103,9 +103,39 @@  int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
 
 int ptp_open(struct posix_clock *pc, fmode_t fmode)
 {
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	struct timestamp_event_queue *queue;
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return -EINVAL;
+	queue->reader_pid = task_pid_nr(current);
+	list_add_tail(&queue->qlist, &ptp->tsevqs);
+
 	return 0;
 }
 
+int ptp_release(struct posix_clock *pc)
+{
+	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	struct list_head *pos, *n;
+	struct timestamp_event_queue *element;
+	int found = -1;
+	pid_t reader_pid = task_pid_nr(current);
+
+	list_for_each_safe(pos, n, &ptp->tsevqs) {
+		element = list_entry(pos, struct timestamp_event_queue, qlist);
+		if (element->reader_pid == reader_pid) {
+			list_del(pos);
+			kfree(element);
+			found = 0;
+			return found;
+		}
+	}
+
+	return found;
+}
+
 long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 {
 	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
@@ -435,14 +465,24 @@  long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 __poll_t ptp_poll(struct posix_clock *pc, struct file *fp, poll_table *wait)
 {
 	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	pid_t reader_pid = task_pid_nr(current);
 	struct timestamp_event_queue *queue;
+	struct list_head *pos, *n;
+	bool found = false;
 
 	poll_wait(fp, &ptp->tsev_wq, wait);
 
-	/* Extract only the first element in the queue list
-	 * TODO: Identify the relevant queue
-	 */
-	queue = list_entry(&ptp->tsevqs, struct timestamp_event_queue, qlist);
+	/* Extract only the desired element in the queue list */
+	list_for_each_safe(pos, n, &ptp->tsevqs) {
+		queue = list_entry(pos, struct timestamp_event_queue, qlist);
+		if (queue->reader_pid == reader_pid) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return EPOLLERR;
 
 	return queue_cnt(queue) ? EPOLLIN : 0;
 }
@@ -453,44 +493,54 @@  ssize_t ptp_read(struct posix_clock *pc,
 		 uint rdflags, char __user *buf, size_t cnt)
 {
 	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+	pid_t reader_pid = task_pid_nr(current);
 	struct timestamp_event_queue *queue;
 	struct ptp_extts_event *event;
+	struct list_head *pos, *n;
 	unsigned long flags;
+	bool found = false;
 	size_t qcnt, i;
 	int result;
 
-	/* Extract only the first element in the queue list
-	 * TODO: Identify the relevant queue
-	 */
-	queue = list_first_entry(&ptp->tsevqs, struct timestamp_event_queue,
-				 qlist);
+	/* Extract only the desired element in the queue list */
+	list_for_each_safe(pos, n, &ptp->tsevqs) {
+		queue = list_entry(pos, struct timestamp_event_queue, qlist);
+		if (queue->reader_pid == reader_pid) {
+			found = true;
+			break;
+		}
+	}
 
-	if (cnt % sizeof(struct ptp_extts_event) != 0)
-		return -EINVAL;
+	if (!found) {
+		result = -EINVAL;
+		goto exit;
+	}
+
+	if (cnt % sizeof(struct ptp_extts_event) != 0) {
+		result = -EINVAL;
+		goto exit;
+	}
 
 	if (cnt > EXTTS_BUFSIZE)
 		cnt = EXTTS_BUFSIZE;
 
 	cnt = cnt / sizeof(struct ptp_extts_event);
 
-	if (mutex_lock_interruptible(&ptp->tsevq_mux))
-		return -ERESTARTSYS;
-
 	if (wait_event_interruptible(ptp->tsev_wq,
 				     ptp->defunct || queue_cnt(queue))) {
-		mutex_unlock(&ptp->tsevq_mux);
-		return -ERESTARTSYS;
+		result = -ERESTARTSYS;
+		goto exit;
 	}
 
 	if (ptp->defunct) {
-		mutex_unlock(&ptp->tsevq_mux);
-		return -ENODEV;
+		result = -ENODEV;
+		goto exit;
 	}
 
 	event = kmalloc(EXTTS_BUFSIZE, GFP_KERNEL);
 	if (!event) {
-		mutex_unlock(&ptp->tsevq_mux);
-		return -ENOMEM;
+		result = -ENOMEM;
+		goto exit;
 	}
 
 	spin_lock_irqsave(&queue->lock, flags);
@@ -509,12 +559,16 @@  ssize_t ptp_read(struct posix_clock *pc,
 
 	cnt = cnt * sizeof(struct ptp_extts_event);
 
-	mutex_unlock(&ptp->tsevq_mux);
-
 	result = cnt;
-	if (copy_to_user(buf, event, cnt))
+	if (copy_to_user(buf, event, cnt)) {
 		result = -EFAULT;
+		goto free_event;
+	}
 
+free_event:
 	kfree(event);
+exit:
+	if (result < 0)
+		ptp_release(pc);
 	return result;
 }
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 7ac04a282ec5..d52fc23e20a8 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -162,6 +162,7 @@  static struct posix_clock_operations ptp_clock_ops = {
 	.clock_settime	= ptp_clock_settime,
 	.ioctl		= ptp_ioctl,
 	.open		= ptp_open,
+	.release	= ptp_release,
 	.poll		= ptp_poll,
 	.read		= ptp_read,
 };
@@ -184,7 +185,6 @@  static void ptp_clock_release(struct device *dev)
 
 	ptp_cleanup_pin_groups(ptp);
 	kfree(ptp->vclock_index);
-	mutex_destroy(&ptp->tsevq_mux);
 	mutex_destroy(&ptp->pincfg_mux);
 	mutex_destroy(&ptp->n_vclocks_mux);
 	ptp_clean_queue_list(ptp);
@@ -246,10 +246,9 @@  struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
 	if (!queue)
 		goto no_memory_queue;
+	queue->reader_pid = 0;
 	spin_lock_init(&queue->lock);
 	list_add_tail(&queue->qlist, &ptp->tsevqs);
-	/* TODO - Transform or delete this mutex */
-	mutex_init(&ptp->tsevq_mux);
 	mutex_init(&ptp->pincfg_mux);
 	mutex_init(&ptp->n_vclocks_mux);
 	init_waitqueue_head(&ptp->tsev_wq);
@@ -350,7 +349,6 @@  struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	if (ptp->kworker)
 		kthread_destroy_worker(ptp->kworker);
 kworker_err:
-	mutex_destroy(&ptp->tsevq_mux);
 	mutex_destroy(&ptp->pincfg_mux);
 	mutex_destroy(&ptp->n_vclocks_mux);
 	ptp_clean_queue_list(ptp);
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index 314c21c39f6a..046d1482bcee 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -27,6 +27,7 @@  struct timestamp_event_queue {
 	int tail;
 	spinlock_t lock;
 	struct list_head qlist;
+	pid_t reader_pid;
 };
 
 struct ptp_clock {
@@ -38,7 +39,6 @@  struct ptp_clock {
 	struct pps_device *pps_source;
 	long dialed_frequency; /* remembers the frequency adjustment */
 	struct list_head tsevqs; /* timestamp fifo list */
-	struct mutex tsevq_mux; /* one process at a time reading the fifo */
 	struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
 	wait_queue_head_t tsev_wq;
 	int defunct; /* tells readers to go away when clock is being removed */
@@ -124,6 +124,8 @@  long ptp_ioctl(struct posix_clock *pc,
 
 int ptp_open(struct posix_clock *pc, fmode_t fmode);
 
+int ptp_release(struct posix_clock *pc);
+
 ssize_t ptp_read(struct posix_clock *pc,
 		 uint flags, char __user *buf, size_t cnt);
 
diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c
index 2675f383cd0a..512b0164ef18 100644
--- a/drivers/ptp/ptp_sysfs.c
+++ b/drivers/ptp/ptp_sysfs.c
@@ -87,9 +87,6 @@  static ssize_t extts_fifo_show(struct device *dev,
 
 	memset(&event, 0, sizeof(event));
 
-	if (mutex_lock_interruptible(&ptp->tsevq_mux))
-		return -ERESTARTSYS;
-
 	spin_lock_irqsave(&queue->lock, flags);
 	qcnt = queue_cnt(queue);
 	if (qcnt) {
@@ -104,7 +101,6 @@  static ssize_t extts_fifo_show(struct device *dev,
 	cnt = snprintf(page, PAGE_SIZE, "%u %lld %u\n",
 		       event.index, event.t.sec, event.t.nsec);
 out:
-	mutex_unlock(&ptp->tsevq_mux);
 	return cnt;
 }
 static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL);