diff mbox series

[v2] posix-timers: add multi_clock_gettime system call

Message ID 20231127153901.6399-1-maimon.sagi@gmail.com (mailing list archive)
State Changes Requested
Headers show
Series [v2] posix-timers: add multi_clock_gettime system call | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Sagi Maimon Nov. 27, 2023, 3:39 p.m. UTC
Some user space applications need to read some clocks.
 Each read requires moving from user space to kernel space.
 This asymmetry causes the measured offset to have a significant error.

 Introduce a new system call multi_clock_gettime, which can be used to measure
 the offset between multiple clocks, from variety of types: PHC, virtual PHC
 and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
 The offset includes the total time that the driver needs to read the clock
 timestamp.

 New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
 Supported clocks IDs: PHC, virtual PHC and various system clocks.
 Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
 The system call returns n_clocks timestamps for each measurement:
 - clock 0 timestamp
 - ...
 - clock n timestamp

Signed-off-by: Sagi Maimon <maimon.sagi@gmail.com>
---
 Addressed comments from:
 - Richard Cochran : https://www.spinics.net/lists/netdev/msg951723.html
          
 Changes since version 1:
 - Change multi PHC ioctl implamantation into systemcall.
 
 arch/x86/entry/syscalls/syscall_32.tbl |  1 +
 arch/x86/entry/syscalls/syscall_64.tbl |  1 +
 include/linux/posix-timers.h           | 24 ++++++++++
 include/linux/syscalls.h               |  3 +-
 include/uapi/asm-generic/unistd.h      | 12 ++++-
 kernel/sys_ni.c                        |  1 +
 kernel/time/posix-timers.c             | 62 ++++++++++++++++++++++++++
 7 files changed, 102 insertions(+), 2 deletions(-)

Comments

Richard Cochran Nov. 28, 2023, 12:11 a.m. UTC | #1
On Mon, Nov 27, 2023 at 05:39:01PM +0200, Sagi Maimon wrote:
>  Some user space applications need to read some clocks.
>  Each read requires moving from user space to kernel space.
>  This asymmetry causes the measured offset to have a significant error.

Adding time/clock gurus (jstultz, tglx) on CC for visibility...

Thanks,
Richard


> 
>  Introduce a new system call multi_clock_gettime, which can be used to measure
>  the offset between multiple clocks, from variety of types: PHC, virtual PHC
>  and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
>  The offset includes the total time that the driver needs to read the clock
>  timestamp.
> 
>  New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
>  Supported clocks IDs: PHC, virtual PHC and various system clocks.
>  Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
>  The system call returns n_clocks timestamps for each measurement:
>  - clock 0 timestamp
>  - ...
>  - clock n timestamp
> 
> Signed-off-by: Sagi Maimon <maimon.sagi@gmail.com>
> ---
>  Addressed comments from:
>  - Richard Cochran : https://www.spinics.net/lists/netdev/msg951723.html
>           
>  Changes since version 1:
>  - Change multi PHC ioctl implamantation into systemcall.
>  
>  arch/x86/entry/syscalls/syscall_32.tbl |  1 +
>  arch/x86/entry/syscalls/syscall_64.tbl |  1 +
>  include/linux/posix-timers.h           | 24 ++++++++++
>  include/linux/syscalls.h               |  3 +-
>  include/uapi/asm-generic/unistd.h      | 12 ++++-
>  kernel/sys_ni.c                        |  1 +
>  kernel/time/posix-timers.c             | 62 ++++++++++++++++++++++++++
>  7 files changed, 102 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
> index c8fac5205803..070efd266e7e 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -461,3 +461,4 @@
>  454	i386	futex_wake		sys_futex_wake
>  455	i386	futex_wait		sys_futex_wait
>  456	i386	futex_requeue		sys_futex_requeue
> +457	i386	multi_clock_gettime		sys_multi_clock_gettime32
> \ No newline at end of file
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
> index 8cb8bf68721c..f790330244bb 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -378,6 +378,7 @@
>  454	common	futex_wake		sys_futex_wake
>  455	common	futex_wait		sys_futex_wait
>  456	common	futex_requeue		sys_futex_requeue
> +457	common	multi_clock_gettime		sys_multi_clock_gettime
>  
>  #
>  # Due to a historical design error, certain syscalls are numbered differently
> diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
> index d607f51404fc..426a45441ab5 100644
> --- a/include/linux/posix-timers.h
> +++ b/include/linux/posix-timers.h
> @@ -260,4 +260,28 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
>  int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
>  
>  void posixtimer_rearm(struct kernel_siginfo *info);
> +
> +#define MULTI_PTP_MAX_CLOCKS 12 /* Max number of clocks */
> +#define MULTI_PTP_MAX_SAMPLES 10 /* Max allowed offset measurement samples. */
> +
> +struct __ptp_multi_clock_get {
> +	unsigned int n_clocks; /* Desired number of clocks. */
> +	unsigned int n_samples; /* Desired number of measurements per clock. */
> +	const clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> +	/*
> +	 * Array of list of n_clocks clocks time samples n_samples times.
> +	 */
> +	struct  __kernel_timespec ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
> +};
> +
> +struct __ptp_multi_clock_get32 {
> +	unsigned int n_clocks; /* Desired number of clocks. */
> +	unsigned int n_samples; /* Desired number of measurements per clock. */
> +	const clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> +	/*
> +	 * Array of list of n_clocks clocks time samples n_samples times.
> +	 */
> +	struct  old_timespec32 ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
> +};
> +
>  #endif
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index fd9d12de7e92..afcf68e83d63 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -1161,7 +1161,8 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
>  			unsigned long prot, unsigned long flags,
>  			unsigned long fd, unsigned long pgoff);
>  asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
> -
> +asmlinkage long sys_multi_clock_gettime(struct __ptp_multi_clock_get __user * ptp_multi_clk_get);
> +asmlinkage long sys_multi_clock_gettime32(struct __ptp_multi_clock_get32 __user * ptp_multi_clk_get);
>  
>  /*
>   * Not a real system call, but a placeholder for syscalls which are
> diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
> index 756b013fb832..3ebcaa052650 100644
> --- a/include/uapi/asm-generic/unistd.h
> +++ b/include/uapi/asm-generic/unistd.h
> @@ -829,8 +829,18 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
>  #define __NR_futex_requeue 456
>  __SYSCALL(__NR_futex_requeue, sys_futex_requeue)
>  
> +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
> +#define __NR_multi_clock_gettime 457
> +__SC_3264(__NR_multi_clock_gettime, sys_multi_clock_gettime32, sys_multi_clock_gettime)
> +#endif
> +
> +#if defined(__SYSCALL_COMPAT) || __BITS_PER_LONG == 32
> +#define __NR_multi_clock_gettime64 458
> +__SYSCALL(__NR_multi_clock_gettime64, sys_multi_clock_gettime)
> +#endif
> +
>  #undef __NR_syscalls
> -#define __NR_syscalls 457
> +#define __NR_syscalls 459
>  
>  /*
>   * 32 bit systems traditionally used different
> diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
> index e1a6e3c675c0..8ed1c22f40ac 100644
> --- a/kernel/sys_ni.c
> +++ b/kernel/sys_ni.c
> @@ -335,6 +335,7 @@ COND_SYSCALL(ppoll_time32);
>  COND_SYSCALL_COMPAT(ppoll_time32);
>  COND_SYSCALL(utimensat_time32);
>  COND_SYSCALL(clock_adjtime32);
> +COND_SYSCALL(multi_clock_gettime32);
>  
>  /*
>   * The syscalls below are not found in include/uapi/asm-generic/unistd.h
> diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
> index b924f0f096fa..517558af2479 100644
> --- a/kernel/time/posix-timers.c
> +++ b/kernel/time/posix-timers.c
> @@ -1426,6 +1426,68 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
>  
>  #endif
>  
> +SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get __user *, ptp_multi_clk_get)
> +{
> +	const struct k_clock *kc;
> +	struct timespec64 kernel_tp;
> +	struct __ptp_multi_clock_get multi_clk_get;
> +	int error;
> +	unsigned int i, j;
> +
> +	if (copy_from_user(&multi_clk_get, ptp_multi_clk_get, sizeof(multi_clk_get)))
> +		return -EFAULT;
> +
> +	if (multi_clk_get.n_samples > MULTI_PTP_MAX_SAMPLES)
> +		return -EINVAL;
> +	if (multi_clk_get.n_clocks > MULTI_PTP_MAX_CLOCKS)
> +		return -EINVAL;
> +
> +	for (j = 0; j < multi_clk_get.n_samples; j++) {
> +		for (i = 0; i < multi_clk_get.n_clocks; i++) {
> +			kc = clockid_to_kclock(multi_clk_get.clkid_arr[i]);
> +			if (!kc)
> +				return -EINVAL;
> +			error = kc->clock_get_timespec(multi_clk_get.clkid_arr[i], &kernel_tp);
> +			if (!error && put_timespec64(&kernel_tp, (struct __kernel_timespec __user *)
> +						     &ptp_multi_clk_get->ts[j][i]))
> +				error = -EFAULT;
> +		}
> +	}
> +
> +	return error;
> +}
> +
> +SYSCALL_DEFINE1(multi_clock_gettime32, struct __ptp_multi_clock_get32 __user *, ptp_multi_clk_get)
> +{
> +	const struct k_clock *kc;
> +	struct timespec64 kernel_tp;
> +	struct __ptp_multi_clock_get multi_clk_get;
> +	int error;
> +	unsigned int i, j;
> +
> +	if (copy_from_user(&multi_clk_get, ptp_multi_clk_get, sizeof(multi_clk_get)))
> +		return -EFAULT;
> +
> +	if (multi_clk_get.n_samples > MULTI_PTP_MAX_SAMPLES)
> +		return -EINVAL;
> +	if (multi_clk_get.n_clocks > MULTI_PTP_MAX_CLOCKS)
> +		return -EINVAL;
> +
> +	for (j = 0; j < multi_clk_get.n_samples; j++) {
> +		for (i = 0; i < multi_clk_get.n_clocks; i++) {
> +			kc = clockid_to_kclock(multi_clk_get.clkid_arr[i]);
> +			if (!kc)
> +				return -EINVAL;
> +			error = kc->clock_get_timespec(multi_clk_get.clkid_arr[i], &kernel_tp);
> +			if (!error && put_old_timespec32(&kernel_tp, (struct old_timespec32 __user *)
> +							&ptp_multi_clk_get->ts[j][i]))
> +				error = -EFAULT;
> +		}
> +	}
> +
> +	return error;
> +}
> +
>  static const struct k_clock clock_realtime = {
>  	.clock_getres		= posix_get_hrtimer_res,
>  	.clock_get_timespec	= posix_get_realtime_timespec,
> -- 
> 2.26.3
>
John Stultz Nov. 28, 2023, 12:46 a.m. UTC | #2
On Mon, Nov 27, 2023 at 4:12 PM Richard Cochran
<richardcochran@gmail.com> wrote:
>
> On Mon, Nov 27, 2023 at 05:39:01PM +0200, Sagi Maimon wrote:
> >  Some user space applications need to read some clocks.
> >  Each read requires moving from user space to kernel space.
> >  This asymmetry causes the measured offset to have a significant error.
>
> Adding time/clock gurus (jstultz, tglx) on CC for visibility...
>

Thanks for the heads up! (though, "guru" is just the noise I make
standing up these days)

> >  Introduce a new system call multi_clock_gettime, which can be used to measure
> >  the offset between multiple clocks, from variety of types: PHC, virtual PHC
> >  and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
> >  The offset includes the total time that the driver needs to read the clock
> >  timestamp.

This last bit about "offset includes the total time that the driver
needs to read the clock" is a bit confusing. It seems to suggest there
would be start/stop bookend timestamps so you could bound how long it
took to read all the clocks, but I don't see that in the patch.

> >  New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
> >  Supported clocks IDs: PHC, virtual PHC and various system clocks.
> >  Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
> >  The system call returns n_clocks timestamps for each measurement:
> >  - clock 0 timestamp
> >  - ...
> >  - clock n timestamp
> >
> > Signed-off-by: Sagi Maimon <maimon.sagi@gmail.com>

Overally, while I understand the intent, I'm pretty hesitant on it
(and "__ptp_multi_clock_get multi_clk_get" has me squinting to find
the actual space amongst all the underscores :).

If the overhead of reading clockids individually is too much, it seems
like the next thing will be folks wanting to export multiple raw
hardware counter values so the counter->ns transformation doesn't get
inbetween each hw clock read, which this interface wouldn't solve, so
we'd have to add yet another interface.

Also, I wonder if trying to get multiple clocks in one read seems
similar to something uio_ring might help with? Though I can't say I'm
very savvy with uio_ring. Have folks looked into that?

thanks
-john
kernel test robot Nov. 28, 2023, 11:45 a.m. UTC | #3
Hi Sagi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on arnd-asm-generic/master]
[also build test WARNING on tip/timers/core linus/master v6.7-rc3]
[cannot apply to tip/x86/asm next-20231128]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Sagi-Maimon/posix-timers-add-multi_clock_gettime-system-call/20231128-000215
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
patch link:    https://lore.kernel.org/r/20231127153901.6399-1-maimon.sagi%40gmail.com
patch subject: [PATCH v2] posix-timers: add multi_clock_gettime system call
config: um-allmodconfig (https://download.01.org/0day-ci/archive/20231128/202311281817.puU0ujYG-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231128/202311281817.puU0ujYG-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311281817.puU0ujYG-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from kernel/time/posix-timers.c:13:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     547 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     560 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
         |                                                   ^
   In file included from kernel/time/posix-timers.c:13:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     573 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
         |                                                   ^
   In file included from kernel/time/posix-timers.c:13:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     584 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     594 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     604 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     692 |         readsb(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     700 |         readsw(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     708 |         readsl(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     717 |         writesb(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     726 |         writesw(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     735 |         writesl(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
>> kernel/time/posix-timers.c:1429:1: warning: stack frame size (2040) exceeds limit (1024) in '__se_sys_multi_clock_gettime' [-Wframe-larger-than]
    1429 | SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get __user *, ptp_multi_clk_get)
         | ^
   include/linux/syscalls.h:219:36: note: expanded from macro 'SYSCALL_DEFINE1'
     219 | #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
         |                                    ^
   include/linux/syscalls.h:230:2: note: expanded from macro 'SYSCALL_DEFINEx'
     230 |         __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
         |         ^
   include/linux/syscalls.h:249:18: note: expanded from macro '__SYSCALL_DEFINEx'
     249 |         asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))  \
         |                         ^
   <scratch space>:122:1: note: expanded from here
     122 | __se_sys_multi_clock_gettime
         | ^
>> kernel/time/posix-timers.c:1460:1: warning: stack frame size (2040) exceeds limit (1024) in '__se_sys_multi_clock_gettime32' [-Wframe-larger-than]
    1460 | SYSCALL_DEFINE1(multi_clock_gettime32, struct __ptp_multi_clock_get32 __user *, ptp_multi_clk_get)
         | ^
   include/linux/syscalls.h:219:36: note: expanded from macro 'SYSCALL_DEFINE1'
     219 | #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
         |                                    ^
   include/linux/syscalls.h:230:2: note: expanded from macro 'SYSCALL_DEFINEx'
     230 |         __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
         |         ^
   include/linux/syscalls.h:249:18: note: expanded from macro '__SYSCALL_DEFINEx'
     249 |         asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))  \
         |                         ^
   <scratch space>:147:1: note: expanded from here
     147 | __se_sys_multi_clock_gettime32
         | ^
   14 warnings generated.


vim +/__se_sys_multi_clock_gettime +1429 kernel/time/posix-timers.c

  1428	
> 1429	SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get __user *, ptp_multi_clk_get)
  1430	{
  1431		const struct k_clock *kc;
  1432		struct timespec64 kernel_tp;
  1433		struct __ptp_multi_clock_get multi_clk_get;
  1434		int error;
  1435		unsigned int i, j;
  1436	
  1437		if (copy_from_user(&multi_clk_get, ptp_multi_clk_get, sizeof(multi_clk_get)))
  1438			return -EFAULT;
  1439	
  1440		if (multi_clk_get.n_samples > MULTI_PTP_MAX_SAMPLES)
  1441			return -EINVAL;
  1442		if (multi_clk_get.n_clocks > MULTI_PTP_MAX_CLOCKS)
  1443			return -EINVAL;
  1444	
  1445		for (j = 0; j < multi_clk_get.n_samples; j++) {
  1446			for (i = 0; i < multi_clk_get.n_clocks; i++) {
  1447				kc = clockid_to_kclock(multi_clk_get.clkid_arr[i]);
  1448				if (!kc)
  1449					return -EINVAL;
  1450				error = kc->clock_get_timespec(multi_clk_get.clkid_arr[i], &kernel_tp);
  1451				if (!error && put_timespec64(&kernel_tp, (struct __kernel_timespec __user *)
  1452							     &ptp_multi_clk_get->ts[j][i]))
  1453					error = -EFAULT;
  1454			}
  1455		}
  1456	
  1457		return error;
  1458	}
  1459	
> 1460	SYSCALL_DEFINE1(multi_clock_gettime32, struct __ptp_multi_clock_get32 __user *, ptp_multi_clk_get)
  1461	{
  1462		const struct k_clock *kc;
  1463		struct timespec64 kernel_tp;
  1464		struct __ptp_multi_clock_get multi_clk_get;
  1465		int error;
  1466		unsigned int i, j;
  1467	
  1468		if (copy_from_user(&multi_clk_get, ptp_multi_clk_get, sizeof(multi_clk_get)))
  1469			return -EFAULT;
  1470	
  1471		if (multi_clk_get.n_samples > MULTI_PTP_MAX_SAMPLES)
  1472			return -EINVAL;
  1473		if (multi_clk_get.n_clocks > MULTI_PTP_MAX_CLOCKS)
  1474			return -EINVAL;
  1475	
  1476		for (j = 0; j < multi_clk_get.n_samples; j++) {
  1477			for (i = 0; i < multi_clk_get.n_clocks; i++) {
  1478				kc = clockid_to_kclock(multi_clk_get.clkid_arr[i]);
  1479				if (!kc)
  1480					return -EINVAL;
  1481				error = kc->clock_get_timespec(multi_clk_get.clkid_arr[i], &kernel_tp);
  1482				if (!error && put_old_timespec32(&kernel_tp, (struct old_timespec32 __user *)
  1483								&ptp_multi_clk_get->ts[j][i]))
  1484					error = -EFAULT;
  1485			}
  1486		}
  1487	
  1488		return error;
  1489	}
  1490
kernel test robot Nov. 28, 2023, 1:36 p.m. UTC | #4
Hi Sagi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on arnd-asm-generic/master]
[also build test WARNING on tip/timers/core linus/master v6.7-rc3]
[cannot apply to tip/x86/asm next-20231128]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Sagi-Maimon/posix-timers-add-multi_clock_gettime-system-call/20231128-000215
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
patch link:    https://lore.kernel.org/r/20231127153901.6399-1-maimon.sagi%40gmail.com
patch subject: [PATCH v2] posix-timers: add multi_clock_gettime system call
config: sh-allmodconfig (https://download.01.org/0day-ci/archive/20231128/202311282046.3hMwdKes-lkp@intel.com/config)
compiler: sh4-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231128/202311282046.3hMwdKes-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311282046.3hMwdKes-lkp@intel.com/

All warnings (new ones prefixed by >>):

   <stdin>:1519:2: warning: #warning syscall clone3 not implemented [-Wcpp]
>> <stdin>:1585:2: warning: #warning syscall multi_clock_gettime not implemented [-Wcpp]
--
   kernel/time/posix-timers.c: In function '__do_sys_multi_clock_gettime':
>> kernel/time/posix-timers.c:1458:1: warning: the frame size of 2004 bytes is larger than 1024 bytes [-Wframe-larger-than=]
    1458 | }
         | ^
   kernel/time/posix-timers.c: In function '__do_sys_multi_clock_gettime32':
   kernel/time/posix-timers.c:1489:1: warning: the frame size of 2004 bytes is larger than 1024 bytes [-Wframe-larger-than=]
    1489 | }
         | ^
--
   <stdin>:1519:2: warning: #warning syscall clone3 not implemented [-Wcpp]
>> <stdin>:1585:2: warning: #warning syscall multi_clock_gettime not implemented [-Wcpp]
--
   scripts/genksyms/parse.y: warning: 9 shift/reduce conflicts [-Wconflicts-sr]
   scripts/genksyms/parse.y: warning: 5 reduce/reduce conflicts [-Wconflicts-rr]
   scripts/genksyms/parse.y: note: rerun with option '-Wcounterexamples' to generate conflict counterexamples
   <stdin>:1519:2: warning: #warning syscall clone3 not implemented [-Wcpp]
>> <stdin>:1585:2: warning: #warning syscall multi_clock_gettime not implemented [-Wcpp]
kernel test robot Nov. 30, 2023, 3:43 p.m. UTC | #5
Hi Sagi,

kernel test robot noticed the following build errors:

[auto build test ERROR on arnd-asm-generic/master]
[also build test ERROR on tip/timers/core linus/master v6.7-rc3]
[cannot apply to tip/x86/asm next-20231130]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Sagi-Maimon/posix-timers-add-multi_clock_gettime-system-call/20231128-000215
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
patch link:    https://lore.kernel.org/r/20231127153901.6399-1-maimon.sagi%40gmail.com
patch subject: [PATCH v2] posix-timers: add multi_clock_gettime system call
config: arc-randconfig-001-20231130 (https://download.01.org/0day-ci/archive/20231130/202311302333.CokAopgU-lkp@intel.com/config)
compiler: arceb-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231130/202311302333.CokAopgU-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311302333.CokAopgU-lkp@intel.com/

All errors (new ones prefixed by >>):

   arceb-elf-ld: drivers/greybus/gb-beagleplay.o: in function `hdlc_tx_frames':
   gb-beagleplay.c:(.text+0x184): undefined reference to `crc_ccitt'
   arceb-elf-ld: gb-beagleplay.c:(.text+0x184): undefined reference to `crc_ccitt'
   arceb-elf-ld: gb-beagleplay.c:(.text+0x1f2): undefined reference to `crc_ccitt'
   arceb-elf-ld: gb-beagleplay.c:(.text+0x1f2): undefined reference to `crc_ccitt'
   arceb-elf-ld: gb-beagleplay.c:(.text+0x28c): undefined reference to `crc_ccitt'
   arceb-elf-ld: drivers/greybus/gb-beagleplay.o:gb-beagleplay.c:(.text+0x28c): more undefined references to `crc_ccitt' follow
>> arceb-elf-ld: arch/arc/kernel/sys.o:(.data+0x728): undefined reference to `sys_multi_clock_gettime'
>> arceb-elf-ld: arch/arc/kernel/sys.o:(.data+0x728): undefined reference to `sys_multi_clock_gettime'
Sagi Maimon Nov. 30, 2023, 3:45 p.m. UTC | #6
Hi John,
Thanks for your notes.

On Tue, Nov 28, 2023 at 2:46 AM John Stultz <jstultz@google.com> wrote:
>
> On Mon, Nov 27, 2023 at 4:12 PM Richard Cochran
> <richardcochran@gmail.com> wrote:
> >
> > On Mon, Nov 27, 2023 at 05:39:01PM +0200, Sagi Maimon wrote:
> > >  Some user space applications need to read some clocks.
> > >  Each read requires moving from user space to kernel space.
> > >  This asymmetry causes the measured offset to have a significant error.
> >
> > Adding time/clock gurus (jstultz, tglx) on CC for visibility...
> >
>
> Thanks for the heads up! (though, "guru" is just the noise I make
> standing up these days)
>
> > >  Introduce a new system call multi_clock_gettime, which can be used to measure
> > >  the offset between multiple clocks, from variety of types: PHC, virtual PHC
> > >  and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
> > >  The offset includes the total time that the driver needs to read the clock
> > >  timestamp.
>
> This last bit about "offset includes the total time that the driver
> needs to read the clock" is a bit confusing. It seems to suggest there
> would be start/stop bookend timestamps so you could bound how long it
> took to read all the clocks, but I don't see that in the patch.
>
You are right it is a little confusing, I will remove it from the commit .

> > >  New system call allows the reading of a list of clocks - up to PTP_MAX_CLOCKS.
> > >  Supported clocks IDs: PHC, virtual PHC and various system clocks.
> > >  Up to PTP_MAX_SAMPLES times (per clock) in a single system call read.
> > >  The system call returns n_clocks timestamps for each measurement:
> > >  - clock 0 timestamp
> > >  - ...
> > >  - clock n timestamp
> > >
> > > Signed-off-by: Sagi Maimon <maimon.sagi@gmail.com>
>
> Overally, while I understand the intent, I'm pretty hesitant on it
> (and "__ptp_multi_clock_get multi_clk_get" has me squinting to find
> the actual space amongst all the underscores :).
>
struct __ptp_multi_clock_get __user * ptp_multi_clk_get has many
underscores indeed :-)
I will rename it, if you have any naming suggestions, I will be glad to hear it.

> If the overhead of reading clockids individually is too much, it seems
> like the next thing will be folks wanting to export multiple raw
> hardware counter values so the counter->ns transformation doesn't get
> inbetween each hw clock read, which this interface wouldn't solve, so
> we'd have to add yet another interface.
>
future raw HW counter read interface:
• For PHC - reading raw HW counter is driver dependent, and will
require changes in many existing PTP drivers.
• For System clock it is possible but with much more code changes and
the improvements seems to be small.
• The issue you introduced can be reduced (but not completely
overcome) by user space usage of the interface.
For example, to minimize the difference between clock X and clock Y,
users can call  multi_clock_gettime  with 3 clock reads : X, Y, and X
again.
Considering gain (thin extra accuracy) vs pain (a lot of code changes,
also for system clocks) and needs – we chose to settle with the
current suggested interface.

> Also, I wonder if trying to get multiple clocks in one read seems
> similar to something uio_ring might help with? Though I can't say I'm
> very savvy with uio_ring. Have folks looked into that?
>
I am also not an expert with the uio_ring/liburing, but I researched a
little and it seems it is mainly used for IO operations and support
only few of them (IORING_OP_READV, IORING_OP_SENDMSG, etc.)
I am not sure how to implement it for consecutive clock_gettime system
calls and if it can be done at all.
Even if it can be done, it adds a lot of complexity to the user space
code and the use in one generic system call is more elegant in my
opinion.
For example: Looking at the existing ioctl PTP_SYS_OFFSET,
theoretically it can also be implemented by uio_ring, but it is still
a more elegant solution.
> thanks
> -john
Thomas Gleixner Dec. 15, 2023, 6:04 p.m. UTC | #7
On Mon, Nov 27 2023 at 17:39, Sagi Maimon wrote:
>  Some user space applications need to read some clocks.
>  Each read requires moving from user space to kernel space.
>  This asymmetry causes the measured offset to have a significant
>  error.

I can't figure out what you want to tell me here. Where is an asymmetry?

>  Introduce a new system call multi_clock_gettime, which can be used to measure
>  the offset between multiple clocks, from variety of types: PHC, virtual PHC
>  and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
>  The offset includes the total time that the driver needs to read the clock
>  timestamp.

What for? You still fail to explain the problem this is trying to solve.

> --- a/include/linux/posix-timers.h
> +++ b/include/linux/posix-timers.h
> @@ -260,4 +260,28 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
>  int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
>  
>  void posixtimer_rearm(struct kernel_siginfo *info);
> +
> +#define MULTI_PTP_MAX_CLOCKS 12 /* Max number of clocks */
> +#define MULTI_PTP_MAX_SAMPLES 10 /* Max allowed offset measurement samples. */
> +
> +struct __ptp_multi_clock_get {
> +	unsigned int n_clocks; /* Desired number of clocks. */
> +	unsigned int n_samples; /* Desired number of measurements per clock. */
> +	const clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> +	/*
> +	 * Array of list of n_clocks clocks time samples n_samples times.
> +	 */
> +	struct  __kernel_timespec ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
> +};
> +
> +struct __ptp_multi_clock_get32 {
> +	unsigned int n_clocks; /* Desired number of clocks. */
> +	unsigned int n_samples; /* Desired number of measurements per clock. */
> +	const clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> +	/*
> +	 * Array of list of n_clocks clocks time samples n_samples times.
> +	 */
> +	struct  old_timespec32
>  ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];

Seriously now. We are not adding new syscalls which take compat
timespecs. Any user space application which wants to use a new syscall
which takes a timespec needs to use the Y2038 safe variant.

Aside of that you define a data structure for a syscall in a kernel only
header. How is user space supposed to know the struct?

>  
> +SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get __user *, ptp_multi_clk_get)
> +{
> +	const struct k_clock *kc;
> +	struct timespec64 kernel_tp;
> +	struct __ptp_multi_clock_get multi_clk_get;
> +	int error;
> +	unsigned int i, j;

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#variable-declarations

> +
> +	if (copy_from_user(&multi_clk_get, ptp_multi_clk_get, sizeof(multi_clk_get)))
> +		return -EFAULT;
> +
> +	if (multi_clk_get.n_samples > MULTI_PTP_MAX_SAMPLES)
> +		return -EINVAL;
> +	if (multi_clk_get.n_clocks > MULTI_PTP_MAX_CLOCKS)
> +		return -EINVAL;
> +
> +	for (j = 0; j < multi_clk_get.n_samples; j++) {
> +		for (i = 0; i < multi_clk_get.n_clocks; i++) {
> +			kc = clockid_to_kclock(multi_clk_get.clkid_arr[i]);
> +			if (!kc)
> +				return -EINVAL;
> +			error = kc->clock_get_timespec(multi_clk_get.clkid_arr[i], &kernel_tp);
> +			if (!error && put_timespec64(&kernel_tp, (struct __kernel_timespec __user *)
> +						     &ptp_multi_clk_get->ts[j][i]))
> +				error = -EFAULT;

So this reads a clock from a specific clock id and stores the timestamp
in that user space array.

And how is this solving any of the claims you make in the changelog:

    >  Introduce a new system call multi_clock_gettime, which can be used to measure
    >  the offset between multiple clocks, from variety of types: PHC, virtual PHC
    >  and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
    >  The offset includes the total time that the driver needs to read the clock
    >  timestamp.

That whole thing is not really different from N consecutive syscalls as
it does not provide and guarantee vs. the gaps between the readouts.

The common case might be closer to what you try to measure, as it avoids
the syscall overhead (which is marginal) but other than that it's
subject to be interrupted and preempted. So the worst case gaps between
the indiviual clock reads is unspecified.

IOW, this is nothing else than wishful thinking and does not solve any real
world problem at all.

Thanks,

        tglx
Sagi Maimon Dec. 27, 2023, 3:09 p.m. UTC | #8
On Fri, Dec 15, 2023 at 8:05 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>
Hi Thomas
Thanks for your notes.
> On Mon, Nov 27 2023 at 17:39, Sagi Maimon wrote:
> >  Some user space applications need to read some clocks.
> >  Each read requires moving from user space to kernel space.
> >  This asymmetry causes the measured offset to have a significant
> >  error.
>
> I can't figure out what you want to tell me here. Where is an asymmetry?
>
You are right the comment is not clear enough.
Some user space applications need to read some clocks.
Each read requires moving from user space to kernel space.
The syscall overhead causes unpredictable delay between N clocks reads
Removing this delay causes better synchronization between N clocks.
> >  Introduce a new system call multi_clock_gettime, which can be used to measure
> >  the offset between multiple clocks, from variety of types: PHC, virtual PHC
> >  and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
> >  The offset includes the total time that the driver needs to read the clock
> >  timestamp.
>
> What for? You still fail to explain the problem this is trying to solve.
>
Explanation above
> > --- a/include/linux/posix-timers.h
> > +++ b/include/linux/posix-timers.h
> > @@ -260,4 +260,28 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
> >  int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
> >
> >  void posixtimer_rearm(struct kernel_siginfo *info);
> > +
> > +#define MULTI_PTP_MAX_CLOCKS 12 /* Max number of clocks */
> > +#define MULTI_PTP_MAX_SAMPLES 10 /* Max allowed offset measurement samples. */
> > +
> > +struct __ptp_multi_clock_get {
> > +     unsigned int n_clocks; /* Desired number of clocks. */
> > +     unsigned int n_samples; /* Desired number of measurements per clock. */
> > +     const clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> > +     /*
> > +      * Array of list of n_clocks clocks time samples n_samples times.
> > +      */
> > +     struct  __kernel_timespec ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
> > +};
> > +
> > +struct __ptp_multi_clock_get32 {
> > +     unsigned int n_clocks; /* Desired number of clocks. */
> > +     unsigned int n_samples; /* Desired number of measurements per clock. */
> > +     const clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
> > +     /*
> > +      * Array of list of n_clocks clocks time samples n_samples times.
> > +      */
> > +     struct  old_timespec32
> >  ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
>
> Seriously now. We are not adding new syscalls which take compat
> timespecs. Any user space application which wants to use a new syscall
> which takes a timespec needs to use the Y2038 safe variant.
>
you are right - will be fixed on patch V3
> Aside of that you define a data structure for a syscall in a kernel only
> header. How is user space supposed to know the struct?
>
you are right - will be fixed on patch V3
> >
> > +SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get __user *, ptp_multi_clk_get)
> > +{
> > +     const struct k_clock *kc;
> > +     struct timespec64 kernel_tp;
> > +     struct __ptp_multi_clock_get multi_clk_get;
> > +     int error;
> > +     unsigned int i, j;
>
> https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#variable-declarations
>
you are right - will be fixed on patch V3
> > +
> > +     if (copy_from_user(&multi_clk_get, ptp_multi_clk_get, sizeof(multi_clk_get)))
> > +             return -EFAULT;
> > +
> > +     if (multi_clk_get.n_samples > MULTI_PTP_MAX_SAMPLES)
> > +             return -EINVAL;
> > +     if (multi_clk_get.n_clocks > MULTI_PTP_MAX_CLOCKS)
> > +             return -EINVAL;
> > +
> > +     for (j = 0; j < multi_clk_get.n_samples; j++) {
> > +             for (i = 0; i < multi_clk_get.n_clocks; i++) {
> > +                     kc = clockid_to_kclock(multi_clk_get.clkid_arr[i]);
> > +                     if (!kc)
> > +                             return -EINVAL;
> > +                     error = kc->clock_get_timespec(multi_clk_get.clkid_arr[i], &kernel_tp);
> > +                     if (!error && put_timespec64(&kernel_tp, (struct __kernel_timespec __user *)
> > +                                                  &ptp_multi_clk_get->ts[j][i]))
> > +                             error = -EFAULT;
>
> So this reads a clock from a specific clock id and stores the timestamp
> in that user space array.
>
> And how is this solving any of the claims you make in the changelog:
>
>     >  Introduce a new system call multi_clock_gettime, which can be used to measure
>     >  the offset between multiple clocks, from variety of types: PHC, virtual PHC
>     >  and various system clocks (CLOCK_REALTIME, CLOCK_MONOTONIC, etc).
>     >  The offset includes the total time that the driver needs to read the clock
>     >  timestamp.
>
> That whole thing is not really different from N consecutive syscalls as
> it does not provide and guarantee vs. the gaps between the readouts.
>
> The common case might be closer to what you try to measure, as it avoids
> the syscall overhead (which is marginal) but other than that it's
> subject to be interrupted and preempted. So the worst case gaps between
> the indiviual clock reads is unspecified.
>
> IOW, this is nothing else than wishful thinking and does not solve any real
> world problem at all.
>
preemption or interruption delays will still occur, but at least we
are removing the syscall overhead.
Plus the preemption issue can be reduced by using 99 RT priority while
calling this system call.
We have conducted an experiment that proved that the system call
overhead is not marginal at all.
A process with NICE 0 priority reading PHC twice and measuring the
time delay between two reads 1000 times.
The first is done by two consecutive calls to clock_gettime system
call and the other with
one call to multi_clock_gettime system call.
In the system with multi_clock_gettime system call, the delay of 990
calls was under 100 ns.
In the system with clock_gettime system call the delay of 580 calls
were under 100 ns
72 between 100-500ns 322 between 500-1000ns and some over 1000-5000ns.
> Thanks,
>
>         tglx
diff mbox series

Patch

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c8fac5205803..070efd266e7e 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -461,3 +461,4 @@ 
 454	i386	futex_wake		sys_futex_wake
 455	i386	futex_wait		sys_futex_wait
 456	i386	futex_requeue		sys_futex_requeue
+457	i386	multi_clock_gettime		sys_multi_clock_gettime32
\ No newline at end of file
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 8cb8bf68721c..f790330244bb 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -378,6 +378,7 @@ 
 454	common	futex_wake		sys_futex_wake
 455	common	futex_wait		sys_futex_wait
 456	common	futex_requeue		sys_futex_requeue
+457	common	multi_clock_gettime		sys_multi_clock_gettime
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index d607f51404fc..426a45441ab5 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -260,4 +260,28 @@  void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
 
 void posixtimer_rearm(struct kernel_siginfo *info);
+
+#define MULTI_PTP_MAX_CLOCKS 12 /* Max number of clocks */
+#define MULTI_PTP_MAX_SAMPLES 10 /* Max allowed offset measurement samples. */
+
+struct __ptp_multi_clock_get {
+	unsigned int n_clocks; /* Desired number of clocks. */
+	unsigned int n_samples; /* Desired number of measurements per clock. */
+	const clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
+	/*
+	 * Array of list of n_clocks clocks time samples n_samples times.
+	 */
+	struct  __kernel_timespec ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
+};
+
+struct __ptp_multi_clock_get32 {
+	unsigned int n_clocks; /* Desired number of clocks. */
+	unsigned int n_samples; /* Desired number of measurements per clock. */
+	const clockid_t clkid_arr[MULTI_PTP_MAX_CLOCKS]; /* list of clock IDs */
+	/*
+	 * Array of list of n_clocks clocks time samples n_samples times.
+	 */
+	struct  old_timespec32 ts[MULTI_PTP_MAX_SAMPLES][MULTI_PTP_MAX_CLOCKS];
+};
+
 #endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fd9d12de7e92..afcf68e83d63 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1161,7 +1161,8 @@  asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
 			unsigned long prot, unsigned long flags,
 			unsigned long fd, unsigned long pgoff);
 asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
-
+asmlinkage long sys_multi_clock_gettime(struct __ptp_multi_clock_get __user * ptp_multi_clk_get);
+asmlinkage long sys_multi_clock_gettime32(struct __ptp_multi_clock_get32 __user * ptp_multi_clk_get);
 
 /*
  * Not a real system call, but a placeholder for syscalls which are
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 756b013fb832..3ebcaa052650 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -829,8 +829,18 @@  __SYSCALL(__NR_futex_wait, sys_futex_wait)
 #define __NR_futex_requeue 456
 __SYSCALL(__NR_futex_requeue, sys_futex_requeue)
 
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
+#define __NR_multi_clock_gettime 457
+__SC_3264(__NR_multi_clock_gettime, sys_multi_clock_gettime32, sys_multi_clock_gettime)
+#endif
+
+#if defined(__SYSCALL_COMPAT) || __BITS_PER_LONG == 32
+#define __NR_multi_clock_gettime64 458
+__SYSCALL(__NR_multi_clock_gettime64, sys_multi_clock_gettime)
+#endif
+
 #undef __NR_syscalls
-#define __NR_syscalls 457
+#define __NR_syscalls 459
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index e1a6e3c675c0..8ed1c22f40ac 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -335,6 +335,7 @@  COND_SYSCALL(ppoll_time32);
 COND_SYSCALL_COMPAT(ppoll_time32);
 COND_SYSCALL(utimensat_time32);
 COND_SYSCALL(clock_adjtime32);
+COND_SYSCALL(multi_clock_gettime32);
 
 /*
  * The syscalls below are not found in include/uapi/asm-generic/unistd.h
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index b924f0f096fa..517558af2479 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1426,6 +1426,68 @@  SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
 
 #endif
 
+SYSCALL_DEFINE1(multi_clock_gettime, struct __ptp_multi_clock_get __user *, ptp_multi_clk_get)
+{
+	const struct k_clock *kc;
+	struct timespec64 kernel_tp;
+	struct __ptp_multi_clock_get multi_clk_get;
+	int error;
+	unsigned int i, j;
+
+	if (copy_from_user(&multi_clk_get, ptp_multi_clk_get, sizeof(multi_clk_get)))
+		return -EFAULT;
+
+	if (multi_clk_get.n_samples > MULTI_PTP_MAX_SAMPLES)
+		return -EINVAL;
+	if (multi_clk_get.n_clocks > MULTI_PTP_MAX_CLOCKS)
+		return -EINVAL;
+
+	for (j = 0; j < multi_clk_get.n_samples; j++) {
+		for (i = 0; i < multi_clk_get.n_clocks; i++) {
+			kc = clockid_to_kclock(multi_clk_get.clkid_arr[i]);
+			if (!kc)
+				return -EINVAL;
+			error = kc->clock_get_timespec(multi_clk_get.clkid_arr[i], &kernel_tp);
+			if (!error && put_timespec64(&kernel_tp, (struct __kernel_timespec __user *)
+						     &ptp_multi_clk_get->ts[j][i]))
+				error = -EFAULT;
+		}
+	}
+
+	return error;
+}
+
+SYSCALL_DEFINE1(multi_clock_gettime32, struct __ptp_multi_clock_get32 __user *, ptp_multi_clk_get)
+{
+	const struct k_clock *kc;
+	struct timespec64 kernel_tp;
+	struct __ptp_multi_clock_get multi_clk_get;
+	int error;
+	unsigned int i, j;
+
+	if (copy_from_user(&multi_clk_get, ptp_multi_clk_get, sizeof(multi_clk_get)))
+		return -EFAULT;
+
+	if (multi_clk_get.n_samples > MULTI_PTP_MAX_SAMPLES)
+		return -EINVAL;
+	if (multi_clk_get.n_clocks > MULTI_PTP_MAX_CLOCKS)
+		return -EINVAL;
+
+	for (j = 0; j < multi_clk_get.n_samples; j++) {
+		for (i = 0; i < multi_clk_get.n_clocks; i++) {
+			kc = clockid_to_kclock(multi_clk_get.clkid_arr[i]);
+			if (!kc)
+				return -EINVAL;
+			error = kc->clock_get_timespec(multi_clk_get.clkid_arr[i], &kernel_tp);
+			if (!error && put_old_timespec32(&kernel_tp, (struct old_timespec32 __user *)
+							&ptp_multi_clk_get->ts[j][i]))
+				error = -EFAULT;
+		}
+	}
+
+	return error;
+}
+
 static const struct k_clock clock_realtime = {
 	.clock_getres		= posix_get_hrtimer_res,
 	.clock_get_timespec	= posix_get_realtime_timespec,