diff mbox series

[v5,3/6] timekeeping: Add clocksource to system_time_snapshot

Message ID 20191015104822.13890-4-jianyong.wu@arm.com (mailing list archive)
State New, archived
Headers show
Series Enable ptp_kvm for arm64 | expand

Commit Message

Jianyong Wu Oct. 15, 2019, 10:48 a.m. UTC
Sometimes, we need check current clocksource outside of
timekeeping area. Add clocksource to system_time_snapshot then
we can get clocksource as well as system time.

Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/timekeeping.h | 35 ++++++++++++++++++-----------------
 kernel/time/timekeeping.c   |  7 ++++---
 2 files changed, 22 insertions(+), 20 deletions(-)

Comments

Paolo Bonzini Oct. 15, 2019, 4:37 p.m. UTC | #1
On 15/10/19 12:48, Jianyong Wu wrote:
> Sometimes, we need check current clocksource outside of
> timekeeping area. Add clocksource to system_time_snapshot then
> we can get clocksource as well as system time.
> 
> Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  include/linux/timekeeping.h | 35 ++++++++++++++++++-----------------
>  kernel/time/timekeeping.c   |  7 ++++---
>  2 files changed, 22 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
> index a8ab0f143ac4..964c14fbbf69 100644
> --- a/include/linux/timekeeping.h
> +++ b/include/linux/timekeeping.h
> @@ -194,23 +194,6 @@ extern bool timekeeping_rtc_skipresume(void);
>  
>  extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
>  
> -/*
> - * struct system_time_snapshot - simultaneous raw/real time capture with
> - *	counter value
> - * @cycles:	Clocksource counter value to produce the system times
> - * @real:	Realtime system time
> - * @raw:	Monotonic raw system time
> - * @clock_was_set_seq:	The sequence number of clock was set events
> - * @cs_was_changed_seq:	The sequence number of clocksource change events
> - */
> -struct system_time_snapshot {
> -	u64		cycles;
> -	ktime_t		real;
> -	ktime_t		raw;
> -	unsigned int	clock_was_set_seq;
> -	u8		cs_was_changed_seq;
> -};
> -
>  /*
>   * struct system_device_crosststamp - system/device cross-timestamp
>   *	(syncronized capture)
> @@ -236,6 +219,24 @@ struct system_counterval_t {
>  	struct clocksource	*cs;
>  };
>  
> +/*
> + * struct system_time_snapshot - simultaneous raw/real time capture with
> + *	counter value
> + * @sc:		Contains clocksource and clocksource counter value to produce
> + * 	the system times
> + * @real:	Realtime system time
> + * @raw:	Monotonic raw system time
> + * @clock_was_set_seq:	The sequence number of clock was set events
> + * @cs_was_changed_seq:	The sequence number of clocksource change events
> + */
> +struct system_time_snapshot {
> +	struct system_counterval_t sc;
> +	ktime_t		real;
> +	ktime_t		raw;
> +	unsigned int	clock_was_set_seq;
> +	u8		cs_was_changed_seq;
> +};
> +
>  /*
>   * Get cross timestamp between system clock and device clock
>   */
> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
> index 44b726bab4bd..66ff089605b3 100644
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -983,7 +983,8 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
>  		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
>  	} while (read_seqcount_retry(&tk_core.seq, seq));
>  
> -	systime_snapshot->cycles = now;
> +	systime_snapshot->sc.cycles = now;
> +	systime_snapshot->sc.cs = tk->tkr_mono.clock;
>  	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
>  	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
>  }
> @@ -1189,12 +1190,12 @@ int get_device_system_crosststamp(int (*get_time_fn)
>  		 * clocksource change
>  		 */
>  		if (!history_begin ||
> -		    !cycle_between(history_begin->cycles,
> +		    !cycle_between(history_begin->sc.cycles,
>  				   system_counterval.cycles, cycles) ||
>  		    history_begin->cs_was_changed_seq != cs_was_changed_seq)
>  			return -EINVAL;
>  		partial_history_cycles = cycles - system_counterval.cycles;
> -		total_history_cycles = cycles - history_begin->cycles;
> +		total_history_cycles = cycles - history_begin->sc.cycles;
>  		discontinuity =
>  			history_begin->clock_was_set_seq != clock_was_set_seq;
>  
> 

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Thomas Gleixner Oct. 15, 2019, 8:12 p.m. UTC | #2
On Tue, 15 Oct 2019, Jianyong Wu wrote:

> Sometimes, we need check current clocksource outside of
> timekeeping area. Add clocksource to system_time_snapshot then
> we can get clocksource as well as system time.

This changelog is telling absolutely nothing WHY anything outside of the
timekeeping core code needs access to the current clocksource. Neither does
it tell why it is safe to provide the pointer to random callers.

> +/*
> + * struct system_time_snapshot - simultaneous raw/real time capture with
> + *	counter value
> + * @sc:		Contains clocksource and clocksource counter value to produce
> + * 	the system times
> + * @real:	Realtime system time
> + * @raw:	Monotonic raw system time
> + * @clock_was_set_seq:	The sequence number of clock was set events
> + * @cs_was_changed_seq:	The sequence number of clocksource change events
> + */
> +struct system_time_snapshot {
> +	struct system_counterval_t sc;
> +	ktime_t		real;
> +	ktime_t		raw;
> +	unsigned int	clock_was_set_seq;
> +	u8		cs_was_changed_seq;
> +};
> +
>  /*
>   * Get cross timestamp between system clock and device clock
>   */
> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
> index 44b726bab4bd..66ff089605b3 100644
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -983,7 +983,8 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
>  		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
>  	} while (read_seqcount_retry(&tk_core.seq, seq));
>  
> -	systime_snapshot->cycles = now;
> +	systime_snapshot->sc.cycles = now;
> +	systime_snapshot->sc.cs = tk->tkr_mono.clock;

The clock pointer can change right after the store, the underlying data can
be freed .....

Looking at the rest of the patch set the actual usage site is:

> +       case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
> +               ktime_get_snapshot(&systime_snapshot);
> +               if (!is_arm_arch_counter(systime_snapshot.sc.cs))
> +                       return kvm_psci_call(vcpu);

and that function does:

> +bool is_arm_arch_counter(void *cs)

void *? Type safety is overrated, right? The type is well known....

+{
+       return (struct clocksource *)cs == &clocksource_counter;

That nonsensical typecast does not make up for that.

+}

So while the access to the pointer is actually safe, this is not going to
happen simply because you modify a generic interface in a way which will
lead the next developer to insane assumptions about the validity of that
pointer.

While the kernel is pretty lax in terms of isolation due to the nature of
the programming language, this does not justify to expose critical
internals of core code to random callers. Guess why most of the timekeeping
internals are carefully shielded from external access.

Something like the completely untested (not even compiled) patch below
gives you access to the information you need and allows to reuse the
mechanism for other purposes without adding is_$FOO_timer() all over the
place.

Thanks,

	tglx

8<--------------
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -9,6 +9,7 @@
 #ifndef _LINUX_CLOCKSOURCE_H
 #define _LINUX_CLOCKSOURCE_H
 
+#include <linux/clocksource_ids.h>
 #include <linux/types.h>
 #include <linux/timex.h>
 #include <linux/time.h>
@@ -49,6 +50,10 @@ struct module;
  *			400-499: Perfect
  *				The ideal clocksource. A must-use where
  *				available.
+ * @id:			Defaults to CSID_GENERIC. The id value is captured
+ *			in certain snapshot functions to allow callers to
+ *			validate the clocksource from which the snapshot was
+ *			taken.
  * @read:		returns a cycle value, passes clocksource as argument
  * @enable:		optional function to enable the clocksource
  * @disable:		optional function to disable the clocksource
@@ -91,6 +96,7 @@ struct clocksource {
 	const char *name;
 	struct list_head list;
 	int rating;
+	enum clocksource_ids id;
 	int (*enable)(struct clocksource *cs);
 	void (*disable)(struct clocksource *cs);
 	unsigned long flags;
--- /dev/null
+++ b/include/linux/clocksource_ids.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CLOCKSOURCE_IDS_H
+#define _LINUX_CLOCKSOURCE_IDS_H
+
+/* Enum to give clocksources a unique identifier */
+enum clocksource_ids {
+	CSID_GENERIC		= 0,
+	CSID_ARM_ARCH_COUNTER,
+	CSID_MAX,
+};
+
+#endif
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_TIMEKEEPING_H
 #define _LINUX_TIMEKEEPING_H
 
+#include <linux/clocksource_ids.h>
 #include <linux/errno.h>
 
 /* Included from linux/ktime.h */
@@ -228,15 +229,17 @@ extern void timekeeping_inject_sleeptime
  * @cycles:	Clocksource counter value to produce the system times
  * @real:	Realtime system time
  * @raw:	Monotonic raw system time
+ * @cs_id:	The id of the current clocksource which produced the snapshot
  * @clock_was_set_seq:	The sequence number of clock was set events
  * @cs_was_changed_seq:	The sequence number of clocksource change events
  */
 struct system_time_snapshot {
-	u64		cycles;
-	ktime_t		real;
-	ktime_t		raw;
-	unsigned int	clock_was_set_seq;
-	u8		cs_was_changed_seq;
+	u64			cycles;
+	ktime_t			real;
+	ktime_t			raw;
+	enum clocksource_ids	cs_id;
+	unsigned int		clock_was_set_seq;
+	u8			cs_was_changed_seq;
 };
 
 /*
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -921,6 +921,9 @@ int __clocksource_register_scale(struct
 
 	clocksource_arch_init(cs);
 
+	if (WARN_ON_ONCE((unsigned int)cs->id >= CSID_MAX))
+		cs->id = CSID_GENERIC;
+
 	/* Initialize mult/shift and max_idle_ns */
 	__clocksource_update_freq_scale(cs, scale, freq);
 
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -979,6 +979,7 @@ void ktime_get_snapshot(struct system_ti
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 		now = tk_clock_read(&tk->tkr_mono);
+		systime_snapshot->cs_id = tk->tkr_mono.clock->id;
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
Thomas Gleixner Oct. 15, 2019, 8:13 p.m. UTC | #3
On Tue, 15 Oct 2019, Paolo Bonzini wrote:
> On 15/10/19 12:48, Jianyong Wu wrote:
> >  
> > 
> 
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

You're sure about having reviewed that in detail?

Thanks,

	tglx
Paolo Bonzini Oct. 15, 2019, 10:35 p.m. UTC | #4
On 15/10/19 22:12, Thomas Gleixner wrote:
> @@ -91,6 +96,7 @@ struct clocksource {
>  	const char *name;
>  	struct list_head list;
>  	int rating;
> +	enum clocksource_ids id;

Why add a global id?  ARM can add it to archdata similar to how x86 has
vclock_mode.  But I still think the right thing to do is to include the
full system_counterval_t in the result of ktime_get_snapshot.  (More in
a second, feel free to reply to the other email only).

Paolo
Paolo Bonzini Oct. 15, 2019, 10:36 p.m. UTC | #5
On 15/10/19 22:13, Thomas Gleixner wrote:
> On Tue, 15 Oct 2019, Paolo Bonzini wrote:
>> On 15/10/19 12:48, Jianyong Wu wrote:
>>>  
>>>
>>
>> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> You're sure about having reviewed that in detail?

I did review the patch; the void* ugliness is not in this one, and I do
have some other qualms on that one.

> This changelog is telling absolutely nothing WHY anything outside of the
> timekeeping core code needs access to the current clocksource. Neither does
> it tell why it is safe to provide the pointer to random callers.

Agreed on the changelog, but the pointer to a clocksource is already
part of the timekeeping external API via struct system_counterval_t.
get_device_system_crosststamp for example expects a clocksource pointer
but provides no way to get such a pointer.

Paolo
Thomas Gleixner Oct. 16, 2019, 7:28 a.m. UTC | #6
On Wed, 16 Oct 2019, Paolo Bonzini wrote:
> On 15/10/19 22:13, Thomas Gleixner wrote:
> > On Tue, 15 Oct 2019, Paolo Bonzini wrote:
> >> On 15/10/19 12:48, Jianyong Wu wrote:
> >>>  
> >>>
> >>
> >> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > You're sure about having reviewed that in detail?
> 
> I did review the patch; the void* ugliness is not in this one, and I do
> have some other qualms on that one.
> 
> > This changelog is telling absolutely nothing WHY anything outside of the
> > timekeeping core code needs access to the current clocksource. Neither does
> > it tell why it is safe to provide the pointer to random callers.
> 
> Agreed on the changelog, but the pointer to a clocksource is already
> part of the timekeeping external API via struct system_counterval_t.
> get_device_system_crosststamp for example expects a clocksource pointer
> but provides no way to get such a pointer.

That's a completely different beast, really.

The clocksource pointer is handed in by the caller and the core code
validates if the clocksource is the same as the current system clocksource
and not the other way round.

So there is no need for getting that pointer from the core code because the
caller knows already which clocksource needs to be active to make.the whole
cross device timestamp correlation work. And in that case it's the callers
responsibility to ensure that the pointer is valid which is the case for
the current use cases.

From your other reply:

> Why add a global id?  ARM can add it to archdata similar to how x86 has
> vclock_mode.  But I still think the right thing to do is to include the
> full system_counterval_t in the result of ktime_get_snapshot.  (More in
> a second, feel free to reply to the other email only).

No, the clocksource pointer is not going to be exposed as there is no
guarantee that it will be still around after the call returns.

It's not even guaranteed to be correct when the store happens in Wu's patch
simply because the store is done outside of the seqcount protected region.

Vs. arch data: arch data is an opaque struct, so you'd need to store a
pointer which has the same issue as the clocksource pointer itself.

If we want to convey information then it has to be in the generic part
of struct clocksource.

In fact we could even simplify the existing get_device_system_crosststamp()
use case by using the ID field.

Thanks,

	tglx
Jianyong Wu Oct. 16, 2019, 9:48 a.m. UTC | #7
Hi tglx,

> -----Original Message-----
> From: Thomas Gleixner <tglx@linutronix.de>
> Sent: Wednesday, October 16, 2019 3:29 PM
> To: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Jianyong Wu (Arm Technology China) <Jianyong.Wu@arm.com>;
> netdev@vger.kernel.org; yangbo.lu@nxp.com; john.stultz@linaro.org;
> sean.j.christopherson@intel.com; maz@kernel.org;
> richardcochran@gmail.com; Mark Rutland <Mark.Rutland@arm.com>;
> will@kernel.org; Suzuki Poulose <Suzuki.Poulose@arm.com>; linux-
> kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org;
> kvmarm@lists.cs.columbia.edu; kvm@vger.kernel.org; Steve Capper
> <Steve.Capper@arm.com>; Kaly Xin (Arm Technology China)
> <Kaly.Xin@arm.com>; Justin He (Arm Technology China)
> <Justin.He@arm.com>; nd <nd@arm.com>
> Subject: Re: [PATCH v5 3/6] timekeeping: Add clocksource to
> system_time_snapshot
> 
> On Wed, 16 Oct 2019, Paolo Bonzini wrote:
> > On 15/10/19 22:13, Thomas Gleixner wrote:
> > > On Tue, 15 Oct 2019, Paolo Bonzini wrote:
> > >> On 15/10/19 12:48, Jianyong Wu wrote:
> > >>>
> > >>>
> > >>
> > >> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> > >
> > > You're sure about having reviewed that in detail?
> >
> > I did review the patch; the void* ugliness is not in this one, and I
> > do have some other qualms on that one.
> >
> > > This changelog is telling absolutely nothing WHY anything outside of
> > > the timekeeping core code needs access to the current clocksource.
> > > Neither does it tell why it is safe to provide the pointer to random callers.
> >
> > Agreed on the changelog, but the pointer to a clocksource is already
> > part of the timekeeping external API via struct system_counterval_t.
> > get_device_system_crosststamp for example expects a clocksource
> > pointer but provides no way to get such a pointer.
> 
> That's a completely different beast, really.
> 
> The clocksource pointer is handed in by the caller and the core code validates
> if the clocksource is the same as the current system clocksource and not the
> other way round.
> 
> So there is no need for getting that pointer from the core code because the
> caller knows already which clocksource needs to be active to make.the whole
> cross device timestamp correlation work. And in that case it's the callers
> responsibility to ensure that the pointer is valid which is the case for the
> current use cases.
> 
I thinks there is something misunderstanding of my patch. See patch 4/6, the reason why I add clocksource is that I want to check if the current clocksouce is
arm_arch_counter in virt/kvm/arm/psci.c and nothing to do with get_device_system_crosststamp.

So I really need a mechanism to do that check.

Thanks
Jianyong

> From your other reply:
> 
> > Why add a global id?  ARM can add it to archdata similar to how x86
> > has vclock_mode.  But I still think the right thing to do is to
> > include the full system_counterval_t in the result of
> > ktime_get_snapshot.  (More in a second, feel free to reply to the other
> email only).
> 
> No, the clocksource pointer is not going to be exposed as there is no
> guarantee that it will be still around after the call returns.
> 
> It's not even guaranteed to be correct when the store happens in Wu's patch
> simply because the store is done outside of the seqcount protected region.

Yeah, all of the elements in system_time_snapshot should be captured in consistency. So
I think the consistency will be guaranteed if the store ops added in the seqcount region.

> 
> Vs. arch data: arch data is an opaque struct, so you'd need to store a pointer
> which has the same issue as the clocksource pointer itself.
> 
> If we want to convey information then it has to be in the generic part of
> struct clocksource.
> 
> In fact we could even simplify the existing get_device_system_crosststamp()
> use case by using the ID field.
> 
> Thanks,
> 
> 	tglx
Jianyong Wu Oct. 16, 2019, 10:01 a.m. UTC | #8
Hi tglx,

> -----Original Message-----
> From: Thomas Gleixner <tglx@linutronix.de>
> Sent: Wednesday, October 16, 2019 4:13 AM
> To: Jianyong Wu (Arm Technology China) <Jianyong.Wu@arm.com>
> Cc: netdev@vger.kernel.org; yangbo.lu@nxp.com; john.stultz@linaro.org;
> pbonzini@redhat.com; sean.j.christopherson@intel.com; maz@kernel.org;
> richardcochran@gmail.com; Mark Rutland <Mark.Rutland@arm.com>;
> will@kernel.org; Suzuki Poulose <Suzuki.Poulose@arm.com>; linux-
> kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org;
> kvmarm@lists.cs.columbia.edu; kvm@vger.kernel.org; Steve Capper
> <Steve.Capper@arm.com>; Kaly Xin (Arm Technology China)
> <Kaly.Xin@arm.com>; Justin He (Arm Technology China)
> <Justin.He@arm.com>; nd <nd@arm.com>
> Subject: Re: [PATCH v5 3/6] timekeeping: Add clocksource to
> system_time_snapshot
> 
> On Tue, 15 Oct 2019, Jianyong Wu wrote:
> 
> > Sometimes, we need check current clocksource outside of timekeeping
> > area. Add clocksource to system_time_snapshot then we can get
> > clocksource as well as system time.
> 
> This changelog is telling absolutely nothing WHY anything outside of the
> timekeeping core code needs access to the current clocksource. Neither
> does it tell why it is safe to provide the pointer to random callers.
> 
Really need more information.

> > +/*
> > + * struct system_time_snapshot - simultaneous raw/real time capture
> with
> > + *	counter value
> > + * @sc:		Contains clocksource and clocksource counter value
> to produce
> > + * 	the system times
> > + * @real:	Realtime system time
> > + * @raw:	Monotonic raw system time
> > + * @clock_was_set_seq:	The sequence number of clock was set
> events
> > + * @cs_was_changed_seq:	The sequence number of clocksource change
> events
> > + */
> > +struct system_time_snapshot {
> > +	struct system_counterval_t sc;
> > +	ktime_t		real;
> > +	ktime_t		raw;
> > +	unsigned int	clock_was_set_seq;
> > +	u8		cs_was_changed_seq;
> > +};
> > +
> >  /*
> >   * Get cross timestamp between system clock and device clock
> >   */
> > diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
> > index 44b726bab4bd..66ff089605b3 100644
> > --- a/kernel/time/timekeeping.c
> > +++ b/kernel/time/timekeeping.c
> > @@ -983,7 +983,8 @@ void ktime_get_snapshot(struct
> system_time_snapshot *systime_snapshot)
> >  		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
> >  	} while (read_seqcount_retry(&tk_core.seq, seq));
> >
> > -	systime_snapshot->cycles = now;
> > +	systime_snapshot->sc.cycles = now;
> > +	systime_snapshot->sc.cs = tk->tkr_mono.clock;
> 
> The clock pointer can change right after the store, the underlying data can be
> freed .....
> 

Yeah, need put it into seqcount region.

> Looking at the rest of the patch set the actual usage site is:
> 
> > +       case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
> > +               ktime_get_snapshot(&systime_snapshot);
> > +               if (!is_arm_arch_counter(systime_snapshot.sc.cs))
> > +                       return kvm_psci_call(vcpu);
> 
> and that function does:
> 
> > +bool is_arm_arch_counter(void *cs)
> 
> void *? Type safety is overrated, right? The type is well known....
> 
> +{
> +       return (struct clocksource *)cs == &clocksource_counter;
> 
> That nonsensical typecast does not make up for that.
> 

It's really bad code and need fix.

> +}
> 
> So while the access to the pointer is actually safe, this is not going to happen
> simply because you modify a generic interface in a way which will lead the
> next developer to insane assumptions about the validity of that pointer.
> 
> While the kernel is pretty lax in terms of isolation due to the nature of the
> programming language, this does not justify to expose critical internals of
> core code to random callers. Guess why most of the timekeeping internals
> are carefully shielded from external access.
> 
> Something like the completely untested (not even compiled) patch below
> gives you access to the information you need and allows to reuse the
> mechanism for other purposes without adding is_$FOO_timer() all over the
> place.
> 
> Thanks,
> 
> 	tglx
> 
> 8<--------------
> --- a/include/linux/clocksource.h
> +++ b/include/linux/clocksource.h
> @@ -9,6 +9,7 @@
>  #ifndef _LINUX_CLOCKSOURCE_H
>  #define _LINUX_CLOCKSOURCE_H
> 
> +#include <linux/clocksource_ids.h>
>  #include <linux/types.h>
>  #include <linux/timex.h>
>  #include <linux/time.h>
> @@ -49,6 +50,10 @@ struct module;
>   *			400-499: Perfect
>   *				The ideal clocksource. A must-use where
>   *				available.
> + * @id:			Defaults to CSID_GENERIC. The id value is
> captured
> + *			in certain snapshot functions to allow callers to
> + *			validate the clocksource from which the snapshot
> was
> + *			taken.
>   * @read:		returns a cycle value, passes clocksource as argument
>   * @enable:		optional function to enable the clocksource
>   * @disable:		optional function to disable the clocksource
> @@ -91,6 +96,7 @@ struct clocksource {
>  	const char *name;
>  	struct list_head list;
>  	int rating;
> +	enum clocksource_ids id;
>  	int (*enable)(struct clocksource *cs);
>  	void (*disable)(struct clocksource *cs);
>  	unsigned long flags;
> --- /dev/null
> +++ b/include/linux/clocksource_ids.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_CLOCKSOURCE_IDS_H
> +#define _LINUX_CLOCKSOURCE_IDS_H
> +
> +/* Enum to give clocksources a unique identifier */ enum
> +clocksource_ids {
> +	CSID_GENERIC		= 0,
> +	CSID_ARM_ARCH_COUNTER,
> +	CSID_MAX,
> +};
> +

Does this mean I must add clocksource id for all kinds of ARCHs and update all the code which have checked clocksource in the old way?

Thanks
Jianyong

> +#endif
> --- a/include/linux/timekeeping.h
> +++ b/include/linux/timekeeping.h
> @@ -2,6 +2,7 @@
>  #ifndef _LINUX_TIMEKEEPING_H
>  #define _LINUX_TIMEKEEPING_H
> 
> +#include <linux/clocksource_ids.h>
>  #include <linux/errno.h>
> 
>  /* Included from linux/ktime.h */
> @@ -228,15 +229,17 @@ extern void timekeeping_inject_sleeptime
>   * @cycles:	Clocksource counter value to produce the system times
>   * @real:	Realtime system time
>   * @raw:	Monotonic raw system time
> + * @cs_id:	The id of the current clocksource which produced the
> snapshot
>   * @clock_was_set_seq:	The sequence number of clock was set
> events
>   * @cs_was_changed_seq:	The sequence number of clocksource change
> events
>   */
>  struct system_time_snapshot {
> -	u64		cycles;
> -	ktime_t		real;
> -	ktime_t		raw;
> -	unsigned int	clock_was_set_seq;
> -	u8		cs_was_changed_seq;
> +	u64			cycles;
> +	ktime_t			real;
> +	ktime_t			raw;
> +	enum clocksource_ids	cs_id;
> +	unsigned int		clock_was_set_seq;
> +	u8			cs_was_changed_seq;
>  };
> 
>  /*
> --- a/kernel/time/clocksource.c
> +++ b/kernel/time/clocksource.c
> @@ -921,6 +921,9 @@ int __clocksource_register_scale(struct
> 
>  	clocksource_arch_init(cs);
> 
> +	if (WARN_ON_ONCE((unsigned int)cs->id >= CSID_MAX))
> +		cs->id = CSID_GENERIC;
> +
>  	/* Initialize mult/shift and max_idle_ns */
>  	__clocksource_update_freq_scale(cs, scale, freq);
> 
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -979,6 +979,7 @@ void ktime_get_snapshot(struct system_ti
>  	do {
>  		seq = read_seqcount_begin(&tk_core.seq);
>  		now = tk_clock_read(&tk->tkr_mono);
> +		systime_snapshot->cs_id = tk->tkr_mono.clock->id;
>  		systime_snapshot->cs_was_changed_seq = tk-
> >cs_was_changed_seq;
>  		systime_snapshot->clock_was_set_seq = tk-
> >clock_was_set_seq;
>  		base_real = ktime_add(tk->tkr_mono.base,
> 
>
Thomas Gleixner Oct. 16, 2019, 10:23 a.m. UTC | #9
Jianyong,

On Wed, 16 Oct 2019, Jianyong Wu (Arm Technology China) wrote:

Please fix your mail client not to copy the full headers into the reply.

> > On Wed, 16 Oct 2019, Paolo Bonzini wrote:
> > > On 15/10/19 22:13, Thomas Gleixner wrote:
> > That's a completely different beast, really.
> > 
> > The clocksource pointer is handed in by the caller and the core code validates
> > if the clocksource is the same as the current system clocksource and not the
> > other way round.
> > 
> > So there is no need for getting that pointer from the core code because the
> > caller knows already which clocksource needs to be active to make.the whole
> > cross device timestamp correlation work. And in that case it's the callers
> > responsibility to ensure that the pointer is valid which is the case for the
> > current use cases.
> > 
> I thinks there is something misunderstanding of my patch. See patch 4/6,
> the reason why I add clocksource is that I want to check if the current
> clocksouce is arm_arch_counter in virt/kvm/arm/psci.c and nothing to do
> with get_device_system_crosststamp.

There is no misunderstanding at all. Your patch is broken in several ways
as I explained in detail.

> So I really need a mechanism to do that check.
> 
> Thanks
> Jianyong

So just by chance I scrolled further down and found more replies from
you. Please trim the reply properly and add your 'Thanks Jianyong' to the
end of the mail.
 
> > From your other reply:
> > 
> > > Why add a global id?  ARM can add it to archdata similar to how x86
> > > has vclock_mode.  But I still think the right thing to do is to
> > > include the full system_counterval_t in the result of
> > > ktime_get_snapshot.  (More in a second, feel free to reply to the other
> > email only).
> > 
> > No, the clocksource pointer is not going to be exposed as there is no
> > guarantee that it will be still around after the call returns.
> > 
> > It's not even guaranteed to be correct when the store happens in Wu's patch
> > simply because the store is done outside of the seqcount protected region.
> 
> Yeah, all of the elements in system_time_snapshot should be captured in
> consistency. So I think the consistency will be guaranteed if the store
> ops added in the seqcount region.

Again. While it is consistent in terms of storage, it's still wrong to
expose a pointer to something which has no life time guarantee. Even if
your use case is just to compare the pointer it's a bad idea to do that
especially without any comment about the pointer validity at all.

Thanks,

	tglx
Thomas Gleixner Oct. 16, 2019, 10:26 a.m. UTC | #10
On Wed, 16 Oct 2019, Jianyong Wu (Arm Technology China) wrote:
> On Wed, 16 Oct 2019, Thomas Gleixner wrote:
> > On Tue, 15 Oct 2019, Jianyong Wu wrote:
> > +/* Enum to give clocksources a unique identifier */ enum
> > +clocksource_ids {
> > +	CSID_GENERIC		= 0,
> > +	CSID_ARM_ARCH_COUNTER,
> > +	CSID_MAX,
> > +};
> > +
> 
> Does this mean I must add clocksource id for all kinds of ARCHs and
> update all the code which have checked clocksource in the old way?

What is the old way? No code exists which can access the current
clocksource pointer because the core code does not expose it at all.

Thanks,

	tglx
diff mbox series

Patch

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index a8ab0f143ac4..964c14fbbf69 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -194,23 +194,6 @@  extern bool timekeeping_rtc_skipresume(void);
 
 extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
 
-/*
- * struct system_time_snapshot - simultaneous raw/real time capture with
- *	counter value
- * @cycles:	Clocksource counter value to produce the system times
- * @real:	Realtime system time
- * @raw:	Monotonic raw system time
- * @clock_was_set_seq:	The sequence number of clock was set events
- * @cs_was_changed_seq:	The sequence number of clocksource change events
- */
-struct system_time_snapshot {
-	u64		cycles;
-	ktime_t		real;
-	ktime_t		raw;
-	unsigned int	clock_was_set_seq;
-	u8		cs_was_changed_seq;
-};
-
 /*
  * struct system_device_crosststamp - system/device cross-timestamp
  *	(syncronized capture)
@@ -236,6 +219,24 @@  struct system_counterval_t {
 	struct clocksource	*cs;
 };
 
+/*
+ * struct system_time_snapshot - simultaneous raw/real time capture with
+ *	counter value
+ * @sc:		Contains clocksource and clocksource counter value to produce
+ * 	the system times
+ * @real:	Realtime system time
+ * @raw:	Monotonic raw system time
+ * @clock_was_set_seq:	The sequence number of clock was set events
+ * @cs_was_changed_seq:	The sequence number of clocksource change events
+ */
+struct system_time_snapshot {
+	struct system_counterval_t sc;
+	ktime_t		real;
+	ktime_t		raw;
+	unsigned int	clock_was_set_seq;
+	u8		cs_was_changed_seq;
+};
+
 /*
  * Get cross timestamp between system clock and device clock
  */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 44b726bab4bd..66ff089605b3 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -983,7 +983,8 @@  void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	systime_snapshot->cycles = now;
+	systime_snapshot->sc.cycles = now;
+	systime_snapshot->sc.cs = tk->tkr_mono.clock;
 	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
 	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
 }
@@ -1189,12 +1190,12 @@  int get_device_system_crosststamp(int (*get_time_fn)
 		 * clocksource change
 		 */
 		if (!history_begin ||
-		    !cycle_between(history_begin->cycles,
+		    !cycle_between(history_begin->sc.cycles,
 				   system_counterval.cycles, cycles) ||
 		    history_begin->cs_was_changed_seq != cs_was_changed_seq)
 			return -EINVAL;
 		partial_history_cycles = cycles - system_counterval.cycles;
-		total_history_cycles = cycles - history_begin->cycles;
+		total_history_cycles = cycles - history_begin->sc.cycles;
 		discontinuity =
 			history_begin->clock_was_set_seq != clock_was_set_seq;