diff mbox series

[v2,1/1] arm64: Early boot time stamps

Message ID 20181120144340.30201-2-pasha.tatashin@soleen.com (mailing list archive)
State New, archived
Headers show
Series Early boot time stamps for arm64 | expand

Commit Message

Pasha Tatashin Nov. 20, 2018, 2:43 p.m. UTC
Allow printk time stamps/sched_clock() to be available from the early
boot.

Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
---
 arch/arm64/kernel/setup.c            | 25 +++++++++++++++++++++++++
 drivers/clocksource/arm_arch_timer.c |  8 ++++----
 include/clocksource/arm_arch_timer.h |  3 +++
 3 files changed, 32 insertions(+), 4 deletions(-)

Comments

Will Deacon Nov. 21, 2018, 5:47 p.m. UTC | #1
Hi Pavel,

On Tue, Nov 20, 2018 at 09:43:40AM -0500, Pavel Tatashin wrote:
> Allow printk time stamps/sched_clock() to be available from the early
> boot.
> 
> Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
> ---
>  arch/arm64/kernel/setup.c            | 25 +++++++++++++++++++++++++
>  drivers/clocksource/arm_arch_timer.c |  8 ++++----
>  include/clocksource/arm_arch_timer.h |  3 +++
>  3 files changed, 32 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index f4fc1e0544b7..7a43e63b737b 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -40,6 +40,7 @@
>  #include <linux/efi.h>
>  #include <linux/psci.h>
>  #include <linux/sched/task.h>
> +#include <linux/sched_clock.h>
>  #include <linux/mm.h>
>  
>  #include <asm/acpi.h>
> @@ -279,8 +280,32 @@ arch_initcall(reserve_memblock_reserved_regions);
>  
>  u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
>  
> +/*
> + * Get time stamps available early in boot, useful to identify boot time issues
> + * from the early boot.
> + */
> +static __init void sched_clock_early_init(void)
> +{
> +	u64 freq = arch_timer_get_cntfrq();
> +
> +	/*
> +	 * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects
> +	 * the timer frequency. To avoid breakage on misconfigured
> +	 * systems, do not register the early sched_clock if the
> +	 * programmed value if zero. Other random values will just
> +	 * result in random output.
> +	 */
> +	if (!freq)
> +		return;
> +
> +	arch_timer_read_counter = arch_counter_get_cntvct;

Why do you need to assign this here?

> +	sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq);

arch_timer_read_counter can be reassigned once the arm_arch_timer driver
has probed; what stops this from being unused as the sched_clock after that
has happened? I worry that toggling the function pointer could lead to
sched_clock() going backwards.

> +}
> +
>  void __init setup_arch(char **cmdline_p)
>  {
> +	sched_clock_early_init();
> +
>  	init_mm.start_code = (unsigned long) _text;
>  	init_mm.end_code   = (unsigned long) _etext;
>  	init_mm.end_data   = (unsigned long) _edata;

The patch from this point onwards just looks like a refactoring to me which
should be independent of adding early printk timestamps. Also, it doesn't
update the vdso logic, which hardwires a 56-bit mask for the counter values.

Will

> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index 9a7d4dc00b6e..e4843ad48bd3 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -175,13 +175,13 @@ static struct clocksource clocksource_counter = {
>  	.name	= "arch_sys_counter",
>  	.rating	= 400,
>  	.read	= arch_counter_read,
> -	.mask	= CLOCKSOURCE_MASK(56),
> +	.mask	= CLOCKSOURCE_MASK(ARCH_TIMER_NBITS),
>  	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
>  };
>  
>  static struct cyclecounter cyclecounter __ro_after_init = {
>  	.read	= arch_counter_read_cc,
> -	.mask	= CLOCKSOURCE_MASK(56),
> +	.mask	= CLOCKSOURCE_MASK(ARCH_TIMER_NBITS),
>  };
>  
>  struct ate_acpi_oem_info {
> @@ -963,8 +963,8 @@ static void __init arch_counter_register(unsigned type)
>  	timecounter_init(&arch_timer_kvm_info.timecounter,
>  			 &cyclecounter, start_count);
>  
> -	/* 56 bits minimum, so we assume worst case rollover */
> -	sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
> +	sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS,
> +			     arch_timer_rate);
>  }
>  
>  static void arch_timer_stop(struct clock_event_device *clk)
> diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
> index 349e5957c949..c485512e1d01 100644
> --- a/include/clocksource/arm_arch_timer.h
> +++ b/include/clocksource/arm_arch_timer.h
> @@ -71,6 +71,9 @@ enum arch_timer_spi_nr {
>  #define ARCH_TIMER_EVT_STREAM_FREQ				\
>  	(USEC_PER_SEC / ARCH_TIMER_EVT_STREAM_PERIOD_US)
>  
> +/* 56 bits minimum, so we assume worst case rollover */
> +#define	ARCH_TIMER_NBITS		56
> +
>  struct arch_timer_kvm_info {
>  	struct timecounter timecounter;
>  	int virtual_irq;
> -- 
> 2.19.1
>
Pasha Tatashin Nov. 21, 2018, 5:58 p.m. UTC | #2
On 18-11-21 17:47:07, Will Deacon wrote:
> > +	/*
> > +	 * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects
> > +	 * the timer frequency. To avoid breakage on misconfigured
> > +	 * systems, do not register the early sched_clock if the
> > +	 * programmed value if zero. Other random values will just
> > +	 * result in random output.
> > +	 */
> > +	if (!freq)
> > +		return;
> > +
> > +	arch_timer_read_counter = arch_counter_get_cntvct;
> 
> Why do you need to assign this here?
> 
> > +	sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq);
> 
> arch_timer_read_counter can be reassigned once the arm_arch_timer driver
> has probed; what stops this from being unused as the sched_clock after that
> has happened? I worry that toggling the function pointer could lead to
> sched_clock() going backwards.

No reason, I will revert it back to use a local variable. I agree, time
can go backward for a period of time while we switch to permanent clock later,
if that clock is different. 

> 
> > +}
> > +
> >  void __init setup_arch(char **cmdline_p)
> >  {
> > +	sched_clock_early_init();
> > +
> >  	init_mm.start_code = (unsigned long) _text;
> >  	init_mm.end_code   = (unsigned long) _etext;
> >  	init_mm.end_data   = (unsigned long) _edata;
> 
> The patch from this point onwards just looks like a refactoring to me which
> should be independent of adding early printk timestamps. Also, it doesn't
> update the vdso logic, which hardwires a 56-bit mask for the counter values.

OK, I will split the patch, and will also address the hard coded value
here:

https://soleen.com/source/xref/linux/arch/arm64/kernel/vdso/gettimeofday.S?r=c80ed088#73

Are there more that you know of?

Thank you,
Pasha
Marc Zyngier Nov. 22, 2018, 2:14 p.m. UTC | #3
On Wed, 21 Nov 2018 17:58:41 +0000,
Pavel Tatashin <pasha.tatashin@soleen.com> wrote:
> 
> On 18-11-21 17:47:07, Will Deacon wrote:
> > > +	/*
> > > +	 * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects
> > > +	 * the timer frequency. To avoid breakage on misconfigured
> > > +	 * systems, do not register the early sched_clock if the
> > > +	 * programmed value if zero. Other random values will just
> > > +	 * result in random output.
> > > +	 */
> > > +	if (!freq)
> > > +		return;
> > > +
> > > +	arch_timer_read_counter = arch_counter_get_cntvct;
> > 
> > Why do you need to assign this here?
> > 
> > > +	sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq);
> > 
> > arch_timer_read_counter can be reassigned once the arm_arch_timer driver
> > has probed; what stops this from being unused as the sched_clock after that
> > has happened? I worry that toggling the function pointer could lead to
> > sched_clock() going backwards.
> 
> No reason, I will revert it back to use a local variable.

I think the issue is that you are doing an assignment for something
that the kernel has already statically initialized.

> I agree, time can go backward for a period of time while we switch
> to permanent clock later, if that clock is different.

It is worse than that. You're setting up sched_clock with an
unreliable clock source which can go backward at any point, not just
at handover time.

I'd rather we have the timestamping code be able to use another souce
than sched_clock, and eventually switch to it once sched_clock is
registered (and properly corrected.

Thanks,

	M.
Pasha Tatashin Dec. 5, 2018, 4:01 p.m. UTC | #4
On 18-11-22 14:14:29, Marc Zyngier wrote:
> On Wed, 21 Nov 2018 17:58:41 +0000,
> Pavel Tatashin <pasha.tatashin@soleen.com> wrote:
> > 
> > On 18-11-21 17:47:07, Will Deacon wrote:
> > > > +	/*
> > > > +	 * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects
> > > > +	 * the timer frequency. To avoid breakage on misconfigured
> > > > +	 * systems, do not register the early sched_clock if the
> > > > +	 * programmed value if zero. Other random values will just
> > > > +	 * result in random output.
> > > > +	 */
> > > > +	if (!freq)
> > > > +		return;
> > > > +
> > > > +	arch_timer_read_counter = arch_counter_get_cntvct;
> > > 
> > > Why do you need to assign this here?
> > > 
> > > > +	sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq);
> > > 
> > > arch_timer_read_counter can be reassigned once the arm_arch_timer driver
> > > has probed; what stops this from being unused as the sched_clock after that
> > > has happened? I worry that toggling the function pointer could lead to
> > > sched_clock() going backwards.
> > 
> > No reason, I will revert it back to use a local variable.
> 
> I think the issue is that you are doing an assignment for something
> that the kernel has already statically initialized.
> 
> > I agree, time can go backward for a period of time while we switch
> > to permanent clock later, if that clock is different.
> 
> It is worse than that. You're setting up sched_clock with an
> unreliable clock source which can go backward at any point, not just
> at handover time.
> 
> I'd rather we have the timestamping code be able to use another souce
> than sched_clock, and eventually switch to it once sched_clock is
> registered (and properly corrected.

Hi Marc,

The early sched clock is implemented the same way on SPARC and x86: it
uses the sched_clock() interface to get timestamps. In addition it can
be used be engineers to precisely measure and debug early boot problems
by directly inserting cheap sched_clock() calls into code. Ftrace might
also be extended in future to support early boot.

I am not sure it is a good idea to invent a different interface that
would be used by printk() only for timestamping early in boot.

During the hang over from unstable to stable the sched_clock_register()
does the right thing and calculates the offset where the last clock
ended and new started.

In unlikely even if on some broken chips time goes backward, it will
happen during early boot only. Do you know if there are arm64 chips
exist where time may go backward? We can either don't do early boot
clock on those chips, or modify sched_clock() to check for the last
return value, and make sure that it is always smaller than the new
return value.

Thank you,
Pasha
diff mbox series

Patch

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f4fc1e0544b7..7a43e63b737b 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -40,6 +40,7 @@ 
 #include <linux/efi.h>
 #include <linux/psci.h>
 #include <linux/sched/task.h>
+#include <linux/sched_clock.h>
 #include <linux/mm.h>
 
 #include <asm/acpi.h>
@@ -279,8 +280,32 @@  arch_initcall(reserve_memblock_reserved_regions);
 
 u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
+/*
+ * Get time stamps available early in boot, useful to identify boot time issues
+ * from the early boot.
+ */
+static __init void sched_clock_early_init(void)
+{
+	u64 freq = arch_timer_get_cntfrq();
+
+	/*
+	 * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects
+	 * the timer frequency. To avoid breakage on misconfigured
+	 * systems, do not register the early sched_clock if the
+	 * programmed value if zero. Other random values will just
+	 * result in random output.
+	 */
+	if (!freq)
+		return;
+
+	arch_timer_read_counter = arch_counter_get_cntvct;
+	sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq);
+}
+
 void __init setup_arch(char **cmdline_p)
 {
+	sched_clock_early_init();
+
 	init_mm.start_code = (unsigned long) _text;
 	init_mm.end_code   = (unsigned long) _etext;
 	init_mm.end_data   = (unsigned long) _edata;
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 9a7d4dc00b6e..e4843ad48bd3 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -175,13 +175,13 @@  static struct clocksource clocksource_counter = {
 	.name	= "arch_sys_counter",
 	.rating	= 400,
 	.read	= arch_counter_read,
-	.mask	= CLOCKSOURCE_MASK(56),
+	.mask	= CLOCKSOURCE_MASK(ARCH_TIMER_NBITS),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static struct cyclecounter cyclecounter __ro_after_init = {
 	.read	= arch_counter_read_cc,
-	.mask	= CLOCKSOURCE_MASK(56),
+	.mask	= CLOCKSOURCE_MASK(ARCH_TIMER_NBITS),
 };
 
 struct ate_acpi_oem_info {
@@ -963,8 +963,8 @@  static void __init arch_counter_register(unsigned type)
 	timecounter_init(&arch_timer_kvm_info.timecounter,
 			 &cyclecounter, start_count);
 
-	/* 56 bits minimum, so we assume worst case rollover */
-	sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
+	sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS,
+			     arch_timer_rate);
 }
 
 static void arch_timer_stop(struct clock_event_device *clk)
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 349e5957c949..c485512e1d01 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -71,6 +71,9 @@  enum arch_timer_spi_nr {
 #define ARCH_TIMER_EVT_STREAM_FREQ				\
 	(USEC_PER_SEC / ARCH_TIMER_EVT_STREAM_PERIOD_US)
 
+/* 56 bits minimum, so we assume worst case rollover */
+#define	ARCH_TIMER_NBITS		56
+
 struct arch_timer_kvm_info {
 	struct timecounter timecounter;
 	int virtual_irq;