Message ID | 20181120144340.30201-2-pasha.tatashin@soleen.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Early boot time stamps for arm64 | expand |
Hi Pavel, On Tue, Nov 20, 2018 at 09:43:40AM -0500, Pavel Tatashin wrote: > Allow printk time stamps/sched_clock() to be available from the early > boot. > > Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com> > --- > arch/arm64/kernel/setup.c | 25 +++++++++++++++++++++++++ > drivers/clocksource/arm_arch_timer.c | 8 ++++---- > include/clocksource/arm_arch_timer.h | 3 +++ > 3 files changed, 32 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c > index f4fc1e0544b7..7a43e63b737b 100644 > --- a/arch/arm64/kernel/setup.c > +++ b/arch/arm64/kernel/setup.c > @@ -40,6 +40,7 @@ > #include <linux/efi.h> > #include <linux/psci.h> > #include <linux/sched/task.h> > +#include <linux/sched_clock.h> > #include <linux/mm.h> > > #include <asm/acpi.h> > @@ -279,8 +280,32 @@ arch_initcall(reserve_memblock_reserved_regions); > > u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; > > +/* > + * Get time stamps available early in boot, useful to identify boot time issues > + * from the early boot. > + */ > +static __init void sched_clock_early_init(void) > +{ > + u64 freq = arch_timer_get_cntfrq(); > + > + /* > + * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects > + * the timer frequency. To avoid breakage on misconfigured > + * systems, do not register the early sched_clock if the > + * programmed value if zero. Other random values will just > + * result in random output. > + */ > + if (!freq) > + return; > + > + arch_timer_read_counter = arch_counter_get_cntvct; Why do you need to assign this here? > + sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq); arch_timer_read_counter can be reassigned once the arm_arch_timer driver has probed; what stops this from being unused as the sched_clock after that has happened? I worry that toggling the function pointer could lead to sched_clock() going backwards. > +} > + > void __init setup_arch(char **cmdline_p) > { > + sched_clock_early_init(); > + > init_mm.start_code = (unsigned long) _text; > init_mm.end_code = (unsigned long) _etext; > init_mm.end_data = (unsigned long) _edata; The patch from this point onwards just looks like a refactoring to me which should be independent of adding early printk timestamps. Also, it doesn't update the vdso logic, which hardwires a 56-bit mask for the counter values. Will > diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c > index 9a7d4dc00b6e..e4843ad48bd3 100644 > --- a/drivers/clocksource/arm_arch_timer.c > +++ b/drivers/clocksource/arm_arch_timer.c > @@ -175,13 +175,13 @@ static struct clocksource clocksource_counter = { > .name = "arch_sys_counter", > .rating = 400, > .read = arch_counter_read, > - .mask = CLOCKSOURCE_MASK(56), > + .mask = CLOCKSOURCE_MASK(ARCH_TIMER_NBITS), > .flags = CLOCK_SOURCE_IS_CONTINUOUS, > }; > > static struct cyclecounter cyclecounter __ro_after_init = { > .read = arch_counter_read_cc, > - .mask = CLOCKSOURCE_MASK(56), > + .mask = CLOCKSOURCE_MASK(ARCH_TIMER_NBITS), > }; > > struct ate_acpi_oem_info { > @@ -963,8 +963,8 @@ static void __init arch_counter_register(unsigned type) > timecounter_init(&arch_timer_kvm_info.timecounter, > &cyclecounter, start_count); > > - /* 56 bits minimum, so we assume worst case rollover */ > - sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate); > + sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, > + arch_timer_rate); > } > > static void arch_timer_stop(struct clock_event_device *clk) > diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h > index 349e5957c949..c485512e1d01 100644 > --- a/include/clocksource/arm_arch_timer.h > +++ b/include/clocksource/arm_arch_timer.h > @@ -71,6 +71,9 @@ enum arch_timer_spi_nr { > #define ARCH_TIMER_EVT_STREAM_FREQ \ > (USEC_PER_SEC / ARCH_TIMER_EVT_STREAM_PERIOD_US) > > +/* 56 bits minimum, so we assume worst case rollover */ > +#define ARCH_TIMER_NBITS 56 > + > struct arch_timer_kvm_info { > struct timecounter timecounter; > int virtual_irq; > -- > 2.19.1 >
On 18-11-21 17:47:07, Will Deacon wrote: > > + /* > > + * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects > > + * the timer frequency. To avoid breakage on misconfigured > > + * systems, do not register the early sched_clock if the > > + * programmed value if zero. Other random values will just > > + * result in random output. > > + */ > > + if (!freq) > > + return; > > + > > + arch_timer_read_counter = arch_counter_get_cntvct; > > Why do you need to assign this here? > > > + sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq); > > arch_timer_read_counter can be reassigned once the arm_arch_timer driver > has probed; what stops this from being unused as the sched_clock after that > has happened? I worry that toggling the function pointer could lead to > sched_clock() going backwards. No reason, I will revert it back to use a local variable. I agree, time can go backward for a period of time while we switch to permanent clock later, if that clock is different. > > > +} > > + > > void __init setup_arch(char **cmdline_p) > > { > > + sched_clock_early_init(); > > + > > init_mm.start_code = (unsigned long) _text; > > init_mm.end_code = (unsigned long) _etext; > > init_mm.end_data = (unsigned long) _edata; > > The patch from this point onwards just looks like a refactoring to me which > should be independent of adding early printk timestamps. Also, it doesn't > update the vdso logic, which hardwires a 56-bit mask for the counter values. OK, I will split the patch, and will also address the hard coded value here: https://soleen.com/source/xref/linux/arch/arm64/kernel/vdso/gettimeofday.S?r=c80ed088#73 Are there more that you know of? Thank you, Pasha
On Wed, 21 Nov 2018 17:58:41 +0000, Pavel Tatashin <pasha.tatashin@soleen.com> wrote: > > On 18-11-21 17:47:07, Will Deacon wrote: > > > + /* > > > + * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects > > > + * the timer frequency. To avoid breakage on misconfigured > > > + * systems, do not register the early sched_clock if the > > > + * programmed value if zero. Other random values will just > > > + * result in random output. > > > + */ > > > + if (!freq) > > > + return; > > > + > > > + arch_timer_read_counter = arch_counter_get_cntvct; > > > > Why do you need to assign this here? > > > > > + sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq); > > > > arch_timer_read_counter can be reassigned once the arm_arch_timer driver > > has probed; what stops this from being unused as the sched_clock after that > > has happened? I worry that toggling the function pointer could lead to > > sched_clock() going backwards. > > No reason, I will revert it back to use a local variable. I think the issue is that you are doing an assignment for something that the kernel has already statically initialized. > I agree, time can go backward for a period of time while we switch > to permanent clock later, if that clock is different. It is worse than that. You're setting up sched_clock with an unreliable clock source which can go backward at any point, not just at handover time. I'd rather we have the timestamping code be able to use another souce than sched_clock, and eventually switch to it once sched_clock is registered (and properly corrected. Thanks, M.
On 18-11-22 14:14:29, Marc Zyngier wrote: > On Wed, 21 Nov 2018 17:58:41 +0000, > Pavel Tatashin <pasha.tatashin@soleen.com> wrote: > > > > On 18-11-21 17:47:07, Will Deacon wrote: > > > > + /* > > > > + * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects > > > > + * the timer frequency. To avoid breakage on misconfigured > > > > + * systems, do not register the early sched_clock if the > > > > + * programmed value if zero. Other random values will just > > > > + * result in random output. > > > > + */ > > > > + if (!freq) > > > > + return; > > > > + > > > > + arch_timer_read_counter = arch_counter_get_cntvct; > > > > > > Why do you need to assign this here? > > > > > > > + sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq); > > > > > > arch_timer_read_counter can be reassigned once the arm_arch_timer driver > > > has probed; what stops this from being unused as the sched_clock after that > > > has happened? I worry that toggling the function pointer could lead to > > > sched_clock() going backwards. > > > > No reason, I will revert it back to use a local variable. > > I think the issue is that you are doing an assignment for something > that the kernel has already statically initialized. > > > I agree, time can go backward for a period of time while we switch > > to permanent clock later, if that clock is different. > > It is worse than that. You're setting up sched_clock with an > unreliable clock source which can go backward at any point, not just > at handover time. > > I'd rather we have the timestamping code be able to use another souce > than sched_clock, and eventually switch to it once sched_clock is > registered (and properly corrected. Hi Marc, The early sched clock is implemented the same way on SPARC and x86: it uses the sched_clock() interface to get timestamps. In addition it can be used be engineers to precisely measure and debug early boot problems by directly inserting cheap sched_clock() calls into code. Ftrace might also be extended in future to support early boot. I am not sure it is a good idea to invent a different interface that would be used by printk() only for timestamping early in boot. During the hang over from unstable to stable the sched_clock_register() does the right thing and calculates the offset where the last clock ended and new started. In unlikely even if on some broken chips time goes backward, it will happen during early boot only. Do you know if there are arm64 chips exist where time may go backward? We can either don't do early boot clock on those chips, or modify sched_clock() to check for the last return value, and make sure that it is always smaller than the new return value. Thank you, Pasha
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f4fc1e0544b7..7a43e63b737b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -40,6 +40,7 @@ #include <linux/efi.h> #include <linux/psci.h> #include <linux/sched/task.h> +#include <linux/sched_clock.h> #include <linux/mm.h> #include <asm/acpi.h> @@ -279,8 +280,32 @@ arch_initcall(reserve_memblock_reserved_regions); u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; +/* + * Get time stamps available early in boot, useful to identify boot time issues + * from the early boot. + */ +static __init void sched_clock_early_init(void) +{ + u64 freq = arch_timer_get_cntfrq(); + + /* + * The arm64 boot protocol mandates that CNTFRQ_EL0 reflects + * the timer frequency. To avoid breakage on misconfigured + * systems, do not register the early sched_clock if the + * programmed value if zero. Other random values will just + * result in random output. + */ + if (!freq) + return; + + arch_timer_read_counter = arch_counter_get_cntvct; + sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, freq); +} + void __init setup_arch(char **cmdline_p) { + sched_clock_early_init(); + init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 9a7d4dc00b6e..e4843ad48bd3 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -175,13 +175,13 @@ static struct clocksource clocksource_counter = { .name = "arch_sys_counter", .rating = 400, .read = arch_counter_read, - .mask = CLOCKSOURCE_MASK(56), + .mask = CLOCKSOURCE_MASK(ARCH_TIMER_NBITS), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static struct cyclecounter cyclecounter __ro_after_init = { .read = arch_counter_read_cc, - .mask = CLOCKSOURCE_MASK(56), + .mask = CLOCKSOURCE_MASK(ARCH_TIMER_NBITS), }; struct ate_acpi_oem_info { @@ -963,8 +963,8 @@ static void __init arch_counter_register(unsigned type) timecounter_init(&arch_timer_kvm_info.timecounter, &cyclecounter, start_count); - /* 56 bits minimum, so we assume worst case rollover */ - sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate); + sched_clock_register(arch_timer_read_counter, ARCH_TIMER_NBITS, + arch_timer_rate); } static void arch_timer_stop(struct clock_event_device *clk) diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 349e5957c949..c485512e1d01 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -71,6 +71,9 @@ enum arch_timer_spi_nr { #define ARCH_TIMER_EVT_STREAM_FREQ \ (USEC_PER_SEC / ARCH_TIMER_EVT_STREAM_PERIOD_US) +/* 56 bits minimum, so we assume worst case rollover */ +#define ARCH_TIMER_NBITS 56 + struct arch_timer_kvm_info { struct timecounter timecounter; int virtual_irq;
Allow printk time stamps/sched_clock() to be available from the early boot. Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com> --- arch/arm64/kernel/setup.c | 25 +++++++++++++++++++++++++ drivers/clocksource/arm_arch_timer.c | 8 ++++---- include/clocksource/arm_arch_timer.h | 3 +++ 3 files changed, 32 insertions(+), 4 deletions(-)