Message ID | 20200519161755.209565-5-maz@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm/arm64: Turning IPIs into normal interrupts | expand |
On Tue, May 19, 2020 at 05:17:48PM +0100, Marc Zyngier wrote: > In order to deal with IPIs as normal interrupts, let's add > a new way to register them with the architecture code. > > set_smp_ipi_range() takes a range of interrupts, and allows > the arch code to request them as if the were normal interrupts. > A standard handler is then called by the core IRQ code to deal > with the IPI. > > This means that we don't need to call irq_enter/irq_exit, and > that we don't need to deal with set_irq_regs either. So let's > move the dispatcher into its own function, and leave handle_IPI() > as a compatibility function. > > On the sending side, let's make use of ipi_send_mask, which > already exists for this purpose. You say nothing about the nesting of irq_enter() and irq_exit() for scheduler_ipi(). Given that lockdep introduced the requirement that hard IRQs can't be nested, are we sure that calling irq_exit() twice is safe? Looking at irqtime_account_irq(), it seems that will cause double- accounting of in-interrupt time, since we will increment irq_start_time by just over twice the the period spent handling the IPI. I think the rest of irq_exit() should be safe, but still, this behaviour should be documented at the very least, if not avoided. > > Signed-off-by: Marc Zyngier <maz@kernel.org> > --- > arch/arm/Kconfig | 1 + > arch/arm/include/asm/smp.h | 5 ++ > arch/arm/kernel/smp.c | 97 ++++++++++++++++++++++++++++++++------ > 3 files changed, 88 insertions(+), 15 deletions(-) > > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig > index c77c93c485a0..0caaba9bf880 100644 > --- a/arch/arm/Kconfig > +++ b/arch/arm/Kconfig > @@ -48,6 +48,7 @@ config ARM > select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY > select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI > select GENERIC_CLOCKEVENTS_BROADCAST if SMP > + select GENERIC_IRQ_IPI if SMP > select GENERIC_CPU_AUTOPROBE > select GENERIC_EARLY_IOREMAP > select GENERIC_IDLE_POLL_SETUP > diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h > index a91f21e3c5b5..0e29730295ca 100644 > --- a/arch/arm/include/asm/smp.h > +++ b/arch/arm/include/asm/smp.h > @@ -45,6 +45,11 @@ extern void smp_init_cpus(void); > */ > extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); > > +/* > + * Register IPI interrupts with the arch SMP code > + */ > +extern void set_smp_ipi_range(int ipi_base, int nr_ipi); > + > /* > * Called from platform specific assembly code, this is the > * secondary CPU entry point. > diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c > index 46e1be9e57a8..618641978a5b 100644 > --- a/arch/arm/kernel/smp.c > +++ b/arch/arm/kernel/smp.c > @@ -79,10 +79,19 @@ enum ipi_msg_type { > */ > }; > > +static int ipi_irq_base; > +static int nr_ipi = NR_IPI; > +static struct irq_desc *ipi_desc[NR_IPI]; > + > +static void ipi_setup(int cpu); > +static void ipi_teardown(int cpu); > + > static DECLARE_COMPLETION(cpu_running); > > static struct smp_operations smp_ops __ro_after_init; > > +static void ipi_setup(int cpu); > + > void __init smp_set_ops(const struct smp_operations *ops) > { > if (ops) > @@ -308,6 +317,8 @@ void arch_cpu_idle_dead(void) > > local_irq_disable(); > > + ipi_teardown(cpu); > + > /* > * Flush the data out of the L1 cache for this CPU. This must be > * before the completion to ensure that data is safely written out > @@ -424,6 +435,8 @@ asmlinkage void secondary_start_kernel(void) > > notify_cpu_starting(cpu); > > + ipi_setup(cpu); > + > calibrate_delay(); > > smp_store_cpu_info(cpu); > @@ -629,10 +642,9 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) > handle_IPI(ipinr, regs); > } > > -void handle_IPI(int ipinr, struct pt_regs *regs) > +static void do_handle_IPI(int ipinr) > { > unsigned int cpu = smp_processor_id(); > - struct pt_regs *old_regs = set_irq_regs(regs); > > if ((unsigned)ipinr < NR_IPI) { > trace_ipi_entry_rcuidle(ipi_types[ipinr]); > @@ -645,9 +657,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) > > #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST > case IPI_TIMER: > - irq_enter(); > tick_receive_broadcast(); > - irq_exit(); > break; > #endif > > @@ -656,36 +666,26 @@ void handle_IPI(int ipinr, struct pt_regs *regs) > break; > > case IPI_CALL_FUNC: > - irq_enter(); > generic_smp_call_function_interrupt(); > - irq_exit(); > break; > > case IPI_CPU_STOP: > - irq_enter(); > ipi_cpu_stop(cpu); > - irq_exit(); > break; > > #ifdef CONFIG_IRQ_WORK > case IPI_IRQ_WORK: > - irq_enter(); > irq_work_run(); > - irq_exit(); > break; > #endif > > case IPI_COMPLETION: > - irq_enter(); > ipi_complete(cpu); > - irq_exit(); > break; > > case IPI_CPU_BACKTRACE: > printk_nmi_enter(); > - irq_enter(); > - nmi_cpu_backtrace(regs); > - irq_exit(); > + nmi_cpu_backtrace(get_irq_regs()); > printk_nmi_exit(); > break; > > @@ -697,9 +697,76 @@ void handle_IPI(int ipinr, struct pt_regs *regs) > > if ((unsigned)ipinr < NR_IPI) > trace_ipi_exit_rcuidle(ipi_types[ipinr]); > +} > + > +/* Legacy version, should go away once all irqchips have been converted */ > +void handle_IPI(int ipinr, struct pt_regs *regs) > +{ > + struct pt_regs *old_regs = set_irq_regs(regs); > + > + irq_enter(); > + do_handle_IPI(ipinr); > + irq_exit(); > + > set_irq_regs(old_regs); > } > > +static irqreturn_t ipi_handler(int irq, void *data) > +{ > + do_handle_IPI(irq - ipi_irq_base); > + return IRQ_HANDLED; > +} > + > +static void ipi_send(const struct cpumask *target, unsigned int ipi) > +{ > + __ipi_send_mask(ipi_desc[ipi], target); > +} > + > +static void ipi_setup(int cpu) > +{ > + if (ipi_irq_base) { > + int i; > + > + for (i = 0; i < nr_ipi; i++) > + enable_percpu_irq(ipi_irq_base + i, 0); > + } > +} > + > +static void ipi_teardown(int cpu) > +{ > + if (ipi_irq_base) { > + int i; > + > + for (i = 0; i < nr_ipi; i++) > + disable_percpu_irq(ipi_irq_base + i); > + } > +} > + > +void __init set_smp_ipi_range(int ipi_base, int n) > +{ > + int i; > + > + WARN_ON(n < NR_IPI); > + nr_ipi = min(n, NR_IPI); > + > + for (i = 0; i < nr_ipi; i++) { > + int err; > + > + err = request_percpu_irq(ipi_base + i, ipi_handler, > + "IPI", &irq_stat); > + WARN_ON(err); > + > + ipi_desc[i] = irq_to_desc(ipi_base + i); > + irq_set_status_flags(ipi_base + i, IRQ_NO_ACCOUNTING); > + } > + > + ipi_irq_base = ipi_base; > + set_smp_cross_call(ipi_send); > + > + /* Setup the boot CPU immediately */ > + ipi_setup(smp_processor_id()); > +} > + > void smp_send_reschedule(int cpu) > { > smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); > -- > 2.26.2 > >
On 19/05/20 23:24, Russell King - ARM Linux admin wrote: > On Tue, May 19, 2020 at 05:17:48PM +0100, Marc Zyngier wrote: >> In order to deal with IPIs as normal interrupts, let's add >> a new way to register them with the architecture code. >> >> set_smp_ipi_range() takes a range of interrupts, and allows >> the arch code to request them as if the were normal interrupts. >> A standard handler is then called by the core IRQ code to deal >> with the IPI. >> >> This means that we don't need to call irq_enter/irq_exit, and >> that we don't need to deal with set_irq_regs either. So let's >> move the dispatcher into its own function, and leave handle_IPI() >> as a compatibility function. >> >> On the sending side, let's make use of ipi_send_mask, which >> already exists for this purpose. > > You say nothing about the nesting of irq_enter() and irq_exit() > for scheduler_ipi(). > > Given that lockdep introduced the requirement that hard IRQs can't > be nested, are we sure that calling irq_exit() twice is safe? > > Looking at irqtime_account_irq(), it seems that will cause double- > accounting of in-interrupt time, since we will increment > irq_start_time by just over twice the the period spent handling > the IPI. > > I think the rest of irq_exit() should be safe, but still, this > behaviour should be documented at the very least, if not avoided. > x86 does the same (though IIUC only when tracing reschedule IPI's), and MIPS has the same issue as it also uses generic IRQ IPI's - so although it's not ideal, I think we can live with it.
On Thu, May 21, 2020 at 03:03:49PM +0100, Valentin Schneider wrote: > > On 19/05/20 23:24, Russell King - ARM Linux admin wrote: > > On Tue, May 19, 2020 at 05:17:48PM +0100, Marc Zyngier wrote: > >> In order to deal with IPIs as normal interrupts, let's add > >> a new way to register them with the architecture code. > >> > >> set_smp_ipi_range() takes a range of interrupts, and allows > >> the arch code to request them as if the were normal interrupts. > >> A standard handler is then called by the core IRQ code to deal > >> with the IPI. > >> > >> This means that we don't need to call irq_enter/irq_exit, and > >> that we don't need to deal with set_irq_regs either. So let's > >> move the dispatcher into its own function, and leave handle_IPI() > >> as a compatibility function. > >> > >> On the sending side, let's make use of ipi_send_mask, which > >> already exists for this purpose. > > > > You say nothing about the nesting of irq_enter() and irq_exit() > > for scheduler_ipi(). > > > > Given that lockdep introduced the requirement that hard IRQs can't > > be nested, are we sure that calling irq_exit() twice is safe? > > > > Looking at irqtime_account_irq(), it seems that will cause double- > > accounting of in-interrupt time, since we will increment > > irq_start_time by just over twice the the period spent handling > > the IPI. > > > > I think the rest of irq_exit() should be safe, but still, this > > behaviour should be documented at the very least, if not avoided. > > > > x86 does the same (though IIUC only when tracing reschedule IPI's), Right, so when the system is operating normally, then the accounting is correct. When the reschedule path is being explicitly traced, then the accounting will be doubled for it. What's being proposed for ARM is to always have this mis-accounting, where no mis-accounting was present before - and some of us (me) /do/ enable IRQ accounting in our kernels as standard. So, you can take this as a kernel regression report from a user. > and MIPS has the same issue as it also uses generic IRQ IPI's - so > although it's not ideal, I think we can live with it. Yes, but is there anyone who cares about this for MIPS?
On 21/05/20 16:12, Russell King - ARM Linux admin wrote: > On Thu, May 21, 2020 at 03:03:49PM +0100, Valentin Schneider wrote: >> >> On 19/05/20 23:24, Russell King - ARM Linux admin wrote: >> > On Tue, May 19, 2020 at 05:17:48PM +0100, Marc Zyngier wrote: >> >> In order to deal with IPIs as normal interrupts, let's add >> >> a new way to register them with the architecture code. >> >> >> >> set_smp_ipi_range() takes a range of interrupts, and allows >> >> the arch code to request them as if the were normal interrupts. >> >> A standard handler is then called by the core IRQ code to deal >> >> with the IPI. >> >> >> >> This means that we don't need to call irq_enter/irq_exit, and >> >> that we don't need to deal with set_irq_regs either. So let's >> >> move the dispatcher into its own function, and leave handle_IPI() >> >> as a compatibility function. >> >> >> >> On the sending side, let's make use of ipi_send_mask, which >> >> already exists for this purpose. >> > >> > You say nothing about the nesting of irq_enter() and irq_exit() >> > for scheduler_ipi(). >> > >> > Given that lockdep introduced the requirement that hard IRQs can't >> > be nested, are we sure that calling irq_exit() twice is safe? >> > >> > Looking at irqtime_account_irq(), it seems that will cause double- >> > accounting of in-interrupt time, since we will increment >> > irq_start_time by just over twice the the period spent handling >> > the IPI. >> > >> > I think the rest of irq_exit() should be safe, but still, this >> > behaviour should be documented at the very least, if not avoided. >> > >> >> x86 does the same (though IIUC only when tracing reschedule IPI's), > > Right, so when the system is operating normally, then the accounting is > correct. When the reschedule path is being explicitly traced, then > the accounting will be doubled for it. > Right, it's true that they are only affected when tracing. That said, AFAICT the accounting nests correctly. Consider: irq_enter() @t0 irq_enter() @t1 ... irq_exit() @t2 irq_exit() @t3 Entering irqtime_account_irq() at time t, we get something like: delta = t - irq_start_time; irq_start_time = t; if (hardirq_count()) total += delta; Since we go through the accounting on both irq_enter() and irq_exit(), we'd have something like: irq_enter() @t0 irq_start_time = t0 irq_enter() @t1 delta = t1 - t0 irq_start_time = t1 total += t1 - t0 irq_exit() @t2 delta = t2 - t1 irq_start_time = t2 total += t2 - t1 irq_exit() @t3 delta = t3 - t2 irq_start_time = t3 total += t3 - t2 So at the end we have incremented the total by t1-t0 + t2-t1 + t3-t2 = t3 - t0 IOW the duration of the outermost pair (... Unless I goofed up). > What's being proposed for ARM is to always have this mis-accounting, > where no mis-accounting was present before - and some of us (me) /do/ > enable IRQ accounting in our kernels as standard. So, you can take > this as a kernel regression report from a user. > >> and MIPS has the same issue as it also uses generic IRQ IPI's - so >> although it's not ideal, I think we can live with it. > > Yes, but is there anyone who cares about this for MIPS?
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c77c93c485a0..0caaba9bf880 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -48,6 +48,7 @@ config ARM select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_IRQ_IPI if SMP select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index a91f21e3c5b5..0e29730295ca 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -45,6 +45,11 @@ extern void smp_init_cpus(void); */ extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); +/* + * Register IPI interrupts with the arch SMP code + */ +extern void set_smp_ipi_range(int ipi_base, int nr_ipi); + /* * Called from platform specific assembly code, this is the * secondary CPU entry point. diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 46e1be9e57a8..618641978a5b 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -79,10 +79,19 @@ enum ipi_msg_type { */ }; +static int ipi_irq_base; +static int nr_ipi = NR_IPI; +static struct irq_desc *ipi_desc[NR_IPI]; + +static void ipi_setup(int cpu); +static void ipi_teardown(int cpu); + static DECLARE_COMPLETION(cpu_running); static struct smp_operations smp_ops __ro_after_init; +static void ipi_setup(int cpu); + void __init smp_set_ops(const struct smp_operations *ops) { if (ops) @@ -308,6 +317,8 @@ void arch_cpu_idle_dead(void) local_irq_disable(); + ipi_teardown(cpu); + /* * Flush the data out of the L1 cache for this CPU. This must be * before the completion to ensure that data is safely written out @@ -424,6 +435,8 @@ asmlinkage void secondary_start_kernel(void) notify_cpu_starting(cpu); + ipi_setup(cpu); + calibrate_delay(); smp_store_cpu_info(cpu); @@ -629,10 +642,9 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) handle_IPI(ipinr, regs); } -void handle_IPI(int ipinr, struct pt_regs *regs) +static void do_handle_IPI(int ipinr) { unsigned int cpu = smp_processor_id(); - struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { trace_ipi_entry_rcuidle(ipi_types[ipinr]); @@ -645,9 +657,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST case IPI_TIMER: - irq_enter(); tick_receive_broadcast(); - irq_exit(); break; #endif @@ -656,36 +666,26 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; case IPI_CALL_FUNC: - irq_enter(); generic_smp_call_function_interrupt(); - irq_exit(); break; case IPI_CPU_STOP: - irq_enter(); ipi_cpu_stop(cpu); - irq_exit(); break; #ifdef CONFIG_IRQ_WORK case IPI_IRQ_WORK: - irq_enter(); irq_work_run(); - irq_exit(); break; #endif case IPI_COMPLETION: - irq_enter(); ipi_complete(cpu); - irq_exit(); break; case IPI_CPU_BACKTRACE: printk_nmi_enter(); - irq_enter(); - nmi_cpu_backtrace(regs); - irq_exit(); + nmi_cpu_backtrace(get_irq_regs()); printk_nmi_exit(); break; @@ -697,9 +697,76 @@ void handle_IPI(int ipinr, struct pt_regs *regs) if ((unsigned)ipinr < NR_IPI) trace_ipi_exit_rcuidle(ipi_types[ipinr]); +} + +/* Legacy version, should go away once all irqchips have been converted */ +void handle_IPI(int ipinr, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + irq_enter(); + do_handle_IPI(ipinr); + irq_exit(); + set_irq_regs(old_regs); } +static irqreturn_t ipi_handler(int irq, void *data) +{ + do_handle_IPI(irq - ipi_irq_base); + return IRQ_HANDLED; +} + +static void ipi_send(const struct cpumask *target, unsigned int ipi) +{ + __ipi_send_mask(ipi_desc[ipi], target); +} + +static void ipi_setup(int cpu) +{ + if (ipi_irq_base) { + int i; + + for (i = 0; i < nr_ipi; i++) + enable_percpu_irq(ipi_irq_base + i, 0); + } +} + +static void ipi_teardown(int cpu) +{ + if (ipi_irq_base) { + int i; + + for (i = 0; i < nr_ipi; i++) + disable_percpu_irq(ipi_irq_base + i); + } +} + +void __init set_smp_ipi_range(int ipi_base, int n) +{ + int i; + + WARN_ON(n < NR_IPI); + nr_ipi = min(n, NR_IPI); + + for (i = 0; i < nr_ipi; i++) { + int err; + + err = request_percpu_irq(ipi_base + i, ipi_handler, + "IPI", &irq_stat); + WARN_ON(err); + + ipi_desc[i] = irq_to_desc(ipi_base + i); + irq_set_status_flags(ipi_base + i, IRQ_NO_ACCOUNTING); + } + + ipi_irq_base = ipi_base; + set_smp_cross_call(ipi_send); + + /* Setup the boot CPU immediately */ + ipi_setup(smp_processor_id()); +} + void smp_send_reschedule(int cpu) { smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
In order to deal with IPIs as normal interrupts, let's add a new way to register them with the architecture code. set_smp_ipi_range() takes a range of interrupts, and allows the arch code to request them as if the were normal interrupts. A standard handler is then called by the core IRQ code to deal with the IPI. This means that we don't need to call irq_enter/irq_exit, and that we don't need to deal with set_irq_regs either. So let's move the dispatcher into its own function, and leave handle_IPI() as a compatibility function. On the sending side, let's make use of ipi_send_mask, which already exists for this purpose. Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm/Kconfig | 1 + arch/arm/include/asm/smp.h | 5 ++ arch/arm/kernel/smp.c | 97 ++++++++++++++++++++++++++++++++------ 3 files changed, 88 insertions(+), 15 deletions(-)