| Message ID | 20200901144324.1071694-4-maz@kernel.org (mailing list archive) |
|---|---|
| State | New, archived |
| Series | arm/arm64: Turning IPIs into normal interrupts |
On Tue, Sep 01, 2020 at 03:43:11PM +0100, Marc Zyngier wrote:
> In order to deal with IPIs as normal interrupts, let's add
> a new way to register them with the architecture code.
>
> set_smp_ipi_range() takes a range of interrupts, and allows
> the arch code to request them as if they were normal interrupts.
> A standard handler is then called by the core IRQ code to deal
> with the IPI.
>
> This means that we don't need to call irq_enter/irq_exit, and
> that we don't need to deal with set_irq_regs either. So let's
> move the dispatcher into its own function, and leave handle_IPI()
> as a compatibility function.
>
> On the sending side, let's make use of ipi_send_mask, which
> already exists for this purpose.
>
> One of the major differences is that, in some cases (such as
> when performing IRQ time accounting on the scheduler IPI), we
> end up with nested irq_enter()/irq_exit() pairs.
> Other than the (relatively small) overhead, there should be
> no consequences (these pairs are designed to nest correctly,
> and the accounting shouldn't be off).
>
> Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>

In case you need an ack for the arm64 part:

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
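The commit message's claim that irq_enter()/irq_exit() pairs "nest
correctly" comes down to the hardirq context being a depth counter in
the preempt count, not a flag. The following is a minimal userspace
model of that property only (the real functions also do RCU,
timekeeping and softirq bookkeeping, and the model_* names are invented
here):

#include <stdio.h>

static int hardirq_depth;	/* stands in for the HARDIRQ preempt count */

static void model_irq_enter(void) { hardirq_depth++; }
static void model_irq_exit(void)  { hardirq_depth--; }

static void scheduler_ipi(void)
{
	/* Inner pair, e.g. taken for IRQ time accounting */
	model_irq_enter();
	printf("depth inside handler: %d\n", hardirq_depth);	/* 2 */
	model_irq_exit();
}

int main(void)
{
	model_irq_enter();	/* outer pair, from the generic IRQ entry */
	scheduler_ipi();
	model_irq_exit();
	printf("depth afterwards:     %d\n", hardirq_depth);	/* 0 */
	return 0;
}

Because the inner pair only raises and lowers the depth, the outer pair
still unwinds to zero; the cost is the extra bookkeeping, not broken
accounting.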
Hi Marc,

On Tue, 1 Sep 2020 at 16:44, Marc Zyngier <maz@kernel.org> wrote:
>
> In order to deal with IPIs as normal interrupts, let's add
> a new way to register them with the architecture code.
>
> set_smp_ipi_range() takes a range of interrupts, and allows
> the arch code to request them as if they were normal interrupts.
> A standard handler is then called by the core IRQ code to deal
> with the IPI.
>
> This means that we don't need to call irq_enter/irq_exit, and
> that we don't need to deal with set_irq_regs either. So let's
> move the dispatcher into its own function, and leave handle_IPI()
> as a compatibility function.
>
> On the sending side, let's make use of ipi_send_mask, which
> already exists for this purpose.
>
> One of the major differences is that, in some cases (such as
> when performing IRQ time accounting on the scheduler IPI), we
> end up with nested irq_enter()/irq_exit() pairs.
> Other than the (relatively small) overhead, there should be
> no consequences (these pairs are designed to nest correctly,
> and the accounting shouldn't be off).

While rebasing on mainline, I have faced a performance regression for
the benchmark:
  perf bench sched pipe
on my arm64 dual quad-core (hikey) and my 2 nodes x 112 CPUs (thx2).

The regression comes from:
  commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
interrupts")

        v5.9                + this patch
hikey : 48818  (+/- 0.31%)  37503  (+/- 0.15%)  -23.2%
thx2  : 132410 (+/- 1.72%)  122646 (+/- 1.92%)   -7.4%

By "+ this patch", I mean merging the branch up to this patch. Merging
only up to the previous commit:
  commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
/proc/interrupts")
doesn't show any regression.

Vincent

> Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>

...
Hi Vincent,

On 2020-10-19 13:42, Vincent Guittot wrote:
> Hi Marc,
>
> On Tue, 1 Sep 2020 at 16:44, Marc Zyngier <maz@kernel.org> wrote:

...

> While rebasing on mainline, I have faced a performance regression for
> the benchmark:
>   perf bench sched pipe
> on my arm64 dual quad-core (hikey) and my 2 nodes x 112 CPUs (thx2).
>
> The regression comes from:
>   commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
> interrupts")

That's interesting, as this patch doesn't really change anything (most
of the potential overhead comes in later). The only potential overhead
I can see is that the scheduler_ipi() call is now wrapped around
irq_enter()/irq_exit().

>
>         v5.9                + this patch
> hikey : 48818  (+/- 0.31%)  37503  (+/- 0.15%)  -23.2%
> thx2  : 132410 (+/- 1.72%)  122646 (+/- 1.92%)   -7.4%
>
> By "+ this patch", I mean merging the branch up to this patch. Merging
> only up to the previous commit:
>   commit: 83cfac95c018 ("genirq: Allow interrupts to be excluded from
> /proc/interrupts")
> doesn't show any regression.

Since you are running perf, can you spot where the overhead occurs?

Thanks,

M.
On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <maz@kernel.org> wrote:
>
> Hi Vincent,
>
> On 2020-10-19 13:42, Vincent Guittot wrote:
> > Hi Marc,
> >
> > On Tue, 1 Sep 2020 at 16:44, Marc Zyngier <maz@kernel.org> wrote:

...

> > While rebasing on mainline, I have faced a performance regression for
> > the benchmark:
> >   perf bench sched pipe
> > on my arm64 dual quad-core (hikey) and my 2 nodes x 112 CPUs (thx2).
> >
> > The regression comes from:
> >   commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
> > interrupts")
>
> That's interesting, as this patch doesn't really change anything (most
> of the potential overhead comes in later). The only potential overhead
> I can see is that the scheduler_ipi() call is now wrapped around
> irq_enter()/irq_exit().
>
> Since you are running perf, can you spot where the overhead occurs?

hmm... Difficult to say, because tracing the benchmark lowers the
result a lot. I have pasted the perf reports.

With this patch:

# Samples: 634  of event 'cpu-clock'
# Event count (approx.): 158500000
#
# Overhead  Command     Shared Object       Symbol
# ........  ..........  ..................  ..................................
#
    31.86%  sched-pipe  [kernel.kallsyms]   [k] _raw_spin_unlock_irqrestore
     8.68%  sched-pipe  [kernel.kallsyms]   [k] _raw_spin_unlock_irq
     6.31%  sched-pipe  [kernel.kallsyms]   [k] __schedule
     5.21%  sched-pipe  [kernel.kallsyms]   [k] schedule
     4.73%  sched-pipe  [kernel.kallsyms]   [k] pipe_read
     3.31%  sched-pipe  [kernel.kallsyms]   [k] el0_svc_common.constprop.3
     2.84%  sched-pipe  [kernel.kallsyms]   [k] ww_mutex_lock_interruptible
     2.52%  sched-pipe  [kernel.kallsyms]   [k] init_wait_entry
     2.37%  sched-pipe  [kernel.kallsyms]   [k] mutex_unlock
     2.21%  sched-pipe  [kernel.kallsyms]   [k] new_sync_read
     1.89%  sched-pipe  [kernel.kallsyms]   [k] new_sync_write
     1.74%  sched-pipe  [kernel.kallsyms]   [k] security_file_permission
     1.74%  sched-pipe  [kernel.kallsyms]   [k] vfs_read
     1.58%  sched-pipe  [kernel.kallsyms]   [k] __my_cpu_offset
     1.26%  sched-pipe  libpthread-2.24.so  [.] 0x0000000000010a2c
     1.10%  sched-pipe  [kernel.kallsyms]   [k] mutex_lock
     1.10%  sched-pipe  [kernel.kallsyms]   [k] vfs_write

After reverting this patch, which gives a result similar to v5.9:

# Samples: 659  of event 'cpu-clock'
# Event count (approx.): 164750000
#
# Overhead  Command     Shared Object      Symbol
# ........  ..........  .................  ...............................
#
    29.29%  sched-pipe  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
    21.40%  sched-pipe  [kernel.kallsyms]  [k] _raw_spin_unlock_irq
     4.86%  sched-pipe  [kernel.kallsyms]  [k] pipe_read
     4.55%  sched-pipe  [kernel.kallsyms]  [k] ww_mutex_lock_interruptible
     2.88%  sched-pipe  [kernel.kallsyms]  [k] __schedule
     2.88%  sched-pipe  [kernel.kallsyms]  [k] _raw_spin_lock_irqsave
     2.88%  sched-pipe  [kernel.kallsyms]  [k] schedule
     2.12%  sched-pipe  [kernel.kallsyms]  [k] new_sync_read
     1.82%  sched-pipe  [kernel.kallsyms]  [k] mutex_lock
     1.67%  sched-pipe  [kernel.kallsyms]  [k] el0_svc_common.constprop.3
     1.67%  sched-pipe  [kernel.kallsyms]  [k] pipe_write
     1.21%  sched-pipe  [kernel.kallsyms]  [k] rw_verify_area
     1.21%  sched-pipe  [kernel.kallsyms]  [k] security_file_permission
     1.06%  sched-pipe  [kernel.kallsyms]  [k] fsnotify

I have only listed symbols with overhead above 1%.

So _raw_spin_unlock_irq, schedule and __schedule seem the most
impacted, but I can't draw any conclusion from that.

I can send you the perf.data files if you want.

>
> Thanks,
>
> M.
> --
> Jazz is not dead. It just smells funny...
Hi,

On 19/10/20 16:43, Vincent Guittot wrote:
> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <maz@kernel.org> wrote:
>> Since you are running perf, can you spot where the overhead occurs?
>
> hmm... Difficult to say, because tracing the benchmark lowers the
> result a lot. I have pasted the perf reports.
>
<snip>
> I have only listed symbols with overhead above 1%.
>
> So _raw_spin_unlock_irq, schedule and __schedule seem the most
> impacted, but I can't draw any conclusion from that.

AFAICT on TX2 you should be able to run these and get some more details
on what happens within IRQ-disabled regions:

  https://lore.kernel.org/linux-arm-kernel/20200924110706.254996-1-alexandru.elisei@arm.com/

(they should be on linux-next)

> I can send you the perf.data files if you want.
>
>>
>> Thanks,
>>
>> M.
>> --
>> Jazz is not dead. It just smells funny...
Hi Marc,

On Mon, 19 Oct 2020 at 17:43, Vincent Guittot
<vincent.guittot@linaro.org> wrote:
>
> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <maz@kernel.org> wrote:

...

> > > While rebasing on mainline, I have faced a performance regression for
> > > the benchmark:
> > >   perf bench sched pipe
> > > on my arm64 dual quad-core (hikey) and my 2 nodes x 112 CPUs (thx2).
> > >
> > > The regression comes from:
> > >   commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
> > > interrupts")
> >
> > That's interesting, as this patch doesn't really change anything (most
> > of the potential overhead comes in later). The only potential overhead
> > I can see is that the scheduler_ipi() call is now wrapped around
> > irq_enter()/irq_exit().
> >
> > Since you are running perf, can you spot where the overhead occurs?

Any idea about the root cause of the regression? I have faced it on
more arm64 platforms in the meantime.

> hmm... Difficult to say, because tracing the benchmark lowers the
> result a lot. I have pasted the perf reports.

...

> I have only listed symbols with overhead above 1%.
>
> So _raw_spin_unlock_irq, schedule and __schedule seem the most
> impacted, but I can't draw any conclusion from that.
>
> I can send you the perf.data files if you want.
>
> >
> > Thanks,
> >
> > M.
> > --
> > Jazz is not dead. It just smells funny...
On 2020-10-27 10:12, Vincent Guittot wrote:
> Hi Marc,
>
> On Mon, 19 Oct 2020 at 17:43, Vincent Guittot
> <vincent.guittot@linaro.org> wrote:
>>
>> On Mon, 19 Oct 2020 at 15:04, Marc Zyngier <maz@kernel.org> wrote:
>
> ...
>
>> > > While rebasing on mainline, I have faced a performance regression for
>> > > the benchmark:
>> > >   perf bench sched pipe
>> > > on my arm64 dual quad-core (hikey) and my 2 nodes x 112 CPUs (thx2).
>> > >
>> > > The regression comes from:
>> > >   commit: d3afc7f12987 ("arm64: Allow IPIs to be handled as normal
>> > > interrupts")
>> >
>> > Since you are running perf, can you spot where the overhead occurs?
>
> Any idea about the root cause of the regression?
> I have faced it on more arm64 platforms in the meantime.

Two possible causes:

(1) irq_enter/exit on the rescheduling IPI means we reschedule much
    more often;

(2) irq_domain lookups add some overhead.

For (1), I have this series[1], which is ugly as sin and needs much
more testing.

For (2), I have some ideas which need more work (let the irq domain
resolve to an irq_desc instead of an interrupt number, avoiding another
radix-tree lookup).

M.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/ipi-fixes
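For readers who haven't been in kernel/irq/, cause (2) refers to the
receive path resolving a hardware interrupt number to a Linux irq
number through the domain's reverse map, and then that number to an
irq_desc through a radix tree; the idea Marc sketches is to resolve
straight to the descriptor, much as the send side of the patch already
does by caching ipi_desc[]. Below is a toy userspace model of the
one-lookup-vs-two distinction only; the flat arrays stand in for the
kernel's real data structures, which they are not:

#include <stdio.h>

struct irq_desc { const char *name; };

static struct irq_desc sgi0_desc = { "IPI0" };

static unsigned int revmap_virq[16];     /* hwirq -> virq (lookup 1) */
static struct irq_desc *desc_tree[64];   /* virq -> desc  (lookup 2) */
static struct irq_desc *revmap_desc[16]; /* hwirq -> desc, the idea  */

static struct irq_desc *resolve_today(unsigned int hwirq)
{
	/* two dependent lookups, one cache miss chasing the other */
	return desc_tree[revmap_virq[hwirq]];
}

static struct irq_desc *resolve_proposed(unsigned int hwirq)
{
	/* a single lookup straight to the descriptor */
	return revmap_desc[hwirq];
}

int main(void)
{
	revmap_virq[0] = 32;
	desc_tree[32] = &sgi0_desc;
	revmap_desc[0] = &sgi0_desc;

	printf("today:    %s\n", resolve_today(0)->name);
	printf("proposed: %s\n", resolve_proposed(0)->name);
	return 0;
}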
On Tue, 27 Oct 2020 at 11:37, Marc Zyngier <maz@kernel.org> wrote:
>
> On 2020-10-27 10:12, Vincent Guittot wrote:
> > Hi Marc,
> >
> > On Mon, 19 Oct 2020 at 17:43, Vincent Guittot
> > <vincent.guittot@linaro.org> wrote:

...

> > Any idea about the root cause of the regression?
> > I have faced it on more arm64 platforms in the meantime.
>
> Two possible causes:
>
> (1) irq_enter/exit on the rescheduling IPI means we reschedule much
>     more often;
>
> (2) irq_domain lookups add some overhead.
>
> For (1), I have this series[1], which is ugly as sin and needs much
> more testing.

Ok, I'm going to test this series to see if it fixes the perf
regression.

>
> For (2), I have some ideas which need more work (let the irq domain
> resolve to an irq_desc instead of an interrupt number, avoiding another
> radix-tree lookup).
>
> M.
>
> [1]
> https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/ipi-fixes
> --
> Jazz is not dead. It just smells funny...
On Tue, 27 Oct 2020 at 11:50, Vincent Guittot
<vincent.guittot@linaro.org> wrote:
>
> On Tue, 27 Oct 2020 at 11:37, Marc Zyngier <maz@kernel.org> wrote:

...

> > Two possible causes:
> >
> > (1) irq_enter/exit on the rescheduling IPI means we reschedule much
> >     more often;
> >
> > (2) irq_domain lookups add some overhead.
> >
> > For (1), I have this series[1], which is ugly as sin and needs much
> > more testing.
>
> Ok, I'm going to test this series to see if it fixes the perf
> regression.

You have spotted the root cause of the regression. We are back to a ~1%
performance diff on the hikey.

> >
> > For (2), I have some ideas which need more work (let the irq domain
> > resolve to an irq_desc instead of an interrupt number, avoiding another
> > radix-tree lookup).
> >
> > M.
> >
> > [1]
> > https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/ipi-fixes
> > --
> > Jazz is not dead. It just smells funny...
On 2020-10-27 11:21, Vincent Guittot wrote:
> On Tue, 27 Oct 2020 at 11:50, Vincent Guittot
> <vincent.guittot@linaro.org> wrote:
>>
>> On Tue, 27 Oct 2020 at 11:37, Marc Zyngier <maz@kernel.org> wrote:

...

>> > For (1), I have this series[1], which is ugly as sin and needs much
>> > more testing.
>>
>> Ok, I'm going to test this series to see if it fixes the perf
>> regression.
>
> You have spotted the root cause of the regression. We are back to a ~1%
> performance diff on the hikey.

Yeah. Only thing is that I can't look at this hack without vomiting...

M.
On Tue, 27 Oct 2020 at 13:06, Marc Zyngier <maz@kernel.org> wrote:
>
> On 2020-10-27 11:21, Vincent Guittot wrote:
> > On Tue, 27 Oct 2020 at 11:50, Vincent Guittot
> > <vincent.guittot@linaro.org> wrote:

...

> >> Ok, I'm going to test this series to see if it fixes the perf
> >> regression.
> >
> > You have spotted the root cause of the regression. We are back to a ~1%
> > performance diff on the hikey.
>
> Yeah. Only thing is that I can't look at this hack without vomiting...

At least we now know the root cause and the impact of irq_enter/exit.

>
> M.
> --
> Jazz is not dead. It just smells funny...
On Thu, 06 May 2021 08:50:42 +0100,
He Ying <heying24@huawei.com> wrote:
>
> Hello Marc,
>
> We have faced a performance regression for handling IPIs since this
> commit. I think it's the same issue reported by Vincent.

Can you share more details on what regression you have observed?
What's the workload, the system, the performance drop?

> I found you pointed out the possible two causes:
>
> (1) irq_enter/exit on the rescheduling IPI means we reschedule much
> more often.

It turned out to be a red herring. We don't reschedule more often, but
we instead suffer from the overhead of irq_enter()/irq_exit().
However, this only matters for silly benchmarks, and no real-life
workload showed any significant regression. Have you identified such
a realistic workload?

> (2) irq_domain lookups add some overhead.

While this is also a potential source of overhead, it turned out not
to be the case.

> But I don't see any following patches in mainline. So, are you still
> working on this issue? Looking forward to your reply.

See [1]. However, there are probably better things to do than this
low-level specialisation of IPIs, and Thomas outlined what needs to be
done (see v1 of the patch series).

Thanks,

M.

[1] https://lore.kernel.org/lkml/20201124141449.572446-1-maz@kernel.org/
On 2021/5/6 19:44, Marc Zyngier wrote:
> On Thu, 06 May 2021 08:50:42 +0100,
> He Ying <heying24@huawei.com> wrote:
>> Hello Marc,
>>
>> We have faced a performance regression for handling IPIs since this
>> commit. I think it's the same issue reported by Vincent.
>
> Can you share more details on what regression you have observed?
> What's the workload, the system, the performance drop?

OK. We have just counted the PMU cycles from the entry of
gic_handle_irq to the entry of do_handle_ipi. Here is some more
information about our test:

CPU: Hisilicon hip05-d02

Applying the patch series:  1115 cycles
Reverting the patch series:  599 cycles

>
>> I found you pointed out the possible two causes:
>>
>> (1) irq_enter/exit on the rescheduling IPI means we reschedule much
>> more often.
>
> It turned out to be a red herring. We don't reschedule more often, but
> we instead suffer from the overhead of irq_enter()/irq_exit().
> However, this only matters for silly benchmarks, and no real-life
> workload showed any significant regression. Have you identified such
> a realistic workload?

I'm afraid not. We just ran some benchmarks and counted PMU cycle
counters. But we have observed that the running time from the entry of
gic_handle_irq to the entry of do_handle_ipi almost doubles. Doesn't
that affect realistic workloads?

>
>> (2) irq_domain lookups add some overhead.
>
> While this is also a potential source of overhead, it turned out not
> to be the case.

OK.

>
>> But I don't see any following patches in mainline. So, are you still
>> working on this issue? Looking forward to your reply.
>
> See [1]. However, there are probably better things to do than this
> low-level specialisation of IPIs, and Thomas outlined what needs to be
> done (see v1 of the patch series).

OK. I see the patch series. Will it be applied to mainline someday? I
notice that more than 5 months have passed since you sent the patch
series.

Thanks.

>
> Thanks,
>
> M.
>
> [1] https://lore.kernel.org/lkml/20201124141449.572446-1-maz@kernel.org/
>
On Fri, 07 May 2021 08:30:06 +0100,
He Ying <heying24@huawei.com> wrote:
>
> On 2021/5/6 19:44, Marc Zyngier wrote:
> > On Thu, 06 May 2021 08:50:42 +0100,
> > He Ying <heying24@huawei.com> wrote:
> >> Hello Marc,
> >>
> >> We have faced a performance regression for handling IPIs since this
> >> commit. I think it's the same issue reported by Vincent.
> > Can you share more details on what regression you have observed?
> > What's the workload, the system, the performance drop?
>
> OK. We have just counted the PMU cycles from the entry of
> gic_handle_irq to the entry of do_handle_ipi. Here is some more
> information about our test:
>
> CPU: Hisilicon hip05-d02
>
> Applying the patch series:  1115 cycles
> Reverting the patch series:  599 cycles

And? How is that meaningful? Interrupts are pretty rare compared to
everything that happens in the system. How does it affect the
behaviour of the system as a whole?

> >> I found you pointed out the possible two causes:
> >>
> >> (1) irq_enter/exit on the rescheduling IPI means we reschedule much
> >> more often.
> > It turned out to be a red herring. We don't reschedule more often, but
> > we instead suffer from the overhead of irq_enter()/irq_exit().
> > However, this only matters for silly benchmarks, and no real-life
> > workload showed any significant regression. Have you identified such
> > a realistic workload?
>
> I'm afraid not. We just ran some benchmarks and counted PMU cycle
> counters. But we have observed that the running time from the entry of
> gic_handle_irq to the entry of do_handle_ipi almost doubles. Doesn't
> that affect realistic workloads?

Then I'm not that interested. Show me an actual regression in a real
workload that affects people, and I'll be a bit more sympathetic to
your complaint. But quoting raw numbers does not help.

There are a number of advantages to having IPIs as IRQs, as it allows
us to deal with proper allocation (other subsystems want to use IPIs),
and eventually NMIs. There is a trade-off, and if that means wasting a
few cycles, so be it.

> >> (2) irq_domain lookups add some overhead.
> > While this is also a potential source of overhead, it turned out not
> > to be the case.
> OK.
>
> >> But I don't see any following patches in mainline. So, are you still
> >> working on this issue? Looking forward to your reply.
> > See [1]. However, there are probably better things to do than this
> > low-level specialisation of IPIs, and Thomas outlined what needs to be
> > done (see v1 of the patch series).
>
> OK. I see the patch series. Will it be applied to mainline someday? I
> notice that more than 5 months have passed since you sent the patch
> series.

I have no plan to merge these patches any time soon, given that nobody
has shown a measurable regression using something other than a trivial
benchmark. If you come up with such an example, I will of course
reconsider this position.

Thanks,

M.
On 2021/5/7 16:56, Marc Zyngier wrote:
> On Fri, 07 May 2021 08:30:06 +0100,
> He Ying <heying24@huawei.com> wrote:
>>
>> On 2021/5/6 19:44, Marc Zyngier wrote:
>>> On Thu, 06 May 2021 08:50:42 +0100,
>>> He Ying <heying24@huawei.com> wrote:
>>>> Hello Marc,
>>>>
>>>> We have faced a performance regression for handling IPIs since this
>>>> commit. I think it's the same issue reported by Vincent.
>>> Can you share more details on what regression you have observed?
>>> What's the workload, the system, the performance drop?
>> OK. We have just counted the PMU cycles from the entry of
>> gic_handle_irq to the entry of do_handle_ipi. Here is some more
>> information about our test:
>>
>> CPU: Hisilicon hip05-d02
>>
>> Applying the patch series:  1115 cycles
>> Reverting the patch series:  599 cycles
> And? How is that meaningful? Interrupts are pretty rare compared to
> everything that happens in the system. How does it affect the
> behaviour of the system as a whole?

OK.

>>>> I found you pointed out the possible two causes:
>>>>
>>>> (1) irq_enter/exit on the rescheduling IPI means we reschedule much
>>>> more often.
>>> It turned out to be a red herring. We don't reschedule more often, but
>>> we instead suffer from the overhead of irq_enter()/irq_exit().
>>> However, this only matters for silly benchmarks, and no real-life
>>> workload showed any significant regression. Have you identified such
>>> a realistic workload?
>> I'm afraid not. We just ran some benchmarks and counted PMU cycle
>> counters. But we have observed that the running time from the entry of
>> gic_handle_irq to the entry of do_handle_ipi almost doubles. Doesn't
>> that affect realistic workloads?
> Then I'm not that interested. Show me an actual regression in a real
> workload that affects people, and I'll be a bit more sympathetic to
> your complaint. But quoting raw numbers does not help.
>
> There are a number of advantages to having IPIs as IRQs, as it allows
> us to deal with proper allocation (other subsystems want to use IPIs),
> and eventually NMIs. There is a trade-off, and if that means wasting a
> few cycles, so be it.

OK. I see.

>>>> (2) irq_domain lookups add some overhead.
>>> While this is also a potential source of overhead, it turned out not
>>> to be the case.
>> OK.
>>>> But I don't see any following patches in mainline. So, are you still
>>>> working on this issue? Looking forward to your reply.
>>> See [1]. However, there are probably better things to do than this
>>> low-level specialisation of IPIs, and Thomas outlined what needs to be
>>> done (see v1 of the patch series).
>> OK. I see the patch series. Will it be applied to mainline someday? I
>> notice that more than 5 months have passed since you sent the patch
>> series.
> I have no plan to merge these patches any time soon, given that nobody
> has shown a measurable regression using something other than a trivial
> benchmark. If you come up with such an example, I will of course
> reconsider this position.

OK. Thanks a lot for all your replies. If I come up with a measurable
regression with a realistic workload, I'll contact you again.

Thanks.

>
> Thanks,
>
> M.
>
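A rough scale check reconciles He Ying's per-IPI numbers with Marc's
"interrupts are pretty rare" argument. The IPI rate and clock frequency
below are assumptions chosen for illustration, not measurements from
this thread:

#include <stdio.h>

int main(void)
{
	const double extra_cycles = 1115 - 599; /* He Ying's measured delta */
	const double ipi_rate = 10000.0;        /* assumed: 10k IPIs/s/CPU  */
	const double cpu_hz = 2.0e9;            /* assumed: 2 GHz clock     */

	/* Fraction of one CPU spent on the extra per-IPI entry work */
	printf("%.2f%% of a CPU\n",
	       100.0 * extra_cycles * ipi_rate / cpu_hz);
	return 0;
}

Even at an aggressive 10k IPIs per second, the extra ~516 cycles cost
roughly 0.26% of a core, which is why only IPI-bound microbenchmarks
such as sched pipe make the overhead clearly visible.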
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6d232837cbee..d0fdbe5fb32f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -106,6 +106,7 @@ config ARM64
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_IDLE_POLL_SETUP
+	select GENERIC_IRQ_IPI
 	select GENERIC_IRQ_MULTI_HANDLER
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 0eadbf933e35..57c5db15f6b7 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -78,6 +78,11 @@ extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));

 extern void (*__smp_cross_call)(const struct cpumask *, unsigned int);

+/*
+ * Register IPI interrupts with the arch SMP code
+ */
+extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
+
 /*
  * Called from the secondary holding pen, this is the secondary CPU entry point.
  */
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 355ee9eed4dd..00c9db1b61b5 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -75,6 +75,13 @@ enum ipi_msg_type {
 	IPI_WAKEUP
 };

+static int ipi_irq_base __read_mostly;
+static int nr_ipi __read_mostly = NR_IPI;
+static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
+
+static void ipi_setup(int cpu);
+static void ipi_teardown(int cpu);
+
 #ifdef CONFIG_HOTPLUG_CPU
 static int op_cpu_kill(unsigned int cpu);
 #else
@@ -237,6 +244,8 @@ asmlinkage notrace void secondary_start_kernel(void)
 	 */
 	notify_cpu_starting(cpu);

+	ipi_setup(cpu);
+
 	store_cpu_topology(cpu);
 	numa_add_cpu(cpu);

@@ -302,6 +311,7 @@ int __cpu_disable(void)
 	 * and we must not schedule until we're ready to give up the cpu.
 	 */
 	set_cpu_online(cpu, false);
+	ipi_teardown(cpu);

 	/*
 	 * OK - migrate IRQs away from this CPU
@@ -890,10 +900,9 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
 /*
  * Main handler for inter-processor interrupts
  */
-void handle_IPI(int ipinr, struct pt_regs *regs)
+static void do_handle_IPI(int ipinr)
 {
 	unsigned int cpu = smp_processor_id();
-	struct pt_regs *old_regs = set_irq_regs(regs);

 	if ((unsigned)ipinr < NR_IPI) {
 		trace_ipi_entry_rcuidle(ipi_types[ipinr]);
@@ -906,21 +915,16 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 		break;

 	case IPI_CALL_FUNC:
-		irq_enter();
 		generic_smp_call_function_interrupt();
-		irq_exit();
 		break;

 	case IPI_CPU_STOP:
-		irq_enter();
 		local_cpu_stop();
-		irq_exit();
 		break;

 	case IPI_CPU_CRASH_STOP:
 		if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
-			irq_enter();
-			ipi_cpu_crash_stop(cpu, regs);
+			ipi_cpu_crash_stop(cpu, get_irq_regs());

 			unreachable();
 		}
@@ -928,17 +932,13 @@ void handle_IPI(int ipinr, struct pt_regs *regs)

 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 	case IPI_TIMER:
-		irq_enter();
 		tick_receive_broadcast();
-		irq_exit();
 		break;
 #endif

 #ifdef CONFIG_IRQ_WORK
 	case IPI_IRQ_WORK:
-		irq_enter();
 		irq_work_run();
-		irq_exit();
 		break;
 #endif

@@ -957,9 +957,78 @@ void handle_IPI(int ipinr, struct pt_regs *regs)

 	if ((unsigned)ipinr < NR_IPI)
 		trace_ipi_exit_rcuidle(ipi_types[ipinr]);
+}
+
+/* Legacy version, should go away once all irqchips have been converted */
+void handle_IPI(int ipinr, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	irq_enter();
+	do_handle_IPI(ipinr);
+	irq_exit();
+
 	set_irq_regs(old_regs);
 }

+static irqreturn_t ipi_handler(int irq, void *data)
+{
+	do_handle_IPI(irq - ipi_irq_base);
+	return IRQ_HANDLED;
+}
+
+static void ipi_send(const struct cpumask *target, unsigned int ipi)
+{
+	__ipi_send_mask(ipi_desc[ipi], target);
+}
+
+static void ipi_setup(int cpu)
+{
+	int i;
+
+	if (!ipi_irq_base)
+		return;
+
+	for (i = 0; i < nr_ipi; i++)
+		enable_percpu_irq(ipi_irq_base + i, 0);
+}
+
+static void ipi_teardown(int cpu)
+{
+	int i;
+
+	if (!ipi_irq_base)
+		return;
+
+	for (i = 0; i < nr_ipi; i++)
+		disable_percpu_irq(ipi_irq_base + i);
+}
+
+void __init set_smp_ipi_range(int ipi_base, int n)
+{
+	int i;
+
+	WARN_ON(n < NR_IPI);
+	nr_ipi = min(n, NR_IPI);
+
+	for (i = 0; i < nr_ipi; i++) {
+		int err;
+
+		err = request_percpu_irq(ipi_base + i, ipi_handler,
+					 "IPI", &irq_stat);
+		WARN_ON(err);
+
+		ipi_desc[i] = irq_to_desc(ipi_base + i);
+		irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
+	}
+
+	ipi_irq_base = ipi_base;
+	__smp_cross_call = ipi_send;
+
+	/* Setup the boot CPU immediately */
+	ipi_setup(smp_processor_id());
+}
+
 void smp_send_reschedule(int cpu)
 {
 	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
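The provider side of set_smp_ipi_range() is not part of this patch. The
sketch below is loosely modelled, from memory, on the GICv3 conversion
elsewhere in this series; the fwspec contents, field names and exact
call site are illustrative and may differ from the actual driver
commit:

/* Sketch: an irqchip driver allocating its 8 SGIs in its irq_domain
 * and handing them to the arch code. */
static void __init gic_smp_init(void)
{
	struct irq_fwspec sgi_fwspec = {
		.fwnode		= gic_data.fwnode,
		.param_count	= 1,	/* a single cell: the SGI number */
	};
	int base_sgi;

	/* Ask the domain for 8 consecutive Linux interrupts backing SGI0-7 */
	base_sgi = __irq_domain_alloc_irqs(gic_data.domain, -1, 8,
					   NUMA_NO_NODE, &sgi_fwspec,
					   false, NULL);
	if (WARN_ON(base_sgi <= 0))
		return;

	/* Let the arch code request them via request_percpu_irq() */
	set_smp_ipi_range(base_sgi, 8);
}

With this split, the arch code owns the handlers and the send path
(ipi_send() above), while the irqchip driver only decides which
hardware interrupts back the IPIs.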