[04/11] ARM: Allow IPIs to be handled as normal interrupts

Message ID 20200519161755.209565-5-maz@kernel.org (mailing list archive)
State New, archived
Series arm/arm64: Turning IPIs into normal interrupts

Commit Message

Marc Zyngier May 19, 2020, 4:17 p.m. UTC
In order to deal with IPIs as normal interrupts, let's add
a new way to register them with the architecture code.

set_smp_ipi_range() takes a range of interrupts, and allows
the arch code to request them as if they were normal interrupts.
A standard handler is then called by the core IRQ code to deal
with the IPI.

This means that we don't need to call irq_enter/irq_exit, and
that we don't need to deal with set_irq_regs either. So let's
move the dispatcher into its own function, and leave handle_IPI()
as a compatibility function.

On the sending side, let's make use of ipi_send_mask, which
already exists for this purpose.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm/Kconfig           |  1 +
 arch/arm/include/asm/smp.h |  5 ++
 arch/arm/kernel/smp.c      | 97 ++++++++++++++++++++++++++++++++------
 3 files changed, 88 insertions(+), 15 deletions(-)
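
For readers unfamiliar with the new interface, a rough sketch of how an
irqchip driver is expected to hand its IPIs over follows (illustration
only, not part of this patch: my_irqchip_smp_init(), my_domain,
sgi_fwspec and the count of 8 are made-up placeholders, loosely modelled
on what a GIC-style driver would do):

  static void __init my_irqchip_smp_init(void)
  {
          int base_sgi;

          /* Allocate a contiguous block of per-CPU interrupts for the SGIs */
          base_sgi = __irq_domain_alloc_irqs(my_domain, -1, 8, NUMA_NO_NODE,
                                             &sgi_fwspec, false, NULL);
          if (WARN_ON(base_sgi <= 0))
                  return;

          /*
           * The arch SMP code requests each interrupt with ipi_handler()
           * and enables them per-CPU from ipi_setup().
           */
          set_smp_ipi_range(base_sgi, 8);
  }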

Comments

Russell King (Oracle) May 19, 2020, 10:24 p.m. UTC | #1
On Tue, May 19, 2020 at 05:17:48PM +0100, Marc Zyngier wrote:
> In order to deal with IPIs as normal interrupts, let's add
> a new way to register them with the architecture code.
> 
> set_smp_ipi_range() takes a range of interrupts, and allows
> the arch code to request them as if the were normal interrupts.
> A standard handler is then called by the core IRQ code to deal
> with the IPI.
> 
> This means that we don't need to call irq_enter/irq_exit, and
> that we don't need to deal with set_irq_regs either. So let's
> move the dispatcher into its own function, and leave handle_IPI()
> as a compatibility function.
> 
> On the sending side, let's make use of ipi_send_mask, which
> already exists for this purpose.

You say nothing about the nesting of irq_enter() and irq_exit()
for scheduler_ipi().

Given that lockdep introduced the requirement that hard IRQs can't
be nested, are we sure that calling irq_exit() twice is safe?

Looking at irqtime_account_irq(), it seems that this will cause double-
accounting of in-interrupt time, since we will increment
irq_start_time by just over twice the period spent handling
the IPI.

I think the rest of irq_exit() should be safe, but still, this
behaviour should be documented at the very least, if not avoided.

> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  arch/arm/Kconfig           |  1 +
>  arch/arm/include/asm/smp.h |  5 ++
>  arch/arm/kernel/smp.c      | 97 ++++++++++++++++++++++++++++++++------
>  3 files changed, 88 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index c77c93c485a0..0caaba9bf880 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -48,6 +48,7 @@ config ARM
>  	select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
>  	select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
>  	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
> +	select GENERIC_IRQ_IPI if SMP
>  	select GENERIC_CPU_AUTOPROBE
>  	select GENERIC_EARLY_IOREMAP
>  	select GENERIC_IDLE_POLL_SETUP
> diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
> index a91f21e3c5b5..0e29730295ca 100644
> --- a/arch/arm/include/asm/smp.h
> +++ b/arch/arm/include/asm/smp.h
> @@ -45,6 +45,11 @@ extern void smp_init_cpus(void);
>   */
>  extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
>  
> +/*
> + * Register IPI interrupts with the arch SMP code
> + */
> +extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
> +
>  /*
>   * Called from platform specific assembly code, this is the
>   * secondary CPU entry point.
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index 46e1be9e57a8..618641978a5b 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -79,10 +79,19 @@ enum ipi_msg_type {
>  	 */
>  };
>  
> +static int ipi_irq_base;
> +static int nr_ipi = NR_IPI;
> +static struct irq_desc *ipi_desc[NR_IPI];
> +
> +static void ipi_setup(int cpu);
> +static void ipi_teardown(int cpu);
> +
>  static DECLARE_COMPLETION(cpu_running);
>  
>  static struct smp_operations smp_ops __ro_after_init;
>  
> +static void ipi_setup(int cpu);
> +
>  void __init smp_set_ops(const struct smp_operations *ops)
>  {
>  	if (ops)
> @@ -308,6 +317,8 @@ void arch_cpu_idle_dead(void)
>  
>  	local_irq_disable();
>  
> +	ipi_teardown(cpu);
> +
>  	/*
>  	 * Flush the data out of the L1 cache for this CPU.  This must be
>  	 * before the completion to ensure that data is safely written out
> @@ -424,6 +435,8 @@ asmlinkage void secondary_start_kernel(void)
>  
>  	notify_cpu_starting(cpu);
>  
> +	ipi_setup(cpu);
> +
>  	calibrate_delay();
>  
>  	smp_store_cpu_info(cpu);
> @@ -629,10 +642,9 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
>  	handle_IPI(ipinr, regs);
>  }
>  
> -void handle_IPI(int ipinr, struct pt_regs *regs)
> +static void do_handle_IPI(int ipinr)
>  {
>  	unsigned int cpu = smp_processor_id();
> -	struct pt_regs *old_regs = set_irq_regs(regs);
>  
>  	if ((unsigned)ipinr < NR_IPI) {
>  		trace_ipi_entry_rcuidle(ipi_types[ipinr]);
> @@ -645,9 +657,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
>  
>  #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
>  	case IPI_TIMER:
> -		irq_enter();
>  		tick_receive_broadcast();
> -		irq_exit();
>  		break;
>  #endif
>  
> @@ -656,36 +666,26 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
>  		break;
>  
>  	case IPI_CALL_FUNC:
> -		irq_enter();
>  		generic_smp_call_function_interrupt();
> -		irq_exit();
>  		break;
>  
>  	case IPI_CPU_STOP:
> -		irq_enter();
>  		ipi_cpu_stop(cpu);
> -		irq_exit();
>  		break;
>  
>  #ifdef CONFIG_IRQ_WORK
>  	case IPI_IRQ_WORK:
> -		irq_enter();
>  		irq_work_run();
> -		irq_exit();
>  		break;
>  #endif
>  
>  	case IPI_COMPLETION:
> -		irq_enter();
>  		ipi_complete(cpu);
> -		irq_exit();
>  		break;
>  
>  	case IPI_CPU_BACKTRACE:
>  		printk_nmi_enter();
> -		irq_enter();
> -		nmi_cpu_backtrace(regs);
> -		irq_exit();
> +		nmi_cpu_backtrace(get_irq_regs());
>  		printk_nmi_exit();
>  		break;
>  
> @@ -697,9 +697,76 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
>  
>  	if ((unsigned)ipinr < NR_IPI)
>  		trace_ipi_exit_rcuidle(ipi_types[ipinr]);
> +}
> +
> +/* Legacy version, should go away once all irqchips have been converted */
> +void handle_IPI(int ipinr, struct pt_regs *regs)
> +{
> +	struct pt_regs *old_regs = set_irq_regs(regs);
> +
> +	irq_enter();
> +	do_handle_IPI(ipinr);
> +	irq_exit();
> +
>  	set_irq_regs(old_regs);
>  }
>  
> +static irqreturn_t ipi_handler(int irq, void *data)
> +{
> +	do_handle_IPI(irq - ipi_irq_base);
> +	return IRQ_HANDLED;
> +}
> +
> +static void ipi_send(const struct cpumask *target, unsigned int ipi)
> +{
> +	__ipi_send_mask(ipi_desc[ipi], target);
> +}
> +
> +static void ipi_setup(int cpu)
> +{
> +	if (ipi_irq_base) {
> +		int i;
> +
> +		for (i = 0; i < nr_ipi; i++)
> +			enable_percpu_irq(ipi_irq_base + i, 0);
> +	}
> +}
> +
> +static void ipi_teardown(int cpu)
> +{
> +	if (ipi_irq_base) {
> +		int i;
> +
> +		for (i = 0; i < nr_ipi; i++)
> +			disable_percpu_irq(ipi_irq_base + i);
> +	}
> +}
> +
> +void __init set_smp_ipi_range(int ipi_base, int n)
> +{
> +	int i;
> +
> +	WARN_ON(n < NR_IPI);
> +	nr_ipi = min(n, NR_IPI);
> +
> +	for (i = 0; i < nr_ipi; i++) {
> +		int err;
> +
> +		err = request_percpu_irq(ipi_base + i, ipi_handler,
> +					 "IPI", &irq_stat);
> +		WARN_ON(err);
> +
> +		ipi_desc[i] = irq_to_desc(ipi_base + i);
> +		irq_set_status_flags(ipi_base + i, IRQ_NO_ACCOUNTING);
> +	}
> +
> +	ipi_irq_base = ipi_base;
> +	set_smp_cross_call(ipi_send);
> +
> +	/* Setup the boot CPU immediately */
> +	ipi_setup(smp_processor_id());
> +}
> +
>  void smp_send_reschedule(int cpu)
>  {
>  	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
> -- 
> 2.26.2
> 
>

Valentin Schneider May 21, 2020, 2:03 p.m. UTC | #2
On 19/05/20 23:24, Russell King - ARM Linux admin wrote:
> On Tue, May 19, 2020 at 05:17:48PM +0100, Marc Zyngier wrote:
>> In order to deal with IPIs as normal interrupts, let's add
>> a new way to register them with the architecture code.
>>
>> set_smp_ipi_range() takes a range of interrupts, and allows
>> the arch code to request them as if the were normal interrupts.
>> A standard handler is then called by the core IRQ code to deal
>> with the IPI.
>>
>> This means that we don't need to call irq_enter/irq_exit, and
>> that we don't need to deal with set_irq_regs either. So let's
>> move the dispatcher into its own function, and leave handle_IPI()
>> as a compatibility function.
>>
>> On the sending side, let's make use of ipi_send_mask, which
>> already exists for this purpose.
>
> You say nothing about the nesting of irq_enter() and irq_exit()
> for scheduler_ipi().
>
> Given that lockdep introduced the requirement that hard IRQs can't
> be nested, are we sure that calling irq_exit() twice is safe?
>
> Looking at irqtime_account_irq(), it seems that will cause double-
> accounting of in-interrupt time, since we will increment
> irq_start_time by just over twice the the period spent handling
> the IPI.
>
> I think the rest of irq_exit() should be safe, but still, this
> behaviour should be documented at the very least, if not avoided.
>

x86 does the same (though IIUC only when tracing reschedule IPIs), and
MIPS has the same issue as it also uses generic IRQ IPIs - so although
it's not ideal, I think we can live with it.

Russell King (Oracle) May 21, 2020, 3:12 p.m. UTC | #3
On Thu, May 21, 2020 at 03:03:49PM +0100, Valentin Schneider wrote:
> 
> On 19/05/20 23:24, Russell King - ARM Linux admin wrote:
> > On Tue, May 19, 2020 at 05:17:48PM +0100, Marc Zyngier wrote:
> >> In order to deal with IPIs as normal interrupts, let's add
> >> a new way to register them with the architecture code.
> >>
> >> set_smp_ipi_range() takes a range of interrupts, and allows
> >> the arch code to request them as if the were normal interrupts.
> >> A standard handler is then called by the core IRQ code to deal
> >> with the IPI.
> >>
> >> This means that we don't need to call irq_enter/irq_exit, and
> >> that we don't need to deal with set_irq_regs either. So let's
> >> move the dispatcher into its own function, and leave handle_IPI()
> >> as a compatibility function.
> >>
> >> On the sending side, let's make use of ipi_send_mask, which
> >> already exists for this purpose.
> >
> > You say nothing about the nesting of irq_enter() and irq_exit()
> > for scheduler_ipi().
> >
> > Given that lockdep introduced the requirement that hard IRQs can't
> > be nested, are we sure that calling irq_exit() twice is safe?
> >
> > Looking at irqtime_account_irq(), it seems that will cause double-
> > accounting of in-interrupt time, since we will increment
> > irq_start_time by just over twice the the period spent handling
> > the IPI.
> >
> > I think the rest of irq_exit() should be safe, but still, this
> > behaviour should be documented at the very least, if not avoided.
> >
> 
> x86 does the same (though IIUC only when tracing reschedule IPI's),

Right, so when the system is operating normally, then the accounting is
correct.  When the reschedule path is being explicitly traced, then
the accounting will be doubled for it.

What's being proposed for ARM is to always have this mis-accounting,
where no mis-accounting was present before - and some of us (me) /do/
enable IRQ accounting in our kernels as standard. So, you can take
this as a kernel regression report from a user.

> and MIPS has the same issue as it also uses generic IRQ IPI's - so
> although it's not ideal, I think we can live with it.

Yes, but is there anyone who cares about this for MIPS?

Valentin Schneider May 21, 2020, 4:11 p.m. UTC | #4
On 21/05/20 16:12, Russell King - ARM Linux admin wrote:
> On Thu, May 21, 2020 at 03:03:49PM +0100, Valentin Schneider wrote:
>>
>> On 19/05/20 23:24, Russell King - ARM Linux admin wrote:
>> > On Tue, May 19, 2020 at 05:17:48PM +0100, Marc Zyngier wrote:
>> >> In order to deal with IPIs as normal interrupts, let's add
>> >> a new way to register them with the architecture code.
>> >>
>> >> set_smp_ipi_range() takes a range of interrupts, and allows
>> >> the arch code to request them as if the were normal interrupts.
>> >> A standard handler is then called by the core IRQ code to deal
>> >> with the IPI.
>> >>
>> >> This means that we don't need to call irq_enter/irq_exit, and
>> >> that we don't need to deal with set_irq_regs either. So let's
>> >> move the dispatcher into its own function, and leave handle_IPI()
>> >> as a compatibility function.
>> >>
>> >> On the sending side, let's make use of ipi_send_mask, which
>> >> already exists for this purpose.
>> >
>> > You say nothing about the nesting of irq_enter() and irq_exit()
>> > for scheduler_ipi().
>> >
>> > Given that lockdep introduced the requirement that hard IRQs can't
>> > be nested, are we sure that calling irq_exit() twice is safe?
>> >
>> > Looking at irqtime_account_irq(), it seems that will cause double-
>> > accounting of in-interrupt time, since we will increment
>> > irq_start_time by just over twice the the period spent handling
>> > the IPI.
>> >
>> > I think the rest of irq_exit() should be safe, but still, this
>> > behaviour should be documented at the very least, if not avoided.
>> >
>>
>> x86 does the same (though IIUC only when tracing reschedule IPI's),
>
> Right, so when the system is operating normally, then the accounting is
> correct.  When the reschedule path is being explicitly traced, then
> the accounting will be doubled for it.
>

Right, it's true that they are only affected when tracing.


That said, AFAICT the accounting nests correctly. Consider:

  irq_enter() @t0
    irq_enter() @t1
    ...
    irq_exit() @t2
  irq_exit() @t3

Entering irqtime_account_irq() at time t, we get something like:

  delta = t - irq_start_time;
  irq_start_time = t;

  if (hardirq_count())
          total += delta;

Since we go through the accounting on both irq_enter() and irq_exit(), we'd
have something like:

  irq_enter() @t0
    irq_start_time = t0

  irq_enter() @t1
    delta = t1 - t0
    irq_start_time = t1
    total += t1 - t0

  irq_exit() @t2
    delta = t2 - t1
    irq_start_time = t2
    total += t2 - t1

  irq_exit() @t3
    delta = t3 - t2
    irq_start_time = t3
    total += t3 - t2


So at the end we have incremented the total by

  t1-t0 + t2-t1 + t3-t2 = t3 - t0

IOW the duration of the outermost pair (... Unless I goofed up).
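
A throwaway user-space model of that arithmetic (emphatically not the
kernel code, and ignoring the softirq side of the accounting) behaves
the same way:

  #include <stdio.h>

  static long long irq_start_time, total;
  static int hardirq_count;

  static void account(long long now)
  {
          long long delta = now - irq_start_time;

          irq_start_time = now;
          if (hardirq_count)
                  total += delta;
  }

  /* Mirrors the ordering above: account on entry before the count is
   * bumped, account on exit before it is dropped. */
  static void model_irq_enter(long long now) { account(now); hardirq_count++; }
  static void model_irq_exit(long long now)  { account(now); hardirq_count--; }

  int main(void)
  {
          model_irq_enter(100);    /* t0: outer irq_enter()             */
          model_irq_enter(110);    /* t1: nested (e.g. scheduler_ipi()) */
          model_irq_exit(150);     /* t2                                */
          model_irq_exit(160);     /* t3                                */

          printf("%lld\n", total); /* prints 60, i.e. t3 - t0           */
          return 0;
  }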

> What's being proposed for ARM is to always have this mis-accounting,
> where no mis-accounting was present before - and some of us (me) /do/
> enable IRQ accounting in our kernels as standard. So, you can take
> this as a kernel regression report from a user.
>
>> and MIPS has the same issue as it also uses generic IRQ IPI's - so
>> although it's not ideal, I think we can live with it.
>
> Yes, but is there anyone who cares about this for MIPS?

Patch

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c77c93c485a0..0caaba9bf880 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -48,6 +48,7 @@  config ARM
 	select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
 	select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select GENERIC_IRQ_IPI if SMP
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_IDLE_POLL_SETUP
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index a91f21e3c5b5..0e29730295ca 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -45,6 +45,11 @@  extern void smp_init_cpus(void);
  */
 extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
 
+/*
+ * Register IPI interrupts with the arch SMP code
+ */
+extern void set_smp_ipi_range(int ipi_base, int nr_ipi);
+
 /*
  * Called from platform specific assembly code, this is the
  * secondary CPU entry point.
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 46e1be9e57a8..618641978a5b 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -79,10 +79,19 @@  enum ipi_msg_type {
 	 */
 };
 
+static int ipi_irq_base;
+static int nr_ipi = NR_IPI;
+static struct irq_desc *ipi_desc[NR_IPI];
+
+static void ipi_setup(int cpu);
+static void ipi_teardown(int cpu);
+
 static DECLARE_COMPLETION(cpu_running);
 
 static struct smp_operations smp_ops __ro_after_init;
 
+static void ipi_setup(int cpu);
+
 void __init smp_set_ops(const struct smp_operations *ops)
 {
 	if (ops)
@@ -308,6 +317,8 @@  void arch_cpu_idle_dead(void)
 
 	local_irq_disable();
 
+	ipi_teardown(cpu);
+
 	/*
 	 * Flush the data out of the L1 cache for this CPU.  This must be
 	 * before the completion to ensure that data is safely written out
@@ -424,6 +435,8 @@  asmlinkage void secondary_start_kernel(void)
 
 	notify_cpu_starting(cpu);
 
+	ipi_setup(cpu);
+
 	calibrate_delay();
 
 	smp_store_cpu_info(cpu);
@@ -629,10 +642,9 @@  asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
 	handle_IPI(ipinr, regs);
 }
 
-void handle_IPI(int ipinr, struct pt_regs *regs)
+static void do_handle_IPI(int ipinr)
 {
 	unsigned int cpu = smp_processor_id();
-	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	if ((unsigned)ipinr < NR_IPI) {
 		trace_ipi_entry_rcuidle(ipi_types[ipinr]);
@@ -645,9 +657,7 @@  void handle_IPI(int ipinr, struct pt_regs *regs)
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 	case IPI_TIMER:
-		irq_enter();
 		tick_receive_broadcast();
-		irq_exit();
 		break;
 #endif
 
@@ -656,36 +666,26 @@  void handle_IPI(int ipinr, struct pt_regs *regs)
 		break;
 
 	case IPI_CALL_FUNC:
-		irq_enter();
 		generic_smp_call_function_interrupt();
-		irq_exit();
 		break;
 
 	case IPI_CPU_STOP:
-		irq_enter();
 		ipi_cpu_stop(cpu);
-		irq_exit();
 		break;
 
 #ifdef CONFIG_IRQ_WORK
 	case IPI_IRQ_WORK:
-		irq_enter();
 		irq_work_run();
-		irq_exit();
 		break;
 #endif
 
 	case IPI_COMPLETION:
-		irq_enter();
 		ipi_complete(cpu);
-		irq_exit();
 		break;
 
 	case IPI_CPU_BACKTRACE:
 		printk_nmi_enter();
-		irq_enter();
-		nmi_cpu_backtrace(regs);
-		irq_exit();
+		nmi_cpu_backtrace(get_irq_regs());
 		printk_nmi_exit();
 		break;
 
@@ -697,9 +697,76 @@  void handle_IPI(int ipinr, struct pt_regs *regs)
 
 	if ((unsigned)ipinr < NR_IPI)
 		trace_ipi_exit_rcuidle(ipi_types[ipinr]);
+}
+
+/* Legacy version, should go away once all irqchips have been converted */
+void handle_IPI(int ipinr, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	irq_enter();
+	do_handle_IPI(ipinr);
+	irq_exit();
+
 	set_irq_regs(old_regs);
 }
 
+static irqreturn_t ipi_handler(int irq, void *data)
+{
+	do_handle_IPI(irq - ipi_irq_base);
+	return IRQ_HANDLED;
+}
+
+static void ipi_send(const struct cpumask *target, unsigned int ipi)
+{
+	__ipi_send_mask(ipi_desc[ipi], target);
+}
+
+static void ipi_setup(int cpu)
+{
+	if (ipi_irq_base) {
+		int i;
+
+		for (i = 0; i < nr_ipi; i++)
+			enable_percpu_irq(ipi_irq_base + i, 0);
+	}
+}
+
+static void ipi_teardown(int cpu)
+{
+	if (ipi_irq_base) {
+		int i;
+
+		for (i = 0; i < nr_ipi; i++)
+			disable_percpu_irq(ipi_irq_base + i);
+	}
+}
+
+void __init set_smp_ipi_range(int ipi_base, int n)
+{
+	int i;
+
+	WARN_ON(n < NR_IPI);
+	nr_ipi = min(n, NR_IPI);
+
+	for (i = 0; i < nr_ipi; i++) {
+		int err;
+
+		err = request_percpu_irq(ipi_base + i, ipi_handler,
+					 "IPI", &irq_stat);
+		WARN_ON(err);
+
+		ipi_desc[i] = irq_to_desc(ipi_base + i);
+		irq_set_status_flags(ipi_base + i, IRQ_NO_ACCOUNTING);
+	}
+
+	ipi_irq_base = ipi_base;
+	set_smp_cross_call(ipi_send);
+
+	/* Setup the boot CPU immediately */
+	ipi_setup(smp_processor_id());
+}
+
 void smp_send_reschedule(int cpu)
 {
 	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);