diff mbox series

[1/5] irqchip/riscv-intc: Balance priority and fairness during irq handling

Message ID 20250113150933.65121-2-luxu.kernel@bytedance.com (mailing list archive)
State New
Headers show
Series riscv: irqchip: Optimization of interrupt handling | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh took 105.13s
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh took 1059.44s
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh took 1280.47s
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh took 16.32s
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh took 18.06s
conchuod/patch-1-test-6 success .github/scripts/patches/tests/checkpatch.sh took 0.40s
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh took 37.55s
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh took 0.00s
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh took 0.48s
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh took 0.01s
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh took 0.00s
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh took 0.02s

Commit Message

Xu Lu Jan. 13, 2025, 3:09 p.m. UTC
Both csr cause and csr topi record the pending bit with the highest
priority. If interrupts with high priority arrive frequently within a
certain period of time, the interrupts with low priority won't get a
chance to be handled.

For example, if external interrupts and software interrupts arrive very
frequently, the timer interrupts will never be handled. Then buddy
watchdog on a buddy CPU will report a hardlockup on the current CPU while
current CPU actually can receive irq.

This commit solves this problem by handling all pending irqs in a round.
During each round, this commit handles pending irqs by their priority.

Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
 drivers/irqchip/irq-riscv-intc.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

Comments

Anup Patel Jan. 15, 2025, 11:39 a.m. UTC | #1
On Mon, Jan 13, 2025 at 8:39 PM Xu Lu <luxu.kernel@bytedance.com> wrote:
>
> Both csr cause and csr topi record the pending bit with the highest
> priority. If interrupts with high priority arrive frequently within a
> certain period of time, the interrupts with low priority won't get a
> chance to be handled.
>
> For example, if external interrupts and software interrupts arrive very
> frequently, the timer interrupts will never be handled. Then buddy
> watchdog on a buddy CPU will report a hardlockup on the current CPU while
> current CPU actually can receive irq.

Platform with proper HW watchdog will not see this issue because HW
watchdog interrupt will be an external interrupt.

There was an effort to standardize watchdog for RISC-V platforms but it was
deferred to future standardization. We should resume those discussions within
RVI forums.

>
> This commit solves this problem by handling all pending irqs in a round.
> During each round, this commit handles pending irqs by their priority.
>
> Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> ---
>  drivers/irqchip/irq-riscv-intc.c | 32 ++++++++++++++++++++++++++------
>  1 file changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
> index f653c13de62b..bc2ec26aa9e9 100644
> --- a/drivers/irqchip/irq-riscv-intc.c
> +++ b/drivers/irqchip/irq-riscv-intc.c
> @@ -26,20 +26,40 @@ static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG;
>  static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG;
>  static unsigned int riscv_intc_custom_nr_irqs __ro_after_init;
>
> +static unsigned int riscv_prio_irqs[] = {
> +#ifdef CONFIG_RISCV_M_MODE
> +       IRQ_M_EXT, IRQ_M_SOFT, IRQ_M_TIMER,
> +#endif
> +       IRQ_S_EXT, IRQ_S_SOFT, IRQ_S_TIMER, IRQ_S_GEXT,
> +       IRQ_VS_EXT, IRQ_VS_SOFT, IRQ_VS_TIMER,
> +       IRQ_PMU_OVF,
> +};
> +
>  static void riscv_intc_irq(struct pt_regs *regs)
>  {
> -       unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
> -
> -       if (generic_handle_domain_irq(intc_domain, cause))
> -               pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause);
> +       unsigned long pending = csr_read(CSR_IP) & csr_read(CSR_IE);
> +       unsigned int i;
> +
> +       for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
> +               if (pending & (1UL << riscv_prio_irqs[i]))
> +                       if (generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]))
> +                               pr_warn_ratelimited("Failed to handle interrupt (cause: %u)\n",
> +                                                   riscv_prio_irqs[i]);

This is overriding the builtin priority assignment for local interrupts as
defined by the RISC-V privileged specification for non-AIA systems
which changes the expected behaviour on existing platforms.

>  }
>
>  static void riscv_intc_aia_irq(struct pt_regs *regs)
>  {
>         unsigned long topi;
> +       unsigned long pending;
> +       unsigned int i;
> +
> +       while ((topi = csr_read(CSR_TOPI))) {
> +               pending = csr_read(CSR_IP) & csr_read(CSR_IE);
>
> -       while ((topi = csr_read(CSR_TOPI)))
> -               generic_handle_domain_irq(intc_domain, topi >> TOPI_IID_SHIFT);
> +               for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
> +                       if (pending & (1UL << riscv_prio_irqs[i]))
> +                               generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]);
> +       }

The AIA specification already provides a mechanism to change
priorities of local interrupts. The firmware or bootloader can always
provide custom priorities to local interrupts.

In general, requiring certain local interrupts to have higher priority is
platform or use-case specific and should be done as AIA configuration
in firmware or bootloader.

NAK to this patch from my side since it is just adding a software
work-around for a missing standard watchdog in the RISC-V
ecosystem.

One possible approach is to let platforms have their implementation
specific M-mode watchdog and for supervisor software (both HS and
VS-mode) we can have SBI based watchdog where the supervisor
watchdog expiry is an SSE (higher priority than all local interrupts).

Regards,
Anup
Xu Lu Jan. 15, 2025, 12:37 p.m. UTC | #2
Hi Anup,

I agree that a standardized NMI or SSE event is the optimal solution
to the watchdog problem. Here I just use buddy watchdog to illustrate
the possible consequences of interrupts with low priority getting
starved.

I believe that AIA or firmware can adjust the priority of different
irqs. What I want to emphasize is that in existing implementation,
kernel will continue handling newly arrived high priority irqs, no
matter how long lower priority irqs have been waiting for.

For example, if both external irq and IPI (assuming non AIA
architecture using software irq) arrive in cycle 0, kernel will handle
the external irq first. Then a new external irq arrives during the
first external irq handling procedure, let's say cycle 100. Then
kernel finishes the first external irq handling procedure, let's say
cycle 200, it will handle the newly arrived external irq, instead of
the already arrived IPI. The IPI won't be handled until the second
external irq is handled, let's say cycle 300. Things get worse if
external interrupts keep arriving.

I think it is better to provide a mechanism to avoid this. So I regard
interrupts arriving within a certain period as a round and only handle
interrupts in the new round after all interrupts in the old round have
been handled. The interrupt priority only takes effect in the same
round.

Of course this is not the optimal way. Please give some advice if you
also think it is necessary (for example introduce an interrupt
priority decreasing mechanism instead).

Best Regards,

Xu Lu

On Wed, Jan 15, 2025 at 7:39 PM Anup Patel <anup@brainfault.org> wrote:
>
> On Mon, Jan 13, 2025 at 8:39 PM Xu Lu <luxu.kernel@bytedance.com> wrote:
> >
> > Both csr cause and csr topi record the pending bit with the highest
> > priority. If interrupts with high priority arrive frequently within a
> > certain period of time, the interrupts with low priority won't get a
> > chance to be handled.
> >
> > For example, if external interrupts and software interrupts arrive very
> > frequently, the timer interrupts will never be handled. Then buddy
> > watchdog on a buddy CPU will report a hardlockup on the current CPU while
> > current CPU actually can receive irq.
>
> Platform with proper HW watchdog will not see this issue because HW
> watchdog interrupt will be an external interrupt.
>
> There was an effort to standardize watchdog for RISC-V platforms but it was
> deferred to future standardization. We should resume those discussions within
> RVI forums.
>
> >
> > This commit solves this problem by handling all pending irqs in a round.
> > During each round, this commit handles pending irqs by their priority.
> >
> > Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> > ---
> >  drivers/irqchip/irq-riscv-intc.c | 32 ++++++++++++++++++++++++++------
> >  1 file changed, 26 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
> > index f653c13de62b..bc2ec26aa9e9 100644
> > --- a/drivers/irqchip/irq-riscv-intc.c
> > +++ b/drivers/irqchip/irq-riscv-intc.c
> > @@ -26,20 +26,40 @@ static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG;
> >  static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG;
> >  static unsigned int riscv_intc_custom_nr_irqs __ro_after_init;
> >
> > +static unsigned int riscv_prio_irqs[] = {
> > +#ifdef CONFIG_RISCV_M_MODE
> > +       IRQ_M_EXT, IRQ_M_SOFT, IRQ_M_TIMER,
> > +#endif
> > +       IRQ_S_EXT, IRQ_S_SOFT, IRQ_S_TIMER, IRQ_S_GEXT,
> > +       IRQ_VS_EXT, IRQ_VS_SOFT, IRQ_VS_TIMER,
> > +       IRQ_PMU_OVF,
> > +};
> > +
> >  static void riscv_intc_irq(struct pt_regs *regs)
> >  {
> > -       unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
> > -
> > -       if (generic_handle_domain_irq(intc_domain, cause))
> > -               pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause);
> > +       unsigned long pending = csr_read(CSR_IP) & csr_read(CSR_IE);
> > +       unsigned int i;
> > +
> > +       for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
> > +               if (pending & (1UL << riscv_prio_irqs[i]))
> > +                       if (generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]))
> > +                               pr_warn_ratelimited("Failed to handle interrupt (cause: %u)\n",
> > +                                                   riscv_prio_irqs[i]);
>
> This is overriding the builtin priority assignment for local interrupts as
> defined by the RISC-V privileged specification for non-AIA systems
> which changes the expected behaviour on existing platforms.
>
> >  }
> >
> >  static void riscv_intc_aia_irq(struct pt_regs *regs)
> >  {
> >         unsigned long topi;
> > +       unsigned long pending;
> > +       unsigned int i;
> > +
> > +       while ((topi = csr_read(CSR_TOPI))) {
> > +               pending = csr_read(CSR_IP) & csr_read(CSR_IE);
> >
> > -       while ((topi = csr_read(CSR_TOPI)))
> > -               generic_handle_domain_irq(intc_domain, topi >> TOPI_IID_SHIFT);
> > +               for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
> > +                       if (pending & (1UL << riscv_prio_irqs[i]))
> > +                               generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]);
> > +       }
>
> The AIA specification already provides a mechanism to change
> priorities of local interrupts. The firmware or bootloader can always
> provide custom priorities to local interrupts.
>
> In general, requiring certain local interrupts to have higher priority is
> platform or use-case specific and should be done as AIA configuration
> in firmware or bootloader.
>
> NAK to this patch from my side since it is just adding a software
> work-around for a missing standard watchdog in the RISC-V
> ecosystem.
>
> One possible approach is to let platforms have their implementation
> specific M-mode watchdog and for supervisor software (both HS and
> VS-mode) we can have SBI based watchdog where the supervisor
> watchdog expiry is an SSE (higher priority than all local interrupts).
>
> Regards,
> Anup
Anup Patel Jan. 15, 2025, 5:01 p.m. UTC | #3
On Wed, Jan 15, 2025 at 6:08 PM Xu Lu <luxu.kernel@bytedance.com> wrote:
>
> Hi Anup,
>
> I agree that a standardized NMI or SSE event is the optimal solution
> to the watchdog problem. Here I just use buddy watchdog to illustrate
> the possible consequences of interrupts with low priority getting
> starved.
>
> I believe that AIA or firmware can adjust the priority of different
> irqs. What I want to emphasize is that in existing implementation,
> kernel will continue handling newly arrived high priority irqs, no
> matter how long lower priority irqs have been waiting for.
>
> For example, if both external irq and IPI (assuming non AIA
> architecture using software irq) arrive in cycle 0, kernel will handle
> the external irq first. Then a new external irq arrives during the
> first external irq handling procedure, let's say cycle 100. Then
> kernel finishes the first external irq handling procedure, let's say
> cycle 200, it will handle the newly arrived external irq, instead of
> the already arrived IPI. The IPI won't be handled until the second
> external irq is handled, let's say cycle 300. Things get worse if
> external interrupts keep arriving.
>
> I think it is better to provide a mechanism to avoid this. So I regard
> interrupts arriving within a certain period as a round and only handle
> interrupts in the new round after all interrupts in the old round have
> been handled. The interrupt priority only takes effect in the same
> round.

Well, this flood of external interrupts from a device is a classical
problem across architectures. The best way to solve this is improve
the device driver to use better bottom-half techniques such as
NAPI in-case of network driver, threaded IRQs, etc.

Working around this in the interrupt controller driver is not the
right way.

Regards,
Anup

>
> Of course this is not the optimal way. Please give some advice if you
> also think it is necessary (for example introduce an interrupt
> priority decreasing mechanism instead).
>
> Best Regards,
>
> Xu Lu
>
> On Wed, Jan 15, 2025 at 7:39 PM Anup Patel <anup@brainfault.org> wrote:
> >
> > On Mon, Jan 13, 2025 at 8:39 PM Xu Lu <luxu.kernel@bytedance.com> wrote:
> > >
> > > Both csr cause and csr topi record the pending bit with the highest
> > > priority. If interrupts with high priority arrive frequently within a
> > > certain period of time, the interrupts with low priority won't get a
> > > chance to be handled.
> > >
> > > For example, if external interrupts and software interrupts arrive very
> > > frequently, the timer interrupts will never be handled. Then buddy
> > > watchdog on a buddy CPU will report a hardlockup on the current CPU while
> > > current CPU actually can receive irq.
> >
> > Platform with proper HW watchdog will not see this issue because HW
> > watchdog interrupt will be an external interrupt.
> >
> > There was an effort to standardize watchdog for RISC-V platforms but it was
> > deferred to future standardization. We should resume those discussions within
> > RVI forums.
> >
> > >
> > > This commit solves this problem by handling all pending irqs in a round.
> > > During each round, this commit handles pending irqs by their priority.
> > >
> > > Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> > > ---
> > >  drivers/irqchip/irq-riscv-intc.c | 32 ++++++++++++++++++++++++++------
> > >  1 file changed, 26 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
> > > index f653c13de62b..bc2ec26aa9e9 100644
> > > --- a/drivers/irqchip/irq-riscv-intc.c
> > > +++ b/drivers/irqchip/irq-riscv-intc.c
> > > @@ -26,20 +26,40 @@ static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG;
> > >  static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG;
> > >  static unsigned int riscv_intc_custom_nr_irqs __ro_after_init;
> > >
> > > +static unsigned int riscv_prio_irqs[] = {
> > > +#ifdef CONFIG_RISCV_M_MODE
> > > +       IRQ_M_EXT, IRQ_M_SOFT, IRQ_M_TIMER,
> > > +#endif
> > > +       IRQ_S_EXT, IRQ_S_SOFT, IRQ_S_TIMER, IRQ_S_GEXT,
> > > +       IRQ_VS_EXT, IRQ_VS_SOFT, IRQ_VS_TIMER,
> > > +       IRQ_PMU_OVF,
> > > +};
> > > +
> > >  static void riscv_intc_irq(struct pt_regs *regs)
> > >  {
> > > -       unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
> > > -
> > > -       if (generic_handle_domain_irq(intc_domain, cause))
> > > -               pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause);
> > > +       unsigned long pending = csr_read(CSR_IP) & csr_read(CSR_IE);
> > > +       unsigned int i;
> > > +
> > > +       for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
> > > +               if (pending & (1UL << riscv_prio_irqs[i]))
> > > +                       if (generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]))
> > > +                               pr_warn_ratelimited("Failed to handle interrupt (cause: %u)\n",
> > > +                                                   riscv_prio_irqs[i]);
> >
> > This is overriding the builtin priority assignment for local interrupts as
> > defined by the RISC-V privileged specification for non-AIA systems
> > which changes the expected behaviour on existing platforms.
> >
> > >  }
> > >
> > >  static void riscv_intc_aia_irq(struct pt_regs *regs)
> > >  {
> > >         unsigned long topi;
> > > +       unsigned long pending;
> > > +       unsigned int i;
> > > +
> > > +       while ((topi = csr_read(CSR_TOPI))) {
> > > +               pending = csr_read(CSR_IP) & csr_read(CSR_IE);
> > >
> > > -       while ((topi = csr_read(CSR_TOPI)))
> > > -               generic_handle_domain_irq(intc_domain, topi >> TOPI_IID_SHIFT);
> > > +               for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
> > > +                       if (pending & (1UL << riscv_prio_irqs[i]))
> > > +                               generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]);
> > > +       }
> >
> > The AIA specification already provides a mechanism to change
> > priorities of local interrupts. The firmware or bootloader can always
> > provide custom priorities to local interrupts.
> >
> > In general, requiring certain local interrupts to have higher priority is
> > platform or use-case specific and should be done as AIA configuration
> > in firmware or bootloader.
> >
> > NAK to this patch from my side since it is just adding a software
> > work-around for a missing standard watchdog in the RISC-V
> > ecosystem.
> >
> > One possible approach is to let platforms have their implementation
> > specific M-mode watchdog and for supervisor software (both HS and
> > VS-mode) we can have SBI based watchdog where the supervisor
> > watchdog expiry is an SSE (higher priority than all local interrupts).
> >
> > Regards,
> > Anup
Xu Lu Jan. 16, 2025, 2:26 a.m. UTC | #4
On Thu, Jan 16, 2025 at 1:02 AM Anup Patel <anup@brainfault.org> wrote:
>
> On Wed, Jan 15, 2025 at 6:08 PM Xu Lu <luxu.kernel@bytedance.com> wrote:
> >
> > Hi Anup,
> >
> > I agree that a standardized NMI or SSE event is the optimal solution
> > to the watchdog problem. Here I just use buddy watchdog to illustrate
> > the possible consequences of interrupts with low priority getting
> > starved.
> >
> > I believe that AIA or firmware can adjust the priority of different
> > irqs. What I want to emphasize is that in existing implementation,
> > kernel will continue handling newly arrived high priority irqs, no
> > matter how long lower priority irqs have been waiting for.
> >
> > For example, if both external irq and IPI (assuming non AIA
> > architecture using software irq) arrive in cycle 0, kernel will handle
> > the external irq first. Then a new external irq arrives during the
> > first external irq handling procedure, let's say cycle 100. Then
> > kernel finishes the first external irq handling procedure, let's say
> > cycle 200, it will handle the newly arrived external irq, instead of
> > the already arrived IPI. The IPI won't be handled until the second
> > external irq is handled, let's say cycle 300. Things get worse if
> > external interrupts keep arriving.
> >
> > I think it is better to provide a mechanism to avoid this. So I regard
> > interrupts arriving within a certain period as a round and only handle
> > interrupts in the new round after all interrupts in the old round have
> > been handled. The interrupt priority only takes effect in the same
> > round.
>
> Well, this flood of external interrupts from a device is a classical
> problem across architectures. The best way to solve this is improve
> the device driver to use better bottom-half techniques such as
> NAPI in-case of network driver, threaded IRQs, etc.
>
> Working around this in the interrupt controller driver is not the
> right way.
>
> Regards,
> Anup

OK. I will remove this commit and only replace the 'writel_relaxedl'
with 'writel' when sending IPI.

Thanks!

Xu Lu

>
> >
> > Of course this is not the optimal way. Please give some advice if you
> > also think it is necessary (for example introduce an interrupt
> > priority decreasing mechanism instead).
> >
> > Best Regards,
> >
> > Xu Lu
> >
> > On Wed, Jan 15, 2025 at 7:39 PM Anup Patel <anup@brainfault.org> wrote:
> > >
> > > On Mon, Jan 13, 2025 at 8:39 PM Xu Lu <luxu.kernel@bytedance.com> wrote:
> > > >
> > > > Both csr cause and csr topi record the pending bit with the highest
> > > > priority. If interrupts with high priority arrive frequently within a
> > > > certain period of time, the interrupts with low priority won't get a
> > > > chance to be handled.
> > > >
> > > > For example, if external interrupts and software interrupts arrive very
> > > > frequently, the timer interrupts will never be handled. Then buddy
> > > > watchdog on a buddy CPU will report a hardlockup on the current CPU while
> > > > current CPU actually can receive irq.
> > >
> > > Platform with proper HW watchdog will not see this issue because HW
> > > watchdog interrupt will be an external interrupt.
> > >
> > > There was an effort to standardize watchdog for RISC-V platforms but it was
> > > deferred to future standardization. We should resume those discussions within
> > > RVI forums.
> > >
> > > >
> > > > This commit solves this problem by handling all pending irqs in a round.
> > > > During each round, this commit handles pending irqs by their priority.
> > > >
> > > > Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> > > > ---
> > > >  drivers/irqchip/irq-riscv-intc.c | 32 ++++++++++++++++++++++++++------
> > > >  1 file changed, 26 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
> > > > index f653c13de62b..bc2ec26aa9e9 100644
> > > > --- a/drivers/irqchip/irq-riscv-intc.c
> > > > +++ b/drivers/irqchip/irq-riscv-intc.c
> > > > @@ -26,20 +26,40 @@ static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG;
> > > >  static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG;
> > > >  static unsigned int riscv_intc_custom_nr_irqs __ro_after_init;
> > > >
> > > > +static unsigned int riscv_prio_irqs[] = {
> > > > +#ifdef CONFIG_RISCV_M_MODE
> > > > +       IRQ_M_EXT, IRQ_M_SOFT, IRQ_M_TIMER,
> > > > +#endif
> > > > +       IRQ_S_EXT, IRQ_S_SOFT, IRQ_S_TIMER, IRQ_S_GEXT,
> > > > +       IRQ_VS_EXT, IRQ_VS_SOFT, IRQ_VS_TIMER,
> > > > +       IRQ_PMU_OVF,
> > > > +};
> > > > +
> > > >  static void riscv_intc_irq(struct pt_regs *regs)
> > > >  {
> > > > -       unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
> > > > -
> > > > -       if (generic_handle_domain_irq(intc_domain, cause))
> > > > -               pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause);
> > > > +       unsigned long pending = csr_read(CSR_IP) & csr_read(CSR_IE);
> > > > +       unsigned int i;
> > > > +
> > > > +       for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
> > > > +               if (pending & (1UL << riscv_prio_irqs[i]))
> > > > +                       if (generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]))
> > > > +                               pr_warn_ratelimited("Failed to handle interrupt (cause: %u)\n",
> > > > +                                                   riscv_prio_irqs[i]);
> > >
> > > This is overriding the builtin priority assignment for local interrupts as
> > > defined by the RISC-V privileged specification for non-AIA systems
> > > which changes the expected behaviour on existing platforms.
> > >
> > > >  }
> > > >
> > > >  static void riscv_intc_aia_irq(struct pt_regs *regs)
> > > >  {
> > > >         unsigned long topi;
> > > > +       unsigned long pending;
> > > > +       unsigned int i;
> > > > +
> > > > +       while ((topi = csr_read(CSR_TOPI))) {
> > > > +               pending = csr_read(CSR_IP) & csr_read(CSR_IE);
> > > >
> > > > -       while ((topi = csr_read(CSR_TOPI)))
> > > > -               generic_handle_domain_irq(intc_domain, topi >> TOPI_IID_SHIFT);
> > > > +               for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
> > > > +                       if (pending & (1UL << riscv_prio_irqs[i]))
> > > > +                               generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]);
> > > > +       }
> > >
> > > The AIA specification already provides a mechanism to change
> > > priorities of local interrupts. The firmware or bootloader can always
> > > provide custom priorities to local interrupts.
> > >
> > > In general, requiring certain local interrupts to have higher priority is
> > > platform or use-case specific and should be done as AIA configuration
> > > in firmware or bootloader.
> > >
> > > NAK to this patch from my side since it is just adding a software
> > > work-around for a missing standard watchdog in the RISC-V
> > > ecosystem.
> > >
> > > One possible approach is to let platforms have their implementation
> > > specific M-mode watchdog and for supervisor software (both HS and
> > > VS-mode) we can have SBI based watchdog where the supervisor
> > > watchdog expiry is an SSE (higher priority than all local interrupts).
> > >
> > > Regards,
> > > Anup
diff mbox series

Patch

diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
index f653c13de62b..bc2ec26aa9e9 100644
--- a/drivers/irqchip/irq-riscv-intc.c
+++ b/drivers/irqchip/irq-riscv-intc.c
@@ -26,20 +26,40 @@  static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG;
 static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG;
 static unsigned int riscv_intc_custom_nr_irqs __ro_after_init;
 
+static unsigned int riscv_prio_irqs[] = {
+#ifdef CONFIG_RISCV_M_MODE
+	IRQ_M_EXT, IRQ_M_SOFT, IRQ_M_TIMER,
+#endif
+	IRQ_S_EXT, IRQ_S_SOFT, IRQ_S_TIMER, IRQ_S_GEXT,
+	IRQ_VS_EXT, IRQ_VS_SOFT, IRQ_VS_TIMER,
+	IRQ_PMU_OVF,
+};
+
 static void riscv_intc_irq(struct pt_regs *regs)
 {
-	unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
-
-	if (generic_handle_domain_irq(intc_domain, cause))
-		pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause);
+	unsigned long pending = csr_read(CSR_IP) & csr_read(CSR_IE);
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
+		if (pending & (1UL << riscv_prio_irqs[i]))
+			if (generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]))
+				pr_warn_ratelimited("Failed to handle interrupt (cause: %u)\n",
+						    riscv_prio_irqs[i]);
 }
 
 static void riscv_intc_aia_irq(struct pt_regs *regs)
 {
 	unsigned long topi;
+	unsigned long pending;
+	unsigned int i;
+
+	while ((topi = csr_read(CSR_TOPI))) {
+		pending = csr_read(CSR_IP) & csr_read(CSR_IE);
 
-	while ((topi = csr_read(CSR_TOPI)))
-		generic_handle_domain_irq(intc_domain, topi >> TOPI_IID_SHIFT);
+		for (i = 0; i < ARRAY_SIZE(riscv_prio_irqs); i++)
+			if (pending & (1UL << riscv_prio_irqs[i]))
+				generic_handle_domain_irq(intc_domain, riscv_prio_irqs[i]);
+	}
 }
 
 /*