diff mbox series

[RFC] irqchip/sifive-plic: Fix getting wrong chip_data when interrupt is hierarchy

Message ID 20201029023738.127472-1-greentime.hu@sifive.com (mailing list archive)
State New, archived
Headers show
Series [RFC] irqchip/sifive-plic: Fix getting wrong chip_data when interrupt is hierarchy | expand

Commit Message

Greentime Hu Oct. 29, 2020, 2:37 a.m. UTC
This oops is caused by a wrong chip_data. plic_irq_unmask uses
irq_get_chip_data(irq_data->irq) to get the chip_data, but in an irq
hierarchy that lookup may return the chip_data of a different irq_data
that has the same irq_data->irq.

In this case, it gets the irq_data of sifive_gpio_irqchip instead of the
one for plic_chip, so it reads the wrong chip_data and then a wrong lmask
from it, which causes this oops.

To fix this issue, use irq_data_get_irq_chip_data(irq_data) to get the
correct chip_data of plic_chip.

(gdb) p d
$11 = (struct irq_data *) 0xffffffe1f695f620
(gdb) p *d
$9 = {
  mask = 0,
  irq = 57,
  hwirq = 6,
  common = 0xffffffe1f695f600,
  chip = 0xffffffe0018b5630 <sifive_gpio_irqchip>,
  domain = 0xffffffe1f692c400,
  parent_data = 0xffffffe1f68482c0,
  chip_data = 0xffffffe1f564a820
}

(gdb) p d
$6 = (struct irq_data *) 0xffffffe1f68482c0
(gdb) p *d
$7 = {
  mask = 0,
  irq = 57,
  hwirq = 29,
  common = 0xffffffe1f695f600,
  chip = 0xffffffe0018b5070 <plic_chip>,
  domain = 0xffffffe1f6635e00,
  parent_data = 0x0,
  chip_data = 0xffffffe1f660f1a0
}

[    3.030165] ------------[ cut here ]------------
[    3.034614] WARNING: CPU: 1 PID: 1 at drivers/irqchip/irq-sifive-plic.c:125 plic_irq_unmask+0xc4/0x114
[    3.043887] Modules linked in:
[    3.046932] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.9.0 #1
[    3.052748] epc: ffffffe000588e90 ra : ffffffe000588e88 sp : ffffffe1f6753940
[    3.059869]  gp : ffffffe001978f48 tp : ffffffe1f6748000 t0 : ffffffe001995cb0
[    3.067080]  t1 : ffffffe001995be8 t2 : 73616d61202c343a s0 : ffffffe1f67539a0
[    3.074288]  s1 : ffffffe1f4968140 a0 : 00000000000000b2 a1 : 0000000000000000
[    3.081497]  a2 : 00000000000000c2 a3 : 0000000000000000 a4 : 381c5a89432fe900
[    3.088707]  a5 : 0000000000000004 a6 : 0000000000000000 a7 : 00000000000001aa
[    3.095916]  s2 : ffffffe1f5901020 s3 : ffffffe00197a0a8 s4 : ffffffe001978b0c
[    3.103125]  s5 : ffffffe00197a1f0 s6 : 0000000000000008 s7 : ffffffe1f4983c9c
[    3.110335]  s8 : ffffffe1f4983c68 s9 : ffffffe1f4983c00 s10: ffffffe00000117c
[    3.117544]  s11: 0000000000000000 t3 : 0000000000000007 t4 : 0000000000000000
[    3.124753]  t5 : 66666666663a6b73 t6 : ffffffe001988479
[    3.130052] status: 0000000200000100 badaddr: ffffffe001978b0c cause: 0000000000000003
[    3.137959] ---[ end trace dbc1129f842ecba3 ]---

Fixes: f1ad1133b18f ("irqchip/sifive-plic: Add support for multiple PLICs")
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
---
 drivers/irqchip/irq-sifive-plic.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Comments

Anup Patel Oct. 28, 2020, 4:09 p.m. UTC | #1
On Thu, Oct 29, 2020 at 8:07 AM Greentime Hu <greentime.hu@sifive.com> wrote:
>
> This oops is caused by a wrong chip_data and it is because plic_irq_unmask
> uses irq_get_chip_data(irq_data->irq) to get the chip_data. However it may
> get another irq_data with the same irq_data->irq if it is hierarchy.
>
> In this case, it will get irq_data of sifive_gpio_irqchip instead of
> plic_chip so that it will get a wrong chip_data and then the wrong lmask
> of it to cause this oops.
>
> To fix this issue, we can use irq_data_get_irq_chip_data(irq_data) to get
> the correct chip_data of plic_chip.
>
> (gdb) p d
> $11 = (struct irq_data *) 0xffffffe1f695f620
> (gdb) p *d
> $9 = {
>   mask = 0,
>   irq = 57,
>   hwirq = 6,
>   common = 0xffffffe1f695f600,
>   chip = 0xffffffe0018b5630 <sifive_gpio_irqchip>,
>   domain = 0xffffffe1f692c400,
>   parent_data = 0xffffffe1f68482c0,
>   chip_data = 0xffffffe1f564a820
> }
>
> (gdb) p d
> $6 = (struct irq_data *) 0xffffffe1f68482c0
> (gdb) p *d
> $7 = {
>   mask = 0,
>   irq = 57,
>   hwirq = 29,
>   common = 0xffffffe1f695f600,
>   chip = 0xffffffe0018b5070 <plic_chip>,
>   domain = 0xffffffe1f6635e00,
>   parent_data = 0x0,
>   chip_data = 0xffffffe1f660f1a0
> }
>
> [    3.030165] ------------[ cut here ]------------
> [    3.034614] WARNING: CPU: 1 PID: 1 at drivers/irqchip/irq-sifive-plic.c:125 plic_irq_unmask+0xc4/0x114
> [    3.043887] Modules linked in:
> [    3.046932] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.9.0 #1
> [    3.052748] epc: ffffffe000588e90 ra : ffffffe000588e88 sp : ffffffe1f6753940
> [    3.059869]  gp : ffffffe001978f48 tp : ffffffe1f6748000 t0 : ffffffe001995cb0
> [    3.067080]  t1 : ffffffe001995be8 t2 : 73616d61202c343a s0 : ffffffe1f67539a0
> [    3.074288]  s1 : ffffffe1f4968140 a0 : 00000000000000b2 a1 : 0000000000000000
> [    3.081497]  a2 : 00000000000000c2 a3 : 0000000000000000 a4 : 381c5a89432fe900
> [    3.088707]  a5 : 0000000000000004 a6 : 0000000000000000 a7 : 00000000000001aa
> [    3.095916]  s2 : ffffffe1f5901020 s3 : ffffffe00197a0a8 s4 : ffffffe001978b0c
> [    3.103125]  s5 : ffffffe00197a1f0 s6 : 0000000000000008 s7 : ffffffe1f4983c9c
> [    3.110335]  s8 : ffffffe1f4983c68 s9 : ffffffe1f4983c00 s10: ffffffe00000117c
> [    3.117544]  s11: 0000000000000000 t3 : 0000000000000007 t4 : 0000000000000000
> [    3.124753]  t5 : 66666666663a6b73 t6 : ffffffe001988479
> [    3.130052] status: 0000000200000100 badaddr: ffffffe001978b0c cause: 0000000000000003
> [    3.137959] ---[ end trace dbc1129f842ecba3 ]---
>
> Fixes: f1ad1133b18f ("irqchip/sifive-plic: Add support for multiple PLICs")
> Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
> ---
>  drivers/irqchip/irq-sifive-plic.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> index 4048657ece0a..6f432d2a5ceb 100644
> --- a/drivers/irqchip/irq-sifive-plic.c
> +++ b/drivers/irqchip/irq-sifive-plic.c
> @@ -99,7 +99,7 @@ static inline void plic_irq_toggle(const struct cpumask *mask,
>                                    struct irq_data *d, int enable)
>  {
>         int cpu;
> -       struct plic_priv *priv = irq_get_chip_data(d->irq);
> +       struct plic_priv *priv = irq_data_get_irq_chip_data(d);
>
>         writel(enable, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
>         for_each_cpu(cpu, mask) {
> @@ -115,7 +115,7 @@ static void plic_irq_unmask(struct irq_data *d)
>  {
>         struct cpumask amask;
>         unsigned int cpu;
> -       struct plic_priv *priv = irq_get_chip_data(d->irq);
> +       struct plic_priv *priv = irq_data_get_irq_chip_data(d);
>
>         cpumask_and(&amask, &priv->lmask, cpu_online_mask);
>         cpu = cpumask_any_and(irq_data_get_affinity_mask(d),
> @@ -127,7 +127,7 @@ static void plic_irq_unmask(struct irq_data *d)
>
>  static void plic_irq_mask(struct irq_data *d)
>  {
> -       struct plic_priv *priv = irq_get_chip_data(d->irq);
> +       struct plic_priv *priv = irq_data_get_irq_chip_data(d);
>
>         plic_irq_toggle(&priv->lmask, d, 0);
>  }
> @@ -138,7 +138,7 @@ static int plic_set_affinity(struct irq_data *d,
>  {
>         unsigned int cpu;
>         struct cpumask amask;
> -       struct plic_priv *priv = irq_get_chip_data(d->irq);
> +       struct plic_priv *priv = irq_data_get_irq_chip_data(d);
>
>         cpumask_and(&amask, &priv->lmask, mask_val);
>
> --
> 2.28.0
>

The patch that added multi-PLIC support was not tested with a
hierarchical GPIO irqchip. I guess that's why we never saw this
issue previously. Thanks for investigating and fixing this.

Looks good to me.

Reviewed-by: Anup Patel <anup@brainfault.org>

Regards,
Anup
Atish Patra Oct. 29, 2020, 9:43 p.m. UTC | #2
On Wed, Oct 28, 2020 at 7:37 PM Greentime Hu <greentime.hu@sifive.com> wrote:
>
> This oops is caused by a wrong chip_data and it is because plic_irq_unmask
> uses irq_get_chip_data(irq_data->irq) to get the chip_data. However it may
> get another irq_data with the same irq_data->irq if it is hierarchy.
>
> In this case, it will get irq_data of sifive_gpio_irqchip instead of
> plic_chip so that it will get a wrong chip_data and then the wrong lmask
> of it to cause this oops.
>
> To fix this issue, we can use irq_data_get_irq_chip_data(irq_data) to get
> the correct chip_data of plic_chip.
>
> (gdb) p d
> $11 = (struct irq_data *) 0xffffffe1f695f620
> (gdb) p *d
> $9 = {
>   mask = 0,
>   irq = 57,
>   hwirq = 6,
>   common = 0xffffffe1f695f600,
>   chip = 0xffffffe0018b5630 <sifive_gpio_irqchip>,
>   domain = 0xffffffe1f692c400,
>   parent_data = 0xffffffe1f68482c0,
>   chip_data = 0xffffffe1f564a820
> }
>
> (gdb) p d
> $6 = (struct irq_data *) 0xffffffe1f68482c0
> (gdb) p *d
> $7 = {
>   mask = 0,
>   irq = 57,
>   hwirq = 29,
>   common = 0xffffffe1f695f600,
>   chip = 0xffffffe0018b5070 <plic_chip>,
>   domain = 0xffffffe1f6635e00,
>   parent_data = 0x0,
>   chip_data = 0xffffffe1f660f1a0
> }
>
> [    3.030165] ------------[ cut here ]------------
> [    3.034614] WARNING: CPU: 1 PID: 1 at drivers/irqchip/irq-sifive-plic.c:125 plic_irq_unmask+0xc4/0x114
> [    3.043887] Modules linked in:
> [    3.046932] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.9.0 #1
> [    3.052748] epc: ffffffe000588e90 ra : ffffffe000588e88 sp : ffffffe1f6753940
> [    3.059869]  gp : ffffffe001978f48 tp : ffffffe1f6748000 t0 : ffffffe001995cb0
> [    3.067080]  t1 : ffffffe001995be8 t2 : 73616d61202c343a s0 : ffffffe1f67539a0
> [    3.074288]  s1 : ffffffe1f4968140 a0 : 00000000000000b2 a1 : 0000000000000000
> [    3.081497]  a2 : 00000000000000c2 a3 : 0000000000000000 a4 : 381c5a89432fe900
> [    3.088707]  a5 : 0000000000000004 a6 : 0000000000000000 a7 : 00000000000001aa
> [    3.095916]  s2 : ffffffe1f5901020 s3 : ffffffe00197a0a8 s4 : ffffffe001978b0c
> [    3.103125]  s5 : ffffffe00197a1f0 s6 : 0000000000000008 s7 : ffffffe1f4983c9c
> [    3.110335]  s8 : ffffffe1f4983c68 s9 : ffffffe1f4983c00 s10: ffffffe00000117c
> [    3.117544]  s11: 0000000000000000 t3 : 0000000000000007 t4 : 0000000000000000
> [    3.124753]  t5 : 66666666663a6b73 t6 : ffffffe001988479
> [    3.130052] status: 0000000200000100 badaddr: ffffffe001978b0c cause: 0000000000000003
> [    3.137959] ---[ end trace dbc1129f842ecba3 ]---
>
> Fixes: f1ad1133b18f ("irqchip/sifive-plic: Add support for multiple PLICs")
> Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
> ---
>  drivers/irqchip/irq-sifive-plic.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> index 4048657ece0a..6f432d2a5ceb 100644
> --- a/drivers/irqchip/irq-sifive-plic.c
> +++ b/drivers/irqchip/irq-sifive-plic.c
> @@ -99,7 +99,7 @@ static inline void plic_irq_toggle(const struct cpumask *mask,
>                                    struct irq_data *d, int enable)
>  {
>         int cpu;
> -       struct plic_priv *priv = irq_get_chip_data(d->irq);
> +       struct plic_priv *priv = irq_data_get_irq_chip_data(d);
>
>         writel(enable, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
>         for_each_cpu(cpu, mask) {
> @@ -115,7 +115,7 @@ static void plic_irq_unmask(struct irq_data *d)
>  {
>         struct cpumask amask;
>         unsigned int cpu;
> -       struct plic_priv *priv = irq_get_chip_data(d->irq);
> +       struct plic_priv *priv = irq_data_get_irq_chip_data(d);
>
>         cpumask_and(&amask, &priv->lmask, cpu_online_mask);
>         cpu = cpumask_any_and(irq_data_get_affinity_mask(d),
> @@ -127,7 +127,7 @@ static void plic_irq_unmask(struct irq_data *d)
>
>  static void plic_irq_mask(struct irq_data *d)
>  {
> -       struct plic_priv *priv = irq_get_chip_data(d->irq);
> +       struct plic_priv *priv = irq_data_get_irq_chip_data(d);
>
>         plic_irq_toggle(&priv->lmask, d, 0);
>  }
> @@ -138,7 +138,7 @@ static int plic_set_affinity(struct irq_data *d,
>  {
>         unsigned int cpu;
>         struct cpumask amask;
> -       struct plic_priv *priv = irq_get_chip_data(d->irq);
> +       struct plic_priv *priv = irq_data_get_irq_chip_data(d);
>
>         cpumask_and(&amask, &priv->lmask, mask_val);
>
> --
> 2.28.0
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

Thanks for the fix.

Reviewed-by: Atish Patra <atish.patra@wdc.com>
Marc Zyngier Nov. 1, 2020, 12:10 p.m. UTC | #3
On Thu, 29 Oct 2020 10:37:38 +0800, Greentime Hu wrote:
> This oops is caused by a wrong chip_data and it is because plic_irq_unmask
> uses irq_get_chip_data(irq_data->irq) to get the chip_data. However it may
> get another irq_data with the same irq_data->irq if it is hierarchy.
> 
> In this case, it will get irq_data of sifive_gpio_irqchip instead of
> plic_chip so that it will get a wrong chip_data and then the wrong lmask
> of it to cause this oops.
> 
> [...]

Applied to irq/irqchip-next, thanks!

[1/1] irqchip/sifive-plic: Fix chip_data access within a hierarchy
      commit: f9ac7bbd6e4540dcc6df621b9c9b6eb2e26ded1d

Cheers,

	M.
Thomas Gleixner Nov. 1, 2020, 4:52 p.m. UTC | #4
On Sun, Nov 01 2020 at 12:10, Marc Zyngier wrote:

> On Thu, 29 Oct 2020 10:37:38 +0800, Greentime Hu wrote:
>> This oops is caused by a wrong chip_data and it is because plic_irq_unmask
>> uses irq_get_chip_data(irq_data->irq) to get the chip_data. However it may
>> get another irq_data with the same irq_data->irq if it is hierarchy.
>> 
>> In this case, it will get irq_data of sifive_gpio_irqchip instead of
>> plic_chip so that it will get a wrong chip_data and then the wrong lmask
>> of it to cause this oops.
>> 
>> [...]
>
> Applied to irq/irqchip-next, thanks!

That should go into urgent; the offending commit is already in Linus' tree.

Thanks,

        tglx
Palmer Dabbelt Nov. 6, 2020, 2:02 a.m. UTC | #5
On Sun, 01 Nov 2020 08:52:06 PST (-0800), tglx@linutronix.de wrote:
> On Sun, Nov 01 2020 at 12:10, Marc Zyngier wrote:
>
>> On Thu, 29 Oct 2020 10:37:38 +0800, Greentime Hu wrote:
>>> This oops is caused by a wrong chip_data and it is because plic_irq_unmask
>>> uses irq_get_chip_data(irq_data->irq) to get the chip_data. However it may
>>> get another irq_data with the same irq_data->irq if it is hierarchy.
>>>
>>> In this case, it will get irq_data of sifive_gpio_irqchip instead of
>>> plic_chip so that it will get a wrong chip_data and then the wrong lmask
>>> of it to cause this oops.
>>>
>>> [...]
>>
>> Applied to irq/irqchip-next, thanks!
>
> That should go into urgent, the offending commit is in Linus tree already

I agree.  I'm assuming that's for Marc, but LMK if you guys want me to take it.

Acked-by: Palmer Dabbelt <palmerdabbelt@google.com>

Thanks!
diff mbox series

Patch

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 4048657ece0a..6f432d2a5ceb 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -99,7 +99,7 @@  static inline void plic_irq_toggle(const struct cpumask *mask,
 				   struct irq_data *d, int enable)
 {
 	int cpu;
-	struct plic_priv *priv = irq_get_chip_data(d->irq);
+	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
 
 	writel(enable, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
 	for_each_cpu(cpu, mask) {
@@ -115,7 +115,7 @@  static void plic_irq_unmask(struct irq_data *d)
 {
 	struct cpumask amask;
 	unsigned int cpu;
-	struct plic_priv *priv = irq_get_chip_data(d->irq);
+	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
 
 	cpumask_and(&amask, &priv->lmask, cpu_online_mask);
 	cpu = cpumask_any_and(irq_data_get_affinity_mask(d),
@@ -127,7 +127,7 @@  static void plic_irq_unmask(struct irq_data *d)
 
 static void plic_irq_mask(struct irq_data *d)
 {
-	struct plic_priv *priv = irq_get_chip_data(d->irq);
+	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
 
 	plic_irq_toggle(&priv->lmask, d, 0);
 }
@@ -138,7 +138,7 @@  static int plic_set_affinity(struct irq_data *d,
 {
 	unsigned int cpu;
 	struct cpumask amask;
-	struct plic_priv *priv = irq_get_chip_data(d->irq);
+	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
 
 	cpumask_and(&amask, &priv->lmask, mask_val);