diff mbox series

platform: x86: vgpio: Pass irqchip when adding gpiochip

Message ID 20190812135335.10104-1-linus.walleij@linaro.org (mailing list archive)
State Not Applicable, archived
Headers show
Series platform: x86: vgpio: Pass irqchip when adding gpiochip | expand

Commit Message

Linus Walleij Aug. 12, 2019, 1:53 p.m. UTC
We need to convert all old gpio irqchips to pass the irqchip
setup along when adding the gpio_chip. For more info see
drivers/gpio/TODO.

For chained irqchips this is a pretty straight-forward
conversion.

Cc: Maxim Mikityanskiy <maxtram95@gmail.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
Andy please merge this into your platform tree when you
feel happy with the patch, would be great of someone
can test it on hardware as well.
---
 drivers/platform/x86/intel_int0002_vgpio.c | 29 +++++++++-------------
 1 file changed, 12 insertions(+), 17 deletions(-)

Comments

Hans de Goede Aug. 16, 2019, 9:18 a.m. UTC | #1
Hi Linus,

On 12-08-19 15:53, Linus Walleij wrote:
> We need to convert all old gpio irqchips to pass the irqchip
> setup along when adding the gpio_chip. For more info see
> drivers/gpio/TODO.
> 
> For chained irqchips this is a pretty straight-forward
> conversion.
> 
> Cc: Maxim Mikityanskiy <maxtram95@gmail.com>
> Cc: Hans de Goede <hdegoede@redhat.com>
> Cc: Darren Hart <dvhart@infradead.org>
> Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> ---
> Andy please merge this into your platform tree when you
> feel happy with the patch, would be great of someone
> can test it on hardware as well.

So I've just tested this on a Cherry Trail machine and
the interrupt storm, fixing which is the reason the
intel_int0002_vgpio.c driver was introduced, is back:

[root@localhost ~]# cat /proc/interrupts | grep INT0002
    9:          0   23429420          0          0   IO-APIC    9-fasteoi   acpi, INT0002

23 million interrupts and counting, normally this is 0
at boot low 10s after a suspend/resume with wakeup by
USB keyboard.

Notice that the driver has attached itself as shared irq-handler
to the ACPI  IRQ,  but it seems something is going wrong with the
registration of its own IRQ and/or for some reason the ACPI
subsys is no longer attaching the ACPI event handler for the
child IRQ to it, here is a the same command on a working
kernel:

[root@localhost ~]# cat /proc/interrupts | grep INT0002
    9:          0          0          0          0   IO-APIC    9-fasteoi   acpi, INT0002
  123:          0          0          0          0  INT0002 Virtual GPIO    2  ACPI:Event

Do I need any patches on top of 5.3-rc4 to test this patch?

Note that it is important that the single irq on the chip is
advertised as irq number 2 (so the third irq) because that
is what the ACPI event code expects:

         Device (GPED)
         {
             Name (_ADR, Zero)  // _ADR: Address
             Name (_HID, "INT0002" /* Virtual GPIO Controller */)  // _HID: Hardw
             Name (_CID, "INT0002" /* Virtual GPIO Controller */)  // _CID: Compa
             Name (_DDN, "Virtual GPIO controller")  // _DDN: DOS Device Name
             ...
             Method (_AEI, 0, NotSerialized)  // _AEI: ACPI Event Interrupts
             {
                 Name (RBUF, ResourceTemplate ()
                 {
                     GpioInt (Level, ActiveHigh, ExclusiveAndWake, PullDown, 0x00
                         "\\_SB.GPED", 0x00, ResourceConsumer, ,
                         )
                         {   // Pin list
                             0x0002
                         }
                 })
                 Return (RBUF) /* \_SB_.GPED._AEI.RBUF */
             }

             Method (_L02, 0, NotSerialized)  // _Lxx: Level-Triggered GPE
             {
                 ...
             }
         }

Anyways, this will need to be fixed before we can merge this.

Regards,

Hans




> ---
>   drivers/platform/x86/intel_int0002_vgpio.c | 29 +++++++++-------------
>   1 file changed, 12 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
> index d9542c661ddc..493a97ce0b08 100644
> --- a/drivers/platform/x86/intel_int0002_vgpio.c
> +++ b/drivers/platform/x86/intel_int0002_vgpio.c
> @@ -156,8 +156,8 @@ static int int0002_probe(struct platform_device *pdev)
>   {
>   	struct device *dev = &pdev->dev;
>   	const struct x86_cpu_id *cpu_id;
> -	struct irq_chip *irq_chip;
>   	struct gpio_chip *chip;
> +	struct gpio_irq_chip *girq;
>   	int irq, ret;
>   
>   	/* Menlow has a different INT0002 device? <sigh> */
> @@ -186,17 +186,9 @@ static int int0002_probe(struct platform_device *pdev)
>   	chip->ngpio = GPE0A_PME_B0_VIRT_GPIO_PIN + 1;
>   	chip->irq.need_valid_mask = true;
>   
> -	ret = devm_gpiochip_add_data(&pdev->dev, chip, NULL);
> -	if (ret) {
> -		dev_err(dev, "Error adding gpio chip: %d\n", ret);
> -		return ret;
> -	}
> -
> -	bitmap_clear(chip->irq.valid_mask, 0, GPE0A_PME_B0_VIRT_GPIO_PIN);
> -
>   	/*
> -	 * We manually request the irq here instead of passing a flow-handler
> -	 * to gpiochip_set_chained_irqchip, because the irq is shared.
> +	 * We directly request the irq here instead of passing a flow-handler
> +	 * to the gpio irqchip, because the irq is shared.
>   	 */
>   	ret = devm_request_irq(dev, irq, int0002_irq,
>   			       IRQF_SHARED, "INT0002", chip);
> @@ -204,17 +196,20 @@ static int int0002_probe(struct platform_device *pdev)
>   		dev_err(dev, "Error requesting IRQ %d: %d\n", irq, ret);
>   		return ret;
>   	}
> +	girq = &chip->irq;
> +	girq->chip = (struct irq_chip *)cpu_id->driver_data;
> +	girq->parent_handler = NULL;
> +	girq->num_parents = 0;
> +	girq->default_type = IRQ_TYPE_NONE;
> +	girq->handler = handle_edge_irq;
>   
> -	irq_chip = (struct irq_chip *)cpu_id->driver_data;
> -
> -	ret = gpiochip_irqchip_add(chip, irq_chip, 0, handle_edge_irq,
> -				   IRQ_TYPE_NONE);
> +	ret = devm_gpiochip_add_data(&pdev->dev, chip, NULL);
>   	if (ret) {
> -		dev_err(dev, "Error adding irqchip: %d\n", ret);
> +		dev_err(dev, "Error adding gpio chip: %d\n", ret);
>   		return ret;
>   	}
>   
> -	gpiochip_set_chained_irqchip(chip, irq_chip, irq, NULL);
> +	bitmap_clear(chip->irq.valid_mask, 0, GPE0A_PME_B0_VIRT_GPIO_PIN);
>   
>   	return 0;
>   }
>
Andy Shevchenko Aug. 16, 2019, 10:03 a.m. UTC | #2
On Fri, Aug 16, 2019 at 12:18 PM Hans de Goede <hdegoede@redhat.com> wrote:
>
> Hi Linus,
>
> On 12-08-19 15:53, Linus Walleij wrote:
> > We need to convert all old gpio irqchips to pass the irqchip
> > setup along when adding the gpio_chip. For more info see
> > drivers/gpio/TODO.
> >
> > For chained irqchips this is a pretty straight-forward
> > conversion.
> >
> > Cc: Maxim Mikityanskiy <maxtram95@gmail.com>
> > Cc: Hans de Goede <hdegoede@redhat.com>
> > Cc: Darren Hart <dvhart@infradead.org>
> > Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> > Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> > ---
> > Andy please merge this into your platform tree when you
> > feel happy with the patch, would be great of someone
> > can test it on hardware as well.
>
> So I've just tested this on a Cherry Trail machine and
> the interrupt storm, fixing which is the reason the
> intel_int0002_vgpio.c driver was introduced, is back:

Hans, thanks for testing.
I postpone this one.

>
> [root@localhost ~]# cat /proc/interrupts | grep INT0002
>     9:          0   23429420          0          0   IO-APIC    9-fasteoi   acpi, INT0002
>
> 23 million interrupts and counting, normally this is 0
> at boot low 10s after a suspend/resume with wakeup by
> USB keyboard.
>
> Notice that the driver has attached itself as shared irq-handler
> to the ACPI  IRQ,  but it seems something is going wrong with the
> registration of its own IRQ and/or for some reason the ACPI
> subsys is no longer attaching the ACPI event handler for the
> child IRQ to it, here is a the same command on a working
> kernel:
>
> [root@localhost ~]# cat /proc/interrupts | grep INT0002
>     9:          0          0          0          0   IO-APIC    9-fasteoi   acpi, INT0002
>   123:          0          0          0          0  INT0002 Virtual GPIO    2  ACPI:Event
>
> Do I need any patches on top of 5.3-rc4 to test this patch?
>
> Note that it is important that the single irq on the chip is
> advertised as irq number 2 (so the third irq) because that
> is what the ACPI event code expects:
>
>          Device (GPED)
>          {
>              Name (_ADR, Zero)  // _ADR: Address
>              Name (_HID, "INT0002" /* Virtual GPIO Controller */)  // _HID: Hardw
>              Name (_CID, "INT0002" /* Virtual GPIO Controller */)  // _CID: Compa
>              Name (_DDN, "Virtual GPIO controller")  // _DDN: DOS Device Name
>              ...
>              Method (_AEI, 0, NotSerialized)  // _AEI: ACPI Event Interrupts
>              {
>                  Name (RBUF, ResourceTemplate ()
>                  {
>                      GpioInt (Level, ActiveHigh, ExclusiveAndWake, PullDown, 0x00
>                          "\\_SB.GPED", 0x00, ResourceConsumer, ,
>                          )
>                          {   // Pin list
>                              0x0002
>                          }
>                  })
>                  Return (RBUF) /* \_SB_.GPED._AEI.RBUF */
>              }
>
>              Method (_L02, 0, NotSerialized)  // _Lxx: Level-Triggered GPE
>              {
>                  ...
>              }
>          }
>
> Anyways, this will need to be fixed before we can merge this.
>
> Regards,
>
> Hans
>
>
>
>
> > ---
> >   drivers/platform/x86/intel_int0002_vgpio.c | 29 +++++++++-------------
> >   1 file changed, 12 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
> > index d9542c661ddc..493a97ce0b08 100644
> > --- a/drivers/platform/x86/intel_int0002_vgpio.c
> > +++ b/drivers/platform/x86/intel_int0002_vgpio.c
> > @@ -156,8 +156,8 @@ static int int0002_probe(struct platform_device *pdev)
> >   {
> >       struct device *dev = &pdev->dev;
> >       const struct x86_cpu_id *cpu_id;
> > -     struct irq_chip *irq_chip;
> >       struct gpio_chip *chip;
> > +     struct gpio_irq_chip *girq;
> >       int irq, ret;
> >
> >       /* Menlow has a different INT0002 device? <sigh> */
> > @@ -186,17 +186,9 @@ static int int0002_probe(struct platform_device *pdev)
> >       chip->ngpio = GPE0A_PME_B0_VIRT_GPIO_PIN + 1;
> >       chip->irq.need_valid_mask = true;
> >
> > -     ret = devm_gpiochip_add_data(&pdev->dev, chip, NULL);
> > -     if (ret) {
> > -             dev_err(dev, "Error adding gpio chip: %d\n", ret);
> > -             return ret;
> > -     }
> > -
> > -     bitmap_clear(chip->irq.valid_mask, 0, GPE0A_PME_B0_VIRT_GPIO_PIN);
> > -
> >       /*
> > -      * We manually request the irq here instead of passing a flow-handler
> > -      * to gpiochip_set_chained_irqchip, because the irq is shared.
> > +      * We directly request the irq here instead of passing a flow-handler
> > +      * to the gpio irqchip, because the irq is shared.
> >        */
> >       ret = devm_request_irq(dev, irq, int0002_irq,
> >                              IRQF_SHARED, "INT0002", chip);
> > @@ -204,17 +196,20 @@ static int int0002_probe(struct platform_device *pdev)
> >               dev_err(dev, "Error requesting IRQ %d: %d\n", irq, ret);
> >               return ret;
> >       }
> > +     girq = &chip->irq;
> > +     girq->chip = (struct irq_chip *)cpu_id->driver_data;
> > +     girq->parent_handler = NULL;
> > +     girq->num_parents = 0;
> > +     girq->default_type = IRQ_TYPE_NONE;
> > +     girq->handler = handle_edge_irq;
> >
> > -     irq_chip = (struct irq_chip *)cpu_id->driver_data;
> > -
> > -     ret = gpiochip_irqchip_add(chip, irq_chip, 0, handle_edge_irq,
> > -                                IRQ_TYPE_NONE);
> > +     ret = devm_gpiochip_add_data(&pdev->dev, chip, NULL);
> >       if (ret) {
> > -             dev_err(dev, "Error adding irqchip: %d\n", ret);
> > +             dev_err(dev, "Error adding gpio chip: %d\n", ret);
> >               return ret;
> >       }
> >
> > -     gpiochip_set_chained_irqchip(chip, irq_chip, irq, NULL);
> > +     bitmap_clear(chip->irq.valid_mask, 0, GPE0A_PME_B0_VIRT_GPIO_PIN);
> >
> >       return 0;
> >   }
> >
Linus Walleij Aug. 16, 2019, 5:16 p.m. UTC | #3
Hi Hans,

On Fri, Aug 16, 2019 at 11:18 AM Hans de Goede <hdegoede@redhat.com> wrote:

> So I've just tested this on a Cherry Trail machine and
> the interrupt storm, fixing which is the reason the
> intel_int0002_vgpio.c driver was introduced, is back:

Sorry but just so I understand this report: when you say the
interrupt storm is back, do you mean that this patch brings
it back, or do you mean the interrupt storm is there
even before this patch?

This patch does bring semantic differences, but very
small ones.

> Notice that the driver has attached itself as shared irq-handler
> to the ACPI  IRQ

What is it sharing it with?

> Do I need any patches on top of 5.3-rc4 to test this patch?

No, none that I know of.

It is weird that this driver registers a chained
interrupt handler but int0002_irq() doesn't call
chained_irq_[enter|exit].

I don't understand the ACPI code but I'm confused about
a "virtual" GPIO controller with very real interrupt lines
attached to it. If it is actually virtual then just trying to
abuse gpiolib to cascade interrupts like the shared
interrupts were some, you know, cascaded GPIO IRQ line,
I guess all the consumers should just grab the interrupt
independently and shared instead, the idea being that
each of them will drop their pull of the shared level IRQ
line until it is eventually deasserted.

I'm just confused....

Yours,
Linus Walleij
Hans de Goede Aug. 16, 2019, 7:42 p.m. UTC | #4
Hi,

On 8/16/19 7:16 PM, Linus Walleij wrote:
> Hi Hans,
> 
> On Fri, Aug 16, 2019 at 11:18 AM Hans de Goede <hdegoede@redhat.com> wrote:
> 
>> So I've just tested this on a Cherry Trail machine and
>> the interrupt storm, fixing which is the reason the
>> intel_int0002_vgpio.c driver was introduced, is back:
> 
> Sorry but just so I understand this report: when you say the
> interrupt storm is back, do you mean that this patch brings
> it back, or do you mean the interrupt storm is there
> even before this patch?

I mean that the patch brings it back, iow the patch causes
a regression.

> This patch does bring semantic differences, but very
> small ones.

Could it be that the parent device of the IRQ controller is
different after this? I think that the ACPI subsys relies
on the parent being the INT0002 ACPI instantiated platform
device.

>> Notice that the driver has attached itself as shared irq-handler
>> to the ACPI  IRQ
> 
> What is it sharing it with?

With the ACPI subsys, this IRQ is called the GPE which is an
interrupt normally reserved for events to be handled by the
ACPI subsys.

>> Do I need any patches on top of 5.3-rc4 to test this patch?
> 
> No, none that I know of.
> 
> It is weird that this driver registers a chained
> interrupt handler but int0002_irq() doesn't call
> chained_irq_[enter|exit].
> 
> I don't understand the ACPI code but I'm confused about
> a "virtual" GPIO controller with very real interrupt lines
> attached to it. If it is actually virtual then just trying to
> abuse gpiolib to cascade interrupts like the shared
> interrupts were some, you know, cascaded GPIO IRQ line,
> I guess all the consumers should just grab the interrupt
> independently and shared instead, the idea being that
> each of them will drop their pull of the shared level IRQ
> line until it is eventually deasserted.
> 
> I'm just confused....

The ACPI subsys has the ability to attach an event handler
written in ACPI byte code (AML code) to GPIO events (GPIO
triggered IRQs), quoting the same piece of AML as before:


         Device (GPED)
         {
             Name (_ADR, Zero)  // _ADR: Address
             Name (_HID, "INT0002" /* Virtual GPIO Controller */)  // _HID: Hardw
             Name (_CID, "INT0002" /* Virtual GPIO Controller */)  // _CID: Compa
             Name (_DDN, "Virtual GPIO controller")  // _DDN: DOS Device Name
             ...
             Method (_AEI, 0, NotSerialized)  // _AEI: ACPI Event Interrupts
             {
                 Name (RBUF, ResourceTemplate ()
                 {
                     GpioInt (Level, ActiveHigh, ExclusiveAndWake, PullDown, 0x00
                         "\\_SB.GPED", 0x00, ResourceConsumer, ,
                         )
                         {   // Pin list
                             0x0002
                         }
                 })
                 Return (RBUF) /* \_SB_.GPED._AEI.RBUF */
             }

             Method (_L02, 0, NotSerialized)  // _Lxx: Level-Triggered GPE
             {
                 ...
             }
         }

So what we are seeing here is an AEI (ACPI Event Interrupt) entry pointing
to pin 2 of the INT0002 GPIO chip, note this is not a real GPIO chip, but
a way to attach an ACPI event handler to GPE interrupts, while only
running the event handler when a specific status bit is set.

So what the ACPI subsys does is it looksup the GPIO with index 2
on the INT0003 gpiochip (which is unchanged) and the calls gpio_to_irq
on the found GPIO, it seems that the gpio_to_irq is no longer working,
causing the:

  123:          0          0          0          0  INT0002 Virtual GPIO    2  ACPI:Event

Line in cat/ /proc/interrupts to disappear. That or gpio_to_irq works
but then for some reason the subsequent request irq fails.

Regards,

Hans
Linus Walleij Aug. 17, 2019, 10:26 p.m. UTC | #5
On Fri, Aug 16, 2019 at 9:42 PM Hans de Goede <hdegoede@redhat.com> wrote:
> On 8/16/19 7:16 PM, Linus Walleij wrote:

> > Sorry but just so I understand this report: when you say the
> > interrupt storm is back, do you mean that this patch brings
> > it back, or do you mean the interrupt storm is there
> > even before this patch?
>
> I mean that the patch brings it back, iow the patch causes
> a regression.

Gnah that's too bad. :/

> > This patch does bring semantic differences, but very
> > small ones.
>
> Could it be that the parent device of the IRQ controller is
> different after this?

If you mean parent_device in struct irq_chip then no,
the gpiolib core doesn't touch that neither before or
after this patch.

> I think that the ACPI subsys relies
> on the parent being the INT0002 ACPI instantiated platform
> device.

But this driver never sets up parent_device in struct
irq_chip even today... it just passes in in pretty open coded
with NULL as parent_device (compiletime default).

> >> Notice that the driver has attached itself as shared irq-handler
> >> to the ACPI  IRQ
> >
> > What is it sharing it with?
>
> With the ACPI subsys, this IRQ is called the GPE which is an
> interrupt normally reserved for events to be handled by the
> ACPI subsys.

Aha I get it, I think.

> The ACPI subsys has the ability to attach an event handler
> written in ACPI byte code (AML code) to GPIO events (GPIO
> triggered IRQs), quoting the same piece of AML as before:
>
>
>          Device (GPED)
>          {
>              Name (_ADR, Zero)  // _ADR: Address
>              Name (_HID, "INT0002" /* Virtual GPIO Controller */)  // _HID: Hardw
>              Name (_CID, "INT0002" /* Virtual GPIO Controller */)  // _CID: Compa
>              Name (_DDN, "Virtual GPIO controller")  // _DDN: DOS Device Name
>              ...
>              Method (_AEI, 0, NotSerialized)  // _AEI: ACPI Event Interrupts
>              {
>                  Name (RBUF, ResourceTemplate ()
>                  {
>                      GpioInt (Level, ActiveHigh, ExclusiveAndWake, PullDown, 0x00
>                          "\\_SB.GPED", 0x00, ResourceConsumer, ,
>                          )
>                          {   // Pin list
>                              0x0002
>                          }
>                  })
>                  Return (RBUF) /* \_SB_.GPED._AEI.RBUF */
>              }
>
>              Method (_L02, 0, NotSerialized)  // _Lxx: Level-Triggered GPE
>              {
>                  ...
>              }
>          }
>
> So what we are seeing here is an AEI (ACPI Event Interrupt) entry pointing
> to pin 2 of the INT0002 GPIO chip, note this is not a real GPIO chip, but
> a way to attach an ACPI event handler to GPE interrupts, while only
> running the event handler when a specific status bit is set.

I see ... that's a bit complex way to solve an easy problem but I
guess the ACPI gods want it that way.

> So what the ACPI subsys does is it looksup the GPIO with index 2
> on the INT0003 gpiochip (which is unchanged) and the calls gpio_to_irq
> on the found GPIO, it seems that the gpio_to_irq is no longer working,
> causing the:
>
>   123:          0          0          0          0  INT0002 Virtual GPIO    2  ACPI:Event
>
> Line in cat/ /proc/interrupts to disappear. That or gpio_to_irq works
> but then for some reason the subsequent request irq fails.

OK I get it, I try to see what the problem may come from.
I suspect gpio[d]_to_irq isn't working as expected for some
reason.

We are checking the valid_mask to see if the IRQ is valid for
mapping. Could it be that something is wrong with the valid_mask?

It used to be that we:

1. Set up the (only) GPIO chip
2. Set up the valid mask (now allocated)
3. Register the irqchip
4. Register the irqhandler

and now we instead:

1. Set up the (only) GPIO chip
2. Register the irqchip
3. Register the irqhandler
4. Set up the valid mask (now allocated)

The valid_mask in the GPIO chip itself has a special callback
to set up the mask, maybe we need to have the same for
the irqchip if that needs to happen in the same flow as
before.

It's a weird driver cascading a single GPIO IRQ that isn't
really a GPIO so my head is spinning a bit :D

Yours,
Linus Walleij
Hans de Goede Aug. 17, 2019, 11:23 p.m. UTC | #6
Hi,

On 18-08-19 00:26, Linus Walleij wrote:
> On Fri, Aug 16, 2019 at 9:42 PM Hans de Goede <hdegoede@redhat.com> wrote:
>> On 8/16/19 7:16 PM, Linus Walleij wrote:
> 
>>> Sorry but just so I understand this report: when you say the
>>> interrupt storm is back, do you mean that this patch brings
>>> it back, or do you mean the interrupt storm is there
>>> even before this patch?
>>
>> I mean that the patch brings it back, iow the patch causes
>> a regression.
> 
> Gnah that's too bad. :/
> 
>>> This patch does bring semantic differences, but very
>>> small ones.
>>
>> Could it be that the parent device of the IRQ controller is
>> different after this?
> 
> If you mean parent_device in struct irq_chip then no,
> the gpiolib core doesn't touch that neither before or
> after this patch.
> 
>> I think that the ACPI subsys relies
>> on the parent being the INT0002 ACPI instantiated platform
>> device.
> 
> But this driver never sets up parent_device in struct
> irq_chip even today... it just passes in in pretty open coded
> with NULL as parent_device (compiletime default).
> 
>>>> Notice that the driver has attached itself as shared irq-handler
>>>> to the ACPI  IRQ
>>>
>>> What is it sharing it with?
>>
>> With the ACPI subsys, this IRQ is called the GPE which is an
>> interrupt normally reserved for events to be handled by the
>> ACPI subsys.
> 
> Aha I get it, I think.
> 
>> The ACPI subsys has the ability to attach an event handler
>> written in ACPI byte code (AML code) to GPIO events (GPIO
>> triggered IRQs), quoting the same piece of AML as before:
>>
>>
>>           Device (GPED)
>>           {
>>               Name (_ADR, Zero)  // _ADR: Address
>>               Name (_HID, "INT0002" /* Virtual GPIO Controller */)  // _HID: Hardw
>>               Name (_CID, "INT0002" /* Virtual GPIO Controller */)  // _CID: Compa
>>               Name (_DDN, "Virtual GPIO controller")  // _DDN: DOS Device Name
>>               ...
>>               Method (_AEI, 0, NotSerialized)  // _AEI: ACPI Event Interrupts
>>               {
>>                   Name (RBUF, ResourceTemplate ()
>>                   {
>>                       GpioInt (Level, ActiveHigh, ExclusiveAndWake, PullDown, 0x00
>>                           "\\_SB.GPED", 0x00, ResourceConsumer, ,
>>                           )
>>                           {   // Pin list
>>                               0x0002
>>                           }
>>                   })
>>                   Return (RBUF) /* \_SB_.GPED._AEI.RBUF */
>>               }
>>
>>               Method (_L02, 0, NotSerialized)  // _Lxx: Level-Triggered GPE
>>               {
>>                   ...
>>               }
>>           }
>>
>> So what we are seeing here is an AEI (ACPI Event Interrupt) entry pointing
>> to pin 2 of the INT0002 GPIO chip, note this is not a real GPIO chip, but
>> a way to attach an ACPI event handler to GPE interrupts, while only
>> running the event handler when a specific status bit is set.
> 
> I see ... that's a bit complex way to solve an easy problem but I
> guess the ACPI gods want it that way.
> 
>> So what the ACPI subsys does is it looksup the GPIO with index 2
>> on the INT0003 gpiochip (which is unchanged) and the calls gpio_to_irq
>> on the found GPIO, it seems that the gpio_to_irq is no longer working,
>> causing the:
>>
>>    123:          0          0          0          0  INT0002 Virtual GPIO    2  ACPI:Event
>>
>> Line in cat/ /proc/interrupts to disappear. That or gpio_to_irq works
>> but then for some reason the subsequent request irq fails.
> 
> OK I get it, I try to see what the problem may come from.
> I suspect gpio[d]_to_irq isn't working as expected for some
> reason.
> 
> We are checking the valid_mask to see if the IRQ is valid for
> mapping. Could it be that something is wrong with the valid_mask?
> 
> It used to be that we:
> 
> 1. Set up the (only) GPIO chip
> 2. Set up the valid mask (now allocated)
> 3. Register the irqchip
> 4. Register the irqhandler
> 
> and now we instead:
> 
> 1. Set up the (only) GPIO chip
> 2. Register the irqchip
> 3. Register the irqhandler
> 4. Set up the valid mask (now allocated)
> 
> The valid_mask in the GPIO chip itself has a special callback
> to set up the mask, maybe we need to have the same for
> the irqchip if that needs to happen in the same flow as
> before.
> 
> It's a weird driver cascading a single GPIO IRQ that isn't
> really a GPIO so my head is spinning a bit :D

Ok, so the change to when the valid mask is set sounds like a possible
reason for the regression.

The GPIO lookup and gpio[d]_to_irq call happen from
acpi_gpiochip_request_interrupts which gets called at the end of
gpiochip_add_irqchip.

So yes it sounds like the irqmask getting set too late may very well
be the problem here.

Note that the acpi_gpiochip_request_interrupts call also does a
gpiochip_lock_as_irq call on the GPIO.

Basically the 3 relevant calls are:

         desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event",
                                          GPIO_ACTIVE_HIGH, GPIOD_IN);

         ret = gpiochip_lock_as_irq(chip, pin);

         irq = gpiod_to_irq(desc);

I see now that all 3 of these have error handling + dev_err calls,
so if you want I can retest and look for errors in dmesg, I guess I
should have done that right away...

Regards,

Hans
Andy Shevchenko Aug. 18, 2019, 1:24 p.m. UTC | #7
On Fri, Aug 16, 2019 at 11:18:22AM +0200, Hans de Goede wrote:
> On 12-08-19 15:53, Linus Walleij wrote:

> Anyways, this will need to be fixed before we can merge this.

It might affect the behaviour of pinctrl-baytrail as well.

Hans, do you have any Baytrail at hand to test latest linux-next, or take my
for-next branch from
git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git?

Linus, shall I postpone Baytrail patch as well?
Hans de Goede Aug. 18, 2019, 1:38 p.m. UTC | #8
Hi,

On 18-08-19 15:24, Andy Shevchenko wrote:
> On Fri, Aug 16, 2019 at 11:18:22AM +0200, Hans de Goede wrote:
>> On 12-08-19 15:53, Linus Walleij wrote:
> 
>> Anyways, this will need to be fixed before we can merge this.
> 
> It might affect the behaviour of pinctrl-baytrail as well.
> 
> Hans, do you have any Baytrail at hand to test latest linux-next, or take my
> for-next branch from
> git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git?

Given all the hw-enablement work I've done for BYT/CHT I have a whole
stack of Bay Trail devices. Is there anything specific you want me to
test?  Or should I just take the first one of the stack which uses a
GPIO from the SoC as IRQ for something and then test that something?

> Linus, shall I postpone Baytrail patch as well?

Unlike the INT0002 virtual GPIO driver changes this one does not seem
to move anything to a later point in time...

Regards,

Hans
Andy Shevchenko Aug. 18, 2019, 2:04 p.m. UTC | #9
On Sun, Aug 18, 2019 at 03:38:17PM +0200, Hans de Goede wrote:
> On 18-08-19 15:24, Andy Shevchenko wrote:
> > On Fri, Aug 16, 2019 at 11:18:22AM +0200, Hans de Goede wrote:
> > > On 12-08-19 15:53, Linus Walleij wrote:
> > 
> > > Anyways, this will need to be fixed before we can merge this.
> > 
> > It might affect the behaviour of pinctrl-baytrail as well.
> > 
> > Hans, do you have any Baytrail at hand to test latest linux-next, or take my
> > for-next branch from
> > git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git?
> 
> Given all the hw-enablement work I've done for BYT/CHT I have a whole
> stack of Bay Trail devices. Is there anything specific you want me to
> test?  Or should I just take the first one of the stack which uses a
> GPIO from the SoC as IRQ for something and then test that something?

From the thread I got that it should be one which uses GPIO for GPE.
Given that we have a fix there against misconfigured pins by firmware [1, 2],
which utilizes need_valid_mask, probably ASUS T100TA is a good candidate.

[1]: https://www.spinics.net/lists/linux-gpio/msg18842.html
[2]: commit 49c03096263871a68c9dea3e86b7d1e163d2fba8

> 
> > Linus, shall I postpone Baytrail patch as well?
> 
> Unlike the INT0002 virtual GPIO driver changes this one does not seem
> to move anything to a later point in time...
Hans de Goede Aug. 18, 2019, 4:19 p.m. UTC | #10
Hi,

On 18-08-19 16:04, Andy Shevchenko wrote:
> On Sun, Aug 18, 2019 at 03:38:17PM +0200, Hans de Goede wrote:
>> On 18-08-19 15:24, Andy Shevchenko wrote:
>>> On Fri, Aug 16, 2019 at 11:18:22AM +0200, Hans de Goede wrote:
>>>> On 12-08-19 15:53, Linus Walleij wrote:
>>>
>>>> Anyways, this will need to be fixed before we can merge this.
>>>
>>> It might affect the behaviour of pinctrl-baytrail as well.
>>>
>>> Hans, do you have any Baytrail at hand to test latest linux-next, or take my
>>> for-next branch from
>>> git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git?
>>
>> Given all the hw-enablement work I've done for BYT/CHT I have a whole
>> stack of Bay Trail devices. Is there anything specific you want me to
>> test?  Or should I just take the first one of the stack which uses a
>> GPIO from the SoC as IRQ for something and then test that something?
> 
>  From the thread I got that it should be one which uses GPIO for GPE.
> Given that we have a fix there against misconfigured pins by firmware [1, 2],
> which utilizes need_valid_mask, probably ASUS T100TA is a good candidate.
> 
> [1]: https://www.spinics.net/lists/linux-gpio/msg18842.html
> [2]: commit 49c03096263871a68c9dea3e86b7d1e163d2fba8

Thanks for the context, so for testing I need a Bay Trail device which
uses a GPIO interrupt line with a "Interrupt" descriptor, like this:

                         Interrupt (ResourceConsumer, Level, ActiveHigh, Exclusiv
                         {
                             0x00000045,
                         }

Instead of the more modern GpioInt descriptors. My Asus T100TA is in my
storage-bin at the localhackerspace ATM, but I have a T200TA here which
is the same wrt touchscreen IRQ routing.

I'm currently charging it because its battery was very dead TM, but in
the mean time I've been looking at the code and I can already tell
that a kernel with the 2e65e0fad935f578e998656d3e7be5a26e072b0e
("pinctrl: intel: baytrail: Pass irqchip when adding gpiochip")
is not going to boot, that commit moves the call to
byt_gpio_irq_init_hw() to before the call to devm_gpiochip_add_data().

devm_gpiochip_add_data() allocs gpio_chip.irq.valid_mask and
byt_gpio_irq_init_hw() does:

                 if (value & BYT_DIRECT_IRQ_EN) {
                         clear_bit(i, gc->irq.valid_mask);

Which means it will trigger a NULL deref after the
"pinctrl: intel: baytrail: Pass irqchip when adding gpiochip"
commit, sine now byt_gpio_irq_init_hw() runs before
devm_gpiochip_add_data().

Note that the gpio_chip structure already has a init_valid_mask
callback which runs after gpiochip_irqchip_init_valid_mask wich
allocs gpio_chip.irq.valid_mask, so we might use that, but:

That is intended to setup the valid_mask for the pins, not for
the IRQs, which would mean we are abusing it a bit and it runs
after gpiochip_add_irqchip(), which might be too late I guess.

So it looks like we need a gpio_chip.irq.init_valid_mask callback
to fix this ordering problem and until this is fixed we should revert
2e65e0fad935f578e998656d3e7be5a26e072b0e.

Regards,

Hans
Hans de Goede Aug. 18, 2019, 4:26 p.m. UTC | #11
Hi,

On 18-08-19 18:19, Hans de Goede wrote:
> Hi,
> 
> On 18-08-19 16:04, Andy Shevchenko wrote:
>> On Sun, Aug 18, 2019 at 03:38:17PM +0200, Hans de Goede wrote:
>>> On 18-08-19 15:24, Andy Shevchenko wrote:
>>>> On Fri, Aug 16, 2019 at 11:18:22AM +0200, Hans de Goede wrote:
>>>>> On 12-08-19 15:53, Linus Walleij wrote:
>>>>
>>>>> Anyways, this will need to be fixed before we can merge this.
>>>>
>>>> It might affect the behaviour of pinctrl-baytrail as well.
>>>>
>>>> Hans, do you have any Baytrail at hand to test latest linux-next, or take my
>>>> for-next branch from
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git?
>>>
>>> Given all the hw-enablement work I've done for BYT/CHT I have a whole
>>> stack of Bay Trail devices. Is there anything specific you want me to
>>> test?  Or should I just take the first one of the stack which uses a
>>> GPIO from the SoC as IRQ for something and then test that something?
>>
>>  From the thread I got that it should be one which uses GPIO for GPE.
>> Given that we have a fix there against misconfigured pins by firmware [1, 2],
>> which utilizes need_valid_mask, probably ASUS T100TA is a good candidate.
>>
>> [1]: https://www.spinics.net/lists/linux-gpio/msg18842.html
>> [2]: commit 49c03096263871a68c9dea3e86b7d1e163d2fba8
> 
> Thanks for the context, so for testing I need a Bay Trail device which
> uses a GPIO interrupt line with a "Interrupt" descriptor, like this:
> 
>                          Interrupt (ResourceConsumer, Level, ActiveHigh, Exclusiv
>                          {
>                              0x00000045,
>                          }
> 
> Instead of the more modern GpioInt descriptors. My Asus T100TA is in my
> storage-bin at the localhackerspace ATM, but I have a T200TA here which
> is the same wrt touchscreen IRQ routing.
> 
> I'm currently charging it because its battery was very dead TM, but in
> the mean time I've been looking at the code and I can already tell
> that a kernel with the 2e65e0fad935f578e998656d3e7be5a26e072b0e
> ("pinctrl: intel: baytrail: Pass irqchip when adding gpiochip")
> is not going to boot, that commit moves the call to
> byt_gpio_irq_init_hw() to before the call to devm_gpiochip_add_data().
> 
> devm_gpiochip_add_data() allocs gpio_chip.irq.valid_mask and
> byt_gpio_irq_init_hw() does:
> 
>                  if (value & BYT_DIRECT_IRQ_EN) {
>                          clear_bit(i, gc->irq.valid_mask);
> 
> Which means it will trigger a NULL deref after the
> "pinctrl: intel: baytrail: Pass irqchip when adding gpiochip"
> commit, sine now byt_gpio_irq_init_hw() runs before
> devm_gpiochip_add_data().
> 
> Note that the gpio_chip structure already has a init_valid_mask
> callback which runs after gpiochip_irqchip_init_valid_mask wich
> allocs gpio_chip.irq.valid_mask, so we might use that, but:
> 
> That is intended to setup the valid_mask for the pins, not for
> the IRQs, which would mean we are abusing it a bit and it runs
> after gpiochip_add_irqchip(), which might be too late I guess.
> 
> So it looks like we need a gpio_chip.irq.init_valid_mask callback
> to fix this ordering problem and until this is fixed we should revert
> 2e65e0fad935f578e998656d3e7be5a26e072b0e.

So thinking a bit more about this, I think a better fix would be to
export gpiochip_irqchip_init_valid_mask and make it proof against
multiple calls. Then drivers which need this before
devm_gpiochip_add_data() time can simply call it in advance.

Maybe (not sure if this is a good idea) we can eventually even
drop gpio_chip.irq.need_valid_mask and simply have all users of
the valid_mask explictly call gpiochip_irqchip_init_valid_mask.
If this second bit is a good idea depends on how much users there
are I guess and what the changes to those users would look like.

Regards,

Hans
Andy Shevchenko Aug. 18, 2019, 5:55 p.m. UTC | #12
On Sun, Aug 18, 2019 at 06:26:38PM +0200, Hans de Goede wrote:
> On 18-08-19 18:19, Hans de Goede wrote:
> > On 18-08-19 16:04, Andy Shevchenko wrote:

> > Thanks for the context, so for testing I need a Bay Trail device which
> > uses a GPIO interrupt line with a "Interrupt" descriptor, like this:
> > 
> >                          Interrupt (ResourceConsumer, Level, ActiveHigh, Exclusiv
> >                          {
> >                              0x00000045,
> >                          }
> > 
> > Instead of the more modern GpioInt descriptors. My Asus T100TA is in my
> > storage-bin at the localhackerspace ATM, but I have a T200TA here which
> > is the same wrt touchscreen IRQ routing.
> > 
> > I'm currently charging it because its battery was very dead TM, but in
> > the mean time I've been looking at the code and I can already tell
> > that a kernel with the 2e65e0fad935f578e998656d3e7be5a26e072b0e
> > ("pinctrl: intel: baytrail: Pass irqchip when adding gpiochip")
> > is not going to boot, that commit moves the call to
> > byt_gpio_irq_init_hw() to before the call to devm_gpiochip_add_data().
> > 
> > devm_gpiochip_add_data() allocs gpio_chip.irq.valid_mask and
> > byt_gpio_irq_init_hw() does:
> > 
> >                  if (value & BYT_DIRECT_IRQ_EN) {
> >                          clear_bit(i, gc->irq.valid_mask);
> > 
> > Which means it will trigger a NULL deref after the
> > "pinctrl: intel: baytrail: Pass irqchip when adding gpiochip"
> > commit, sine now byt_gpio_irq_init_hw() runs before
> > devm_gpiochip_add_data().
> > 
> > Note that the gpio_chip structure already has a init_valid_mask
> > callback which runs after gpiochip_irqchip_init_valid_mask wich
> > allocs gpio_chip.irq.valid_mask, so we might use that, but:
> > 
> > That is intended to setup the valid_mask for the pins, not for
> > the IRQs, which would mean we are abusing it a bit and it runs
> > after gpiochip_add_irqchip(), which might be too late I guess.
> > 
> > So it looks like we need a gpio_chip.irq.init_valid_mask callback
> > to fix this ordering problem and until this is fixed we should revert
> > 2e65e0fad935f578e998656d3e7be5a26e072b0e.
> 
> So thinking a bit more about this, I think a better fix would be to
> export gpiochip_irqchip_init_valid_mask and make it proof against
> multiple calls. Then drivers which need this before
> devm_gpiochip_add_data() time can simply call it in advance.
> 
> Maybe (not sure if this is a good idea) we can eventually even
> drop gpio_chip.irq.need_valid_mask and simply have all users of
> the valid_mask explictly call gpiochip_irqchip_init_valid_mask.
> If this second bit is a good idea depends on how much users there
> are I guess and what the changes to those users would look like.

Thank you, Hans, very much!
I will simple drop the changes from all drivers which need a valid mask.

Linus, JFYI ^^^, It also makes your cherrivew RFC commented as no go.
Linus Walleij Aug. 20, 2019, 8:41 a.m. UTC | #13
On Sun, Aug 18, 2019 at 6:19 PM Hans de Goede <hdegoede@redhat.com> wrote:

> Note that the gpio_chip structure already has a init_valid_mask
> callback which runs after gpiochip_irqchip_init_valid_mask wich
> allocs gpio_chip.irq.valid_mask, so we might use that, but:
>
> That is intended to setup the valid_mask for the pins, not for
> the IRQs, which would mean we are abusing it a bit and it runs
> after gpiochip_add_irqchip(), which might be too late I guess.

Yeah there are two of them which is quite confusing. I am trying
to clean up the mess step by step. The initialization of the valid_mask
for the pins itself (which is BTW also there for ACPI mainly) is
rewritten, I just merged these:
https://lore.kernel.org/linux-gpio/20190819084904.30027-1-linus.walleij@linaro.org/
https://lore.kernel.org/linux-gpio/20190819091140.622-1-linus.walleij@linaro.org/
https://lore.kernel.org/linux-gpio/20190819093058.10863-1-linus.walleij@linaro.org/

This makes things a bit clearer, now I "just" need to do the same
for the gpio_irq_chip.

> So it looks like we need a gpio_chip.irq.init_valid_mask callback
> to fix this ordering problem and until this is fixed we should revert
> 2e65e0fad935f578e998656d3e7be5a26e072b0e.

Do you think this patch, created in response to an ACPI bug in
similar vein for PL061 fixes the ordering problem:
https://lore.kernel.org/linux-gpio/20190820080527.11796-1-linus.walleij@linaro.org/

Just applying it on top and see if the interrups storm is fixed
would be super...

I actually don't think it is the setting up of .irq.valid_mask after
registering the chip that is the problem, but rather the semantic
ordering in the gpiochip_add_data_with_key() function that this
patch tries to fix.

Yours,
Linus Walleij
Hans de Goede Aug. 23, 2019, 2:22 p.m. UTC | #14
Hi,

On 8/20/19 10:41 AM, Linus Walleij wrote:
> On Sun, Aug 18, 2019 at 6:19 PM Hans de Goede <hdegoede@redhat.com> wrote:
> 
>> Note that the gpio_chip structure already has a init_valid_mask
>> callback which runs after gpiochip_irqchip_init_valid_mask wich
>> allocs gpio_chip.irq.valid_mask, so we might use that, but:
>>
>> That is intended to setup the valid_mask for the pins, not for
>> the IRQs, which would mean we are abusing it a bit and it runs
>> after gpiochip_add_irqchip(), which might be too late I guess.
> 
> Yeah there are two of them which is quite confusing. I am trying
> to clean up the mess step by step. The initialization of the valid_mask
> for the pins itself (which is BTW also there for ACPI mainly) is
> rewritten, I just merged these:
> https://lore.kernel.org/linux-gpio/20190819084904.30027-1-linus.walleij@linaro.org/
> https://lore.kernel.org/linux-gpio/20190819091140.622-1-linus.walleij@linaro.org/
> https://lore.kernel.org/linux-gpio/20190819093058.10863-1-linus.walleij@linaro.org/
> 
> This makes things a bit clearer, now I "just" need to do the same
> for the gpio_irq_chip.
> 
>> So it looks like we need a gpio_chip.irq.init_valid_mask callback
>> to fix this ordering problem and until this is fixed we should revert
>> 2e65e0fad935f578e998656d3e7be5a26e072b0e.
> 
> Do you think this patch, created in response to an ACPI bug in
> similar vein for PL061 fixes the ordering problem:
> https://lore.kernel.org/linux-gpio/20190820080527.11796-1-linus.walleij@linaro.org/
> 
> Just applying it on top and see if the interrups storm is fixed
> would be super...

I can confirm that that patch, fixed the issue I was seeing with
your "platform: x86: vgpio: Pass irqchip when adding gpiochip" patch
with both patches combined things work fine.

Regards,

Hans
Andy Shevchenko Aug. 23, 2019, 2:47 p.m. UTC | #15
On Fri, Aug 23, 2019 at 5:22 PM Hans de Goede <hdegoede@redhat.com> wrote:
>
> Hi,
>
> On 8/20/19 10:41 AM, Linus Walleij wrote:
> > On Sun, Aug 18, 2019 at 6:19 PM Hans de Goede <hdegoede@redhat.com> wrote:
> >
> >> Note that the gpio_chip structure already has a init_valid_mask
> >> callback which runs after gpiochip_irqchip_init_valid_mask wich
> >> allocs gpio_chip.irq.valid_mask, so we might use that, but:
> >>
> >> That is intended to setup the valid_mask for the pins, not for
> >> the IRQs, which would mean we are abusing it a bit and it runs
> >> after gpiochip_add_irqchip(), which might be too late I guess.
> >
> > Yeah there are two of them which is quite confusing. I am trying
> > to clean up the mess step by step. The initialization of the valid_mask
> > for the pins itself (which is BTW also there for ACPI mainly) is
> > rewritten, I just merged these:
> > https://lore.kernel.org/linux-gpio/20190819084904.30027-1-linus.walleij@linaro.org/
> > https://lore.kernel.org/linux-gpio/20190819091140.622-1-linus.walleij@linaro.org/
> > https://lore.kernel.org/linux-gpio/20190819093058.10863-1-linus.walleij@linaro.org/
> >
> > This makes things a bit clearer, now I "just" need to do the same
> > for the gpio_irq_chip.
> >
> >> So it looks like we need a gpio_chip.irq.init_valid_mask callback
> >> to fix this ordering problem and until this is fixed we should revert
> >> 2e65e0fad935f578e998656d3e7be5a26e072b0e.
> >
> > Do you think this patch, created in response to an ACPI bug in
> > similar vein for PL061 fixes the ordering problem:
> > https://lore.kernel.org/linux-gpio/20190820080527.11796-1-linus.walleij@linaro.org/
> >
> > Just applying it on top and see if the interrups storm is fixed
> > would be super...
>
> I can confirm that that patch, fixed the issue I was seeing with
> your "platform: x86: vgpio: Pass irqchip when adding gpiochip" patch
> with both patches combined things work fine.

Thank you very much, Hans, for testing and, Linus, for fixing.
Linus, I'll wait your patch appears in next rc and then I will reapply
your clean ups.
Hans de Goede Aug. 23, 2019, 5:59 p.m. UTC | #16
Hi,

On 8/23/19 4:47 PM, Andy Shevchenko wrote:
> On Fri, Aug 23, 2019 at 5:22 PM Hans de Goede <hdegoede@redhat.com> wrote:
>>
>> Hi,
>>
>> On 8/20/19 10:41 AM, Linus Walleij wrote:
>>> On Sun, Aug 18, 2019 at 6:19 PM Hans de Goede <hdegoede@redhat.com> wrote:
>>>
>>>> Note that the gpio_chip structure already has a init_valid_mask
>>>> callback which runs after gpiochip_irqchip_init_valid_mask wich
>>>> allocs gpio_chip.irq.valid_mask, so we might use that, but:
>>>>
>>>> That is intended to setup the valid_mask for the pins, not for
>>>> the IRQs, which would mean we are abusing it a bit and it runs
>>>> after gpiochip_add_irqchip(), which might be too late I guess.
>>>
>>> Yeah there are two of them which is quite confusing. I am trying
>>> to clean up the mess step by step. The initialization of the valid_mask
>>> for the pins itself (which is BTW also there for ACPI mainly) is
>>> rewritten, I just merged these:
>>> https://lore.kernel.org/linux-gpio/20190819084904.30027-1-linus.walleij@linaro.org/
>>> https://lore.kernel.org/linux-gpio/20190819091140.622-1-linus.walleij@linaro.org/
>>> https://lore.kernel.org/linux-gpio/20190819093058.10863-1-linus.walleij@linaro.org/
>>>
>>> This makes things a bit clearer, now I "just" need to do the same
>>> for the gpio_irq_chip.
>>>
>>>> So it looks like we need a gpio_chip.irq.init_valid_mask callback
>>>> to fix this ordering problem and until this is fixed we should revert
>>>> 2e65e0fad935f578e998656d3e7be5a26e072b0e.
>>>
>>> Do you think this patch, created in response to an ACPI bug in
>>> similar vein for PL061 fixes the ordering problem:
>>> https://lore.kernel.org/linux-gpio/20190820080527.11796-1-linus.walleij@linaro.org/
>>>
>>> Just applying it on top and see if the interrups storm is fixed
>>> would be super...
>>
>> I can confirm that that patch, fixed the issue I was seeing with
>> your "platform: x86: vgpio: Pass irqchip when adding gpiochip" patch
>> with both patches combined things work fine.
> 
> Thank you very much, Hans, for testing and, Linus, for fixing.
> Linus, I'll wait your patch appears in next rc and then I will reapply
> your clean ups.

Note Linus' reordering patch only fixes the issue with ACPI event handlers
no longer registering. The ordering issue in the BYT pinctrl code where
it tries to write chip.irq.valid_mask[x] while valid_mask is a NULL
pointer is NOT fixed by this.

Regards,

Hans




drivers/platform/x86/intel_int0002_vgpio.c
>
Andy Shevchenko Sept. 13, 2019, 12:19 p.m. UTC | #17
On Mon, Aug 12, 2019 at 03:53:35PM +0200, Linus Walleij wrote:
> We need to convert all old gpio irqchips to pass the irqchip
> setup along when adding the gpio_chip. For more info see
> drivers/gpio/TODO.
> 
> For chained irqchips this is a pretty straight-forward
> conversion.

After Hans' patches against this driver and discussion about proper order of
initialization the patch is not applicable anymore. Please, send v2 which
supposed to satisfy above concerns.

For now, I'm dropping it from my queue.
Linus Walleij Sept. 13, 2019, 1:31 p.m. UTC | #18
On Fri, Sep 13, 2019 at 2:19 PM Andy Shevchenko
<andriy.shevchenko@linux.intel.com> wrote:
> On Mon, Aug 12, 2019 at 03:53:35PM +0200, Linus Walleij wrote:
> > We need to convert all old gpio irqchips to pass the irqchip
> > setup along when adding the gpio_chip. For more info see
> > drivers/gpio/TODO.
> >
> > For chained irqchips this is a pretty straight-forward
> > conversion.
>
> After Hans' patches against this driver and discussion about proper order of
> initialization the patch is not applicable anymore. Please, send v2 which
> supposed to satisfy above concerns.
>
> For now, I'm dropping it from my queue.

No problem, I think I broke the mergebase with my other patch
for irq valid_mask initialization anyway. Let's wait until after the
merge window, then I'll rebase, touch it up and resend.

Yours,
Linus Walleij
diff mbox series

Patch

diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
index d9542c661ddc..493a97ce0b08 100644
--- a/drivers/platform/x86/intel_int0002_vgpio.c
+++ b/drivers/platform/x86/intel_int0002_vgpio.c
@@ -156,8 +156,8 @@  static int int0002_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	const struct x86_cpu_id *cpu_id;
-	struct irq_chip *irq_chip;
 	struct gpio_chip *chip;
+	struct gpio_irq_chip *girq;
 	int irq, ret;
 
 	/* Menlow has a different INT0002 device? <sigh> */
@@ -186,17 +186,9 @@  static int int0002_probe(struct platform_device *pdev)
 	chip->ngpio = GPE0A_PME_B0_VIRT_GPIO_PIN + 1;
 	chip->irq.need_valid_mask = true;
 
-	ret = devm_gpiochip_add_data(&pdev->dev, chip, NULL);
-	if (ret) {
-		dev_err(dev, "Error adding gpio chip: %d\n", ret);
-		return ret;
-	}
-
-	bitmap_clear(chip->irq.valid_mask, 0, GPE0A_PME_B0_VIRT_GPIO_PIN);
-
 	/*
-	 * We manually request the irq here instead of passing a flow-handler
-	 * to gpiochip_set_chained_irqchip, because the irq is shared.
+	 * We directly request the irq here instead of passing a flow-handler
+	 * to the gpio irqchip, because the irq is shared.
 	 */
 	ret = devm_request_irq(dev, irq, int0002_irq,
 			       IRQF_SHARED, "INT0002", chip);
@@ -204,17 +196,20 @@  static int int0002_probe(struct platform_device *pdev)
 		dev_err(dev, "Error requesting IRQ %d: %d\n", irq, ret);
 		return ret;
 	}
+	girq = &chip->irq;
+	girq->chip = (struct irq_chip *)cpu_id->driver_data;
+	girq->parent_handler = NULL;
+	girq->num_parents = 0;
+	girq->default_type = IRQ_TYPE_NONE;
+	girq->handler = handle_edge_irq;
 
-	irq_chip = (struct irq_chip *)cpu_id->driver_data;
-
-	ret = gpiochip_irqchip_add(chip, irq_chip, 0, handle_edge_irq,
-				   IRQ_TYPE_NONE);
+	ret = devm_gpiochip_add_data(&pdev->dev, chip, NULL);
 	if (ret) {
-		dev_err(dev, "Error adding irqchip: %d\n", ret);
+		dev_err(dev, "Error adding gpio chip: %d\n", ret);
 		return ret;
 	}
 
-	gpiochip_set_chained_irqchip(chip, irq_chip, irq, NULL);
+	bitmap_clear(chip->irq.valid_mask, 0, GPE0A_PME_B0_VIRT_GPIO_PIN);
 
 	return 0;
 }