diff mbox series

[RFC,v2] irqchip: qcom: pdc: Introduce irq_set_wake call

Message ID 1584019379-12085-2-git-send-email-mkshah@codeaurora.org (mailing list archive)
State Superseded
Headers show
Series [RFC,v2] irqchip: qcom: pdc: Introduce irq_set_wake call | expand

Commit Message

Maulik Shah March 12, 2020, 1:22 p.m. UTC
Change the way interrupts get enabled at wakeup capable PDC irq chip.

Introduce irq_set_wake call which lets interrupts enabled at PDC with
enable_irq_wake and disabled with disable_irq_wake with certain
conditions.

Interrupt will get enabled in HW at PDC and its parent GIC if they are
either enabled is SW or marked as wake up capable.

interrupt will get disabled in HW at PDC and its parent GIC only if its
disabled in SW and also marked as non-wake up capable.

Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
---
 drivers/irqchip/qcom-pdc.c | 124 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 117 insertions(+), 7 deletions(-)

Comments

Stephen Boyd March 17, 2020, 2:04 a.m. UTC | #1
Quoting Maulik Shah (2020-03-12 06:22:59)
> Change the way interrupts get enabled at wakeup capable PDC irq chip.
> 
> Introduce irq_set_wake call which lets interrupts enabled at PDC with
> enable_irq_wake and disabled with disable_irq_wake with certain
> conditions.
> 
> Interrupt will get enabled in HW at PDC and its parent GIC if they are
> either enabled is SW or marked as wake up capable.

Shouldn't we only enable in PDC and GIC if it's marked wakeup capable
and we're entering suspend? Otherwise we should let the hardware enable
state follow the software irq enable state?

> 
> interrupt will get disabled in HW at PDC and its parent GIC only if its
> disabled in SW and also marked as non-wake up capable.
> 
> Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
> ---
>  drivers/irqchip/qcom-pdc.c | 124 ++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 117 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
> index 6ae9e1f..d698cec 100644
> --- a/drivers/irqchip/qcom-pdc.c
> +++ b/drivers/irqchip/qcom-pdc.c
> @@ -1,6 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0
>  /*
> - * Copyright (c) 2017-2019, The Linux Foundation. All rights reserved.
> + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved.
>   */
>  
>  #include <linux/err.h>
> @@ -30,6 +30,9 @@
>  
>  #define PDC_NO_PARENT_IRQ      ~0UL
>  
> +DECLARE_BITMAP(pdc_wake_irqs, PDC_MAX_IRQS);
> +DECLARE_BITMAP(pdc_enabled_irqs, PDC_MAX_IRQS);

Please add static on both of these.

> +
>  struct pdc_pin_region {
>         u32 pin_base;
>         u32 parent_base;
> @@ -80,20 +83,32 @@ static void pdc_enable_intr(struct irq_data *d, bool on)
>         index = pin_out / 32;
>         mask = pin_out % 32;
>  
> -       raw_spin_lock(&pdc_lock);
>         enable = pdc_reg_read(IRQ_ENABLE_BANK, index);
>         enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask);
>         pdc_reg_write(IRQ_ENABLE_BANK, index, enable);
> -       raw_spin_unlock(&pdc_lock);
>  }
>  
>  static void qcom_pdc_gic_disable(struct irq_data *d)
>  {
> +       bool wake_status;

This name is not good. Why not 'wakeup_enabled'?

> +
>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
>                 return;
>  
> -       pdc_enable_intr(d, false);
> -       irq_chip_disable_parent(d);
> +       raw_spin_lock(&pdc_lock);
> +
> +       clear_bit(d->hwirq, pdc_enabled_irqs);

clear_bit() is atomic, so why inside the lock?

> +       wake_status = test_bit(d->hwirq, pdc_wake_irqs);
> +
> +       /* Disable at PDC HW if wake_status also says same */
> +       if (!wake_status)

Should read as "if not wakeup_enabled".

> +               pdc_enable_intr(d, false);
> +
> +       raw_spin_unlock(&pdc_lock);
> +
> +       /* Disable at GIC HW if wake_status also says same */
> +       if (!wake_status)

This happens outside the lock, so I'm confused why any locking is needed
in this function.

> +               irq_chip_disable_parent(d);
>  }
>  
>  static void qcom_pdc_gic_enable(struct irq_data *d)
> @@ -101,7 +116,16 @@ static void qcom_pdc_gic_enable(struct irq_data *d)
>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
>                 return;
>  
> +       raw_spin_lock(&pdc_lock);
> +
> +       set_bit(d->hwirq, pdc_enabled_irqs);
> +
> +       /* We can blindly enable at PDC HW as we are already in enable path */
>         pdc_enable_intr(d, true);
> +
> +       raw_spin_unlock(&pdc_lock);
> +
> +       /* We can blindly enable at GIC HW as we are already in enable path */
>         irq_chip_enable_parent(d);
>  }
>  
> @@ -121,6 +145,92 @@ static void qcom_pdc_gic_unmask(struct irq_data *d)
>         irq_chip_unmask_parent(d);
>  }
>  
> +/**
> + * qcom_pdc_gic_set_wake: Enables/Disables interrupt at PDC and parent GIC
> + *
> + * @d: the interrupt data
> + * @on: enable or disable wakeup capability
> + *
> + * The SW expects that an irq that's disabled with disable_irq()
> + * can still wake the system from sleep states such as "suspend to RAM",
> + * if it has been marked for wakeup.
> + *
> + * While the SW may choose to differ status for "wake" and "enabled"
> + * interrupts, its not the case with HW. There is no dedicated
> + * configuration in HW to differ "wake" and "enabled". Same is
> + * case for PDC's parent irq_chip (ARM GIC) which has only GICD_ISENABLER
> + * to indicate "enabled" or "disabled" status and also there is no .irq_set_wake
> + * implemented in parent GIC irq_chip.
> + *
> + * So, the HW ONLY understands either "enabled" or "disabled".
> + *
> + * This function is introduced to handle cases where drivers may invoke
> + * below during suspend in SW on their irq, and expect to wake up from this
> + * interrupt.
> + *
> + * 1. enable_irq_wake()
> + * 2. disable_irq()
> + *
> + * and below during resume
> + *
> + * 3. disable_irq_wake()
> + * 4. enable_irq()
> + *
> + * if (2) is invoked in end and just before suspend, it currently leaves

We shouldn't document the currently broken state of the code. Please
reword this.

> + * interrupt "disabled" at HW and hence not leading to resume.
> + *
> + * Note that the order of invoking (1) & (2) may interchange and similarly
> + * it can interchange for (3) & (4) as per driver's wish.
> + *
> + * if the driver call .irq_set_wake first it will enable at HW but later

s/if/If/

> + * call with .irq_disable will disable at HW. so final result is again

s/so/So/

> + * "disabled" at HW whereas the HW expectection is to keep it "enabled"

s/expectection/expectation/

> + * since it understands only "enabled" or "disabled".
> + *
> + * Hence .irq_set_wake can not just go ahead and  "enable" or "disable"
> + * at HW blindly, it needs to take in account status of currently "enable"

s/in/into/

> + * or "disable" as well.

"status of currently enable or disable as well" doesn't make any sense
to me. Is this "take into account if the interrupt is enabled or
disableed"?

> + *
> + * Introduce .irq_set_wake in PDC irq_chip to handle above issue.
> + * The final status in HW should be an "OR" of "enable" and "wake" calls.
> + * (i.e. same as below table)
> + * -------------------------------------------------|
> + * ENABLE in SW | WAKE in SW | PDC & GIC HW Status  |

Presumably 'PDC & GIC HW status' means enabled in PDC and GIC hardware?

> + *      0       |     0      |     0               |
> + *      0      |     1      |     1                |
> + *     1       |     0      |     1                |
> + *     1       |     1      |     1                |
> + *--------------------------------------------------|

Are there tabs in here? Probably better to just use spaces everywhere or
drop the OR table because it's literally just two variables.

 irq enable | irq wake == PDC & GIC hardware irq enabled

> + */
> +
> +static int qcom_pdc_gic_set_wake(struct irq_data *d, unsigned int on)
> +{
> +       bool enabled_status;
> +
> +       if (d->hwirq == GPIO_NO_WAKE_IRQ)
> +               return 0;
> +
> +       raw_spin_lock(&pdc_lock);
> +       enabled_status = test_bit(d->hwirq, pdc_enabled_irqs);
> +       if (on) {
> +               set_bit(d->hwirq, pdc_wake_irqs);
> +               pdc_enable_intr(d, true);
> +       } else {
> +               clear_bit(d->hwirq, pdc_wake_irqs);
> +               pdc_enable_intr(d, enabled_status);
> +       }
> +
> +       raw_spin_unlock(&pdc_lock);
> +
> +       /* Either "wake" or "enabled" need same status at parent as well */
> +       if (on || enabled_status)
> +               irq_chip_enable_parent(d);
> +       else
> +               irq_chip_disable_parent(d);

What happens if irq is "disabled" in software, because this is the first
function called on the irq, and we aren't in suspend yet. Then we get
the irq. Won't we be interrupting the CPU because we've enabled in PDC
and GIC hardware? Why doesn't this function update the wake bit and then
leave the force on irq logic to suspend entry? Will we miss an interrupt
while entering suspend because of that?

Otherwise, I wonder why the code can't be:

	if (on)
		set_bit(d->hwirq, pdc_wake_irqs);
	else
		clear_bit(d->hwirq, pdc_wake_irqs);
	
	pdc_enable_intr(d, on);

Then we can leave the lock inside the pdc_enable_intr() function and
test for both bitmasks there and or in whatever software is asking for?
It would be nice to simplify the callers and make the code that actually
writes the hardware do a small bit test and set operation.

> +
> +       return irq_chip_set_wake_parent(d, on);
> +}
> +
>  /*
>   * GIC does not handle falling edge or active low. To allow falling edge and
>   * active low interrupts to be handled at GIC, PDC has an inverter that inverts
Maulik Shah March 17, 2020, 6:47 a.m. UTC | #2
Hi,

On 3/17/2020 7:34 AM, Stephen Boyd wrote:
> Quoting Maulik Shah (2020-03-12 06:22:59)
>> Change the way interrupts get enabled at wakeup capable PDC irq chip.
>>
>> Introduce irq_set_wake call which lets interrupts enabled at PDC with
>> enable_irq_wake and disabled with disable_irq_wake with certain
>> conditions.
>>
>> Interrupt will get enabled in HW at PDC and its parent GIC if they are
>> either enabled is SW or marked as wake up capable.
> Shouldn't we only enable in PDC and GIC if it's marked wakeup capable
> and we're entering suspend? Otherwise we should let the hardware enable
> state follow the software irq enable state?
Not only during "sleep" but PDC (and GIC) have a role during "active" time as well.
so we can not just enabled at PDC and GIC when entering to suspend, interrupt need
to keep interrupt enabled at PDC and GIC HW when out of suspend as well.
>
>> interrupt will get disabled in HW at PDC and its parent GIC only if its
>> disabled in SW and also marked as non-wake up capable.
>>
>> Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
>> ---
>>  drivers/irqchip/qcom-pdc.c | 124 ++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 117 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
>> index 6ae9e1f..d698cec 100644
>> --- a/drivers/irqchip/qcom-pdc.c
>> +++ b/drivers/irqchip/qcom-pdc.c
>> @@ -1,6 +1,6 @@
>>  // SPDX-License-Identifier: GPL-2.0
>>  /*
>> - * Copyright (c) 2017-2019, The Linux Foundation. All rights reserved.
>> + * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved.
>>   */
>>  
>>  #include <linux/err.h>
>> @@ -30,6 +30,9 @@
>>  
>>  #define PDC_NO_PARENT_IRQ      ~0UL
>>  
>> +DECLARE_BITMAP(pdc_wake_irqs, PDC_MAX_IRQS);
>> +DECLARE_BITMAP(pdc_enabled_irqs, PDC_MAX_IRQS);
> Please add static on both of these.
Sure.
>
>> +
>>  struct pdc_pin_region {
>>         u32 pin_base;
>>         u32 parent_base;
>> @@ -80,20 +83,32 @@ static void pdc_enable_intr(struct irq_data *d, bool on)
>>         index = pin_out / 32;
>>         mask = pin_out % 32;
>>  
>> -       raw_spin_lock(&pdc_lock);
>>         enable = pdc_reg_read(IRQ_ENABLE_BANK, index);
>>         enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask);
>>         pdc_reg_write(IRQ_ENABLE_BANK, index, enable);
>> -       raw_spin_unlock(&pdc_lock);
>>  }
>>  
>>  static void qcom_pdc_gic_disable(struct irq_data *d)
>>  {
>> +       bool wake_status;
> This name is not good. Why not 'wakeup_enabled'?
I will rename to 'wakeup_enabled'
>
>> +
>>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
>>                 return;
>>  
>> -       pdc_enable_intr(d, false);
>> -       irq_chip_disable_parent(d);
>> +       raw_spin_lock(&pdc_lock);
>> +
>> +       clear_bit(d->hwirq, pdc_enabled_irqs);
> clear_bit() is atomic, so why inside the lock?
I will move it out of lock.
>
>> +       wake_status = test_bit(d->hwirq, pdc_wake_irqs);
>> +
>> +       /* Disable at PDC HW if wake_status also says same */
>> +       if (!wake_status)
> Should read as "if not wakeup_enabled".
I will update comment.
>
>> +               pdc_enable_intr(d, false);
>> +
>> +       raw_spin_unlock(&pdc_lock);
>> +
>> +       /* Disable at GIC HW if wake_status also says same */
>> +       if (!wake_status)
> This happens outside the lock, so I'm confused why any locking is needed
> in this function.
Okay, since test_bit() is also atomic so i will keep locking inside pc_enable_intr() as it is.
>
>> +               irq_chip_disable_parent(d);
>>  }
>>  
>>  static void qcom_pdc_gic_enable(struct irq_data *d)
>> @@ -101,7 +116,16 @@ static void qcom_pdc_gic_enable(struct irq_data *d)
>>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
>>                 return;
>>  
>> +       raw_spin_lock(&pdc_lock);
>> +
>> +       set_bit(d->hwirq, pdc_enabled_irqs);
>> +
>> +       /* We can blindly enable at PDC HW as we are already in enable path */
>>         pdc_enable_intr(d, true);
>> +
>> +       raw_spin_unlock(&pdc_lock);
>> +
>> +       /* We can blindly enable at GIC HW as we are already in enable path */
>>         irq_chip_enable_parent(d);
>>  }
>>  
>> @@ -121,6 +145,92 @@ static void qcom_pdc_gic_unmask(struct irq_data *d)
>>         irq_chip_unmask_parent(d);
>>  }
>>  
>> +/**
>> + * qcom_pdc_gic_set_wake: Enables/Disables interrupt at PDC and parent GIC
>> + *
>> + * @d: the interrupt data
>> + * @on: enable or disable wakeup capability
>> + *
>> + * The SW expects that an irq that's disabled with disable_irq()
>> + * can still wake the system from sleep states such as "suspend to RAM",
>> + * if it has been marked for wakeup.
>> + *
>> + * While the SW may choose to differ status for "wake" and "enabled"
>> + * interrupts, its not the case with HW. There is no dedicated
>> + * configuration in HW to differ "wake" and "enabled". Same is
>> + * case for PDC's parent irq_chip (ARM GIC) which has only GICD_ISENABLER
>> + * to indicate "enabled" or "disabled" status and also there is no .irq_set_wake
>> + * implemented in parent GIC irq_chip.
>> + *
>> + * So, the HW ONLY understands either "enabled" or "disabled".
>> + *
>> + * This function is introduced to handle cases where drivers may invoke
>> + * below during suspend in SW on their irq, and expect to wake up from this
>> + * interrupt.
>> + *
>> + * 1. enable_irq_wake()
>> + * 2. disable_irq()
>> + *
>> + * and below during resume
>> + *
>> + * 3. disable_irq_wake()
>> + * 4. enable_irq()
>> + *
>> + * if (2) is invoked in end and just before suspend, it currently leaves
> We shouldn't document the currently broken state of the code. Please
> reword this.
Okay.
>
>> + * interrupt "disabled" at HW and hence not leading to resume.
>> + *
>> + * Note that the order of invoking (1) & (2) may interchange and similarly
>> + * it can interchange for (3) & (4) as per driver's wish.
>> + *
>> + * if the driver call .irq_set_wake first it will enable at HW but later
> s/if/If/
I will address.
>
>> + * call with .irq_disable will disable at HW. so final result is again
> s/so/So/
I will address.
>
>> + * "disabled" at HW whereas the HW expectection is to keep it "enabled"
> s/expectection/expectation/
I will address.
>
>> + * since it understands only "enabled" or "disabled".
>> + *
>> + * Hence .irq_set_wake can not just go ahead and  "enable" or "disable"
>> + * at HW blindly, it needs to take in account status of currently "enable"
> s/in/into/
I will address.
>
>> + * or "disable" as well.
> "status of currently enable or disable as well" doesn't make any sense
> to me. Is this "take into account if the interrupt is enabled or
> disableed"?
I will address.
>
>> + *
>> + * Introduce .irq_set_wake in PDC irq_chip to handle above issue.
>> + * The final status in HW should be an "OR" of "enable" and "wake" calls.
>> + * (i.e. same as below table)
>> + * -------------------------------------------------|
>> + * ENABLE in SW | WAKE in SW | PDC & GIC HW Status  |
> Presumably 'PDC & GIC HW status' means enabled in PDC and GIC hardware?
True.
>
>> + *      0       |     0      |     0               |
>> + *      0      |     1      |     1                |
>> + *     1       |     0      |     1                |
>> + *     1       |     1      |     1                |
>> + *--------------------------------------------------|
> Are there tabs in here? Probably better to just use spaces everywhere or
> drop the OR table because it's literally just two variables.
>
>  irq enable | irq wake == PDC & GIC hardware irq enabled
Okay, i will remove table and keep above.
>
>> + */
>> +
>> +static int qcom_pdc_gic_set_wake(struct irq_data *d, unsigned int on)
>> +{
>> +       bool enabled_status;
>> +
>> +       if (d->hwirq == GPIO_NO_WAKE_IRQ)
>> +               return 0;
>> +
>> +       raw_spin_lock(&pdc_lock);
>> +       enabled_status = test_bit(d->hwirq, pdc_enabled_irqs);
>> +       if (on) {
>> +               set_bit(d->hwirq, pdc_wake_irqs);
>> +               pdc_enable_intr(d, true);
>> +       } else {
>> +               clear_bit(d->hwirq, pdc_wake_irqs);
>> +               pdc_enable_intr(d, enabled_status);
>> +       }
>> +
>> +       raw_spin_unlock(&pdc_lock);
>> +
>> +       /* Either "wake" or "enabled" need same status at parent as well */
>> +       if (on || enabled_status)
>> +               irq_chip_enable_parent(d);
>> +       else
>> +               irq_chip_disable_parent(d);
> What happens if irq is "disabled" in software, because this is the first
> function called on the irq, and we aren't in suspend yet. Then we get
> the irq. Won't we be interrupting the CPU because we've enabled in PDC
> and GIC hardware? Why doesn't this function update the wake bit and then
> leave the force on irq logic to suspend entry? Will we miss an interrupt
> while entering suspend because of that?
As PDC (and GIC) have a role during "active" time as well, interrupt should be
enabled in PDC and GIC HW.
>
> Otherwise, I wonder why the code can't be:
>
> 	if (on)
> 		set_bit(d->hwirq, pdc_wake_irqs);
> 	else
> 		clear_bit(d->hwirq, pdc_wake_irqs);
> 	
> 	pdc_enable_intr(d, on);
>
> Then we can leave the lock inside the pdc_enable_intr() function 
okay, since set_bit(), clear_bit(), test_bit() are atomic, i will address this to keep
locking inside pdc_enable_intr() only.
> and
> test for both bitmasks there and or in whatever software is asking for?
> It would be nice to simplify the callers and make the code that actually
> writes the hardware do a small bit test and set operation.
.irq_set_wake is testing both "enabled" and "wake" bitmask and then invoking respective parent API.
IMO, This is simpler to keep PDC functionality separate in pdc_enable_intr() and not mix it with invoking parent APIs.
>
>> +
>> +       return irq_chip_set_wake_parent(d, on);
>> +}
>> +
>>  /*
>>   * GIC does not handle falling edge or active low. To allow falling edge and
>>   * active low interrupts to be handled at GIC, PDC has an inverter that inverts
Thanks,
Maulik
Stephen Boyd March 18, 2020, 9:14 p.m. UTC | #3
Quoting Maulik Shah (2020-03-16 23:47:21)
> Hi,
> 
> On 3/17/2020 7:34 AM, Stephen Boyd wrote:
> > Quoting Maulik Shah (2020-03-12 06:22:59)
> >> Change the way interrupts get enabled at wakeup capable PDC irq chip.
> >>
> >> Introduce irq_set_wake call which lets interrupts enabled at PDC with
> >> enable_irq_wake and disabled with disable_irq_wake with certain
> >> conditions.
> >>
> >> Interrupt will get enabled in HW at PDC and its parent GIC if they are
> >> either enabled is SW or marked as wake up capable.
> > Shouldn't we only enable in PDC and GIC if it's marked wakeup capable
> > and we're entering suspend? Otherwise we should let the hardware enable
> > state follow the software irq enable state?
> Not only during "sleep" but PDC (and GIC) have a role during "active" time as well.
> so we can not just enabled at PDC and GIC when entering to suspend, interrupt need
> to keep interrupt enabled at PDC and GIC HW when out of suspend as well.

Yes, but if an interrupt is only marked for wakeup and not actually
enabled we shouldn't deliver it to the GIC. That's what I'm asking
about.

> >
> >> interrupt will get disabled in HW at PDC and its parent GIC only if its
> >> disabled in SW and also marked as non-wake up capable.
> >>
> >> Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
> >> ---
> >>  drivers/irqchip/qcom-pdc.c | 124 ++++++++++++++++++++++++++++++++++++++++++---
> >>  1 file changed, 117 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
> >> index 6ae9e1f..d698cec 100644
> >> --- a/drivers/irqchip/qcom-pdc.c
> >> +++ b/drivers/irqchip/qcom-pdc.c
> >> @@ -1,6 +1,6 @@
[...]
> >
> >> +
> >>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
> >>                 return;
> >>  
> >> -       pdc_enable_intr(d, false);
> >> -       irq_chip_disable_parent(d);
> >> +       raw_spin_lock(&pdc_lock);
> >> +
> >> +       clear_bit(d->hwirq, pdc_enabled_irqs);
> > clear_bit() is atomic, so why inside the lock?
> I will move it out of lock.
> >
> >> +       wake_status = test_bit(d->hwirq, pdc_wake_irqs);
> >> +
> >> +       /* Disable at PDC HW if wake_status also says same */
> >> +       if (!wake_status)
> > Should read as "if not wakeup_enabled".
> I will update comment.

Hopefully the comment isn't useful and can just be removed if the code
reads properly.

> >
> >> +               pdc_enable_intr(d, false);
> >> +
> >> +       raw_spin_unlock(&pdc_lock);
> >> +
> >> +       /* Disable at GIC HW if wake_status also says same */
> >> +       if (!wake_status)
> > This happens outside the lock, so I'm confused why any locking is needed
> > in this function.
> Okay, since test_bit() is also atomic so i will keep locking inside pc_enable_intr() as it is.
> >
> >> +               irq_chip_disable_parent(d);
> >>  }
> >>  
> >>  static void qcom_pdc_gic_enable(struct irq_data *d)
> >> @@ -101,7 +116,16 @@ static void qcom_pdc_gic_enable(struct irq_data *d)
> >>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
> >>                 return;
> >>  
> >> +       raw_spin_lock(&pdc_lock);
> >> +
> >> +       set_bit(d->hwirq, pdc_enabled_irqs);
> >> +
> >> +       /* We can blindly enable at PDC HW as we are already in enable path */
> >>         pdc_enable_intr(d, true);
> >> +
> >> +       raw_spin_unlock(&pdc_lock);
> >> +
> >> +       /* We can blindly enable at GIC HW as we are already in enable path */
> >>         irq_chip_enable_parent(d);
> >>  }
> >>  
[...]
> >> + */
> >> +
> >> +static int qcom_pdc_gic_set_wake(struct irq_data *d, unsigned int on)
> >> +{
> >> +       bool enabled_status;
> >> +
> >> +       if (d->hwirq == GPIO_NO_WAKE_IRQ)
> >> +               return 0;
> >> +
> >> +       raw_spin_lock(&pdc_lock);
> >> +       enabled_status = test_bit(d->hwirq, pdc_enabled_irqs);
> >> +       if (on) {
> >> +               set_bit(d->hwirq, pdc_wake_irqs);
> >> +               pdc_enable_intr(d, true);
> >> +       } else {
> >> +               clear_bit(d->hwirq, pdc_wake_irqs);
> >> +               pdc_enable_intr(d, enabled_status);
> >> +       }
> >> +
> >> +       raw_spin_unlock(&pdc_lock);
> >> +
> >> +       /* Either "wake" or "enabled" need same status at parent as well */
> >> +       if (on || enabled_status)
> >> +               irq_chip_enable_parent(d);
> >> +       else
> >> +               irq_chip_disable_parent(d);
> > What happens if irq is "disabled" in software, because this is the first
> > function called on the irq, and we aren't in suspend yet. Then we get
> > the irq. Won't we be interrupting the CPU because we've enabled in PDC
> > and GIC hardware? Why doesn't this function update the wake bit and then
> > leave the force on irq logic to suspend entry? Will we miss an interrupt
> > while entering suspend because of that?
> As PDC (and GIC) have a role during "active" time as well, interrupt should be
> enabled in PDC and GIC HW.

Sure. When the irq is enabled we want to enable at the GIC, but if it
isn't enabled and we're not in suspend I would think we don't want the
irq enabled at the GIC. But this code is doing that. Why? I'd think we
would want to make enable in the PDC driver enable the parent and then
make the set_wake path just update some bitmap tracking wakeup enabled
irqs.

Then when we enter suspend we will enable any pdc interrupts only in the
PDC so that we can wakeup from suspend if that interrupt comes in. When
we wakeup we'll resend the edge interrupts to the GIC on the resume path
and level interrupts will "just work" because they'll stay asserted
throughout resume.

The bigger problem would look to be suspend entry, but in that case we
leave any interrupts enabled at the GIC on the path to suspend (see
suspend_device_irq() and how it bails out early if it's marked for
wakeup) so we should be able to have some suspend entry hook in pdc that
enables the irq in the PDC if it's in the wakeup bitmap. Then on the
path to suspend the GIC can lose power at any point after we enable the
wakeup path in PDC and then the system should resume and get the
interrupt through the resend mechanism.
Maulik Shah March 19, 2020, 9:56 a.m. UTC | #4
Hi,

On 3/19/2020 2:44 AM, Stephen Boyd wrote:
> Quoting Maulik Shah (2020-03-16 23:47:21)
>> Hi,
>>
>> On 3/17/2020 7:34 AM, Stephen Boyd wrote:
>>> Quoting Maulik Shah (2020-03-12 06:22:59)
>>>> Change the way interrupts get enabled at wakeup capable PDC irq chip.
>>>>
>>>> Introduce irq_set_wake call which lets interrupts enabled at PDC with
>>>> enable_irq_wake and disabled with disable_irq_wake with certain
>>>> conditions.
>>>>
>>>> Interrupt will get enabled in HW at PDC and its parent GIC if they are
>>>> either enabled is SW or marked as wake up capable.
>>> Shouldn't we only enable in PDC and GIC if it's marked wakeup capable
>>> and we're entering suspend? Otherwise we should let the hardware enable
>>> state follow the software irq enable state?
>> Not only during "sleep" but PDC (and GIC) have a role during "active" time as well.
>> so we can not just enabled at PDC and GIC when entering to suspend, interrupt need
>> to keep interrupt enabled at PDC and GIC HW when out of suspend as well.
> Yes, but if an interrupt is only marked for wakeup and not actually
> enabled we shouldn't deliver it to the GIC. That's what I'm asking
> about.
please see below details.
>>>> interrupt will get disabled in HW at PDC and its parent GIC only if its
>>>> disabled in SW and also marked as non-wake up capable.
>>>>
>>>> Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
>>>> ---
>>>>  drivers/irqchip/qcom-pdc.c | 124 ++++++++++++++++++++++++++++++++++++++++++---
>>>>  1 file changed, 117 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
>>>> index 6ae9e1f..d698cec 100644
>>>> --- a/drivers/irqchip/qcom-pdc.c
>>>> +++ b/drivers/irqchip/qcom-pdc.c
>>>> @@ -1,6 +1,6 @@
> [...]
>>>> +
>>>>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
>>>>                 return;
>>>>  
>>>> -       pdc_enable_intr(d, false);
>>>> -       irq_chip_disable_parent(d);
>>>> +       raw_spin_lock(&pdc_lock);
>>>> +
>>>> +       clear_bit(d->hwirq, pdc_enabled_irqs);
>>> clear_bit() is atomic, so why inside the lock?
>> I will move it out of lock.
>>>> +       wake_status = test_bit(d->hwirq, pdc_wake_irqs);
>>>> +
>>>> +       /* Disable at PDC HW if wake_status also says same */
>>>> +       if (!wake_status)
>>> Should read as "if not wakeup_enabled".
>> I will update comment.
> Hopefully the comment isn't useful and can just be removed if the code
> reads properly.
Sure i will remove.
>>>> +               pdc_enable_intr(d, false);
>>>> +
>>>> +       raw_spin_unlock(&pdc_lock);
>>>> +
>>>> +       /* Disable at GIC HW if wake_status also says same */
>>>> +       if (!wake_status)
>>> This happens outside the lock, so I'm confused why any locking is needed
>>> in this function.
>> Okay, since test_bit() is also atomic so i will keep locking inside pc_enable_intr() as it is.
>>>> +               irq_chip_disable_parent(d);
>>>>  }
>>>>  
>>>>  static void qcom_pdc_gic_enable(struct irq_data *d)
>>>> @@ -101,7 +116,16 @@ static void qcom_pdc_gic_enable(struct irq_data *d)
>>>>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
>>>>                 return;
>>>>  
>>>> +       raw_spin_lock(&pdc_lock);
>>>> +
>>>> +       set_bit(d->hwirq, pdc_enabled_irqs);
>>>> +
>>>> +       /* We can blindly enable at PDC HW as we are already in enable path */
>>>>         pdc_enable_intr(d, true);
>>>> +
>>>> +       raw_spin_unlock(&pdc_lock);
>>>> +
>>>> +       /* We can blindly enable at GIC HW as we are already in enable path */
>>>>         irq_chip_enable_parent(d);
>>>>  }
>>>>  
> [...]
>>>> + */
>>>> +
>>>> +static int qcom_pdc_gic_set_wake(struct irq_data *d, unsigned int on)
>>>> +{
>>>> +       bool enabled_status;
>>>> +
>>>> +       if (d->hwirq == GPIO_NO_WAKE_IRQ)
>>>> +               return 0;
>>>> +
>>>> +       raw_spin_lock(&pdc_lock);
>>>> +       enabled_status = test_bit(d->hwirq, pdc_enabled_irqs);
>>>> +       if (on) {
>>>> +               set_bit(d->hwirq, pdc_wake_irqs);
>>>> +               pdc_enable_intr(d, true);
>>>> +       } else {
>>>> +               clear_bit(d->hwirq, pdc_wake_irqs);
>>>> +               pdc_enable_intr(d, enabled_status);
>>>> +       }
>>>> +
>>>> +       raw_spin_unlock(&pdc_lock);
>>>> +
>>>> +       /* Either "wake" or "enabled" need same status at parent as well */
>>>> +       if (on || enabled_status)
>>>> +               irq_chip_enable_parent(d);
>>>> +       else
>>>> +               irq_chip_disable_parent(d);
>>> What happens if irq is "disabled" in software, because this is the first
>>> function called on the irq, and we aren't in suspend yet. Then we get
>>> the irq. Won't we be interrupting the CPU because we've enabled in PDC
>>> and GIC hardware? Why doesn't this function update the wake bit and then
>>> leave the force on irq logic to suspend entry? Will we miss an interrupt
>>> while entering suspend because of that?
>> As PDC (and GIC) have a role during "active" time as well, interrupt should be
>> enabled in PDC and GIC HW.
> Sure. When the irq is enabled we want to enable at the GIC, but if it
> isn't enabled and we're not in suspend I would think we don't want the
> irq enabled at the GIC. But this code is doing that. Why?

Since we want to wake up in idle path LPM as well, when IRQ is marked as wake up capable and even though its disabled.
>  I'd think we
> would want to make enable in the PDC driver enable the parent and then
> make the set_wake path just update some bitmap tracking wakeup enabled
> irqs.
>
> Then when we enter suspend we will enable any pdc interrupts only in the
> PDC so that we can wakeup from suspend if that interrupt comes in. When
> we wakeup we'll resend the edge interrupts to the GIC on the resume path
> and level interrupts will "just work" because they'll stay asserted
> throughout resume.
>
> The bigger problem would look to be suspend entry, but in that case we
> leave any interrupts enabled at the GIC on the path to suspend (see
> suspend_device_irq() and how it bails out early if it's marked for
> wakeup) 

No it doesn't happen this way in suspend_device_irq(), not for this disabled IRQ.

Agree, it’s a bigger problem to set IRQ enabled at GIC HW which is already disabled in HW and SW but marked for wake up.
However suspend_device_irq() is of little or no-use here (for that particular IRQ). Let me step by step give details.

This will benefit everyone to understand this problem. Correct me if something below in not happening as I listed.

Step-1

Driver invokes enable_irq_wake() to mark their IRQ wake up capable.

Step-2

After enable_irq_wake(), drivers invokes disable_irq().
Let’s break it down how this disable_irq() will traverse (in current code, without this RFC PATCH)

Step-2.1

In kernel/irq/manage.c
disable_irq() => __disable_irq_nosync() => __disable_irq() => irq_disable()

Step-2.2

This will jump to kernel/irq/chip.c
irq_disable() => __irq_disable()  => which then calls chip’s .irq_disable via desc->irq_data.chip->irq_disable(&desc->irq_data);
Note that this is a GPIO IRQ, gpiolib set’s this .irq_disable for every gpio chip during registration

(see below in drivers/gpio/gpiolib.c)
irqchip->irq_disable = gpiochip_irq_disable;

So it doesn’t take lazy path to disable at HW, its always unlazy (at-least for gpio IRQs)

Step-2.3

Call Jumps to gpiochip_irq_disable()
This then invokes irq_chip’s  .irq_disable via chip->irq.irq_disable(d)
which finally arrives at msmgpio irq_chip’s msm_gpio_irq_disable() call from here.
it finds that IRQ controller is in hierarchy, so it invokes irq_chip_disable_parent().

Step-2.4

This invokes PDC irq_chip’s qcom_pdc_gic_disable()
At this point,
This will go ahead and “disabled in PDC HW”
This also invokes irq_chip_disable_parent() since PDC is also in hierarchy with GIC.

Step-2.5

This invokes GIC’s gic_mask_irq() since GIC doesn’t have .irq_disable implemented, it instead invokes mask.
This will go ahead and “disables at GIC HW”.

Final status at the end of step 2:
(a)    IRQ is marked as wake up capable in SW
(b)    IRQ is disabled in both SW and HW at GIC and PDC

Step-3
Device enters “suspend to RAM” which invokes suspend_device_irq()
Pasting the interested part here…

       if (irqd_is_wakeup_set(&desc->irq_data)) {
                irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
                …
                return true;
        }
        ..
        __disable_irq(desc);

Note that it bails out with above if condition here for IRQs that are marked wake up capable, as you have already pointed out.

For the rest of IRQs it goes ahead and disables them at HW via invoking __disable_irq() (be it a GIC HW or PDC HW)
so when device is in suspend only wake up capable IRQs are finally left enabled in HW.

Now, If any driver that has only done step-1, then it make sense that it will bail out here and leave IRQ enabled in HW (to be precise in its
original state at HW, without disabling it)

But some drivers did step-1 and step-2 both.

Yes even for this case, it will still bail out here, since its marked as wake up capable but it defeats the purpose of bail out
(bail out is to NOT go ahead and disable at HW), but by doing step-2 driver already disabled the IRQ at HW well before
suspend_device_irq() is invoked.

In other words, IRQ status stays same as what was at the end of step-2 (disabled in HW)

So this whole function is of no use (for that particular IRQ part). Here driver is disabling IRQ on their own via step-2 and then asks
“hey remember I marked it as wake up capable in step-1, so IRQ should resume system from suspend when it occurs” completely ignoring
the fact that its already disabled at HW in step-2.

IMO, drivers should not even do step-2 here.  They should just mark irq as wake up capable and then let suspend framework decide whether
to keep it enabled or disabled in HW when entering to suspend (Read may only do step-1, then everything works fine with current code)

Hope that above details makes it clear on why I way asking earlier to remove unnecessary disable_irq() call from driver, I don’t understand its usage
(at least till now its not clear to me). May be there are some reasons like seeing spurious IRQ when entering to suspend so driver may want to disable it in HW.

But then responsibility is falling on either suspend framework to re-enable such wake up capable interrupts in HW.
This may be done by again invoking enable_irq() before bailing out for wakeup capable IRQ in suspend_device_irq(). I don’t know if this is acceptable. 

Someone from To/Cc may clarify if above can be done. Note that it may create problem for other drivers which does only step-1, by calling enable_irq()
during suspend, it will update desc->depth, so this agin need to be undo when resuming.

Otherwise, it is currently falling onto the individual irq_chip’s to avoid disabling in HW in the first place.

this RFC patch tries to do same and address this problem in PDC irq_chip when control reaches at Step 2.4, to NOT disable at HW when the IRQ
is already marked wake up capable, it bails out at Step 2.4 in PDC irq_chip so that interrupt is left enabled in HW at both PDC and GIC HW level.

> so we should be able to have some suspend entry hook in pdc that
> enables the irq in the PDC if it's in the wakeup bitmap. Then on the
> path to suspend the GIC can lose power at any point after we enable the
> wakeup path in PDC and then the system should resume and get the
> interrupt through the resend mechanism.
I thought of this to introduce suspend hook in PDC which decides to keep wake up marked irq enabled at PDC.
But then someone need to keep it enabled at GIC as well.

PDC does not directly forward IRQ to CPU. PDC brings SoC out of low power mode where GIC does
not have power cut, it replays the interrupt at GIC in HW and that leads to forwarding interrupt
to CPU and resume from low power mode.
So PDC and GIC HW status should need to be in sync.

Thanks,

Maulik
Stephen Boyd March 19, 2020, 9:53 p.m. UTC | #5
Quoting Maulik Shah (2020-03-19 02:56:31)
> On 3/19/2020 2:44 AM, Stephen Boyd wrote:
> > Quoting Maulik Shah (2020-03-16 23:47:21)
> >> On 3/17/2020 7:34 AM, Stephen Boyd wrote:
> >>> What happens if irq is "disabled" in software, because this is the first
> >>> function called on the irq, and we aren't in suspend yet. Then we get
> >>> the irq. Won't we be interrupting the CPU because we've enabled in PDC
> >>> and GIC hardware? Why doesn't this function update the wake bit and then
> >>> leave the force on irq logic to suspend entry? Will we miss an interrupt
> >>> while entering suspend because of that?
> >> As PDC (and GIC) have a role during "active" time as well, interrupt should be
> >> enabled in PDC and GIC HW.
> > Sure. When the irq is enabled we want to enable at the GIC, but if it
> > isn't enabled and we're not in suspend I would think we don't want the
> > irq enabled at the GIC. But this code is doing that. Why?
> 
> Since we want to wake up in idle path LPM as well, when IRQ is marked as wake up capable and even though its disabled.

No. IRQs that are disabled but have wake enabled on them should not wake
up the CPU from deep idle states (which is what I assume you mean by
idle path LPM (Low Power Mode)). Only if the irq is enabled should it
wake the CPU from deep idle states, and in this case irq wake state has
nothing to do with that working or not.

> >  I'd think we
> > would want to make enable in the PDC driver enable the parent and then
> > make the set_wake path just update some bitmap tracking wakeup enabled
> > irqs.
> >
> > Then when we enter suspend we will enable any pdc interrupts only in the
> > PDC so that we can wakeup from suspend if that interrupt comes in. When
> > we wakeup we'll resend the edge interrupts to the GIC on the resume path
> > and level interrupts will "just work" because they'll stay asserted
> > throughout resume.
> >
> > The bigger problem would look to be suspend entry, but in that case we
> > leave any interrupts enabled at the GIC on the path to suspend (see
> > suspend_device_irq() and how it bails out early if it's marked for
> > wakeup) 
> 
> No it doesn't happen this way in suspend_device_irq(), not for this disabled IRQ.
> 
> Agree, it's a bigger problem to set IRQ enabled at GIC HW which is already disabled in HW and SW but marked for wake up.
> However suspend_device_irq() is of little or no-use here (for that particular IRQ). Let me step by step give details.
> 
> This will benefit everyone to understand this problem. Correct me if something below in not happening as I listed.
> 
> Step-1
> 
> Driver invokes enable_irq_wake() to mark their IRQ wake up capable.
> 
> Step-2
> 
> After enable_irq_wake(), drivers invokes disable_irq().
> Let's break it down how this disable_irq() will traverse (in current code, without this RFC PATCH)
> 
> Step-2.1
> 
> In kernel/irq/manage.c
> disable_irq() => __disable_irq_nosync() => __disable_irq() => irq_disable()
> 
> Step-2.2
> 
> This will jump to kernel/irq/chip.c
> irq_disable() => __irq_disable()  => which then calls chip's .irq_disable via desc->irq_data.chip->irq_disable(&desc->irq_data);
> Note that this is a GPIO IRQ, gpiolib set's this .irq_disable for every gpio chip during registration
> 
> (see below in drivers/gpio/gpiolib.c)
> irqchip->irq_disable = gpiochip_irq_disable;
> 
> So it doesn't take lazy path to disable at HW, its always unlazy (at-least for gpio IRQs)

That looks like a bug. It appears that gpiolib is only hooking the irq
disable path here so that it can keep track of what irqs are not in use
by gpiolib and allow them to be used for GPIO purposes when the irqs are
disabled. See commit 461c1a7d4733 ("gpiolib: override
irq_enable/disable"). That code in gpiolib should probably see if lazy
mode has been disabled on the irq and do similar things to what genirq
does and then let genirq mask the gpios if they trigger during the time
when the irq is disabled. Regardless of this design though, I don't
understand why this matters.

> 
> Step-2.3
> 
> Call Jumps to gpiochip_irq_disable()
> This then invokes irq_chip's  .irq_disable via chip->irq.irq_disable(d)
> which finally arrives at msmgpio irq_chip's msm_gpio_irq_disable() call from here.
> it finds that IRQ controller is in hierarchy, so it invokes irq_chip_disable_parent().
> 
> Step-2.4
> 
> This invokes PDC irq_chip's qcom_pdc_gic_disable()
> At this point,
> This will go ahead and "disabled in PDC HW"
> This also invokes irq_chip_disable_parent() since PDC is also in hierarchy with GIC.
> 
> Step-2.5
> 
> This invokes GIC's gic_mask_irq() since GIC doesn't have .irq_disable implemented, it instead invokes mask.
> This will go ahead and "disables at GIC HW".
> 
> Final status at the end of step 2:
> (a)    IRQ is marked as wake up capable in SW
> (b)    IRQ is disabled in both SW and HW at GIC and PDC
> 
> Step-3
> Device enters "suspend to RAM" which invokes suspend_device_irq()
> Pasting the interested part here...
> 
>        if (irqd_is_wakeup_set(&desc->irq_data)) {
>                 irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
>                 ...
>                 return true;
>         }
>         ..
>         __disable_irq(desc);
> 
> Note that it bails out with above if condition here for IRQs that are marked wake up capable, as you have already pointed out.
> 
> For the rest of IRQs it goes ahead and disables them at HW via invoking __disable_irq() (be it a GIC HW or PDC HW)
> so when device is in suspend only wake up capable IRQs are finally left enabled in HW.

__disable_irq() considers lazy setting or not and leaves the irq
unmasked if the irq is lazy disabled. But we have the
IRQCHIP_MASK_ON_SUSPEND flag set in the gpio irq controller and that
properly masks the irq through the hierarchy if necessary. Again, this
is fine and all but I don't see how it matters.

> 
> Now, If any driver that has only done step-1, then it make sense that it will bail out here and leave IRQ enabled in HW (to be precise in its
> original state at HW, without disabling it)
> 
> But some drivers did step-1 and step-2 both.
> 
> Yes even for this case, it will still bail out here, since its marked as wake up capable but it defeats the purpose of bail out
> (bail out is to NOT go ahead and disable at HW), but by doing step-2 driver already disabled the IRQ at HW well before
> suspend_device_irq() is invoked.
> 
> In other words, IRQ status stays same as what was at the end of step-2 (disabled in HW)
> 
> So this whole function is of no use (for that particular IRQ part). Here driver is disabling IRQ on their own via step-2 and then asks
> "hey remember I marked it as wake up capable in step-1, so IRQ should
> resume system from suspend when it occurs" completely ignoring
> the fact that its already disabled at HW in step-2.
> 
> IMO, drivers should not even do step-2 here.  They should just mark irq as wake up capable and then let suspend framework decide whether
> to keep it enabled or disabled in HW when entering to suspend (Read may only do step-1, then everything works fine with current code)
> 
> Hope that above details makes it clear on why I way asking earlier to
> remove unnecessary disable_irq() call from driver, I don't understand its usage
> (at least till now its not clear to me). May be there are some reasons like seeing spurious IRQ when entering to suspend so driver may want to disable it in HW.
> 
> But then responsibility is falling on either suspend framework to re-enable such wake up capable interrupts in HW.
> This may be done by again invoking enable_irq() before bailing out for
> wakeup capable IRQ in suspend_device_irq(). I don't know if this is acceptable. 
> 
> Someone from To/Cc may clarify if above can be done. Note that it may create problem for other drivers which does only step-1, by calling enable_irq()
> during suspend, it will update desc->depth, so this agin need to be undo when resuming.
> 
> Otherwise, it is currently falling onto the individual irq_chip's to avoid disabling in HW in the first place.
> 
> this RFC patch tries to do same and address this problem in PDC irq_chip when control reaches at Step 2.4, to NOT disable at HW when the IRQ
> is already marked wake up capable, it bails out at Step 2.4 in PDC irq_chip so that interrupt is left enabled in HW at both PDC and GIC HW level.

I don't see any need to change genirq by changing how
suspend_device_irq() is written, but I'm not the maintainer of this code.

> 
> > so we should be able to have some suspend entry hook in pdc that
> > enables the irq in the PDC if it's in the wakeup bitmap. Then on the
> > path to suspend the GIC can lose power at any point after we enable the
> > wakeup path in PDC and then the system should resume and get the
> > interrupt through the resend mechanism.
> I thought of this to introduce suspend hook in PDC which decides to keep wake up marked irq enabled at PDC.
> But then someone need to keep it enabled at GIC as well.
> 
> PDC does not directly forward IRQ to CPU. PDC brings SoC out of low power mode where GIC does
> not have power cut, it replays the interrupt at GIC in HW and that leads to forwarding interrupt
> to CPU and resume from low power mode.
> So PDC and GIC HW status should need to be in sync.
> 

It sounds like PDC just passes along the level or edge and relies on the
parent irq chip (GIC in this case) to have the irq unmasked so it can be
seen at the CPU. If the irq is masked at the GIC does it still wakeup
the CPU if it's unmasked at the PDC?

Does the PDC have any other registers that can latch an edge type irq
across suspend so we can read it on resume? If that exists then we can
have software resend irqs at resume time. I think this was asked before
and the answer was it doesn't exist.

If there isn't any sort of latching, then why can't we unmask the irq in
the GIC hardware by calling irq_chip_enable_parent() from the suspend
hook in PDC? That should guarantee that the irq is unmasked at the GIC
when we're suspending and the GIC is about to lose power. And presumably
the GIC state is restored by hardware on resume so that the PDC can
forward the edge to the GIC which can interrupt the CPU because the
interrupt was left unmasked. The idea is to capture wake and enable
state in a bitmap, unmask at the PDC and GIC during system suspend if
it's marked for wakeup regardless of enable state, and then mask in the
PDC and GIC during resume if the irq is disabled in software. Does that
fail somehow?
Maulik Shah March 30, 2020, 9:39 a.m. UTC | #6
Hi,

On 3/20/2020 3:23 AM, Stephen Boyd wrote:
> Quoting Maulik Shah (2020-03-19 02:56:31)
>> On 3/19/2020 2:44 AM, Stephen Boyd wrote:
>>> Quoting Maulik Shah (2020-03-16 23:47:21)
>>>> On 3/17/2020 7:34 AM, Stephen Boyd wrote:
>>>>> What happens if irq is "disabled" in software, because this is the first
>>>>> function called on the irq, and we aren't in suspend yet. Then we get
>>>>> the irq. Won't we be interrupting the CPU because we've enabled in PDC
>>>>> and GIC hardware? Why doesn't this function update the wake bit and then
>>>>> leave the force on irq logic to suspend entry? Will we miss an interrupt
>>>>> while entering suspend because of that?
>>>> As PDC (and GIC) have a role during "active" time as well, interrupt should be
>>>> enabled in PDC and GIC HW.
>>> Sure. When the irq is enabled we want to enable at the GIC, but if it
>>> isn't enabled and we're not in suspend I would think we don't want the
>>> irq enabled at the GIC. But this code is doing that. Why?
>> Since we want to wake up in idle path LPM as well, when IRQ is marked as wake up capable and even though its disabled.
> No. IRQs that are disabled but have wake enabled on them should not wake
> up the CPU from deep idle states (which is what I assume you mean by
> idle path LPM (Low Power Mode)). Only if the irq is enabled should it
> wake the CPU from deep idle states, and in this case irq wake state has
> nothing to do with that working or not.
Okay i will address this in V3.
>>>   I'd think we
>>> would want to make enable in the PDC driver enable the parent and then
>>> make the set_wake path just update some bitmap tracking wakeup enabled
>>> irqs.
>>>
>>> Then when we enter suspend we will enable any pdc interrupts only in the
>>> PDC so that we can wakeup from suspend if that interrupt comes in. When
>>> we wakeup we'll resend the edge interrupts to the GIC on the resume path
>>> and level interrupts will "just work" because they'll stay asserted
>>> throughout resume.
>>>
>>> The bigger problem would look to be suspend entry, but in that case we
>>> leave any interrupts enabled at the GIC on the path to suspend (see
>>> suspend_device_irq() and how it bails out early if it's marked for
>>> wakeup)
>> No it doesn't happen this way in suspend_device_irq(), not for this disabled IRQ.
>>
>> Agree, it's a bigger problem to set IRQ enabled at GIC HW which is already disabled in HW and SW but marked for wake up.
>> However suspend_device_irq() is of little or no-use here (for that particular IRQ). Let me step by step give details.
>>
>> This will benefit everyone to understand this problem. Correct me if something below in not happening as I listed.
>>
>> Step-1
>>
>> Driver invokes enable_irq_wake() to mark their IRQ wake up capable.
>>
>> Step-2
>>
>> After enable_irq_wake(), drivers invokes disable_irq().
>> Let's break it down how this disable_irq() will traverse (in current code, without this RFC PATCH)
>>
>> Step-2.1
>>
>> In kernel/irq/manage.c
>> disable_irq() => __disable_irq_nosync() => __disable_irq() => irq_disable()
>>
>> Step-2.2
>>
>> This will jump to kernel/irq/chip.c
>> irq_disable() => __irq_disable()  => which then calls chip's .irq_disable via desc->irq_data.chip->irq_disable(&desc->irq_data);
>> Note that this is a GPIO IRQ, gpiolib set's this .irq_disable for every gpio chip during registration
>>
>> (see below in drivers/gpio/gpiolib.c)
>> irqchip->irq_disable = gpiochip_irq_disable;
>>
>> So it doesn't take lazy path to disable at HW, its always unlazy (at-least for gpio IRQs)
> That looks like a bug. It appears that gpiolib is only hooking the irq
> disable path here so that it can keep track of what irqs are not in use
> by gpiolib and allow them to be used for GPIO purposes when the irqs are
> disabled. See commit 461c1a7d4733 ("gpiolib: override
> irq_enable/disable"). That code in gpiolib should probably see if lazy
> mode has been disabled on the irq and do similar things to what genirq
> does and then let genirq mask the gpios if they trigger during the time
> when the irq is disabled. Regardless of this design though, I don't
> understand why this matters.
yes i already did see the commit 461c1a7d4733. The change went in few 
years back now in gpiolib.

I wanted to point out that since gpiolib doesn't take lazy approch, the 
interrupt will

get disabled in HW the moment driver invokes disable_irq() from driver's 
suspend ops.

So during suspend someone need to re-enable in HW if its been marked for 
wakeup

This should happen probably at very late stage in suspend after all 
drivers are done.


>
>> Step-2.3
>>
>> Call Jumps to gpiochip_irq_disable()
>> This then invokes irq_chip's  .irq_disable via chip->irq.irq_disable(d)
>> which finally arrives at msmgpio irq_chip's msm_gpio_irq_disable() call from here.
>> it finds that IRQ controller is in hierarchy, so it invokes irq_chip_disable_parent().
>>
>> Step-2.4
>>
>> This invokes PDC irq_chip's qcom_pdc_gic_disable()
>> At this point,
>> This will go ahead and "disabled in PDC HW"
>> This also invokes irq_chip_disable_parent() since PDC is also in hierarchy with GIC.
>>
>> Step-2.5
>>
>> This invokes GIC's gic_mask_irq() since GIC doesn't have .irq_disable implemented, it instead invokes mask.
>> This will go ahead and "disables at GIC HW".
>>
>> Final status at the end of step 2:
>> (a)    IRQ is marked as wake up capable in SW
>> (b)    IRQ is disabled in both SW and HW at GIC and PDC
>>
>> Step-3
>> Device enters "suspend to RAM" which invokes suspend_device_irq()
>> Pasting the interested part here...
>>
>>         if (irqd_is_wakeup_set(&desc->irq_data)) {
>>                  irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
>>                  ...
>>                  return true;
>>          }
>>          ..
>>          __disable_irq(desc);
>>
>> Note that it bails out with above if condition here for IRQs that are marked wake up capable, as you have already pointed out.
>>
>> For the rest of IRQs it goes ahead and disables them at HW via invoking __disable_irq() (be it a GIC HW or PDC HW)
>> so when device is in suspend only wake up capable IRQs are finally left enabled in HW.
> __disable_irq() considers lazy setting or not and leaves the irq
> unmasked if the irq is lazy disabled. But we have the
> IRQCHIP_MASK_ON_SUSPEND flag set in the gpio irq controller and that
> properly masks the irq through the hierarchy if necessary. Again, this
> is fine and all but I don't see how it matters.
>
>> Now, If any driver that has only done step-1, then it make sense that it will bail out here and leave IRQ enabled in HW (to be precise in its
>> original state at HW, without disabling it)
>>
>> But some drivers did step-1 and step-2 both.
>>
>> Yes even for this case, it will still bail out here, since its marked as wake up capable but it defeats the purpose of bail out
>> (bail out is to NOT go ahead and disable at HW), but by doing step-2 driver already disabled the IRQ at HW well before
>> suspend_device_irq() is invoked.
>>
>> In other words, IRQ status stays same as what was at the end of step-2 (disabled in HW)
>>
>> So this whole function is of no use (for that particular IRQ part). Here driver is disabling IRQ on their own via step-2 and then asks
>> "hey remember I marked it as wake up capable in step-1, so IRQ should
>> resume system from suspend when it occurs" completely ignoring
>> the fact that its already disabled at HW in step-2.
>>
>> IMO, drivers should not even do step-2 here.  They should just mark irq as wake up capable and then let suspend framework decide whether
>> to keep it enabled or disabled in HW when entering to suspend (Read may only do step-1, then everything works fine with current code)
>>
>> Hope that above details makes it clear on why I way asking earlier to
>> remove unnecessary disable_irq() call from driver, I don't understand its usage
>> (at least till now its not clear to me). May be there are some reasons like seeing spurious IRQ when entering to suspend so driver may want to disable it in HW.
>>
>> But then responsibility is falling on either suspend framework to re-enable such wake up capable interrupts in HW.
>> This may be done by again invoking enable_irq() before bailing out for
>> wakeup capable IRQ in suspend_device_irq(). I don't know if this is acceptable.
>>
>> Someone from To/Cc may clarify if above can be done. Note that it may create problem for other drivers which does only step-1, by calling enable_irq()
>> during suspend, it will update desc->depth, so this agin need to be undo when resuming.
>>
>> Otherwise, it is currently falling onto the individual irq_chip's to avoid disabling in HW in the first place.
>>
>> this RFC patch tries to do same and address this problem in PDC irq_chip when control reaches at Step 2.4, to NOT disable at HW when the IRQ
>> is already marked wake up capable, it bails out at Step 2.4 in PDC irq_chip so that interrupt is left enabled in HW at both PDC and GIC HW level.
> I don't see any need to change genirq by changing how
> suspend_device_irq() is written, but I'm not the maintainer of this code.

See above reply, the someone mentioned above can be suspend_device_irq().

Since i don't see this as a problem for PDC irqchip alone but for every 
irqchip in kernel using GPIO

interrupts. Although i did not check how other platforms might be 
working, but i guess probably they don't

really disable at HW with disable_irq() or may not even have 
.irq_disable implemented in gpio irq_chip drivers.

>>> so we should be able to have some suspend entry hook in pdc that
>>> enables the irq in the PDC if it's in the wakeup bitmap. Then on the
>>> path to suspend the GIC can lose power at any point after we enable the
>>> wakeup path in PDC and then the system should resume and get the
>>> interrupt through the resend mechanism.
>> I thought of this to introduce suspend hook in PDC which decides to keep wake up marked irq enabled at PDC.
>> But then someone need to keep it enabled at GIC as well.
>>
>> PDC does not directly forward IRQ to CPU. PDC brings SoC out of low power mode where GIC does
>> not have power cut, it replays the interrupt at GIC in HW and that leads to forwarding interrupt
>> to CPU and resume from low power mode.
>> So PDC and GIC HW status should need to be in sync.
>>
> It sounds like PDC just passes along the level or edge and relies on the
> parent irq chip (GIC in this case) to have the irq unmasked so it can be
> seen at the CPU. If the irq is masked at the GIC does it still wakeup
> the CPU if it's unmasked at the PDC?
No. if IRQ is maksed at GIC it doesn't wakeup even though at PDC its 
unmasked.
>
> Does the PDC have any other registers that can latch an edge type irq
> across suspend so we can read it on resume? If that exists then we can
> have software resend irqs at resume time. I think this was asked before
> and the answer was it doesn't exist.
Correct it doesn't exist.
>
> If there isn't any sort of latching, then why can't we unmask the irq in
> the GIC hardware by calling irq_chip_enable_parent() from the suspend
> hook in PDC? That should guarantee that the irq is unmasked at the GIC
> when we're suspending and the GIC is about to lose power. And presumably
> the GIC state is restored by hardware on resume so that the PDC can
> forward the edge to the GIC which can interrupt the CPU because the
> interrupt was left unmasked. The idea is to capture wake and enable
> state in a bitmap, unmask at the PDC and GIC during system suspend if
> it's marked for wakeup regardless of enable state, and then mask in the
> PDC and GIC during resume if the irq is disabled in software. Does that
> fail somehow?

Okay i will implement suspend/resume hook in PDC and use bitmap to

unmask at PDC and GIC during suspend. Addressed in V3.

Thanks,

Maulik
diff mbox series

Patch

diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 6ae9e1f..d698cec 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -1,6 +1,6 @@ 
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2017-2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/err.h>
@@ -30,6 +30,9 @@ 
 
 #define PDC_NO_PARENT_IRQ	~0UL
 
+DECLARE_BITMAP(pdc_wake_irqs, PDC_MAX_IRQS);
+DECLARE_BITMAP(pdc_enabled_irqs, PDC_MAX_IRQS);
+
 struct pdc_pin_region {
 	u32 pin_base;
 	u32 parent_base;
@@ -80,20 +83,32 @@  static void pdc_enable_intr(struct irq_data *d, bool on)
 	index = pin_out / 32;
 	mask = pin_out % 32;
 
-	raw_spin_lock(&pdc_lock);
 	enable = pdc_reg_read(IRQ_ENABLE_BANK, index);
 	enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask);
 	pdc_reg_write(IRQ_ENABLE_BANK, index, enable);
-	raw_spin_unlock(&pdc_lock);
 }
 
 static void qcom_pdc_gic_disable(struct irq_data *d)
 {
+	bool wake_status;
+
 	if (d->hwirq == GPIO_NO_WAKE_IRQ)
 		return;
 
-	pdc_enable_intr(d, false);
-	irq_chip_disable_parent(d);
+	raw_spin_lock(&pdc_lock);
+
+	clear_bit(d->hwirq, pdc_enabled_irqs);
+	wake_status = test_bit(d->hwirq, pdc_wake_irqs);
+
+	/* Disable at PDC HW if wake_status also says same */
+	if (!wake_status)
+		pdc_enable_intr(d, false);
+
+	raw_spin_unlock(&pdc_lock);
+
+	/* Disable at GIC HW if wake_status also says same */
+	if (!wake_status)
+		irq_chip_disable_parent(d);
 }
 
 static void qcom_pdc_gic_enable(struct irq_data *d)
@@ -101,7 +116,16 @@  static void qcom_pdc_gic_enable(struct irq_data *d)
 	if (d->hwirq == GPIO_NO_WAKE_IRQ)
 		return;
 
+	raw_spin_lock(&pdc_lock);
+
+	set_bit(d->hwirq, pdc_enabled_irqs);
+
+	/* We can blindly enable at PDC HW as we are already in enable path */
 	pdc_enable_intr(d, true);
+
+	raw_spin_unlock(&pdc_lock);
+
+	/* We can blindly enable at GIC HW as we are already in enable path */
 	irq_chip_enable_parent(d);
 }
 
@@ -121,6 +145,92 @@  static void qcom_pdc_gic_unmask(struct irq_data *d)
 	irq_chip_unmask_parent(d);
 }
 
+/**
+ * qcom_pdc_gic_set_wake: Enables/Disables interrupt at PDC and parent GIC
+ *
+ * @d: the interrupt data
+ * @on: enable or disable wakeup capability
+ *
+ * The SW expects that an irq that's disabled with disable_irq()
+ * can still wake the system from sleep states such as "suspend to RAM",
+ * if it has been marked for wakeup.
+ *
+ * While the SW may choose to differ status for "wake" and "enabled"
+ * interrupts, its not the case with HW. There is no dedicated
+ * configuration in HW to differ "wake" and "enabled". Same is
+ * case for PDC's parent irq_chip (ARM GIC) which has only GICD_ISENABLER
+ * to indicate "enabled" or "disabled" status and also there is no .irq_set_wake
+ * implemented in parent GIC irq_chip.
+ *
+ * So, the HW ONLY understands either "enabled" or "disabled".
+ *
+ * This function is introduced to handle cases where drivers may invoke
+ * below during suspend in SW on their irq, and expect to wake up from this
+ * interrupt.
+ *
+ * 1. enable_irq_wake()
+ * 2. disable_irq()
+ *
+ * and below during resume
+ *
+ * 3. disable_irq_wake()
+ * 4. enable_irq()
+ *
+ * if (2) is invoked in end and just before suspend, it currently leaves
+ * interrupt "disabled" at HW and hence not leading to resume.
+ *
+ * Note that the order of invoking (1) & (2) may interchange and similarly
+ * it can interchange for (3) & (4) as per driver's wish.
+ *
+ * if the driver call .irq_set_wake first it will enable at HW but later
+ * call with .irq_disable will disable at HW. so final result is again
+ * "disabled" at HW whereas the HW expectection is to keep it "enabled"
+ * since it understands only "enabled" or "disabled".
+ *
+ * Hence .irq_set_wake can not just go ahead and  "enable" or "disable"
+ * at HW blindly, it needs to take in account status of currently "enable"
+ * or "disable" as well.
+ *
+ * Introduce .irq_set_wake in PDC irq_chip to handle above issue.
+ * The final status in HW should be an "OR" of "enable" and "wake" calls.
+ * (i.e. same as below table)
+ * -------------------------------------------------|
+ * ENABLE in SW | WAKE in SW | PDC & GIC HW Status  |
+ *      0       |     0      |     0	            |
+ *      0	|     1      |     1	            |
+ *	1	|     0      |     1		    |
+ *	1	|     1      |     1		    |
+ *--------------------------------------------------|
+ */
+
+static int qcom_pdc_gic_set_wake(struct irq_data *d, unsigned int on)
+{
+	bool enabled_status;
+
+	if (d->hwirq == GPIO_NO_WAKE_IRQ)
+		return 0;
+
+	raw_spin_lock(&pdc_lock);
+	enabled_status = test_bit(d->hwirq, pdc_enabled_irqs);
+	if (on) {
+		set_bit(d->hwirq, pdc_wake_irqs);
+		pdc_enable_intr(d, true);
+	} else {
+		clear_bit(d->hwirq, pdc_wake_irqs);
+		pdc_enable_intr(d, enabled_status);
+	}
+
+	raw_spin_unlock(&pdc_lock);
+
+	/* Either "wake" or "enabled" need same status at parent as well */
+	if (on || enabled_status)
+		irq_chip_enable_parent(d);
+	else
+		irq_chip_disable_parent(d);
+
+	return irq_chip_set_wake_parent(d, on);
+}
+
 /*
  * GIC does not handle falling edge or active low. To allow falling edge and
  * active low interrupts to be handled at GIC, PDC has an inverter that inverts
@@ -203,9 +313,9 @@  static struct irq_chip qcom_pdc_gic_chip = {
 	.irq_set_irqchip_state	= qcom_pdc_gic_set_irqchip_state,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
 	.irq_set_type		= qcom_pdc_gic_set_type,
+	.irq_set_wake		= qcom_pdc_gic_set_wake,
 	.flags			= IRQCHIP_MASK_ON_SUSPEND |
-				  IRQCHIP_SET_TYPE_MASKED |
-				  IRQCHIP_SKIP_SET_WAKE,
+				  IRQCHIP_SET_TYPE_MASKED,
 	.irq_set_vcpu_affinity	= irq_chip_set_vcpu_affinity_parent,
 	.irq_set_affinity	= irq_chip_set_affinity_parent,
 };