diff mbox series

[RFC,v3] irqchip: qcom: pdc: Introduce irq_set_wake call

Message ID 1585586460-3272-2-git-send-email-mkshah@codeaurora.org (mailing list archive)
State Superseded
Headers show
Series [RFC,v3] irqchip: qcom: pdc: Introduce irq_set_wake call | expand

Commit Message

Maulik Shah March 30, 2020, 4:41 p.m. UTC
Add changes to differentiate enabled and wakeup capable interrupts in SW
to support drivers which disable the wakeup capable interrupt in SW during
suspend while irqchip HW expects it to leave enabled in HW during suspend.

Change the way interrupts get enabled at HW in wakeup capable PDC irq chip
during suspend. Introduce .irq_set_wake call which lets interrupts marked
as wakeup capable. Such interrupts in PDC domain and PDC gpio domain are
checked from PDC's CPU PM notification to enable them in HW at PDC and its
parent GIC if its marked as wakeup capable but are disabled in SW.

Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
---
 drivers/irqchip/qcom-pdc.c | 271 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 256 insertions(+), 15 deletions(-)

Comments

Stephen Boyd April 14, 2020, 12:35 a.m. UTC | #1
Quoting Maulik Shah (2020-03-30 09:41:00)
> diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
> index 6ae9e1f..c43715b 100644
> --- a/drivers/irqchip/qcom-pdc.c
> +++ b/drivers/irqchip/qcom-pdc.c
> @@ -36,6 +38,23 @@ struct pdc_pin_region {
>         u32 cnt;
>  };
>  
> +struct pdc_pm_data {
> +       struct cpumask cpus_in_pc;
> +       spinlock_t pm_lock;
> +       bool suspend_start;
> +       bool from_pdc_suspend;
> +       struct notifier_block pdc_pm_nfb;
> +       struct notifier_block pdc_cpu_pm_nfb;
> +
> +       DECLARE_BITMAP(pdc_domain_enabled_irqs, PDC_MAX_IRQS);
> +       DECLARE_BITMAP(pdc_domain_wake_irqs, PDC_MAX_IRQS);
> +       DECLARE_BITMAP(pdc_gpio_domain_enabled_irqs, PDC_MAX_GPIO_IRQS);
> +       DECLARE_BITMAP(pdc_gpio_domain_wake_irqs, PDC_MAX_GPIO_IRQS);
> +
> +       struct irq_domain *pdc_domain;
> +       struct irq_domain *pdc_gpio_domain;
> +};

Everything else is just a static local variable here. It looks odd to
have this struct to contain all this stuff but then have everything else
below be outside of it. If we're going to have a container node I'd
prefer that be another patch that makes this driver no longer a
singleton.

> +
>  static DEFINE_RAW_SPINLOCK(pdc_lock);
>  static void __iomem *pdc_base;
>  static struct pdc_pin_region *pdc_region;
> @@ -89,18 +108,38 @@ static void pdc_enable_intr(struct irq_data *d, bool on)
>  
>  static void qcom_pdc_gic_disable(struct irq_data *d)
>  {
> +       struct pdc_pm_data *p;
> +
>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
>                 return;
>  
> +       p = irq_data_get_irq_chip_data(d);
> +       if (!p->from_pdc_suspend) {
> +               if (irq_domain_qcom_handle_wakeup(d->domain))
> +                       clear_bit(d->hwirq, p->pdc_gpio_domain_enabled_irqs);
> +               else
> +                       clear_bit(d->hwirq, p->pdc_domain_enabled_irqs);
> +       }
> +
>         pdc_enable_intr(d, false);
>         irq_chip_disable_parent(d);
>  }
>  
>  static void qcom_pdc_gic_enable(struct irq_data *d)
>  {
> +       struct pdc_pm_data *p;
> +
>         if (d->hwirq == GPIO_NO_WAKE_IRQ)
>                 return;
>  
> +       p = irq_data_get_irq_chip_data(d);
> +       if (!p->from_pdc_suspend) {
> +               if (irq_domain_qcom_handle_wakeup(d->domain))
> +                       set_bit(d->hwirq, p->pdc_gpio_domain_enabled_irqs);
> +               else
> +                       set_bit(d->hwirq, p->pdc_domain_enabled_irqs);
> +       }
> +
>         pdc_enable_intr(d, true);
>         irq_chip_enable_parent(d);
>  }
> @@ -145,6 +184,39 @@ enum pdc_irq_config_bits {
>  };
>  
>  /**
> + * qcom_pdc_gic_set_wake: Mark IRQ as wakeup capable
> + *
> + * @d: the interrupt data
> + * @on: enable or disable wakeup capability
> + *
> + * Mark IRQ as wake up capable at either pdc_domain or pdc_gpio_domain.
> + * This will be used when entering to suspend where if any wakeup capable
> + * IRQ is already disabled in SW, such IRQ needs to be re-enabled at HW.
> + */
> +static int qcom_pdc_gic_set_wake(struct irq_data *d, unsigned int on)
> +{
> +       struct pdc_pm_data *p;
> +
> +       if (d->hwirq == GPIO_NO_WAKE_IRQ)
> +               return 0;
> +
> +       p = irq_data_get_irq_chip_data(d);
> +       if (on) {
> +               if (irq_domain_qcom_handle_wakeup(d->domain))
> +                       set_bit(d->hwirq, p->pdc_gpio_domain_wake_irqs);
> +               else
> +                       set_bit(d->hwirq, p->pdc_domain_wake_irqs);
> +       } else {
> +               if (irq_domain_qcom_handle_wakeup(d->domain))
> +                       clear_bit(d->hwirq, p->pdc_gpio_domain_wake_irqs);
> +               else
> +                       clear_bit(d->hwirq, p->pdc_domain_wake_irqs);
> +       }
> +
> +       return irq_chip_set_wake_parent(d, on);
> +}
> +
> +/**
>   * qcom_pdc_gic_set_type: Configure PDC for the interrupt
>   *
>   * @d: the interrupt data
> @@ -202,14 +274,162 @@ static struct irq_chip qcom_pdc_gic_chip = {
>         .irq_get_irqchip_state  = qcom_pdc_gic_get_irqchip_state,
>         .irq_set_irqchip_state  = qcom_pdc_gic_set_irqchip_state,
>         .irq_retrigger          = irq_chip_retrigger_hierarchy,
> +       .irq_set_wake           = qcom_pdc_gic_set_wake,
>         .irq_set_type           = qcom_pdc_gic_set_type,
>         .flags                  = IRQCHIP_MASK_ON_SUSPEND |
> -                                 IRQCHIP_SET_TYPE_MASKED |
> -                                 IRQCHIP_SKIP_SET_WAKE,
> +                                 IRQCHIP_SET_TYPE_MASKED,
>         .irq_set_vcpu_affinity  = irq_chip_set_vcpu_affinity_parent,
>         .irq_set_affinity       = irq_chip_set_affinity_parent,
>  };
>  
> +static struct irq_data *pdc_find_irq_data(struct irq_domain *domain,
> +                                         int wake_irq)
> +{
> +       int irq = irq_find_mapping(domain, wake_irq);
> +       struct irq_desc *desc = irq_to_desc(irq);
> +
> +       return &desc->irq_data;
> +}
> +
> +/**
> + * pdc_suspend: Enable IRQs marked for wakeup
> + *
> + * @p: pdc_pm_data
> + *
> + * The SW expects that an IRQ that's disabled with disable_irq() can still
> + * wake the system from sleep states such as "suspend to RAM", if it has
> + * been marked for wakeup.
> + *
> + * While the SW may choose to differ status for "wake" and "enabled" interrupts,

s/differ/differentiate/ perhaps?

> + * its not the case with HW. There is no dedicated config in HW to differ "wake"

s/differ/differentiate/ perhaps?

> + * and "enabled". Same is case for PDC's parent irq_chip (ARM GIC) which has
> + * only GICD_ISENABLER to indicate "enabled" or "disabled" status.
> + *
> + * So, the HW ONLY understands either "enabled" or "disabled".
> + * The final status in HW should be an "OR" of "enable" and "wake" status.
> + * i.e. PDC (and GIC) irq enable in HW = irq enable | irq wake in SW
> + */
> +static void pdc_suspend(struct pdc_pm_data *p)
> +{
> +       int wake_irq;
> +       bool enabled;
> +       struct irq_data *d;
> +
> +       p->from_pdc_suspend = true;
> +       for_each_set_bit(wake_irq, p->pdc_domain_wake_irqs, PDC_MAX_IRQS) {
> +               enabled = test_bit(wake_irq, p->pdc_domain_enabled_irqs);
> +               if (!enabled) {
> +                       d = pdc_find_irq_data(p->pdc_domain, wake_irq);
> +
> +                       pdc_enable_intr(d, true);
> +                       irq_chip_enable_parent(d);
> +               }
> +       }
> +
> +       for_each_set_bit(wake_irq, p->pdc_gpio_domain_wake_irqs,
> +                        PDC_MAX_GPIO_IRQS) {

Why can't this code operate on the pdc hwirq number space? And keep
track of what pdc pin is enabled or disabled and marked for wakeup or
not? I don't understand why we're tracking different domains. 

> +               enabled = test_bit(wake_irq, p->pdc_gpio_domain_enabled_irqs);
> +               if (!enabled) {
> +                       d = pdc_find_irq_data(p->pdc_gpio_domain, wake_irq);
> +
> +                       irq_chip_enable_parent(d);
> +               }
> +       }
> +}
> +
> +static void pdc_resume(struct pdc_pm_data *p)
> +{
> +       int wake_irq;
> +       bool enabled, pending;
> +       struct irq_data *d;
> +
> +       for_each_set_bit(wake_irq, p->pdc_domain_wake_irqs, PDC_MAX_IRQS) {
> +               enabled = test_bit(wake_irq, p->pdc_domain_enabled_irqs);
> +               if (!enabled) {
> +                       d = pdc_find_irq_data(p->pdc_domain, wake_irq);
> +
> +                       pdc_enable_intr(d, false);
> +                       irq_chip_disable_parent(d);
> +               }
> +       }
> +
> +       for_each_set_bit(wake_irq, p->pdc_gpio_domain_wake_irqs,
> +                        PDC_MAX_GPIO_IRQS) {
> +               enabled = test_bit(wake_irq, p->pdc_gpio_domain_enabled_irqs);
> +               if (!enabled) {
> +                       d = pdc_find_irq_data(p->pdc_gpio_domain, wake_irq);
> +
> +                       /*
> +                        * When the drivers invoke enablie_irq() on a GPIO IRQ,

s/enablie/enable/

> +                        * the pending interrupt gets cleared at GIC before
> +                        * enabling it from msm_gpio_irq_enable(). So CPU will
> +                        * never see pending IRQ after resume if we disable them
> +                        * here.

Is there something that's doing this in the gpio driver? It sounds
like the bug lies in that driver. Maybe the gpio driver should use
irq_startup instead of irq_enable to clear anything pending? The comment
in msm_gpio_irq_enable() talks a lot but doesn't actually say why it's a
problem to be latched at the GIC as pending when the irq is enabled the
first time.

> +                        *
> +                        * If wakeup is due to GPIO interrupt do not disable it.
> +                        * By not disabling, The IRQ will be delivered to CPU
> +                        * and when driver invokes enable_irq(), The softirq
> +                        * tasklet does resend_irqs() to call irq handler.
> +                        */
> +                       irq_chip_get_parent_state(d, IRQCHIP_STATE_PENDING,
> +                                                 &pending);
> +                       if (pending) {
> +                               pending = false;

Please move 'pending' into the if (!enabled) scope so that it
automatically resets for each irq. And then remove this assignment.

> +                               continue;
> +                       }
> +
> +                       irq_chip_disable_parent(d);
> +               }
> +       }
> +       p->from_pdc_suspend = false;
> +}
> +
> +static int pdc_cpu_pm_callback(struct notifier_block *nfb,
> +                              unsigned long action, void *v)
> +{
> +       struct pdc_pm_data *p = container_of(nfb, struct pdc_pm_data,
> +                                            pdc_cpu_pm_nfb);
> +       unsigned long flags;
> +
> +       if (!p->suspend_start)
> +               return NOTIFY_OK;
> +
> +       spin_lock_irqsave(&p->pm_lock, flags);
> +       switch (action) {
> +       case CPU_PM_ENTER:
> +               cpumask_set_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
> +                       pdc_suspend(p);
> +               break;
> +       case CPU_PM_ENTER_FAILED:
> +       case CPU_PM_EXIT:
> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
> +                       pdc_resume(p);
> +               cpumask_clear_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
> +               break;
> +       }
> +       spin_unlock_irqrestore(&p->pm_lock, flags);

What is the point of this callback? Any irqs that we want to wakeup the
CPUs from deep idle should be enabled via enable_irq(). Otherwise, they
shouldn't wake up the system. That's the difference between idle and
suspend. In suspend, we want wakeup enabled irqs to wake us up from
suspend no matter if the irq is enabled or not. But for idle, we don't
care about the wakeup enable bit at all. The only bit that matters is
that the irq is enabled and then the expectation is that it will wake us
up. If there's some irq that can't wake us up from idle then we'll have
to just ignore that interrupt across deep CPU idle states. Is that
actually a problem? Or the SoC architects have figured out that certain
irqs don't matter for deep CPU idle states and so we don't have to
monitor them?

> +
> +       return NOTIFY_OK;
> +}
> +
> +static int pdc_pm_callback(struct notifier_block *nfb,
> +                          unsigned long event, void *unused)
> +{
> +       struct pdc_pm_data *p = container_of(nfb, struct pdc_pm_data,
> +                                            pdc_pm_nfb);
> +       switch (event) {
> +       case PM_SUSPEND_PREPARE:
> +               p->suspend_start = true;
> +               break;
> +       case PM_POST_SUSPEND:
> +               p->suspend_start = false;
> +               break;
> +       }
> +
> +       return NOTIFY_OK;
> +}
> +
>  static irq_hw_number_t get_parent_hwirq(int pin)
>  {
>         int i;
> @@ -385,6 +608,10 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
>                 return -ENXIO;
>         }
>  
> +       p = kzalloc(sizeof(struct pdc_pm_data), GFP_KERNEL);
> +       if (!p)
> +               return -ENOMEM;
> +
>         parent_domain = irq_find_host(parent);
>         if (!parent_domain) {
>                 pr_err("%pOF: unable to find PDC's parent domain\n", node);
> @@ -398,33 +625,47 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
>                 goto fail;
>         }
>  
> -       pdc_domain = irq_domain_create_hierarchy(parent_domain, 0, PDC_MAX_IRQS,
> -                                                of_fwnode_handle(node),
> -                                                &qcom_pdc_ops, NULL);
> -       if (!pdc_domain) {
> +       p->pdc_domain = irq_domain_create_hierarchy(parent_domain, 0,
> +                                                   PDC_MAX_IRQS,
> +                                                   of_fwnode_handle(node),
> +                                                   &qcom_pdc_ops, p);
> +       if (!p->pdc_domain) {
>                 pr_err("%pOF: GIC domain add failed\n", node);
>                 ret = -ENOMEM;
>                 goto fail;
>         }
>  
> -       pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain,
> +       p->pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain,
>                                         IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP,
>                                         PDC_MAX_GPIO_IRQS,
>                                         of_fwnode_handle(node),
> -                                       &qcom_pdc_gpio_ops, NULL);
> -       if (!pdc_gpio_domain) {
> +                                       &qcom_pdc_gpio_ops, p);
> +       if (!p->pdc_gpio_domain) {
>                 pr_err("%pOF: PDC domain add failed for GPIO domain\n", node);
>                 ret = -ENOMEM;
>                 goto remove;
>         }
>  
> -       irq_domain_update_bus_token(pdc_gpio_domain, DOMAIN_BUS_WAKEUP);
> +       irq_domain_update_bus_token(p->pdc_gpio_domain, DOMAIN_BUS_WAKEUP);
> +
> +       /* Register for PM-transition events */
> +       p->pdc_pm_nfb.notifier_call = pdc_pm_callback;

We have irq_suspend() and irq_resume() callbacks in the irqchip. Can we
use that instead of this notifier?

> +       ret = register_pm_notifier(&p->pdc_pm_nfb);
> +       if (ret)
> +               goto remove;
> +
> +       spin_lock_init(&p->pm_lock);
> +
> +       /* Register for CPU PM notifications */

Yes that's obvious. Please don't add comments like this.

> +       p->pdc_cpu_pm_nfb.notifier_call = pdc_cpu_pm_callback;
> +       cpu_pm_register_notifier(&p->pdc_cpu_pm_nfb);
>  
>         return 0;
>
Maulik Shah April 14, 2020, 8:05 a.m. UTC | #2
Hi,

On 4/14/2020 6:05 AM, Stephen Boyd wrote:
> Quoting Maulik Shah (2020-03-30 09:41:00)
>> diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
>> index 6ae9e1f..c43715b 100644
>> --- a/drivers/irqchip/qcom-pdc.c
>> +++ b/drivers/irqchip/qcom-pdc.c
>> @@ -36,6 +38,23 @@ struct pdc_pin_region {
>>          u32 cnt;
>>   };
>>   
>> +struct pdc_pm_data {
>> +       struct cpumask cpus_in_pc;
>> +       spinlock_t pm_lock;
>> +       bool suspend_start;
>> +       bool from_pdc_suspend;
>> +       struct notifier_block pdc_pm_nfb;
>> +       struct notifier_block pdc_cpu_pm_nfb;
>> +
>> +       DECLARE_BITMAP(pdc_domain_enabled_irqs, PDC_MAX_IRQS);
>> +       DECLARE_BITMAP(pdc_domain_wake_irqs, PDC_MAX_IRQS);
>> +       DECLARE_BITMAP(pdc_gpio_domain_enabled_irqs, PDC_MAX_GPIO_IRQS);
>> +       DECLARE_BITMAP(pdc_gpio_domain_wake_irqs, PDC_MAX_GPIO_IRQS);
>> +
>> +       struct irq_domain *pdc_domain;
>> +       struct irq_domain *pdc_gpio_domain;
>> +};
> Everything else is just a static local variable here. It looks odd to
> have this struct to contain all this stuff but then have everything else
> below be outside of it. If we're going to have a container node I'd
> prefer that be another patch that makes this driver no longer a
> singleton.
Sure, I will add it to my To-Do to move other items as well in this 
struct but in a separate patch, mostly once this one gets in 
reviewed/aggreed to lands in kernel.
>
>> +
>>   static DEFINE_RAW_SPINLOCK(pdc_lock);
>>   static void __iomem *pdc_base;
>>   static struct pdc_pin_region *pdc_region;
>> @@ -89,18 +108,38 @@ static void pdc_enable_intr(struct irq_data *d, bool on)
>>   
>>   static void qcom_pdc_gic_disable(struct irq_data *d)
>>   {
>> +       struct pdc_pm_data *p;
>> +
>>          if (d->hwirq == GPIO_NO_WAKE_IRQ)
>>                  return;
>>   
>> +       p = irq_data_get_irq_chip_data(d);
>> +       if (!p->from_pdc_suspend) {
>> +               if (irq_domain_qcom_handle_wakeup(d->domain))
>> +                       clear_bit(d->hwirq, p->pdc_gpio_domain_enabled_irqs);
>> +               else
>> +                       clear_bit(d->hwirq, p->pdc_domain_enabled_irqs);
>> +       }
>> +
>>          pdc_enable_intr(d, false);
>>          irq_chip_disable_parent(d);
>>   }
>>   
>>   static void qcom_pdc_gic_enable(struct irq_data *d)
>>   {
>> +       struct pdc_pm_data *p;
>> +
>>          if (d->hwirq == GPIO_NO_WAKE_IRQ)
>>                  return;
>>   
>> +       p = irq_data_get_irq_chip_data(d);
>> +       if (!p->from_pdc_suspend) {
>> +               if (irq_domain_qcom_handle_wakeup(d->domain))
>> +                       set_bit(d->hwirq, p->pdc_gpio_domain_enabled_irqs);
>> +               else
>> +                       set_bit(d->hwirq, p->pdc_domain_enabled_irqs);
>> +       }
>> +
>>          pdc_enable_intr(d, true);
>>          irq_chip_enable_parent(d);
>>   }
>> @@ -145,6 +184,39 @@ enum pdc_irq_config_bits {
>>   };
>>   
>>   /**
>> + * qcom_pdc_gic_set_wake: Mark IRQ as wakeup capable
>> + *
>> + * @d: the interrupt data
>> + * @on: enable or disable wakeup capability
>> + *
>> + * Mark IRQ as wake up capable at either pdc_domain or pdc_gpio_domain.
>> + * This will be used when entering to suspend where if any wakeup capable
>> + * IRQ is already disabled in SW, such IRQ needs to be re-enabled at HW.
>> + */
>> +static int qcom_pdc_gic_set_wake(struct irq_data *d, unsigned int on)
>> +{
>> +       struct pdc_pm_data *p;
>> +
>> +       if (d->hwirq == GPIO_NO_WAKE_IRQ)
>> +               return 0;
>> +
>> +       p = irq_data_get_irq_chip_data(d);
>> +       if (on) {
>> +               if (irq_domain_qcom_handle_wakeup(d->domain))
>> +                       set_bit(d->hwirq, p->pdc_gpio_domain_wake_irqs);
>> +               else
>> +                       set_bit(d->hwirq, p->pdc_domain_wake_irqs);
>> +       } else {
>> +               if (irq_domain_qcom_handle_wakeup(d->domain))
>> +                       clear_bit(d->hwirq, p->pdc_gpio_domain_wake_irqs);
>> +               else
>> +                       clear_bit(d->hwirq, p->pdc_domain_wake_irqs);
>> +       }
>> +
>> +       return irq_chip_set_wake_parent(d, on);
>> +}
>> +
>> +/**
>>    * qcom_pdc_gic_set_type: Configure PDC for the interrupt
>>    *
>>    * @d: the interrupt data
>> @@ -202,14 +274,162 @@ static struct irq_chip qcom_pdc_gic_chip = {
>>          .irq_get_irqchip_state  = qcom_pdc_gic_get_irqchip_state,
>>          .irq_set_irqchip_state  = qcom_pdc_gic_set_irqchip_state,
>>          .irq_retrigger          = irq_chip_retrigger_hierarchy,
>> +       .irq_set_wake           = qcom_pdc_gic_set_wake,
>>          .irq_set_type           = qcom_pdc_gic_set_type,
>>          .flags                  = IRQCHIP_MASK_ON_SUSPEND |
>> -                                 IRQCHIP_SET_TYPE_MASKED |
>> -                                 IRQCHIP_SKIP_SET_WAKE,
>> +                                 IRQCHIP_SET_TYPE_MASKED,
>>          .irq_set_vcpu_affinity  = irq_chip_set_vcpu_affinity_parent,
>>          .irq_set_affinity       = irq_chip_set_affinity_parent,
>>   };
>>   
>> +static struct irq_data *pdc_find_irq_data(struct irq_domain *domain,
>> +                                         int wake_irq)
>> +{
>> +       int irq = irq_find_mapping(domain, wake_irq);
>> +       struct irq_desc *desc = irq_to_desc(irq);
>> +
>> +       return &desc->irq_data;
>> +}
>> +
>> +/**
>> + * pdc_suspend: Enable IRQs marked for wakeup
>> + *
>> + * @p: pdc_pm_data
>> + *
>> + * The SW expects that an IRQ that's disabled with disable_irq() can still
>> + * wake the system from sleep states such as "suspend to RAM", if it has
>> + * been marked for wakeup.
>> + *
>> + * While the SW may choose to differ status for "wake" and "enabled" interrupts,
> s/differ/differentiate/ perhaps?
Okay i will update.
>
>> + * its not the case with HW. There is no dedicated config in HW to differ "wake"
> s/differ/differentiate/ perhaps?
Okay i will update.
>
>> + * and "enabled". Same is case for PDC's parent irq_chip (ARM GIC) which has
>> + * only GICD_ISENABLER to indicate "enabled" or "disabled" status.
>> + *
>> + * So, the HW ONLY understands either "enabled" or "disabled".
>> + * The final status in HW should be an "OR" of "enable" and "wake" status.
>> + * i.e. PDC (and GIC) irq enable in HW = irq enable | irq wake in SW
>> + */
>> +static void pdc_suspend(struct pdc_pm_data *p)
>> +{
>> +       int wake_irq;
>> +       bool enabled;
>> +       struct irq_data *d;
>> +
>> +       p->from_pdc_suspend = true;
>> +       for_each_set_bit(wake_irq, p->pdc_domain_wake_irqs, PDC_MAX_IRQS) {
>> +               enabled = test_bit(wake_irq, p->pdc_domain_enabled_irqs);
>> +               if (!enabled) {
>> +                       d = pdc_find_irq_data(p->pdc_domain, wake_irq);
>> +
>> +                       pdc_enable_intr(d, true);
>> +                       irq_chip_enable_parent(d);
>> +               }
>> +       }
>> +
>> +       for_each_set_bit(wake_irq, p->pdc_gpio_domain_wake_irqs,
>> +                        PDC_MAX_GPIO_IRQS) {
> Why can't this code operate on the pdc hwirq number space? And keep
> track of what pdc pin is enabled or disabled and marked for wakeup or
> not? I don't understand why we're tracking different domains.
This is becasue on resume we are treating GPIO domain IRQs differently 
then others.
>
>> +               enabled = test_bit(wake_irq, p->pdc_gpio_domain_enabled_irqs);
>> +               if (!enabled) {
>> +                       d = pdc_find_irq_data(p->pdc_gpio_domain, wake_irq);
>> +
>> +                       irq_chip_enable_parent(d);
>> +               }
>> +       }
>> +}
>> +
>> +static void pdc_resume(struct pdc_pm_data *p)
>> +{
>> +       int wake_irq;
>> +       bool enabled, pending;
>> +       struct irq_data *d;
>> +
>> +       for_each_set_bit(wake_irq, p->pdc_domain_wake_irqs, PDC_MAX_IRQS) {
>> +               enabled = test_bit(wake_irq, p->pdc_domain_enabled_irqs);
>> +               if (!enabled) {
>> +                       d = pdc_find_irq_data(p->pdc_domain, wake_irq);
>> +
>> +                       pdc_enable_intr(d, false);
>> +                       irq_chip_disable_parent(d);
>> +               }
>> +       }
>> +
>> +       for_each_set_bit(wake_irq, p->pdc_gpio_domain_wake_irqs,
>> +                        PDC_MAX_GPIO_IRQS) {
>> +               enabled = test_bit(wake_irq, p->pdc_gpio_domain_enabled_irqs);
>> +               if (!enabled) {
>> +                       d = pdc_find_irq_data(p->pdc_gpio_domain, wake_irq);
>> +
>> +                       /*
>> +                        * When the drivers invoke enablie_irq() on a GPIO IRQ,
> s/enablie/enable/
Okay will correct.
>
>> +                        * the pending interrupt gets cleared at GIC before
>> +                        * enabling it from msm_gpio_irq_enable(). So CPU will
>> +                        * never see pending IRQ after resume if we disable them
>> +                        * here.
> Is there something that's doing this in the gpio driver? It sounds
> like the bug lies in that driver. Maybe the gpio driver should use
> irq_startup instead of irq_enable to clear anything pending? The comment
> in msm_gpio_irq_enable() talks a lot but doesn't actually say why it's a
> problem to be latched at the GIC as pending when the irq is enabled the
> first time.
This is to clear any erroneous interrupts that would have got latched 
when the interrupt is not in use.

There may be devices like UART which can use the same gpio line for data 
rx as well as a wakeup gpio

The data that was flowing on the line may latch the interrupt and when 
we enable the interrupt,we see IRQ pending and unexpected IRQ gets 
triggered.

>
>> +                        *
>> +                        * If wakeup is due to GPIO interrupt do not disable it.
>> +                        * By not disabling, The IRQ will be delivered to CPU
>> +                        * and when driver invokes enable_irq(), The softirq
>> +                        * tasklet does resend_irqs() to call irq handler.
>> +                        */
>> +                       irq_chip_get_parent_state(d, IRQCHIP_STATE_PENDING,
>> +                                                 &pending);
>> +                       if (pending) {
>> +                               pending = false;
> Please move 'pending' into the if (!enabled) scope so that it
> automatically resets for each irq. And then remove this assignment.
Sure, i will update.
>
>> +                               continue;
>> +                       }
>> +
>> +                       irq_chip_disable_parent(d);
>> +               }
>> +       }
>> +       p->from_pdc_suspend = false;
>> +}
>> +
>> +static int pdc_cpu_pm_callback(struct notifier_block *nfb,
>> +                              unsigned long action, void *v)
>> +{
>> +       struct pdc_pm_data *p = container_of(nfb, struct pdc_pm_data,
>> +                                            pdc_cpu_pm_nfb);
>> +       unsigned long flags;
>> +
>> +       if (!p->suspend_start)
>> +               return NOTIFY_OK;
>> +
>> +       spin_lock_irqsave(&p->pm_lock, flags);
>> +       switch (action) {
>> +       case CPU_PM_ENTER:
>> +               cpumask_set_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
>> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
>> +                       pdc_suspend(p);
>> +               break;
>> +       case CPU_PM_ENTER_FAILED:
>> +       case CPU_PM_EXIT:
>> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
>> +                       pdc_resume(p);
>> +               cpumask_clear_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
>> +               break;
>> +       }
>> +       spin_unlock_irqrestore(&p->pm_lock, flags);
> What is the point of this callback? Any irqs that we want to wakeup the
> CPUs from deep idle should be enabled via enable_irq(). Otherwise, they
> shouldn't wake up the system. That's the difference between idle and
> suspend.
We already discussed and agreed to treat IRQs differently in idle and 
suspend.
In summary if a SW does disable and mark IRQ as wake up capable.
1.    irq_disable()
2.    enable_irq_wake()

Since the HW don’t understand wake, it only knows either enabled or 
disabled IRQ. So the HW should do below.
1.    if system is in suspend, the IRQ should be kept Enabled in HW
2.    if system is out of suspend, the IRQ should be kept disabled in HW

As we have two different suspend states namely suspend-to-idle (s2idle) 
and deep suspend.

The PDC driver need to know “when” we are in suspend path. This is where 
pdc_pm_callback is used, it will
listen to PM_SUSPEND_PREPARE and PM_POST_SUSPEND events to know when 
system is entered in to suspend path
and when it exited.

Then comes the CPU PM notifier.
If we are not in suspend path, the CPU PM notifier immediately returns, 
but when we are in suspend path,
it starts marking CPUs to know when the last CPU notifies, it will 
invoke pdc_suspend().
That finally programs the HW accordingly.

Why last CPU? Because PDC is irq_chip and should execute suspend ops at 
very last stage during suspend.

(if PDC registered for dev pm ops, It could happen that PDC’s .suspend 
callback gets called first and then
Let’s say EC driver’s .suspend is called where it does above to disable 
and mark IRQ as wakeup capable,
however PDC should have called only after EC driver is done operating on 
its IRQ from its .suspend call)

Syscore ops are good to handle such scenarios since by this time all 
non-boot CPUs are offlined and we are
running on boot CPU (last CPU) But then if someone chooses to enter 
“s2idle”, syscore ops are not invoked in
s2idle suspend path.

I take we want to wake up with disabled but wakeup capable IRQ during 
s2idle suspend as well?

If that is not the case, i can remove these notifiers and register for 
sycore ops.
from syscore op’s .suspend we can call pdc_suspend()..

This answer’s your below comment as well on why can’t we use irqchip’s 
irq_suspend() and irq_resume() calls,
since they also get internally invoked from syscore ops only.


> In suspend, we want wakeup enabled irqs to wake us up from
> suspend no matter if the irq is enabled or not. But for idle, we don't
> care about the wakeup enable bit at all. The only bit that matters is
> that the irq is enabled and then the expectation is that it will wake us
> up. If there's some irq that can't wake us up from idle then we'll have
> to just ignore that interrupt across deep CPU idle states. Is that
> actually a problem? Or the SoC architects have figured out that certain
> irqs don't matter for deep CPU idle states and so we don't have to
> monitor them?
>
>> +
>> +       return NOTIFY_OK;
>> +}
>> +
>> +static int pdc_pm_callback(struct notifier_block *nfb,
>> +                          unsigned long event, void *unused)
>> +{
>> +       struct pdc_pm_data *p = container_of(nfb, struct pdc_pm_data,
>> +                                            pdc_pm_nfb);
>> +       switch (event) {
>> +       case PM_SUSPEND_PREPARE:
>> +               p->suspend_start = true;
>> +               break;
>> +       case PM_POST_SUSPEND:
>> +               p->suspend_start = false;
>> +               break;
>> +       }
>> +
>> +       return NOTIFY_OK;
>> +}
>> +
>>   static irq_hw_number_t get_parent_hwirq(int pin)
>>   {
>>          int i;
>> @@ -385,6 +608,10 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
>>                  return -ENXIO;
>>          }
>>   
>> +       p = kzalloc(sizeof(struct pdc_pm_data), GFP_KERNEL);
>> +       if (!p)
>> +               return -ENOMEM;
>> +
>>          parent_domain = irq_find_host(parent);
>>          if (!parent_domain) {
>>                  pr_err("%pOF: unable to find PDC's parent domain\n", node);
>> @@ -398,33 +625,47 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
>>                  goto fail;
>>          }
>>   
>> -       pdc_domain = irq_domain_create_hierarchy(parent_domain, 0, PDC_MAX_IRQS,
>> -                                                of_fwnode_handle(node),
>> -                                                &qcom_pdc_ops, NULL);
>> -       if (!pdc_domain) {
>> +       p->pdc_domain = irq_domain_create_hierarchy(parent_domain, 0,
>> +                                                   PDC_MAX_IRQS,
>> +                                                   of_fwnode_handle(node),
>> +                                                   &qcom_pdc_ops, p);
>> +       if (!p->pdc_domain) {
>>                  pr_err("%pOF: GIC domain add failed\n", node);
>>                  ret = -ENOMEM;
>>                  goto fail;
>>          }
>>   
>> -       pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain,
>> +       p->pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain,
>>                                          IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP,
>>                                          PDC_MAX_GPIO_IRQS,
>>                                          of_fwnode_handle(node),
>> -                                       &qcom_pdc_gpio_ops, NULL);
>> -       if (!pdc_gpio_domain) {
>> +                                       &qcom_pdc_gpio_ops, p);
>> +       if (!p->pdc_gpio_domain) {
>>                  pr_err("%pOF: PDC domain add failed for GPIO domain\n", node);
>>                  ret = -ENOMEM;
>>                  goto remove;
>>          }
>>   
>> -       irq_domain_update_bus_token(pdc_gpio_domain, DOMAIN_BUS_WAKEUP);
>> +       irq_domain_update_bus_token(p->pdc_gpio_domain, DOMAIN_BUS_WAKEUP);
>> +
>> +       /* Register for PM-transition events */
>> +       p->pdc_pm_nfb.notifier_call = pdc_pm_callback;
> We have irq_suspend() and irq_resume() callbacks in the irqchip. Can we
> use that instead of this notifier?
see above.
>
>> +       ret = register_pm_notifier(&p->pdc_pm_nfb);
>> +       if (ret)
>> +               goto remove;
>> +
>> +       spin_lock_init(&p->pm_lock);
>> +
>> +       /* Register for CPU PM notifications */
> Yes that's obvious. Please don't add comments like this.
Okay i will drop obvious comments.
>
>> +       p->pdc_cpu_pm_nfb.notifier_call = pdc_cpu_pm_callback;
>> +       cpu_pm_register_notifier(&p->pdc_cpu_pm_nfb);
>>   
>>          return 0;
Thanks,
Maulik
Stephen Boyd April 15, 2020, 8:06 a.m. UTC | #3
Quoting Maulik Shah (2020-04-14 01:05:36)
> Hi,
> 
> On 4/14/2020 6:05 AM, Stephen Boyd wrote:
> > Quoting Maulik Shah (2020-03-30 09:41:00)
> >> diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
> >> index 6ae9e1f..c43715b 100644
> >> --- a/drivers/irqchip/qcom-pdc.c
> >> +++ b/drivers/irqchip/qcom-pdc.c
> >> +                        * the pending interrupt gets cleared at GIC before
> >> +                        * enabling it from msm_gpio_irq_enable(). So CPU will
> >> +                        * never see pending IRQ after resume if we disable them
> >> +                        * here.
> > Is there something that's doing this in the gpio driver? It sounds
> > like the bug lies in that driver. Maybe the gpio driver should use
> > irq_startup instead of irq_enable to clear anything pending? The comment
> > in msm_gpio_irq_enable() talks a lot but doesn't actually say why it's a
> > problem to be latched at the GIC as pending when the irq is enabled the
> > first time.
> This is to clear any erroneous interrupts that would have got latched 
> when the interrupt is not in use.
> 
> There may be devices like UART which can use the same gpio line for data 
> rx as well as a wakeup gpio
> 
> The data that was flowing on the line may latch the interrupt and when 
> we enable the interrupt,we see IRQ pending and unexpected IRQ gets 
> triggered.

Isn't the interrupt supposed to latch in the hardware in this scenario?
We wanted to wakeup from UART RX over GPIO, and we did, and we also
wanted to send that data through the pin to the UART core, so I suspect
we muxed it as a UART pin and also watched it for an irq wakeup in the
GPIO driver and PDC? The wakeup irq handler can be ignored by the UART
driver if it wants.

> >
> >> +                               continue;
> >> +                       }
> >> +
> >> +                       irq_chip_disable_parent(d);
> >> +               }
> >> +       }
> >> +       p->from_pdc_suspend = false;
> >> +}
> >> +
> >> +static int pdc_cpu_pm_callback(struct notifier_block *nfb,
> >> +                              unsigned long action, void *v)
> >> +{
> >> +       struct pdc_pm_data *p = container_of(nfb, struct pdc_pm_data,
> >> +                                            pdc_cpu_pm_nfb);
> >> +       unsigned long flags;
> >> +
> >> +       if (!p->suspend_start)
> >> +               return NOTIFY_OK;
> >> +
> >> +       spin_lock_irqsave(&p->pm_lock, flags);
> >> +       switch (action) {
> >> +       case CPU_PM_ENTER:
> >> +               cpumask_set_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
> >> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
> >> +                       pdc_suspend(p);
> >> +               break;
> >> +       case CPU_PM_ENTER_FAILED:
> >> +       case CPU_PM_EXIT:
> >> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
> >> +                       pdc_resume(p);
> >> +               cpumask_clear_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
> >> +               break;
> >> +       }
> >> +       spin_unlock_irqrestore(&p->pm_lock, flags);
> > What is the point of this callback? Any irqs that we want to wakeup the
> > CPUs from deep idle should be enabled via enable_irq(). Otherwise, they
> > shouldn't wake up the system. That's the difference between idle and
> > suspend.
> We already discussed and agreed to treat IRQs differently in idle and 
> suspend.
> In summary if a SW does disable and mark IRQ as wake up capable.
> 1.    irq_disable()
> 2.    enable_irq_wake()
> 
> Since the HW don't understand wake, it only knows either enabled or 
> disabled IRQ. So the HW should do below.
> 1.    if system is in suspend, the IRQ should be kept Enabled in HW
> 2.    if system is out of suspend, the IRQ should be kept disabled in HW

Why should it be kept disabled at the PDC and GIC when the system is out
of suspend? Is that because software hasn't enabled the irq yet upon
resume and we're waiting for the irq consumer driver (EC for example) to
do that?

In the CPU idle path (i.e. the system isn't suspending) I'd expect the
PDC and GIC hwirqs to be enabled so that the interrupt can trigger at
anytime. This is the normal mode of operation. When the CPU goes to
idle, and for that matter all the CPUs go to idle, the PDC should still
be monitoring the irqs that are enabled with irq_enable() and wake up
the CPU to receive the irq by taking all the CPUs out of deep idle
states where the GIC is powered off. If the irq is disabled with
irq_disable(), I'd think we would want to disable in the PDC so that the
irq doesn't wake us up from idle unnecessarily.

Long story short, CPUs going in and out of idle and system not
suspending or freezing for s2idle means the PDC enable state should
follow irq enable state and completely ignore wake state. During system
suspend or freezing for s2idle the PDC enable state should follow wake
state. The tricky part is making sure the suspend and resume path
doesn't miss some interrupt that would have woken us up.

I thought that maybe lazy irq disable would save us here. The PDC
monitored irq could be disabled in software during suspend but not
actually disabled in hardware, so the irq could still latch. If it does
latch during suspend then we'll abort suspend either via hardware or
software detecting this case. Once we hit the end of suspend, we can
disable the PDC interrupts that aren't marked for wakeup. Implicitly,
the other interrupts that are marked for wakeup are already enabled
because they must have been lazily disabled or left enabled during
suspend. On the resume path we may have enabled some PDC interrupt for
wakeup that wasn't supposed to be enabled because software disabled it,
but it's all lazy so if it didn't trigger it doesn't matter, just let it
trigger later and if it does genirq will mask it appropriately and mark
it as pending.

> 
> As we have two different suspend states namely suspend-to-idle (s2idle) 
> and deep suspend.

Great! I'm glad you're interested in enabling s2idle.

> 
> The PDC driver need to know "when" we are in suspend path. This is where 
> pdc_pm_callback is used, it will
> listen to PM_SUSPEND_PREPARE and PM_POST_SUSPEND events to know when 
> system is entered in to suspend path
> and when it exited.
> 
> Then comes the CPU PM notifier.
> If we are not in suspend path, the CPU PM notifier immediately returns, 
> but when we are in suspend path,
> it starts marking CPUs to know when the last CPU notifies, it will 
> invoke pdc_suspend().
> That finally programs the HW accordingly.
> 
> Why last CPU? Because PDC is irq_chip and should execute suspend ops at 
> very last stage during suspend.
> 
> (if PDC registered for dev pm ops, It could happen that PDC's .suspend 
> callback gets called first and then
> Let's say EC driver's .suspend is called where it does above to disable 
> and mark IRQ as wakeup capable,
> however PDC should have called only after EC driver is done operating on 
> its IRQ from its .suspend call)
> 
> Syscore ops are good to handle such scenarios since by this time all 
> non-boot CPUs are offlined and we are
> running on boot CPU (last CPU) But then if someone chooses to enter 
> "s2idle", syscore ops are not invoked in
> s2idle suspend path.
> 
> I take we want to wake up with disabled but wakeup capable IRQ during 
> s2idle suspend as well?
> 
> If that is not the case, i can remove these notifiers and register for 
> sycore ops.
> from syscore op's .suspend we can call pdc_suspend()..
> 
> This answer's your below comment as well on why can't we use irqchip's 
> irq_suspend() and irq_resume() calls,
> since they also get internally invoked from syscore ops only.

Actually no it doesn't. It looks like the irqchip irq_suspend() and
irq_resume() ops are only called when the driver uses the generic
irqchip implementation. That doesn't look to be used here. Good point
about the syscore ops and s2idle interaction, but it's not the real
reason why these ops can't be used.

Maybe we can always call the ops from somewhere deep in the suspend path
instead of using a notifier. That may make it simpler to reason about
and fix the s2idle problem at the same time. Putting it into a notifier
is difficult to understand and could potentially have a problem if
something like the timer irq needs to stay enabled until the end of
suspend but our suspend ops turn it off. That's what syscore is
typically important for. It gets the callback out of the path where
interrupts are still enabled and could potentially interfere with the
irqchip being disrupted. Maybe suspend_device_irqs() can be extended
here. I'm not sure.

> 
> 
> > In suspend, we want wakeup enabled irqs to wake us up from
> > suspend no matter if the irq is enabled or not. But for idle, we don't
> > care about the wakeup enable bit at all. The only bit that matters is
> > that the irq is enabled and then the expectation is that it will wake us
> > up. If there's some irq that can't wake us up from idle then we'll have
> > to just ignore that interrupt across deep CPU idle states. Is that
> > actually a problem? Or the SoC architects have figured out that certain
> > irqs don't matter for deep CPU idle states and so we don't have to
> > monitor them?
Maulik Shah April 21, 2020, 6:35 a.m. UTC | #4
Hi,

On 4/15/2020 1:36 PM, Stephen Boyd wrote:
> Quoting Maulik Shah (2020-04-14 01:05:36)
>> Hi,
>>
>> On 4/14/2020 6:05 AM, Stephen Boyd wrote:
>>> Quoting Maulik Shah (2020-03-30 09:41:00)
>>>> diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
>>>> index 6ae9e1f..c43715b 100644
>>>> --- a/drivers/irqchip/qcom-pdc.c
>>>> +++ b/drivers/irqchip/qcom-pdc.c
>>>> +                        * the pending interrupt gets cleared at GIC before
>>>> +                        * enabling it from msm_gpio_irq_enable(). So CPU will
>>>> +                        * never see pending IRQ after resume if we disable them
>>>> +                        * here.
>>> Is there something that's doing this in the gpio driver? It sounds
>>> like the bug lies in that driver. Maybe the gpio driver should use
>>> irq_startup instead of irq_enable to clear anything pending? The comment
>>> in msm_gpio_irq_enable() talks a lot but doesn't actually say why it's a
>>> problem to be latched at the GIC as pending when the irq is enabled the
>>> first time.
>> This is to clear any erroneous interrupts that would have got latched
>> when the interrupt is not in use.
>>
>> There may be devices like UART which can use the same gpio line for data
>> rx as well as a wakeup gpio
>>
>> The data that was flowing on the line may latch the interrupt and when
>> we enable the interrupt,we see IRQ pending and unexpected IRQ gets
>> triggered.
> Isn't the interrupt supposed to latch in the hardware in this scenario?
> We wanted to wakeup from UART RX over GPIO, and we did, and we also
> wanted to send that data through the pin to the UART core, so I suspect
> we muxed it as a UART pin and also watched it for an irq wakeup in the
> GPIO driver and PDC? The wakeup irq handler can be ignored by the UART
> driver if it wants.
I will check this if i can operate only on PDC and GIC IRQs and can 
ignore PDC-GPIO domain IRQ.
Working on it.
>
>>>> +                               continue;
>>>> +                       }
>>>> +
>>>> +                       irq_chip_disable_parent(d);
>>>> +               }
>>>> +       }
>>>> +       p->from_pdc_suspend = false;
>>>> +}
>>>> +
>>>> +static int pdc_cpu_pm_callback(struct notifier_block *nfb,
>>>> +                              unsigned long action, void *v)
>>>> +{
>>>> +       struct pdc_pm_data *p = container_of(nfb, struct pdc_pm_data,
>>>> +                                            pdc_cpu_pm_nfb);
>>>> +       unsigned long flags;
>>>> +
>>>> +       if (!p->suspend_start)
>>>> +               return NOTIFY_OK;
>>>> +
>>>> +       spin_lock_irqsave(&p->pm_lock, flags);
>>>> +       switch (action) {
>>>> +       case CPU_PM_ENTER:
>>>> +               cpumask_set_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
>>>> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
>>>> +                       pdc_suspend(p);
>>>> +               break;
>>>> +       case CPU_PM_ENTER_FAILED:
>>>> +       case CPU_PM_EXIT:
>>>> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
>>>> +                       pdc_resume(p);
>>>> +               cpumask_clear_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
>>>> +               break;
>>>> +       }
>>>> +       spin_unlock_irqrestore(&p->pm_lock, flags);
>>> What is the point of this callback? Any irqs that we want to wakeup the
>>> CPUs from deep idle should be enabled via enable_irq(). Otherwise, they
>>> shouldn't wake up the system. That's the difference between idle and
>>> suspend.
>> We already discussed and agreed to treat IRQs differently in idle and
>> suspend.
>> In summary if a SW does disable and mark IRQ as wake up capable.
>> 1.    irq_disable()
>> 2.    enable_irq_wake()
>>
>> Since the HW don't understand wake, it only knows either enabled or
>> disabled IRQ. So the HW should do below.
>> 1.    if system is in suspend, the IRQ should be kept Enabled in HW
>> 2.    if system is out of suspend, the IRQ should be kept disabled in HW
> Why should it be kept disabled at the PDC and GIC when the system is out
> of suspend? Is that because software hasn't enabled the irq yet upon
> resume and we're waiting for the irq consumer driver (EC for example) to
> do that?
No, for validation of this change, i did below scenario

During a probe of a driver, requested an IRQ and then mark it as wakeup 
capable and disabled it.
 From driver, after probe is done with above operation, we never do any 
enable/disable/wakeup operation.

In above scenario,

1.    if system is in suspend, the IRQ should be kept Enabled in HW
     (since it was marked for wakeup during probe, right?)
2.    if system is out of suspend, the IRQ should be kept disabled in HW
     (since it was disabled in probe, and never again enabled it, right?)

i wanted to test if in HW its Enabled properly during suspend entry
(since we differ status for idle and suspend, in above scenario IRQ 
should be enabled in HW only during suspend
and then when system is out of suspend it should get disabled in HW again)

of course, if we ever woke up from suspend with this wake up capable 
IRQ, the handler will never get called
since its forever marked as disabled in SW but yet the HW is still 
allowed to wake from this IRQ.
>
> In the CPU idle path (i.e. the system isn't suspending) I'd expect the
> PDC and GIC hwirqs to be enabled so that the interrupt can trigger at
> anytime. This is the normal mode of operation.
Correct
> When the CPU goes to
> idle, and for that matter all the CPUs go to idle, the PDC should still
> be monitoring the irqs that are enabled with irq_enable() and wake up
> the CPU to receive the irq by taking all the CPUs out of deep idle
> states where the GIC is powered off. If the irq is disabled with
> irq_disable(), I'd think we would want to disable in the PDC so that the
> irq doesn't wake us up from idle unnecessarily.
yes, we do all this with current patch.
>
> Long story short, CPUs going in and out of idle and system not
> suspending or freezing for s2idle means the PDC enable state should
> follow irq enable state and completely ignore wake state. During system
> suspend or freezing for s2idle the PDC enable state should follow wake
> state. The tricky part is making sure the suspend and resume path
> doesn't miss some interrupt that would have woken us up.
Agree, we do all settings in PDC / GIC HW during deep suspend/ s2idle 
suspend
and revert back to orignal state in HW when coming out of suspend
>
> I thought that maybe lazy irq disable would save us here.
the lazy doesn't work for GPIO IRQs, gpiolib registers for .irq_disable 
for every gpio chip.
> The PDC
> monitored irq could be disabled in software during suspend but not
> actually disabled in hardware, so the irq could still latch. If it does
> latch during suspend then we'll abort suspend either via hardware or
> software detecting this case. Once we hit the end of suspend, we can
> disable the PDC interrupts that aren't marked for wakeup. Implicitly,
> the other interrupts that are marked for wakeup are already enabled
> because they must have been lazily disabled or left enabled during
> suspend. On the resume path we may have enabled some PDC interrupt for
> wakeup that wasn't supposed to be enabled because software disabled it,
> but it's all lazy so if it didn't trigger it doesn't matter, just let it
> trigger later and if it does genirq will mask it appropriately and mark
> it as pending.
>
>> As we have two different suspend states namely suspend-to-idle (s2idle)
>> and deep suspend.
> Great! I'm glad you're interested in enabling s2idle.
yes i am interested, s2idle is working fine on sc7180.
>
>> The PDC driver need to know "when" we are in suspend path. This is where
>> pdc_pm_callback is used, it will
>> listen to PM_SUSPEND_PREPARE and PM_POST_SUSPEND events to know when
>> system is entered in to suspend path
>> and when it exited.
>>
>> Then comes the CPU PM notifier.
>> If we are not in suspend path, the CPU PM notifier immediately returns,
>> but when we are in suspend path,
>> it starts marking CPUs to know when the last CPU notifies, it will
>> invoke pdc_suspend().
>> That finally programs the HW accordingly.
>>
>> Why last CPU? Because PDC is irq_chip and should execute suspend ops at
>> very last stage during suspend.
>>
>> (if PDC registered for dev pm ops, It could happen that PDC's .suspend
>> callback gets called first and then
>> Let's say EC driver's .suspend is called where it does above to disable
>> and mark IRQ as wakeup capable,
>> however PDC should have called only after EC driver is done operating on
>> its IRQ from its .suspend call)
>>
>> Syscore ops are good to handle such scenarios since by this time all
>> non-boot CPUs are offlined and we are
>> running on boot CPU (last CPU) But then if someone chooses to enter
>> "s2idle", syscore ops are not invoked in
>> s2idle suspend path.
>>
>> I take we want to wake up with disabled but wakeup capable IRQ during
>> s2idle suspend as well?
>>
>> If that is not the case, i can remove these notifiers and register for
>> sycore ops.
>> from syscore op's .suspend we can call pdc_suspend()..
>>
>> This answer's your below comment as well on why can't we use irqchip's
>> irq_suspend() and irq_resume() calls,
>> since they also get internally invoked from syscore ops only.
> Actually no it doesn't. It looks like the irqchip irq_suspend() and
> irq_resume() ops are only called when the driver uses the generic
> irqchip implementation. That doesn't look to be used here.
correct
> Good point
> about the syscore ops and s2idle interaction, but it's not the real
> reason why these ops can't be used.
No internally generic chip also looks using syscore ops 
(irq_gc_syscore_ops)
to invoke generic irq_chip's irq_suspend and irq_resume callbacks

since s2idle still don't call syscore ops, these callback will never get 
invoked
if s2idle suspend is entered.
>
> Maybe we can always call the ops from somewhere deep in the suspend path
> instead of using a notifier. That may make it simpler to reason about
> and fix the s2idle problem at the same time. Putting it into a notifier
> is difficult to understand and could potentially have a problem if
> something like the timer irq needs to stay enabled until the end of
> suspend but our suspend ops turn it off.
we are not using suspend ops for same reason, with notifiers this will 
never happen, since we can determine last cpu

1. during s2idle suspend
     for all CPUs cpu_pm notifier will be called. we can determine which 
one is last cpu calling it.

2. during deep suspend
     non-boot cpus get disabled and from last cpu, kernel/cpu_pm.c 
registers for syscore ops and via
    this ops it invokes cpu_pm notification. so we know it is last cpu.

so both cases these will be invoked at very last stage of suspend(be it 
s2idle or deep) when all other drivers are done

and we will never run into above scenario.

> That's what syscore is
> typically important for. It gets the callback out of the path where
> interrupts are still enabled and could potentially interfere with the
> irqchip being disrupted. Maybe suspend_device_irqs() can be extended
> here. I'm not sure.
>
>>> In suspend, we want wakeup enabled irqs to wake us up from
>>> suspend no matter if the irq is enabled or not. But for idle, we don't
>>> care about the wakeup enable bit at all. The only bit that matters is
>>> that the irq is enabled and then the expectation is that it will wake us
>>> up. If there's some irq that can't wake us up from idle then we'll have
>>> to just ignore that interrupt across deep CPU idle states. Is that
>>> actually a problem? Or the SoC architects have figured out that certain
>>> irqs don't matter for deep CPU idle states and so we don't have to
>>> monitor them?
Thanks,
Maulik
Stephen Boyd April 22, 2020, 10:42 a.m. UTC | #5
Quoting Maulik Shah (2020-04-20 23:35:09)
> Hi,
> 
> On 4/15/2020 1:36 PM, Stephen Boyd wrote:
> > Quoting Maulik Shah (2020-04-14 01:05:36)
> >> Hi,
> >>
> >> On 4/14/2020 6:05 AM, Stephen Boyd wrote:
> >>> Quoting Maulik Shah (2020-03-30 09:41:00)
> >>>> diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
> >>>> index 6ae9e1f..c43715b 100644
> >>>> --- a/drivers/irqchip/qcom-pdc.c
> >>>> +++ b/drivers/irqchip/qcom-pdc.c
> >>>> +                        * the pending interrupt gets cleared at GIC before
> >>>> +                        * enabling it from msm_gpio_irq_enable(). So CPU will
> >>>> +                        * never see pending IRQ after resume if we disable them
> >>>> +                        * here.
> >>> Is there something that's doing this in the gpio driver? It sounds
> >>> like the bug lies in that driver. Maybe the gpio driver should use
> >>> irq_startup instead of irq_enable to clear anything pending? The comment
> >>> in msm_gpio_irq_enable() talks a lot but doesn't actually say why it's a
> >>> problem to be latched at the GIC as pending when the irq is enabled the
> >>> first time.
> >> This is to clear any erroneous interrupts that would have got latched
> >> when the interrupt is not in use.
> >>
> >> There may be devices like UART which can use the same gpio line for data
> >> rx as well as a wakeup gpio
> >>
> >> The data that was flowing on the line may latch the interrupt and when
> >> we enable the interrupt,we see IRQ pending and unexpected IRQ gets
> >> triggered.
> > Isn't the interrupt supposed to latch in the hardware in this scenario?
> > We wanted to wakeup from UART RX over GPIO, and we did, and we also
> > wanted to send that data through the pin to the UART core, so I suspect
> > we muxed it as a UART pin and also watched it for an irq wakeup in the
> > GPIO driver and PDC? The wakeup irq handler can be ignored by the UART
> > driver if it wants.
> I will check this if i can operate only on PDC and GIC IRQs and can 
> ignore PDC-GPIO domain IRQ.
> Working on it.

Ok.

> >
> >>>> +                               continue;
> >>>> +                       }
> >>>> +
> >>>> +                       irq_chip_disable_parent(d);
> >>>> +               }
> >>>> +       }
> >>>> +       p->from_pdc_suspend = false;
> >>>> +}
> >>>> +
> >>>> +static int pdc_cpu_pm_callback(struct notifier_block *nfb,
> >>>> +                              unsigned long action, void *v)
> >>>> +{
> >>>> +       struct pdc_pm_data *p = container_of(nfb, struct pdc_pm_data,
> >>>> +                                            pdc_cpu_pm_nfb);
> >>>> +       unsigned long flags;
> >>>> +
> >>>> +       if (!p->suspend_start)
> >>>> +               return NOTIFY_OK;
> >>>> +
> >>>> +       spin_lock_irqsave(&p->pm_lock, flags);
> >>>> +       switch (action) {
> >>>> +       case CPU_PM_ENTER:
> >>>> +               cpumask_set_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
> >>>> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
> >>>> +                       pdc_suspend(p);
> >>>> +               break;
> >>>> +       case CPU_PM_ENTER_FAILED:
> >>>> +       case CPU_PM_EXIT:
> >>>> +               if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
> >>>> +                       pdc_resume(p);
> >>>> +               cpumask_clear_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
> >>>> +               break;
> >>>> +       }
> >>>> +       spin_unlock_irqrestore(&p->pm_lock, flags);
> >>> What is the point of this callback? Any irqs that we want to wakeup the
> >>> CPUs from deep idle should be enabled via enable_irq(). Otherwise, they
> >>> shouldn't wake up the system. That's the difference between idle and
> >>> suspend.
> >> We already discussed and agreed to treat IRQs differently in idle and
> >> suspend.
> >> In summary if a SW does disable and mark IRQ as wake up capable.
> >> 1.    irq_disable()
> >> 2.    enable_irq_wake()
> >>
> >> Since the HW don't understand wake, it only knows either enabled or
> >> disabled IRQ. So the HW should do below.
> >> 1.    if system is in suspend, the IRQ should be kept Enabled in HW
> >> 2.    if system is out of suspend, the IRQ should be kept disabled in HW
> > Why should it be kept disabled at the PDC and GIC when the system is out
> > of suspend? Is that because software hasn't enabled the irq yet upon
> > resume and we're waiting for the irq consumer driver (EC for example) to
> > do that?
> No, for validation of this change, i did below scenario
> 
> During a probe of a driver, requested an IRQ and then mark it as wakeup 
> capable and disabled it.
>  From driver, after probe is done with above operation, we never do any 
> enable/disable/wakeup operation.
> 
> In above scenario,
> 
> 1.    if system is in suspend, the IRQ should be kept Enabled in HW
>      (since it was marked for wakeup during probe, right?)

Yes.

> 2.    if system is out of suspend, the IRQ should be kept disabled in HW
>      (since it was disabled in probe, and never again enabled it, right?)

Not exactly. It was requested in probe, so it was enabled, and then it
was lazy disabled so it has basically always been enabled since it was
requested. If lazy disable isn't used then there's a problem.

> 
> i wanted to test if in HW its Enabled properly during suspend entry
> (since we differ status for idle and suspend, in above scenario IRQ 
> should be enabled in HW only during suspend
> and then when system is out of suspend it should get disabled in HW again)
> 
> of course, if we ever woke up from suspend with this wake up capable 
> IRQ, the handler will never get called
> since its forever marked as disabled in SW but yet the HW is still 
> allowed to wake from this IRQ.

Alright. The other case is request an irq without auto enable flag set,
which is pretty rare, and then mark it as wakeup capable and suspend. I
hope we can ignore this one.

> >
> > In the CPU idle path (i.e. the system isn't suspending) I'd expect the
> > PDC and GIC hwirqs to be enabled so that the interrupt can trigger at
> > anytime. This is the normal mode of operation.
> Correct
> > When the CPU goes to
> > idle, and for that matter all the CPUs go to idle, the PDC should still
> > be monitoring the irqs that are enabled with irq_enable() and wake up
> > the CPU to receive the irq by taking all the CPUs out of deep idle
> > states where the GIC is powered off. If the irq is disabled with
> > irq_disable(), I'd think we would want to disable in the PDC so that the
> > irq doesn't wake us up from idle unnecessarily.
> yes, we do all this with current patch.
> >
> > Long story short, CPUs going in and out of idle and system not
> > suspending or freezing for s2idle means the PDC enable state should
> > follow irq enable state and completely ignore wake state. During system
> > suspend or freezing for s2idle the PDC enable state should follow wake
> > state. The tricky part is making sure the suspend and resume path
> > doesn't miss some interrupt that would have woken us up.
> Agree, we do all settings in PDC / GIC HW during deep suspend/ s2idle 
> suspend
> and revert back to orignal state in HW when coming out of suspend
> >
> > I thought that maybe lazy irq disable would save us here.
> the lazy doesn't work for GPIO IRQs, gpiolib registers for .irq_disable 
> for every gpio chip.

Why? Can the gpio chip stop doing that?

> >>
> >> This answer's your below comment as well on why can't we use irqchip's
> >> irq_suspend() and irq_resume() calls,
> >> since they also get internally invoked from syscore ops only.
> > Actually no it doesn't. It looks like the irqchip irq_suspend() and
> > irq_resume() ops are only called when the driver uses the generic
> > irqchip implementation. That doesn't look to be used here.
> correct
> > Good point
> > about the syscore ops and s2idle interaction, but it's not the real
> > reason why these ops can't be used.
> No internally generic chip also looks using syscore ops 
> (irq_gc_syscore_ops)
> to invoke generic irq_chip's irq_suspend and irq_resume callbacks
> 
> since s2idle still don't call syscore ops, these callback will never get 
> invoked
> if s2idle suspend is entered.

Are you using the generic irqchip code? I don't see that used here so
talking about the usage of syscore and how that doesn't work for s2idle
doesn't seem relevant here.

> >
> > Maybe we can always call the ops from somewhere deep in the suspend path
> > instead of using a notifier. That may make it simpler to reason about
> > and fix the s2idle problem at the same time. Putting it into a notifier
> > is difficult to understand and could potentially have a problem if
> > something like the timer irq needs to stay enabled until the end of
> > suspend but our suspend ops turn it off.
> we are not using suspend ops for same reason, with notifiers this will 
> never happen, since we can determine last cpu
> 
> 1. during s2idle suspend
>      for all CPUs cpu_pm notifier will be called. we can determine which 
> one is last cpu calling it.
> 
> 2. during deep suspend
>      non-boot cpus get disabled and from last cpu, kernel/cpu_pm.c 
> registers for syscore ops and via
>     this ops it invokes cpu_pm notification. so we know it is last cpu.
> 
> so both cases these will be invoked at very last stage of suspend(be it 
> s2idle or deep) when all other drivers are done
> 
> and we will never run into above scenario.
> 

I hope the irqs can be lazy disabled and whatever problem exists in gpio
driver can be resolved so that irqs can be left enabled in PDC and GIC
during suspend and idle entry. It's sort of odd to rely on lazy disable
behavior in this way, so it would need genirq maintainer approval.
Stephen Boyd April 24, 2020, 2:37 a.m. UTC | #6
Quoting Maulik Shah (2020-04-22 05:14:21)
> 
> We discussed this in RFC v2, pasting your reply from same.
> 
>> That looks like a bug. It appears that gpiolib is only hooking the irq
>> disable path here so that it can keep track of what irqs are not in use
>> by gpiolib and allow them to be used for GPIO purposes when the irqs are
>> disabled. See commit 461c1a7d4733 ("gpiolib: override
>> irq_enable/disable"). That code in gpiolib should probably see if lazy
>> mode has been disabled on the irq and do similar things to what genirq
>> does and then let genirq mask the gpios if they trigger during the time
>> when the irq is disabled. Regardless of this design though, I don't
>> understand why this matters.

Yeah. I'm saying that the gpiolib code that forces all gpio irqs to be
non-lazy is broken. Please send a patch to fix gpiolib so that irqs can
be lazy again. I thought it didn't matter before but now that I've
learned more it sounds like we have to use lazy irqs here so that irqs
stay enabled on the path to suspend while they're disabled but marked
for wakeup. Otherwise I don't know how to make this work.
Maulik Shah April 24, 2020, 9:39 a.m. UTC | #7
Hi,

On 4/24/2020 8:07 AM, Stephen Boyd wrote:
> Quoting Maulik Shah (2020-04-22 05:14:21)
>> We discussed this in RFC v2, pasting your reply from same.
>>
>>> That looks like a bug. It appears that gpiolib is only hooking the irq
>>> disable path here so that it can keep track of what irqs are not in use
>>> by gpiolib and allow them to be used for GPIO purposes when the irqs are
>>> disabled. See commit 461c1a7d4733 ("gpiolib: override
>>> irq_enable/disable"). That code in gpiolib should probably see if lazy
>>> mode has been disabled on the irq and do similar things to what genirq
>>> does and then let genirq mask the gpios if they trigger during the time
>>> when the irq is disabled. Regardless of this design though, I don't
>>> understand why this matters.
> Yeah. I'm saying that the gpiolib code that forces all gpio irqs to be
> non-lazy is broken. Please send a patch to fix gpiolib so that irqs can
> be lazy again.

Let me reiterate.

In below genirq code, if irq_chip did not choose to implement 
.irq_disable (and the mask argument is coming as false by default during 
first disable) from the caller of this,

Then yes control takes lazy path since it doesn't enter to  if 
(desc->irq_data.chip->irq_disable)  or else if (mask) case and the 
__irq_disable call simply returns  without executing anything.

This leaves IRQ enabled in HW during suspend even though driver done 
disable_irq()

static void __irq_disable(struct irq_desc *desc, bool mask)
{
         if (irqd_irq_disabled(&desc->irq_data)) {
                 if (mask)
                         mask_irq(desc);
         } else {
                 irq_state_set_disabled(desc);
                 if (desc->irq_data.chip->irq_disable) {
desc->irq_data.chip->irq_disable(&desc->irq_data);
                         irq_state_set_masked(desc);
                 } else if (mask) {
                         mask_irq(desc);
                 }
         }
}

With above understanding, we have two more points to consider

1. The gpiolib registers for .irq_disable for every gpio chip, making it 
unlazy for every gpio chip.

2. The irq_chip that registers with gpiolib (from pinctrl-msm.c, also 
has .irq_disable implemented, and its parent PDC also has .irq_disable 
implemented)

Even if we fix point (1), by making gpiolib replicating genirq code,
It still will call irq_disable (from replicated code in gpiolib now) 
since irq_chip that registers with gpiolib has chosen to implement 
.irq_disable.

For other irq_chip's which don't implement .irq_disable, it will help.
But since from point (2) we know that PDC irq_chip implements 
.irq_disable, and would disable in HW the moment .irq_disable call comes in.


> I thought it didn't matter before but now that I've
> learned more it sounds like we have to use lazy irqs here so that irqs
> stay enabled on the path to suspend while they're disabled but marked
> for wakeup.

The description for irq_disable() says...

  * If the chip does not implement the irq_disable callback, we
  * use a lazy disable approach. That means we mark the interrupt
  * disabled, but leave the hardware unmasked. That's an
  * optimization...

Since PDC irq_chip implements .irq_disable callback, IRQ gets disabled 
in HW immediatly.

The comment says that lazy approch will be used if .irq_disable callback 
is not implemented.

IMO, client driver's should not assume/expect that underlying irq_chip 
will take lazy path only,
As it depends on whether irq_chip implemented .irq_disable callback or not.

If a driver is relying on lazy disable approach till date, then the 
driver should get fixed.
(by removing the unnecessary disable_irq() during suspend)

i suspect this might be the case in EC driver, on the platforms where it 
worked fine till today,
irq may not have been disabled in HW (may be that working platform's 
irq_chip may not have implemented irq_disable
hence support lazy disable which leaves IRQ unmasked in HW even after 
disable_irq() and able to resume from suspend)


> Otherwise I don't know how to make this work.
There are 3 possible options to make it work

1. Fix the EC driver (to stop calling disable_irq() during suspend) / 
support unlazy disable in EC driver

2. Support lazy disable (requires fixes in gpiolib and irq_chip also 
should not implement .irq_disable callback)

With above explanation, The PDC irq_chip want to keep continue implement 
.irq_disable callback
(we have different functionality in .irq_mask and .irq_disable 
implementation)

so option 2 is ruled out IMO.

3. We use this patch

The current patch makes this work by re-enabling such IRQs in HW during 
suspend's very end point, and restores state during resume.

i can not think of any other option.

Let me know if you now agree to go with option (1)

Otherwise, for option (3),

i can post next revision after addressing some minor fixes/suggestions.
I have tried to make this further simpler by not operating on both 
PDC-GPIO and PDC irq domains,
but it seems we need to use both irq domains and take the action 
accordingly during pdc_suspend() and pdc_resume().

I have missed the default case in switch (noticed same during rpmh patch 
doug has posted :-)), as it can get called for CLUSTER notification.
Will fix in next revision.

Thanks,
Maulik
diff mbox series

Patch

diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 6ae9e1f..c43715b 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -1,9 +1,10 @@ 
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2017-2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/err.h>
+#include <linux/cpu_pm.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
@@ -17,6 +18,7 @@ 
 #include <linux/soc/qcom/irq.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/suspend.h>
 #include <linux/types.h>
 
 #define PDC_MAX_IRQS		168
@@ -36,6 +38,23 @@  struct pdc_pin_region {
 	u32 cnt;
 };
 
+struct pdc_pm_data {
+	struct cpumask cpus_in_pc;
+	spinlock_t pm_lock;
+	bool suspend_start;
+	bool from_pdc_suspend;
+	struct notifier_block pdc_pm_nfb;
+	struct notifier_block pdc_cpu_pm_nfb;
+
+	DECLARE_BITMAP(pdc_domain_enabled_irqs, PDC_MAX_IRQS);
+	DECLARE_BITMAP(pdc_domain_wake_irqs, PDC_MAX_IRQS);
+	DECLARE_BITMAP(pdc_gpio_domain_enabled_irqs, PDC_MAX_GPIO_IRQS);
+	DECLARE_BITMAP(pdc_gpio_domain_wake_irqs, PDC_MAX_GPIO_IRQS);
+
+	struct irq_domain *pdc_domain;
+	struct irq_domain *pdc_gpio_domain;
+};
+
 static DEFINE_RAW_SPINLOCK(pdc_lock);
 static void __iomem *pdc_base;
 static struct pdc_pin_region *pdc_region;
@@ -89,18 +108,38 @@  static void pdc_enable_intr(struct irq_data *d, bool on)
 
 static void qcom_pdc_gic_disable(struct irq_data *d)
 {
+	struct pdc_pm_data *p;
+
 	if (d->hwirq == GPIO_NO_WAKE_IRQ)
 		return;
 
+	p = irq_data_get_irq_chip_data(d);
+	if (!p->from_pdc_suspend) {
+		if (irq_domain_qcom_handle_wakeup(d->domain))
+			clear_bit(d->hwirq, p->pdc_gpio_domain_enabled_irqs);
+		else
+			clear_bit(d->hwirq, p->pdc_domain_enabled_irqs);
+	}
+
 	pdc_enable_intr(d, false);
 	irq_chip_disable_parent(d);
 }
 
 static void qcom_pdc_gic_enable(struct irq_data *d)
 {
+	struct pdc_pm_data *p;
+
 	if (d->hwirq == GPIO_NO_WAKE_IRQ)
 		return;
 
+	p = irq_data_get_irq_chip_data(d);
+	if (!p->from_pdc_suspend) {
+		if (irq_domain_qcom_handle_wakeup(d->domain))
+			set_bit(d->hwirq, p->pdc_gpio_domain_enabled_irqs);
+		else
+			set_bit(d->hwirq, p->pdc_domain_enabled_irqs);
+	}
+
 	pdc_enable_intr(d, true);
 	irq_chip_enable_parent(d);
 }
@@ -145,6 +184,39 @@  enum pdc_irq_config_bits {
 };
 
 /**
+ * qcom_pdc_gic_set_wake: Mark IRQ as wakeup capable
+ *
+ * @d: the interrupt data
+ * @on: enable or disable wakeup capability
+ *
+ * Mark IRQ as wake up capable at either pdc_domain or pdc_gpio_domain.
+ * This will be used when entering to suspend where if any wakeup capable
+ * IRQ is already disabled in SW, such IRQ needs to be re-enabled at HW.
+ */
+static int qcom_pdc_gic_set_wake(struct irq_data *d, unsigned int on)
+{
+	struct pdc_pm_data *p;
+
+	if (d->hwirq == GPIO_NO_WAKE_IRQ)
+		return 0;
+
+	p = irq_data_get_irq_chip_data(d);
+	if (on) {
+		if (irq_domain_qcom_handle_wakeup(d->domain))
+			set_bit(d->hwirq, p->pdc_gpio_domain_wake_irqs);
+		else
+			set_bit(d->hwirq, p->pdc_domain_wake_irqs);
+	} else {
+		if (irq_domain_qcom_handle_wakeup(d->domain))
+			clear_bit(d->hwirq, p->pdc_gpio_domain_wake_irqs);
+		else
+			clear_bit(d->hwirq, p->pdc_domain_wake_irqs);
+	}
+
+	return irq_chip_set_wake_parent(d, on);
+}
+
+/**
  * qcom_pdc_gic_set_type: Configure PDC for the interrupt
  *
  * @d: the interrupt data
@@ -202,14 +274,162 @@  static struct irq_chip qcom_pdc_gic_chip = {
 	.irq_get_irqchip_state	= qcom_pdc_gic_get_irqchip_state,
 	.irq_set_irqchip_state	= qcom_pdc_gic_set_irqchip_state,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_wake		= qcom_pdc_gic_set_wake,
 	.irq_set_type		= qcom_pdc_gic_set_type,
 	.flags			= IRQCHIP_MASK_ON_SUSPEND |
-				  IRQCHIP_SET_TYPE_MASKED |
-				  IRQCHIP_SKIP_SET_WAKE,
+				  IRQCHIP_SET_TYPE_MASKED,
 	.irq_set_vcpu_affinity	= irq_chip_set_vcpu_affinity_parent,
 	.irq_set_affinity	= irq_chip_set_affinity_parent,
 };
 
+static struct irq_data *pdc_find_irq_data(struct irq_domain *domain,
+					  int wake_irq)
+{
+	int irq = irq_find_mapping(domain, wake_irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	return &desc->irq_data;
+}
+
+/**
+ * pdc_suspend: Enable IRQs marked for wakeup
+ *
+ * @p: pdc_pm_data
+ *
+ * The SW expects that an IRQ that's disabled with disable_irq() can still
+ * wake the system from sleep states such as "suspend to RAM", if it has
+ * been marked for wakeup.
+ *
+ * While the SW may choose to differ status for "wake" and "enabled" interrupts,
+ * its not the case with HW. There is no dedicated config in HW to differ "wake"
+ * and "enabled". Same is case for PDC's parent irq_chip (ARM GIC) which has
+ * only GICD_ISENABLER to indicate "enabled" or "disabled" status.
+ *
+ * So, the HW ONLY understands either "enabled" or "disabled".
+ * The final status in HW should be an "OR" of "enable" and "wake" status.
+ * i.e. PDC (and GIC) irq enable in HW = irq enable | irq wake in SW
+ */
+static void pdc_suspend(struct pdc_pm_data *p)
+{
+	int wake_irq;
+	bool enabled;
+	struct irq_data *d;
+
+	p->from_pdc_suspend = true;
+	for_each_set_bit(wake_irq, p->pdc_domain_wake_irqs, PDC_MAX_IRQS) {
+		enabled = test_bit(wake_irq, p->pdc_domain_enabled_irqs);
+		if (!enabled) {
+			d = pdc_find_irq_data(p->pdc_domain, wake_irq);
+
+			pdc_enable_intr(d, true);
+			irq_chip_enable_parent(d);
+		}
+	}
+
+	for_each_set_bit(wake_irq, p->pdc_gpio_domain_wake_irqs,
+			 PDC_MAX_GPIO_IRQS) {
+		enabled = test_bit(wake_irq, p->pdc_gpio_domain_enabled_irqs);
+		if (!enabled) {
+			d = pdc_find_irq_data(p->pdc_gpio_domain, wake_irq);
+
+			irq_chip_enable_parent(d);
+		}
+	}
+}
+
+static void pdc_resume(struct pdc_pm_data *p)
+{
+	int wake_irq;
+	bool enabled, pending;
+	struct irq_data *d;
+
+	for_each_set_bit(wake_irq, p->pdc_domain_wake_irqs, PDC_MAX_IRQS) {
+		enabled = test_bit(wake_irq, p->pdc_domain_enabled_irqs);
+		if (!enabled) {
+			d = pdc_find_irq_data(p->pdc_domain, wake_irq);
+
+			pdc_enable_intr(d, false);
+			irq_chip_disable_parent(d);
+		}
+	}
+
+	for_each_set_bit(wake_irq, p->pdc_gpio_domain_wake_irqs,
+			 PDC_MAX_GPIO_IRQS) {
+		enabled = test_bit(wake_irq, p->pdc_gpio_domain_enabled_irqs);
+		if (!enabled) {
+			d = pdc_find_irq_data(p->pdc_gpio_domain, wake_irq);
+
+			/*
+			 * When the drivers invoke enablie_irq() on a GPIO IRQ,
+			 * the pending interrupt gets cleared at GIC before
+			 * enabling it from msm_gpio_irq_enable(). So CPU will
+			 * never see pending IRQ after resume if we disable them
+			 * here.
+			 *
+			 * If wakeup is due to GPIO interrupt do not disable it.
+			 * By not disabling, The IRQ will be delivered to CPU
+			 * and when driver invokes enable_irq(), The softirq
+			 * tasklet does resend_irqs() to call irq handler.
+			 */
+			irq_chip_get_parent_state(d, IRQCHIP_STATE_PENDING,
+						  &pending);
+			if (pending) {
+				pending = false;
+				continue;
+			}
+
+			irq_chip_disable_parent(d);
+		}
+	}
+	p->from_pdc_suspend = false;
+}
+
+static int pdc_cpu_pm_callback(struct notifier_block *nfb,
+			       unsigned long action, void *v)
+{
+	struct pdc_pm_data *p = container_of(nfb, struct pdc_pm_data,
+					     pdc_cpu_pm_nfb);
+	unsigned long flags;
+
+	if (!p->suspend_start)
+		return NOTIFY_OK;
+
+	spin_lock_irqsave(&p->pm_lock, flags);
+	switch (action) {
+	case CPU_PM_ENTER:
+		cpumask_set_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
+		if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
+			pdc_suspend(p);
+		break;
+	case CPU_PM_ENTER_FAILED:
+	case CPU_PM_EXIT:
+		if (cpumask_equal(&p->cpus_in_pc, cpu_online_mask))
+			pdc_resume(p);
+		cpumask_clear_cpu(raw_smp_processor_id(), &p->cpus_in_pc);
+		break;
+	}
+	spin_unlock_irqrestore(&p->pm_lock, flags);
+
+	return NOTIFY_OK;
+}
+
+static int pdc_pm_callback(struct notifier_block *nfb,
+			   unsigned long event, void *unused)
+{
+	struct pdc_pm_data *p = container_of(nfb, struct pdc_pm_data,
+					     pdc_pm_nfb);
+	switch (event) {
+	case PM_SUSPEND_PREPARE:
+		p->suspend_start = true;
+		break;
+	case PM_POST_SUSPEND:
+		p->suspend_start = false;
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 static irq_hw_number_t get_parent_hwirq(int pin)
 {
 	int i;
@@ -254,7 +474,8 @@  static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
 		return ret;
 
 	ret  = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-					     &qcom_pdc_gic_chip, NULL);
+					     &qcom_pdc_gic_chip,
+					     domain->host_data);
 	if (ret)
 		return ret;
 
@@ -298,7 +519,8 @@  static int qcom_pdc_gpio_alloc(struct irq_domain *domain, unsigned int virq,
 		return ret;
 
 	ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
-					    &qcom_pdc_gic_chip, NULL);
+					    &qcom_pdc_gic_chip,
+					    domain->host_data);
 	if (ret)
 		return ret;
 
@@ -376,7 +598,8 @@  static int pdc_setup_pin_mapping(struct device_node *np)
 
 static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
 {
-	struct irq_domain *parent_domain, *pdc_domain, *pdc_gpio_domain;
+	struct pdc_pm_data *p;
+	struct irq_domain *parent_domain;
 	int ret;
 
 	pdc_base = of_iomap(node, 0);
@@ -385,6 +608,10 @@  static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
 		return -ENXIO;
 	}
 
+	p = kzalloc(sizeof(struct pdc_pm_data), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
 	parent_domain = irq_find_host(parent);
 	if (!parent_domain) {
 		pr_err("%pOF: unable to find PDC's parent domain\n", node);
@@ -398,33 +625,47 @@  static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
 		goto fail;
 	}
 
-	pdc_domain = irq_domain_create_hierarchy(parent_domain, 0, PDC_MAX_IRQS,
-						 of_fwnode_handle(node),
-						 &qcom_pdc_ops, NULL);
-	if (!pdc_domain) {
+	p->pdc_domain = irq_domain_create_hierarchy(parent_domain, 0,
+						    PDC_MAX_IRQS,
+						    of_fwnode_handle(node),
+						    &qcom_pdc_ops, p);
+	if (!p->pdc_domain) {
 		pr_err("%pOF: GIC domain add failed\n", node);
 		ret = -ENOMEM;
 		goto fail;
 	}
 
-	pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain,
+	p->pdc_gpio_domain = irq_domain_create_hierarchy(parent_domain,
 					IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP,
 					PDC_MAX_GPIO_IRQS,
 					of_fwnode_handle(node),
-					&qcom_pdc_gpio_ops, NULL);
-	if (!pdc_gpio_domain) {
+					&qcom_pdc_gpio_ops, p);
+	if (!p->pdc_gpio_domain) {
 		pr_err("%pOF: PDC domain add failed for GPIO domain\n", node);
 		ret = -ENOMEM;
 		goto remove;
 	}
 
-	irq_domain_update_bus_token(pdc_gpio_domain, DOMAIN_BUS_WAKEUP);
+	irq_domain_update_bus_token(p->pdc_gpio_domain, DOMAIN_BUS_WAKEUP);
+
+	/* Register for PM-transition events */
+	p->pdc_pm_nfb.notifier_call = pdc_pm_callback;
+	ret = register_pm_notifier(&p->pdc_pm_nfb);
+	if (ret)
+		goto remove;
+
+	spin_lock_init(&p->pm_lock);
+
+	/* Register for CPU PM notifications */
+	p->pdc_cpu_pm_nfb.notifier_call = pdc_cpu_pm_callback;
+	cpu_pm_register_notifier(&p->pdc_cpu_pm_nfb);
 
 	return 0;
 
 remove:
-	irq_domain_remove(pdc_domain);
+	irq_domain_remove(p->pdc_domain);
 fail:
+	kfree(p);
 	kfree(pdc_region);
 	iounmap(pdc_base);
 	return ret;