diff mbox

[v3,1/1] x86/platform/intel-mid: Retrofit pci_platform_pm_ops ->get_state hook

Message ID 7c1567d4c49303a4aada94ba16275cbf56b8976b.1477221514.git.lukas@wunner.de (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Lukas Wunner Oct. 23, 2016, 11:55 a.m. UTC
Commit cc7cc02bada8 ("PCI: Query platform firmware for device power
state") augmented struct pci_platform_pm_ops with a ->get_state hook and
implemented it for acpi_pci_platform_pm, the only pci_platform_pm_ops
existing till v4.7.

However v4.8 introduced another pci_platform_pm_ops for Intel Mobile
Internet Devices with commit 5823d0893ec2 ("x86/platform/intel-mid: Add
Power Management Unit driver").  It is missing the ->get_state hook,
which is fatal since pci_set_platform_pm() enforces its presence.  Andy
Shevchenko reports that without the present commit, such a device
"crashes without even a character printed out on serial console and
reboots (since watchdog)".

Retrofit mid_pci_platform_pm with the missing callback to fix the
breakage.

Fixes: cc7cc02bada8 ("PCI: Query platform firmware for device power state")
Cc: x86@kernel.org
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Acked-and-tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
Changes v1 -> v2:
- Cast return value of intel_mid_pci_get_power_state() to
  (__force pci_power_t) to avoid "sparse -D__CHECK_ENDIAN__" warning.
- Add ack by Andy Shevchenko.

Changes v2 -> v3:
- Amend commit message to explain the user-visible failure mode as
  reported by Andy.
- Add ack by Bjorn Helgaas and Fixes tag.

 arch/x86/include/asm/intel-mid.h  |  1 +
 arch/x86/platform/intel-mid/pwr.c | 19 +++++++++++++++++++
 drivers/pci/pci-mid.c             |  6 ++++++
 3 files changed, 26 insertions(+)

Comments

Bryan O'Donoghue Oct. 23, 2016, 12:37 p.m. UTC | #1
On Sun, 2016-10-23 at 13:55 +0200, Lukas Wunner wrote:
> Commit cc7cc02bada8 ("PCI: Query platform firmware for device power
> state") augmented struct pci_platform_pm_ops with a ->get_state hook
> and
> implemented it for acpi_pci_platform_pm, the only pci_platform_pm_ops
> existing till v4.7.
> 
> However v4.8 introduced another pci_platform_pm_ops for Intel Mobile
> Internet Devices with commit 5823d0893ec2 ("x86/platform/intel-mid:
> Add
> Power Management Unit driver").  It is missing the ->get_state hook,
> which is fatal since pci_set_platform_pm() enforces its
> presence.  Andy
> Shevchenko reports that without the present commit, such a device
> "crashes without even a character printed out on serial console and
> reboots (since watchdog)".
> 
> Retrofit mid_pci_platform_pm with the missing callback to fix the
> breakage.
> 
> Fixes: cc7cc02bada8 ("PCI: Query platform firmware for device power
> state")
> Cc: x86@kernel.org
> Signed-off-by: Lukas Wunner <lukas@wunner.de>
> Acked-and-tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.c
> om>
> Acked-by: Bjorn Helgaas <bhelgaas@google.com>
> ---
> Changes v1 -> v2:
> - Cast return value of intel_mid_pci_get_power_state() to
>   (__force pci_power_t) to avoid "sparse -D__CHECK_ENDIAN__" warning.
> - Add ack by Andy Shevchenko.
> 
> Changes v2 -> v3:
> - Amend commit message to explain the user-visible failure mode as
>   reported by Andy.
> - Add ack by Bjorn Helgaas and Fixes tag.
> 
>  arch/x86/include/asm/intel-mid.h  |  1 +
>  arch/x86/platform/intel-mid/pwr.c | 19 +++++++++++++++++++
>  drivers/pci/pci-mid.c             |  6 ++++++
>  3 files changed, 26 insertions(+)
> 
> diff --git a/arch/x86/include/asm/intel-mid.h
> b/arch/x86/include/asm/intel-mid.h
> index 5b6753d..49da9f4 100644
> --- a/arch/x86/include/asm/intel-mid.h
> +++ b/arch/x86/include/asm/intel-mid.h
> @@ -17,6 +17,7 @@
>  
>  extern int intel_mid_pci_init(void);
>  extern int intel_mid_pci_set_power_state(struct pci_dev *pdev,
> pci_power_t state);
> +extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev
> *pdev);
>  
>  extern void intel_mid_pwr_power_off(void);
>  
> diff --git a/arch/x86/platform/intel-mid/pwr.c
> b/arch/x86/platform/intel-mid/pwr.c
> index 5d3b45a..67375dd 100644
> --- a/arch/x86/platform/intel-mid/pwr.c
> +++ b/arch/x86/platform/intel-mid/pwr.c
> @@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct pci_dev
> *pdev, pci_power_t state)
>  }
>  EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
>  
> +pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev)
> +{
> +	struct mid_pwr *pwr = midpwr;
> +	int id, reg, bit;
> +	u32 power;
> +
> +	if (!pwr || !pwr->available)
> +		return PCI_UNKNOWN;
> +
> +	id = intel_mid_pwr_get_lss_id(pdev);
> +	if (id < 0)
> +		return PCI_UNKNOWN;
> +
> +	reg = (id * LSS_PWS_BITS) / 32;
> +	bit = (id * LSS_PWS_BITS) % 32;
> +	power = mid_pwr_get_state(pwr, reg);
> +	return (__force pci_power_t)((power >> bit) & 3);
> +}
> +
>  void intel_mid_pwr_power_off(void)
>  {
>  	struct mid_pwr *pwr = midpwr;
> diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
> index 55f453d..c7f3408 100644
> --- a/drivers/pci/pci-mid.c
> +++ b/drivers/pci/pci-mid.c
> @@ -29,6 +29,11 @@ static int mid_pci_set_power_state(struct pci_dev
> *pdev, pci_power_t state)
>  	return intel_mid_pci_set_power_state(pdev, state);
>  }
>  
> +static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
> +{
> +	return intel_mid_pci_get_power_state(pdev);
> +}
> +
>  static pci_power_t mid_pci_choose_state(struct pci_dev *pdev)
>  {
>  	return PCI_D3hot;
> @@ -52,6 +57,7 @@ static bool mid_pci_need_resume(struct pci_dev
> *dev)
>  static struct pci_platform_pm_ops mid_pci_platform_pm = {
>  	.is_manageable	= mid_pci_power_manageable,
>  	.set_state	= mid_pci_set_power_state,
> +	.get_state	= mid_pci_get_power_state,
>  	.choose_state	= mid_pci_choose_state,
>  	.sleep_wake	= mid_pci_sleep_wake,
>  	.run_wake	= mid_pci_run_wake,

Shouldn't this serialize like this

        might_sleep();

	reg = (id * LSS_PWS_BITS) / 32;
	bit = (id * LSS_PWS_BITS) % 32;

        mutex_lock(&pwr->lock);
	power = mid_pwr_get_state(pwr, reg);
        mutex_lock(&pwr->lock);

	return (__force pci_power_t)((power >> bit) & 3);

there's a corresponding flow in mid_pwr_set_power_state() that operates
in exactly that way.

---
bod
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lukas Wunner Oct. 23, 2016, 2:57 p.m. UTC | #2
On Sun, Oct 23, 2016 at 01:37:55PM +0100, Bryan O'Donoghue wrote:
> On Sun, 2016-10-23 at 13:55 +0200, Lukas Wunner wrote:
> > Commit cc7cc02bada8 ("PCI: Query platform firmware for device power
> > state") augmented struct pci_platform_pm_ops with a ->get_state hook
> > and
> > implemented it for acpi_pci_platform_pm, the only pci_platform_pm_ops
> > existing till v4.7.
> > 
> > However v4.8 introduced another pci_platform_pm_ops for Intel Mobile
> > Internet Devices with commit 5823d0893ec2 ("x86/platform/intel-mid:
> > Add
> > Power Management Unit driver").  It is missing the ->get_state hook,
> > which is fatal since pci_set_platform_pm() enforces its
> > presence.  Andy
> > Shevchenko reports that without the present commit, such a device
> > "crashes without even a character printed out on serial console and
> > reboots (since watchdog)".
> > 
> > Retrofit mid_pci_platform_pm with the missing callback to fix the
> > breakage.
> > 
> > Fixes: cc7cc02bada8 ("PCI: Query platform firmware for device power
> > state")
> > Cc: x86@kernel.org
> > Signed-off-by: Lukas Wunner <lukas@wunner.de>
> > Acked-and-tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.c
> > om>
> > Acked-by: Bjorn Helgaas <bhelgaas@google.com>
> > ---
> > Changes v1 -> v2:
> > - Cast return value of intel_mid_pci_get_power_state() to
> >   (__force pci_power_t) to avoid "sparse -D__CHECK_ENDIAN__" warning.
> > - Add ack by Andy Shevchenko.
> > 
> > Changes v2 -> v3:
> > - Amend commit message to explain the user-visible failure mode as
> >   reported by Andy.
> > - Add ack by Bjorn Helgaas and Fixes tag.
> > 
> >  arch/x86/include/asm/intel-mid.h  |  1 +
> >  arch/x86/platform/intel-mid/pwr.c | 19 +++++++++++++++++++
> >  drivers/pci/pci-mid.c             |  6 ++++++
> >  3 files changed, 26 insertions(+)
> > 
> > diff --git a/arch/x86/include/asm/intel-mid.h
> > b/arch/x86/include/asm/intel-mid.h
> > index 5b6753d..49da9f4 100644
> > --- a/arch/x86/include/asm/intel-mid.h
> > +++ b/arch/x86/include/asm/intel-mid.h
> > @@ -17,6 +17,7 @@
> >  
> >  extern int intel_mid_pci_init(void);
> >  extern int intel_mid_pci_set_power_state(struct pci_dev *pdev,
> > pci_power_t state);
> > +extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev
> > *pdev);
> >  
> >  extern void intel_mid_pwr_power_off(void);
> >  
> > diff --git a/arch/x86/platform/intel-mid/pwr.c
> > b/arch/x86/platform/intel-mid/pwr.c
> > index 5d3b45a..67375dd 100644
> > --- a/arch/x86/platform/intel-mid/pwr.c
> > +++ b/arch/x86/platform/intel-mid/pwr.c
> > @@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct pci_dev
> > *pdev, pci_power_t state)
> >  }
> >  EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
> >  
> > +pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev)
> > +{
> > +	struct mid_pwr *pwr = midpwr;
> > +	int id, reg, bit;
> > +	u32 power;
> > +
> > +	if (!pwr || !pwr->available)
> > +		return PCI_UNKNOWN;
> > +
> > +	id = intel_mid_pwr_get_lss_id(pdev);
> > +	if (id < 0)
> > +		return PCI_UNKNOWN;
> > +
> > +	reg = (id * LSS_PWS_BITS) / 32;
> > +	bit = (id * LSS_PWS_BITS) % 32;
> > +	power = mid_pwr_get_state(pwr, reg);
> > +	return (__force pci_power_t)((power >> bit) & 3);
> > +}
> > +
> >  void intel_mid_pwr_power_off(void)
> >  {
> >  	struct mid_pwr *pwr = midpwr;
> > diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
> > index 55f453d..c7f3408 100644
> > --- a/drivers/pci/pci-mid.c
> > +++ b/drivers/pci/pci-mid.c
> > @@ -29,6 +29,11 @@ static int mid_pci_set_power_state(struct pci_dev
> > *pdev, pci_power_t state)
> >  	return intel_mid_pci_set_power_state(pdev, state);
> >  }
> >  
> > +static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
> > +{
> > +	return intel_mid_pci_get_power_state(pdev);
> > +}
> > +
> >  static pci_power_t mid_pci_choose_state(struct pci_dev *pdev)
> >  {
> >  	return PCI_D3hot;
> > @@ -52,6 +57,7 @@ static bool mid_pci_need_resume(struct pci_dev
> > *dev)
> >  static struct pci_platform_pm_ops mid_pci_platform_pm = {
> >  	.is_manageable	= mid_pci_power_manageable,
> >  	.set_state	= mid_pci_set_power_state,
> > +	.get_state	= mid_pci_get_power_state,
> >  	.choose_state	= mid_pci_choose_state,
> >  	.sleep_wake	= mid_pci_sleep_wake,
> >  	.run_wake	= mid_pci_run_wake,
> 
> Shouldn't this serialize like this
> 
>       might_sleep();
> 
> 	reg = (id * LSS_PWS_BITS) / 32;
> 	bit = (id * LSS_PWS_BITS) % 32;
> 
>       mutex_lock(&pwr->lock);
>       power = mid_pwr_get_state(pwr, reg);
>       mutex_lock(&pwr->lock);
> 
> 	return (__force pci_power_t)((power >> bit) & 3);
> 
> there's a corresponding flow in mid_pwr_set_power_state() that operates
> in exactly that way.

mid_pwr_set_power_state() uses a series of steps (set the power state,
wait for completion) so presumably Andy thought this needs to be done
under a lock to prevent concurrent execution.

mid_pwr_get_state() on the other hand is just a register read, which
I assume is atomic.  The other stuff (calling intel_mid_pwr_get_lss_id(),
calculation of reg and bit) seems to be static, it never changes across
invocations.  Hence there doesn't seem to be a necessity to acquire
the mutex and call might_sleep().

That said I'm not really familiar with these devices and rely on Andy's
ack for correctness.  Andy if I'm mistaken please shout, otherwise I
assume the patch is correct.

The usage of a mutex in mid_pwr_set_power_state() actually seems
questionable since this is called with interrupts disabled:

pci_pm_resume_noirq
  pci_pm_default_resume_early
    pci_power_up
      platform_pci_set_power_state
        mid_pci_set_power_state
          intel_mid_pci_set_power_state
            mid_pwr_set_power_state

Thanks,

Lukas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bryan O'Donoghue Oct. 23, 2016, 4:25 p.m. UTC | #3
On Sun, 2016-10-23 at 16:57 +0200, Lukas Wunner wrote:
> On Sun, Oct 23, 2016 at 01:37:55PM +0100, Bryan O'Donoghue wrote:
> > 
> The usage of a mutex in mid_pwr_set_power_state() actually seems
> questionable since this is called with interrupts disabled:

> pci_pm_resume_noirq
>   pci_pm_default_resume_early
>     pci_power_up
>       platform_pci_set_power_state
>         mid_pci_set_power_state
>           intel_mid_pci_set_power_state
>             mid_pwr_set_power_state


That was my other question then - though I assume the mutex is put in
place to future-proof the code.

I'm just wondering out loud - considering we have the case where we update a register and then spin waiting for a command completion - is it in fact logically valid to have a concurrent reader read out the power state - when another writer is executing mid_pwr_wait() - for example.

/* Wait 500ms that the latest PWRMU command finished */
static int mid_pwr_wait(struct mid_pwr *pwr)
{
        unsigned int count = 500000;
        bool busy;

        do {
                busy = mid_pwr_is_busy(pwr);
                if (!busy)
                        return 0;
                udelay(1);
        } while (--count);

        return -EBUSY;
}

static int mid_pwr_wait_for_cmd(struct mid_pwr *pwr, u8 cmd)
{
        writel(PM_CMD_CMD(cmd) | PM_CMD_CM_IMMEDIATE, pwr->regs +
PM_CMD);
        return mid_pwr_wait(pwr);
}

static int __update_power_state(struct mid_pwr *pwr, int reg, int bit,
int new)
{

<snip>
        /* Update the power state */
        mid_pwr_set_state(pwr, reg, (power & ~(3 << bit)) | (new <<
bit));

        /* Send command to SCU */
        ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG);
        if (ret)
                return ret;
<snip>
}

anyway...

I've tested your patch and it looks good. We can otherwise defer to
andy on the usage of the mutex.

Tested-by: Bryan O'Donoghue <pure.logic@nexus-software.ie>

---
bod
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Shevchenko Oct. 24, 2016, 9:15 a.m. UTC | #4
On Sun, 2016-10-23 at 16:57 +0200, Lukas Wunner wrote:
> On Sun, Oct 23, 2016 at 01:37:55PM +0100, Bryan O'Donoghue wrote:
> > On Sun, 2016-10-23 at 13:55 +0200, Lukas Wunner wrote:
> > > Commit cc7cc02bada8 ("PCI: Query platform firmware for device
> > > power
> > > state") augmented struct pci_platform_pm_ops with a ->get_state
> > > hook
> > > and
> > > implemented it for acpi_pci_platform_pm, the only
> > > pci_platform_pm_ops
> > > existing till v4.7.
> > > 
> > > However v4.8 introduced another pci_platform_pm_ops for Intel
> > > Mobile
> > > Internet Devices with commit 5823d0893ec2 ("x86/platform/intel-
> > > mid:
> > > Add
> > > Power Management Unit driver").  It is missing the ->get_state
> > > hook,
> > > which is fatal since pci_set_platform_pm() enforces its
> > > presence.  Andy
> > > Shevchenko reports that without the present commit, such a device
> > > "crashes without even a character printed out on serial console
> > > and
> > > reboots (since watchdog)".
> > > 
> > > Retrofit mid_pci_platform_pm with the missing callback to fix the
> > > breakage.
> > > 
> > > Fixes: cc7cc02bada8 ("PCI: Query platform firmware for device
> > > power
> > > state")
> > > Cc: x86@kernel.org
> > > Signed-off-by: Lukas Wunner <lukas@wunner.de>
> > > Acked-and-tested-by: Andy Shevchenko <andriy.shevchenko@linux.inte
> > > l.c
> > > om>
> > > Acked-by: Bjorn Helgaas <bhelgaas@google.com>
> > > ---
> > > Changes v1 -> v2:
> > > - Cast return value of intel_mid_pci_get_power_state() to
> > >   (__force pci_power_t) to avoid "sparse -D__CHECK_ENDIAN__"
> > > warning.
> > > - Add ack by Andy Shevchenko.
> > > 
> > > Changes v2 -> v3:
> > > - Amend commit message to explain the user-visible failure mode as
> > >   reported by Andy.
> > > - Add ack by Bjorn Helgaas and Fixes tag.
> > > 
> > >  arch/x86/include/asm/intel-mid.h  |  1 +
> > >  arch/x86/platform/intel-mid/pwr.c | 19 +++++++++++++++++++
> > >  drivers/pci/pci-mid.c             |  6 ++++++
> > >  3 files changed, 26 insertions(+)
> > > 
> > > diff --git a/arch/x86/include/asm/intel-mid.h
> > > b/arch/x86/include/asm/intel-mid.h
> > > index 5b6753d..49da9f4 100644
> > > --- a/arch/x86/include/asm/intel-mid.h
> > > +++ b/arch/x86/include/asm/intel-mid.h
> > > @@ -17,6 +17,7 @@
> > >  
> > >  extern int intel_mid_pci_init(void);
> > >  extern int intel_mid_pci_set_power_state(struct pci_dev *pdev,
> > > pci_power_t state);
> > > +extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev
> > > *pdev);
> > >  
> > >  extern void intel_mid_pwr_power_off(void);
> > >  
> > > diff --git a/arch/x86/platform/intel-mid/pwr.c
> > > b/arch/x86/platform/intel-mid/pwr.c
> > > index 5d3b45a..67375dd 100644
> > > --- a/arch/x86/platform/intel-mid/pwr.c
> > > +++ b/arch/x86/platform/intel-mid/pwr.c
> > > @@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct
> > > pci_dev
> > > *pdev, pci_power_t state)
> > >  }
> > >  EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
> > >  
> > > +pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev)
> > > +{
> > > +	struct mid_pwr *pwr = midpwr;
> > > +	int id, reg, bit;
> > > +	u32 power;
> > > +
> > > +	if (!pwr || !pwr->available)
> > > +		return PCI_UNKNOWN;
> > > +
> > > +	id = intel_mid_pwr_get_lss_id(pdev);
> > > +	if (id < 0)
> > > +		return PCI_UNKNOWN;
> > > +
> > > +	reg = (id * LSS_PWS_BITS) / 32;
> > > +	bit = (id * LSS_PWS_BITS) % 32;
> > > +	power = mid_pwr_get_state(pwr, reg);
> > > +	return (__force pci_power_t)((power >> bit) & 3);
> > > +}
> > > +
> > >  void intel_mid_pwr_power_off(void)
> > >  {
> > >  	struct mid_pwr *pwr = midpwr;
> > > diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
> > > index 55f453d..c7f3408 100644
> > > --- a/drivers/pci/pci-mid.c
> > > +++ b/drivers/pci/pci-mid.c
> > > @@ -29,6 +29,11 @@ static int mid_pci_set_power_state(struct
> > > pci_dev
> > > *pdev, pci_power_t state)
> > >  	return intel_mid_pci_set_power_state(pdev, state);
> > >  }
> > >  
> > > +static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
> > > +{
> > > +	return intel_mid_pci_get_power_state(pdev);
> > > +}
> > > +
> > >  static pci_power_t mid_pci_choose_state(struct pci_dev *pdev)
> > >  {
> > >  	return PCI_D3hot;
> > > @@ -52,6 +57,7 @@ static bool mid_pci_need_resume(struct pci_dev
> > > *dev)
> > >  static struct pci_platform_pm_ops mid_pci_platform_pm = {
> > >  	.is_manageable	= mid_pci_power_manageable,
> > >  	.set_state	= mid_pci_set_power_state,
> > > +	.get_state	= mid_pci_get_power_state,
> > >  	.choose_state	= mid_pci_choose_state,
> > >  	.sleep_wake	= mid_pci_sleep_wake,
> > >  	.run_wake	= mid_pci_run_wake,
> > 
> > Shouldn't this serialize like this
> > 
> >       might_sleep();
> > 
> > 	reg = (id * LSS_PWS_BITS) / 32;
> > 	bit = (id * LSS_PWS_BITS) % 32;
> > 
> >       mutex_lock(&pwr->lock);
> >       power = mid_pwr_get_state(pwr, reg);
> >       mutex_lock(&pwr->lock);
> > 
> > 	return (__force pci_power_t)((power >> bit) & 3);
> > 
> > there's a corresponding flow in mid_pwr_set_power_state() that
> > operates
> > in exactly that way.
> 
> mid_pwr_set_power_state() uses a series of steps (set the power state,
> wait for completion) so presumably Andy thought this needs to be done
> under a lock to prevent concurrent execution.
> 
> mid_pwr_get_state() on the other hand is just a register read, which
> I assume is atomic.  The other stuff (calling
> intel_mid_pwr_get_lss_id(),
> calculation of reg and bit) seems to be static, it never changes
> across
> invocations.  Hence there doesn't seem to be a necessity to acquire
> the mutex and call might_sleep().
> 
> That said I'm not really familiar with these devices and rely on
> Andy's
> ack for correctness.  Andy if I'm mistaken please shout, otherwise I
> assume the patch is correct.

readl() is indeed atomic, the question is ordering of reads and writes,
but on this platform it's just an interface to PWRMU which is slow and
uses two sets of registers (one for read, one for write). Actual
operation happens after doorbell is written (with regard to PM_CMD
bits). So, there is a potential that read will return earlier state of
the device while PWRMU is processing new one, though I believe it's
prevented by PCI core.

> 
> The usage of a mutex in mid_pwr_set_power_state() actually seems
> questionable since this is called with interrupts disabled:
> 
> pci_pm_resume_noirq
>   pci_pm_default_resume_early
>     pci_power_up
>       platform_pci_set_power_state
>         mid_pci_set_power_state
>           intel_mid_pci_set_power_state
>             mid_pwr_set_power_state

Hmm... I have to look at this closer. I don't remember why I put mutex
in the first place there. Anyway it's another story.
Lukas Wunner Oct. 24, 2016, 10:09 a.m. UTC | #5
On Mon, Oct 24, 2016 at 12:15:05PM +0300, Andy Shevchenko wrote:
> On Sun, 2016-10-23 at 16:57 +0200, Lukas Wunner wrote:
> > On Sun, Oct 23, 2016 at 01:37:55PM +0100, Bryan O'Donoghue wrote:
> > > Shouldn't this serialize like this
> > > 
> > >       might_sleep();
> > > 
> > > 	reg = (id * LSS_PWS_BITS) / 32;
> > > 	bit = (id * LSS_PWS_BITS) % 32;
> > > 
> > >       mutex_lock(&pwr->lock);
> > >       power = mid_pwr_get_state(pwr, reg);
> > >       mutex_lock(&pwr->lock);
> > > 
> > > 	return (__force pci_power_t)((power >> bit) & 3);
> > > 
> > > there's a corresponding flow in mid_pwr_set_power_state() that
> > > operates
> > > in exactly that way.
> > 
> > mid_pwr_set_power_state() uses a series of steps (set the power state,
> > wait for completion) so presumably Andy thought this needs to be done
> > under a lock to prevent concurrent execution.
> > 
> > mid_pwr_get_state() on the other hand is just a register read, which
> > I assume is atomic.  The other stuff (calling
> > intel_mid_pwr_get_lss_id(),
> > calculation of reg and bit) seems to be static, it never changes
> > across
> > invocations.  Hence there doesn't seem to be a necessity to acquire
> > the mutex and call might_sleep().
> > 
> > That said I'm not really familiar with these devices and rely on
> > Andy's
> > ack for correctness.  Andy if I'm mistaken please shout, otherwise I
> > assume the patch is correct.
> 
> readl() is indeed atomic, the question is ordering of reads and writes,
> but on this platform it's just an interface to PWRMU which is slow and
> uses two sets of registers (one for read, one for write). Actual
> operation happens after doorbell is written (with regard to PM_CMD
> bits). So, there is a potential that read will return earlier state of
> the device while PWRMU is processing new one, though I believe it's
> prevented by PCI core.

The corresponding functions in pci-acpi.c don't perform any locking,
and AFAICS neither do the functions they call in drivers/acpi/.

The power state is read and written from the various pci_pm_* callbacks
and the PM core never executes those in parallel.

However there's pci_set_power_state(), this is exported and called by
various drivers, theoretically they would be able to execute that
concurrently to a pci_pm_* callback, it would be silly though.

Long story short, there's no locking needed unless you intend to call
intel_mid_pci_set_power_state() from other places.  I guess that's what
Bryan was alluding to when he wrote that the mutex might be "put in
place to future-proof the code".  I note that you're exporting
intel_mid_pci_set_power_state() even though there's currently no module
user, so perhaps you're intending to call the function from somewhere else.

Thanks,

Lukas

> > 
> > The usage of a mutex in mid_pwr_set_power_state() actually seems
> > questionable since this is called with interrupts disabled:
> > 
> > pci_pm_resume_noirq
> >   pci_pm_default_resume_early
> >     pci_power_up
> >       platform_pci_set_power_state
> >         mid_pci_set_power_state
> >           intel_mid_pci_set_power_state
> >             mid_pwr_set_power_state
> 
> Hmm... I have to look at this closer. I don't remember why I put mutex
> in the first place there. Anyway it's another story.
> 
> -- 
> Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> Intel Finland Oy
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Shevchenko Oct. 24, 2016, 11:05 a.m. UTC | #6
On Mon, 2016-10-24 at 12:09 +0200, Lukas Wunner wrote:
> On Mon, Oct 24, 2016 at 12:15:05PM +0300, Andy Shevchenko wrote:
> > On Sun, 2016-10-23 at 16:57 +0200, Lukas Wunner wrote:
> > > On Sun, Oct 23, 2016 at 01:37:55PM +0100, Bryan O'Donoghue wrote:
> > > > Shouldn't this serialize like this
> > > > 
> > > >       might_sleep();
> > > > 
> > > > 	reg = (id * LSS_PWS_BITS) / 32;
> > > > 	bit = (id * LSS_PWS_BITS) % 32;
> > > > 
> > > >       mutex_lock(&pwr->lock);
> > > >       power = mid_pwr_get_state(pwr, reg);
> > > >       mutex_lock(&pwr->lock);
> > > > 
> > > > 	return (__force pci_power_t)((power >> bit) & 3);
> > > > 
> > > > there's a corresponding flow in mid_pwr_set_power_state() that
> > > > operates
> > > > in exactly that way.
> > > 
> > > mid_pwr_set_power_state() uses a series of steps (set the power
> > > state,
> > > wait for completion) so presumably Andy thought this needs to be
> > > done
> > > under a lock to prevent concurrent execution.
> > > 
> > > mid_pwr_get_state() on the other hand is just a register read,
> > > which
> > > I assume is atomic.  The other stuff (calling
> > > intel_mid_pwr_get_lss_id(),
> > > calculation of reg and bit) seems to be static, it never changes
> > > across
> > > invocations.  Hence there doesn't seem to be a necessity to
> > > acquire
> > > the mutex and call might_sleep().
> > > 
> > > That said I'm not really familiar with these devices and rely on
> > > Andy's
> > > ack for correctness.  Andy if I'm mistaken please shout, otherwise
> > > I
> > > assume the patch is correct.
> > 
> > readl() is indeed atomic, the question is ordering of reads and
> > writes,
> > but on this platform it's just an interface to PWRMU which is slow
> > and
> > uses two sets of registers (one for read, one for write). Actual
> > operation happens after doorbell is written (with regard to PM_CMD
> > bits). So, there is a potential that read will return earlier state
> > of
> > the device while PWRMU is processing new one, though I believe it's
> > prevented by PCI core.
> 
> The corresponding functions in pci-acpi.c don't perform any locking,
> and AFAICS neither do the functions they call in drivers/acpi/.
> 
> The power state is read and written from the various pci_pm_*
> callbacks
> and the PM core never executes those in parallel.
> 
> However there's pci_set_power_state(), this is exported and called by
> various drivers, theoretically they would be able to execute that
> concurrently to a pci_pm_* callback, it would be silly though.
> 
> Long story short, there's no locking needed unless you intend to call
> intel_mid_pci_set_power_state() from other places.  I guess that's
> what
> Bryan was alluding to when he wrote that the mutex might be "put in
> place to future-proof the code".  I note that you're exporting
> intel_mid_pci_set_power_state() even though there's currently no
> module
> user, so perhaps you're intending to call the function from somewhere
> else.

The export there is purely dictated by leaving abstract stuff under
drivers/pci when platform code is kept under arch/x86/platform. Other
than that there is no plans to call this outside of pci-mid.c.

> 
> 
> > > 
> > > The usage of a mutex in mid_pwr_set_power_state() actually seems
> > > questionable since this is called with interrupts disabled:
> > > 
> > > pci_pm_resume_noirq
> > >   pci_pm_default_resume_early
> > >     pci_power_up
> > >       platform_pci_set_power_state
> > >         mid_pci_set_power_state
> > >           intel_mid_pci_set_power_state
> > >             mid_pwr_set_power_state
> > 
> > Hmm... I have to look at this closer. I don't remember why I put
> > mutex
> > in the first place there. Anyway it's another story.

There are two code paths
pci_power_up()
pci_platform_power_transition()

Second one can be called in non-atomic context for sure (consider
standard ->resume() callback).

First one runs when IRQ disabled on CPU side.

In any case we probably need to serialize access in our code to protect
against several PCI devices being powered up simultaneously.

Do you think we have to switch to spin_lock instead or remove it
completely?
Lukas Wunner Oct. 25, 2016, 6:19 a.m. UTC | #7
On Mon, Oct 24, 2016 at 02:05:45PM +0300, Andy Shevchenko wrote:
> On Mon, 2016-10-24 at 12:09 +0200, Lukas Wunner wrote:
> > I note that you're exporting intel_mid_pci_set_power_state() even
> > though there's currently no module user, so perhaps you're intending
> > to call the function from somewhere else.
> 
> The export there is purely dictated by leaving abstract stuff under
> drivers/pci when platform code is kept under arch/x86/platform. Other
> than that there is no plans to call this outside of pci-mid.c.

The PCI core, including pci-mid.o, is linked into vmlinux, not a module,
and exporting is only needed for module users.  A commit to unexport the
symbol has been sitting on my development branch for 2 weeks, I delayed
it because I wanted to wait for the regression to be fixed first.
Sending out now since the topic has come up.


> > > > The usage of a mutex in mid_pwr_set_power_state() actually seems
> > > > questionable since this is called with interrupts disabled:
> > > > 
> > > > pci_pm_resume_noirq
> > > >   pci_pm_default_resume_early
> > > >     pci_power_up
> > > >       platform_pci_set_power_state
> > > >         mid_pci_set_power_state
> > > >           intel_mid_pci_set_power_state
> > > >             mid_pwr_set_power_state
> > > 
> > > Hmm... I have to look at this closer. I don't remember why I put
> > > mutex
> > > in the first place there. Anyway it's another story.
> 
> There are two code paths
> pci_power_up()
> pci_platform_power_transition()
> 
> Second one can be called in non-atomic context for sure (consider
> standard ->resume() callback).
> 
> First one runs when IRQ disabled on CPU side.
> 
> In any case we probably need to serialize access in our code to protect
> against several PCI devices being powered up simultaneously.

Right, good point, the PM core will indeed parallelize suspend/resume
of the devices, so if __update_power_state() cannot be safely called
concurrently for different devices (I don't know if that's the case)
then indeed you need locking (with spinlocks).  It might be worth
pondering if the locking should happen further down in the call stack
because I assume the critical section would really just be in
__update_power_state().

Best regards,

Lukas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bryan O'Donoghue Oct. 26, 2016, 2:06 p.m. UTC | #8
On Tue, 2016-10-25 at 08:19 +0200, Lukas Wunner wrote:
> On Mon, Oct 24, 2016 at 02:05:45PM +0300, Andy Shevchenko wrote:
> > 
> > On Mon, 2016-10-24 at 12:09 +0200, Lukas Wunner wrote:
> > > 
> > > I note that you're exporting intel_mid_pci_set_power_state() even
> > > though there's currently no module user, so perhaps you're
> > > intending
> > > to call the function from somewhere else.
> > The export there is purely dictated by leaving abstract stuff under
> > drivers/pci when platform code is kept under arch/x86/platform.
> > Other
> > than that there is no plans to call this outside of pci-mid.c.
> The PCI core, including pci-mid.o, is linked into vmlinux, not a
> module,
> and exporting is only needed for module users.  A commit to unexport
> the
> symbol has been sitting on my development branch for 2 weeks, I
> delayed
> it because I wanted to wait for the regression to be fixed first.
> Sending out now since the topic has come up.
> 
> 
> > 
> > > 
> > > > 
> > > > > 
> > > > > The usage of a mutex in mid_pwr_set_power_state() actually
> > > > > seems
> > > > > questionable since this is called with interrupts disabled:
> > > > > 
> > > > > pci_pm_resume_noirq
> > > > >   pci_pm_default_resume_early
> > > > >     pci_power_up
> > > > >       platform_pci_set_power_state
> > > > >         mid_pci_set_power_state
> > > > >           intel_mid_pci_set_power_state
> > > > >             mid_pwr_set_power_state
> > > > Hmm... I have to look at this closer. I don't remember why I
> > > > put
> > > > mutex
> > > > in the first place there. Anyway it's another story.
> > There are two code paths
> > pci_power_up()
> > pci_platform_power_transition()
> > 
> > Second one can be called in non-atomic context for sure (consider
> > standard ->resume() callback).
> > 
> > First one runs when IRQ disabled on CPU side.
> > 
> > In any case we probably need to serialize access in our code to
> > protect
> > against several PCI devices being powered up simultaneously.
> Right, good point, the PM core will indeed parallelize suspend/resume
> of the devices, so if __update_power_state() cannot be safely called
> concurrently for different devices (I don't know if that's the case)
> then indeed you need locking (with spinlocks).  It might be worth
> pondering if the locking should happen further down in the call stack
> because I assume the critical section would really just be in
> __update_power_state().
> 
> Best regards,
> 
> Lukas

So the conclusion is to apply this patch now and go and look further @
locking in a separate series right ? There's not much point in leaving
Edison not booting as is the case with tip-of-tree right now.

---
bod
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Shevchenko Oct. 26, 2016, 3:01 p.m. UTC | #9
On Wed, 2016-10-26 at 15:06 +0100, Bryan O'Donoghue wrote:
> On Tue, 2016-10-25 at 08:19 +0200, Lukas Wunner wrote:

> 
> So the conclusion is to apply this patch now and go and look further @
> locking in a separate series right ? There's not much point in leaving
> Edison not booting as is the case with tip-of-tree right now.

That's what I think is a summary of the discussion.
Thorsten Leemhuis Nov. 6, 2016, 1:43 p.m. UTC | #10
Hi! On 26.10.2016 17:01, Andy Shevchenko wrote:
> On Wed, 2016-10-26 at 15:06 +0100, Bryan O'Donoghue wrote:
>> On Tue, 2016-10-25 at 08:19 +0200, Lukas Wunner wrote:
>
>> So the conclusion is to apply this patch now and go and look further @
>> locking in a separate series right ? There's not much point in leaving
>> Edison not booting as is the case with tip-of-tree right now.
> That's what I think is a summary of the discussion.

Did anything happen after that conclusion? Just wondering, because this
bug is on the list of regressions for 4.9 and I think the proposed fix
hasn't reached mainline yet (or did I miss it?)

Ciao, Thorsten (regression tracker for 4.9)

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lukas Wunner Nov. 6, 2016, 5:12 p.m. UTC | #11
On Sun, Nov 06, 2016 at 02:43:33PM +0100, Thorsten Leemhuis wrote:
> On 26.10.2016 17:01, Andy Shevchenko wrote:
> > On Wed, 2016-10-26 at 15:06 +0100, Bryan O'Donoghue wrote:
> >> So the conclusion is to apply this patch now and go and look further @
> >> locking in a separate series right ? There's not much point in leaving
> >> Edison not booting as is the case with tip-of-tree right now.
> >
> > That's what I think is a summary of the discussion.
> 
> Did anything happen after that conclusion? Just wondering, because this
> bug is on the list of regressions for 4.9 and I think the proposed fix
> hasn't reached mainline yet (or did I miss it?)

The patch still awaits merging through tip.  I assume the x86 maintainers
were traveling (KS, LPC) or generally busy.

Thanks,

Lukas
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 5b6753d..49da9f4 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -17,6 +17,7 @@ 
 
 extern int intel_mid_pci_init(void);
 extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
+extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev);
 
 extern void intel_mid_pwr_power_off(void);
 
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index 5d3b45a..67375dd 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -272,6 +272,25 @@  int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
 }
 EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
 
+pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev)
+{
+	struct mid_pwr *pwr = midpwr;
+	int id, reg, bit;
+	u32 power;
+
+	if (!pwr || !pwr->available)
+		return PCI_UNKNOWN;
+
+	id = intel_mid_pwr_get_lss_id(pdev);
+	if (id < 0)
+		return PCI_UNKNOWN;
+
+	reg = (id * LSS_PWS_BITS) / 32;
+	bit = (id * LSS_PWS_BITS) % 32;
+	power = mid_pwr_get_state(pwr, reg);
+	return (__force pci_power_t)((power >> bit) & 3);
+}
+
 void intel_mid_pwr_power_off(void)
 {
 	struct mid_pwr *pwr = midpwr;
diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
index 55f453d..c7f3408 100644
--- a/drivers/pci/pci-mid.c
+++ b/drivers/pci/pci-mid.c
@@ -29,6 +29,11 @@  static int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
 	return intel_mid_pci_set_power_state(pdev, state);
 }
 
+static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
+{
+	return intel_mid_pci_get_power_state(pdev);
+}
+
 static pci_power_t mid_pci_choose_state(struct pci_dev *pdev)
 {
 	return PCI_D3hot;
@@ -52,6 +57,7 @@  static bool mid_pci_need_resume(struct pci_dev *dev)
 static struct pci_platform_pm_ops mid_pci_platform_pm = {
 	.is_manageable	= mid_pci_power_manageable,
 	.set_state	= mid_pci_set_power_state,
+	.get_state	= mid_pci_get_power_state,
 	.choose_state	= mid_pci_choose_state,
 	.sleep_wake	= mid_pci_sleep_wake,
 	.run_wake	= mid_pci_run_wake,