diff mbox series

[v6,6/6] PCI: qcom: Add OPP support to scale performance state of power domain

Message ID 20240112-opp_support-v6-6-77bbf7d0cc37@quicinc.com (mailing list archive)
State Superseded
Delegated to: Manivannan Sadhasivam
Headers show
Series PCI: qcom: Add support for OPP | expand

Commit Message

Krishna Chaitanya Chundru Jan. 12, 2024, 2:22 p.m. UTC
QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
maintains hardware state of a regulator by performing max aggregation of
the requests made by all of the processors.

The PCIe controller can operate at different RPMh performance states of the
power domain depending on the speed of the link. This performance state
varies from target to target.

It is mandatory to scale the performance state based on the speed at which
the PCIe link operates so that the SoC can run under optimum power conditions.

Add Operating Performance Points (OPP) support to vote for the RPMh state
based upon the GEN speed at which the link is operating.

OPP can also handle ICC bandwidth voting, so move the ICC bandwidth voting
to the OPP framework if OPP entries are present.

In PCIe, certain speed and width combinations such as GEN1x2 & GEN2x1 or
GEN3x2 & GEN4x1 use the same ICC bandwidth and frequency, so use a
frequency-based search to reduce the number of entries in the OPP table.

Don't initialize icc if opp is supported.

Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
---
 drivers/pci/controller/dwc/pcie-qcom.c | 83 ++++++++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 13 deletions(-)

Comments

Dmitry Baryshkov Jan. 12, 2024, 3:33 p.m. UTC | #1
On Fri, 12 Jan 2024 at 16:25, Krishna chaitanya chundru
<quic_krichai@quicinc.com> wrote:
>
> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> maintains hardware state of a regulator by performing max aggregation of
> the requests made by all of the processors.
>
> PCIe controller can operate on different RPMh performance state of power
> domain based up on the speed of the link. And this performance state varies
> from target to target.
>
> It is manadate to scale the performance state based up on the PCIe speed
> link operates so that SoC can run under optimum power conditions.
>
> Add Operating Performance Points(OPP) support to vote for RPMh state based
> upon GEN speed link is operating.
>
> OPP can handle ICC bw voting also, so move icc bw voting through opp
> framework if opp entries are present.
>
> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
> same icc bw and has frequency, so use frequency based search to reduce
> number of entries in the opp table.
>
> Don't initialize icc if opp is supported.
>
> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> ---
>  drivers/pci/controller/dwc/pcie-qcom.c | 83 ++++++++++++++++++++++++++++------
>  1 file changed, 70 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> index 035953f0b6d8..31512dc9d6ff 100644
> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> @@ -22,6 +22,7 @@
>  #include <linux/of.h>
>  #include <linux/of_gpio.h>
>  #include <linux/pci.h>
> +#include <linux/pm_opp.h>
>  #include <linux/pm_runtime.h>
>  #include <linux/platform_device.h>
>  #include <linux/phy/pcie.h>
> @@ -244,6 +245,7 @@ struct qcom_pcie {
>         const struct qcom_pcie_cfg *cfg;
>         struct dentry *debugfs;
>         bool suspended;
> +       bool opp_supported;
>  };
>
>  #define to_qcom_pcie(x)                dev_get_drvdata((x)->dev)
> @@ -1404,16 +1406,14 @@ static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
>         return 0;
>  }
>
> -static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
> +static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)
>  {
>         struct dw_pcie *pci = pcie->pci;
> -       u32 offset, status;
> +       u32 offset, status, freq;
> +       struct dev_pm_opp *opp;
>         int speed, width;
>         int ret;
>
> -       if (!pcie->icc_mem)
> -               return;
> -
>         offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>         status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
>
> @@ -1424,11 +1424,42 @@ static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>         speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
>         width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
>
> -       ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
> -       if (ret) {
> -               dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
> -                       ret);
> +       if (pcie->opp_supported) {
> +               switch (speed) {
> +               case 1:
> +                       freq = 2500000;
> +                       break;
> +               case 2:
> +                       freq = 5000000;
> +                       break;
> +               case 3:
> +                       freq = 8000000;
> +                       break;
> +               default:
> +                       WARN_ON_ONCE(1);
> +                       fallthrough;
> +               case 4:
> +                       freq = 16000000;

I expected that this kind of detail goes to the OPP table itself. Can
we index the table using the generation instead of frequency?

> +                       break;
> +               }
> +
> +               opp = dev_pm_opp_find_freq_exact(pci->dev, freq * width, true);
> +               if (!IS_ERR(opp)) {
> +                       ret = dev_pm_opp_set_opp(pci->dev, opp);
> +                       if (ret)
> +                               dev_err(pci->dev, "Failed to set opp: freq %ld ret %d\n",
> +                                       dev_pm_opp_get_freq(opp), ret);
> +                       dev_pm_opp_put(opp);
> +               }
> +       } else {
> +               ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
> +               if (ret) {
> +                       dev_err(pci->dev, "failed to set interconnect bandwidth for pcie-mem: %d\n",
> +                               ret);
> +               }
>         }
> +
> +       return;
>  }
>
>  static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
> @@ -1471,8 +1502,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
>  static int qcom_pcie_probe(struct platform_device *pdev)
>  {
>         const struct qcom_pcie_cfg *pcie_cfg;
> +       unsigned long max_freq = INT_MAX;
>         struct device *dev = &pdev->dev;
>         struct qcom_pcie *pcie;
> +       struct dev_pm_opp *opp;
>         struct dw_pcie_rp *pp;
>         struct resource *res;
>         struct dw_pcie *pci;
> @@ -1539,9 +1572,33 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>                 goto err_pm_runtime_put;
>         }
>
> -       ret = qcom_pcie_icc_init(pcie);
> -       if (ret)
> +        /* OPP table is optional */
> +       ret = devm_pm_opp_of_add_table(dev);
> +       if (ret && ret != -ENODEV) {
> +               dev_err_probe(dev, ret, "Failed to add OPP table\n");
>                 goto err_pm_runtime_put;
> +       }

Can we initialise the table from the driver if it is not found? This
will help us by having the common code later on.

> +
> +       /* vote for max freq in the opp table if opp table is present */
> +       if (ret != -ENODEV) {
> +               opp = dev_pm_opp_find_freq_floor(dev, &max_freq);
> +               if (!IS_ERR(opp)) {
> +                       ret = dev_pm_opp_set_opp(dev, opp);
> +                       if (ret)
> +                               dev_err_probe(pci->dev, ret,
> +                                             "Failed to set opp: freq %ld\n",
> +                                             dev_pm_opp_get_freq(opp));
> +                       dev_pm_opp_put(opp);
> +               }
> +               pcie->opp_supported = true;
> +       }
> +
> +       /* Skip icc init if opp is supported as icc bw vote is handled by opp framework */
> +       if (!pcie->opp_supported) {
> +               ret = qcom_pcie_icc_init(pcie);
> +               if (ret)
> +                       goto err_pm_runtime_put;
> +       }
>
>         ret = pcie->cfg->ops->get_resources(pcie);
>         if (ret)
> @@ -1561,7 +1618,7 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>                 goto err_phy_exit;
>         }
>
> -       qcom_pcie_icc_update(pcie);
> +       qcom_pcie_icc_opp_update(pcie);
>
>         if (pcie->mhi)
>                 qcom_pcie_init_debugfs(pcie);
> @@ -1640,7 +1697,7 @@ static int qcom_pcie_resume_noirq(struct device *dev)
>                 pcie->suspended = false;
>         }
>
> -       qcom_pcie_icc_update(pcie);
> +       qcom_pcie_icc_opp_update(pcie);
>
>         return 0;
>  }
>
> --
> 2.42.0
>
>
Bjorn Helgaas Jan. 12, 2024, 4:50 p.m. UTC | #2
On Fri, Jan 12, 2024 at 07:52:05PM +0530, Krishna chaitanya chundru wrote:
> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> maintains hardware state of a regulator by performing max aggregation of
> the requests made by all of the processors.
> 
> PCIe controller can operate on different RPMh performance state of power
> domain based up on the speed of the link. And this performance state varies
> from target to target.
> 
> It is manadate to scale the performance state based up on the PCIe speed
> link operates so that SoC can run under optimum power conditions.
> 
> Add Operating Performance Points(OPP) support to vote for RPMh state based
> upon GEN speed link is operating.

Thanks for this "OPP" expansion!  Maybe "GEN" is unnecessary in this
sentence?  And below, could be replaced with actual speeds?

> OPP can handle ICC bw voting also, so move icc bw voting through opp
> framework if opp entries are present.

s/opp/OPP/ to match
s/icc/ICC/ similarly (and perhaps expand once)
Also below in comments, etc.

> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
> same icc bw and has frequency, so use frequency based search to reduce
> number of entries in the opp table.
> 
> Don't initialize icc if opp is supported.

Bjorn
Konrad Dybcio Jan. 12, 2024, 10:44 p.m. UTC | #3
On 12.01.2024 15:22, Krishna chaitanya chundru wrote:
> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> maintains hardware state of a regulator by performing max aggregation of
> the requests made by all of the processors.
> 
> PCIe controller can operate on different RPMh performance state of power
> domain based up on the speed of the link. And this performance state varies
> from target to target.
> 
> It is manadate to scale the performance state based up on the PCIe speed
> link operates so that SoC can run under optimum power conditions.
> 
> Add Operating Performance Points(OPP) support to vote for RPMh state based
> upon GEN speed link is operating.
> 
> OPP can handle ICC bw voting also, so move icc bw voting through opp
> framework if opp entries are present.
> 
> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
> same icc bw and has frequency, so use frequency based search to reduce
> number of entries in the opp table.
> 
> Don't initialize icc if opp is supported.
> 
> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> ---

[...]

>  
> -static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
> +static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)

Or simply.. qcom_pcie_opp_update :) Especially with Dmitry's
suggestions

>  {
>  	struct dw_pcie *pci = pcie->pci;
> -	u32 offset, status;
> +	u32 offset, status, freq;
> +	struct dev_pm_opp *opp;
>  	int speed, width;
>  	int ret;
>  
> -	if (!pcie->icc_mem)
> -		return;
> -
>  	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>  	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
>  
> @@ -1424,11 +1424,42 @@ static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>  	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
>  	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
>  
> -	ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
> -	if (ret) {
> -		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
> -			ret);
> +	if (pcie->opp_supported) {
> +		switch (speed) {
> +		case 1:
> +			freq = 2500000;
> +			break;
> +		case 2:
> +			freq = 5000000;
> +			break;
> +		case 3:
> +			freq = 8000000;
> +			break;
> +		default:
> +			WARN_ON_ONCE(1);
> +			fallthrough;
> +		case 4:
> +			freq = 16000000;
> +			break;
> +		}
Might as well add the gen5 and gen6 rates of 3200.. and 6400.. since they're
set in stone in the spec by now, AFAIK

Konrad
Krishna Chaitanya Chundru Jan. 16, 2024, 5:07 a.m. UTC | #4
On 1/12/2024 10:20 PM, Bjorn Helgaas wrote:
> On Fri, Jan 12, 2024 at 07:52:05PM +0530, Krishna chaitanya chundru wrote:
>> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
>> maintains hardware state of a regulator by performing max aggregation of
>> the requests made by all of the processors.
>>
>> PCIe controller can operate on different RPMh performance state of power
>> domain based up on the speed of the link. And this performance state varies
>> from target to target.
>>
>> It is manadate to scale the performance state based up on the PCIe speed
>> link operates so that SoC can run under optimum power conditions.
>>
>> Add Operating Performance Points(OPP) support to vote for RPMh state based
>> upon GEN speed link is operating.
> 
> Thanks for this "OPP" expansion!  Maybe "GEN" is unnecessary in this
> sentence?  And below, could be replaced with actual speeds?
> 
ACK
>> OPP can handle ICC bw voting also, so move icc bw voting through opp
>> framework if opp entries are present.
> 
> s/opp/OPP/ to match
> s/icc/ICC/ similarly (and perhaps expand once)
> Also below in comments, etc.
> 
ACK.

-Krishna Chaitanya.
>> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
>> same icc bw and has frequency, so use frequency based search to reduce
>> number of entries in the opp table.
>>
>> Don't initialize icc if opp is supported.
> 
> Bjorn
Krishna Chaitanya Chundru Jan. 16, 2024, 5:17 a.m. UTC | #5
On 1/12/2024 9:03 PM, Dmitry Baryshkov wrote:
> On Fri, 12 Jan 2024 at 16:25, Krishna chaitanya chundru
> <quic_krichai@quicinc.com> wrote:
>>
>> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
>> maintains hardware state of a regulator by performing max aggregation of
>> the requests made by all of the processors.
>>
>> PCIe controller can operate on different RPMh performance state of power
>> domain based up on the speed of the link. And this performance state varies
>> from target to target.
>>
>> It is manadate to scale the performance state based up on the PCIe speed
>> link operates so that SoC can run under optimum power conditions.
>>
>> Add Operating Performance Points(OPP) support to vote for RPMh state based
>> upon GEN speed link is operating.
>>
>> OPP can handle ICC bw voting also, so move icc bw voting through opp
>> framework if opp entries are present.
>>
>> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
>> same icc bw and has frequency, so use frequency based search to reduce
>> number of entries in the opp table.
>>
>> Don't initialize icc if opp is supported.
>>
>> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
>> ---
>>   drivers/pci/controller/dwc/pcie-qcom.c | 83 ++++++++++++++++++++++++++++------
>>   1 file changed, 70 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>> index 035953f0b6d8..31512dc9d6ff 100644
>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>> @@ -22,6 +22,7 @@
>>   #include <linux/of.h>
>>   #include <linux/of_gpio.h>
>>   #include <linux/pci.h>
>> +#include <linux/pm_opp.h>
>>   #include <linux/pm_runtime.h>
>>   #include <linux/platform_device.h>
>>   #include <linux/phy/pcie.h>
>> @@ -244,6 +245,7 @@ struct qcom_pcie {
>>          const struct qcom_pcie_cfg *cfg;
>>          struct dentry *debugfs;
>>          bool suspended;
>> +       bool opp_supported;
>>   };
>>
>>   #define to_qcom_pcie(x)                dev_get_drvdata((x)->dev)
>> @@ -1404,16 +1406,14 @@ static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
>>          return 0;
>>   }
>>
>> -static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>> +static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)
>>   {
>>          struct dw_pcie *pci = pcie->pci;
>> -       u32 offset, status;
>> +       u32 offset, status, freq;
>> +       struct dev_pm_opp *opp;
>>          int speed, width;
>>          int ret;
>>
>> -       if (!pcie->icc_mem)
>> -               return;
>> -
>>          offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>>          status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
>>
>> @@ -1424,11 +1424,42 @@ static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>>          speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
>>          width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
>>
>> -       ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
>> -       if (ret) {
>> -               dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
>> -                       ret);
>> +       if (pcie->opp_supported) {
>> +               switch (speed) {
>> +               case 1:
>> +                       freq = 2500000;
>> +                       break;
>> +               case 2:
>> +                       freq = 5000000;
>> +                       break;
>> +               case 3:
>> +                       freq = 8000000;
>> +                       break;
>> +               default:
>> +                       WARN_ON_ONCE(1);
>> +                       fallthrough;
>> +               case 4:
>> +                       freq = 16000000;
> 
> I expected that this kind of detail goes to the OPP table itself. Can
> we index the table using the generation instead of frequency?
> 
In the previous patch we also tried to use only the index, but the problem
with using the index is that we can define only the GEN speed. As we are also
voting for the ICC bandwidth, we need to consider the lane width while
configuring this path. It is difficult to use the index now, as we need to
consider both the GEN speed and the lane width.
For that reason we moved to frequencies, to reduce the number of entries in
the OPP table.
For example, if my controller supports GEN1 & GEN2 and the max lane width is
2:

for GEN1x1
opp-2500000 {
};

for GEN2x1 & GEN1x2, as both use the same frequencies and bandwidth.
opp-5000000 {
};

for GEN2x2
opp-10000000 {

};

- Krishna chaitanya.
>> +                       break;
>> +               }
>> +
>> +               opp = dev_pm_opp_find_freq_exact(pci->dev, freq * width, true);
>> +               if (!IS_ERR(opp)) {
>> +                       ret = dev_pm_opp_set_opp(pci->dev, opp);
>> +                       if (ret)
>> +                               dev_err(pci->dev, "Failed to set opp: freq %ld ret %d\n",
>> +                                       dev_pm_opp_get_freq(opp), ret);
>> +                       dev_pm_opp_put(opp);
>> +               }
>> +       } else {
>> +               ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
>> +               if (ret) {
>> +                       dev_err(pci->dev, "failed to set interconnect bandwidth for pcie-mem: %d\n",
>> +                               ret);
>> +               }
>>          }
>> +
>> +       return;
>>   }
>>
>>   static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
>> @@ -1471,8 +1502,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
>>   static int qcom_pcie_probe(struct platform_device *pdev)
>>   {
>>          const struct qcom_pcie_cfg *pcie_cfg;
>> +       unsigned long max_freq = INT_MAX;
>>          struct device *dev = &pdev->dev;
>>          struct qcom_pcie *pcie;
>> +       struct dev_pm_opp *opp;
>>          struct dw_pcie_rp *pp;
>>          struct resource *res;
>>          struct dw_pcie *pci;
>> @@ -1539,9 +1572,33 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>>                  goto err_pm_runtime_put;
>>          }
>>
>> -       ret = qcom_pcie_icc_init(pcie);
>> -       if (ret)
>> +        /* OPP table is optional */
>> +       ret = devm_pm_opp_of_add_table(dev);
>> +       if (ret && ret != -ENODEV) {
>> +               dev_err_probe(dev, ret, "Failed to add OPP table\n");
>>                  goto err_pm_runtime_put;
>> +       }
> 
> Can we initialise the table from the driver if it is not found? This
> will help us by having the common code later on.
> 
We already do ICC voting if there is no OPP table present in the DTS.
So I think this might not be needed.
Please let me know if you want to use it for some other use case.

- Krishna Chaitanya.
>> +
>> +       /* vote for max freq in the opp table if opp table is present */
>> +       if (ret != -ENODEV) {
>> +               opp = dev_pm_opp_find_freq_floor(dev, &max_freq);
>> +               if (!IS_ERR(opp)) {
>> +                       ret = dev_pm_opp_set_opp(dev, opp);
>> +                       if (ret)
>> +                               dev_err_probe(pci->dev, ret,
>> +                                             "Failed to set opp: freq %ld\n",
>> +                                             dev_pm_opp_get_freq(opp));
>> +                       dev_pm_opp_put(opp);
>> +               }
>> +               pcie->opp_supported = true;
>> +       }
>> +
>> +       /* Skip icc init if opp is supported as icc bw vote is handled by opp framework */
>> +       if (!pcie->opp_supported) {
>> +               ret = qcom_pcie_icc_init(pcie);
>> +               if (ret)
>> +                       goto err_pm_runtime_put;
>> +       }
>>
>>          ret = pcie->cfg->ops->get_resources(pcie);
>>          if (ret)
>> @@ -1561,7 +1618,7 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>>                  goto err_phy_exit;
>>          }
>>
>> -       qcom_pcie_icc_update(pcie);
>> +       qcom_pcie_icc_opp_update(pcie);
>>
>>          if (pcie->mhi)
>>                  qcom_pcie_init_debugfs(pcie);
>> @@ -1640,7 +1697,7 @@ static int qcom_pcie_resume_noirq(struct device *dev)
>>                  pcie->suspended = false;
>>          }
>>
>> -       qcom_pcie_icc_update(pcie);
>> +       qcom_pcie_icc_opp_update(pcie);
>>
>>          return 0;
>>   }
>>
>> --
>> 2.42.0
>>
>>
> 
>
Krishna Chaitanya Chundru Jan. 16, 2024, 5:18 a.m. UTC | #6
On 1/13/2024 4:14 AM, Konrad Dybcio wrote:
> On 12.01.2024 15:22, Krishna chaitanya chundru wrote:
>> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
>> maintains hardware state of a regulator by performing max aggregation of
>> the requests made by all of the processors.
>>
>> PCIe controller can operate on different RPMh performance state of power
>> domain based up on the speed of the link. And this performance state varies
>> from target to target.
>>
>> It is manadate to scale the performance state based up on the PCIe speed
>> link operates so that SoC can run under optimum power conditions.
>>
>> Add Operating Performance Points(OPP) support to vote for RPMh state based
>> upon GEN speed link is operating.
>>
>> OPP can handle ICC bw voting also, so move icc bw voting through opp
>> framework if opp entries are present.
>>
>> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
>> same icc bw and has frequency, so use frequency based search to reduce
>> number of entries in the opp table.
>>
>> Don't initialize icc if opp is supported.
>>
>> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
>> ---
> 
> [...]
> 
>>   
>> -static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>> +static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)
> 
> Or simply.. qcom_pcie_opp_update :) Especially with Dmitry's
> suggestions
> 
If the OPP path is not present we are still voting through ICC, so it is
better to keep the name as it is.
>>   {
>>   	struct dw_pcie *pci = pcie->pci;
>> -	u32 offset, status;
>> +	u32 offset, status, freq;
>> +	struct dev_pm_opp *opp;
>>   	int speed, width;
>>   	int ret;
>>   
>> -	if (!pcie->icc_mem)
>> -		return;
>> -
>>   	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>>   	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
>>   
>> @@ -1424,11 +1424,42 @@ static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>>   	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
>>   	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
>>   
>> -	ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
>> -	if (ret) {
>> -		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
>> -			ret);
>> +	if (pcie->opp_supported) {
>> +		switch (speed) {
>> +		case 1:
>> +			freq = 2500000;
>> +			break;
>> +		case 2:
>> +			freq = 5000000;
>> +			break;
>> +		case 3:
>> +			freq = 8000000;
>> +			break;
>> +		default:
>> +			WARN_ON_ONCE(1);
>> +			fallthrough;
>> +		case 4:
>> +			freq = 16000000;
>> +			break;
>> +		}
> Might as well add gen5 and 6 rates of 3200.. and 6400.. since they're
> hard-in-stone in the spec by now, AFAIK
> 
> Konrad
ACK.

- Krishna Chaitanya.
Dmitry Baryshkov Jan. 16, 2024, 9:55 a.m. UTC | #7
On Tue, 16 Jan 2024 at 07:17, Krishna Chaitanya Chundru
<quic_krichai@quicinc.com> wrote:
>
>
>
> On 1/12/2024 9:03 PM, Dmitry Baryshkov wrote:
> > On Fri, 12 Jan 2024 at 16:25, Krishna chaitanya chundru
> > <quic_krichai@quicinc.com> wrote:
> >>
> >> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> >> maintains hardware state of a regulator by performing max aggregation of
> >> the requests made by all of the processors.
> >>
> >> PCIe controller can operate on different RPMh performance state of power
> >> domain based up on the speed of the link. And this performance state varies
> >> from target to target.
> >>
> >> It is manadate to scale the performance state based up on the PCIe speed
> >> link operates so that SoC can run under optimum power conditions.
> >>
> >> Add Operating Performance Points(OPP) support to vote for RPMh state based
> >> upon GEN speed link is operating.
> >>
> >> OPP can handle ICC bw voting also, so move icc bw voting through opp
> >> framework if opp entries are present.
> >>
> >> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
> >> same icc bw and has frequency, so use frequency based search to reduce
> >> number of entries in the opp table.
> >>
> >> Don't initialize icc if opp is supported.
> >>
> >> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> >> ---
> >>   drivers/pci/controller/dwc/pcie-qcom.c | 83 ++++++++++++++++++++++++++++------
> >>   1 file changed, 70 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> >> index 035953f0b6d8..31512dc9d6ff 100644
> >> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> >> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> >> @@ -22,6 +22,7 @@
> >>   #include <linux/of.h>
> >>   #include <linux/of_gpio.h>
> >>   #include <linux/pci.h>
> >> +#include <linux/pm_opp.h>
> >>   #include <linux/pm_runtime.h>
> >>   #include <linux/platform_device.h>
> >>   #include <linux/phy/pcie.h>
> >> @@ -244,6 +245,7 @@ struct qcom_pcie {
> >>          const struct qcom_pcie_cfg *cfg;
> >>          struct dentry *debugfs;
> >>          bool suspended;
> >> +       bool opp_supported;
> >>   };
> >>
> >>   #define to_qcom_pcie(x)                dev_get_drvdata((x)->dev)
> >> @@ -1404,16 +1406,14 @@ static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
> >>          return 0;
> >>   }
> >>
> >> -static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
> >> +static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)
> >>   {
> >>          struct dw_pcie *pci = pcie->pci;
> >> -       u32 offset, status;
> >> +       u32 offset, status, freq;
> >> +       struct dev_pm_opp *opp;
> >>          int speed, width;
> >>          int ret;
> >>
> >> -       if (!pcie->icc_mem)
> >> -               return;
> >> -
> >>          offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
> >>          status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
> >>
> >> @@ -1424,11 +1424,42 @@ static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
> >>          speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
> >>          width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
> >>
> >> -       ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
> >> -       if (ret) {
> >> -               dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
> >> -                       ret);
> >> +       if (pcie->opp_supported) {
> >> +               switch (speed) {
> >> +               case 1:
> >> +                       freq = 2500000;
> >> +                       break;
> >> +               case 2:
> >> +                       freq = 5000000;
> >> +                       break;
> >> +               case 3:
> >> +                       freq = 8000000;
> >> +                       break;
> >> +               default:
> >> +                       WARN_ON_ONCE(1);
> >> +                       fallthrough;
> >> +               case 4:
> >> +                       freq = 16000000;
> >
> > I expected that this kind of detail goes to the OPP table itself. Can
> > we index the table using the generation instead of frequency?
> >
> In the previous patch also we tried to use index only, but problem using
> index is we can define only GEN speed. As we are voting for the ICC BW
> voting also we need to consider for lane width while configuring this
> path. It is difficult to use index now as we need to consider both gen
> speed and lane width.
> For that reason we moved to frequencies to reduce number of entries in
> OPP table.
> for example if my controller supports GEN1 & GEN2 and MAX lane width is
> 2.
>
> for GEN1x1
> opp-2500000 {
> };
>
> for GEN2x1 & GEN1x2 as both use same frequiences and bandwidth.
> opp-5000000 {
> };
>
> for GEN2x2
> opp-10000000 {
>
> };

Ack. It would be nice to add this as a comment. Something as simple as
'gen1x2 and gen2x1 share the bandwidth value and thus the opp entry'

>
> - Krishna chaitanya.
> >> +                       break;
> >> +               }
> >> +
> >> +               opp = dev_pm_opp_find_freq_exact(pci->dev, freq * width, true);
> >> +               if (!IS_ERR(opp)) {
> >> +                       ret = dev_pm_opp_set_opp(pci->dev, opp);
> >> +                       if (ret)
> >> +                               dev_err(pci->dev, "Failed to set opp: freq %ld ret %d\n",
> >> +                                       dev_pm_opp_get_freq(opp), ret);
> >> +                       dev_pm_opp_put(opp);
> >> +               }
> >> +       } else {
> >> +               ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
> >> +               if (ret) {
> >> +                       dev_err(pci->dev, "failed to set interconnect bandwidth for pcie-mem: %d\n",
> >> +                               ret);
> >> +               }
> >>          }
> >> +
> >> +       return;
> >>   }
> >>
> >>   static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
> >> @@ -1471,8 +1502,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
> >>   static int qcom_pcie_probe(struct platform_device *pdev)
> >>   {
> >>          const struct qcom_pcie_cfg *pcie_cfg;
> >> +       unsigned long max_freq = INT_MAX;
> >>          struct device *dev = &pdev->dev;
> >>          struct qcom_pcie *pcie;
> >> +       struct dev_pm_opp *opp;
> >>          struct dw_pcie_rp *pp;
> >>          struct resource *res;
> >>          struct dw_pcie *pci;
> >> @@ -1539,9 +1572,33 @@ static int qcom_pcie_probe(struct platform_device *pdev)
> >>                  goto err_pm_runtime_put;
> >>          }
> >>
> >> -       ret = qcom_pcie_icc_init(pcie);
> >> -       if (ret)
> >> +        /* OPP table is optional */
> >> +       ret = devm_pm_opp_of_add_table(dev);
> >> +       if (ret && ret != -ENODEV) {
> >> +               dev_err_probe(dev, ret, "Failed to add OPP table\n");
> >>                  goto err_pm_runtime_put;
> >> +       }
> >
> > Can we initialise the table from the driver if it is not found? This
> > will help us by having the common code later on.
> >
> we already icc voting if there is no opp table present in the dts.

Yes. So later we have two different code paths: one for the OPP table
being present and another one for the absent OPP table. My suggestion
is to initialise minimal OPP table by hand and then have a common code
path in qcom_pcie_icc_update().

> So I think this might not be needed.
> Please let me know if you want to use for some other use case.
>
> - Krishna Chaitanya.
> >> +
> >> +       /* vote for max freq in the opp table if opp table is present */
> >> +       if (ret != -ENODEV) {
> >> +               opp = dev_pm_opp_find_freq_floor(dev, &max_freq);
> >> +               if (!IS_ERR(opp)) {
> >> +                       ret = dev_pm_opp_set_opp(dev, opp);
> >> +                       if (ret)
> >> +                               dev_err_probe(pci->dev, ret,
> >> +                                             "Failed to set opp: freq %ld\n",
> >> +                                             dev_pm_opp_get_freq(opp));
> >> +                       dev_pm_opp_put(opp);
> >> +               }
> >> +               pcie->opp_supported = true;
> >> +       }
> >> +
> >> +       /* Skip icc init if opp is supported as icc bw vote is handled by opp framework */
> >> +       if (!pcie->opp_supported) {
> >> +               ret = qcom_pcie_icc_init(pcie);
> >> +               if (ret)
> >> +                       goto err_pm_runtime_put;
> >> +       }
> >>
> >>          ret = pcie->cfg->ops->get_resources(pcie);
> >>          if (ret)
> >> @@ -1561,7 +1618,7 @@ static int qcom_pcie_probe(struct platform_device *pdev)
> >>                  goto err_phy_exit;
> >>          }
> >>
> >> -       qcom_pcie_icc_update(pcie);
> >> +       qcom_pcie_icc_opp_update(pcie);
> >>
> >>          if (pcie->mhi)
> >>                  qcom_pcie_init_debugfs(pcie);
> >> @@ -1640,7 +1697,7 @@ static int qcom_pcie_resume_noirq(struct device *dev)
> >>                  pcie->suspended = false;
> >>          }
> >>
> >> -       qcom_pcie_icc_update(pcie);
> >> +       qcom_pcie_icc_opp_update(pcie);
> >>
> >>          return 0;
> >>   }
> >>
> >> --
> >> 2.42.0
> >>
> >>
> >
> >
Johan Hovold Jan. 16, 2024, 11 a.m. UTC | #8
Please, people, remember to trim unnecessary context from your replies
before hitting send!

This thread is barely readable currently, and leaving all context in
place also makes revisiting threads using the lore web interface a pain.

Johan
Manivannan Sadhasivam Jan. 29, 2024, 4 p.m. UTC | #9
On Fri, Jan 12, 2024 at 07:52:05PM +0530, Krishna chaitanya chundru wrote:
> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> maintains hardware state of a regulator by performing max aggregation of
> the requests made by all of the processors.
> 

s/processors/clients

> PCIe controller can operate on different RPMh performance state of power
> domain based up on the speed of the link. And this performance state varies
> from target to target.
> 
> It is manadate to scale the performance state based up on the PCIe speed
> link operates so that SoC can run under optimum power conditions.
> 
> Add Operating Performance Points(OPP) support to vote for RPMh state based
> upon GEN speed link is operating.
> 
> OPP can handle ICC bw voting also, so move icc bw voting through opp
> framework if opp entries are present.
> 
> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
> same icc bw and has frequency, so use frequency based search to reduce
> number of entries in the opp table.
> 
> Don't initialize icc if opp is supported.
> 
> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> ---
>  drivers/pci/controller/dwc/pcie-qcom.c | 83 ++++++++++++++++++++++++++++------
>  1 file changed, 70 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> index 035953f0b6d8..31512dc9d6ff 100644
> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> @@ -22,6 +22,7 @@
>  #include <linux/of.h>
>  #include <linux/of_gpio.h>
>  #include <linux/pci.h>
> +#include <linux/pm_opp.h>
>  #include <linux/pm_runtime.h>
>  #include <linux/platform_device.h>
>  #include <linux/phy/pcie.h>
> @@ -244,6 +245,7 @@ struct qcom_pcie {
>  	const struct qcom_pcie_cfg *cfg;
>  	struct dentry *debugfs;
>  	bool suspended;
> +	bool opp_supported;
>  };
>  
>  #define to_qcom_pcie(x)		dev_get_drvdata((x)->dev)
> @@ -1404,16 +1406,14 @@ static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
>  	return 0;
>  }
>  
> -static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
> +static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)
>  {
>  	struct dw_pcie *pci = pcie->pci;
> -	u32 offset, status;
> +	u32 offset, status, freq;
> +	struct dev_pm_opp *opp;
>  	int speed, width;
>  	int ret;
>  
> -	if (!pcie->icc_mem)
> -		return;
> -
>  	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>  	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
>  
> @@ -1424,11 +1424,42 @@ static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>  	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
>  	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
>  
> -	ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
> -	if (ret) {
> -		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
> -			ret);
> +	if (pcie->opp_supported) {
> +		switch (speed) {
> +		case 1:
> +			freq = 2500000;
> +			break;
> +		case 2:
> +			freq = 5000000;
> +			break;
> +		case 3:
> +			freq = 8000000;
> +			break;
> +		default:
> +			WARN_ON_ONCE(1);
> +			fallthrough;
> +		case 4:
> +			freq = 16000000;
> +			break;
> +		}

This switch case is PCIe generic, so it needs to be moved to drivers/pci/pci.c.
There is already an API, pcie_link_speed_mbps(), that returns the link speed in
Mbps, but it uses the pcie_capability_read_word() API to read LNKSTA of the device.

But you can move the switch case inside that API to a separate function and
reuse that here.

> +
> +		opp = dev_pm_opp_find_freq_exact(pci->dev, freq * width, true);
> +		if (!IS_ERR(opp)) {
> +			ret = dev_pm_opp_set_opp(pci->dev, opp);
> +			if (ret)
> +				dev_err(pci->dev, "Failed to set opp: freq %ld ret %d\n",
> +					dev_pm_opp_get_freq(opp), ret);
> +			dev_pm_opp_put(opp);
> +		}
> +	} else {
> +		ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
> +		if (ret) {
> +			dev_err(pci->dev, "failed to set interconnect bandwidth for pcie-mem: %d\n",
> +				ret);
> +		}
>  	}
> +
> +	return;
>  }
>  
>  static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
> @@ -1471,8 +1502,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
>  static int qcom_pcie_probe(struct platform_device *pdev)
>  {
>  	const struct qcom_pcie_cfg *pcie_cfg;
> +	unsigned long max_freq = INT_MAX;
>  	struct device *dev = &pdev->dev;
>  	struct qcom_pcie *pcie;
> +	struct dev_pm_opp *opp;
>  	struct dw_pcie_rp *pp;
>  	struct resource *res;
>  	struct dw_pcie *pci;
> @@ -1539,9 +1572,33 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>  		goto err_pm_runtime_put;
>  	}
>  
> -	ret = qcom_pcie_icc_init(pcie);
> -	if (ret)
> +	 /* OPP table is optional */
> +	ret = devm_pm_opp_of_add_table(dev);
> +	if (ret && ret != -ENODEV) {
> +		dev_err_probe(dev, ret, "Failed to add OPP table\n");
>  		goto err_pm_runtime_put;
> +	}
> +
> +	/* vote for max freq in the opp table if opp table is present */

/*
 * Use highest OPP here if the OPP table is present. At the end of the probe(),
 * OPP will be updated using qcom_pcie_icc_opp_update().
 */

- Mani
Manivannan Sadhasivam Feb. 1, 2024, 11:54 a.m. UTC | #10
On Tue, Jan 16, 2024 at 11:55:17AM +0200, Dmitry Baryshkov wrote:
> On Tue, 16 Jan 2024 at 07:17, Krishna Chaitanya Chundru
> <quic_krichai@quicinc.com> wrote:
> >
> >
> >
> > On 1/12/2024 9:03 PM, Dmitry Baryshkov wrote:
> > > On Fri, 12 Jan 2024 at 16:25, Krishna chaitanya chundru
> > > <quic_krichai@quicinc.com> wrote:
> > >>
> > >> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> > >> maintains hardware state of a regulator by performing max aggregation of
> > >> the requests made by all of the processors.
> > >>
> > >> PCIe controller can operate on different RPMh performance state of power
> > >> domain based up on the speed of the link. And this performance state varies
> > >> from target to target.
> > >>
> > >> It is manadate to scale the performance state based up on the PCIe speed
> > >> link operates so that SoC can run under optimum power conditions.
> > >>
> > >> Add Operating Performance Points(OPP) support to vote for RPMh state based
> > >> upon GEN speed link is operating.
> > >>
> > >> OPP can handle ICC bw voting also, so move icc bw voting through opp
> > >> framework if opp entries are present.
> > >>
> > >> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
> > >> same icc bw and has frequency, so use frequency based search to reduce
> > >> number of entries in the opp table.
> > >>
> > >> Don't initialize icc if opp is supported.
> > >>
> > >> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> > >> ---
> > >>   drivers/pci/controller/dwc/pcie-qcom.c | 83 ++++++++++++++++++++++++++++------
> > >>   1 file changed, 70 insertions(+), 13 deletions(-)
> > >>
> > >> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> > >> index 035953f0b6d8..31512dc9d6ff 100644
> > >> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> > >> +++ b/drivers/pci/controller/dwc/pcie-qcom.c

[...]

> > >>   static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
> > >> @@ -1471,8 +1502,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
> > >>   static int qcom_pcie_probe(struct platform_device *pdev)
> > >>   {
> > >>          const struct qcom_pcie_cfg *pcie_cfg;
> > >> +       unsigned long max_freq = INT_MAX;
> > >>          struct device *dev = &pdev->dev;
> > >>          struct qcom_pcie *pcie;
> > >> +       struct dev_pm_opp *opp;
> > >>          struct dw_pcie_rp *pp;
> > >>          struct resource *res;
> > >>          struct dw_pcie *pci;
> > >> @@ -1539,9 +1572,33 @@ static int qcom_pcie_probe(struct platform_device *pdev)
> > >>                  goto err_pm_runtime_put;
> > >>          }
> > >>
> > >> -       ret = qcom_pcie_icc_init(pcie);
> > >> -       if (ret)
> > >> +        /* OPP table is optional */
> > >> +       ret = devm_pm_opp_of_add_table(dev);
> > >> +       if (ret && ret != -ENODEV) {
> > >> +               dev_err_probe(dev, ret, "Failed to add OPP table\n");
> > >>                  goto err_pm_runtime_put;
> > >> +       }
> > >
> > > Can we initialise the table from the driver if it is not found? This
> > > will help us by having the common code later on.
> > >
> > we already icc voting if there is no opp table present in the dts.
> 
> Yes. So later we have two different code paths: one for the OPP table
> being present and another one for the absent OPP table. My suggestion
> is to initialise minimal OPP table by hand and then have a common code
> path in qcom_pcie_icc_update().
> 

Are you suggesting to duplicate DT in the driver?

- Mani
Dmitry Baryshkov Feb. 1, 2024, 11:58 a.m. UTC | #11
On Thu, 1 Feb 2024 at 13:54, Manivannan Sadhasivam <mani@kernel.org> wrote:
>
> On Tue, Jan 16, 2024 at 11:55:17AM +0200, Dmitry Baryshkov wrote:
> > On Tue, 16 Jan 2024 at 07:17, Krishna Chaitanya Chundru
> > <quic_krichai@quicinc.com> wrote:
> > >
> > >
> > >
> > > On 1/12/2024 9:03 PM, Dmitry Baryshkov wrote:
> > > > On Fri, 12 Jan 2024 at 16:25, Krishna chaitanya chundru
> > > > <quic_krichai@quicinc.com> wrote:
> > > >>
> > > >> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> > > >> maintains hardware state of a regulator by performing max aggregation of
> > > >> the requests made by all of the processors.
> > > >>
> > > >> PCIe controller can operate on different RPMh performance state of power
> > > >> domain based up on the speed of the link. And this performance state varies
> > > >> from target to target.
> > > >>
> > > >> It is manadate to scale the performance state based up on the PCIe speed
> > > >> link operates so that SoC can run under optimum power conditions.
> > > >>
> > > >> Add Operating Performance Points(OPP) support to vote for RPMh state based
> > > >> upon GEN speed link is operating.
> > > >>
> > > >> OPP can handle ICC bw voting also, so move icc bw voting through opp
> > > >> framework if opp entries are present.
> > > >>
> > > >> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
> > > >> same icc bw and has frequency, so use frequency based search to reduce
> > > >> number of entries in the opp table.
> > > >>
> > > >> Don't initialize icc if opp is supported.
> > > >>
> > > >> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> > > >> ---
> > > >>   drivers/pci/controller/dwc/pcie-qcom.c | 83 ++++++++++++++++++++++++++++------
> > > >>   1 file changed, 70 insertions(+), 13 deletions(-)
> > > >>
> > > >> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> > > >> index 035953f0b6d8..31512dc9d6ff 100644
> > > >> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> > > >> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>
> [...]
>
> > > >>   static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
> > > >> @@ -1471,8 +1502,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
> > > >>   static int qcom_pcie_probe(struct platform_device *pdev)
> > > >>   {
> > > >>          const struct qcom_pcie_cfg *pcie_cfg;
> > > >> +       unsigned long max_freq = INT_MAX;
> > > >>          struct device *dev = &pdev->dev;
> > > >>          struct qcom_pcie *pcie;
> > > >> +       struct dev_pm_opp *opp;
> > > >>          struct dw_pcie_rp *pp;
> > > >>          struct resource *res;
> > > >>          struct dw_pcie *pci;
> > > >> @@ -1539,9 +1572,33 @@ static int qcom_pcie_probe(struct platform_device *pdev)
> > > >>                  goto err_pm_runtime_put;
> > > >>          }
> > > >>
> > > >> -       ret = qcom_pcie_icc_init(pcie);
> > > >> -       if (ret)
> > > >> +        /* OPP table is optional */
> > > >> +       ret = devm_pm_opp_of_add_table(dev);
> > > >> +       if (ret && ret != -ENODEV) {
> > > >> +               dev_err_probe(dev, ret, "Failed to add OPP table\n");
> > > >>                  goto err_pm_runtime_put;
> > > >> +       }
> > > >
> > > > Can we initialise the table from the driver if it is not found? This
> > > > will help us by having the common code later on.
> > > >
> > > we already icc voting if there is no opp table present in the dts.
> >
> > Yes. So later we have two different code paths: one for the OPP table
> > being present and another one for the absent OPP table. My suggestion
> > is to initialise minimal OPP table by hand and then have a common code
> > path in qcom_pcie_icc_update().
> >
>
> Are you suggesting to duplicate DT in the driver?

As a fallback for the cases when there is no OPP table in the driver,
it might make sense.
Otherwise, the DT is still somewhat duplicated in the form of calling
icc functions directly.
Manivannan Sadhasivam Feb. 1, 2024, 12:07 p.m. UTC | #12
On Thu, Feb 01, 2024 at 01:58:58PM +0200, Dmitry Baryshkov wrote:
> On Thu, 1 Feb 2024 at 13:54, Manivannan Sadhasivam <mani@kernel.org> wrote:
> >
> > On Tue, Jan 16, 2024 at 11:55:17AM +0200, Dmitry Baryshkov wrote:
> > > On Tue, 16 Jan 2024 at 07:17, Krishna Chaitanya Chundru
> > > <quic_krichai@quicinc.com> wrote:
> > > >
> > > >
> > > >
> > > > On 1/12/2024 9:03 PM, Dmitry Baryshkov wrote:
> > > > > On Fri, 12 Jan 2024 at 16:25, Krishna chaitanya chundru
> > > > > <quic_krichai@quicinc.com> wrote:
> > > > >>
> > > > >> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> > > > >> maintains hardware state of a regulator by performing max aggregation of
> > > > >> the requests made by all of the processors.
> > > > >>
> > > > >> PCIe controller can operate on different RPMh performance state of power
> > > > >> domain based up on the speed of the link. And this performance state varies
> > > > >> from target to target.
> > > > >>
> > > > >> It is manadate to scale the performance state based up on the PCIe speed
> > > > >> link operates so that SoC can run under optimum power conditions.
> > > > >>
> > > > >> Add Operating Performance Points(OPP) support to vote for RPMh state based
> > > > >> upon GEN speed link is operating.
> > > > >>
> > > > >> OPP can handle ICC bw voting also, so move icc bw voting through opp
> > > > >> framework if opp entries are present.
> > > > >>
> > > > >> In PCIe certain gen speeds like GEN1x2 & GEN2X1 or GEN3x2 & GEN4x1 use
> > > > >> same icc bw and has frequency, so use frequency based search to reduce
> > > > >> number of entries in the opp table.
> > > > >>
> > > > >> Don't initialize icc if opp is supported.
> > > > >>
> > > > >> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> > > > >> ---
> > > > >>   drivers/pci/controller/dwc/pcie-qcom.c | 83 ++++++++++++++++++++++++++++------
> > > > >>   1 file changed, 70 insertions(+), 13 deletions(-)
> > > > >>
> > > > >> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> > > > >> index 035953f0b6d8..31512dc9d6ff 100644
> > > > >> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> > > > >> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> >
> > [...]
> >
> > > > >>   static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
> > > > >> @@ -1471,8 +1502,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
> > > > >>   static int qcom_pcie_probe(struct platform_device *pdev)
> > > > >>   {
> > > > >>          const struct qcom_pcie_cfg *pcie_cfg;
> > > > >> +       unsigned long max_freq = INT_MAX;
> > > > >>          struct device *dev = &pdev->dev;
> > > > >>          struct qcom_pcie *pcie;
> > > > >> +       struct dev_pm_opp *opp;
> > > > >>          struct dw_pcie_rp *pp;
> > > > >>          struct resource *res;
> > > > >>          struct dw_pcie *pci;
> > > > >> @@ -1539,9 +1572,33 @@ static int qcom_pcie_probe(struct platform_device *pdev)
> > > > >>                  goto err_pm_runtime_put;
> > > > >>          }
> > > > >>
> > > > >> -       ret = qcom_pcie_icc_init(pcie);
> > > > >> -       if (ret)
> > > > >> +        /* OPP table is optional */
> > > > >> +       ret = devm_pm_opp_of_add_table(dev);
> > > > >> +       if (ret && ret != -ENODEV) {
> > > > >> +               dev_err_probe(dev, ret, "Failed to add OPP table\n");
> > > > >>                  goto err_pm_runtime_put;
> > > > >> +       }
> > > > >
> > > > > Can we initialise the table from the driver if it is not found? This
> > > > > will help us by having the common code later on.
> > > > >
> > > > we already icc voting if there is no opp table present in the dts.
> > >
> > > Yes. So later we have two different code paths: one for the OPP table
> > > being present and another one for the absent OPP table. My suggestion
> > > is to initialise minimal OPP table by hand and then have a common code
> > > path in qcom_pcie_icc_update().
> > >
> >
> > Are you suggesting to duplicate DT in the driver?
> 
> As a fallback for the cases when there is no OPP table in the driver
> it might make sense. See
> Otherwise the DT is still somewhat duplicated in the form of calling
> icc functions directly.
> 

No, DT is not duplicated. With this approach, we will end up hardcoding the DT
entries in the driver which sounds backwards to me. Even with 2 different code
paths, the hardware info will be left to the DT itself, so the driver just
consumes it.

So please, let's not do it.

- Mani
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 035953f0b6d8..31512dc9d6ff 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -22,6 +22,7 @@ 
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/pci.h>
+#include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
 #include <linux/phy/pcie.h>
@@ -244,6 +245,7 @@  struct qcom_pcie {
 	const struct qcom_pcie_cfg *cfg;
 	struct dentry *debugfs;
 	bool suspended;
+	bool opp_supported;
 };
 
 #define to_qcom_pcie(x)		dev_get_drvdata((x)->dev)
@@ -1404,16 +1406,14 @@  static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
 	return 0;
 }
 
-static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
+static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)
 {
 	struct dw_pcie *pci = pcie->pci;
-	u32 offset, status;
+	u32 offset, status, freq;
+	struct dev_pm_opp *opp;
 	int speed, width;
 	int ret;
 
-	if (!pcie->icc_mem)
-		return;
-
 	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
 	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
 
@@ -1424,11 +1424,42 @@  static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
 	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
 	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
 
-	ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
-	if (ret) {
-		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
-			ret);
+	if (pcie->opp_supported) {
+		switch (speed) {
+		case 1:
+			freq = 2500000;
+			break;
+		case 2:
+			freq = 5000000;
+			break;
+		case 3:
+			freq = 8000000;
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			fallthrough;
+		case 4:
+			freq = 16000000;
+			break;
+		}
+
+		opp = dev_pm_opp_find_freq_exact(pci->dev, freq * width, true);
+		if (!IS_ERR(opp)) {
+			ret = dev_pm_opp_set_opp(pci->dev, opp);
+			if (ret)
+				dev_err(pci->dev, "Failed to set opp: freq %ld ret %d\n",
+					dev_pm_opp_get_freq(opp), ret);
+			dev_pm_opp_put(opp);
+		}
+	} else {
+		ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
+		if (ret) {
+			dev_err(pci->dev, "failed to set interconnect bandwidth for pcie-mem: %d\n",
+				ret);
+		}
 	}
+
+	return;
 }
 
 static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
@@ -1471,8 +1502,10 @@  static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
 static int qcom_pcie_probe(struct platform_device *pdev)
 {
 	const struct qcom_pcie_cfg *pcie_cfg;
+	unsigned long max_freq = INT_MAX;
 	struct device *dev = &pdev->dev;
 	struct qcom_pcie *pcie;
+	struct dev_pm_opp *opp;
 	struct dw_pcie_rp *pp;
 	struct resource *res;
 	struct dw_pcie *pci;
@@ -1539,9 +1572,33 @@  static int qcom_pcie_probe(struct platform_device *pdev)
 		goto err_pm_runtime_put;
 	}
 
-	ret = qcom_pcie_icc_init(pcie);
-	if (ret)
+	 /* OPP table is optional */
+	ret = devm_pm_opp_of_add_table(dev);
+	if (ret && ret != -ENODEV) {
+		dev_err_probe(dev, ret, "Failed to add OPP table\n");
 		goto err_pm_runtime_put;
+	}
+
+	/* vote for max freq in the opp table if opp table is present */
+	if (ret != -ENODEV) {
+		opp = dev_pm_opp_find_freq_floor(dev, &max_freq);
+		if (!IS_ERR(opp)) {
+			ret = dev_pm_opp_set_opp(dev, opp);
+			if (ret)
+				dev_err_probe(pci->dev, ret,
+					      "Failed to set opp: freq %ld\n",
+					      dev_pm_opp_get_freq(opp));
+			dev_pm_opp_put(opp);
+		}
+		pcie->opp_supported = true;
+	}
+
+	/* Skip icc init if opp is supported as icc bw vote is handled by opp framework */
+	if (!pcie->opp_supported) {
+		ret = qcom_pcie_icc_init(pcie);
+		if (ret)
+			goto err_pm_runtime_put;
+	}
 
 	ret = pcie->cfg->ops->get_resources(pcie);
 	if (ret)
@@ -1561,7 +1618,7 @@  static int qcom_pcie_probe(struct platform_device *pdev)
 		goto err_phy_exit;
 	}
 
-	qcom_pcie_icc_update(pcie);
+	qcom_pcie_icc_opp_update(pcie);
 
 	if (pcie->mhi)
 		qcom_pcie_init_debugfs(pcie);
@@ -1640,7 +1697,7 @@  static int qcom_pcie_resume_noirq(struct device *dev)
 		pcie->suspended = false;
 	}
 
-	qcom_pcie_icc_update(pcie);
+	qcom_pcie_icc_opp_update(pcie);
 
 	return 0;
 }