diff mbox series

[v8,7/7] PCI: qcom: Add OPP support to scale performance state of power domain

Message ID 20240302-opp_support-v8-7-158285b86b10@quicinc.com (mailing list archive)
State Superseded
Delegated to: Manivannan Sadhasivam
Headers show
Series PCI: qcom: Add support for OPP | expand

Commit Message

Krishna Chaitanya Chundru March 2, 2024, 4 a.m. UTC
QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
maintains hardware state of a regulator by performing max aggregation of
the requests made by all of the clients.

The PCIe controller can operate at different RPMh performance states of the
power domain depending on the speed of the link. The required performance
state varies from target to target: some controllers support GEN3 in the NOM
(Nominal) voltage corner, while others support GEN3 in low SVS (static
voltage scaling).

The SoC can be more power efficient if we scale the performance state
based on the aggregate PCIe link bandwidth.

Add Operating Performance Points (OPP) support to vote for RPMh state based
on the aggregate link bandwidth.

OPP can also handle ICC bandwidth voting, so route ICC bandwidth voting
through the OPP framework if OPP entries are present.

Different link configurations may share the same aggregate bandwidth,
e.g., a 2.5 GT/s x2 link and a 5.0 GT/s x1 link have the same bandwidth
and share the same OPP entry.

As we are moving ICC voting as part of OPP, don't initialize ICC if OPP
is supported.

Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
---
 drivers/pci/controller/dwc/pcie-qcom.c | 81 +++++++++++++++++++++++++++-------
 1 file changed, 66 insertions(+), 15 deletions(-)

Comments

Manivannan Sadhasivam March 4, 2024, 6:05 p.m. UTC | #1
On Sat, Mar 02, 2024 at 09:30:01AM +0530, Krishna chaitanya chundru wrote:
> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> maintains hardware state of a regulator by performing max aggregation of
> the requests made by all of the clients.
> 
> PCIe controller can operate on different RPMh performance state of power
> domain based on the speed of the link. And this performance state varies
> from target to target, like some controllers support GEN3 in NOM (Nominal)
> voltage corner, while some other supports GEN3 in low SVS (static voltage
> scaling).
> 
> The SoC can be more power efficient if we scale the performance state
> based on the aggregate PCIe link bandwidth.
> 
> Add Operating Performance Points (OPP) support to vote for RPMh state based
> on the aggregate link bandwidth.
> 
> OPP can handle ICC bw voting also, so move ICC bw voting through OPP
> framework if OPP entries are present.
> 
> Different link configurations may share the same aggregate bandwidth,
> e.g., a 2.5 GT/s x2 link and a 5.0 GT/s x1 link have the same bandwidth
> and share the same OPP entry.
> 
> As we are moving ICC voting as part of OPP, don't initialize ICC if OPP
> is supported.
> 
> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> ---
>  drivers/pci/controller/dwc/pcie-qcom.c | 81 +++++++++++++++++++++++++++-------
>  1 file changed, 66 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> index a0266bfe71f1..2ec14bfafcfc 100644
> --- a/drivers/pci/controller/dwc/pcie-qcom.c
> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
> @@ -22,6 +22,7 @@
>  #include <linux/of.h>
>  #include <linux/of_gpio.h>
>  #include <linux/pci.h>
> +#include <linux/pm_opp.h>
>  #include <linux/pm_runtime.h>
>  #include <linux/platform_device.h>
>  #include <linux/phy/pcie.h>
> @@ -244,6 +245,7 @@ struct qcom_pcie {
>  	const struct qcom_pcie_cfg *cfg;
>  	struct dentry *debugfs;
>  	bool suspended;
> +	bool opp_supported;

You can just use "pcie->icc_mem" to differentiate between OPP and ICC. No need
for a new flag.

>  };
>  
>  #define to_qcom_pcie(x)		dev_get_drvdata((x)->dev)
> @@ -1405,15 +1407,13 @@ static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
>  	return 0;
>  }
>  
> -static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
> +static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)
>  {
>  	struct dw_pcie *pci = pcie->pci;
> -	u32 offset, status;
> +	u32 offset, status, freq;
> +	struct dev_pm_opp *opp;
>  	int speed, width;
> -	int ret;
> -
> -	if (!pcie->icc_mem)
> -		return;
> +	int ret, mbps;
>  
>  	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>  	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
> @@ -1425,11 +1425,30 @@ static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>  	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
>  	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
>  
> -	ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
> -	if (ret) {
> -		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
> -			ret);
> +	if (pcie->opp_supported) {
> +		mbps = pcie_link_speed_to_mbps(pcie_link_speed[speed]);
> +		if (mbps < 0)
> +			return;
> +
> +		freq = mbps * 1000;
> +		opp = dev_pm_opp_find_freq_exact(pci->dev, freq * width, true);
> +		if (!IS_ERR(opp)) {
> +			ret = dev_pm_opp_set_opp(pci->dev, opp);
> +			if (ret)
> +				dev_err(pci->dev, "Failed to set opp: freq %ld ret %d\n",
> +					dev_pm_opp_get_freq(opp), ret);
> +			dev_pm_opp_put(opp);
> +		}
> +	} else {
> +		ret = icc_set_bw(pcie->icc_mem, 0,
> +				 width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
> +		if (ret) {
> +			dev_err(pci->dev,
> +				"failed to set interconnect bandwidth for pcie-mem: %d\n", ret);

"PCIe-MEM"

> +		}
>  	}
> +
> +	return;
>  }
>  
>  static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
> @@ -1472,8 +1491,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
>  static int qcom_pcie_probe(struct platform_device *pdev)
>  {
>  	const struct qcom_pcie_cfg *pcie_cfg;
> +	unsigned long max_freq = INT_MAX;
>  	struct device *dev = &pdev->dev;
>  	struct qcom_pcie *pcie;
> +	struct dev_pm_opp *opp;
>  	struct dw_pcie_rp *pp;
>  	struct resource *res;
>  	struct dw_pcie *pci;
> @@ -1540,9 +1561,36 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>  		goto err_pm_runtime_put;
>  	}
>  
> -	ret = qcom_pcie_icc_init(pcie);
> -	if (ret)
> +	 /* OPP table is optional */
> +	ret = devm_pm_opp_of_add_table(dev);
> +	if (ret && ret != -ENODEV) {
> +		dev_err_probe(dev, ret, "Failed to add OPP table\n");
>  		goto err_pm_runtime_put;
> +	}
> +
> +	/*
> +	 * Use highest OPP here if the OPP table is present. At the end of

Why highest opp? For ICC, we set minimal bandwidth before.

> +	 * the probe(), OPP will be updated using qcom_pcie_icc_opp_update().
> +	 */
> +	if (ret != -ENODEV) {

if (!ret)

> +		opp = dev_pm_opp_find_freq_floor(dev, &max_freq);
> +		if (!IS_ERR(opp)) {
> +			ret = dev_pm_opp_set_opp(dev, opp);
> +			if (ret)
> +				dev_err_probe(pci->dev, ret,
> +					      "Failed to set opp: freq %ld\n",

	"Failed to set OPP for freq: %ld\n"

> +					      dev_pm_opp_get_freq(opp));
> +			dev_pm_opp_put(opp);
> +		}
> +		pcie->opp_supported = true;
> +	}
> +
> +	/* Skip ICC init if OPP is supported as ICC bw is handled by OPP */
> +	if (!pcie->opp_supported) {
> +		ret = qcom_pcie_icc_init(pcie);

First check whether ICC is present or not and then check OPP as a fallback. This
avoids an extra flag.

- Mani

> +		if (ret)
> +			goto err_pm_runtime_put;
> +	}
>  
>  	ret = pcie->cfg->ops->get_resources(pcie);
>  	if (ret)
> @@ -1562,7 +1610,7 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>  		goto err_phy_exit;
>  	}
>  
> -	qcom_pcie_icc_update(pcie);
> +	qcom_pcie_icc_opp_update(pcie);
>  
>  	if (pcie->mhi)
>  		qcom_pcie_init_debugfs(pcie);
> @@ -1621,10 +1669,13 @@ static int qcom_pcie_suspend_noirq(struct device *dev)
>  			qcom_pcie_host_init(&pcie->pci->pp);
>  			pcie->suspended = false;
>  		}
> -		qcom_pcie_icc_update(pcie);
> +		qcom_pcie_icc_opp_update(pcie);
>  		return ret;
>  	}
>  
> +	if (pcie->opp_supported)
> +		dev_pm_opp_set_opp(pcie->pci->dev, NULL);
> +
>  	return 0;
>  }
>  
> @@ -1647,7 +1698,7 @@ static int qcom_pcie_resume_noirq(struct device *dev)
>  		pcie->suspended = false;
>  	}
>  
> -	qcom_pcie_icc_update(pcie);
> +	qcom_pcie_icc_opp_update(pcie);
>  
>  	return 0;
>  }
> 
> -- 
> 2.42.0
>
Krishna Chaitanya Chundru March 5, 2024, 11:14 a.m. UTC | #2
On 3/4/2024 11:35 PM, Manivannan Sadhasivam wrote:
> On Sat, Mar 02, 2024 at 09:30:01AM +0530, Krishna chaitanya chundru wrote:
>> QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
>> maintains hardware state of a regulator by performing max aggregation of
>> the requests made by all of the clients.
>>
>> PCIe controller can operate on different RPMh performance state of power
>> domain based on the speed of the link. And this performance state varies
>> from target to target, like some controllers support GEN3 in NOM (Nominal)
>> voltage corner, while some other supports GEN3 in low SVS (static voltage
>> scaling).
>>
>> The SoC can be more power efficient if we scale the performance state
>> based on the aggregate PCIe link bandwidth.
>>
>> Add Operating Performance Points (OPP) support to vote for RPMh state based
>> on the aggregate link bandwidth.
>>
>> OPP can handle ICC bw voting also, so move ICC bw voting through OPP
>> framework if OPP entries are present.
>>
>> Different link configurations may share the same aggregate bandwidth,
>> e.g., a 2.5 GT/s x2 link and a 5.0 GT/s x1 link have the same bandwidth
>> and share the same OPP entry.
>>
>> As we are moving ICC voting as part of OPP, don't initialize ICC if OPP
>> is supported.
>>
>> Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
>> ---
>>   drivers/pci/controller/dwc/pcie-qcom.c | 81 +++++++++++++++++++++++++++-------
>>   1 file changed, 66 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
>> index a0266bfe71f1..2ec14bfafcfc 100644
>> --- a/drivers/pci/controller/dwc/pcie-qcom.c
>> +++ b/drivers/pci/controller/dwc/pcie-qcom.c
>> @@ -22,6 +22,7 @@
>>   #include <linux/of.h>
>>   #include <linux/of_gpio.h>
>>   #include <linux/pci.h>
>> +#include <linux/pm_opp.h>
>>   #include <linux/pm_runtime.h>
>>   #include <linux/platform_device.h>
>>   #include <linux/phy/pcie.h>
>> @@ -244,6 +245,7 @@ struct qcom_pcie {
>>   	const struct qcom_pcie_cfg *cfg;
>>   	struct dentry *debugfs;
>>   	bool suspended;
>> +	bool opp_supported;
> 
> You can just use "pcie->icc_mem" to differentiate between OPP and ICC. No need
> of a new flag.
>
Ack.

>>   };
>>   
>>   #define to_qcom_pcie(x)		dev_get_drvdata((x)->dev)
>> @@ -1405,15 +1407,13 @@ static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
>>   	return 0;
>>   }
>>   
>> -static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>> +static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)
>>   {
>>   	struct dw_pcie *pci = pcie->pci;
>> -	u32 offset, status;
>> +	u32 offset, status, freq;
>> +	struct dev_pm_opp *opp;
>>   	int speed, width;
>> -	int ret;
>> -
>> -	if (!pcie->icc_mem)
>> -		return;
>> +	int ret, mbps;
>>   
>>   	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>>   	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
>> @@ -1425,11 +1425,30 @@ static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
>>   	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
>>   	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
>>   
>> -	ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
>> -	if (ret) {
>> -		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
>> -			ret);
>> +	if (pcie->opp_supported) {
>> +		mbps = pcie_link_speed_to_mbps(pcie_link_speed[speed]);
>> +		if (mbps < 0)
>> +			return;
>> +
>> +		freq = mbps * 1000;
>> +		opp = dev_pm_opp_find_freq_exact(pci->dev, freq * width, true);
>> +		if (!IS_ERR(opp)) {
>> +			ret = dev_pm_opp_set_opp(pci->dev, opp);
>> +			if (ret)
>> +				dev_err(pci->dev, "Failed to set opp: freq %ld ret %d\n",
>> +					dev_pm_opp_get_freq(opp), ret);
>> +			dev_pm_opp_put(opp);
>> +		}
>> +	} else {
>> +		ret = icc_set_bw(pcie->icc_mem, 0,
>> +				 width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
>> +		if (ret) {
>> +			dev_err(pci->dev,
>> +				"failed to set interconnect bandwidth for pcie-mem: %d\n", ret);
> 
> "PCIe-MEM"
> 
Ack.
>> +		}
>>   	}
>> +
>> +	return;
>>   }
>>   
>>   static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
>> @@ -1472,8 +1491,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
>>   static int qcom_pcie_probe(struct platform_device *pdev)
>>   {
>>   	const struct qcom_pcie_cfg *pcie_cfg;
>> +	unsigned long max_freq = INT_MAX;
>>   	struct device *dev = &pdev->dev;
>>   	struct qcom_pcie *pcie;
>> +	struct dev_pm_opp *opp;
>>   	struct dw_pcie_rp *pp;
>>   	struct resource *res;
>>   	struct dw_pcie *pci;
>> @@ -1540,9 +1561,36 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>>   		goto err_pm_runtime_put;
>>   	}
>>   
>> -	ret = qcom_pcie_icc_init(pcie);
>> -	if (ret)
>> +	 /* OPP table is optional */
>> +	ret = devm_pm_opp_of_add_table(dev);
>> +	if (ret && ret != -ENODEV) {
>> +		dev_err_probe(dev, ret, "Failed to add OPP table\n");
>>   		goto err_pm_runtime_put;
>> +	}
>> +
>> +	/*
>> +	 * Use highest OPP here if the OPP table is present. At the end of
> 
> Why highest opp? For ICC, we set minimal bandwidth before.
>
With OPP we are voting for both the ICC bandwidth and the voltage corner. If
we don't vote for the maximum voltage corner, the PCIe link may not come up at
the maximum supported speed. That is why we are voting for the maximum value.

Anyway, since we update the votes afterwards based on the link speed and
width, this should not create any issues.
>> +	 * the probe(), OPP will be updated using qcom_pcie_icc_opp_update().
>> +	 */
>> +	if (ret != -ENODEV) {
> 
> if (!ret)
> 
>> +		opp = dev_pm_opp_find_freq_floor(dev, &max_freq);
>> +		if (!IS_ERR(opp)) {
>> +			ret = dev_pm_opp_set_opp(dev, opp);
>> +			if (ret)
>> +				dev_err_probe(pci->dev, ret,
>> +					      "Failed to set opp: freq %ld\n",
> 
> 	"Failed to set OPP for freq: %ld\n"
> 
Ack
>> +					      dev_pm_opp_get_freq(opp));
>> +			dev_pm_opp_put(opp);
>> +		}
>> +		pcie->opp_supported = true;
>> +	}
>> +
>> +	/* Skip ICC init if OPP is supported as ICC bw is handled by OPP */
>> +	if (!pcie->opp_supported) {
>> +		ret = qcom_pcie_icc_init(pcie);
> 
> First check whether ICC is present or not and then check OPP as a fallback. This
> avoids an extra flag.
> 
> - Mani
Ack.

- Krishna Chaitanya.
> 
>> +		if (ret)
>> +			goto err_pm_runtime_put;
>> +	}
>>   
>>   	ret = pcie->cfg->ops->get_resources(pcie);
>>   	if (ret)
>> @@ -1562,7 +1610,7 @@ static int qcom_pcie_probe(struct platform_device *pdev)
>>   		goto err_phy_exit;
>>   	}
>>   
>> -	qcom_pcie_icc_update(pcie);
>> +	qcom_pcie_icc_opp_update(pcie);
>>   
>>   	if (pcie->mhi)
>>   		qcom_pcie_init_debugfs(pcie);
>> @@ -1621,10 +1669,13 @@ static int qcom_pcie_suspend_noirq(struct device *dev)
>>   			qcom_pcie_host_init(&pcie->pci->pp);
>>   			pcie->suspended = false;
>>   		}
>> -		qcom_pcie_icc_update(pcie);
>> +		qcom_pcie_icc_opp_update(pcie);
>>   		return ret;
>>   	}
>>   
>> +	if (pcie->opp_supported)
>> +		dev_pm_opp_set_opp(pcie->pci->dev, NULL);
>> +
>>   	return 0;
>>   }
>>   
>> @@ -1647,7 +1698,7 @@ static int qcom_pcie_resume_noirq(struct device *dev)
>>   		pcie->suspended = false;
>>   	}
>>   
>> -	qcom_pcie_icc_update(pcie);
>> +	qcom_pcie_icc_opp_update(pcie);
>>   
>>   	return 0;
>>   }
>>
>> -- 
>> 2.42.0
>>
>
Manivannan Sadhasivam April 5, 2024, 8:23 a.m. UTC | #3
On Tue, Mar 05, 2024 at 04:44:20PM +0530, Krishna Chaitanya Chundru wrote:
> 
> 
> On 3/4/2024 11:35 PM, Manivannan Sadhasivam wrote:
> > On Sat, Mar 02, 2024 at 09:30:01AM +0530, Krishna chaitanya chundru wrote:
> > > QCOM Resource Power Manager-hardened (RPMh) is a hardware block which
> > > maintains hardware state of a regulator by performing max aggregation of
> > > the requests made by all of the clients.
> > > 
> > > PCIe controller can operate on different RPMh performance state of power
> > > domain based on the speed of the link. And this performance state varies
> > > from target to target, like some controllers support GEN3 in NOM (Nominal)
> > > voltage corner, while some other supports GEN3 in low SVS (static voltage
> > > scaling).
> > > 
> > > The SoC can be more power efficient if we scale the performance state
> > > based on the aggregate PCIe link bandwidth.
> > > 
> > > Add Operating Performance Points (OPP) support to vote for RPMh state based
> > > on the aggregate link bandwidth.
> > > 
> > > OPP can handle ICC bw voting also, so move ICC bw voting through OPP
> > > framework if OPP entries are present.
> > > 
> > > Different link configurations may share the same aggregate bandwidth,
> > > e.g., a 2.5 GT/s x2 link and a 5.0 GT/s x1 link have the same bandwidth
> > > and share the same OPP entry.
> > > 
> > > As we are moving ICC voting as part of OPP, don't initialize ICC if OPP
> > > is supported.
> > > 
> > > Signed-off-by: Krishna chaitanya chundru <quic_krichai@quicinc.com>
> > > ---
> > >   drivers/pci/controller/dwc/pcie-qcom.c | 81 +++++++++++++++++++++++++++-------
> > >   1 file changed, 66 insertions(+), 15 deletions(-)
> > > 
> > > diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
> > > index a0266bfe71f1..2ec14bfafcfc 100644
> > > --- a/drivers/pci/controller/dwc/pcie-qcom.c
> > > +++ b/drivers/pci/controller/dwc/pcie-qcom.c

[...]

> > >   static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
> > > @@ -1472,8 +1491,10 @@ static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
> > >   static int qcom_pcie_probe(struct platform_device *pdev)
> > >   {
> > >   	const struct qcom_pcie_cfg *pcie_cfg;
> > > +	unsigned long max_freq = INT_MAX;
> > >   	struct device *dev = &pdev->dev;
> > >   	struct qcom_pcie *pcie;
> > > +	struct dev_pm_opp *opp;
> > >   	struct dw_pcie_rp *pp;
> > >   	struct resource *res;
> > >   	struct dw_pcie *pci;
> > > @@ -1540,9 +1561,36 @@ static int qcom_pcie_probe(struct platform_device *pdev)
> > >   		goto err_pm_runtime_put;
> > >   	}
> > > -	ret = qcom_pcie_icc_init(pcie);
> > > -	if (ret)
> > > +	 /* OPP table is optional */
> > > +	ret = devm_pm_opp_of_add_table(dev);
> > > +	if (ret && ret != -ENODEV) {
> > > +		dev_err_probe(dev, ret, "Failed to add OPP table\n");
> > >   		goto err_pm_runtime_put;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Use highest OPP here if the OPP table is present. At the end of
> > 
> > Why highest opp? For ICC, we set minimal bandwidth before.
> > 
> In OPP we are voting for both ICC and voltage corner also, if we didn't vote
> for maximum voltage core the PCIe link may not come in maximum supported
> speed. Due to that we are voting for Maximum value.
> 

Okay, then this information should be part of the comment.

- Mani
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index a0266bfe71f1..2ec14bfafcfc 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -22,6 +22,7 @@ 
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/pci.h>
+#include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
 #include <linux/phy/pcie.h>
@@ -244,6 +245,7 @@  struct qcom_pcie {
 	const struct qcom_pcie_cfg *cfg;
 	struct dentry *debugfs;
 	bool suspended;
+	bool opp_supported;
 };
 
 #define to_qcom_pcie(x)		dev_get_drvdata((x)->dev)
@@ -1405,15 +1407,13 @@  static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
 	return 0;
 }
 
-static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
+static void qcom_pcie_icc_opp_update(struct qcom_pcie *pcie)
 {
 	struct dw_pcie *pci = pcie->pci;
-	u32 offset, status;
+	u32 offset, status, freq;
+	struct dev_pm_opp *opp;
 	int speed, width;
-	int ret;
-
-	if (!pcie->icc_mem)
-		return;
+	int ret, mbps;
 
 	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
 	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
@@ -1425,11 +1425,30 @@  static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
 	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
 	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
 
-	ret = icc_set_bw(pcie->icc_mem, 0, width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
-	if (ret) {
-		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
-			ret);
+	if (pcie->opp_supported) {
+		mbps = pcie_link_speed_to_mbps(pcie_link_speed[speed]);
+		if (mbps < 0)
+			return;
+
+		freq = mbps * 1000;
+		opp = dev_pm_opp_find_freq_exact(pci->dev, freq * width, true);
+		if (!IS_ERR(opp)) {
+			ret = dev_pm_opp_set_opp(pci->dev, opp);
+			if (ret)
+				dev_err(pci->dev, "Failed to set opp: freq %ld ret %d\n",
+					dev_pm_opp_get_freq(opp), ret);
+			dev_pm_opp_put(opp);
+		}
+	} else {
+		ret = icc_set_bw(pcie->icc_mem, 0,
+				 width * QCOM_PCIE_LINK_SPEED_TO_BW(speed));
+		if (ret) {
+			dev_err(pci->dev,
+				"failed to set interconnect bandwidth for pcie-mem: %d\n", ret);
+		}
 	}
+
+	return;
 }
 
 static int qcom_pcie_link_transition_count(struct seq_file *s, void *data)
@@ -1472,8 +1491,10 @@  static void qcom_pcie_init_debugfs(struct qcom_pcie *pcie)
 static int qcom_pcie_probe(struct platform_device *pdev)
 {
 	const struct qcom_pcie_cfg *pcie_cfg;
+	unsigned long max_freq = INT_MAX;
 	struct device *dev = &pdev->dev;
 	struct qcom_pcie *pcie;
+	struct dev_pm_opp *opp;
 	struct dw_pcie_rp *pp;
 	struct resource *res;
 	struct dw_pcie *pci;
@@ -1540,9 +1561,36 @@  static int qcom_pcie_probe(struct platform_device *pdev)
 		goto err_pm_runtime_put;
 	}
 
-	ret = qcom_pcie_icc_init(pcie);
-	if (ret)
+	 /* OPP table is optional */
+	ret = devm_pm_opp_of_add_table(dev);
+	if (ret && ret != -ENODEV) {
+		dev_err_probe(dev, ret, "Failed to add OPP table\n");
 		goto err_pm_runtime_put;
+	}
+
+	/*
+	 * Use highest OPP here if the OPP table is present. At the end of
+	 * the probe(), OPP will be updated using qcom_pcie_icc_opp_update().
+	 */
+	if (ret != -ENODEV) {
+		opp = dev_pm_opp_find_freq_floor(dev, &max_freq);
+		if (!IS_ERR(opp)) {
+			ret = dev_pm_opp_set_opp(dev, opp);
+			if (ret)
+				dev_err_probe(pci->dev, ret,
+					      "Failed to set opp: freq %ld\n",
+					      dev_pm_opp_get_freq(opp));
+			dev_pm_opp_put(opp);
+		}
+		pcie->opp_supported = true;
+	}
+
+	/* Skip ICC init if OPP is supported as ICC bw is handled by OPP */
+	if (!pcie->opp_supported) {
+		ret = qcom_pcie_icc_init(pcie);
+		if (ret)
+			goto err_pm_runtime_put;
+	}
 
 	ret = pcie->cfg->ops->get_resources(pcie);
 	if (ret)
@@ -1562,7 +1610,7 @@  static int qcom_pcie_probe(struct platform_device *pdev)
 		goto err_phy_exit;
 	}
 
-	qcom_pcie_icc_update(pcie);
+	qcom_pcie_icc_opp_update(pcie);
 
 	if (pcie->mhi)
 		qcom_pcie_init_debugfs(pcie);
@@ -1621,10 +1669,13 @@  static int qcom_pcie_suspend_noirq(struct device *dev)
 			qcom_pcie_host_init(&pcie->pci->pp);
 			pcie->suspended = false;
 		}
-		qcom_pcie_icc_update(pcie);
+		qcom_pcie_icc_opp_update(pcie);
 		return ret;
 	}
 
+	if (pcie->opp_supported)
+		dev_pm_opp_set_opp(pcie->pci->dev, NULL);
+
 	return 0;
 }
 
@@ -1647,7 +1698,7 @@  static int qcom_pcie_resume_noirq(struct device *dev)
 		pcie->suspended = false;
 	}
 
-	qcom_pcie_icc_update(pcie);
+	qcom_pcie_icc_opp_update(pcie);
 
 	return 0;
 }