diff mbox

[RESEND,v4,5/7] remoteproc: qcom: Modify reset sequence for hexagon to support v56 1.5.0

Message ID 1479981638-32069-6-git-send-email-akdwived@codeaurora.org (mailing list archive)
State Not Applicable, archived
Delegated to: Andy Gross
Headers show

Commit Message

Dwivedi, Avaneesh Kumar (avani) Nov. 24, 2016, 10 a.m. UTC
This change introduces appropriate additional steps in reset sequence so
that hexagon v56 1.5.0 is brough out of reset.

Signed-off-by: Avaneesh Kumar Dwivedi <akdwived@codeaurora.org>
---
 drivers/remoteproc/qcom_q6v5_pil.c | 125 ++++++++++++++++++++++++++++++-------
 1 file changed, 104 insertions(+), 21 deletions(-)

Comments

Bjorn Andersson Dec. 9, 2016, 4:35 a.m. UTC | #1
On Thu 24 Nov 02:00 PST 2016, Avaneesh Kumar Dwivedi wrote:

> This change introduces appropriate additional steps in reset sequence so
> that hexagon v56 1.5.0 is brough out of reset.
> 

You should use the non-_relaxed version throughout this patch, unless
you have good reason not to.

> Signed-off-by: Avaneesh Kumar Dwivedi <akdwived@codeaurora.org>
> ---
>  drivers/remoteproc/qcom_q6v5_pil.c | 125 ++++++++++++++++++++++++++++++-------
>  1 file changed, 104 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c
> index c55dc9a..7ea2f53 100644
> --- a/drivers/remoteproc/qcom_q6v5_pil.c
> +++ b/drivers/remoteproc/qcom_q6v5_pil.c
> @@ -65,6 +65,8 @@
>  #define QDSP6SS_RESET_REG		0x014
>  #define QDSP6SS_GFMUX_CTL_REG		0x020
>  #define QDSP6SS_PWR_CTL_REG		0x030
> +#define QDSP6SS_MEM_PWR_CTL		0x0B0
> +#define QDSP6SS_STRAP_ACC		0x110
>  
>  /* AXI Halt Register Offsets */
>  #define AXI_HALTREQ_REG			0x0
> @@ -93,6 +95,15 @@
>  #define QDSS_BHS_ON			BIT(21)
>  #define QDSS_LDO_BYP			BIT(22)
>  
> +/* QDSP6v56 parameters */
> +#define QDSP6v56_LDO_BYP                BIT(25)
> +#define QDSP6v56_BHS_ON                 BIT(24)
> +#define QDSP6v56_CLAMP_WL               BIT(21)
> +#define QDSP6v56_CLAMP_QMC_MEM          BIT(22)
> +#define HALT_CHECK_MAX_LOOPS            (200)
> +#define QDSP6SS_XO_CBCR                 (0x0038)

Please drop these parenthesis.

> +#define QDSP6SS_ACC_OVERRIDE_VAL	0x20
> +
>  struct rproc_hexagon_res {
>  	char *hexagon_mba_image;
>  	char **proxy_reg_string;
> @@ -147,6 +158,8 @@ struct q6v5 {
>  	phys_addr_t mpss_reloc;
>  	void *mpss_region;
>  	size_t mpss_size;
> +
> +	int hexagon_ver;
>  };
>  
>  enum {
> @@ -388,35 +401,103 @@ static int q6v5_rmb_mba_wait(struct q6v5 *qproc, u32 status, int ms)
>  
>  static int q6v5proc_reset(struct q6v5 *qproc)
>  {
> -	u32 val;
> -	int ret;
> +	u64 val;
> +	int ret, i, count;

One variable per line, sorted descending by length, please.

> +
> +	/* Override the ACC value if required */
> +	if (qproc->hexagon_ver & 0x2)

This will break when we reach the 6th version, compare (==) with the
related enum.

> +		writel_relaxed(QDSP6SS_ACC_OVERRIDE_VAL,
> +				qproc->reg_base + QDSP6SS_STRAP_ACC);
>  
>  	/* Assert resets, stop core */
>  	val = readl(qproc->reg_base + QDSP6SS_RESET_REG);
>  	val |= (Q6SS_CORE_ARES | Q6SS_BUS_ARES_ENABLE | Q6SS_STOP_CORE);
>  	writel(val, qproc->reg_base + QDSP6SS_RESET_REG);
>  
> +	/* BHS require xo cbcr to be enabled */

This comment goes inside the if-statement.

> +	if (qproc->hexagon_ver & 0x2) {

==

> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_XO_CBCR);
> +		val |= 0x1;
> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_XO_CBCR);

Replace the rest of this block with:

ret = readl_poll_timeout(qproc->reg_base + QDSP6SS_XO_CBCR,
			 val, !(val & BIT(31)), 1, 200);

> +		for (count = HALT_CHECK_MAX_LOOPS; count > 0; count--) {
> +			val = readl_relaxed(qproc->reg_base + QDSP6SS_XO_CBCR);
> +			if (!(val & BIT(31)))
> +				break;
> +			udelay(1);
> +		}
> +
> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_XO_CBCR);
> +		if ((val & BIT(31)))
> +			dev_err(qproc->dev, "Failed to enable xo branch clock.\n");
> +	}
> +
> +	if (qproc->hexagon_ver & 0x2) {

==

>  	/* Enable power block headswitch, and wait for it to stabilize */
> -	val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> -	val |= QDSS_BHS_ON | QDSS_LDO_BYP;
> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> -	udelay(1);
> -
> -	/*
> -	 * Turn on memories. L2 banks should be done individually
> -	 * to minimize inrush current.
> -	 */
> -	val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> -	val |= Q6SS_SLP_RET_N | Q6SS_L2TAG_SLP_NRET_N |
> -		Q6SS_ETB_SLP_NRET_N | Q6SS_L2DATA_STBY_N;
> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> -	val |= Q6SS_L2DATA_SLP_NRET_N_2;
> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> -	val |= Q6SS_L2DATA_SLP_NRET_N_1;
> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> -	val |= Q6SS_L2DATA_SLP_NRET_N_0;
> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_PWR_CTL_REG);

Use non-relaxed version of readl and writel, please.

> +		val |= QDSP6v56_BHS_ON;
> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		udelay(1);

Please add a short comment on why this delay is needed.

>  
> +		/* Put LDO in bypass mode */
> +		val |= QDSP6v56_LDO_BYP;
> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +

Remove empty line

> +	} else {
> +		/*
> +		 * Enable power block headswitch,
> +		 * and wait for it to stabilize
> +		 */
> +		val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		val |= QDSS_BHS_ON | QDSS_LDO_BYP;
> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		udelay(1);
> +	}
> +
> +	if (qproc->hexagon_ver & 0x2) {

Why do you have:

if (ver == 2) {
	...
}

if (ver == 2) {
	...
} else {
	...
}

if (ver == 2) {
	...
} else {
	...
}

As far as I can see you should be able to merge those into one if/else.

> +		/*
> +		 * Deassert QDSP6 compiler memory clamp
> +		 */
> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		val &= ~QDSP6v56_CLAMP_QMC_MEM;
> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +
> +		/* Deassert memory peripheral sleep and L2 memory standby */
> +		val |= (Q6SS_L2DATA_STBY_N | Q6SS_SLP_RET_N);
> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +
> +		/* Turn on L1, L2, ETB and JU memories 1 at a time */
> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_MEM_PWR_CTL);
> +		for (i = 19; i >= 0; i--) {
> +			val |= BIT(i);
> +			writel_relaxed(val, qproc->reg_base +
> +						QDSP6SS_MEM_PWR_CTL);
> +			/*
> +			 * Wait for 1us for both memory peripheral and
> +			 * data array to turn on.
> +			 */
> +			 mb();

mb() ensures that your writes are ordered, it does not ensure that the
write is done before the sleep. What is the actual requirement here?

> +			udelay(1);
> +		}
> +		/* Remove word line clamp */
> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		val &= ~QDSP6v56_CLAMP_WL;
> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +	} else {
> +		/*
> +		 * Turn on memories. L2 banks should be done individually
> +		 * to minimize inrush current.
> +		 */
> +		val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		val |= Q6SS_SLP_RET_N | Q6SS_L2TAG_SLP_NRET_N |
> +			Q6SS_ETB_SLP_NRET_N | Q6SS_L2DATA_STBY_N;
> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		val |= Q6SS_L2DATA_SLP_NRET_N_2;
> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		val |= Q6SS_L2DATA_SLP_NRET_N_1;
> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +		val |= Q6SS_L2DATA_SLP_NRET_N_0;
> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> +	}
>  	/* Remove IO clamp */
>  	val &= ~Q6SS_CLAMP_IO;
>  	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);

Regards,
Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dwivedi, Avaneesh Kumar (avani) Dec. 12, 2016, 12:45 p.m. UTC | #2
On 12/9/2016 10:05 AM, Bjorn Andersson wrote:
> On Thu 24 Nov 02:00 PST 2016, Avaneesh Kumar Dwivedi wrote:
>
>> This change introduces appropriate additional steps in reset sequence so
>> that hexagon v56 1.5.0 is brough out of reset.
>>
> You should use the non-_relaxed version throughout this patch, unless
> you have good reason not to.
OK.
>
>> Signed-off-by: Avaneesh Kumar Dwivedi <akdwived@codeaurora.org>
>> ---
>>   drivers/remoteproc/qcom_q6v5_pil.c | 125 ++++++++++++++++++++++++++++++-------
>>   1 file changed, 104 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c
>> index c55dc9a..7ea2f53 100644
>> --- a/drivers/remoteproc/qcom_q6v5_pil.c
>> +++ b/drivers/remoteproc/qcom_q6v5_pil.c
>> @@ -65,6 +65,8 @@
>>   #define QDSP6SS_RESET_REG		0x014
>>   #define QDSP6SS_GFMUX_CTL_REG		0x020
>>   #define QDSP6SS_PWR_CTL_REG		0x030
>> +#define QDSP6SS_MEM_PWR_CTL		0x0B0
>> +#define QDSP6SS_STRAP_ACC		0x110
>>   
>>   /* AXI Halt Register Offsets */
>>   #define AXI_HALTREQ_REG			0x0
>> @@ -93,6 +95,15 @@
>>   #define QDSS_BHS_ON			BIT(21)
>>   #define QDSS_LDO_BYP			BIT(22)
>>   
>> +/* QDSP6v56 parameters */
>> +#define QDSP6v56_LDO_BYP                BIT(25)
>> +#define QDSP6v56_BHS_ON                 BIT(24)
>> +#define QDSP6v56_CLAMP_WL               BIT(21)
>> +#define QDSP6v56_CLAMP_QMC_MEM          BIT(22)
>> +#define HALT_CHECK_MAX_LOOPS            (200)
>> +#define QDSP6SS_XO_CBCR                 (0x0038)
> Please drop these parenthesis.
OK.
>
>> +#define QDSP6SS_ACC_OVERRIDE_VAL	0x20
>> +
>>   struct rproc_hexagon_res {
>>   	char *hexagon_mba_image;
>>   	char **proxy_reg_string;
>> @@ -147,6 +158,8 @@ struct q6v5 {
>>   	phys_addr_t mpss_reloc;
>>   	void *mpss_region;
>>   	size_t mpss_size;
>> +
>> +	int hexagon_ver;
>>   };
>>   
>>   enum {
>> @@ -388,35 +401,103 @@ static int q6v5_rmb_mba_wait(struct q6v5 *qproc, u32 status, int ms)
>>   
>>   static int q6v5proc_reset(struct q6v5 *qproc)
>>   {
>> -	u32 val;
>> -	int ret;
>> +	u64 val;
>> +	int ret, i, count;
> One variable per line, sorted descending by length, please.
OK.
>
>> +
>> +	/* Override the ACC value if required */
>> +	if (qproc->hexagon_ver & 0x2)
> This will break when we reach the 6th version, compare (==) with the
> related enum.
OK.
>
>> +		writel_relaxed(QDSP6SS_ACC_OVERRIDE_VAL,
>> +				qproc->reg_base + QDSP6SS_STRAP_ACC);
>>   
>>   	/* Assert resets, stop core */
>>   	val = readl(qproc->reg_base + QDSP6SS_RESET_REG);
>>   	val |= (Q6SS_CORE_ARES | Q6SS_BUS_ARES_ENABLE | Q6SS_STOP_CORE);
>>   	writel(val, qproc->reg_base + QDSP6SS_RESET_REG);
>>   
>> +	/* BHS require xo cbcr to be enabled */
> This comment goes inside the if-statement.
OK.
>
>> +	if (qproc->hexagon_ver & 0x2) {
> ==
>
>> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_XO_CBCR);
>> +		val |= 0x1;
>> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_XO_CBCR);
> Replace the rest of this block with:
>
> ret = readl_poll_timeout(qproc->reg_base + QDSP6SS_XO_CBCR,
> 			 val, !(val & BIT(31)), 1, 200);
OK.
>
>> +		for (count = HALT_CHECK_MAX_LOOPS; count > 0; count--) {
>> +			val = readl_relaxed(qproc->reg_base + QDSP6SS_XO_CBCR);
>> +			if (!(val & BIT(31)))
>> +				break;
>> +			udelay(1);
>> +		}
>> +
>> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_XO_CBCR);
>> +		if ((val & BIT(31)))
>> +			dev_err(qproc->dev, "Failed to enable xo branch clock.\n");
>> +	}
>> +
>> +	if (qproc->hexagon_ver & 0x2) {
> ==
>
>>   	/* Enable power block headswitch, and wait for it to stabilize */
>> -	val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> -	val |= QDSS_BHS_ON | QDSS_LDO_BYP;
>> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> -	udelay(1);
>> -
>> -	/*
>> -	 * Turn on memories. L2 banks should be done individually
>> -	 * to minimize inrush current.
>> -	 */
>> -	val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> -	val |= Q6SS_SLP_RET_N | Q6SS_L2TAG_SLP_NRET_N |
>> -		Q6SS_ETB_SLP_NRET_N | Q6SS_L2DATA_STBY_N;
>> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> -	val |= Q6SS_L2DATA_SLP_NRET_N_2;
>> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> -	val |= Q6SS_L2DATA_SLP_NRET_N_1;
>> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> -	val |= Q6SS_L2DATA_SLP_NRET_N_0;
>> -	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> Use non-relaxed version of readl and writel, please.
OK.
>
>> +		val |= QDSP6v56_BHS_ON;
>> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		udelay(1);
> Please add a short comment on why this delay is needed.
I think there is a comment that
     /* Enable power block headswitch, and wait for it to stabilize */
so block head switch need stabilization after enabling so 1 us delay, 
before LDO being put in bypass mode.
>
>>   
>> +		/* Put LDO in bypass mode */
>> +		val |= QDSP6v56_LDO_BYP;
>> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +
> Remove empty line
ok.
>
>> +	} else {
>> +		/*
>> +		 * Enable power block headswitch,
>> +		 * and wait for it to stabilize
>> +		 */
>> +		val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		val |= QDSS_BHS_ON | QDSS_LDO_BYP;
>> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		udelay(1);
>> +	}
>> +
>> +	if (qproc->hexagon_ver & 0x2) {
> Why do you have:
>
> if (ver == 2) {
> 	...
> }
>
> if (ver == 2) {
> 	...
> } else {
> 	...
> }
>
> if (ver == 2) {
> 	...
> } else {
> 	...
> }
>
> As far as I can see you should be able to merge those into one if/else.
wanted to utilize little piece of common code so put different if block.
OK will combine them.
>
>> +		/*
>> +		 * Deassert QDSP6 compiler memory clamp
>> +		 */
>> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		val &= ~QDSP6v56_CLAMP_QMC_MEM;
>> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +
>> +		/* Deassert memory peripheral sleep and L2 memory standby */
>> +		val |= (Q6SS_L2DATA_STBY_N | Q6SS_SLP_RET_N);
>> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +
>> +		/* Turn on L1, L2, ETB and JU memories 1 at a time */
>> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_MEM_PWR_CTL);
>> +		for (i = 19; i >= 0; i--) {
>> +			val |= BIT(i);
>> +			writel_relaxed(val, qproc->reg_base +
>> +						QDSP6SS_MEM_PWR_CTL);
>> +			/*
>> +			 * Wait for 1us for both memory peripheral and
>> +			 * data array to turn on.
>> +			 */
>> +			 mb();
> mb() ensures that your writes are ordered, it does not ensure that the
> write is done before the sleep. What is the actual requirement here?
As in comment, order of turning need to be serialized so this memory 
barrier.
Do u think its not required?
>> +			udelay(1);
>> +		}
>> +		/* Remove word line clamp */
>> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		val &= ~QDSP6v56_CLAMP_WL;
>> +		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +	} else {
>> +		/*
>> +		 * Turn on memories. L2 banks should be done individually
>> +		 * to minimize inrush current.
>> +		 */
>> +		val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		val |= Q6SS_SLP_RET_N | Q6SS_L2TAG_SLP_NRET_N |
>> +			Q6SS_ETB_SLP_NRET_N | Q6SS_L2DATA_STBY_N;
>> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		val |= Q6SS_L2DATA_SLP_NRET_N_2;
>> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		val |= Q6SS_L2DATA_SLP_NRET_N_1;
>> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +		val |= Q6SS_L2DATA_SLP_NRET_N_0;
>> +		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>> +	}
>>   	/* Remove IO clamp */
>>   	val &= ~Q6SS_CLAMP_IO;
>>   	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> Regards,
> Bjorn
Bjorn Andersson Dec. 13, 2016, 6:09 p.m. UTC | #3
On Mon 12 Dec 04:45 PST 2016, Dwivedi, Avaneesh Kumar (avani) wrote:
> On 12/9/2016 10:05 AM, Bjorn Andersson wrote:
> >On Thu 24 Nov 02:00 PST 2016, Avaneesh Kumar Dwivedi wrote:
[..]
> >>+
> >>+		/* Turn on L1, L2, ETB and JU memories 1 at a time */
> >>+		val = readl_relaxed(qproc->reg_base + QDSP6SS_MEM_PWR_CTL);
> >>+		for (i = 19; i >= 0; i--) {
> >>+			val |= BIT(i);
> >>+			writel_relaxed(val, qproc->reg_base +
> >>+						QDSP6SS_MEM_PWR_CTL);
> >>+			/*
> >>+			 * Wait for 1us for both memory peripheral and
> >>+			 * data array to turn on.
> >>+			 */
> >>+			 mb();
> >mb() ensures that your writes are ordered, it does not ensure that the
> >write is done before the sleep. What is the actual requirement here?
> As in comment, order of turning need to be serialized so this memory
> barrier.
> Do u think its not required?

The problem is that mb() don't actually wait for the write to finish, it
simply makes sure that any subsequent writes will come after this one.

If we want to make sure the write actually hits the hardware before the
delay we should read the register back after the write - as that would
stall execution until the write is available.

Either way, using the non-_relaxed version of writel() will be
equivalent to what you have now.

> >>+			udelay(1);
> >>+		}

Regards,
Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dwivedi, Avaneesh Kumar (avani) Dec. 13, 2016, 7:45 p.m. UTC | #4
On 12/13/2016 11:39 PM, Bjorn Andersson wrote:
> On Mon 12 Dec 04:45 PST 2016, Dwivedi, Avaneesh Kumar (avani) wrote:
>> On 12/9/2016 10:05 AM, Bjorn Andersson wrote:
>>> On Thu 24 Nov 02:00 PST 2016, Avaneesh Kumar Dwivedi wrote:
> [..]
>>>> +
>>>> +		/* Turn on L1, L2, ETB and JU memories 1 at a time */
>>>> +		val = readl_relaxed(qproc->reg_base + QDSP6SS_MEM_PWR_CTL);
>>>> +		for (i = 19; i >= 0; i--) {
>>>> +			val |= BIT(i);
>>>> +			writel_relaxed(val, qproc->reg_base +
>>>> +						QDSP6SS_MEM_PWR_CTL);
>>>> +			/*
>>>> +			 * Wait for 1us for both memory peripheral and
>>>> +			 * data array to turn on.
>>>> +			 */
>>>> +			 mb();
>>> mb() ensures that your writes are ordered, it does not ensure that the
>>> write is done before the sleep. What is the actual requirement here?
>> As in comment, order of turning need to be serialized so this memory
>> barrier.
>> Do u think its not required?
> The problem is that mb() don't actually wait for the write to finish, it
> simply makes sure that any subsequent writes will come after this one.
mb() is for below comment

/* Turn on L1, L2, ETB and JU memories 1 at a time */

While delay corresponding to

  /*
+ * Wait for 1us for both memory peripheral and
+ * data array to turn on.
+ */

>
> If we want to make sure the write actually hits the hardware before the
> delay we should read the register back after the write - as that would
> stall execution until the write is available.
>
> Either way, using the non-_relaxed version of writel() will be
> equivalent to what you have now.
Do you mean if writel is used , udelay()  should be removed? i 
understand writel will not return before register write operation is 
actually done.
udelay() is to give enough time so that after writel , there is some 
time available to turn on mem peripheral and data array.
>
>>>> +			udelay(1);
>>>> +		}
> Regards,
> Bjorn
Bjorn Andersson Dec. 13, 2016, 10:07 p.m. UTC | #5
On Tue 13 Dec 11:45 PST 2016, Dwivedi, Avaneesh Kumar (avani) wrote:
> On 12/13/2016 11:39 PM, Bjorn Andersson wrote:
[..]
> >Either way, using the non-_relaxed version of writel() will be
> >equivalent to what you have now.
> Do you mean if writel is used , udelay()  should be removed?

No, I mean that looping writel_relaxed() + wmb() is roughly
equivalent to writel(). So with the overall comment of you replacing
readl_relaxed() and writel_relaxed() with their plain readl/writel
counterparts takes care of the wmb().

> i understand
> writel will not return before register write operation is actually done.
> udelay() is to give enough time so that after writel , there is some time
> available to turn on mem peripheral and data array.

As far as I understand, wmb() will ensure that any cache coherent or
write-back buffered writes are committed before any subsequent writes.
But that this is not the same as the write has finished.

As far as I can see, the downstream code (msm-3.18) do:

for (i = 19; i >= 0; i--) {
	val |= BIT(i);
	writel_relaxed(val, MEM_PWR_CTL);
	val |= readl_relaxed(MEM_PWR_CTL);
	udelay(1);
}

I.e. for this particular version it actually does read back the value,
which will cause a wait for the write to be propagated. But I'm not sure
why this is the only version doing this.

Regards,
Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dwivedi, Avaneesh Kumar (avani) Dec. 14, 2016, 3:50 p.m. UTC | #6
On 12/14/2016 3:37 AM, Bjorn Andersson wrote:
> On Tue 13 Dec 11:45 PST 2016, Dwivedi, Avaneesh Kumar (avani) wrote:
>> On 12/13/2016 11:39 PM, Bjorn Andersson wrote:
> [..]
>>> Either way, using the non-_relaxed version of writel() will be
>>> equivalent to what you have now.
>> Do you mean if writel is used , udelay()  should be removed?
> No, I mean that looping writel_relaxed() + wmb() is roughly
> equivalent to writel(). So with the overall comment of you replacing
> readl_relaxed() and writel_relaxed() with their plain readl/writel
> counterparts takes care of the wmb().

Thanks got it, yes my requirement was to get write done before control 
reaches udelay(), so i will add readl() before udelay and remove mb()
I hope after this change i can submit next set of patches?
>
>> i understand
>> writel will not return before register write operation is actually done.
>> udelay() is to give enough time so that after writel , there is some time
>> available to turn on mem peripheral and data array.
> As far as I understand, wmb() will ensure that any cache coherent or
> write-back buffered writes are committed before any subsequent writes.
> But that this is not the same as the write has finished.
>
> As far as I can see, the downstream code (msm-3.18) do:
>
> for (i = 19; i >= 0; i--) {
> 	val |= BIT(i);
> 	writel_relaxed(val, MEM_PWR_CTL);
> 	val |= readl_relaxed(MEM_PWR_CTL);
> 	udelay(1);
> }
>
> I.e. for this particular version it actually does read back the value,
> which will cause a wait for the write to be propagated. But I'm not sure
> why this is the only version doing this.
>
> Regards,
> Bjorn
Bjorn Andersson Dec. 14, 2016, 8:13 p.m. UTC | #7
On Wed 14 Dec 07:50 PST 2016, Dwivedi, Avaneesh Kumar (avani) wrote:

> 
> 
> On 12/14/2016 3:37 AM, Bjorn Andersson wrote:
> >On Tue 13 Dec 11:45 PST 2016, Dwivedi, Avaneesh Kumar (avani) wrote:
> >>On 12/13/2016 11:39 PM, Bjorn Andersson wrote:
> >[..]
> >>>Either way, using the non-_relaxed version of writel() will be
> >>>equivalent to what you have now.
> >>Do you mean if writel is used , udelay()  should be removed?
> >No, I mean that looping writel_relaxed() + wmb() is roughly
> >equivalent to writel(). So with the overall comment of you replacing
> >readl_relaxed() and writel_relaxed() with their plain readl/writel
> >counterparts takes care of the wmb().
> 
> Thanks got it, yes my requirement was to get write done before control
> reaches udelay(), so i will add readl() before udelay and remove mb()

Sounds good.

> I hope after this change i can submit next set of patches?

Yes, please do.

Regards,
Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c
index c55dc9a..7ea2f53 100644
--- a/drivers/remoteproc/qcom_q6v5_pil.c
+++ b/drivers/remoteproc/qcom_q6v5_pil.c
@@ -65,6 +65,8 @@ 
 #define QDSP6SS_RESET_REG		0x014
 #define QDSP6SS_GFMUX_CTL_REG		0x020
 #define QDSP6SS_PWR_CTL_REG		0x030
+#define QDSP6SS_MEM_PWR_CTL		0x0B0
+#define QDSP6SS_STRAP_ACC		0x110
 
 /* AXI Halt Register Offsets */
 #define AXI_HALTREQ_REG			0x0
@@ -93,6 +95,15 @@ 
 #define QDSS_BHS_ON			BIT(21)
 #define QDSS_LDO_BYP			BIT(22)
 
+/* QDSP6v56 parameters */
+#define QDSP6v56_LDO_BYP                BIT(25)
+#define QDSP6v56_BHS_ON                 BIT(24)
+#define QDSP6v56_CLAMP_WL               BIT(21)
+#define QDSP6v56_CLAMP_QMC_MEM          BIT(22)
+#define HALT_CHECK_MAX_LOOPS            (200)
+#define QDSP6SS_XO_CBCR                 (0x0038)
+#define QDSP6SS_ACC_OVERRIDE_VAL	0x20
+
 struct rproc_hexagon_res {
 	char *hexagon_mba_image;
 	char **proxy_reg_string;
@@ -147,6 +158,8 @@  struct q6v5 {
 	phys_addr_t mpss_reloc;
 	void *mpss_region;
 	size_t mpss_size;
+
+	int hexagon_ver;
 };
 
 enum {
@@ -388,35 +401,103 @@  static int q6v5_rmb_mba_wait(struct q6v5 *qproc, u32 status, int ms)
 
 static int q6v5proc_reset(struct q6v5 *qproc)
 {
-	u32 val;
-	int ret;
+	u64 val;
+	int ret, i, count;
+
+	/* Override the ACC value if required */
+	if (qproc->hexagon_ver & 0x2)
+		writel_relaxed(QDSP6SS_ACC_OVERRIDE_VAL,
+				qproc->reg_base + QDSP6SS_STRAP_ACC);
 
 	/* Assert resets, stop core */
 	val = readl(qproc->reg_base + QDSP6SS_RESET_REG);
 	val |= (Q6SS_CORE_ARES | Q6SS_BUS_ARES_ENABLE | Q6SS_STOP_CORE);
 	writel(val, qproc->reg_base + QDSP6SS_RESET_REG);
 
+	/* BHS require xo cbcr to be enabled */
+	if (qproc->hexagon_ver & 0x2) {
+		val = readl_relaxed(qproc->reg_base + QDSP6SS_XO_CBCR);
+		val |= 0x1;
+		writel_relaxed(val, qproc->reg_base + QDSP6SS_XO_CBCR);
+		for (count = HALT_CHECK_MAX_LOOPS; count > 0; count--) {
+			val = readl_relaxed(qproc->reg_base + QDSP6SS_XO_CBCR);
+			if (!(val & BIT(31)))
+				break;
+			udelay(1);
+		}
+
+		val = readl_relaxed(qproc->reg_base + QDSP6SS_XO_CBCR);
+		if ((val & BIT(31)))
+			dev_err(qproc->dev, "Failed to enable xo branch clock.\n");
+	}
+
+	if (qproc->hexagon_ver & 0x2) {
 	/* Enable power block headswitch, and wait for it to stabilize */
-	val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
-	val |= QDSS_BHS_ON | QDSS_LDO_BYP;
-	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
-	udelay(1);
-
-	/*
-	 * Turn on memories. L2 banks should be done individually
-	 * to minimize inrush current.
-	 */
-	val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
-	val |= Q6SS_SLP_RET_N | Q6SS_L2TAG_SLP_NRET_N |
-		Q6SS_ETB_SLP_NRET_N | Q6SS_L2DATA_STBY_N;
-	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
-	val |= Q6SS_L2DATA_SLP_NRET_N_2;
-	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
-	val |= Q6SS_L2DATA_SLP_NRET_N_1;
-	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
-	val |= Q6SS_L2DATA_SLP_NRET_N_0;
-	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		val = readl_relaxed(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		val |= QDSP6v56_BHS_ON;
+		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		udelay(1);
 
+		/* Put LDO in bypass mode */
+		val |= QDSP6v56_LDO_BYP;
+		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+
+	} else {
+		/*
+		 * Enable power block headswitch,
+		 * and wait for it to stabilize
+		 */
+		val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		val |= QDSS_BHS_ON | QDSS_LDO_BYP;
+		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		udelay(1);
+	}
+
+	if (qproc->hexagon_ver & 0x2) {
+		/*
+		 * Deassert QDSP6 compiler memory clamp
+		 */
+		val = readl_relaxed(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		val &= ~QDSP6v56_CLAMP_QMC_MEM;
+		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+
+		/* Deassert memory peripheral sleep and L2 memory standby */
+		val |= (Q6SS_L2DATA_STBY_N | Q6SS_SLP_RET_N);
+		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+
+		/* Turn on L1, L2, ETB and JU memories 1 at a time */
+		val = readl_relaxed(qproc->reg_base + QDSP6SS_MEM_PWR_CTL);
+		for (i = 19; i >= 0; i--) {
+			val |= BIT(i);
+			writel_relaxed(val, qproc->reg_base +
+						QDSP6SS_MEM_PWR_CTL);
+			/*
+			 * Wait for 1us for both memory peripheral and
+			 * data array to turn on.
+			 */
+			 mb();
+			udelay(1);
+		}
+		/* Remove word line clamp */
+		val = readl_relaxed(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		val &= ~QDSP6v56_CLAMP_WL;
+		writel_relaxed(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+	} else {
+		/*
+		 * Turn on memories. L2 banks should be done individually
+		 * to minimize inrush current.
+		 */
+		val = readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		val |= Q6SS_SLP_RET_N | Q6SS_L2TAG_SLP_NRET_N |
+			Q6SS_ETB_SLP_NRET_N | Q6SS_L2DATA_STBY_N;
+		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		val |= Q6SS_L2DATA_SLP_NRET_N_2;
+		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		val |= Q6SS_L2DATA_SLP_NRET_N_1;
+		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+		val |= Q6SS_L2DATA_SLP_NRET_N_0;
+		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
+	}
 	/* Remove IO clamp */
 	val &= ~Q6SS_CLAMP_IO;
 	writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
@@ -1031,6 +1112,8 @@  static int q6v5_probe(struct platform_device *pdev)
 	}
 	qproc->active_reg_count = count;
 
+	qproc->hexagon_ver = desc->hexagon_ver;
+
 	ret = q6v5_request_irq(qproc, pdev, "wdog", q6v5_wdog_interrupt);
 	if (ret < 0)
 		goto free_rproc;