[v3,1/2] remoteproc: qcom_q6v5_mss: Don't reassign mpss region on shutdown
diff mbox series

Message ID 20200204062641.393949-2-bjorn.andersson@linaro.org
State New
Headers show
Series
  • remoteproc: mss: Improve mem_assign and firmware load
Related show

Commit Message

Bjorn Andersson Feb. 4, 2020, 6:26 a.m. UTC
Trying to reclaim mpss memory while the mba is not running causes the
system to crash on devices with security fuses blown, so leave it
assigned to the remote on shutdown and recover it on a subsequent boot.

Fixes: 6c5a9dc2481b ("remoteproc: qcom: Make secure world call for mem ownership switch")
Cc: stable@vger.kernel.org
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---

Changes since v2:
- The assignment of mpss memory back to Linux is rejected in the coredump case
  on production devices, so check the return value of q6v5_xfer_mem_ownership()
  before attempting to memcpy() the data.

 drivers/remoteproc/qcom_q6v5_mss.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

Comments

Mathieu Poirier Feb. 10, 2020, 11:05 p.m. UTC | #1
Hi Bjorn,

On Mon, Feb 03, 2020 at 10:26:40PM -0800, Bjorn Andersson wrote:
> Trying to reclaim mpss memory while the mba is not running causes the
> system to crash on devices with security fuses blown, so leave it
> assigned to the remote on shutdown and recover it on a subsequent boot.
> 
> Fixes: 6c5a9dc2481b ("remoteproc: qcom: Make secure world call for mem ownership switch")
> Cc: stable@vger.kernel.org
> Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
> ---
> 
> Changes since v2:
> - The assignment of mpss memory back to Linux is rejected in the coredump case
>   on production devices, so check the return value of q6v5_xfer_mem_ownership()
>   before attempting to memcpy() the data.
> 
>  drivers/remoteproc/qcom_q6v5_mss.c | 23 +++++++++++++----------
>  1 file changed, 13 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c
> index 471128a2e723..25c03a26bf88 100644
> --- a/drivers/remoteproc/qcom_q6v5_mss.c
> +++ b/drivers/remoteproc/qcom_q6v5_mss.c
> @@ -887,11 +887,6 @@ static void q6v5_mba_reclaim(struct q6v5 *qproc)
>  		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
>  	}
>  
> -	ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
> -				      false, qproc->mpss_phys,
> -				      qproc->mpss_size);
> -	WARN_ON(ret);
> -
>  	q6v5_reset_assert(qproc);
>  
>  	q6v5_clk_disable(qproc->dev, qproc->reset_clks,
> @@ -981,6 +976,10 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
>  			max_addr = ALIGN(phdr->p_paddr + phdr->p_memsz, SZ_4K);
>  	}
>  
> +	/* Try to reset ownership back to Linux */
> +	q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, false,
> +				qproc->mpss_phys, qproc->mpss_size);

Would you mind adding more chatter here, something like what is mentioned in the
changelog?  That way anyone trying to review this code doesn't have to suffer
through the same mental gymnastics.

> +
>  	mpss_reloc = relocate ? min_addr : qproc->mpss_phys;
>  	qproc->mpss_reloc = mpss_reloc;
>  	/* Load firmware segments */
> @@ -1070,8 +1069,16 @@ static void qcom_q6v5_dump_segment(struct rproc *rproc,
>  	void *ptr = rproc_da_to_va(rproc, segment->da, segment->size);
>  
>  	/* Unlock mba before copying segments */
> -	if (!qproc->dump_mba_loaded)
> +	if (!qproc->dump_mba_loaded) {
>  		ret = q6v5_mba_load(qproc);
> +		if (!ret) {
> +			/* Try to reset ownership back to Linux */
> +			ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
> +						      false,
> +						      qproc->mpss_phys,
> +						      qproc->mpss_size);
> +		}

I'm a bit puzzled here as to why a different reclaim strategy is needed.  It is
clear to me that if q6v5_mba_load() returns 0 then the MBA is running and we can
safely reclaim ownership of the memory.  But is it absolutely needed when we
know that 1) the MCU has crashed and 2) said memory will be reclaimed in
q6v5_mpss_load()?

If so I think an explanation in the code is needed.

I also assume there is no way to know if the mba is running, hence not taking
any chance.  If that's the case it would be nice to add that to the comment in
q6v5_mpss_load().

Thanks,
Mathieu

> +	}
>  
>  	if (!ptr || ret)
>  		memset(dest, 0xff, segment->size);
> @@ -1123,10 +1130,6 @@ static int q6v5_start(struct rproc *rproc)
>  	return 0;
>  
>  reclaim_mpss:
> -	xfermemop_ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
> -						false, qproc->mpss_phys,
> -						qproc->mpss_size);
> -	WARN_ON(xfermemop_ret);
>  	q6v5_mba_reclaim(qproc);
>  
>  	return ret;
> -- 
> 2.23.0
>
Bjorn Andersson Feb. 11, 2020, 1:16 a.m. UTC | #2
On Mon 10 Feb 15:05 PST 2020, Mathieu Poirier wrote:

> Hi Bjorn,
> 
> On Mon, Feb 03, 2020 at 10:26:40PM -0800, Bjorn Andersson wrote:
> > Trying to reclaim mpss memory while the mba is not running causes the
> > system to crash on devices with security fuses blown, so leave it
> > assigned to the remote on shutdown and recover it on a subsequent boot.
> > 
> > Fixes: 6c5a9dc2481b ("remoteproc: qcom: Make secure world call for mem ownership switch")
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
> > ---
> > 
> > Changes since v2:
> > - The assignment of mpss memory back to Linux is rejected in the coredump case
> >   on production devices, so check the return value of q6v5_xfer_mem_ownership()
> >   before attempting to memcpy() the data.
> > 
> >  drivers/remoteproc/qcom_q6v5_mss.c | 23 +++++++++++++----------
> >  1 file changed, 13 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c
> > index 471128a2e723..25c03a26bf88 100644
> > --- a/drivers/remoteproc/qcom_q6v5_mss.c
> > +++ b/drivers/remoteproc/qcom_q6v5_mss.c
> > @@ -887,11 +887,6 @@ static void q6v5_mba_reclaim(struct q6v5 *qproc)
> >  		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> >  	}
> >  
> > -	ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
> > -				      false, qproc->mpss_phys,
> > -				      qproc->mpss_size);
> > -	WARN_ON(ret);
> > -
> >  	q6v5_reset_assert(qproc);
> >  
> >  	q6v5_clk_disable(qproc->dev, qproc->reset_clks,
> > @@ -981,6 +976,10 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
> >  			max_addr = ALIGN(phdr->p_paddr + phdr->p_memsz, SZ_4K);
> >  	}
> >  
> > +	/* Try to reset ownership back to Linux */
> > +	q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, false,
> > +				qproc->mpss_phys, qproc->mpss_size);
> 
> Would you mind adding more chatter here, something like what is mentioned in the
> changelog?  That way anyone trying to review this code doesn't have to suffer
> through the same mental gymnastics.
> 

Sure thing, as this patch shows this dynamic wasn't clear - and this
patch is based on my observations. With it we no longer crash the entire
system by hitting a security violation during a crash, but there's still
some details that I'm uncertain about.

> > +
> >  	mpss_reloc = relocate ? min_addr : qproc->mpss_phys;
> >  	qproc->mpss_reloc = mpss_reloc;
> >  	/* Load firmware segments */
> > @@ -1070,8 +1069,16 @@ static void qcom_q6v5_dump_segment(struct rproc *rproc,
> >  	void *ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> >  
> >  	/* Unlock mba before copying segments */
> > -	if (!qproc->dump_mba_loaded)
> > +	if (!qproc->dump_mba_loaded) {
> >  		ret = q6v5_mba_load(qproc);
> > +		if (!ret) {
> > +			/* Try to reset ownership back to Linux */
> > +			ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
> > +						      false,
> > +						      qproc->mpss_phys,
> > +						      qproc->mpss_size);
> > +		}
> 
> I'm a bit puzzled here as to why a different reclaim strategy is needed.  It is
> clear to me that if q6v5_mba_load() returns 0 then the MBA is running and we can
> safely reclaim ownership of the memory.  But is it absolutely needed when we
> know that 1) the MCU has crashed and 2) said memory will be reclaimed in
> q6v5_mpss_load()?
> 

The ownership transfer here is a jump into secure world, which somehow
together with the firmware running on the modem processor will update
the access permissions for the mpss memory region.

As we enter this function the recovery handling in the core has just
stopped the remote processor, so we know it's off. As such we must first
boot the remote processor again, in order to reclaim the access to the
mpss memory region.

New in this revision is the fact that this operation might actually be
rejected (e.g. on end-user hardware).

So we need to guard the memcpy below against both of these cases: unless
we've successfully booted the modem processor and gotten permission to
read the mpss memory, this operation will trigger a security violation
and the device will reboot.

> If so I think an explanation in the code is needed.
> 

Makes sense, I will formulate above explanation into a comment. As well
as review the other callers of q6v5_xfer_mem_ownership().

> I also assume there is no way to know if the mba is running, hence not taking
> any chance.  If that's the case it would be nice to add that to the comment in
> q6v5_mpss_load().
> 

We know that we enter q6v5_mpss_load() with the modem processor just
booted, but the memory assignment is there to handle the case where the
mpss memory region for some reason was left in the hands of the modem.
I will have to do some more digging to figure out if this is a valid
scenario or not.

Thanks for your review Mathieu!

Regards,
Bjorn

> Thanks,
> Mathieu
> 
> > +	}
> >  
> >  	if (!ptr || ret)
> >  		memset(dest, 0xff, segment->size);
> > @@ -1123,10 +1130,6 @@ static int q6v5_start(struct rproc *rproc)
> >  	return 0;
> >  
> >  reclaim_mpss:
> > -	xfermemop_ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
> > -						false, qproc->mpss_phys,
> > -						qproc->mpss_size);
> > -	WARN_ON(xfermemop_ret);
> >  	q6v5_mba_reclaim(qproc);
> >  
> >  	return ret;
> > -- 
> > 2.23.0
> >
Mathieu Poirier Feb. 11, 2020, 6:42 p.m. UTC | #3
On Mon, 10 Feb 2020 at 18:16, Bjorn Andersson
<bjorn.andersson@linaro.org> wrote:
>
> On Mon 10 Feb 15:05 PST 2020, Mathieu Poirier wrote:
>
> > Hi Bjorn,
> >
> > On Mon, Feb 03, 2020 at 10:26:40PM -0800, Bjorn Andersson wrote:
> > > Trying to reclaim mpss memory while the mba is not running causes the
> > > system to crash on devices with security fuses blown, so leave it
> > > assigned to the remote on shutdown and recover it on a subsequent boot.
> > >
> > > Fixes: 6c5a9dc2481b ("remoteproc: qcom: Make secure world call for mem ownership switch")
> > > Cc: stable@vger.kernel.org
> > > Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
> > > ---
> > >
> > > Changes since v2:
> > > - The assignment of mpss memory back to Linux is rejected in the coredump case
> > >   on production devices, so check the return value of q6v5_xfer_mem_ownership()
> > >   before attempting to memcpy() the data.
> > >
> > >  drivers/remoteproc/qcom_q6v5_mss.c | 23 +++++++++++++----------
> > >  1 file changed, 13 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c
> > > index 471128a2e723..25c03a26bf88 100644
> > > --- a/drivers/remoteproc/qcom_q6v5_mss.c
> > > +++ b/drivers/remoteproc/qcom_q6v5_mss.c
> > > @@ -887,11 +887,6 @@ static void q6v5_mba_reclaim(struct q6v5 *qproc)
> > >             writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
> > >     }
> > >
> > > -   ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
> > > -                                 false, qproc->mpss_phys,
> > > -                                 qproc->mpss_size);
> > > -   WARN_ON(ret);
> > > -
> > >     q6v5_reset_assert(qproc);
> > >
> > >     q6v5_clk_disable(qproc->dev, qproc->reset_clks,
> > > @@ -981,6 +976,10 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
> > >                     max_addr = ALIGN(phdr->p_paddr + phdr->p_memsz, SZ_4K);
> > >     }
> > >
> > > +   /* Try to reset ownership back to Linux */
> > > +   q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, false,
> > > +                           qproc->mpss_phys, qproc->mpss_size);
> >
> > Would you mind adding more chatter here, something like what is mentioned in the
> > changelog?  That way anyone trying to review this code doesn't have to suffer
> > through the same mental gymnastics.
> >
>
> Sure thing, as this patch shows this dynamic wasn't clear - and this
> patch is based on my observations. With it we no longer crash the entire
> system by hitting a security violation during a crash, but there's still
> some details that I'm uncertain about.
>
> > > +
> > >     mpss_reloc = relocate ? min_addr : qproc->mpss_phys;
> > >     qproc->mpss_reloc = mpss_reloc;
> > >     /* Load firmware segments */
> > > @@ -1070,8 +1069,16 @@ static void qcom_q6v5_dump_segment(struct rproc *rproc,
> > >     void *ptr = rproc_da_to_va(rproc, segment->da, segment->size);
> > >
> > >     /* Unlock mba before copying segments */
> > > -   if (!qproc->dump_mba_loaded)
> > > +   if (!qproc->dump_mba_loaded) {
> > >             ret = q6v5_mba_load(qproc);
> > > +           if (!ret) {
> > > +                   /* Try to reset ownership back to Linux */
> > > +                   ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
> > > +                                                 false,
> > > +                                                 qproc->mpss_phys,
> > > +                                                 qproc->mpss_size);
> > > +           }
> >
> > I'm a bit puzzled here as to why a different reclaim strategy is needed.  It is
> > clear to me that if q6v5_mba_load() returns 0 then the MBA is running and we can
> > safely reclaim ownership of the memory.  But is it absolutely needed when we
> > know that 1) the MCU has crashed and 2) said memory will be reclaimed in
> > q6v5_mpss_load()?
> >
>
> The ownership transfer here is a jump into secure world, which somehow
> together with the firmware running on the modem processor will update
> the access permissions for the mpss memory region.
>
> As we enter this function the recovery handling in the core has just
> stopped the remote processor, so we know it's off. As such we must first
> boot the remote processor again, in order to reclaim the access to the
> mpss memory region.
>
> New in this revision is the fact that this operation might actually be
> rejected (e.g. on end-user hardware).
>
> So we need to guard the memcpy below against both of these cases: unless
> we've successfully booted the modem processor and gotten permission to
> read the mpss memory, this operation will trigger a security violation
> and the device will reboot.
>
> > If so I think an explanation in the code is needed.
> >
>
> Makes sense, I will formulate above explanation into a comment. As well
> as review the other callers of q6v5_xfer_mem_ownership().
>
> > I also assume there is no way to know if the mba is running, hence not taking
> > any chance.  If that's the case it would be nice to add that to the comment in
> > q6v5_mpss_load().
> >
>
> We know that we enter q6v5_mpss_load() with the modem processor just
> booted, but the memory assignment is there to handle the case where the
> mpss memory region for some reason was left in the hands of the modem.
> I will have to do some more digging to figure out if this is a valid
> scenario or not.

I'm really happy that you're also not sure about this patch... I spent
hours (no joke) trying to figure out the workflow and logic of using
q6v5_xfer_mem_ownership() and even then I'm ambivalent...  Carefully
understanding and documenting the scenarios we are trying to handle will
go a long way in terms of future stability of the system.

>
> Thanks for your review Mathieu!
>
> Regards,
> Bjorn
>
> > Thanks,
> > Mathieu
> >
> > > +   }
> > >
> > >     if (!ptr || ret)
> > >             memset(dest, 0xff, segment->size);
> > > @@ -1123,10 +1130,6 @@ static int q6v5_start(struct rproc *rproc)
> > >     return 0;
> > >
> > >  reclaim_mpss:
> > > -   xfermemop_ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
> > > -                                           false, qproc->mpss_phys,
> > > -                                           qproc->mpss_size);
> > > -   WARN_ON(xfermemop_ret);
> > >     q6v5_mba_reclaim(qproc);
> > >
> > >     return ret;
> > > --
> > > 2.23.0
> > >

Patch
diff mbox series

diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c
index 471128a2e723..25c03a26bf88 100644
--- a/drivers/remoteproc/qcom_q6v5_mss.c
+++ b/drivers/remoteproc/qcom_q6v5_mss.c
@@ -887,11 +887,6 @@  static void q6v5_mba_reclaim(struct q6v5 *qproc)
 		writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
 	}
 
-	ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
-				      false, qproc->mpss_phys,
-				      qproc->mpss_size);
-	WARN_ON(ret);
-
 	q6v5_reset_assert(qproc);
 
 	q6v5_clk_disable(qproc->dev, qproc->reset_clks,
@@ -981,6 +976,10 @@  static int q6v5_mpss_load(struct q6v5 *qproc)
 			max_addr = ALIGN(phdr->p_paddr + phdr->p_memsz, SZ_4K);
 	}
 
+	/* Try to reset ownership back to Linux */
+	q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, false,
+				qproc->mpss_phys, qproc->mpss_size);
+
 	mpss_reloc = relocate ? min_addr : qproc->mpss_phys;
 	qproc->mpss_reloc = mpss_reloc;
 	/* Load firmware segments */
@@ -1070,8 +1069,16 @@  static void qcom_q6v5_dump_segment(struct rproc *rproc,
 	void *ptr = rproc_da_to_va(rproc, segment->da, segment->size);
 
 	/* Unlock mba before copying segments */
-	if (!qproc->dump_mba_loaded)
+	if (!qproc->dump_mba_loaded) {
 		ret = q6v5_mba_load(qproc);
+		if (!ret) {
+			/* Try to reset ownership back to Linux */
+			ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
+						      false,
+						      qproc->mpss_phys,
+						      qproc->mpss_size);
+		}
+	}
 
 	if (!ptr || ret)
 		memset(dest, 0xff, segment->size);
@@ -1123,10 +1130,6 @@  static int q6v5_start(struct rproc *rproc)
 	return 0;
 
 reclaim_mpss:
-	xfermemop_ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm,
-						false, qproc->mpss_phys,
-						qproc->mpss_size);
-	WARN_ON(xfermemop_ret);
 	q6v5_mba_reclaim(qproc);
 
 	return ret;