
[v7,4/4] drm/msm: Extend gpu devcore dumps with pgtbl info

Message ID 20240820171652.145673-5-robdclark@gmail.com (mailing list archive)
State Superseded
Series io-pgtable-arm + drm/msm: Extend iova fault debugging

Commit Message

Rob Clark Aug. 20, 2024, 5:16 p.m. UTC
From: Rob Clark <robdclark@chromium.org>

In the case of iova fault triggered devcore dumps, include additional
debug information based on what we think are the current page tables,
including the TTBR0 value (which should match what we have in
adreno_smmu_fault_info unless things have gone horribly wrong), and
the pagetable entries traversed in the process of resolving the
faulting iova.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 ++++++++++
 drivers/gpu/drm/msm/msm_gpu.c           |  9 +++++++++
 drivers/gpu/drm/msm/msm_gpu.h           |  8 ++++++++
 drivers/gpu/drm/msm/msm_iommu.c         | 22 ++++++++++++++++++++++
 drivers/gpu/drm/msm/msm_mmu.h           |  3 ++-
 5 files changed, 51 insertions(+), 1 deletion(-)
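For reference, the new fields appear in the devcore dump's iova fault section roughly as follows; the layout comes from the drm_printf() calls added below, but the values here are made up purely for illustration:

pgtable-fault-info:
  - ttbr0: 0000000081234000
  - asid: 0
  - ptes: 0000000081235003 0000000081236003 0000000081237003 0000000000000000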

Comments

Akhil P Oommen Aug. 22, 2024, 8:34 p.m. UTC | #1
On Tue, Aug 20, 2024 at 10:16:47AM -0700, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
> 
> In the case of iova fault triggered devcore dumps, include additional
> debug information based on what we think are the current page tables,
> including the TTBR0 value (which should match what we have in
> adreno_smmu_fault_info unless things have gone horribly wrong), and
> the pagetable entries traversed in the process of resolving the
> faulting iova.
> 
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 ++++++++++
>  drivers/gpu/drm/msm/msm_gpu.c           |  9 +++++++++
>  drivers/gpu/drm/msm/msm_gpu.h           |  8 ++++++++
>  drivers/gpu/drm/msm/msm_iommu.c         | 22 ++++++++++++++++++++++
>  drivers/gpu/drm/msm/msm_mmu.h           |  3 ++-
>  5 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index 1c6626747b98..3848b5a64351 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -864,6 +864,16 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
>  		drm_printf(p, "  - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ");
>  		drm_printf(p, "  - type=%s\n", info->type);
>  		drm_printf(p, "  - source=%s\n", info->block);
> +
> +		/* Information extracted from what we think are the current
> +		 * pgtables.  Hopefully the TTBR0 matches what we've extracted
> +		 * from the SMMU registers in smmu_info!
> +		 */
> +		drm_puts(p, "pgtable-fault-info:\n");
> +		drm_printf(p, "  - ttbr0: %.16llx\n", (u64)info->pgtbl_ttbr0);

"0x" prefix? Otherwise, it is a bit confusing when the below one is
decimal.

> +		drm_printf(p, "  - asid: %d\n", info->asid);
> +		drm_printf(p, "  - ptes: %.16llx %.16llx %.16llx %.16llx\n",
> +			   info->ptes[0], info->ptes[1], info->ptes[2], info->ptes[3]);

Does crashdec decode this?

-Akhil.

Rob Clark Aug. 22, 2024, 11:15 p.m. UTC | #2
On Thu, Aug 22, 2024 at 1:34 PM Akhil P Oommen <quic_akhilpo@quicinc.com> wrote:
>
> On Tue, Aug 20, 2024 at 10:16:47AM -0700, Rob Clark wrote:
> > From: Rob Clark <robdclark@chromium.org>
> >
> > In the case of iova fault triggered devcore dumps, include additional
> > debug information based on what we think are the current page tables,
> > including the TTBR0 value (which should match what we have in
> > adreno_smmu_fault_info unless things have gone horribly wrong), and
> > the pagetable entries traversed in the process of resolving the
> > faulting iova.
> >
> > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > ---
> >  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 ++++++++++
> >  drivers/gpu/drm/msm/msm_gpu.c           |  9 +++++++++
> >  drivers/gpu/drm/msm/msm_gpu.h           |  8 ++++++++
> >  drivers/gpu/drm/msm/msm_iommu.c         | 22 ++++++++++++++++++++++
> >  drivers/gpu/drm/msm/msm_mmu.h           |  3 ++-
> >  5 files changed, 51 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > index 1c6626747b98..3848b5a64351 100644
> > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > @@ -864,6 +864,16 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
> >               drm_printf(p, "  - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ");
> >               drm_printf(p, "  - type=%s\n", info->type);
> >               drm_printf(p, "  - source=%s\n", info->block);
> > +
> > +             /* Information extracted from what we think are the current
> > +              * pgtables.  Hopefully the TTBR0 matches what we've extracted
> > +              * from the SMMU registers in smmu_info!
> > +              */
> > +             drm_puts(p, "pgtable-fault-info:\n");
> > +             drm_printf(p, "  - ttbr0: %.16llx\n", (u64)info->pgtbl_ttbr0);
>
> "0x" prefix? Otherwise, it is a bit confusing when the below one is
> decimal.

Mixed feelings; the extra 0x is annoying when pasting into calc, which
is a simple way to get binary decoding.

OTOH, none of this is machine decoded, so I guess we could change it.

> > +             drm_printf(p, "  - asid: %d\n", info->asid);
> > +             drm_printf(p, "  - ptes: %.16llx %.16llx %.16llx %.16llx\n",
> > +                        info->ptes[0], info->ptes[1], info->ptes[2], info->ptes[3]);
>
> Does crashdec decode this?

No, it's just passed through for human eyeballs.

crashdec _does_ have some logic to flag buffers that are "near" the
faulting iova to help identify if the fault is an underflow/overflow
(which has been, along with the pte trail, useful to debug some
issues)

BR,
-R

Akhil P Oommen Aug. 26, 2024, 6:29 p.m. UTC | #3
On Thu, Aug 22, 2024 at 04:15:24PM -0700, Rob Clark wrote:
> On Thu, Aug 22, 2024 at 1:34 PM Akhil P Oommen <quic_akhilpo@quicinc.com> wrote:
> >
> > On Tue, Aug 20, 2024 at 10:16:47AM -0700, Rob Clark wrote:
> > > From: Rob Clark <robdclark@chromium.org>
> > >
> > > In the case of iova fault triggered devcore dumps, include additional
> > > debug information based on what we think are the current page tables,
> > > including the TTBR0 value (which should match what we have in
> > > adreno_smmu_fault_info unless things have gone horribly wrong), and
> > > the pagetable entries traversed in the process of resolving the
> > > faulting iova.
> > >
> > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > ---
> > >  drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 ++++++++++
> > >  drivers/gpu/drm/msm/msm_gpu.c           |  9 +++++++++
> > >  drivers/gpu/drm/msm/msm_gpu.h           |  8 ++++++++
> > >  drivers/gpu/drm/msm/msm_iommu.c         | 22 ++++++++++++++++++++++
> > >  drivers/gpu/drm/msm/msm_mmu.h           |  3 ++-
> > >  5 files changed, 51 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > index 1c6626747b98..3848b5a64351 100644
> > > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> > > @@ -864,6 +864,16 @@ void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
> > >               drm_printf(p, "  - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ");
> > >               drm_printf(p, "  - type=%s\n", info->type);
> > >               drm_printf(p, "  - source=%s\n", info->block);
> > > +
> > > +             /* Information extracted from what we think are the current
> > > +              * pgtables.  Hopefully the TTBR0 matches what we've extracted
> > > +              * from the SMMU registers in smmu_info!
> > > +              */
> > > +             drm_puts(p, "pgtable-fault-info:\n");
> > > +             drm_printf(p, "  - ttbr0: %.16llx\n", (u64)info->pgtbl_ttbr0);
> >
> > "0x" prefix? Otherwise, it is a bit confusing when the below one is
> > decimal.
> 
> Mixed feelings; the extra 0x is annoying when pasting into calc, which
> is a simple way to get binary decoding.
> 
> OTOH, none of this is machine decoded, so I guess we could change it.

On second thought, I think it is fine here since this is an address. It
would probably be helpful for the pte values below, though.

> 
> > > +             drm_printf(p, "  - asid: %d\n", info->asid);
> > > +             drm_printf(p, "  - ptes: %.16llx %.16llx %.16llx %.16llx\n",
> > > +                        info->ptes[0], info->ptes[1], info->ptes[2], info->ptes[3]);
> >
> > Does crashdec decode this?
> 
> No, it's just passed through for human eyeballs.
> 
> crashdec _does_ have some logic to flag buffers that are "near" the
> faulting iova to help identify if the fault is an underflow/overflow
> (which has been, along with the pte trail, useful to debug some
> issues)

Alright.

Reviewed-by: Akhil P Oommen <quic_akhilpo@quicinc.com>

-Akhil.
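
As Rob notes above, crashdec does not decode the pte values, but it can flag buffers that sit "near" the faulting iova. A minimal sketch of that idea (not crashdec's actual implementation; the names here are made up) could look like this:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct bo_info {
	uint64_t iova;	/* GPU virtual address of the buffer */
	uint64_t size;	/* size of the buffer in bytes */
};

/* Flag any BO that ends shortly before the faulting iova (suggesting an
 * access that overflowed past the end of that buffer) or starts shortly
 * after it (suggesting an underflow).
 */
static void flag_bos_near_fault(const struct bo_info *bos, unsigned int nr_bos,
				uint64_t fault_iova, uint64_t margin)
{
	for (unsigned int i = 0; i < nr_bos; i++) {
		uint64_t start = bos[i].iova;
		uint64_t end = bos[i].iova + bos[i].size;

		if (fault_iova >= end && fault_iova - end < margin)
			printf("BO %u ends at %#" PRIx64 ", just before the fault (possible overflow)\n",
			       i, end);
		else if (fault_iova < start && start - fault_iova < margin)
			printf("BO %u starts at %#" PRIx64 ", just after the fault (possible underflow)\n",
			       i, start);
	}
}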

Patch

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 1c6626747b98..3848b5a64351 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -864,6 +864,16 @@  void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
 		drm_printf(p, "  - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ");
 		drm_printf(p, "  - type=%s\n", info->type);
 		drm_printf(p, "  - source=%s\n", info->block);
+
+		/* Information extracted from what we think are the current
+		 * pgtables.  Hopefully the TTBR0 matches what we've extracted
+		 * from the SMMU registers in smmu_info!
+		 */
+		drm_puts(p, "pgtable-fault-info:\n");
+		drm_printf(p, "  - ttbr0: %.16llx\n", (u64)info->pgtbl_ttbr0);
+		drm_printf(p, "  - asid: %d\n", info->asid);
+		drm_printf(p, "  - ptes: %.16llx %.16llx %.16llx %.16llx\n",
+			   info->ptes[0], info->ptes[1], info->ptes[2], info->ptes[3]);
 	}
 
 	drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 3666b42b4ecd..bf2f8b2a7ccc 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -281,6 +281,15 @@  static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
 	if (submit) {
 		int i;
 
+		if (state->fault_info.ttbr0) {
+			struct msm_gpu_fault_info *info = &state->fault_info;
+			struct msm_mmu *mmu = submit->aspace->mmu;
+
+			msm_iommu_pagetable_params(mmu, &info->pgtbl_ttbr0,
+						   &info->asid);
+			msm_iommu_pagetable_walk(mmu, info->iova, info->ptes);
+		}
+
 		state->bos = kcalloc(submit->nr_bos,
 			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 1f02bb9956be..82e838ba8c80 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -101,6 +101,14 @@  struct msm_gpu_fault_info {
 	int flags;
 	const char *type;
 	const char *block;
+
+	/* Information about what we think/expect is the current SMMU state,
+	 * for example expected_ttbr0 should match smmu_info.ttbr0 which
+	 * was read back from SMMU registers.
+	 */
+	phys_addr_t pgtbl_ttbr0;
+	u64 ptes[4];
+	int asid;
 };
 
 /**
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 2a94e82316f9..3e692818ba1f 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -195,6 +195,28 @@  struct iommu_domain_geometry *msm_iommu_get_geometry(struct msm_mmu *mmu)
 	return &iommu->domain->geometry;
 }
 
+int
+msm_iommu_pagetable_walk(struct msm_mmu *mmu, unsigned long iova, uint64_t ptes[4])
+{
+	struct msm_iommu_pagetable *pagetable;
+	struct arm_lpae_io_pgtable_walk_data wd = {};
+
+	if (mmu->type != MSM_MMU_IOMMU_PAGETABLE)
+		return -EINVAL;
+
+	pagetable = to_pagetable(mmu);
+
+	if (!pagetable->pgtbl_ops->pgtable_walk)
+		return -EINVAL;
+
+	pagetable->pgtbl_ops->pgtable_walk(pagetable->pgtbl_ops, iova, &wd);
+
+	for (int i = 0; i < ARRAY_SIZE(wd.ptes); i++)
+		ptes[i] = wd.ptes[i];
+
+	return 0;
+}
+
 static const struct msm_mmu_funcs pagetable_funcs = {
 		.map = msm_iommu_pagetable_map,
 		.unmap = msm_iommu_pagetable_unmap,
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
index 88af4f490881..96e509bd96a6 100644
--- a/drivers/gpu/drm/msm/msm_mmu.h
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -53,7 +53,8 @@  static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg,
 struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent);
 
 int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,
-		int *asid);
+			       int *asid);
+int msm_iommu_pagetable_walk(struct msm_mmu *mmu, unsigned long iova, uint64_t ptes[4]);
 struct iommu_domain_geometry *msm_iommu_get_geometry(struct msm_mmu *mmu);
 
 #endif /* __MSM_MMU_H__ */
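
As a closing aside on reading the captured ptes[] values from a dump: in ARM LPAE format descriptors, bit 0 is the valid bit, so the first entry with bit 0 clear shows where the walk stopped. A minimal, hypothetical helper (not part of the patch) to spot that:

#include <stdint.h>

/* Return the first level whose captured descriptor has the LPAE valid
 * bit (bit 0) clear, or -1 if all four are valid (i.e. the fault was not
 * caused by a missing mapping).  Note that a walk which legitimately
 * ends early at a large block mapping also leaves the deeper entries
 * as zero.
 */
static int first_invalid_level(const uint64_t ptes[4])
{
	for (int lvl = 0; lvl < 4; lvl++) {
		if (!(ptes[lvl] & 1))
			return lvl;
	}
	return -1;
}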