diff mbox series

[2/3] coresight etm4x: Add 32-bit read/write option to split 64-bit words

Message ID 126be7005569dadf5bb8970c45843848134828f7.1674174972.git.scclevenger@os.amperecomputing.com (mailing list archive)
State New, archived
Headers show
Series Ampere Computing ETMv4.x Support | expand

Commit Message

Steve Clevenger Jan. 20, 2023, 12:51 a.m. UTC
Add 32-bit read/write access option for Ampere ETMv4.6 64-bit registers.
Ampere Computing erratum AC03_DEBUG_10 describes a design decision where
64-bit read/write access is not supported for the ETMv4.6 implementation.
These 64-bit registers must be accessed as 2 ea. 32-bit registers.
AC03_DEBUG_10 is described in the AmpereOne Developer Errata:
https://solutions.amperecomputing.com/customer-connect/products/AmpereOne-device-documentation

Fix the for() loop range bug in etm4_enable_hw: the loop must iterate up to
drvdata->nr_addr_cmp * 2, not drvdata->nr_addr_cmp.

Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
---
 .../coresight/coresight-etm4x-core.c          | 81 ++++++++++++++-----
 drivers/hwtracing/coresight/coresight-etm4x.h | 32 ++++++++
 2 files changed, 93 insertions(+), 20 deletions(-)

Comments

Suzuki K Poulose Jan. 20, 2023, 11:19 a.m. UTC | #1
On 20/01/2023 00:51, Steve Clevenger wrote:
> Add 32-bit read/write access option for Ampere ETMv4.6 64-bit registers.
> Ampere Computing erratum AC03_DEBUG_10 describes a design decision where
> 64-bit read/write access is not supported for the ETMv4.6 implementation.
> These 64-bit registers must be accessed as 2 ea. 32-bit registers.
> AC03_DEBUG_10 is described in the AmpereOne Developer Errata:
> https://solutions.amperecomputing.com/customer-connect/products/AmpereOne-device-documentation

As with the previous comment, please:
   a) Clarify whether this is because of the system instruction access support
   b) Document the erratum

> 
> Fix drvdata->nr_addr_cmp for() loop range bug to drvdata->nr_addr_cmp * 2
> in etm_enable_hw.

Good catch ! Please separate this out and send it as a fix. I can queue 
this.

> 
> Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
> ---
>   .../coresight/coresight-etm4x-core.c          | 81 ++++++++++++++-----
>   drivers/hwtracing/coresight/coresight-etm4x.h | 32 ++++++++
>   2 files changed, 93 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> index 533be1928a09..bf4daa649cdf 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> @@ -452,18 +452,31 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
>   		if (etm4x_sspcicrn_present(drvdata, i))
>   			etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
>   	}
> -	for (i = 0; i < drvdata->nr_addr_cmp; i++) {
> -		etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
> -		etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
> +	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> +		if (drvdata->no_quad_mmio) {
> +			etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i));
> +			etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i));
> +		} else {
> +			etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
> +			etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
> +		}
> +	}

An approach like this can leave some places out. I think we could hide
it under the generic helpers and handle it there. Maybe "struct
csdev_access" can cache this "no_quad_mmio" and do the right thing?


> +	for (i = 0; i < drvdata->numcidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			etm4x_split_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
> +		else
> +			etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
>   	}
> -	for (i = 0; i < drvdata->numcidc; i++)
> -		etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
>   	etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
>   	if (drvdata->numcidc > 4)
>   		etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
>   
> -	for (i = 0; i < drvdata->numvmidc; i++)
> -		etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
> +	for (i = 0; i < drvdata->numvmidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			etm4x_split_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
> +		else
> +			etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
> +	}
>   	etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
>   	if (drvdata->numvmidc > 4)
>   		etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
> @@ -1670,8 +1683,13 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
>   	}
>   
>   	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> -		state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
> -		state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
> +		if (drvdata->no_quad_mmio) {
> +			state->trcacvr[i] = etm4x_split_read64(csa, TRCACVRn(i));
> +			state->trcacatr[i] = etm4x_split_read64(csa, TRCACATRn(i));
> +		} else {
> +			state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
> +			state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
> +		}
>   	}
>   
>   	/*
> @@ -1681,11 +1699,19 @@ static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
>   	 * unit") of ARM IHI 0064D.
>   	 */
>   
> -	for (i = 0; i < drvdata->numcidc; i++)
> -		state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
> +	for (i = 0; i < drvdata->numcidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			state->trccidcvr[i] = etm4x_split_read64(csa, TRCCIDCVRn(i));
> +		else
> +			state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
> +	}
>   
> -	for (i = 0; i < drvdata->numvmidc; i++)
> -		state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
> +	for (i = 0; i < drvdata->numvmidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			state->trcvmidcvr[i] = etm4x_split_read64(csa, TRCVMIDCVRn(i));
> +		else
> +			state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
> +	}
>   
>   	state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
>   	if (drvdata->numcidc > 4)
> @@ -1799,15 +1825,28 @@ static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata)
>   	}
>   
>   	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
> -		etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
> -		etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
> +		if (drvdata->no_quad_mmio) {
> +			etm4x_split_write64(csa, state->trcacvr[i], TRCACVRn(i));
> +			etm4x_split_write64(csa, state->trcacatr[i], TRCACATRn(i));
> +		} else {
> +			etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
> +			etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
> +		}
>   	}
>   
> -	for (i = 0; i < drvdata->numcidc; i++)
> -		etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
> +	for (i = 0; i < drvdata->numcidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			etm4x_split_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
> +		else
> +			etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
> +	}
>   
> -	for (i = 0; i < drvdata->numvmidc; i++)
> -		etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
> +	for (i = 0; i < drvdata->numvmidc; i++) {
> +		if (drvdata->no_quad_mmio)
> +			etm4x_split_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
> +		else
> +			etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
> +	}
>   
>   	etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
>   	if (drvdata->numcidc > 4)
> @@ -2047,8 +2086,10 @@ static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
>   	 * isolates the manufacturer JEP106 ID in the PID.
>   	 * TRCPIDR2 (JEDC|DES_1) << 16 | TRCPIDR1 (DES_0) << 8)
>   	 */
> -	if ((init_arg.pid & 0x000FF000) == 0x00096000)
> +	if ((init_arg.pid & 0x000FF000) == 0x00096000) {
>   		drvdata->mmio_external = true;
> +		drvdata->no_quad_mmio = true;
> +	}
>   
>   	/*
>   	 * Serialize against CPUHP callbacks to avoid race condition
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
> index cf4f9f2e1807..0650bcdff410 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x.h
> +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
> @@ -1016,6 +1016,7 @@ struct etmv4_save_state {
>    *		   the trace unit.
>    * @arch_features: Bitmap of arch features of etmv4 devices.
>    * @mmio_external: True if ETM considers MMIO an external access.
> + * @no_quad_mmio:  True if ETM does not support 64-bit (quad) access.
>    */
>   struct etmv4_drvdata {
>   	void __iomem			*base;
> @@ -1069,6 +1070,7 @@ struct etmv4_drvdata {
>   	bool				skip_power_up;
>   	DECLARE_BITMAP(arch_features, ETM4_IMPDEF_FEATURE_MAX);
>   	bool				mmio_external;
> +	bool				no_quad_mmio;
>   };
>   
>   /* Address comparator access types */
> @@ -1093,6 +1095,36 @@ void etm4_config_trace_mode(struct etmv4_config *config);
>   u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
>   void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit);
>   
> +/* 64-bit aligned to convert 64-bit access to 2 ea. 32-bit access */
> +#pragma pack(push, 8)
> +
> +struct etm_quad_split {
> +	u32 lsw;
> +	u32 msw;
> +};
> +
> +#pragma pack(pop)
> +
> +static inline u64 etm4x_split_read64(struct csdev_access *csa, unsigned int offset)
> +{
> +	struct etm_quad_split container;
> +
> +	container.lsw = etm4x_read32(csa, offset);
> +	container.msw = etm4x_read32(csa, offset + sizeof(u32));
> +
> +	return *(u64 *) &container;

Wouldn't this break with an "endianness" flip? (Not that we have BE
implementations.) Could we not combine the two values into a 64-bit value
and pass that instead?

Similarly below.

Suzuki

> +}
> +
> +static inline void etm4x_split_write64(struct csdev_access *csa, u64 quad, unsigned int offset)
> +{
> +	struct etm_quad_split container;
> +
> +	*(u64 *) &container = quad;
> +
> +	etm4x_relaxed_write32(csa, container.lsw, offset);
> +	etm4x_relaxed_write32(csa, container.msw, offset + sizeof(u32));
> +}
> +
>   static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata)
>   {
>   	return drvdata->arch >= ETM_ARCH_ETE;
Steve Clevenger Jan. 22, 2023, 8:32 a.m. UTC | #2
Hi Suzuki,

Comments in-line.

Steve

On 1/20/2023 3:19 AM, Suzuki K Poulose wrote:
> On 20/01/2023 00:51, Steve Clevenger wrote:
>> Add 32-bit read/write access option for Ampere ETMv4.6 64-bit registers.
>> Ampere Computing erratum AC03_DEBUG_10 describes a design decision where
>> 64-bit read/write access is not supported for the ETMv4.6 implementation.
>> These 64-bit registers must be accessed as 2 ea. 32-bit registers.
>> AC03_DEBUG_10 is described in the AmpereOne Developer Errata:
>> https://solutions.amperecomputing.com/customer-connect/products/AmpereOne-device-documentation
> 
> As with the previous comment, please :
>   a) If this is because of the system instruction access support
>   b) Document the erratum
>
I presume you're referring to your previous comment about adding these
errata to "Documentation/arm64/silicon-errata.rst". Let me see if
there's any heartburn with this internal to Ampere. I don't expect there
to be.

>>
>> Fix drvdata->nr_addr_cmp for() loop range bug to drvdata->nr_addr_cmp * 2
>> in etm_enable_hw.
> 
> Good catch ! Please separate this out and send it as a fix. I can queue
> this.
I'll submit it as a separate patch.

> 
>>
>> Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
>> ---
>>   .../coresight/coresight-etm4x-core.c          | 81 ++++++++++++++-----
>>   drivers/hwtracing/coresight/coresight-etm4x.h | 32 ++++++++
>>   2 files changed, 93 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c
>> b/drivers/hwtracing/coresight/coresight-etm4x-core.c
>> index 533be1928a09..bf4daa649cdf 100644
>> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
>> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
>> @@ -452,18 +452,31 @@ static int etm4_enable_hw(struct etmv4_drvdata
>> *drvdata)
>>           if (etm4x_sspcicrn_present(drvdata, i))
>>               etm4x_relaxed_write32(csa, config->ss_pe_cmp[i],
>> TRCSSPCICRn(i));
>>       }
>> -    for (i = 0; i < drvdata->nr_addr_cmp; i++) {
>> -        etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
>> -        etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
>> +    for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>> +        if (drvdata->no_quad_mmio) {
>> +            etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i));
>> +            etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i));
>> +        } else {
>> +            etm4x_relaxed_write64(csa, config->addr_val[i],
>> TRCACVRn(i));
>> +            etm4x_relaxed_write64(csa, config->addr_acc[i],
>> TRCACATRn(i));
>> +        }
>> +    }
> 
> Something like this can be leave some places out. I think we could hide
> it under the generic helpers and handle it there. May be "struct
> csdev_access" can cache this "no_quad_mmio" and do the right thing ?
I'm not sure what you're suggesting here. Please be more specific.

> 
> 
>> +    for (i = 0; i < drvdata->numcidc; i++) {
>> +        if (drvdata->no_quad_mmio)
>> +            etm4x_split_write64(csa, config->ctxid_pid[i],
>> TRCCIDCVRn(i));
>> +        else
>> +            etm4x_relaxed_write64(csa, config->ctxid_pid[i],
>> TRCCIDCVRn(i));
>>       }
>> -    for (i = 0; i < drvdata->numcidc; i++)
>> -        etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
>>       etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
>>       if (drvdata->numcidc > 4)
>>           etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
>>   -    for (i = 0; i < drvdata->numvmidc; i++)
>> -        etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
>> +    for (i = 0; i < drvdata->numvmidc; i++) {
>> +        if (drvdata->no_quad_mmio)
>> +            etm4x_split_write64(csa, config->vmid_val[i],
>> TRCVMIDCVRn(i));
>> +        else
>> +            etm4x_relaxed_write64(csa, config->vmid_val[i],
>> TRCVMIDCVRn(i));
>> +    }
>>       etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
>>       if (drvdata->numvmidc > 4)
>>           etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
>> @@ -1670,8 +1683,13 @@ static int __etm4_cpu_save(struct etmv4_drvdata
>> *drvdata)
>>       }
>>         for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>> -        state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
>> -        state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
>> +        if (drvdata->no_quad_mmio) {
>> +            state->trcacvr[i] = etm4x_split_read64(csa, TRCACVRn(i));
>> +            state->trcacatr[i] = etm4x_split_read64(csa, TRCACATRn(i));
>> +        } else {
>> +            state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
>> +            state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
>> +        }
>>       }
>>         /*
>> @@ -1681,11 +1699,19 @@ static int __etm4_cpu_save(struct
>> etmv4_drvdata *drvdata)
>>        * unit") of ARM IHI 0064D.
>>        */
>>   -    for (i = 0; i < drvdata->numcidc; i++)
>> -        state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
>> +    for (i = 0; i < drvdata->numcidc; i++) {
>> +        if (drvdata->no_quad_mmio)
>> +            state->trccidcvr[i] = etm4x_split_read64(csa,
>> TRCCIDCVRn(i));
>> +        else
>> +            state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
>> +    }
>>   -    for (i = 0; i < drvdata->numvmidc; i++)
>> -        state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
>> +    for (i = 0; i < drvdata->numvmidc; i++) {
>> +        if (drvdata->no_quad_mmio)
>> +            state->trcvmidcvr[i] = etm4x_split_read64(csa,
>> TRCVMIDCVRn(i));
>> +        else
>> +            state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
>> +    }
>>         state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
>>       if (drvdata->numcidc > 4)
>> @@ -1799,15 +1825,28 @@ static void __etm4_cpu_restore(struct
>> etmv4_drvdata *drvdata)
>>       }
>>         for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>> -        etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
>> -        etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
>> +        if (drvdata->no_quad_mmio) {
>> +            etm4x_split_write64(csa, state->trcacvr[i], TRCACVRn(i));
>> +            etm4x_split_write64(csa, state->trcacatr[i], TRCACATRn(i));
>> +        } else {
>> +            etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
>> +            etm4x_relaxed_write64(csa, state->trcacatr[i],
>> TRCACATRn(i));
>> +        }
>>       }
>>   -    for (i = 0; i < drvdata->numcidc; i++)
>> -        etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
>> +    for (i = 0; i < drvdata->numcidc; i++) {
>> +        if (drvdata->no_quad_mmio)
>> +            etm4x_split_write64(csa, state->trccidcvr[i],
>> TRCCIDCVRn(i));
>> +        else
>> +            etm4x_relaxed_write64(csa, state->trccidcvr[i],
>> TRCCIDCVRn(i));
>> +    }
>>   -    for (i = 0; i < drvdata->numvmidc; i++)
>> -        etm4x_relaxed_write64(csa, state->trcvmidcvr[i],
>> TRCVMIDCVRn(i));
>> +    for (i = 0; i < drvdata->numvmidc; i++) {
>> +        if (drvdata->no_quad_mmio)
>> +            etm4x_split_write64(csa, state->trcvmidcvr[i],
>> TRCVMIDCVRn(i));
>> +        else
>> +            etm4x_relaxed_write64(csa, state->trcvmidcvr[i],
>> TRCVMIDCVRn(i));
>> +    }
>>         etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
>>       if (drvdata->numcidc > 4)
>> @@ -2047,8 +2086,10 @@ static int etm4_probe(struct device *dev, void
>> __iomem *base, u32 etm_pid)
>>        * isolates the manufacturer JEP106 ID in the PID.
>>        * TRCPIDR2 (JEDC|DES_1) << 16 | TRCPIDR1 (DES_0) << 8)
>>        */
>> -    if ((init_arg.pid & 0x000FF000) == 0x00096000)
>> +    if ((init_arg.pid & 0x000FF000) == 0x00096000) {
>>           drvdata->mmio_external = true;
>> +        drvdata->no_quad_mmio = true;
>> +    }
>>         /*
>>        * Serialize against CPUHP callbacks to avoid race condition
>> diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h
>> b/drivers/hwtracing/coresight/coresight-etm4x.h
>> index cf4f9f2e1807..0650bcdff410 100644
>> --- a/drivers/hwtracing/coresight/coresight-etm4x.h
>> +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
>> @@ -1016,6 +1016,7 @@ struct etmv4_save_state {
>>    *           the trace unit.
>>    * @arch_features: Bitmap of arch features of etmv4 devices.
>>    * @mmio_external: True if ETM considers MMIO an external access.
>> + * @no_quad_mmio:  True if ETM does not support 64-bit (quad) access.
>>    */
>>   struct etmv4_drvdata {
>>       void __iomem            *base;
>> @@ -1069,6 +1070,7 @@ struct etmv4_drvdata {
>>       bool                skip_power_up;
>>       DECLARE_BITMAP(arch_features, ETM4_IMPDEF_FEATURE_MAX);
>>       bool                mmio_external;
>> +    bool                no_quad_mmio;
>>   };
>>     /* Address comparator access types */
>> @@ -1093,6 +1095,36 @@ void etm4_config_trace_mode(struct etmv4_config
>> *config);
>>   u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
>>   void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool
>> _64bit);
>>   +/* 64-bit aligned to convert 64-bit access to 2 ea. 32-bit access */
>> +#pragma pack(push, 8)
>> +
>> +struct etm_quad_split {
>> +    u32 lsw;
>> +    u32 msw;
>> +};
>> +
>> +#pragma pack(pop)
>> +
>> +static inline u64 etm4x_split_read64(struct csdev_access *csa,
>> unsigned int offset)
>> +{
>> +    struct etm_quad_split container;
>> +
>> +    container.lsw = etm4x_read32(csa, offset);
>> +    container.msw = etm4x_read32(csa, offset + sizeof(u32));
>> +
>> +    return *(u64 *) &container;
> 
> Wouldn't this break with the "endianness" flip ? (Not that we have BE
> implementations). Could we not combine the two values to a 64bit value
> and pass that instead ?
The split implementation writes/reads 32-bit words to/from 2 consecutive
32-bit aligned memory addresses independent of endianness so it doesn't
care. I'm not sure I understand what you're getting at by combining the
2 ea. 32-bit values into a 1 ea. 64-bit value. The etm4x_split_read64
and etm4x_split_write64 calls both use 64-bit values in and out.
Internal to this code, both read and write accesses must use 32-bit values.

> 
> Similarly below.
> 
> Suzuki
> 
>> +}
>> +
>> +static inline void etm4x_split_write64(struct csdev_access *csa, u64
>> quad, unsigned int offset)
>> +{
>> +    struct etm_quad_split container;
>> +
>> +    *(u64 *) &container = quad;
>> +
>> +    etm4x_relaxed_write32(csa, container.lsw, offset);
>> +    etm4x_relaxed_write32(csa, container.msw, offset + sizeof(u32));
>> +}
>> +
>>   static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata)
>>   {
>>       return drvdata->arch >= ETM_ARCH_ETE;
>
Suzuki K Poulose Jan. 23, 2023, 5:58 p.m. UTC | #3
On 22/01/2023 08:32, Steve Clevenger wrote:
> 
> Hi Suzuki,
> 
> Comments in-line.
> 
> Steve
> 
> On 1/20/2023 3:19 AM, Suzuki K Poulose wrote:
>> On 20/01/2023 00:51, Steve Clevenger wrote:
>>> Add 32-bit read/write access option for Ampere ETMv4.6 64-bit registers.
>>> Ampere Computing erratum AC03_DEBUG_10 describes a design decision where
>>> 64-bit read/write access is not supported for the ETMv4.6 implementation.
>>> These 64-bit registers must be accessed as 2 ea. 32-bit registers.
>>> AC03_DEBUG_10 is described in the AmpereOne Developer Errata:
>>> https://solutions.amperecomputing.com/customer-connect/products/AmpereOne-device-documentation
>>
>> As with the previous comment, please :
>>    a) If this is because of the system instruction access support
>>    b) Document the erratum
>>
> I presume you're referring to your previous comment about adding these
> errata to "Documentation/arm64/silicon-errata.rst". Let me see if
> there's any heartburn with this internal to Ampere. I don't expect there
> to be.
> 
>>>
>>> Fix drvdata->nr_addr_cmp for() loop range bug to drvdata->nr_addr_cmp * 2
>>> in etm_enable_hw.
>>
>> Good catch ! Please separate this out and send it as a fix. I can queue
>> this.
> I'll submit it as a separate patch.
> 
>>
>>>
>>> Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
>>> ---
>>>    .../coresight/coresight-etm4x-core.c          | 81 ++++++++++++++-----
>>>    drivers/hwtracing/coresight/coresight-etm4x.h | 32 ++++++++
>>>    2 files changed, 93 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c
>>> b/drivers/hwtracing/coresight/coresight-etm4x-core.c
>>> index 533be1928a09..bf4daa649cdf 100644
>>> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
>>> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
>>> @@ -452,18 +452,31 @@ static int etm4_enable_hw(struct etmv4_drvdata
>>> *drvdata)
>>>            if (etm4x_sspcicrn_present(drvdata, i))
>>>                etm4x_relaxed_write32(csa, config->ss_pe_cmp[i],
>>> TRCSSPCICRn(i));
>>>        }
>>> -    for (i = 0; i < drvdata->nr_addr_cmp; i++) {
>>> -        etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
>>> -        etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
>>> +    for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>>> +        if (drvdata->no_quad_mmio) {
>>> +            etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i));
>>> +            etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i));
>>> +        } else {
>>> +            etm4x_relaxed_write64(csa, config->addr_val[i],
>>> TRCACVRn(i));
>>> +            etm4x_relaxed_write64(csa, config->addr_acc[i],
>>> TRCACATRn(i));
>>> +        }
>>> +    }
>>
>> Something like this can be leave some places out. I think we could hide
>> it under the generic helpers and handle it there. May be "struct
>> csdev_access" can cache this "no_quad_mmio" and do the right thing ?
> I'm not sure what you're suggesting here. Please be more specific.
> 

e.g.,

struct csdev_access {

	bool no_64bit_access;
}


And the csdev_*_ operations could:

  if (csa->no_64bit_access) {
	split access
  } else {

  }

i.e., move the tracking of no_quad_mmio to "csa" from "drvdata"

Suzuki

>>
>>
>>> +    for (i = 0; i < drvdata->numcidc; i++) {
>>> +        if (drvdata->no_quad_mmio)
>>> +            etm4x_split_write64(csa, config->ctxid_pid[i],
>>> TRCCIDCVRn(i));
>>> +        else
>>> +            etm4x_relaxed_write64(csa, config->ctxid_pid[i],
>>> TRCCIDCVRn(i));
>>>        }
>>> -    for (i = 0; i < drvdata->numcidc; i++)
>>> -        etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
>>>        etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
>>>        if (drvdata->numcidc > 4)
>>>            etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
>>>    -    for (i = 0; i < drvdata->numvmidc; i++)
>>> -        etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
>>> +    for (i = 0; i < drvdata->numvmidc; i++) {
>>> +        if (drvdata->no_quad_mmio)
>>> +            etm4x_split_write64(csa, config->vmid_val[i],
>>> TRCVMIDCVRn(i));
>>> +        else
>>> +            etm4x_relaxed_write64(csa, config->vmid_val[i],
>>> TRCVMIDCVRn(i));
>>> +    }
>>>        etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
>>>        if (drvdata->numvmidc > 4)
>>>            etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
>>> @@ -1670,8 +1683,13 @@ static int __etm4_cpu_save(struct etmv4_drvdata
>>> *drvdata)
>>>        }
>>>          for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>>> -        state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
>>> -        state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
>>> +        if (drvdata->no_quad_mmio) {
>>> +            state->trcacvr[i] = etm4x_split_read64(csa, TRCACVRn(i));
>>> +            state->trcacatr[i] = etm4x_split_read64(csa, TRCACATRn(i));
>>> +        } else {
>>> +            state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
>>> +            state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
>>> +        }
>>>        }
>>>          /*
>>> @@ -1681,11 +1699,19 @@ static int __etm4_cpu_save(struct
>>> etmv4_drvdata *drvdata)
>>>         * unit") of ARM IHI 0064D.
>>>         */
>>>    -    for (i = 0; i < drvdata->numcidc; i++)
>>> -        state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
>>> +    for (i = 0; i < drvdata->numcidc; i++) {
>>> +        if (drvdata->no_quad_mmio)
>>> +            state->trccidcvr[i] = etm4x_split_read64(csa,
>>> TRCCIDCVRn(i));
>>> +        else
>>> +            state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
>>> +    }
>>>    -    for (i = 0; i < drvdata->numvmidc; i++)
>>> -        state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
>>> +    for (i = 0; i < drvdata->numvmidc; i++) {
>>> +        if (drvdata->no_quad_mmio)
>>> +            state->trcvmidcvr[i] = etm4x_split_read64(csa,
>>> TRCVMIDCVRn(i));
>>> +        else
>>> +            state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
>>> +    }
>>>          state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
>>>        if (drvdata->numcidc > 4)
>>> @@ -1799,15 +1825,28 @@ static void __etm4_cpu_restore(struct
>>> etmv4_drvdata *drvdata)
>>>        }
>>>          for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>>> -        etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
>>> -        etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
>>> +        if (drvdata->no_quad_mmio) {
>>> +            etm4x_split_write64(csa, state->trcacvr[i], TRCACVRn(i));
>>> +            etm4x_split_write64(csa, state->trcacatr[i], TRCACATRn(i));
>>> +        } else {
>>> +            etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
>>> +            etm4x_relaxed_write64(csa, state->trcacatr[i],
>>> TRCACATRn(i));
>>> +        }
>>>        }
>>>    -    for (i = 0; i < drvdata->numcidc; i++)
>>> -        etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
>>> +    for (i = 0; i < drvdata->numcidc; i++) {
>>> +        if (drvdata->no_quad_mmio)
>>> +            etm4x_split_write64(csa, state->trccidcvr[i],
>>> TRCCIDCVRn(i));
>>> +        else
>>> +            etm4x_relaxed_write64(csa, state->trccidcvr[i],
>>> TRCCIDCVRn(i));
>>> +    }
>>>    -    for (i = 0; i < drvdata->numvmidc; i++)
>>> -        etm4x_relaxed_write64(csa, state->trcvmidcvr[i],
>>> TRCVMIDCVRn(i));
>>> +    for (i = 0; i < drvdata->numvmidc; i++) {
>>> +        if (drvdata->no_quad_mmio)
>>> +            etm4x_split_write64(csa, state->trcvmidcvr[i],
>>> TRCVMIDCVRn(i));
>>> +        else
>>> +            etm4x_relaxed_write64(csa, state->trcvmidcvr[i],
>>> TRCVMIDCVRn(i));
>>> +    }
>>>          etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
>>>        if (drvdata->numcidc > 4)
>>> @@ -2047,8 +2086,10 @@ static int etm4_probe(struct device *dev, void
>>> __iomem *base, u32 etm_pid)
>>>         * isolates the manufacturer JEP106 ID in the PID.
>>>         * TRCPIDR2 (JEDC|DES_1) << 16 | TRCPIDR1 (DES_0) << 8)
>>>         */
>>> -    if ((init_arg.pid & 0x000FF000) == 0x00096000)
>>> +    if ((init_arg.pid & 0x000FF000) == 0x00096000) {
>>>            drvdata->mmio_external = true;
>>> +        drvdata->no_quad_mmio = true;
>>> +    }
>>>          /*
>>>         * Serialize against CPUHP callbacks to avoid race condition
>>> diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h
>>> b/drivers/hwtracing/coresight/coresight-etm4x.h
>>> index cf4f9f2e1807..0650bcdff410 100644
>>> --- a/drivers/hwtracing/coresight/coresight-etm4x.h
>>> +++ b/drivers/hwtracing/coresight/coresight-etm4x.h
>>> @@ -1016,6 +1016,7 @@ struct etmv4_save_state {
>>>     *           the trace unit.
>>>     * @arch_features: Bitmap of arch features of etmv4 devices.
>>>     * @mmio_external: True if ETM considers MMIO an external access.
>>> + * @no_quad_mmio:  True if ETM does not support 64-bit (quad) access.
>>>     */
>>>    struct etmv4_drvdata {
>>>        void __iomem            *base;
>>> @@ -1069,6 +1070,7 @@ struct etmv4_drvdata {
>>>        bool                skip_power_up;
>>>        DECLARE_BITMAP(arch_features, ETM4_IMPDEF_FEATURE_MAX);
>>>        bool                mmio_external;
>>> +    bool                no_quad_mmio;
>>>    };
>>>      /* Address comparator access types */
>>> @@ -1093,6 +1095,36 @@ void etm4_config_trace_mode(struct etmv4_config
>>> *config);
>>>    u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
>>>    void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool
>>> _64bit);
>>>    +/* 64-bit aligned to convert 64-bit access to 2 ea. 32-bit access */
>>> +#pragma pack(push, 8)
>>> +
>>> +struct etm_quad_split {
>>> +    u32 lsw;
>>> +    u32 msw;
>>> +};
>>> +
>>> +#pragma pack(pop)
>>> +
>>> +static inline u64 etm4x_split_read64(struct csdev_access *csa,
>>> unsigned int offset)
>>> +{
>>> +    struct etm_quad_split container;
>>> +
>>> +    container.lsw = etm4x_read32(csa, offset);
>>> +    container.msw = etm4x_read32(csa, offset + sizeof(u32));
>>> +
>>> +    return *(u64 *) &container;
>>
>> Wouldn't this break with the "endianness" flip ? (Not that we have BE
>> implementations). Could we not combine the two values to a 64bit value
>> and pass that instead ?
> The split implementation writes/reads 32-bit words to/from 2 consecutive
> 32-bit aligned memory addresses independent of endianness so it doesn't
> care. I'm not sure I understand what you're getting at by combining the
> 2 ea. 32-bit values into a 1 ea. 64-bit value. The etm4x_split_read64
> and etm4x_split_write64 calls both use 64-bit values in and out.
> Internal to this code, both read and write accesses must use 32-bit values.
> 
>>
>> Similarly below.
>>
>> Suzuki
>>
>>> +}
>>> +
>>> +static inline void etm4x_split_write64(struct csdev_access *csa, u64
>>> quad, unsigned int offset)
>>> +{
>>> +    struct etm_quad_split container;
>>> +
>>> +    *(u64 *) &container = quad;
>>> +
>>> +    etm4x_relaxed_write32(csa, container.lsw, offset);
>>> +    etm4x_relaxed_write32(csa, container.msw, offset + sizeof(u32));
>>> +}
>>> +
>>>    static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata)
>>>    {
>>>        return drvdata->arch >= ETM_ARCH_ETE;
>>
James Clark March 6, 2023, 10:37 a.m. UTC | #4
On 20/01/2023 11:19, Suzuki K Poulose wrote:
> On 20/01/2023 00:51, Steve Clevenger wrote:
[...]
>>       }
>> -    for (i = 0; i < drvdata->nr_addr_cmp; i++) {
>> -        etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
>> -        etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
>> +    for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>> +        if (drvdata->no_quad_mmio) {
>> +            etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i));
>> +            etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i));
>> +        } else {
>> +            etm4x_relaxed_write64(csa, config->addr_val[i],
>> TRCACVRn(i));
>> +            etm4x_relaxed_write64(csa, config->addr_acc[i],
>> TRCACATRn(i));
>> +        }
>> +    }
> 
> Something like this can be leave some places out. I think we could hide
> it under the generic helpers and handle it there. May be "struct
> csdev_access" can cache this "no_quad_mmio" and do the right thing ?

+1 for this, or just pass drvdata to etm4x_relaxed_write64() and then it
can decide what to do. I'd prefer that to caching the value in
csdev_access because it would just be a copy of some other value and
might go stale or not be set at some point.

James
Steve Clevenger March 7, 2023, 1:24 a.m. UTC | #5
Hi James,

Thanks for the feedback. I did hide the split 64-bit implementation
under the generic helpers. Note these helpers are now static inline code
instead of macro implementations. This significantly reduced the number
of changes where the etm4x_relaxed_write64 and etm4x_relaxed_read64
calls were used.

Steve

On 3/6/2023 2:37 AM, James Clark wrote:
> 
> 
> On 20/01/2023 11:19, Suzuki K Poulose wrote:
>> On 20/01/2023 00:51, Steve Clevenger wrote:
> [...]
>>>       }
>>> -    for (i = 0; i < drvdata->nr_addr_cmp; i++) {
>>> -        etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
>>> -        etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
>>> +    for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
>>> +        if (drvdata->no_quad_mmio) {
>>> +            etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i));
>>> +            etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i));
>>> +        } else {
>>> +            etm4x_relaxed_write64(csa, config->addr_val[i],
>>> TRCACVRn(i));
>>> +            etm4x_relaxed_write64(csa, config->addr_acc[i],
>>> TRCACATRn(i));
>>> +        }
>>> +    }
>>
>> Something like this can be leave some places out. I think we could hide
>> it under the generic helpers and handle it there. May be "struct
>> csdev_access" can cache this "no_quad_mmio" and do the right thing ?
> 
> +1 for this, or just pass drvdata to etm4x_relaxed_write64() and then it
> can decide what to do. I'd prefer that to caching the value in
> csdev_access because it would just be a copy of some other value and
> might go stale or not be set at some point.
> 
> James
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 533be1928a09..bf4daa649cdf 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -452,18 +452,31 @@  static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 		if (etm4x_sspcicrn_present(drvdata, i))
 			etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
 	}
-	for (i = 0; i < drvdata->nr_addr_cmp; i++) {
-		etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
-		etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
+	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+		if (drvdata->no_quad_mmio) {
+			etm4x_split_write64(csa, config->addr_val[i], TRCACVRn(i));
+			etm4x_split_write64(csa, config->addr_acc[i], TRCACATRn(i));
+		} else {
+			etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
+			etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
+		}
+	}
+	for (i = 0; i < drvdata->numcidc; i++) {
+		if (drvdata->no_quad_mmio)
+			etm4x_split_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
+		else
+			etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
 	}
-	for (i = 0; i < drvdata->numcidc; i++)
-		etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
 	etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
 	if (drvdata->numcidc > 4)
 		etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
 
-	for (i = 0; i < drvdata->numvmidc; i++)
-		etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
+	for (i = 0; i < drvdata->numvmidc; i++) {
+		if (drvdata->no_quad_mmio)
+			etm4x_split_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
+		else
+			etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
+	}
 	etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
 	if (drvdata->numvmidc > 4)
 		etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
@@ -1670,8 +1683,13 @@  static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
 	}
 
 	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
-		state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
-		state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
+		if (drvdata->no_quad_mmio) {
+			state->trcacvr[i] = etm4x_split_read64(csa, TRCACVRn(i));
+			state->trcacatr[i] = etm4x_split_read64(csa, TRCACATRn(i));
+		} else {
+			state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
+			state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
+		}
 	}
 
 	/*
@@ -1681,11 +1699,19 @@  static int __etm4_cpu_save(struct etmv4_drvdata *drvdata)
 	 * unit") of ARM IHI 0064D.
 	 */
 
-	for (i = 0; i < drvdata->numcidc; i++)
-		state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
+	for (i = 0; i < drvdata->numcidc; i++) {
+		if (drvdata->no_quad_mmio) 
+			state->trccidcvr[i] = etm4x_split_read64(csa, TRCCIDCVRn(i));
+		else
+			state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
+	}
 
-	for (i = 0; i < drvdata->numvmidc; i++)
-		state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
+	for (i = 0; i < drvdata->numvmidc; i++) {
+		if (drvdata->no_quad_mmio)
+			state->trcvmidcvr[i] = etm4x_split_read64(csa, TRCVMIDCVRn(i));
+		else
+			state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
+	}
 
 	state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
 	if (drvdata->numcidc > 4)
@@ -1799,15 +1825,28 @@  static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata)
 	}
 
 	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
-		etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
-		etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
+		if (drvdata->no_quad_mmio) {
+			etm4x_split_write64(csa, state->trcacvr[i], TRCACVRn(i));
+			etm4x_split_write64(csa, state->trcacatr[i], TRCACATRn(i));
+		} else {
+			etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
+			etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
+		}
 	}
 
-	for (i = 0; i < drvdata->numcidc; i++)
-		etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
+	for (i = 0; i < drvdata->numcidc; i++) {
+		if (drvdata->no_quad_mmio)
+			etm4x_split_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
+		else
+			etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
+	}
 
-	for (i = 0; i < drvdata->numvmidc; i++)
-		etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
+	for (i = 0; i < drvdata->numvmidc; i++) {
+		if (drvdata->no_quad_mmio)
+			etm4x_split_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
+		else
+			etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
+	}
 
 	etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
 	if (drvdata->numcidc > 4)
@@ -2047,8 +2086,10 @@  static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
 	 * isolates the manufacturer JEP106 ID in the PID.
 	 * TRCPIDR2 (JEDC|DES_1) << 16 | TRCPIDR1 (DES_0) << 8)
 	 */
-	if ((init_arg.pid & 0x000FF000) == 0x00096000)
+	if ((init_arg.pid & 0x000FF000) == 0x00096000) {
 		drvdata->mmio_external = true;
+		drvdata->no_quad_mmio = true;
+	}
 
 	/*
 	 * Serialize against CPUHP callbacks to avoid race condition
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index cf4f9f2e1807..0650bcdff410 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -1016,6 +1016,7 @@  struct etmv4_save_state {
  *		   the trace unit.
  * @arch_features: Bitmap of arch features of etmv4 devices.
  * @mmio_external: True if ETM considers MMIO an external access.
+ * @no_quad_mmio:  True if ETM does not support 64-bit (quad) access.
  */
 struct etmv4_drvdata {
 	void __iomem			*base;
@@ -1069,6 +1070,7 @@  struct etmv4_drvdata {
 	bool				skip_power_up;
 	DECLARE_BITMAP(arch_features, ETM4_IMPDEF_FEATURE_MAX);
 	bool				mmio_external;
+	bool				no_quad_mmio;
 };
 
 /* Address comparator access types */
@@ -1093,6 +1095,36 @@  void etm4_config_trace_mode(struct etmv4_config *config);
 u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
 void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit);
 
+/* One 64-bit register value held as the two 32-bit words it is accessed as */
+#pragma pack(push, 8)	/* NOTE(review): pack() only caps alignment - confirm intent */
+
+struct etm_quad_split {
+	u32 lsw;	/* 32-bit word at the register's base offset */
+	u32 msw;	/* 32-bit word at base offset + sizeof(u32) */
+};
+
+#pragma pack(pop)
+
+static inline u64 etm4x_split_read64(struct csdev_access *csa, unsigned int offset)
+{
+	/* Read as two 32-bit halves: low word at @offset, high word right after */
+	u32 lsw = etm4x_read32(csa, offset);
+	u32 msw = etm4x_read32(csa, offset + sizeof(u32));
+
+	/* Shift/OR rather than a pointer-cast pun: endian-independent, alias-safe */
+	return ((u64)msw << 32) | lsw;
+}
+
+static inline void etm4x_split_write64(struct csdev_access *csa, u64 quad, unsigned int offset)
+{
+	/* Truncate/shift (no pointer-cast pun) so the low word always lands at @offset */
+	u32 lsw = (u32)quad;
+	u32 msw = (u32)(quad >> 32);
+
+	etm4x_relaxed_write32(csa, lsw, offset);
+	etm4x_relaxed_write32(csa, msw, offset + sizeof(u32));
+}
+
 static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata)
 {
 	return drvdata->arch >= ETM_ARCH_ETE;