diff mbox series

[v2] coresight: etm4x: avoid build failure with unrolled loops

Message ID 20210429145752.3218324-1-arnd@kernel.org (mailing list archive)
State New, archived
Headers show
Series [v2] coresight: etm4x: avoid build failure with unrolled loops | expand

Commit Message

Arnd Bergmann April 29, 2021, 2:57 p.m. UTC
From: Arnd Bergmann <arnd@arndb.de>

clang-12 fails to build the etm4x driver with -fsanitize=array-bounds,
where it decides to unroll certain loops in a way that result in a
C variable getting put into an inline assembly

<instantiation>:1:7: error: expected constant expression in '.inst' directive
.inst (0xd5200000|((((2) << 19) | ((1) << 16) | (((((((((((0x160 + (i * 4))))) >> 2))) >> 7) & 0x7)) << 12) | ((((((((((0x160 + (i * 4))))) >> 2))) & 0xf)) << 8) | (((((((((((0x160 + (i * 4))))) >> 2))) >> 4) & 0x7)) << 5)))|(.L__reg_num_x8))
      ^
drivers/hwtracing/coresight/coresight-etm4x-core.c:702:4: note: while in macro instantiation
                        etm4x_relaxed_read32(csa, TRCCNTVRn(i));
                        ^
drivers/hwtracing/coresight/coresight-etm4x.h:403:4: note: expanded from macro 'etm4x_relaxed_read32'
                 read_etm4x_sysreg_offset((offset), false)))
                 ^
drivers/hwtracing/coresight/coresight-etm4x.h:383:12: note: expanded from macro 'read_etm4x_sysreg_offset'
                        __val = read_etm4x_sysreg_const_offset((offset));       \
                                ^
drivers/hwtracing/coresight/coresight-etm4x.h:149:2: note: expanded from macro 'read_etm4x_sysreg_const_offset'
        READ_ETM4x_REG(ETM4x_OFFSET_TO_REG(offset))
        ^
drivers/hwtracing/coresight/coresight-etm4x.h:144:2: note: expanded from macro 'READ_ETM4x_REG'
        read_sysreg_s(ETM4x_REG_NUM_TO_SYSREG((reg)))
        ^
arch/arm64/include/asm/sysreg.h:1108:15: note: expanded from macro 'read_sysreg_s'
        asm volatile(__mrs_s("%0", r) : "=r" (__val));                  \
                     ^
arch/arm64/include/asm/sysreg.h:1074:2: note: expanded from macro '__mrs_s'
"       mrs_s " v ", " __stringify(r) "\n"                      \
 ^

This only happened in a few loops in which the array bounds sanitizer
added a special case for an array overflow that clang determined to be
possible, but any compiler is free to unroll any of the loops in the
same way that breaks the sysreg macros.

Introduce helper functions that perform a sysreg access with a
non-constant register number and use them in each call that passes
a loop counter.

Link: https://github.com/ClangBuiltLinux/linux/issues/1310
Link: https://lore.kernel.org/lkml/20210225094324.3542511-1-arnd@kernel.org/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 .../coresight/coresight-etm4x-core.c          | 78 +++++++++----------
 drivers/hwtracing/coresight/coresight-etm4x.h | 69 ++++++++++++++++
 2 files changed, 108 insertions(+), 39 deletions(-)

Comments

Suzuki K Poulose April 29, 2021, 5:37 p.m. UTC | #1
Hi Arnd

On 29/04/2021 15:57, Arnd Bergmann wrote:
> From: Arnd Bergmann <arnd@arndb.de>
> 
> clang-12 fails to build the etm4x driver with -fsanitize=array-bounds,
> where it decides to unroll certain loops in a way that result in a
> C variable getting put into an inline assembly
> 
> <instantiation>:1:7: error: expected constant expression in '.inst' directive
> .inst (0xd5200000|((((2) << 19) | ((1) << 16) | (((((((((((0x160 + (i * 4))))) >> 2))) >> 7) & 0x7)) << 12) | ((((((((((0x160 + (i * 4))))) >> 2))) & 0xf)) << 8) | (((((((((((0x160 + (i * 4))))) >> 2))) >> 4) & 0x7)) << 5)))|(.L__reg_num_x8))
>        ^
> drivers/hwtracing/coresight/coresight-etm4x-core.c:702:4: note: while in macro instantiation
>                          etm4x_relaxed_read32(csa, TRCCNTVRn(i));
>                          ^
> drivers/hwtracing/coresight/coresight-etm4x.h:403:4: note: expanded from macro 'etm4x_relaxed_read32'
>                   read_etm4x_sysreg_offset((offset), false)))
>                   ^
> drivers/hwtracing/coresight/coresight-etm4x.h:383:12: note: expanded from macro 'read_etm4x_sysreg_offset'
>                          __val = read_etm4x_sysreg_const_offset((offset));       \
>                                  ^
> drivers/hwtracing/coresight/coresight-etm4x.h:149:2: note: expanded from macro 'read_etm4x_sysreg_const_offset'
>          READ_ETM4x_REG(ETM4x_OFFSET_TO_REG(offset))
>          ^
> drivers/hwtracing/coresight/coresight-etm4x.h:144:2: note: expanded from macro 'READ_ETM4x_REG'
>          read_sysreg_s(ETM4x_REG_NUM_TO_SYSREG((reg)))
>          ^
> arch/arm64/include/asm/sysreg.h:1108:15: note: expanded from macro 'read_sysreg_s'
>          asm volatile(__mrs_s("%0", r) : "=r" (__val));                  \
>                       ^
> arch/arm64/include/asm/sysreg.h:1074:2: note: expanded from macro '__mrs_s'
> "       mrs_s " v ", " __stringify(r) "\n"                      \
>   ^
> 
> This only happened in a few loops in which the array bounds sanitizer
> added a special case for an array overflow that clang determined to be
> possible, but any compiler is free to unroll any of the loops in the
> same way that breaks the sysreg macros.
> 
> Introduce helper functions that perform a sysreg access with a
> non-constant register number and use them in each call that passes
> a loop counter.

You don't need to add this special helper. We have the exact 
infrastructure already. So these could simply be replaced with:

csdev_access_xxx(csa, ...)

see :

include/linux/coresight.h

Cheers
Suzuki
Arnd Bergmann April 29, 2021, 5:50 p.m. UTC | #2
On Thu, Apr 29, 2021 at 7:37 PM Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
> On 29/04/2021 15:57, Arnd Bergmann wrote:
> > From: Arnd Bergmann <arnd@arndb.de>
> >
> > clang-12 fails to build the etm4x driver with -fsanitize=array-bounds,
> > where it decides to unroll certain loops in a way that result in a
> > C variable getting put into an inline assembly
> >
> > <instantiation>:1:7: error: expected constant expression in '.inst' directive
> > .inst (0xd5200000|((((2) << 19) | ((1) << 16) | (((((((((((0x160 + (i * 4))))) >> 2))) >> 7) & 0x7)) << 12) | ((((((((((0x160 + (i * 4))))) >> 2))) & 0xf)) << 8) | (((((((((((0x160 + (i * 4))))) >> 2))) >> 4) & 0x7)) << 5)))|(.L__reg_num_x8))
> >        ^
> > drivers/hwtracing/coresight/coresight-etm4x-core.c:702:4: note: while in macro instantiation
> >                          etm4x_relaxed_read32(csa, TRCCNTVRn(i));
> >                          ^
> > drivers/hwtracing/coresight/coresight-etm4x.h:403:4: note: expanded from macro 'etm4x_relaxed_read32'
> >                   read_etm4x_sysreg_offset((offset), false)))
> >                   ^
> > drivers/hwtracing/coresight/coresight-etm4x.h:383:12: note: expanded from macro 'read_etm4x_sysreg_offset'
> >                          __val = read_etm4x_sysreg_const_offset((offset));       \
> >                                  ^
> > drivers/hwtracing/coresight/coresight-etm4x.h:149:2: note: expanded from macro 'read_etm4x_sysreg_const_offset'
> >          READ_ETM4x_REG(ETM4x_OFFSET_TO_REG(offset))
> >          ^
> > drivers/hwtracing/coresight/coresight-etm4x.h:144:2: note: expanded from macro 'READ_ETM4x_REG'
> >          read_sysreg_s(ETM4x_REG_NUM_TO_SYSREG((reg)))
> >          ^
> > arch/arm64/include/asm/sysreg.h:1108:15: note: expanded from macro 'read_sysreg_s'
> >          asm volatile(__mrs_s("%0", r) : "=r" (__val));                  \
> >                       ^
> > arch/arm64/include/asm/sysreg.h:1074:2: note: expanded from macro '__mrs_s'
> > "       mrs_s " v ", " __stringify(r) "\n"                      \
> >   ^
> >
> > This only happened in a few loops in which the array bounds sanitizer
> > added a special case for an array overflow that clang determined to be
> > possible, but any compiler is free to unroll any of the loops in the
> > same way that breaks the sysreg macros.
> >
> > Introduce helper functions that perform a sysreg access with a
> > non-constant register number and use them in each call that passes
> > a loop counter.
>
> You don't need to add this special helper. We have the exact
> infrastructure already. So these could simply be replaced with:
>
> csdev_access_xxx(csa, ...)
>
> see :
>
> include/linux/coresight.h

Ah, nice!

Do you mean replacing only the ones that use a nonconstant
offset, or all of them? I guess changing all would avoid some
really ugly magic macros, but the indirect function call and the
switch() adds a few cycles of overhead every time and the code
looks like it is micro-optimized for fast register access here.

      Arnd
Suzuki K Poulose April 29, 2021, 10:49 p.m. UTC | #3
On 29/04/2021 18:50, Arnd Bergmann wrote:
> On Thu, Apr 29, 2021 at 7:37 PM Suzuki K Poulose <suzuki.poulose@arm.com> wrote:
>> On 29/04/2021 15:57, Arnd Bergmann wrote:
>>> From: Arnd Bergmann <arnd@arndb.de>
>>>
>>> clang-12 fails to build the etm4x driver with -fsanitize=array-bounds,
>>> where it decides to unroll certain loops in a way that result in a
>>> C variable getting put into an inline assembly
>>>
>>> <instantiation>:1:7: error: expected constant expression in '.inst' directive
>>> .inst (0xd5200000|((((2) << 19) | ((1) << 16) | (((((((((((0x160 + (i * 4))))) >> 2))) >> 7) & 0x7)) << 12) | ((((((((((0x160 + (i * 4))))) >> 2))) & 0xf)) << 8) | (((((((((((0x160 + (i * 4))))) >> 2))) >> 4) & 0x7)) << 5)))|(.L__reg_num_x8))
>>>         ^
>>> drivers/hwtracing/coresight/coresight-etm4x-core.c:702:4: note: while in macro instantiation
>>>                           etm4x_relaxed_read32(csa, TRCCNTVRn(i));
>>>                           ^
>>> drivers/hwtracing/coresight/coresight-etm4x.h:403:4: note: expanded from macro 'etm4x_relaxed_read32'
>>>                    read_etm4x_sysreg_offset((offset), false)))
>>>                    ^
>>> drivers/hwtracing/coresight/coresight-etm4x.h:383:12: note: expanded from macro 'read_etm4x_sysreg_offset'
>>>                           __val = read_etm4x_sysreg_const_offset((offset));       \
>>>                                   ^
>>> drivers/hwtracing/coresight/coresight-etm4x.h:149:2: note: expanded from macro 'read_etm4x_sysreg_const_offset'
>>>           READ_ETM4x_REG(ETM4x_OFFSET_TO_REG(offset))
>>>           ^
>>> drivers/hwtracing/coresight/coresight-etm4x.h:144:2: note: expanded from macro 'READ_ETM4x_REG'
>>>           read_sysreg_s(ETM4x_REG_NUM_TO_SYSREG((reg)))
>>>           ^
>>> arch/arm64/include/asm/sysreg.h:1108:15: note: expanded from macro 'read_sysreg_s'
>>>           asm volatile(__mrs_s("%0", r) : "=r" (__val));                  \
>>>                        ^
>>> arch/arm64/include/asm/sysreg.h:1074:2: note: expanded from macro '__mrs_s'
>>> "       mrs_s " v ", " __stringify(r) "\n"                      \
>>>    ^
>>>
>>> This only happened in a few loops in which the array bounds sanitizer
>>> added a special case for an array overflow that clang determined to be
>>> possible, but any compiler is free to unroll any of the loops in the
>>> same way that breaks the sysreg macros.
>>>
>>> Introduce helper functions that perform a sysreg access with a
>>> non-constant register number and use them in each call that passes
>>> a loop counter.
>>
>> You don't need to add this special helper. We have the exact
>> infrastructure already. So these could simply be replaced with:
>>
>> csdev_access_xxx(csa, ...)
>>
>> see :
>>
>> include/linux/coresight.h
> 
> Ah, nice!
> 
> Do you mean replacing only the ones that use a nonconstant
> offset, or all of them? I guess changing all would avoid some

Only the CLANG "nonconstant" please. Going through an indirect
function call and large "switch..case" for every single register
access is painful. Which is why I decided to add those ugly macros.

So, please leave the rest as they are.

Suzuki


> really ugly magic macros, but the indirect function call and the
> switch() adds a few cycles of overhead every time and the code
> looks like it is micro-optimized for fast register access here.
> 
>        Arnd
>
diff mbox series

Patch

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index db881993c211..73649d9c89d2 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -381,14 +381,14 @@  static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 	if (drvdata->nr_pe_cmp)
 		etm4x_relaxed_write32(csa, config->vipcssctlr, TRCVIPCSSCTLR);
 	for (i = 0; i < drvdata->nrseqstate - 1; i++)
-		etm4x_relaxed_write32(csa, config->seq_ctrl[i], TRCSEQEVRn(i));
+		etm4x_relaxed_write32_varreg(csa, config->seq_ctrl[i], TRCSEQEVRn(i));
 	etm4x_relaxed_write32(csa, config->seq_rst, TRCSEQRSTEVR);
 	etm4x_relaxed_write32(csa, config->seq_state, TRCSEQSTR);
 	etm4x_relaxed_write32(csa, config->ext_inp, TRCEXTINSELR);
 	for (i = 0; i < drvdata->nr_cntr; i++) {
-		etm4x_relaxed_write32(csa, config->cntrldvr[i], TRCCNTRLDVRn(i));
-		etm4x_relaxed_write32(csa, config->cntr_ctrl[i], TRCCNTCTLRn(i));
-		etm4x_relaxed_write32(csa, config->cntr_val[i], TRCCNTVRn(i));
+		etm4x_relaxed_write32_varreg(csa, config->cntrldvr[i], TRCCNTRLDVRn(i));
+		etm4x_relaxed_write32_varreg(csa, config->cntr_ctrl[i], TRCCNTCTLRn(i));
+		etm4x_relaxed_write32_varreg(csa, config->cntr_val[i], TRCCNTVRn(i));
 	}
 
 	/*
@@ -396,29 +396,29 @@  static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 	 * such start at 2.
 	 */
 	for (i = 2; i < drvdata->nr_resource * 2; i++)
-		etm4x_relaxed_write32(csa, config->res_ctrl[i], TRCRSCTLRn(i));
+		etm4x_relaxed_write32_varreg(csa, config->res_ctrl[i], TRCRSCTLRn(i));
 
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
 		/* always clear status bit on restart if using single-shot */
 		if (config->ss_ctrl[i] || config->ss_pe_cmp[i])
 			config->ss_status[i] &= ~BIT(31);
-		etm4x_relaxed_write32(csa, config->ss_ctrl[i], TRCSSCCRn(i));
-		etm4x_relaxed_write32(csa, config->ss_status[i], TRCSSCSRn(i));
+		etm4x_relaxed_write32_varreg(csa, config->ss_ctrl[i], TRCSSCCRn(i));
+		etm4x_relaxed_write32_varreg(csa, config->ss_status[i], TRCSSCSRn(i));
 		if (etm4x_sspcicrn_present(drvdata, i))
-			etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
+			etm4x_relaxed_write32_varreg(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
 	}
 	for (i = 0; i < drvdata->nr_addr_cmp; i++) {
-		etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
-		etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
+		etm4x_relaxed_write64_varreg(csa, config->addr_val[i], TRCACVRn(i));
+		etm4x_relaxed_write64_varreg(csa, config->addr_acc[i], TRCACATRn(i));
 	}
 	for (i = 0; i < drvdata->numcidc; i++)
-		etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
+		etm4x_relaxed_write64_varreg(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
 	etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
 	if (drvdata->numcidc > 4)
 		etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
 
 	for (i = 0; i < drvdata->numvmidc; i++)
-		etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
+		etm4x_relaxed_write64_varreg(csa, config->vmid_val[i], TRCVMIDCVRn(i));
 	etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
 	if (drvdata->numvmidc > 4)
 		etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
@@ -777,13 +777,13 @@  static void etm4_disable_hw(void *info)
 	/* read the status of the single shot comparators */
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
 		config->ss_status[i] =
-			etm4x_relaxed_read32(csa, TRCSSCSRn(i));
+			etm4x_relaxed_read32_varreg(csa, TRCSSCSRn(i));
 	}
 
 	/* read back the current counter values */
 	for (i = 0; i < drvdata->nr_cntr; i++) {
 		config->cntr_val[i] =
-			etm4x_relaxed_read32(csa, TRCCNTVRn(i));
+			etm4x_relaxed_read32_varreg(csa, TRCCNTVRn(i));
 	}
 
 	coresight_disclaim_device_unlocked(csdev);
@@ -1151,7 +1151,7 @@  static void etm4_init_arch_data(void *info)
 	drvdata->nr_ss_cmp = BMVAL(etmidr4, 20, 23);
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
 		drvdata->config.ss_status[i] =
-			etm4x_relaxed_read32(csa, TRCSSCSRn(i));
+			etm4x_relaxed_read32_varreg(csa, TRCSSCSRn(i));
 	}
 	/* NUMCIDC, bits[27:24] number of Context ID comparators for tracing */
 	drvdata->numcidc = BMVAL(etmidr4, 24, 27);
@@ -1595,31 +1595,31 @@  static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 	state->trcvdarcctlr = etm4x_read32(csa, TRCVDARCCTLR);
 
 	for (i = 0; i < drvdata->nrseqstate - 1; i++)
-		state->trcseqevr[i] = etm4x_read32(csa, TRCSEQEVRn(i));
+		state->trcseqevr[i] = etm4x_read32_varreg(csa, TRCSEQEVRn(i));
 
 	state->trcseqrstevr = etm4x_read32(csa, TRCSEQRSTEVR);
 	state->trcseqstr = etm4x_read32(csa, TRCSEQSTR);
 	state->trcextinselr = etm4x_read32(csa, TRCEXTINSELR);
 
 	for (i = 0; i < drvdata->nr_cntr; i++) {
-		state->trccntrldvr[i] = etm4x_read32(csa, TRCCNTRLDVRn(i));
-		state->trccntctlr[i] = etm4x_read32(csa, TRCCNTCTLRn(i));
-		state->trccntvr[i] = etm4x_read32(csa, TRCCNTVRn(i));
+		state->trccntrldvr[i] = etm4x_read32_varreg(csa, TRCCNTRLDVRn(i));
+		state->trccntctlr[i] = etm4x_read32_varreg(csa, TRCCNTCTLRn(i));
+		state->trccntvr[i] = etm4x_read32_varreg(csa, TRCCNTVRn(i));
 	}
 
 	for (i = 0; i < drvdata->nr_resource * 2; i++)
-		state->trcrsctlr[i] = etm4x_read32(csa, TRCRSCTLRn(i));
+		state->trcrsctlr[i] = etm4x_read32_varreg(csa, TRCRSCTLRn(i));
 
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
-		state->trcssccr[i] = etm4x_read32(csa, TRCSSCCRn(i));
-		state->trcsscsr[i] = etm4x_read32(csa, TRCSSCSRn(i));
+		state->trcssccr[i] = etm4x_read32_varreg(csa, TRCSSCCRn(i));
+		state->trcsscsr[i] = etm4x_read32_varreg(csa, TRCSSCSRn(i));
 		if (etm4x_sspcicrn_present(drvdata, i))
-			state->trcsspcicr[i] = etm4x_read32(csa, TRCSSPCICRn(i));
+			state->trcsspcicr[i] = etm4x_read32_varreg(csa, TRCSSPCICRn(i));
 	}
 
 	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
-		state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
-		state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
+		state->trcacvr[i] = etm4x_read64_varreg(csa, TRCACVRn(i));
+		state->trcacatr[i] = etm4x_read64_varreg(csa, TRCACATRn(i));
 	}
 
 	/*
@@ -1630,10 +1630,10 @@  static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 	 */
 
 	for (i = 0; i < drvdata->numcidc; i++)
-		state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
+		state->trccidcvr[i] = etm4x_read64_varreg(csa, TRCCIDCVRn(i));
 
 	for (i = 0; i < drvdata->numvmidc; i++)
-		state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
+		state->trcvmidcvr[i] = etm4x_read64_varreg(csa, TRCVMIDCVRn(i));
 
 	state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
 	if (drvdata->numcidc > 4)
@@ -1708,38 +1708,38 @@  static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
 	etm4x_relaxed_write32(csa, state->trcvdarcctlr, TRCVDARCCTLR);
 
 	for (i = 0; i < drvdata->nrseqstate - 1; i++)
-		etm4x_relaxed_write32(csa, state->trcseqevr[i], TRCSEQEVRn(i));
+		etm4x_relaxed_write32_varreg(csa, state->trcseqevr[i], TRCSEQEVRn(i));
 
 	etm4x_relaxed_write32(csa, state->trcseqrstevr, TRCSEQRSTEVR);
 	etm4x_relaxed_write32(csa, state->trcseqstr, TRCSEQSTR);
 	etm4x_relaxed_write32(csa, state->trcextinselr, TRCEXTINSELR);
 
 	for (i = 0; i < drvdata->nr_cntr; i++) {
-		etm4x_relaxed_write32(csa, state->trccntrldvr[i], TRCCNTRLDVRn(i));
-		etm4x_relaxed_write32(csa, state->trccntctlr[i], TRCCNTCTLRn(i));
-		etm4x_relaxed_write32(csa, state->trccntvr[i], TRCCNTVRn(i));
+		etm4x_relaxed_write32_varreg(csa, state->trccntrldvr[i], TRCCNTRLDVRn(i));
+		etm4x_relaxed_write32_varreg(csa, state->trccntctlr[i], TRCCNTCTLRn(i));
+		etm4x_relaxed_write32_varreg(csa, state->trccntvr[i], TRCCNTVRn(i));
 	}
 
 	for (i = 0; i < drvdata->nr_resource * 2; i++)
-		etm4x_relaxed_write32(csa, state->trcrsctlr[i], TRCRSCTLRn(i));
+		etm4x_relaxed_write32_varreg(csa, state->trcrsctlr[i], TRCRSCTLRn(i));
 
 	for (i = 0; i < drvdata->nr_ss_cmp; i++) {
-		etm4x_relaxed_write32(csa, state->trcssccr[i], TRCSSCCRn(i));
-		etm4x_relaxed_write32(csa, state->trcsscsr[i], TRCSSCSRn(i));
+		etm4x_relaxed_write32_varreg(csa, state->trcssccr[i], TRCSSCCRn(i));
+		etm4x_relaxed_write32_varreg(csa, state->trcsscsr[i], TRCSSCSRn(i));
 		if (etm4x_sspcicrn_present(drvdata, i))
-			etm4x_relaxed_write32(csa, state->trcsspcicr[i], TRCSSPCICRn(i));
+			etm4x_relaxed_write32_varreg(csa, state->trcsspcicr[i], TRCSSPCICRn(i));
 	}
 
 	for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
-		etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
-		etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
+		etm4x_relaxed_write64_varreg(csa, state->trcacvr[i], TRCACVRn(i));
+		etm4x_relaxed_write64_varreg(csa, state->trcacatr[i], TRCACATRn(i));
 	}
 
 	for (i = 0; i < drvdata->numcidc; i++)
-		etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
+		etm4x_relaxed_write64_varreg(csa, state->trccidcvr[i], TRCCIDCVRn(i));
 
 	for (i = 0; i < drvdata->numvmidc; i++)
-		etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
+		etm4x_relaxed_write64_varreg(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
 
 	etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
 	if (drvdata->numcidc > 4)
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index e5b79bdb9851..acad1befa207 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -1001,6 +1001,75 @@  void etm4_config_trace_mode(struct etmv4_config *config);
 u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
 void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit);
 
+/*
+ * Register accessors for variable register number, use in a loop
+ * when 'reg' is not a constant that can be passed into an inline
+ * assembly.
+ */
+static inline void etm4x_relaxed_write32_varreg(struct csdev_access *csa, u32 val, u32 offset)
+{
+	if (csa->io_mem)
+		writel_relaxed(val, csa->base + offset);
+	else
+		etm4x_sysreg_write(val, offset, true, false);
+}
+
+static inline void etm4x_relaxed_write64_varreg(struct csdev_access *csa, u64 val, u32 offset)
+{
+	if (csa->io_mem)
+		writeq_relaxed(val, csa->base + offset);
+	else
+		etm4x_sysreg_write(val, offset, true, true);
+}
+
+static inline void etm4x_write32_varreg(struct csdev_access *csa, u32 val, u32 offset)
+{
+	if (csa->io_mem)
+		writel(val, csa->base + offset);
+	else
+		etm4x_sysreg_write(val, offset, false, false);
+}
+
+static inline void etm4x_write64_varreg(struct csdev_access *csa, u64 val, u32 offset)
+{
+	if (csa->io_mem)
+		writeq(val, csa->base + offset);
+	else
+		etm4x_sysreg_write(val, offset, false, true);
+}
+
+static inline u32 etm4x_relaxed_read32_varreg(struct csdev_access *csa, u32 offset)
+{
+	if (csa->io_mem)
+		return readl_relaxed(csa->base + offset);
+	else
+		return etm4x_sysreg_read(offset, true, false);
+}
+
+static inline u64 etm4x_relaxed_read64_varreg(struct csdev_access *csa, u32 offset)
+{
+	if (csa->io_mem)
+		return readq(csa->base + offset);
+	else
+		return etm4x_sysreg_read(offset, true, true);
+}
+
+static inline u32 etm4x_read32_varreg(struct csdev_access *csa, u32 offset)
+{
+	if (csa->io_mem)
+		return readl(csa->base + offset);
+	else
+		return etm4x_sysreg_read(offset, false, false);
+}
+
+static inline u64 etm4x_read64_varreg(struct csdev_access *csa, u32 offset)
+{
+	if (csa->io_mem)
+		return readl(csa->base + offset);
+	else
+		return etm4x_sysreg_read(offset, false, true);
+}
+
 static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata)
 {
 	return drvdata->arch >= ETM_ARCH_ETE;