diff mbox series

[PATCHv4,2/2] arm64/io: Add a header for mmio access instrumentation

Message ID 9396fbdc415a3096ab271868960372b21479e4fb.1636973694.git.quic_saipraka@quicinc.com (mailing list archive)
State Superseded
Headers show
Series tracing/rwmmio/arm64: Add support to trace register reads/writes | expand

Commit Message

Sai Prakash Ranjan Nov. 15, 2021, 11:33 a.m. UTC
The new generic header mmio-instrumented.h will keep arch code clean
and separate from instrumented version which traces mmio register
accesses. This instrumented header is generic and can be used by other
architectures as well. Also add a generic flag (__DISABLE_TRACE_MMIO__)
which is used to disable MMIO tracing in nVHE and if required can be
used to disable tracing for specific drivers.

Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
---
 arch/arm64/include/asm/io.h       | 25 ++++-------
 arch/arm64/kvm/hyp/nvhe/Makefile  |  2 +-
 include/linux/mmio-instrumented.h | 70 +++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 17 deletions(-)
 create mode 100644 include/linux/mmio-instrumented.h

Comments

Steven Rostedt Nov. 16, 2021, 10:40 p.m. UTC | #1
On Mon, 15 Nov 2021 17:03:30 +0530
Sai Prakash Ranjan <quic_saipraka@quicinc.com> wrote:

> The new generic header mmio-instrumented.h will keep arch code clean
> and separate from instrumented version which traces mmio register
> accesses. This instrumented header is generic and can be used by other
> architectures as well. Also add a generic flag (__DISABLE_TRACE_MMIO__)
> which is used to disable MMIO tracing in nVHE and if required can be
> used to disable tracing for specific drivers.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
>  arch/arm64/include/asm/io.h       | 25 ++++-------
>  arch/arm64/kvm/hyp/nvhe/Makefile  |  2 +-
>  include/linux/mmio-instrumented.h | 70 +++++++++++++++++++++++++++++++
>  3 files changed, 80 insertions(+), 17 deletions(-)
>  create mode 100644 include/linux/mmio-instrumented.h
> 
> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
> index 7fd836bea7eb..a635aaaf81b9 100644
> --- a/arch/arm64/include/asm/io.h
> +++ b/arch/arm64/include/asm/io.h
> @@ -10,6 +10,7 @@
>  
>  #include <linux/types.h>
>  #include <linux/pgtable.h>
> +#include <linux/mmio-instrumented.h>
>  
>  #include <asm/byteorder.h>
>  #include <asm/barrier.h>
> @@ -21,32 +22,27 @@
>  /*
>   * Generic IO read/write.  These perform native-endian accesses.
>   */
> -#define __raw_writeb __raw_writeb
> -static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
> +static inline void arch_raw_writeb(u8 val, volatile void __iomem *addr)
>  {
>  	asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
>  }
>  
> -#define __raw_writew __raw_writew
> -static inline void __raw_writew(u16 val, volatile void __iomem *addr)
> +static inline void arch_raw_writew(u16 val, volatile void __iomem *addr)
>  {
>  	asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
>  }
>  
> -#define __raw_writel __raw_writel
> -static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
> +static __always_inline void arch_raw_writel(u32 val, volatile void __iomem *addr)
>  {
>  	asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
>  }
>  
> -#define __raw_writeq __raw_writeq
> -static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
> +static inline void arch_raw_writeq(u64 val, volatile void __iomem *addr)
>  {
>  	asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
>  }
>  
> -#define __raw_readb __raw_readb
> -static inline u8 __raw_readb(const volatile void __iomem *addr)
> +static inline u8 arch_raw_readb(const volatile void __iomem *addr)
>  {
>  	u8 val;
>  	asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
> @@ -56,8 +52,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
>  	return val;
>  }
>  
> -#define __raw_readw __raw_readw
> -static inline u16 __raw_readw(const volatile void __iomem *addr)
> +static inline u16 arch_raw_readw(const volatile void __iomem *addr)
>  {
>  	u16 val;
>  
> @@ -68,8 +63,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
>  	return val;
>  }
>  
> -#define __raw_readl __raw_readl
> -static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
> +static __always_inline u32 arch_raw_readl(const volatile void __iomem *addr)
>  {
>  	u32 val;
>  	asm volatile(ALTERNATIVE("ldr %w0, [%1]",
> @@ -79,8 +73,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
>  	return val;
>  }
>  
> -#define __raw_readq __raw_readq
> -static inline u64 __raw_readq(const volatile void __iomem *addr)
> +static inline u64 arch_raw_readq(const volatile void __iomem *addr)

Shouldn't the above be done as a separate patch and handle other
architectures that implement __raw_read/write*()?


>  {
>  	u64 val;
>  	asm volatile(ALTERNATIVE("ldr %0, [%1]",
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> index c3c11974fa3b..ff56d2165ea9 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -4,7 +4,7 @@
>  #
>  
>  asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
> -ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
> +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
>  
>  hostprogs := gen-hyprel
>  HOST_EXTRACFLAGS += -I$(objtree)/include
> diff --git a/include/linux/mmio-instrumented.h b/include/linux/mmio-instrumented.h
> new file mode 100644
> index 000000000000..99979c025cc1
> --- /dev/null
> +++ b/include/linux/mmio-instrumented.h
> @@ -0,0 +1,70 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
> + */
> +
> +#ifndef _LINUX_MMIO_INSTRUMENTED_H
> +#define _LINUX_MMIO_INSTRUMENTED_H
> +
> +#include <linux/tracepoint-defs.h>
> +
> +/*
> + * Tracepoint and MMIO logging symbols should not be visible at EL2(HYP) as
> + * there is no way to execute them and any such MMIO access from EL2 will
> + * explode instantly (Words of Marc Zyngier). So introduce a generic flag
> + * __DISABLE_TRACE_MMIO__ to disable MMIO tracing in nVHE and other drivers
> + * if required.
> + */
> +#if IS_ENABLED(CONFIG_TRACE_MMIO_ACCESS) && !(defined(__DISABLE_TRACE_MMIO__))
> +DECLARE_TRACEPOINT(rwmmio_write);
> +DECLARE_TRACEPOINT(rwmmio_read);
> +
> +void log_write_mmio(const char *width, volatile void __iomem *addr);
> +void log_read_mmio(const char *width, const volatile void __iomem *addr);
> +
> +#define __raw_write(v, a, _l)	({				\
> +	volatile void __iomem *_a = (a);			\
> +	if (tracepoint_enabled(rwmmio_write))			\
> +		log_write_mmio(__stringify(write##_l), _a);	\
> +	arch_raw_write##_l((v), _a);				\
> +	})
> +
> +#define __raw_writeb(v, a)	__raw_write((v), a, b)
> +#define __raw_writew(v, a)	__raw_write((v), a, w)
> +#define __raw_writel(v, a)	__raw_write((v), a, l)
> +#define __raw_writeq(v, a)	__raw_write((v), a, q)
> +
> +#define __raw_read(a, _l, _t)    ({				\
> +	_t __a;							\
> +	const volatile void __iomem *_a = (a);			\
> +	if (tracepoint_enabled(rwmmio_read))			\
> +		log_read_mmio(__stringify(read##_l), _a);	\
> +	__a = arch_raw_read##_l(_a);				\
> +	__a;							\
> +	})
> +
> +#define __raw_readb(a)		__raw_read((a), b, u8)
> +#define __raw_readw(a)		__raw_read((a), w, u16)
> +#define __raw_readl(a)		__raw_read((a), l, u32)
> +#define __raw_readq(a)		__raw_read((a), q, u64)
> +
> +#else
> +
> +#define __raw_writeb(v, a)	arch_raw_writeb(v, a)
> +#define __raw_writew(v, a)	arch_raw_writew(v, a)
> +#define __raw_writel(v, a)	arch_raw_writel(v, a)
> +#define __raw_writeq(v, a)	arch_raw_writeq(v, a)
> +
> +#define __raw_readb(a)		arch_raw_readb(a)
> +#define __raw_readw(a)		arch_raw_readw(a)
> +#define __raw_readl(a)		arch_raw_readl(a)
> +#define __raw_readq(a)		arch_raw_readq(a)
> +
> +static inline void log_write_mmio(const char *width,
> +				  volatile void __iomem *addr) {}
> +static inline void log_read_mmio(const char *width,
> +				 const volatile void __iomem *addr) {}

The rest from a tracing point of view looks fine, and for that part:

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

-- Steve


> +
> +#endif /* CONFIG_TRACE_MMIO_ACCESS */
> +
> +#endif /* _LINUX_MMIO_INSTRUMENTED_H */
Sai Prakash Ranjan Nov. 17, 2021, 3:53 a.m. UTC | #2
Hi Steve,

On 11/17/2021 4:10 AM, Steven Rostedt wrote:
> On Mon, 15 Nov 2021 17:03:30 +0530
> Sai Prakash Ranjan <quic_saipraka@quicinc.com> wrote:
>
>> The new generic header mmio-instrumented.h will keep arch code clean
>> and separate from instrumented version which traces mmio register
>> accesses. This instrumented header is generic and can be used by other
>> architectures as well. Also add a generic flag (__DISABLE_TRACE_MMIO__)
>> which is used to disable MMIO tracing in nVHE and if required can be
>> used to disable tracing for specific drivers.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>   arch/arm64/include/asm/io.h       | 25 ++++-------
>>   arch/arm64/kvm/hyp/nvhe/Makefile  |  2 +-
>>   include/linux/mmio-instrumented.h | 70 +++++++++++++++++++++++++++++++
>>   3 files changed, 80 insertions(+), 17 deletions(-)
>>   create mode 100644 include/linux/mmio-instrumented.h
>>
>> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
>> index 7fd836bea7eb..a635aaaf81b9 100644
>> --- a/arch/arm64/include/asm/io.h
>> +++ b/arch/arm64/include/asm/io.h
>> @@ -10,6 +10,7 @@
>>   
>>   #include <linux/types.h>
>>   #include <linux/pgtable.h>
>> +#include <linux/mmio-instrumented.h>
>>   
>>   #include <asm/byteorder.h>
>>   #include <asm/barrier.h>
>> @@ -21,32 +22,27 @@
>>   /*
>>    * Generic IO read/write.  These perform native-endian accesses.
>>    */
>> -#define __raw_writeb __raw_writeb
>> -static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
>> +static inline void arch_raw_writeb(u8 val, volatile void __iomem *addr)
>>   {
>>   	asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
>>   }
>>   
>> -#define __raw_writew __raw_writew
>> -static inline void __raw_writew(u16 val, volatile void __iomem *addr)
>> +static inline void arch_raw_writew(u16 val, volatile void __iomem *addr)
>>   {
>>   	asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
>>   }
>>   
>> -#define __raw_writel __raw_writel
>> -static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
>> +static __always_inline void arch_raw_writel(u32 val, volatile void __iomem *addr)
>>   {
>>   	asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
>>   }
>>   
>> -#define __raw_writeq __raw_writeq
>> -static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
>> +static inline void arch_raw_writeq(u64 val, volatile void __iomem *addr)
>>   {
>>   	asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
>>   }
>>   
>> -#define __raw_readb __raw_readb
>> -static inline u8 __raw_readb(const volatile void __iomem *addr)
>> +static inline u8 arch_raw_readb(const volatile void __iomem *addr)
>>   {
>>   	u8 val;
>>   	asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
>> @@ -56,8 +52,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
>>   	return val;
>>   }
>>   
>> -#define __raw_readw __raw_readw
>> -static inline u16 __raw_readw(const volatile void __iomem *addr)
>> +static inline u16 arch_raw_readw(const volatile void __iomem *addr)
>>   {
>>   	u16 val;
>>   
>> @@ -68,8 +63,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
>>   	return val;
>>   }
>>   
>> -#define __raw_readl __raw_readl
>> -static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
>> +static __always_inline u32 arch_raw_readl(const volatile void __iomem *addr)
>>   {
>>   	u32 val;
>>   	asm volatile(ALTERNATIVE("ldr %w0, [%1]",
>> @@ -79,8 +73,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
>>   	return val;
>>   }
>>   
>> -#define __raw_readq __raw_readq
>> -static inline u64 __raw_readq(const volatile void __iomem *addr)
>> +static inline u64 arch_raw_readq(const volatile void __iomem *addr)
> Shouldn't the above be done as a separate patch and handle other
> architectures that implement __raw_read/write*()?

Will do it as a separate patch in the next version, but can't add for 
other architectures yet
as I have no way to test it except compile test and are better left 
since there are certain things
which are specific to certain archs, for example on arm64, we cannot 
enable MMIO tracing for
arm64 NVHE EL2 (HYP) mode, these things are better known by the 
corresponding arch experts.
So the idea of this patch series is to provide a generic instrumentation 
facility which can be easily
adopted by other architectures as well with initial support for ARM64 
just like any other new
feature starting with support for 1/2 archs.

>
>>   {
>>   	u64 val;
>>   	asm volatile(ALTERNATIVE("ldr %0, [%1]",
>> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
>> index c3c11974fa3b..ff56d2165ea9 100644
>> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
>> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
>> @@ -4,7 +4,7 @@
>>   #
>>   
>>   asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
>> -ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
>> +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
>>   
>>   hostprogs := gen-hyprel
>>   HOST_EXTRACFLAGS += -I$(objtree)/include
>> diff --git a/include/linux/mmio-instrumented.h b/include/linux/mmio-instrumented.h
>> new file mode 100644
>> index 000000000000..99979c025cc1
>> --- /dev/null
>> +++ b/include/linux/mmio-instrumented.h
>> @@ -0,0 +1,70 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
>> + */
>> +
>> +#ifndef _LINUX_MMIO_INSTRUMENTED_H
>> +#define _LINUX_MMIO_INSTRUMENTED_H
>> +
>> +#include <linux/tracepoint-defs.h>
>> +
>> +/*
>> + * Tracepoint and MMIO logging symbols should not be visible at EL2(HYP) as
>> + * there is no way to execute them and any such MMIO access from EL2 will
>> + * explode instantly (Words of Marc Zyngier). So introduce a generic flag
>> + * __DISABLE_TRACE_MMIO__ to disable MMIO tracing in nVHE and other drivers
>> + * if required.
>> + */
>> +#if IS_ENABLED(CONFIG_TRACE_MMIO_ACCESS) && !(defined(__DISABLE_TRACE_MMIO__))
>> +DECLARE_TRACEPOINT(rwmmio_write);
>> +DECLARE_TRACEPOINT(rwmmio_read);
>> +
>> +void log_write_mmio(const char *width, volatile void __iomem *addr);
>> +void log_read_mmio(const char *width, const volatile void __iomem *addr);
>> +
>> +#define __raw_write(v, a, _l)	({				\
>> +	volatile void __iomem *_a = (a);			\
>> +	if (tracepoint_enabled(rwmmio_write))			\
>> +		log_write_mmio(__stringify(write##_l), _a);	\
>> +	arch_raw_write##_l((v), _a);				\
>> +	})
>> +
>> +#define __raw_writeb(v, a)	__raw_write((v), a, b)
>> +#define __raw_writew(v, a)	__raw_write((v), a, w)
>> +#define __raw_writel(v, a)	__raw_write((v), a, l)
>> +#define __raw_writeq(v, a)	__raw_write((v), a, q)
>> +
>> +#define __raw_read(a, _l, _t)    ({				\
>> +	_t __a;							\
>> +	const volatile void __iomem *_a = (a);			\
>> +	if (tracepoint_enabled(rwmmio_read))			\
>> +		log_read_mmio(__stringify(read##_l), _a);	\
>> +	__a = arch_raw_read##_l(_a);				\
>> +	__a;							\
>> +	})
>> +
>> +#define __raw_readb(a)		__raw_read((a), b, u8)
>> +#define __raw_readw(a)		__raw_read((a), w, u16)
>> +#define __raw_readl(a)		__raw_read((a), l, u32)
>> +#define __raw_readq(a)		__raw_read((a), q, u64)
>> +
>> +#else
>> +
>> +#define __raw_writeb(v, a)	arch_raw_writeb(v, a)
>> +#define __raw_writew(v, a)	arch_raw_writew(v, a)
>> +#define __raw_writel(v, a)	arch_raw_writel(v, a)
>> +#define __raw_writeq(v, a)	arch_raw_writeq(v, a)
>> +
>> +#define __raw_readb(a)		arch_raw_readb(a)
>> +#define __raw_readw(a)		arch_raw_readw(a)
>> +#define __raw_readl(a)		arch_raw_readl(a)
>> +#define __raw_readq(a)		arch_raw_readq(a)
>> +
>> +static inline void log_write_mmio(const char *width,
>> +				  volatile void __iomem *addr) {}
>> +static inline void log_read_mmio(const char *width,
>> +				 const volatile void __iomem *addr) {}
> The rest from a tracing point of view looks fine, and for that part:
>
> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
>
> -- Steve
>
Thanks, would you be able to review Patch1 touching trace directory in 
case it has missed your queue?

-Sai

>> +
>> +#endif /* CONFIG_TRACE_MMIO_ACCESS */
>> +
>> +#endif /* _LINUX_MMIO_INSTRUMENTED_H */
kernel test robot Nov. 18, 2021, 2:58 p.m. UTC | #3
Hi Sai,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on rostedt-trace/for-next]
[also build test WARNING on arm64/for-next/core arm-perf/for-next/perf v5.16-rc1 next-20211118]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Sai-Prakash-Ranjan/tracing-rwmmio-arm64-Add-support-to-trace-register-reads-writes/20211115-193645
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git for-next
config: csky-randconfig-r005-20211118 (attached as .config)
compiler: csky-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/b3765baa5dcf19d695332a310cf29d7abd39ad73
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Sai-Prakash-Ranjan/tracing-rwmmio-arm64-Add-support-to-trace-register-reads-writes/20211115-193645
        git checkout b3765baa5dcf19d695332a310cf29d7abd39ad73
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=csky SHELL=/bin/bash kernel/trace/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/bitops.h:33,
                    from include/linux/kernel.h:12,
                    from include/linux/interrupt.h:6,
                    from include/linux/trace_recursion.h:5,
                    from include/linux/ftrace.h:10,
                    from kernel/trace/trace_readwrite.c:8:
   arch/csky/include/asm/bitops.h:77: warning: "__clear_bit" redefined
      77 | #define __clear_bit(nr, vaddr) clear_bit(nr, vaddr)
         | 
   In file included from arch/csky/include/asm/bitops.h:76,
                    from include/linux/bitops.h:33,
                    from include/linux/kernel.h:12,
                    from include/linux/interrupt.h:6,
                    from include/linux/trace_recursion.h:5,
                    from include/linux/ftrace.h:10,
                    from kernel/trace/trace_readwrite.c:8:
   include/asm-generic/bitops/non-atomic.h:34: note: this is the location of the previous definition
      34 | #define __clear_bit arch___clear_bit
         | 
   In file included from kernel/trace/trace_readwrite.c:9:
>> include/linux/mmio-instrumented.h:32: warning: "__raw_writeb" redefined
      32 | #define __raw_writeb(v, a)      __raw_write((v), a, b)
         | 
   In file included from arch/csky/include/asm/io.h:42,
                    from include/linux/io.h:13,
                    from include/linux/irq.h:20,
                    from include/asm-generic/hardirq.h:17,
                    from ./arch/csky/include/generated/asm/hardirq.h:1,
                    from include/linux/hardirq.h:11,
                    from include/linux/interrupt.h:11,
                    from include/linux/trace_recursion.h:5,
                    from include/linux/ftrace.h:10,
                    from kernel/trace/trace_readwrite.c:8:
   include/asm-generic/io.h:108: note: this is the location of the previous definition
     108 | #define __raw_writeb __raw_writeb
         | 
   In file included from kernel/trace/trace_readwrite.c:9:
>> include/linux/mmio-instrumented.h:33: warning: "__raw_writew" redefined
      33 | #define __raw_writew(v, a)      __raw_write((v), a, w)
         | 
   In file included from arch/csky/include/asm/io.h:42,
                    from include/linux/io.h:13,
                    from include/linux/irq.h:20,
                    from include/asm-generic/hardirq.h:17,
                    from ./arch/csky/include/generated/asm/hardirq.h:1,
                    from include/linux/hardirq.h:11,
                    from include/linux/interrupt.h:11,
                    from include/linux/trace_recursion.h:5,
                    from include/linux/ftrace.h:10,
                    from kernel/trace/trace_readwrite.c:8:
   include/asm-generic/io.h:116: note: this is the location of the previous definition
     116 | #define __raw_writew __raw_writew
         | 
   In file included from kernel/trace/trace_readwrite.c:9:
>> include/linux/mmio-instrumented.h:34: warning: "__raw_writel" redefined
      34 | #define __raw_writel(v, a)      __raw_write((v), a, l)
         | 
   In file included from arch/csky/include/asm/io.h:42,
                    from include/linux/io.h:13,
                    from include/linux/irq.h:20,
                    from include/asm-generic/hardirq.h:17,
                    from ./arch/csky/include/generated/asm/hardirq.h:1,
                    from include/linux/hardirq.h:11,
                    from include/linux/interrupt.h:11,
                    from include/linux/trace_recursion.h:5,
                    from include/linux/ftrace.h:10,
                    from kernel/trace/trace_readwrite.c:8:
   include/asm-generic/io.h:124: note: this is the location of the previous definition
     124 | #define __raw_writel __raw_writel
         | 
   In file included from kernel/trace/trace_readwrite.c:9:
>> include/linux/mmio-instrumented.h:46: warning: "__raw_readb" redefined
      46 | #define __raw_readb(a)          __raw_read((a), b, u8)
         | 
   In file included from arch/csky/include/asm/io.h:42,
                    from include/linux/io.h:13,
                    from include/linux/irq.h:20,
                    from include/asm-generic/hardirq.h:17,
                    from ./arch/csky/include/generated/asm/hardirq.h:1,
                    from include/linux/hardirq.h:11,
                    from include/linux/interrupt.h:11,
                    from include/linux/trace_recursion.h:5,
                    from include/linux/ftrace.h:10,
                    from kernel/trace/trace_readwrite.c:8:
   include/asm-generic/io.h:74: note: this is the location of the previous definition
      74 | #define __raw_readb __raw_readb
         | 
   In file included from kernel/trace/trace_readwrite.c:9:
>> include/linux/mmio-instrumented.h:47: warning: "__raw_readw" redefined
      47 | #define __raw_readw(a)          __raw_read((a), w, u16)
         | 
   In file included from arch/csky/include/asm/io.h:42,
                    from include/linux/io.h:13,
                    from include/linux/irq.h:20,
                    from include/asm-generic/hardirq.h:17,
                    from ./arch/csky/include/generated/asm/hardirq.h:1,
                    from include/linux/hardirq.h:11,
                    from include/linux/interrupt.h:11,
                    from include/linux/trace_recursion.h:5,
                    from include/linux/ftrace.h:10,
                    from kernel/trace/trace_readwrite.c:8:
   include/asm-generic/io.h:82: note: this is the location of the previous definition
      82 | #define __raw_readw __raw_readw
         | 
   In file included from kernel/trace/trace_readwrite.c:9:
>> include/linux/mmio-instrumented.h:48: warning: "__raw_readl" redefined
      48 | #define __raw_readl(a)          __raw_read((a), l, u32)
         | 
   In file included from arch/csky/include/asm/io.h:42,
                    from include/linux/io.h:13,
                    from include/linux/irq.h:20,
                    from include/asm-generic/hardirq.h:17,
                    from ./arch/csky/include/generated/asm/hardirq.h:1,
                    from include/linux/hardirq.h:11,
                    from include/linux/interrupt.h:11,
                    from include/linux/trace_recursion.h:5,
                    from include/linux/ftrace.h:10,
                    from kernel/trace/trace_readwrite.c:8:
   include/asm-generic/io.h:90: note: this is the location of the previous definition
      90 | #define __raw_readl __raw_readl
         | 
   In file included from include/trace/define_trace.h:102,
                    from include/trace/events/rwmmio.h:59,
                    from kernel/trace/trace_readwrite.c:13:
   include/trace/events/rwmmio.h: In function 'trace_event_raw_event_rwmmio_write':
>> include/trace/events/rwmmio.h:27:33: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
      27 |                 __entry->addr = (u64)(void *)addr;
         |                                 ^
   include/trace/trace_events.h:743:11: note: in definition of macro 'DECLARE_EVENT_CLASS'
     743 |         { assign; }                                                     \
         |           ^~~~~~
   include/trace/trace_events.h:79:30: note: in expansion of macro 'PARAMS'
      79 |                              PARAMS(assign),                   \
         |                              ^~~~~~
   include/trace/events/rwmmio.h:13:1: note: in expansion of macro 'TRACE_EVENT'
      13 | TRACE_EVENT(rwmmio_write,
         | ^~~~~~~~~~~
   include/trace/events/rwmmio.h:25:9: note: in expansion of macro 'TP_fast_assign'
      25 |         TP_fast_assign(
         |         ^~~~~~~~~~~~~~
   include/trace/events/rwmmio.h: In function 'trace_event_raw_event_rwmmio_read':
   include/trace/events/rwmmio.h:49:33: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
      49 |                 __entry->addr = (u64)(void *)addr;
         |                                 ^
   include/trace/trace_events.h:743:11: note: in definition of macro 'DECLARE_EVENT_CLASS'
     743 |         { assign; }                                                     \
         |           ^~~~~~
   include/trace/trace_events.h:79:30: note: in expansion of macro 'PARAMS'
      79 |                              PARAMS(assign),                   \
         |                              ^~~~~~
   include/trace/events/rwmmio.h:35:1: note: in expansion of macro 'TRACE_EVENT'
      35 | TRACE_EVENT(rwmmio_read,
         | ^~~~~~~~~~~
   include/trace/events/rwmmio.h:47:9: note: in expansion of macro 'TP_fast_assign'
      47 |         TP_fast_assign(
         |         ^~~~~~~~~~~~~~


vim +/__raw_writeb +32 include/linux/mmio-instrumented.h

    24	
    25	#define __raw_write(v, a, _l)	({				\
    26		volatile void __iomem *_a = (a);			\
    27		if (tracepoint_enabled(rwmmio_write))			\
    28			log_write_mmio(__stringify(write##_l), _a);	\
    29		arch_raw_write##_l((v), _a);				\
    30		})
    31	
  > 32	#define __raw_writeb(v, a)	__raw_write((v), a, b)
  > 33	#define __raw_writew(v, a)	__raw_write((v), a, w)
  > 34	#define __raw_writel(v, a)	__raw_write((v), a, l)
    35	#define __raw_writeq(v, a)	__raw_write((v), a, q)
    36	
    37	#define __raw_read(a, _l, _t)    ({				\
    38		_t __a;							\
    39		const volatile void __iomem *_a = (a);			\
    40		if (tracepoint_enabled(rwmmio_read))			\
    41			log_read_mmio(__stringify(read##_l), _a);	\
    42		__a = arch_raw_read##_l(_a);				\
    43		__a;							\
    44		})
    45	
  > 46	#define __raw_readb(a)		__raw_read((a), b, u8)
  > 47	#define __raw_readw(a)		__raw_read((a), w, u16)
  > 48	#define __raw_readl(a)		__raw_read((a), l, u32)
    49	#define __raw_readq(a)		__raw_read((a), q, u64)
    50	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Arnd Bergmann Nov. 18, 2021, 3:24 p.m. UTC | #4
On Mon, Nov 15, 2021 at 12:33 PM Sai Prakash Ranjan
<quic_saipraka@quicinc.com> wrote:
>  /*
>   * Generic IO read/write.  These perform native-endian accesses.
>   */
> -#define __raw_writeb __raw_writeb
> -static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
> +static inline void arch_raw_writeb(u8 val, volatile void __iomem *addr)
>  {
>         asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
>  }

Woundn't removing the #define here will break the logic in
include/asm-generic/io.h,
making it fall back to the pointer-dereference version for the actual access?

> +#if IS_ENABLED(CONFIG_TRACE_MMIO_ACCESS) && !(defined(__DISABLE_TRACE_MMIO__))
> +DECLARE_TRACEPOINT(rwmmio_write);
> +DECLARE_TRACEPOINT(rwmmio_read);
> +
> +void log_write_mmio(const char *width, volatile void __iomem *addr);
> +void log_read_mmio(const char *width, const volatile void __iomem *addr);
> +
> +#define __raw_write(v, a, _l)  ({                              \
> +       volatile void __iomem *_a = (a);                        \
> +       if (tracepoint_enabled(rwmmio_write))                   \
> +               log_write_mmio(__stringify(write##_l), _a);     \
> +       arch_raw_write##_l((v), _a);                            \
> +       })

This feels like it's getting too big to be inlined. Have you considered
integrating this with the lib/logic_iomem.c infrastructure instead?

That already provides a way to override MMIO areas, and it lets you do
the logging from a single place rather than having it duplicated in every
single caller. It also provides a way of filtering it based on the ioremap()
call.

        Arnd
Sai Prakash Ranjan Nov. 19, 2021, 4:06 a.m. UTC | #5
Hi Arnd,

On 11/18/2021 8:54 PM, Arnd Bergmann wrote:
> On Mon, Nov 15, 2021 at 12:33 PM Sai Prakash Ranjan
> <quic_saipraka@quicinc.com> wrote:
>>   /*
>>    * Generic IO read/write.  These perform native-endian accesses.
>>    */
>> -#define __raw_writeb __raw_writeb
>> -static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
>> +static inline void arch_raw_writeb(u8 val, volatile void __iomem *addr)
>>   {
>>          asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
>>   }
> Woundn't removing the #define here will break the logic in
> include/asm-generic/io.h,
> making it fall back to the pointer-dereference version for the actual access?

#defines for these are added in mmio-instrumented.h header which is 
included in
arm64/asm/io.h, so it won't break the logic by falling back to 
pointer-dereference.

>> +#if IS_ENABLED(CONFIG_TRACE_MMIO_ACCESS) && !(defined(__DISABLE_TRACE_MMIO__))
>> +DECLARE_TRACEPOINT(rwmmio_write);
>> +DECLARE_TRACEPOINT(rwmmio_read);
>> +
>> +void log_write_mmio(const char *width, volatile void __iomem *addr);
>> +void log_read_mmio(const char *width, const volatile void __iomem *addr);
>> +
>> +#define __raw_write(v, a, _l)  ({                              \
>> +       volatile void __iomem *_a = (a);                        \
>> +       if (tracepoint_enabled(rwmmio_write))                   \
>> +               log_write_mmio(__stringify(write##_l), _a);     \
>> +       arch_raw_write##_l((v), _a);                            \
>> +       })
> This feels like it's getting too big to be inlined. Have you considered
> integrating this with the lib/logic_iomem.c infrastructure instead?
>
> That already provides a way to override MMIO areas, and it lets you do
> the logging from a single place rather than having it duplicated in every
> single caller. It also provides a way of filtering it based on the ioremap()
> call.
>

Thanks for the suggestion, will look at the logic_iomem.c and see if it 
fits our
usecase.

Thanks,
Sai
Marc Zyngier Nov. 19, 2021, 1:48 p.m. UTC | #6
On Mon, 15 Nov 2021 11:33:30 +0000,
Sai Prakash Ranjan <quic_saipraka@quicinc.com> wrote:
> 
> The new generic header mmio-instrumented.h will keep arch code clean
> and separate from instrumented version which traces mmio register
> accesses. This instrumented header is generic and can be used by other
> architectures as well. Also add a generic flag (__DISABLE_TRACE_MMIO__)
> which is used to disable MMIO tracing in nVHE and if required can be
> used to disable tracing for specific drivers.
> 
> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
> ---
>  arch/arm64/include/asm/io.h       | 25 ++++-------
>  arch/arm64/kvm/hyp/nvhe/Makefile  |  2 +-
>  include/linux/mmio-instrumented.h | 70 +++++++++++++++++++++++++++++++
>  3 files changed, 80 insertions(+), 17 deletions(-)
>  create mode 100644 include/linux/mmio-instrumented.h
> 
> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
> index 7fd836bea7eb..a635aaaf81b9 100644
> --- a/arch/arm64/include/asm/io.h
> +++ b/arch/arm64/include/asm/io.h
> @@ -10,6 +10,7 @@
>  
>  #include <linux/types.h>
>  #include <linux/pgtable.h>
> +#include <linux/mmio-instrumented.h>
>  
>  #include <asm/byteorder.h>
>  #include <asm/barrier.h>
> @@ -21,32 +22,27 @@
>  /*
>   * Generic IO read/write.  These perform native-endian accesses.
>   */
> -#define __raw_writeb __raw_writeb
> -static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
> +static inline void arch_raw_writeb(u8 val, volatile void __iomem *addr)
>  {
>  	asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
>  }
>  
> -#define __raw_writew __raw_writew
> -static inline void __raw_writew(u16 val, volatile void __iomem *addr)
> +static inline void arch_raw_writew(u16 val, volatile void __iomem *addr)
>  {
>  	asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
>  }
>  
> -#define __raw_writel __raw_writel
> -static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
> +static __always_inline void arch_raw_writel(u32 val, volatile void __iomem *addr)
>  {
>  	asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
>  }
>  
> -#define __raw_writeq __raw_writeq
> -static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
> +static inline void arch_raw_writeq(u64 val, volatile void __iomem *addr)
>  {
>  	asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
>  }
>  
> -#define __raw_readb __raw_readb
> -static inline u8 __raw_readb(const volatile void __iomem *addr)
> +static inline u8 arch_raw_readb(const volatile void __iomem *addr)
>  {
>  	u8 val;
>  	asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
> @@ -56,8 +52,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
>  	return val;
>  }
>  
> -#define __raw_readw __raw_readw
> -static inline u16 __raw_readw(const volatile void __iomem *addr)
> +static inline u16 arch_raw_readw(const volatile void __iomem *addr)
>  {
>  	u16 val;
>  
> @@ -68,8 +63,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
>  	return val;
>  }
>  
> -#define __raw_readl __raw_readl
> -static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
> +static __always_inline u32 arch_raw_readl(const volatile void __iomem *addr)
>  {
>  	u32 val;
>  	asm volatile(ALTERNATIVE("ldr %w0, [%1]",
> @@ -79,8 +73,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
>  	return val;
>  }
>  
> -#define __raw_readq __raw_readq
> -static inline u64 __raw_readq(const volatile void __iomem *addr)
> +static inline u64 arch_raw_readq(const volatile void __iomem *addr)
>  {
>  	u64 val;
>  	asm volatile(ALTERNATIVE("ldr %0, [%1]",
> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
> index c3c11974fa3b..ff56d2165ea9 100644
> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
> @@ -4,7 +4,7 @@
>  #
>  
>  asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
> -ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
> +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
>  
>  hostprogs := gen-hyprel
>  HOST_EXTRACFLAGS += -I$(objtree)/include
> diff --git a/include/linux/mmio-instrumented.h b/include/linux/mmio-instrumented.h
> new file mode 100644
> index 000000000000..99979c025cc1
> --- /dev/null
> +++ b/include/linux/mmio-instrumented.h
> @@ -0,0 +1,70 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
> + */
> +
> +#ifndef _LINUX_MMIO_INSTRUMENTED_H
> +#define _LINUX_MMIO_INSTRUMENTED_H
> +
> +#include <linux/tracepoint-defs.h>
> +
> +/*
> + * Tracepoint and MMIO logging symbols should not be visible at EL2(HYP) as
> + * there is no way to execute them and any such MMIO access from EL2 will
> + * explode instantly (Words of Marc Zyngier). So introduce a generic flag
> + * __DISABLE_TRACE_MMIO__ to disable MMIO tracing in nVHE and other drivers
> + * if required.
> + */

This Gospel would be better placed next to the code that defines the
macro, given that this is an arch-independent include file, and hardly
anyone understands the quirks of a nVHE KVM (and only nVHE, something
that the comment fails to capture).

	M.
Sai Prakash Ranjan Nov. 19, 2021, 2:09 p.m. UTC | #7
On 11/19/2021 7:18 PM, Marc Zyngier wrote:
> On Mon, 15 Nov 2021 11:33:30 +0000,
> Sai Prakash Ranjan <quic_saipraka@quicinc.com> wrote:
>> The new generic header mmio-instrumented.h will keep arch code clean
>> and separate from instrumented version which traces mmio register
>> accesses. This instrumented header is generic and can be used by other
>> architectures as well. Also add a generic flag (__DISABLE_TRACE_MMIO__)
>> which is used to disable MMIO tracing in nVHE and if required can be
>> used to disable tracing for specific drivers.
>>
>> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
>> ---
>>   arch/arm64/include/asm/io.h       | 25 ++++-------
>>   arch/arm64/kvm/hyp/nvhe/Makefile  |  2 +-
>>   include/linux/mmio-instrumented.h | 70 +++++++++++++++++++++++++++++++
>>   3 files changed, 80 insertions(+), 17 deletions(-)
>>   create mode 100644 include/linux/mmio-instrumented.h
>>
>> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
>> index 7fd836bea7eb..a635aaaf81b9 100644
>> --- a/arch/arm64/include/asm/io.h
>> +++ b/arch/arm64/include/asm/io.h
>> @@ -10,6 +10,7 @@
>>   
>>   #include <linux/types.h>
>>   #include <linux/pgtable.h>
>> +#include <linux/mmio-instrumented.h>
>>   
>>   #include <asm/byteorder.h>
>>   #include <asm/barrier.h>
>> @@ -21,32 +22,27 @@
>>   /*
>>    * Generic IO read/write.  These perform native-endian accesses.
>>    */
>> -#define __raw_writeb __raw_writeb
>> -static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
>> +static inline void arch_raw_writeb(u8 val, volatile void __iomem *addr)
>>   {
>>   	asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
>>   }
>>   
>> -#define __raw_writew __raw_writew
>> -static inline void __raw_writew(u16 val, volatile void __iomem *addr)
>> +static inline void arch_raw_writew(u16 val, volatile void __iomem *addr)
>>   {
>>   	asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
>>   }
>>   
>> -#define __raw_writel __raw_writel
>> -static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
>> +static __always_inline void arch_raw_writel(u32 val, volatile void __iomem *addr)
>>   {
>>   	asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
>>   }
>>   
>> -#define __raw_writeq __raw_writeq
>> -static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
>> +static inline void arch_raw_writeq(u64 val, volatile void __iomem *addr)
>>   {
>>   	asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
>>   }
>>   
>> -#define __raw_readb __raw_readb
>> -static inline u8 __raw_readb(const volatile void __iomem *addr)
>> +static inline u8 arch_raw_readb(const volatile void __iomem *addr)
>>   {
>>   	u8 val;
>>   	asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
>> @@ -56,8 +52,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
>>   	return val;
>>   }
>>   
>> -#define __raw_readw __raw_readw
>> -static inline u16 __raw_readw(const volatile void __iomem *addr)
>> +static inline u16 arch_raw_readw(const volatile void __iomem *addr)
>>   {
>>   	u16 val;
>>   
>> @@ -68,8 +63,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
>>   	return val;
>>   }
>>   
>> -#define __raw_readl __raw_readl
>> -static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
>> +static __always_inline u32 arch_raw_readl(const volatile void __iomem *addr)
>>   {
>>   	u32 val;
>>   	asm volatile(ALTERNATIVE("ldr %w0, [%1]",
>> @@ -79,8 +73,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
>>   	return val;
>>   }
>>   
>> -#define __raw_readq __raw_readq
>> -static inline u64 __raw_readq(const volatile void __iomem *addr)
>> +static inline u64 arch_raw_readq(const volatile void __iomem *addr)
>>   {
>>   	u64 val;
>>   	asm volatile(ALTERNATIVE("ldr %0, [%1]",
>> diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
>> index c3c11974fa3b..ff56d2165ea9 100644
>> --- a/arch/arm64/kvm/hyp/nvhe/Makefile
>> +++ b/arch/arm64/kvm/hyp/nvhe/Makefile
>> @@ -4,7 +4,7 @@
>>   #
>>   
>>   asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
>> -ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
>> +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
>>   
>>   hostprogs := gen-hyprel
>>   HOST_EXTRACFLAGS += -I$(objtree)/include
>> diff --git a/include/linux/mmio-instrumented.h b/include/linux/mmio-instrumented.h
>> new file mode 100644
>> index 000000000000..99979c025cc1
>> --- /dev/null
>> +++ b/include/linux/mmio-instrumented.h
>> @@ -0,0 +1,70 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
>> + */
>> +
>> +#ifndef _LINUX_MMIO_INSTRUMENTED_H
>> +#define _LINUX_MMIO_INSTRUMENTED_H
>> +
>> +#include <linux/tracepoint-defs.h>
>> +
>> +/*
>> + * Tracepoint and MMIO logging symbols should not be visible at EL2(HYP) as
>> + * there is no way to execute them and any such MMIO access from EL2 will
>> + * explode instantly (Words of Marc Zyngier). So introduce a generic flag
>> + * __DISABLE_TRACE_MMIO__ to disable MMIO tracing in nVHE and other drivers
>> + * if required.
>> + */
> This Gospel would be better placed next to the code that defines the
> macro, given that this is an arch-independent include file, and hardly
> anyone understands the quirks of a nVHE KVM (and only nVHE, something
> that the comment fails to capture).
>
> 	M.
>

I'll move the comment and include nVHE in the comment as well.

Thanks,
Sai
Sai Prakash Ranjan Nov. 22, 2021, 1:35 p.m. UTC | #8
On 11/19/2021 9:36 AM, Sai Prakash Ranjan wrote:
> Hi Arnd,
>
> On 11/18/2021 8:54 PM, Arnd Bergmann wrote:
>> On Mon, Nov 15, 2021 at 12:33 PM Sai Prakash Ranjan
>> <quic_saipraka@quicinc.com> wrote:
>>>   /*
>>>    * Generic IO read/write.  These perform native-endian accesses.
>>>    */
>>> -#define __raw_writeb __raw_writeb
>>> -static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
>>> +static inline void arch_raw_writeb(u8 val, volatile void __iomem 
>>> *addr)
>>>   {
>>>          asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
>>>   }
>> Woundn't removing the #define here will break the logic in
>> include/asm-generic/io.h,
>> making it fall back to the pointer-dereference version for the actual 
>> access?
>
> #defines for these are added in mmio-instrumented.h header which is 
> included in
> arm64/asm/io.h, so it won't break the logic by falling back to 
> pointer-dereference.
>
>>> +#if IS_ENABLED(CONFIG_TRACE_MMIO_ACCESS) && 
>>> !(defined(__DISABLE_TRACE_MMIO__))
>>> +DECLARE_TRACEPOINT(rwmmio_write);
>>> +DECLARE_TRACEPOINT(rwmmio_read);
>>> +
>>> +void log_write_mmio(const char *width, volatile void __iomem *addr);
>>> +void log_read_mmio(const char *width, const volatile void __iomem 
>>> *addr);
>>> +
>>> +#define __raw_write(v, a, _l) ({                              \
>>> +       volatile void __iomem *_a = (a);                        \
>>> +       if (tracepoint_enabled(rwmmio_write))                   \
>>> +               log_write_mmio(__stringify(write##_l), _a);     \
>>> +       arch_raw_write##_l((v), _a);                            \
>>> +       })
>> This feels like it's getting too big to be inlined. Have you considered
>> integrating this with the lib/logic_iomem.c infrastructure instead?
>>
>> That already provides a way to override MMIO areas, and it lets you do
>> the logging from a single place rather than having it duplicated in 
>> every
>> single caller. It also provides a way of filtering it based on the 
>> ioremap()
>> call.
>>
>
> Thanks for the suggestion, will look at the logic_iomem.c and see if 
> it fits our
> usecase.
>
>

So I looked at logic_iomem.c which seems to be useful for emulated IO 
for virtio drivers
but our usecase just needs to log the mmio operations and no additional 
stuff, similar to
the logging access of x86 msr registers via tracepoint 
(arch/x86/include/asm/msr-trace.h).
Also raw read/write macros in logic_iomem.c have the callbacks which 
seems to be pretty costly
than inlining or direct function call given it has to be called for 
every register read and write
which are going to be thousands in our case. In their usecase, read and 
write callbacks are just
pci cfgspace reads and writes which may not be that frequently called 
and the latency might not
be visible but in our case, I think it would be visible if we have a 
callback as such. I know this is a
debug feature and perf isn't expected much but that wouldn't mean we 
should not have a debug
feature which performs better right.

On the second point, filtering by ioremap isn't much useful for our 
usecase since ioremapped
region can have 100s of registers and we are interested in the exact 
register read/write which
would cause any of the issues mentioned in the description of this patchset.

So I feel like the current way where we consolidate the instrumentation 
in mmio-instrumented.h
seems like the better way than adding tracing to an emulated iomem library.

Thanks,
Sai
Arnd Bergmann Nov. 22, 2021, 1:59 p.m. UTC | #9
On Mon, Nov 22, 2021 at 2:35 PM Sai Prakash Ranjan
<quic_saipraka@quicinc.com> wrote:
> On 11/19/2021 9:36 AM, Sai Prakash Ranjan wrote:
>
> So I looked at logic_iomem.c which seems to be useful for emulated IO
> for virtio drivers
> but our usecase just needs to log the mmio operations and no additional
> stuff, similar to
> the logging access of x86 msr registers via tracepoint
> (arch/x86/include/asm/msr-trace.h).

I think it depends on whether one wants to filter the MMIO access based
on the device, or based on the caller.

> Also raw read/write macros in logic_iomem.c have the callbacks which
> seems to be pretty costly
> than inlining or direct function call given it has to be called for
> every register read and write
> which are going to be thousands in our case. In their usecase, read and
> write callbacks are just
> pci cfgspace reads and writes which may not be that frequently called
> and the latency might not
> be visible but in our case, I think it would be visible if we have a
> callback as such. I know this is a
> debug feature and perf isn't expected much but that wouldn't mean we
> should not have a debug
> feature which performs better right.

I would expect the cost of a bus access to always dwarf the cost of
indirect function calls and instrumentation. On the other hand,
the cost of an inline trace call is nontrivial in terms of code size,
which may lead to wasting significant amounts of both RAM and
instruction cache on small machines. If you want to continue with
your approach, it would help to include code size numbers before/after
for a defconfig kernel, and maybe some performance numbers to
show what this does when you enable tracing for all registers of
a device with a lot of accesses.

> On the second point, filtering by ioremap isn't much useful for our
> usecase since ioremapped
> region can have 100s of registers and we are interested in the exact
> register read/write which
> would cause any of the issues mentioned in the description of this patchset.
>
> So I feel like the current way where we consolidate the instrumentation
> in mmio-instrumented.h
> seems like the better way than adding tracing to an emulated iomem
> library.

There is another point that I don't like in the implementation, which is
the extra indirection. If we end up with your approach of doing it
inline per caller, I would prefer having the instrumentation in
include/asm-generic/io.h, like

#ifndef readl
#define readl readl
static inline u32 readl(const volatile void __iomem *addr)
{
        u32 val;

        __io_br();
        val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
        __io_ar(val);
        if (tracepoint_enabled(rwmmio_read))
               log_read_mmio("readl", addr, val);
        return val;
}
#endif

I think this would be a lot less confusing to readers, as it is implemented
exactly in the place that has the normal definition, and it can also have
somewhat more logical semantics by only instrumenting the
normal/relaxed/ioport accessors but not the __raw_* versions that
are meant to be little more than a pointer dereference.

         Arnd
Sai Prakash Ranjan Nov. 22, 2021, 2:19 p.m. UTC | #10
On 11/22/2021 7:29 PM, Arnd Bergmann wrote:
> On Mon, Nov 22, 2021 at 2:35 PM Sai Prakash Ranjan
> <quic_saipraka@quicinc.com> wrote:
>> On 11/19/2021 9:36 AM, Sai Prakash Ranjan wrote:
>>
>> So I looked at logic_iomem.c which seems to be useful for emulated IO
>> for virtio drivers
>> but our usecase just needs to log the mmio operations and no additional
>> stuff, similar to
>> the logging access of x86 msr registers via tracepoint
>> (arch/x86/include/asm/msr-trace.h).
> I think it depends on whether one wants to filter the MMIO access based
> on the device, or based on the caller.
>
>> Also raw read/write macros in logic_iomem.c have the callbacks which
>> seems to be pretty costly
>> than inlining or direct function call given it has to be called for
>> every register read and write
>> which are going to be thousands in our case. In their usecase, read and
>> write callbacks are just
>> pci cfgspace reads and writes which may not be that frequently called
>> and the latency might not
>> be visible but in our case, I think it would be visible if we have a
>> callback as such. I know this is a
>> debug feature and perf isn't expected much but that wouldn't mean we
>> should not have a debug
>> feature which performs better right.
> I would expect the cost of a bus access to always dwarf the cost of
> indirect function calls and instrumentation. On the other hand,
> the cost of an inline trace call is nontrivial in terms of code size,
> which may lead to wasting significant amounts of both RAM and
> instruction cache on small machines. If you want to continue with
> your approach, it would help to include code size numbers before/after
> for a defconfig kernel, and maybe some performance numbers to
> show what this does when you enable tracing for all registers of
> a device with a lot of accesses.

Sure, I will get the numbers for both cases(inline and indirect calls) 
and run some
benchmark tests with register tracing enabled for both cases.


>> On the second point, filtering by ioremap isn't much useful for our
>> usecase since ioremapped
>> region can have 100s of registers and we are interested in the exact
>> register read/write which
>> would cause any of the issues mentioned in the description of this patchset.
>>
>> So I feel like the current way where we consolidate the instrumentation
>> in mmio-instrumented.h
>> seems like the better way than adding tracing to an emulated iomem
>> library.
> There is another point that I don't like in the implementation, which is
> the extra indirection. If we end up with your approach of doing it
> inline per caller, I would prefer having the instrumentation in
> include/asm-generic/io.h, like
>
> #ifndef readl
> #define readl readl
> static inline u32 readl(const volatile void __iomem *addr)
> {
>          u32 val;
>
>          __io_br();
>          val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
>          __io_ar(val);
>          if (tracepoint_enabled(rwmmio_read))
>                 log_read_mmio("readl", addr, val);
>          return val;
> }
> #endif
>
> I think this would be a lot less confusing to readers, as it is implemented
> exactly in the place that has the normal definition, and it can also have
> somewhat more logical semantics by only instrumenting the
> normal/relaxed/ioport accessors but not the __raw_* versions that
> are meant to be little more than a pointer dereference.
>
>           Arnd

But how is this different from logic in atomic-instrumented.h which also 
has asm-generic version?
Initial review few years back mentioned about having something similar 
to atomic instrumentation
and hence it was implemented with the similar approach keeping 
instrumentation out of arch specific
details.
And if we do move this instrumentation to asm-generic/io.h, how will 
that be executed since
the arch specifc read{b,w,l,q} overrides this generic version?

Thanks,
Sai
Arnd Bergmann Nov. 22, 2021, 2:30 p.m. UTC | #11
On Mon, Nov 22, 2021 at 3:19 PM Sai Prakash Ranjan
<quic_saipraka@quicinc.com> wrote:
> On 11/22/2021 7:29 PM, Arnd Bergmann wrote:
> >
> > I think this would be a lot less confusing to readers, as it is implemented
> > exactly in the place that has the normal definition, and it can also have
> > somewhat more logical semantics by only instrumenting the
> > normal/relaxed/ioport accessors but not the __raw_* versions that
> > are meant to be little more than a pointer dereference.
>
> But how is this different from logic in atomic-instrumented.h which also
> has asm-generic version?
> Initial review few years back mentioned about having something similar
> to atomic instrumentation
> and hence it was implemented with the similar approach keeping
> instrumentation out of arch specific details.

This is only a cosmetic difference. I usually prefer fewer indirections,
and I like the way that include/asm-generic/io.h only has all the
normal 'static inline' definitions spelled out, and calling the __raw_*
versions. Your version adds an extra layer with the arch_raw_readl(),
which I'd prefer to avoid.

> And if we do move this instrumentation to asm-generic/io.h, how will
> that be executed since
> the arch specifc read{b,w,l,q} overrides this generic version?

As I understand it, your version also requires architecture specific
changes, so that would be the same: it only works for architectures
that get the definition of readl()/readl_relaxed()/inl()/... from
include/asm-generic/io.h and only override the __raw version.

      Arnd
Sai Prakash Ranjan Nov. 22, 2021, 2:59 p.m. UTC | #12
On 11/22/2021 8:00 PM, Arnd Bergmann wrote:
> On Mon, Nov 22, 2021 at 3:19 PM Sai Prakash Ranjan
> <quic_saipraka@quicinc.com> wrote:
>> On 11/22/2021 7:29 PM, Arnd Bergmann wrote:
>>> I think this would be a lot less confusing to readers, as it is implemented
>>> exactly in the place that has the normal definition, and it can also have
>>> somewhat more logical semantics by only instrumenting the
>>> normal/relaxed/ioport accessors but not the __raw_* versions that
>>> are meant to be little more than a pointer dereference.
>> But how is this different from logic in atomic-instrumented.h which also
>> has asm-generic version?
>> Initial review few years back mentioned about having something similar
>> to atomic instrumentation
>> and hence it was implemented with the similar approach keeping
>> instrumentation out of arch specific details.
> This is only a cosmetic difference. I usually prefer fewer indirections,
> and I like the way that include/asm-generic/io.h only has all the
> normal 'static inline' definitions spelled out, and calling the __raw_*
> versions. Your version adds an extra layer with the arch_raw_readl(),
> which I'd prefer to avoid.

I'm ok with your preference as long as we have some way to log these 
MMIO accesses.

>> And if we do move this instrumentation to asm-generic/io.h, how will
>> that be executed since
>> the arch specifc read{b,w,l,q} overrides this generic version?
> As I understand it, your version also requires architecture specific 
> changes, so that would be the same: it only works for architectures 
> that get the definition of readl()/readl_relaxed()/inl()/... from 
> include/asm-generic/io.h and only override the __raw version. Arnd

Sorry, I didn't get this part, so  I am trying this on ARM64:

arm64/include/asm/io.h has read{b,l,w,q} defined.
include/asm-generic/io.h has below:
   #ifndef readl
   #define readl readl
   static inline u32 readl(const volatile void __iomem *addr)

and we include asm-generic/io.h in arm64/include/asm/io.h at the end 
after the definitions for arm64 mmio accesors.
So arch implementation here overrides generic ones as I see it, am I 
missing something? I even confirmed this
with some trace_printk to generic and arch specific definitions of readl 
and I see arch specific ones being called.

Thanks,
Sai
Arnd Bergmann Nov. 22, 2021, 3:35 p.m. UTC | #13
On Mon, Nov 22, 2021 at 3:59 PM Sai Prakash Ranjan
<quic_saipraka@quicinc.com> wrote:
> >> And if we do move this instrumentation to asm-generic/io.h, how will
> >> that be executed since
> >> the arch specifc read{b,w,l,q} overrides this generic version?
> > As I understand it, your version also requires architecture specific
> > changes, so that would be the same: it only works for architectures
> > that get the definition of readl()/readl_relaxed()/inl()/... from
> > include/asm-generic/io.h and only override the __raw version. Arnd
>
> Sorry, I didn't get this part, so  I am trying this on ARM64:
>
> arm64/include/asm/io.h has read{b,l,w,q} defined.
> include/asm-generic/io.h has below:
>    #ifndef readl
>    #define readl readl
>    static inline u32 readl(const volatile void __iomem *addr)
>
> and we include asm-generic/io.h in arm64/include/asm/io.h at the end
> after the definitions for arm64 mmio accesors.
> So arch implementation here overrides generic ones as I see it, am I
> missing something? I even confirmed this
> with some trace_printk to generic and arch specific definitions of readl
> and I see arch specific ones being called.

Ah, you are right that the arm64 version currently has custom definitions
of the high-level interfaces. These predate the introduction of the
__io_{p,}{b,a}{r,w} macros and are currently only used on risc-v.

I think in this case you should start by changing arm64 to use the
generic readl() etc definitions, by removing the extra definitions and
using

#define __io_ar(v) __iormb(__v)
#define __io_bw() dma_wmb()

      Arnd
Sai Prakash Ranjan Nov. 22, 2021, 3:43 p.m. UTC | #14
On 11/22/2021 9:05 PM, Arnd Bergmann wrote:
> On Mon, Nov 22, 2021 at 3:59 PM Sai Prakash Ranjan
> <quic_saipraka@quicinc.com> wrote:
>>>> And if we do move this instrumentation to asm-generic/io.h, how will
>>>> that be executed since
>>>> the arch specifc read{b,w,l,q} overrides this generic version?
>>> As I understand it, your version also requires architecture specific
>>> changes, so that would be the same: it only works for architectures
>>> that get the definition of readl()/readl_relaxed()/inl()/... from
>>> include/asm-generic/io.h and only override the __raw version. Arnd
>> Sorry, I didn't get this part, so  I am trying this on ARM64:
>>
>> arm64/include/asm/io.h has read{b,l,w,q} defined.
>> include/asm-generic/io.h has below:
>>     #ifndef readl
>>     #define readl readl
>>     static inline u32 readl(const volatile void __iomem *addr)
>>
>> and we include asm-generic/io.h in arm64/include/asm/io.h at the end
>> after the definitions for arm64 mmio accesors.
>> So arch implementation here overrides generic ones as I see it, am I
>> missing something? I even confirmed this
>> with some trace_printk to generic and arch specific definitions of readl
>> and I see arch specific ones being called.
> Ah, you are right that the arm64 version currently has custom definitions
> of the high-level interfaces. These predate the introduction of the
> __io_{p,}{b,a}{r,w} macros and are currently only used on risc-v.
>
> I think in this case you should start by changing arm64 to use the
> generic readl() etc definitions, by removing the extra definitions and
> using
>
> #define __io_ar(v) __iormb(__v)
> #define __io_bw() dma_wmb()
>
>

Sure, will do that.

Thanks,
Sai
Sai Prakash Ranjan Nov. 29, 2021, 1:49 p.m. UTC | #15
Hi Arnd,

On 11/22/2021 9:13 PM, Sai Prakash Ranjan wrote:
> On 11/22/2021 9:05 PM, Arnd Bergmann wrote:
>> On Mon, Nov 22, 2021 at 3:59 PM Sai Prakash Ranjan
>> <quic_saipraka@quicinc.com> wrote:
>>>>> And if we do move this instrumentation to asm-generic/io.h, how will
>>>>> that be executed since
>>>>> the arch specifc read{b,w,l,q} overrides this generic version?
>>>> As I understand it, your version also requires architecture specific
>>>> changes, so that would be the same: it only works for architectures
>>>> that get the definition of readl()/readl_relaxed()/inl()/... from
>>>> include/asm-generic/io.h and only override the __raw version. Arnd
>>> Sorry, I didn't get this part, so  I am trying this on ARM64:
>>>
>>> arm64/include/asm/io.h has read{b,l,w,q} defined.
>>> include/asm-generic/io.h has below:
>>>     #ifndef readl
>>>     #define readl readl
>>>     static inline u32 readl(const volatile void __iomem *addr)
>>>
>>> and we include asm-generic/io.h in arm64/include/asm/io.h at the end
>>> after the definitions for arm64 mmio accesors.
>>> So arch implementation here overrides generic ones as I see it, am I
>>> missing something? I even confirmed this
>>> with some trace_printk to generic and arch specific definitions of 
>>> readl
>>> and I see arch specific ones being called.
>> Ah, you are right that the arm64 version currently has custom 
>> definitions
>> of the high-level interfaces. These predate the introduction of the
>> __io_{p,}{b,a}{r,w} macros and are currently only used on risc-v.
>>
>> I think in this case you should start by changing arm64 to use the
>> generic readl() etc definitions, by removing the extra definitions and
>> using
>>
>> #define __io_ar(v) __iormb(__v)
>> #define __io_bw() dma_wmb()
>>
>>
>
> Sure, will do that.

I got the callback version implemented as suggested by you to compare 
the overall size and performance
with the inline version and apparently the size increased more in case 
of callback version when compared to
inline version. As for the performance, I ran some basic dd tests and 
sysbench and didn't see any noticeable
difference between the two implementations.

**Inline version with CONFIG_FTRACE=y and CONFIG_TRACE_MMIO_ACCESS=y**
$ size vmlinux
    text                   data                    bss dec             
hex         filename
23884219        14284468         532568 38701255        24e88c7 vmlinux

**Callback version with CONFIG_FTRACE=y and CONFIG_TRACE_MMIO_ACCESS=y**
$ size vmlinux
    text                  data                     bss dec               
        hex        filename
24108179        14279596         532568 38920343        251e097 vmlinux

$ ./scripts/bloat-o-meter inline-vmlinux callback-vmlinux
add/remove: 8/3 grow/shrink: 4889/89 up/down: 242244/-11564 (230680)
Total: Before=25812612, After=26043292, chg +0.89%

Note: I had arm64 move to asm-generic high level accessors in both 
versions which I plan to post together but
not included in below links,

For your reference, here are the 2 versions of the patches,
  Inline version - 
https://github.com/saiprakash-ranjan/RTB-Patches/blob/main/0001-asm-generic-io-Add-logging-support-for-MMIO-accessor.patch
  Callback version - 
https://github.com/saiprakash-ranjan/RTB-Patches/blob/main/0001-asm-generic-io-Add-callback-based-MMIO-logging-suppo.patch

Couple of things noted in callback version which didn't look quite good 
was that it needed some way to register the
callbacks and need to use some initcall (core_initcall used) as 
implemented in above patch but that would probably
mean we would lose some register logging(if there is some) in between 
early_initcall(which is when trace events get
enabled) and this trace_readwrite core_initcall. Another thing I noticed 
that since we now move to callback based
implementations, the caller info is somewhat inaccurate when compared to 
inline version.

Also regarding your earlier comment on inline versions being possibly 
problematic on lower memory systems, enabling
Ftrace itself is making a considerable size difference and in such 
systems ftrace wouldn't be enabled which implicitly means
MMIO logging which are based on trace events will be disabled anyways.

Here is the size delta with FTRACE enabled and disabled with arm64 
defconfig without MMIO logging support:
$ ./scripts/bloat-o-meter ftrace-disabled-vmlinux ftrace-enabled-vmlinux
add/remove: 8/3 grow/shrink: 4889/89 up/down: 242244/-11564 (230680)
Total: Before=22865837, After=25215198, chg +10.27%

Given all this, I would prefer inline versions in asm-generic high level 
accessors, let me know what you think.

Thanks,
Sai
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 7fd836bea7eb..a635aaaf81b9 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -10,6 +10,7 @@ 
 
 #include <linux/types.h>
 #include <linux/pgtable.h>
+#include <linux/mmio-instrumented.h>
 
 #include <asm/byteorder.h>
 #include <asm/barrier.h>
@@ -21,32 +22,27 @@ 
 /*
  * Generic IO read/write.  These perform native-endian accesses.
  */
-#define __raw_writeb __raw_writeb
-static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+static inline void arch_raw_writeb(u8 val, volatile void __iomem *addr)
 {
 	asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
-#define __raw_writew __raw_writew
-static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+static inline void arch_raw_writew(u16 val, volatile void __iomem *addr)
 {
 	asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
-#define __raw_writel __raw_writel
-static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
+static __always_inline void arch_raw_writel(u32 val, volatile void __iomem *addr)
 {
 	asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
-#define __raw_writeq __raw_writeq
-static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+static inline void arch_raw_writeq(u64 val, volatile void __iomem *addr)
 {
 	asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
-#define __raw_readb __raw_readb
-static inline u8 __raw_readb(const volatile void __iomem *addr)
+static inline u8 arch_raw_readb(const volatile void __iomem *addr)
 {
 	u8 val;
 	asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
@@ -56,8 +52,7 @@  static inline u8 __raw_readb(const volatile void __iomem *addr)
 	return val;
 }
 
-#define __raw_readw __raw_readw
-static inline u16 __raw_readw(const volatile void __iomem *addr)
+static inline u16 arch_raw_readw(const volatile void __iomem *addr)
 {
 	u16 val;
 
@@ -68,8 +63,7 @@  static inline u16 __raw_readw(const volatile void __iomem *addr)
 	return val;
 }
 
-#define __raw_readl __raw_readl
-static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
+static __always_inline u32 arch_raw_readl(const volatile void __iomem *addr)
 {
 	u32 val;
 	asm volatile(ALTERNATIVE("ldr %w0, [%1]",
@@ -79,8 +73,7 @@  static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
 	return val;
 }
 
-#define __raw_readq __raw_readq
-static inline u64 __raw_readq(const volatile void __iomem *addr)
+static inline u64 arch_raw_readq(const volatile void __iomem *addr)
 {
 	u64 val;
 	asm volatile(ALTERNATIVE("ldr %0, [%1]",
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index c3c11974fa3b..ff56d2165ea9 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -4,7 +4,7 @@ 
 #
 
 asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
-ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
+ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
 
 hostprogs := gen-hyprel
 HOST_EXTRACFLAGS += -I$(objtree)/include
diff --git a/include/linux/mmio-instrumented.h b/include/linux/mmio-instrumented.h
new file mode 100644
index 000000000000..99979c025cc1
--- /dev/null
+++ b/include/linux/mmio-instrumented.h
@@ -0,0 +1,70 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _LINUX_MMIO_INSTRUMENTED_H
+#define _LINUX_MMIO_INSTRUMENTED_H
+
+#include <linux/tracepoint-defs.h>
+
+/*
+ * Tracepoint and MMIO logging symbols should not be visible at EL2(HYP) as
+ * there is no way to execute them and any such MMIO access from EL2 will
+ * explode instantly (Words of Marc Zyngier). So introduce a generic flag
+ * __DISABLE_TRACE_MMIO__ to disable MMIO tracing in nVHE and other drivers
+ * if required.
+ */
+#if IS_ENABLED(CONFIG_TRACE_MMIO_ACCESS) && !(defined(__DISABLE_TRACE_MMIO__))
+DECLARE_TRACEPOINT(rwmmio_write);
+DECLARE_TRACEPOINT(rwmmio_read);
+
+void log_write_mmio(const char *width, volatile void __iomem *addr);
+void log_read_mmio(const char *width, const volatile void __iomem *addr);
+
+#define __raw_write(v, a, _l)	({				\
+	volatile void __iomem *_a = (a);			\
+	if (tracepoint_enabled(rwmmio_write))			\
+		log_write_mmio(__stringify(write##_l), _a);	\
+	arch_raw_write##_l((v), _a);				\
+	})
+
+#define __raw_writeb(v, a)	__raw_write((v), a, b)
+#define __raw_writew(v, a)	__raw_write((v), a, w)
+#define __raw_writel(v, a)	__raw_write((v), a, l)
+#define __raw_writeq(v, a)	__raw_write((v), a, q)
+
+#define __raw_read(a, _l, _t)    ({				\
+	_t __a;							\
+	const volatile void __iomem *_a = (a);			\
+	if (tracepoint_enabled(rwmmio_read))			\
+		log_read_mmio(__stringify(read##_l), _a);	\
+	__a = arch_raw_read##_l(_a);				\
+	__a;							\
+	})
+
+#define __raw_readb(a)		__raw_read((a), b, u8)
+#define __raw_readw(a)		__raw_read((a), w, u16)
+#define __raw_readl(a)		__raw_read((a), l, u32)
+#define __raw_readq(a)		__raw_read((a), q, u64)
+
+#else
+
+#define __raw_writeb(v, a)	arch_raw_writeb(v, a)
+#define __raw_writew(v, a)	arch_raw_writew(v, a)
+#define __raw_writel(v, a)	arch_raw_writel(v, a)
+#define __raw_writeq(v, a)	arch_raw_writeq(v, a)
+
+#define __raw_readb(a)		arch_raw_readb(a)
+#define __raw_readw(a)		arch_raw_readw(a)
+#define __raw_readl(a)		arch_raw_readl(a)
+#define __raw_readq(a)		arch_raw_readq(a)
+
+static inline void log_write_mmio(const char *width,
+				  volatile void __iomem *addr) {}
+static inline void log_read_mmio(const char *width,
+				 const volatile void __iomem *addr) {}
+
+#endif /* CONFIG_TRACE_MMIO_ACCESS */
+
+#endif /* _LINUX_MMIO_INSTRUMENTED_H */