diff mbox series

[3/3] arm64/hw_breakpoint: Enable FEAT_Debugv8p9

Message ID 20241001043602.1116991-4-anshuman.khandual@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64/hw_breakpoint: Enable FEAT_Debugv8p9 | expand

Commit Message

Anshuman Khandual Oct. 1, 2024, 4:36 a.m. UTC
Currently there can be maximum 16 breakpoints, and 16 watchpoints available
on a given platform - as detected from ID_AA64DFR0_EL1.[BRPs|WRPs] register
fields. But these breakpoint, and watchpoints can be extended further up to
64 via a new arch feature FEAT_Debugv8p9.

This first enables banked access for the breakpoint and watchpoint register
set via MDSELR_EL1, extended exceptions via MDSCR_EL1.EMBWE and determining
available breakpoints and watchpoints in the platform from ID_AA64DFR1_EL1,
when FEAT_Debugv8p9 is enabled.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
 arch/arm64/include/asm/debug-monitors.h |  1 +
 arch/arm64/include/asm/hw_breakpoint.h  | 50 ++++++++++++++++++++-----
 arch/arm64/kernel/debug-monitors.c      | 16 ++++++--
 arch/arm64/kernel/hw_breakpoint.c       | 33 ++++++++++++++++
 4 files changed, 86 insertions(+), 14 deletions(-)

Comments

kernel test robot Oct. 2, 2024, 11:36 p.m. UTC | #1
Hi Anshuman,

kernel test robot noticed the following build errors:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on kvmarm/next soc/for-next arm/for-next arm/fixes linus/master v6.12-rc1 next-20241002]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/arm64-cpufeature-Add-field-details-for-ID_AA64DFR1_EL1-register/20241001-123752
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link:    https://lore.kernel.org/r/20241001043602.1116991-4-anshuman.khandual%40arm.com
patch subject: [PATCH 3/3] arm64/hw_breakpoint: Enable FEAT_Debugv8p9
config: arm64-randconfig-004-20241003 (https://download.01.org/0day-ci/archive/20241003/202410030700.kZSan6G6-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241003/202410030700.kZSan6G6-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410030700.kZSan6G6-lkp@intel.com/

All errors (new ones prefixed by >>):

   arch/arm64/kernel/hw_breakpoint.c: In function 'set_bank_index':
>> arch/arm64/kernel/hw_breakpoint.c:113:30: error: 'MDSELR_EL1_BANK_BANK_0' undeclared (first use in this function)
     113 |                 mdsel_bank = MDSELR_EL1_BANK_BANK_0;
         |                              ^~~~~~~~~~~~~~~~~~~~~~
   arch/arm64/kernel/hw_breakpoint.c:113:30: note: each undeclared identifier is reported only once for each function it appears in
>> arch/arm64/kernel/hw_breakpoint.c:116:30: error: 'MDSELR_EL1_BANK_BANK_1' undeclared (first use in this function)
     116 |                 mdsel_bank = MDSELR_EL1_BANK_BANK_1;
         |                              ^~~~~~~~~~~~~~~~~~~~~~
>> arch/arm64/kernel/hw_breakpoint.c:119:30: error: 'MDSELR_EL1_BANK_BANK_2' undeclared (first use in this function)
     119 |                 mdsel_bank = MDSELR_EL1_BANK_BANK_2;
         |                              ^~~~~~~~~~~~~~~~~~~~~~
>> arch/arm64/kernel/hw_breakpoint.c:122:30: error: 'MDSELR_EL1_BANK_BANK_3' undeclared (first use in this function)
     122 |                 mdsel_bank = MDSELR_EL1_BANK_BANK_3;
         |                              ^~~~~~~~~~~~~~~~~~~~~~
   In file included from arch/arm64/include/asm/cputype.h:226,
                    from arch/arm64/include/asm/cache.h:43,
                    from include/linux/cache.h:6,
                    from include/linux/time.h:5,
                    from include/linux/compat.h:10,
                    from arch/arm64/kernel/hw_breakpoint.c:12:
>> arch/arm64/kernel/hw_breakpoint.c:128:38: error: 'MDSELR_EL1_BANK_SHIFT' undeclared (first use in this function); did you mean 'CSSELR_EL1_InD_SHIFT'?
     128 |         write_sysreg_s(mdsel_bank << MDSELR_EL1_BANK_SHIFT, SYS_MDSELR_EL1);
         |                                      ^~~~~~~~~~~~~~~~~~~~~
   arch/arm64/include/asm/sysreg.h:1168:27: note: in definition of macro 'write_sysreg_s'
    1168 |         u64 __val = (u64)(v);                                           \
         |                           ^
>> arch/arm64/kernel/hw_breakpoint.c:128:61: error: 'SYS_MDSELR_EL1' undeclared (first use in this function); did you mean 'SYS_MDSCR_EL1'?
     128 |         write_sysreg_s(mdsel_bank << MDSELR_EL1_BANK_SHIFT, SYS_MDSELR_EL1);
         |                                                             ^~~~~~~~~~~~~~
   arch/arm64/include/asm/sysreg.h:1169:46: note: in definition of macro 'write_sysreg_s'
    1169 |         u32 __maybe_unused __check_r = (u32)(r);                        \
         |                                              ^


vim +/MDSELR_EL1_BANK_BANK_0 +113 arch/arm64/kernel/hw_breakpoint.c

    59	
    60	#define READ_WB_REG_CASE(OFF, N, REG, VAL)	\
    61		case (OFF + N):				\
    62			AARCH64_DBG_READ(N, REG, VAL);	\
    63			break
    64	
    65	#define WRITE_WB_REG_CASE(OFF, N, REG, VAL)	\
    66		case (OFF + N):				\
    67			AARCH64_DBG_WRITE(N, REG, VAL);	\
    68			break
    69	
    70	#define GEN_READ_WB_REG_CASES(OFF, REG, VAL)	\
    71		READ_WB_REG_CASE(OFF,  0, REG, VAL);	\
    72		READ_WB_REG_CASE(OFF,  1, REG, VAL);	\
    73		READ_WB_REG_CASE(OFF,  2, REG, VAL);	\
    74		READ_WB_REG_CASE(OFF,  3, REG, VAL);	\
    75		READ_WB_REG_CASE(OFF,  4, REG, VAL);	\
    76		READ_WB_REG_CASE(OFF,  5, REG, VAL);	\
    77		READ_WB_REG_CASE(OFF,  6, REG, VAL);	\
    78		READ_WB_REG_CASE(OFF,  7, REG, VAL);	\
    79		READ_WB_REG_CASE(OFF,  8, REG, VAL);	\
    80		READ_WB_REG_CASE(OFF,  9, REG, VAL);	\
    81		READ_WB_REG_CASE(OFF, 10, REG, VAL);	\
    82		READ_WB_REG_CASE(OFF, 11, REG, VAL);	\
    83		READ_WB_REG_CASE(OFF, 12, REG, VAL);	\
    84		READ_WB_REG_CASE(OFF, 13, REG, VAL);	\
    85		READ_WB_REG_CASE(OFF, 14, REG, VAL);	\
    86		READ_WB_REG_CASE(OFF, 15, REG, VAL)
    87	
    88	#define GEN_WRITE_WB_REG_CASES(OFF, REG, VAL)	\
    89		WRITE_WB_REG_CASE(OFF,  0, REG, VAL);	\
    90		WRITE_WB_REG_CASE(OFF,  1, REG, VAL);	\
    91		WRITE_WB_REG_CASE(OFF,  2, REG, VAL);	\
    92		WRITE_WB_REG_CASE(OFF,  3, REG, VAL);	\
    93		WRITE_WB_REG_CASE(OFF,  4, REG, VAL);	\
    94		WRITE_WB_REG_CASE(OFF,  5, REG, VAL);	\
    95		WRITE_WB_REG_CASE(OFF,  6, REG, VAL);	\
    96		WRITE_WB_REG_CASE(OFF,  7, REG, VAL);	\
    97		WRITE_WB_REG_CASE(OFF,  8, REG, VAL);	\
    98		WRITE_WB_REG_CASE(OFF,  9, REG, VAL);	\
    99		WRITE_WB_REG_CASE(OFF, 10, REG, VAL);	\
   100		WRITE_WB_REG_CASE(OFF, 11, REG, VAL);	\
   101		WRITE_WB_REG_CASE(OFF, 12, REG, VAL);	\
   102		WRITE_WB_REG_CASE(OFF, 13, REG, VAL);	\
   103		WRITE_WB_REG_CASE(OFF, 14, REG, VAL);	\
   104		WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
   105	
   106	static int set_bank_index(int n)
   107	{
   108		int mdsel_bank;
   109		int bank = n / 16, index = n % 16;
   110	
   111		switch (bank) {
   112		case 0:
 > 113			mdsel_bank = MDSELR_EL1_BANK_BANK_0;
   114			break;
   115		case 1:
 > 116			mdsel_bank = MDSELR_EL1_BANK_BANK_1;
   117			break;
   118		case 2:
 > 119			mdsel_bank = MDSELR_EL1_BANK_BANK_2;
   120			break;
   121		case 3:
 > 122			mdsel_bank = MDSELR_EL1_BANK_BANK_3;
   123			break;
   124		default:
   125			pr_warn("Unknown register bank %d\n", bank);
   126		}
   127		preempt_disable();
 > 128		write_sysreg_s(mdsel_bank << MDSELR_EL1_BANK_SHIFT, SYS_MDSELR_EL1);
   129		isb();
   130		return index;
   131	}
   132
Anshuman Khandual Oct. 3, 2024, 3:40 a.m. UTC | #2
On 10/3/24 05:06, kernel test robot wrote:
> Hi Anshuman,
> 
> kernel test robot noticed the following build errors:
> 
> [auto build test ERROR on arm64/for-next/core]
> [also build test ERROR on kvmarm/next soc/for-next arm/for-next arm/fixes linus/master v6.12-rc1 next-20241002]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/arm64-cpufeature-Add-field-details-for-ID_AA64DFR1_EL1-register/20241001-123752
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
> patch link:    https://lore.kernel.org/r/20241001043602.1116991-4-anshuman.khandual%40arm.com
> patch subject: [PATCH 3/3] arm64/hw_breakpoint: Enable FEAT_Debugv8p9
> config: arm64-randconfig-004-20241003 (https://download.01.org/0day-ci/archive/20241003/202410030700.kZSan6G6-lkp@intel.com/config)
> compiler: aarch64-linux-gcc (GCC) 14.1.0
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241003/202410030700.kZSan6G6-lkp@intel.com/reproduce)
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202410030700.kZSan6G6-lkp@intel.com/
> 
> All errors (new ones prefixed by >>):
> 
>    arch/arm64/kernel/hw_breakpoint.c: In function 'set_bank_index':
>>> arch/arm64/kernel/hw_breakpoint.c:113:30: error: 'MDSELR_EL1_BANK_BANK_0' undeclared (first use in this function)
>      113 |                 mdsel_bank = MDSELR_EL1_BANK_BANK_0;
>          |                              ^~~~~~~~~~~~~~~~~~~~~~
>    arch/arm64/kernel/hw_breakpoint.c:113:30: note: each undeclared identifier is reported only once for each function it appears in
>>> arch/arm64/kernel/hw_breakpoint.c:116:30: error: 'MDSELR_EL1_BANK_BANK_1' undeclared (first use in this function)
>      116 |                 mdsel_bank = MDSELR_EL1_BANK_BANK_1;
>          |                              ^~~~~~~~~~~~~~~~~~~~~~
>>> arch/arm64/kernel/hw_breakpoint.c:119:30: error: 'MDSELR_EL1_BANK_BANK_2' undeclared (first use in this function)
>      119 |                 mdsel_bank = MDSELR_EL1_BANK_BANK_2;
>          |                              ^~~~~~~~~~~~~~~~~~~~~~
>>> arch/arm64/kernel/hw_breakpoint.c:122:30: error: 'MDSELR_EL1_BANK_BANK_3' undeclared (first use in this function)
>      122 |                 mdsel_bank = MDSELR_EL1_BANK_BANK_3;
>          |                              ^~~~~~~~~~~~~~~~~~~~~~
>    In file included from arch/arm64/include/asm/cputype.h:226,
>                     from arch/arm64/include/asm/cache.h:43,
>                     from include/linux/cache.h:6,
>                     from include/linux/time.h:5,
>                     from include/linux/compat.h:10,
>                     from arch/arm64/kernel/hw_breakpoint.c:12:
>>> arch/arm64/kernel/hw_breakpoint.c:128:38: error: 'MDSELR_EL1_BANK_SHIFT' undeclared (first use in this function); did you mean 'CSSELR_EL1_InD_SHIFT'?
>      128 |         write_sysreg_s(mdsel_bank << MDSELR_EL1_BANK_SHIFT, SYS_MDSELR_EL1);
>          |                                      ^~~~~~~~~~~~~~~~~~~~~
>    arch/arm64/include/asm/sysreg.h:1168:27: note: in definition of macro 'write_sysreg_s'
>     1168 |         u64 __val = (u64)(v);                                           \
>          |                           ^
>>> arch/arm64/kernel/hw_breakpoint.c:128:61: error: 'SYS_MDSELR_EL1' undeclared (first use in this function); did you mean 'SYS_MDSCR_EL1'?
>      128 |         write_sysreg_s(mdsel_bank << MDSELR_EL1_BANK_SHIFT, SYS_MDSELR_EL1);
>          |                                                             ^~~~~~~~~~~~~~
>    arch/arm64/include/asm/sysreg.h:1169:46: note: in definition of macro 'write_sysreg_s'
>     1169 |         u32 __maybe_unused __check_r = (u32)(r);                        \
>          |                                              ^
> 
> 
> vim +/MDSELR_EL1_BANK_BANK_0 +113 arch/arm64/kernel/hw_breakpoint.c
> 
>     59	
>     60	#define READ_WB_REG_CASE(OFF, N, REG, VAL)	\
>     61		case (OFF + N):				\
>     62			AARCH64_DBG_READ(N, REG, VAL);	\
>     63			break
>     64	
>     65	#define WRITE_WB_REG_CASE(OFF, N, REG, VAL)	\
>     66		case (OFF + N):				\
>     67			AARCH64_DBG_WRITE(N, REG, VAL);	\
>     68			break
>     69	
>     70	#define GEN_READ_WB_REG_CASES(OFF, REG, VAL)	\
>     71		READ_WB_REG_CASE(OFF,  0, REG, VAL);	\
>     72		READ_WB_REG_CASE(OFF,  1, REG, VAL);	\
>     73		READ_WB_REG_CASE(OFF,  2, REG, VAL);	\
>     74		READ_WB_REG_CASE(OFF,  3, REG, VAL);	\
>     75		READ_WB_REG_CASE(OFF,  4, REG, VAL);	\
>     76		READ_WB_REG_CASE(OFF,  5, REG, VAL);	\
>     77		READ_WB_REG_CASE(OFF,  6, REG, VAL);	\
>     78		READ_WB_REG_CASE(OFF,  7, REG, VAL);	\
>     79		READ_WB_REG_CASE(OFF,  8, REG, VAL);	\
>     80		READ_WB_REG_CASE(OFF,  9, REG, VAL);	\
>     81		READ_WB_REG_CASE(OFF, 10, REG, VAL);	\
>     82		READ_WB_REG_CASE(OFF, 11, REG, VAL);	\
>     83		READ_WB_REG_CASE(OFF, 12, REG, VAL);	\
>     84		READ_WB_REG_CASE(OFF, 13, REG, VAL);	\
>     85		READ_WB_REG_CASE(OFF, 14, REG, VAL);	\
>     86		READ_WB_REG_CASE(OFF, 15, REG, VAL)
>     87	
>     88	#define GEN_WRITE_WB_REG_CASES(OFF, REG, VAL)	\
>     89		WRITE_WB_REG_CASE(OFF,  0, REG, VAL);	\
>     90		WRITE_WB_REG_CASE(OFF,  1, REG, VAL);	\
>     91		WRITE_WB_REG_CASE(OFF,  2, REG, VAL);	\
>     92		WRITE_WB_REG_CASE(OFF,  3, REG, VAL);	\
>     93		WRITE_WB_REG_CASE(OFF,  4, REG, VAL);	\
>     94		WRITE_WB_REG_CASE(OFF,  5, REG, VAL);	\
>     95		WRITE_WB_REG_CASE(OFF,  6, REG, VAL);	\
>     96		WRITE_WB_REG_CASE(OFF,  7, REG, VAL);	\
>     97		WRITE_WB_REG_CASE(OFF,  8, REG, VAL);	\
>     98		WRITE_WB_REG_CASE(OFF,  9, REG, VAL);	\
>     99		WRITE_WB_REG_CASE(OFF, 10, REG, VAL);	\
>    100		WRITE_WB_REG_CASE(OFF, 11, REG, VAL);	\
>    101		WRITE_WB_REG_CASE(OFF, 12, REG, VAL);	\
>    102		WRITE_WB_REG_CASE(OFF, 13, REG, VAL);	\
>    103		WRITE_WB_REG_CASE(OFF, 14, REG, VAL);	\
>    104		WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
>    105	
>    106	static int set_bank_index(int n)
>    107	{
>    108		int mdsel_bank;
>    109		int bank = n / 16, index = n % 16;
>    110	
>    111		switch (bank) {
>    112		case 0:
>  > 113			mdsel_bank = MDSELR_EL1_BANK_BANK_0;
>    114			break;
>    115		case 1:
>  > 116			mdsel_bank = MDSELR_EL1_BANK_BANK_1;
>    117			break;
>    118		case 2:
>  > 119			mdsel_bank = MDSELR_EL1_BANK_BANK_2;
>    120			break;
>    121		case 3:
>  > 122			mdsel_bank = MDSELR_EL1_BANK_BANK_3;
>    123			break;
>    124		default:
>    125			pr_warn("Unknown register bank %d\n", bank);
>    126		}
>    127		preempt_disable();
>  > 128		write_sysreg_s(mdsel_bank << MDSELR_EL1_BANK_SHIFT, SYS_MDSELR_EL1);
>    129		isb();
>    130		return index;
>    131	}
>    132	
> 

This build failure and also the other two on the series here are false positives
caused by non-availability of used register field definitions which are provided
via the dependent KVM FEAT_FGT2 FGU series.
Mark Rutland Oct. 22, 2024, 3:34 p.m. UTC | #3
On Tue, Oct 01, 2024 at 10:06:02AM +0530, Anshuman Khandual wrote:
> Currently there can be maximum 16 breakpoints, and 16 watchpoints available
> on a given platform - as detected from ID_AA64DFR0_EL1.[BRPs|WRPs] register
> fields. But these breakpoint, and watchpoints can be extended further up to
> 64 via a new arch feature FEAT_Debugv8p9.
> 
> This first enables banked access for the breakpoint and watchpoint register
> set via MDSELR_EL1, extended exceptions via MDSCR_EL1.EMBWE and determining
> available breakpoints and watchpoints in the platform from ID_AA64DFR1_EL1,
> when FEAT_Debugv8p9 is enabled.
> 
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
>  arch/arm64/include/asm/debug-monitors.h |  1 +
>  arch/arm64/include/asm/hw_breakpoint.h  | 50 ++++++++++++++++++++-----
>  arch/arm64/kernel/debug-monitors.c      | 16 ++++++--
>  arch/arm64/kernel/hw_breakpoint.c       | 33 ++++++++++++++++
>  4 files changed, 86 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
> index 13d437bcbf58..a14097673ae0 100644
> --- a/arch/arm64/include/asm/debug-monitors.h
> +++ b/arch/arm64/include/asm/debug-monitors.h
> @@ -20,6 +20,7 @@
>  #define DBG_MDSCR_KDE		(1 << 13)
>  #define DBG_MDSCR_MDE		(1 << 15)
>  #define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
> +#define DBG_MDSCR_EMBWE		(1UL << 32)
>  
>  #define	DBG_ESR_EVT(x)		(((x) >> 27) & 0x7)
>  
> diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
> index bd81cf17744a..362c4d4a64ac 100644
> --- a/arch/arm64/include/asm/hw_breakpoint.h
> +++ b/arch/arm64/include/asm/hw_breakpoint.h
> @@ -79,8 +79,8 @@ static inline void decode_ctrl_reg(u32 reg,
>   * Limits.
>   * Changing these will require modifications to the register accessors.
>   */
> -#define ARM_MAX_BRP		16
> -#define ARM_MAX_WRP		16
> +#define ARM_MAX_BRP		64
> +#define ARM_MAX_WRP		64
>  
>  /* Virtual debug register bases. */
>  #define AARCH64_DBG_REG_BVR	0
> @@ -94,13 +94,25 @@ static inline void decode_ctrl_reg(u32 reg,
>  #define AARCH64_DBG_REG_NAME_WVR	wvr
>  #define AARCH64_DBG_REG_NAME_WCR	wcr
>  
> +static inline bool is_debug_v8p9_enabled(void)
> +{
> +	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
> +	int dver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_DebugVer_SHIFT);
> +
> +	return dver == ID_AA64DFR0_EL1_DebugVer_V8P9;
> +}
> +
>  /* Accessor macros for the debug registers. */
>  #define AARCH64_DBG_READ(N, REG, VAL) do {\
>  	VAL = read_sysreg(dbg##REG##N##_el1);\
> +	if (is_debug_v8p9_enabled())	\
> +		preempt_enable();	\
>  } while (0)
>  
>  #define AARCH64_DBG_WRITE(N, REG, VAL) do {\
>  	write_sysreg(VAL, dbg##REG##N##_el1);\
> +	if (is_debug_v8p9_enabled())	\
> +		preempt_enable();	\
>  } while (0)

Without looking any further in this patch, this is clearly the wrong
level of abstraction. Any disable/enable of preemption should be clearly
balanced in a caller rather than half of that being hidden away in a
low-level primitive.

Wherever this lives it needs a comment explaining what it is doing and
why. I assume this is intended to protect the bank in sequences like:

	MSR	MDSELR, <...>
	ISB
	MRS	<..._, BANKED_REGISTER

... but is theat suffucient for mutual exclusion against
exception handlers, or does that come from somewhere else?

>  struct task_struct;
> @@ -138,19 +150,37 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
>  /* Determine number of BRP registers available. */
>  static inline int get_num_brps(void)
>  {
> -	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
> -	return 1 +
> -		cpuid_feature_extract_unsigned_field(dfr0,
> -						ID_AA64DFR0_EL1_BRPs_SHIFT);
> +	u64 dfr0, dfr1;
> +	int dver, brps;
> +
> +	dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
> +	dver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_DebugVer_SHIFT);
> +	if (dver == ID_AA64DFR0_EL1_DebugVer_V8P9) {
> +		dfr1 = read_sanitised_ftr_reg(SYS_ID_AA64DFR1_EL1);
> +		brps = cpuid_feature_extract_unsigned_field_width(dfr1,
> +								  ID_AA64DFR1_EL1_BRPs_SHIFT, 8);
> +	} else {
> +		brps = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRPs_SHIFT);
> +	}
> +	return 1 + brps;
>  }
>  
>  /* Determine number of WRP registers available. */
>  static inline int get_num_wrps(void)
>  {
> -	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
> -	return 1 +
> -		cpuid_feature_extract_unsigned_field(dfr0,
> -						ID_AA64DFR0_EL1_WRPs_SHIFT);
> +	u64 dfr0, dfr1;
> +	int dver, wrps;
> +
> +	dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
> +	dver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_DebugVer_SHIFT);
> +	if (dver == ID_AA64DFR0_EL1_DebugVer_V8P9) {
> +		dfr1 = read_sanitised_ftr_reg(SYS_ID_AA64DFR1_EL1);
> +		wrps = cpuid_feature_extract_unsigned_field_width(dfr1,
> +								  ID_AA64DFR1_EL1_WRPs_SHIFT, 8);
> +	} else {
> +		wrps = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_WRPs_SHIFT);
> +	}
> +	return 1 + wrps;
>  }
>  
>  #ifdef CONFIG_CPU_PM
> diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
> index 024a7b245056..af643c935f2e 100644
> --- a/arch/arm64/kernel/debug-monitors.c
> +++ b/arch/arm64/kernel/debug-monitors.c
> @@ -23,6 +23,7 @@
>  #include <asm/debug-monitors.h>
>  #include <asm/system_misc.h>
>  #include <asm/traps.h>
> +#include <asm/hw_breakpoint.h>

Nit: these are ordered alphabetically, please keep them that way.

>  
>  /* Determine debug architecture. */
>  u8 debug_monitors_arch(void)
> @@ -34,7 +35,7 @@ u8 debug_monitors_arch(void)
>  /*
>   * MDSCR access routines.
>   */
> -static void mdscr_write(u32 mdscr)
> +static void mdscr_write(u64 mdscr)
>  {
>  	unsigned long flags;
>  	flags = local_daif_save();
> @@ -43,7 +44,7 @@ static void mdscr_write(u32 mdscr)
>  }
>  NOKPROBE_SYMBOL(mdscr_write);
>  
> -static u32 mdscr_read(void)
> +static u64 mdscr_read(void)
>  {
>  	return read_sysreg(mdscr_el1);
>  }
> @@ -76,10 +77,11 @@ early_param("nodebugmon", early_debug_disable);
>   */
>  static DEFINE_PER_CPU(int, mde_ref_count);
>  static DEFINE_PER_CPU(int, kde_ref_count);
> +static DEFINE_PER_CPU(int, embwe_ref_count);

We have refcounting for MDE and KDE because they enable debug exceptions
to be taken (and e.g. require a hypervisor to do more work when they're
enabled), but AFAICT that's not true for EMBWE.

Do we need to refcount EMBWE?

>  void enable_debug_monitors(enum dbg_active_el el)
>  {
> -	u32 mdscr, enable = 0;
> +	u64 mdscr, enable = 0;
>  
>  	WARN_ON(preemptible());
>  
> @@ -90,6 +92,9 @@ void enable_debug_monitors(enum dbg_active_el el)
>  	    this_cpu_inc_return(kde_ref_count) == 1)
>  		enable |= DBG_MDSCR_KDE;
>  
> +	if (is_debug_v8p9_enabled() && this_cpu_inc_return(embwe_ref_count) == 1)
> +		enable |= DBG_MDSCR_EMBWE;

... which suggests that this could simplified to be:

	if (is_debug_v8p9_enabled())
		enable != DBG_MDSCR_EMBWE;

... and likewise below, unless I'm missing some reason why we must
refcount this?

> diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
> index 722ac45f9f7b..30156d732284 100644
> --- a/arch/arm64/kernel/hw_breakpoint.c
> +++ b/arch/arm64/kernel/hw_breakpoint.c
> @@ -103,10 +103,40 @@ int hw_breakpoint_slots(int type)
>  	WRITE_WB_REG_CASE(OFF, 14, REG, VAL);	\
>  	WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
>  
> +static int set_bank_index(int n)
> +{
> +	int mdsel_bank;
> +	int bank = n / 16, index = n % 16;
> +
> +	switch (bank) {
> +	case 0:
> +		mdsel_bank = MDSELR_EL1_BANK_BANK_0;
> +		break;
> +	case 1:
> +		mdsel_bank = MDSELR_EL1_BANK_BANK_1;
> +		break;
> +	case 2:
> +		mdsel_bank = MDSELR_EL1_BANK_BANK_2;
> +		break;
> +	case 3:
> +		mdsel_bank = MDSELR_EL1_BANK_BANK_3;
> +		break;

Since this is a trivial mapping, do we actually need the switch?

> +	default:
> +		pr_warn("Unknown register bank %d\n", bank);
> +	}
> +	preempt_disable();
> +	write_sysreg_s(mdsel_bank << MDSELR_EL1_BANK_SHIFT, SYS_MDSELR_EL1);
> +	isb();
> +	return index;
> +}
> +
>  static u64 read_wb_reg(int reg, int n)
>  {
>  	u64 val = 0;
>  
> +	if (is_debug_v8p9_enabled())
> +		n = set_bank_index(n);
> +
>  	switch (reg + n) {
>  	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
>  	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);

As above, this would be better as something like:

	// rename the existing read_wb_reg(), unchanged
	static u64 __read_wb_reg(int reg, int n);

	static u64 read_wb_reg(int reg, int n)
	{
		u64 val;

		if (!is_debug_v8p9_enabled())
			return __read_wb_reg(reg, n);

		/*
		 * TODO: explain here
		 */
		preempt_disable();
		write_sysreg_s(...); // MDSELR
		isb();
		val = __read_wb_reg(reg, idx_within_bank);
		preempt_enable();

		return val;
	}

... or:

	static u64 read_wb_reg(int reg, int n)
	{
		u64 val;

		if (is_debug_v8p9_enabled()) {
			/*
			 * TODO: explain here
			 */
			preempt_disable();
			write_sysreg_s(...); // MDSELR
			isb();
			val = __read_wb_reg(reg, idx_within_bank);
			preempt_enable();
		} else {
			val = __read_wb_reg(reg, n);
		}

		return val;
	}

... which is more lines but *vastly* clearer. 

Likewise for the write case.

Mark.
Anshuman Khandual Oct. 23, 2024, 7:31 a.m. UTC | #4
On 10/22/24 21:04, Mark Rutland wrote:
> On Tue, Oct 01, 2024 at 10:06:02AM +0530, Anshuman Khandual wrote:
>> Currently there can be maximum 16 breakpoints, and 16 watchpoints available
>> on a given platform - as detected from ID_AA64DFR0_EL1.[BRPs|WRPs] register
>> fields. But these breakpoint, and watchpoints can be extended further up to
>> 64 via a new arch feature FEAT_Debugv8p9.
>>
>> This first enables banked access for the breakpoint and watchpoint register
>> set via MDSELR_EL1, extended exceptions via MDSCR_EL1.EMBWE and determining
>> available breakpoints and watchpoints in the platform from ID_AA64DFR1_EL1,
>> when FEAT_Debugv8p9 is enabled.
>>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: Mark Rutland <mark.rutland@arm.com>
>> Cc: linux-arm-kernel@lists.infradead.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>>  arch/arm64/include/asm/debug-monitors.h |  1 +
>>  arch/arm64/include/asm/hw_breakpoint.h  | 50 ++++++++++++++++++++-----
>>  arch/arm64/kernel/debug-monitors.c      | 16 ++++++--
>>  arch/arm64/kernel/hw_breakpoint.c       | 33 ++++++++++++++++
>>  4 files changed, 86 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
>> index 13d437bcbf58..a14097673ae0 100644
>> --- a/arch/arm64/include/asm/debug-monitors.h
>> +++ b/arch/arm64/include/asm/debug-monitors.h
>> @@ -20,6 +20,7 @@
>>  #define DBG_MDSCR_KDE		(1 << 13)
>>  #define DBG_MDSCR_MDE		(1 << 15)
>>  #define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
>> +#define DBG_MDSCR_EMBWE		(1UL << 32)
>>  
>>  #define	DBG_ESR_EVT(x)		(((x) >> 27) & 0x7)
>>  
>> diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
>> index bd81cf17744a..362c4d4a64ac 100644
>> --- a/arch/arm64/include/asm/hw_breakpoint.h
>> +++ b/arch/arm64/include/asm/hw_breakpoint.h
>> @@ -79,8 +79,8 @@ static inline void decode_ctrl_reg(u32 reg,
>>   * Limits.
>>   * Changing these will require modifications to the register accessors.
>>   */
>> -#define ARM_MAX_BRP		16
>> -#define ARM_MAX_WRP		16
>> +#define ARM_MAX_BRP		64
>> +#define ARM_MAX_WRP		64
>>  
>>  /* Virtual debug register bases. */
>>  #define AARCH64_DBG_REG_BVR	0
>> @@ -94,13 +94,25 @@ static inline void decode_ctrl_reg(u32 reg,
>>  #define AARCH64_DBG_REG_NAME_WVR	wvr
>>  #define AARCH64_DBG_REG_NAME_WCR	wcr
>>  
>> +static inline bool is_debug_v8p9_enabled(void)
>> +{
>> +	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
>> +	int dver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_DebugVer_SHIFT);
>> +
>> +	return dver == ID_AA64DFR0_EL1_DebugVer_V8P9;
>> +}
>> +
>>  /* Accessor macros for the debug registers. */
>>  #define AARCH64_DBG_READ(N, REG, VAL) do {\
>>  	VAL = read_sysreg(dbg##REG##N##_el1);\
>> +	if (is_debug_v8p9_enabled())	\
>> +		preempt_enable();	\
>>  } while (0)
>>  
>>  #define AARCH64_DBG_WRITE(N, REG, VAL) do {\
>>  	write_sysreg(VAL, dbg##REG##N##_el1);\
>> +	if (is_debug_v8p9_enabled())	\
>> +		preempt_enable();	\
>>  } while (0)
> 
> Without looking any further in this patch, this is clearly the wrong
> level of abstraction. Any disable/enable of preemption should be clearly
> balanced in a caller rather than half of that being hidden away in a
> low-level primitive.

Agreed, this was not the most optimal method from readability perspective
as well but could not come up with a better way without creating too much
code churn. But sure, will improve upon this (as you have suggested later).

> 
> Wherever this lives it needs a comment explaining what it is doing and
> why. I assume this is intended to protect the bank in sequences like:
> 
> 	MSR	MDSELR, <...>
> 	ISB
> 	MRS	<..._, BANKED_REGISTER

Correct, it is protecting the above sequence.

> 
> ... but is theat suffucient for mutual exclusion against
> exception handlers, or does that come from somewhere else?

Looking at all existing use cases for breakpoint/watchpoints, it should
be sufficient to protect against mutual exclusion. But thinking, do you
have a particular exception handler scenario in mind where this might
still be problematic ? Will keep looking into it.

> 
>>  struct task_struct;
>> @@ -138,19 +150,37 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
>>  /* Determine number of BRP registers available. */
>>  static inline int get_num_brps(void)
>>  {
>> -	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
>> -	return 1 +
>> -		cpuid_feature_extract_unsigned_field(dfr0,
>> -						ID_AA64DFR0_EL1_BRPs_SHIFT);
>> +	u64 dfr0, dfr1;
>> +	int dver, brps;
>> +
>> +	dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
>> +	dver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_DebugVer_SHIFT);
>> +	if (dver == ID_AA64DFR0_EL1_DebugVer_V8P9) {
>> +		dfr1 = read_sanitised_ftr_reg(SYS_ID_AA64DFR1_EL1);
>> +		brps = cpuid_feature_extract_unsigned_field_width(dfr1,
>> +								  ID_AA64DFR1_EL1_BRPs_SHIFT, 8);
>> +	} else {
>> +		brps = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRPs_SHIFT);
>> +	}
>> +	return 1 + brps;
>>  }
>>  
>>  /* Determine number of WRP registers available. */
>>  static inline int get_num_wrps(void)
>>  {
>> -	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
>> -	return 1 +
>> -		cpuid_feature_extract_unsigned_field(dfr0,
>> -						ID_AA64DFR0_EL1_WRPs_SHIFT);
>> +	u64 dfr0, dfr1;
>> +	int dver, wrps;
>> +
>> +	dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
>> +	dver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_DebugVer_SHIFT);
>> +	if (dver == ID_AA64DFR0_EL1_DebugVer_V8P9) {
>> +		dfr1 = read_sanitised_ftr_reg(SYS_ID_AA64DFR1_EL1);
>> +		wrps = cpuid_feature_extract_unsigned_field_width(dfr1,
>> +								  ID_AA64DFR1_EL1_WRPs_SHIFT, 8);
>> +	} else {
>> +		wrps = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_WRPs_SHIFT);
>> +	}
>> +	return 1 + wrps;
>>  }
>>  
>>  #ifdef CONFIG_CPU_PM
>> diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
>> index 024a7b245056..af643c935f2e 100644
>> --- a/arch/arm64/kernel/debug-monitors.c
>> +++ b/arch/arm64/kernel/debug-monitors.c
>> @@ -23,6 +23,7 @@
>>  #include <asm/debug-monitors.h>
>>  #include <asm/system_misc.h>
>>  #include <asm/traps.h>
>> +#include <asm/hw_breakpoint.h>
> 
> Nit: these are ordered alphabetically, please keep them that way.

Sure, will change.

> 
>>  
>>  /* Determine debug architecture. */
>>  u8 debug_monitors_arch(void)
>> @@ -34,7 +35,7 @@ u8 debug_monitors_arch(void)
>>  /*
>>   * MDSCR access routines.
>>   */
>> -static void mdscr_write(u32 mdscr)
>> +static void mdscr_write(u64 mdscr)
>>  {
>>  	unsigned long flags;
>>  	flags = local_daif_save();
>> @@ -43,7 +44,7 @@ static void mdscr_write(u32 mdscr)
>>  }
>>  NOKPROBE_SYMBOL(mdscr_write);
>>  
>> -static u32 mdscr_read(void)
>> +static u64 mdscr_read(void)
>>  {
>>  	return read_sysreg(mdscr_el1);
>>  }
>> @@ -76,10 +77,11 @@ early_param("nodebugmon", early_debug_disable);
>>   */
>>  static DEFINE_PER_CPU(int, mde_ref_count);
>>  static DEFINE_PER_CPU(int, kde_ref_count);
>> +static DEFINE_PER_CPU(int, embwe_ref_count);
> 
> We have refcounting for MDE and KDE because they enable debug exceptions
> to be taken (and e.g. require a hypervisor to do more work when they're
> enabled), but AFAICT that's not true for EMBWE.
Thought something similar would be required for EMBWE.

> 
> Do we need to refcount EMBWE?

TBH, not sure. Don't understand this component well enough. You are probably
right, this refcount mechanism for EMBWE might not be required here.

> 
>>  void enable_debug_monitors(enum dbg_active_el el)
>>  {
>> -	u32 mdscr, enable = 0;
>> +	u64 mdscr, enable = 0;
>>  
>>  	WARN_ON(preemptible());
>>  
>> @@ -90,6 +92,9 @@ void enable_debug_monitors(enum dbg_active_el el)
>>  	    this_cpu_inc_return(kde_ref_count) == 1)
>>  		enable |= DBG_MDSCR_KDE;
>>  
>> +	if (is_debug_v8p9_enabled() && this_cpu_inc_return(embwe_ref_count) == 1)
>> +		enable |= DBG_MDSCR_EMBWE;
> 
> ... which suggests that this could simplified to be:
> 
> 	if (is_debug_v8p9_enabled())
> 		enable != DBG_MDSCR_EMBWE;

Sure, will change.

> 
> ... and likewise below, unless I'm missing some reason why we must
> refcount this?

Okay, will drop the refcount mechanism completely and instead just
enable-disable DBG_MDSCR_EMBWE based on is_debug_v8p9_enabled()
feature test.

> 
>> diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
>> index 722ac45f9f7b..30156d732284 100644
>> --- a/arch/arm64/kernel/hw_breakpoint.c
>> +++ b/arch/arm64/kernel/hw_breakpoint.c
>> @@ -103,10 +103,40 @@ int hw_breakpoint_slots(int type)
>>  	WRITE_WB_REG_CASE(OFF, 14, REG, VAL);	\
>>  	WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
>>  
>> +static int set_bank_index(int n)
>> +{
>> +	int mdsel_bank;
>> +	int bank = n / 16, index = n % 16;
>> +
>> +	switch (bank) {
>> +	case 0:
>> +		mdsel_bank = MDSELR_EL1_BANK_BANK_0;
>> +		break;
>> +	case 1:
>> +		mdsel_bank = MDSELR_EL1_BANK_BANK_1;
>> +		break;
>> +	case 2:
>> +		mdsel_bank = MDSELR_EL1_BANK_BANK_2;
>> +		break;
>> +	case 3:
>> +		mdsel_bank = MDSELR_EL1_BANK_BANK_3;
>> +		break;
> 
> Since this is a trivial mapping, do we actually need the switch?

MDSELR_EL1_BANK_BANK_<N> = N (0..3) So mdsel_bank could
directly be used without going through the 'bank' based
switch case above. Will drop them as suggested.

> 
>> +	default:
>> +		pr_warn("Unknown register bank %d\n", bank);
>> +	}
>> +	preempt_disable();
>> +	write_sysreg_s(mdsel_bank << MDSELR_EL1_BANK_SHIFT, SYS_MDSELR_EL1);
>> +	isb();
>> +	return index;
>> +}
>> +
>>  static u64 read_wb_reg(int reg, int n)
>>  {
>>  	u64 val = 0;
>>  
>> +	if (is_debug_v8p9_enabled())
>> +		n = set_bank_index(n);
>> +
>>  	switch (reg + n) {
>>  	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
>>  	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
> 
> As above, this would be better as something like:
> 
> 	// rename the existing read_wb_reg(), unchanged
> 	static u64 __read_wb_reg(int reg, int n);
> 
> 	static u64 read_wb_reg(int reg, int n)
> 	{
> 		u64 val;
> 
> 		if (!is_debug_v8p9_enabled())
> 			return __read_wb_reg(reg, n);
> 
> 		/*
> 		 * TODO: explain here
> 		 */
> 		preempt_disable();
> 		write_sysreg_s(...); // MDSELR
> 		isb();
> 		val = __read_wb_reg(reg, idx_within_bank);
> 		preempt_enable();
> 
> 		return val;
> 	}
> 
> ... or:
> 
> 	static u64 read_wb_reg(int reg, int n)
> 	{
> 		u64 val;
> 
> 		if (is_debug_v8p9_enabled()) {
> 			/*
> 			 * TODO: explain here
> 			 */
> 			preempt_disable();
> 			write_sysreg_s(...); // MDSELR
> 			isb();
> 			val = __read_wb_reg(reg, idx_within_bank);
> 			preempt_enable();
> 		} else {
> 			val = __read_wb_reg(reg, n);
> 		}
> 
> 		return val;
> 	}
> 
> ... which is more lines but *vastly* clearer. 
> 
> Likewise for the write case.

Agreed, they are indeed much clearer, will change as suggested and respin.

Thanks for your detailed review.
Mark Rutland Oct. 28, 2024, 12:47 p.m. UTC | #5
On Wed, Oct 23, 2024 at 01:01:52PM +0530, Anshuman Khandual wrote:
> 
> 
> On 10/22/24 21:04, Mark Rutland wrote:
> > On Tue, Oct 01, 2024 at 10:06:02AM +0530, Anshuman Khandual wrote:

[...]

> > Wherever this lives it needs a comment explaining what it is doing and
> > why. I assume this is intended to protect the bank in sequences like:
> > 
> > 	MSR	MDSELR, <...>
> > 	ISB
> > 	MRS	<..._, BANKED_REGISTER
> 
> Correct, it is protecting the above sequence.
> 
> > 
> > ... but is theat suffucient for mutual exclusion against
> > exception handlers, or does that come from somewhere else?
> 
> Looking at all existing use cases for breakpoint/watchpoints, it should
> be sufficient to protect against mutual exclusion. But thinking, do you
> have a particular exception handler scenario in mind where this might
> still be problematic ? Will keep looking into it.

Where does the mutual exclusion come from for the existing sequences?
We should be able to descrive should be able to describe that in the
commit message or in a comment somewhere (or better, with some
assertions that get tested).

For example, what prevents watchpoint_handler() from firing in the
middle of arch_install_hw_breakpoint() or
arch_uninstall_hw_breakpoint()?

Is the existing code correct?

Mark.
Anshuman Khandual Oct. 29, 2024, 7:36 a.m. UTC | #6
On 10/28/24 18:17, Mark Rutland wrote:
> On Wed, Oct 23, 2024 at 01:01:52PM +0530, Anshuman Khandual wrote:
>>
>>
>> On 10/22/24 21:04, Mark Rutland wrote:
>>> On Tue, Oct 01, 2024 at 10:06:02AM +0530, Anshuman Khandual wrote:
> 
> [...]
> 
>>> Wherever this lives it needs a comment explaining what it is doing and
>>> why. I assume this is intended to protect the bank in sequences like:
>>>
>>> 	MSR	MDSELR, <...>
>>> 	ISB
>>> 	MRS	<..._, BANKED_REGISTER
>>
>> Correct, it is protecting the above sequence.
>>
>>>
>>> ... but is theat suffucient for mutual exclusion against
>>> exception handlers, or does that come from somewhere else?
>>
>> Looking at all existing use cases for breakpoint/watchpoints, it should
>> be sufficient to protect against mutual exclusion. But thinking, do you
>> have a particular exception handler scenario in mind where this might
>> still be problematic ? Will keep looking into it.
> 
> Where does the mutual exclusion come from for the existing sequences?

Bank selection followed by indexed read/write, inherently requires mutual
exclusion (ensuring that both these steps executed together) in order to
prevent read/write into wrong registers. That being said, HW breakpoints
get used in multiple different places such as perf, ptrace, debug monitor
based single stepping etc calling platform functions which operate on the
HW breakpoint registers here.

preempt_disable()/enable() sequence in the very last leaf level helpers
such as [read|write]_wb_reg(), will ensure required mutual exclusion.

> We should be able to descrive should be able to describe that in the
> commit message or in a comment somewhere (or better, with some
> assertions that get tested).

Planning to add a comment - something like this both for read and write
helpers.
       /*
        * Bank selection in MDSELR_EL1, followed by indexed read from
        * [break|watch]point registers cannot be interrupted, as that
        * might cause misread from wrong targets. Hence this requires
        * mutual exclusion via preventing any preemption.
        */

But regarding adding assertions, could you give some more details and
it will be great to have some relevant examples as well.

> 
> For example, what prevents watchpoint_handler() from firing in the
> middle of arch_install_hw_breakpoint() or
> arch_uninstall_hw_breakpoint()?

If perf is the only user, watchpoint_handler() will not get triggered
without watchpoints being installed via arch_install_hw_breakpoint().
Similarly once they get uninstalled via arch_uninstall_hw_breakpoint()
there will not be active watchpoints to trigger the handler. Although
there are other users (ptrace, debug monitor etc) besides perf which
could also be active simultaneously and race with each other ? TBH, I
am not sure.

> 
> Is the existing code correct?

I have not tested the concurrency aspects of the HW breakpoints enough
to be able to answer that question. But if there is a particular concern
here, happy to look into that.

But wondering how does this new bank indexed read/write mechanism (after
taking care of the mutual exclusion in the leaf level helpers such as
[read| write]_wb_reg()) still makes the existing concurrency situation
worse off than earlier ?
Mark Rutland Oct. 29, 2024, 4:20 p.m. UTC | #7
On Tue, Oct 29, 2024 at 01:06:38PM +0530, Anshuman Khandual wrote:
> On 10/28/24 18:17, Mark Rutland wrote:
> > On Wed, Oct 23, 2024 at 01:01:52PM +0530, Anshuman Khandual wrote:
> >> On 10/22/24 21:04, Mark Rutland wrote:

> >>> I assume this is intended to protect the bank in sequences like:
> >>>
> >>> 	MSR	MDSELR, <...>
> >>> 	ISB
> >>> 	MRS	<..._, BANKED_REGISTER
> >>
> >> Correct, it is protecting the above sequence.
> >>
> >>> ... but is theat suffucient for mutual exclusion against
> >>> exception handlers, or does that come from somewhere else?
> >>
> >> Looking at all existing use cases for breakpoint/watchpoints, it should
> >> be sufficient to protect against mutual exclusion. But thinking, do you
> >> have a particular exception handler scenario in mind where this might
> >> still be problematic ? Will keep looking into it.
> > 
> > Where does the mutual exclusion come from for the existing sequences?
> 
> Bank selection followed by indexed read/write, inherently requires mutual
> exclusion (ensuring that both these steps executed together) in order to
> prevent read/write into wrong registers. That being said, HW breakpoints
> get used in multiple different places such as perf, ptrace, debug monitor
> based single stepping etc calling platform functions which operate on the
> HW breakpoint registers here.

Yes; that's *why* I'm asking. 

> preempt_disable()/enable() sequence in the very last leaf level helpers
> such as [read|write]_wb_reg(), will ensure required mutual exclusion.

I do not believe that this assertion is correct.

I specifically gave the example of mutual exclusion against exception
handlers, and preempt_disable() ... preempt_disable() does not prevent
exceptions being taken, so disabling preemption *cannot* be sufficient
to provide mutual exclusion against exception handlers.

What prevents a race with an exception handler? e.g. 

* Does the structure of the code prevent that somehow?

* What context(s) does this code execute in?
  - Are debug exceptions always masked?
  - Do we disable breakpoints/watchpoints around (some) manipulation of
    the relevant registers?

> > We should be able to descrive should be able to describe that in the
> > commit message or in a comment somewhere (or better, with some
> > assertions that get tested).
> 
> Planning to add a comment - something like this both for read and write
> helpers.
>        /*
>         * Bank selection in MDSELR_EL1, followed by indexed read from
>         * [break|watch]point registers cannot be interrupted, as that
>         * might cause misread from wrong targets. Hence this requires
>         * mutual exclusion via preventing any preemption.
>         */

As above, I do not believe this is correct. At minimum, disabling
preemption is not the full story here.

> But regarding adding assertions, could you give some more details and
> it will be great to have some relevant examples as well.

I've given some suggestions above. Please go and read the code and
figure this out.

> > For example, what prevents watchpoint_handler() from firing in the
> > middle of arch_install_hw_breakpoint() or
> > arch_uninstall_hw_breakpoint()?
> 
> If perf is the only user, watchpoint_handler() will not get triggered
> without watchpoints being installed via arch_install_hw_breakpoint().
> Similarly once they get uninstalled via arch_uninstall_hw_breakpoint()
> there will not be active watchpoints to trigger the handler. Although
> there are other users (ptrace, debug monitor etc) besides perf which
> could also be active simultaneously and race with each other ? TBH, I
> am not sure.

Please go and read the code and figure this out.

> > Is the existing code correct?
> 
> I have not tested the concurrency aspects of the HW breakpoints enough
> to be able to answer that question. But if there is a particular concern
> here, happy to look into that.
> 
> But wondering how does this new bank indexed read/write mechanism (after
> taking care of the mutual exclusion in the leaf level helpers such as
> [read| write]_wb_reg()) still makes the existing concurrency situation
> worse off than earlier ?

Please go and read the code and figure this out.

Mark.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 13d437bcbf58..a14097673ae0 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -20,6 +20,7 @@ 
 #define DBG_MDSCR_KDE		(1 << 13)
 #define DBG_MDSCR_MDE		(1 << 15)
 #define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
+#define DBG_MDSCR_EMBWE		(1UL << 32)
 
 #define	DBG_ESR_EVT(x)		(((x) >> 27) & 0x7)
 
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index bd81cf17744a..362c4d4a64ac 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -79,8 +79,8 @@  static inline void decode_ctrl_reg(u32 reg,
  * Limits.
  * Changing these will require modifications to the register accessors.
  */
-#define ARM_MAX_BRP		16
-#define ARM_MAX_WRP		16
+#define ARM_MAX_BRP		64
+#define ARM_MAX_WRP		64
 
 /* Virtual debug register bases. */
 #define AARCH64_DBG_REG_BVR	0
@@ -94,13 +94,25 @@  static inline void decode_ctrl_reg(u32 reg,
 #define AARCH64_DBG_REG_NAME_WVR	wvr
 #define AARCH64_DBG_REG_NAME_WCR	wcr
 
+static inline bool is_debug_v8p9_enabled(void)
+{
+	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+	int dver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_DebugVer_SHIFT);
+
+	return dver == ID_AA64DFR0_EL1_DebugVer_V8P9;
+}
+
 /* Accessor macros for the debug registers. */
 #define AARCH64_DBG_READ(N, REG, VAL) do {\
 	VAL = read_sysreg(dbg##REG##N##_el1);\
+	if (is_debug_v8p9_enabled())	\
+		preempt_enable();	\
 } while (0)
 
 #define AARCH64_DBG_WRITE(N, REG, VAL) do {\
 	write_sysreg(VAL, dbg##REG##N##_el1);\
+	if (is_debug_v8p9_enabled())	\
+		preempt_enable();	\
 } while (0)
 
 struct task_struct;
@@ -138,19 +150,37 @@  static inline void ptrace_hw_copy_thread(struct task_struct *task)
 /* Determine number of BRP registers available. */
 static inline int get_num_brps(void)
 {
-	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
-	return 1 +
-		cpuid_feature_extract_unsigned_field(dfr0,
-						ID_AA64DFR0_EL1_BRPs_SHIFT);
+	u64 dfr0, dfr1;
+	int dver, brps;
+
+	dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+	dver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_DebugVer_SHIFT);
+	if (dver == ID_AA64DFR0_EL1_DebugVer_V8P9) {
+		dfr1 = read_sanitised_ftr_reg(SYS_ID_AA64DFR1_EL1);
+		brps = cpuid_feature_extract_unsigned_field_width(dfr1,
+								  ID_AA64DFR1_EL1_BRPs_SHIFT, 8);
+	} else {
+		brps = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRPs_SHIFT);
+	}
+	return 1 + brps;
 }
 
 /* Determine number of WRP registers available. */
 static inline int get_num_wrps(void)
 {
-	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
-	return 1 +
-		cpuid_feature_extract_unsigned_field(dfr0,
-						ID_AA64DFR0_EL1_WRPs_SHIFT);
+	u64 dfr0, dfr1;
+	int dver, wrps;
+
+	dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+	dver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_DebugVer_SHIFT);
+	if (dver == ID_AA64DFR0_EL1_DebugVer_V8P9) {
+		dfr1 = read_sanitised_ftr_reg(SYS_ID_AA64DFR1_EL1);
+		wrps = cpuid_feature_extract_unsigned_field_width(dfr1,
+								  ID_AA64DFR1_EL1_WRPs_SHIFT, 8);
+	} else {
+		wrps = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_WRPs_SHIFT);
+	}
+	return 1 + wrps;
 }
 
 #ifdef CONFIG_CPU_PM
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 024a7b245056..af643c935f2e 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -23,6 +23,7 @@ 
 #include <asm/debug-monitors.h>
 #include <asm/system_misc.h>
 #include <asm/traps.h>
+#include <asm/hw_breakpoint.h>
 
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
@@ -34,7 +35,7 @@  u8 debug_monitors_arch(void)
 /*
  * MDSCR access routines.
  */
-static void mdscr_write(u32 mdscr)
+static void mdscr_write(u64 mdscr)
 {
 	unsigned long flags;
 	flags = local_daif_save();
@@ -43,7 +44,7 @@  static void mdscr_write(u32 mdscr)
 }
 NOKPROBE_SYMBOL(mdscr_write);
 
-static u32 mdscr_read(void)
+static u64 mdscr_read(void)
 {
 	return read_sysreg(mdscr_el1);
 }
@@ -76,10 +77,11 @@  early_param("nodebugmon", early_debug_disable);
  */
 static DEFINE_PER_CPU(int, mde_ref_count);
 static DEFINE_PER_CPU(int, kde_ref_count);
+static DEFINE_PER_CPU(int, embwe_ref_count);
 
 void enable_debug_monitors(enum dbg_active_el el)
 {
-	u32 mdscr, enable = 0;
+	u64 mdscr, enable = 0;
 
 	WARN_ON(preemptible());
 
@@ -90,6 +92,9 @@  void enable_debug_monitors(enum dbg_active_el el)
 	    this_cpu_inc_return(kde_ref_count) == 1)
 		enable |= DBG_MDSCR_KDE;
 
+	if (is_debug_v8p9_enabled() && this_cpu_inc_return(embwe_ref_count) == 1)
+		enable |= DBG_MDSCR_EMBWE;
+
 	if (enable && debug_enabled) {
 		mdscr = mdscr_read();
 		mdscr |= enable;
@@ -100,7 +105,7 @@  NOKPROBE_SYMBOL(enable_debug_monitors);
 
 void disable_debug_monitors(enum dbg_active_el el)
 {
-	u32 mdscr, disable = 0;
+	u64 mdscr, disable = 0;
 
 	WARN_ON(preemptible());
 
@@ -111,6 +116,9 @@  void disable_debug_monitors(enum dbg_active_el el)
 	    this_cpu_dec_return(kde_ref_count) == 0)
 		disable &= ~DBG_MDSCR_KDE;
 
+	if (is_debug_v8p9_enabled() && this_cpu_dec_return(embwe_ref_count) == 0)
+		disable &= ~DBG_MDSCR_EMBWE;
+
 	if (disable) {
 		mdscr = mdscr_read();
 		mdscr &= disable;
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 722ac45f9f7b..30156d732284 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -103,10 +103,40 @@  int hw_breakpoint_slots(int type)
 	WRITE_WB_REG_CASE(OFF, 14, REG, VAL);	\
 	WRITE_WB_REG_CASE(OFF, 15, REG, VAL)
 
+static int set_bank_index(int n)
+{
+	int mdsel_bank;
+	int bank = n / 16, index = n % 16;
+
+	switch (bank) {
+	case 0:
+		mdsel_bank = MDSELR_EL1_BANK_BANK_0;
+		break;
+	case 1:
+		mdsel_bank = MDSELR_EL1_BANK_BANK_1;
+		break;
+	case 2:
+		mdsel_bank = MDSELR_EL1_BANK_BANK_2;
+		break;
+	case 3:
+		mdsel_bank = MDSELR_EL1_BANK_BANK_3;
+		break;
+	default:
+		pr_warn("Unknown register bank %d\n", bank);
+	}
+	preempt_disable();
+	write_sysreg_s(mdsel_bank << MDSELR_EL1_BANK_SHIFT, SYS_MDSELR_EL1);
+	isb();
+	return index;
+}
+
 static u64 read_wb_reg(int reg, int n)
 {
 	u64 val = 0;
 
+	if (is_debug_v8p9_enabled())
+		n = set_bank_index(n);
+
 	switch (reg + n) {
 	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
 	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);
@@ -122,6 +152,9 @@  NOKPROBE_SYMBOL(read_wb_reg);
 
 static void write_wb_reg(int reg, int n, u64 val)
 {
+	if (is_debug_v8p9_enabled())
+		n = set_bank_index(n);
+
 	switch (reg + n) {
 	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val);
 	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val);