diff mbox

[RFC,v2,1/1] ARM64: KGDB: Add FP/SIMD debug support

Message ID 1384776417-29076-2-git-send-email-vijay.kilari@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vijay Kilari Nov. 18, 2013, 12:06 p.m. UTC
From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>

Add KGDB debug support for FP/SIMD processor.This support
only debugging of FP/SIMD in kernel mode.
With this patch one can view or set FP/SIMD registers in
kernel context

Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
---
 arch/arm64/include/asm/fpsimd.h |    1 +
 arch/arm64/kernel/fpsimd.c      |    5 ++
 arch/arm64/kernel/kgdb.c        |  105 +++++++++++++++++++++++++--------------
 3 files changed, 73 insertions(+), 38 deletions(-)

Comments

Will Deacon Nov. 25, 2013, 6:52 p.m. UTC | #1
On Mon, Nov 18, 2013 at 12:06:57PM +0000, vijay.kilari@gmail.com wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
> 
> Add KGDB debug support for FP/SIMD processor.This support
> only debugging of FP/SIMD in kernel mode.
> With this patch one can view or set FP/SIMD registers in
> kernel context
> 
> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
> ---
>  arch/arm64/include/asm/fpsimd.h |    1 +
>  arch/arm64/kernel/fpsimd.c      |    5 ++
>  arch/arm64/kernel/kgdb.c        |  105 +++++++++++++++++++++++++--------------
>  3 files changed, 73 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 609bc44..5039d6d 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -60,6 +60,7 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
>  extern void fpsimd_thread_switch(struct task_struct *next);
>  extern void fpsimd_flush_thread(void);
>  extern void fpsimd_reload_fpstate(void);
> +extern int  fpsimd_thread_state(void);
>  
>  #endif
>  
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 5b13c17..27604c1 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -124,6 +124,11 @@ void fpsimd_flush_thread(void)
>  	set_thread_flag(TIF_FOREIGN_FPSTATE);
>  }
>  
> +int fpsimd_thread_state(void)
> +{
> +	return (current->mm && test_thread_flag(TIF_FOREIGN_FPSTATE)) ? 1 : 0;
> +}

Erm...

  1. `? 1 : 0' doesn't do anything
  2. TIF_FOREIGN_FPSTATE doesn't seem to appear in my source tree (3.13-rc1)
  3. Does it make sense for threads without an mm to have TIF_FOREIGN_FPSTATE set?

>  void fpsimd_reload_fpstate(void)
>  {
>  	preempt_disable();
> diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
> index f10f2ba..5e34852 100644
> --- a/arch/arm64/kernel/kgdb.c
> +++ b/arch/arm64/kernel/kgdb.c
> @@ -23,6 +23,14 @@
>  #include <linux/kdebug.h>
>  #include <linux/kgdb.h>
>  #include <asm/traps.h>
> +#include <asm/fpsimd.h>
> +
> +#ifdef CONFIG_KERNEL_MODE_NEON
> +/*
> + * Structure to hold FP/SIMD register contents
> + */
> +struct fpsimd_state kgdb_fpsimd_regs;
> +#endif
>  
>  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>  	{ "x0", 8, offsetof(struct pt_regs, regs[0])},
> @@ -59,40 +67,40 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>  	{ "sp", 8, offsetof(struct pt_regs, sp)},
>  	{ "pc", 8, offsetof(struct pt_regs, pc)},
>  	{ "pstate", 4, offsetof(struct pt_regs, pstate)},
> -	{ "v0", 16, -1 },
> -	{ "v1", 16, -1 },
> -	{ "v2", 16, -1 },
> -	{ "v3", 16, -1 },
> -	{ "v4", 16, -1 },
> -	{ "v5", 16, -1 },
> -	{ "v6", 16, -1 },
> -	{ "v7", 16, -1 },
> -	{ "v8", 16, -1 },
> -	{ "v9", 16, -1 },
> -	{ "v10", 16, -1 },
> -	{ "v11", 16, -1 },
> -	{ "v12", 16, -1 },
> -	{ "v13", 16, -1 },
> -	{ "v14", 16, -1 },
> -	{ "v15", 16, -1 },
> -	{ "v16", 16, -1 },
> -	{ "v17", 16, -1 },
> -	{ "v18", 16, -1 },
> -	{ "v19", 16, -1 },
> -	{ "v20", 16, -1 },
> -	{ "v21", 16, -1 },
> -	{ "v22", 16, -1 },
> -	{ "v23", 16, -1 },
> -	{ "v24", 16, -1 },
> -	{ "v25", 16, -1 },
> -	{ "v26", 16, -1 },
> -	{ "v27", 16, -1 },
> -	{ "v28", 16, -1 },
> -	{ "v29", 16, -1 },
> -	{ "v30", 16, -1 },
> -	{ "v31", 16, -1 },
> -	{ "fpsr", 4, -1 },
> -	{ "fpcr", 4, -1 },
> +	{ "v0", 16, offsetof(struct fpsimd_state, vregs[0])},
> +	{ "v1", 16, offsetof(struct fpsimd_state, vregs[1])},
> +	{ "v2", 16, offsetof(struct fpsimd_state, vregs[2])},
> +	{ "v3", 16, offsetof(struct fpsimd_state, vregs[3])},
> +	{ "v4", 16, offsetof(struct fpsimd_state, vregs[4])},
> +	{ "v5", 16, offsetof(struct fpsimd_state, vregs[5])},
> +	{ "v6", 16, offsetof(struct fpsimd_state, vregs[6])},
> +	{ "v7", 16, offsetof(struct fpsimd_state, vregs[7])},
> +	{ "v8", 16, offsetof(struct fpsimd_state, vregs[8])},
> +	{ "v9", 16, offsetof(struct fpsimd_state, vregs[9])},
> +	{ "v10", 16, offsetof(struct fpsimd_state, vregs[10])},
> +	{ "v11", 16, offsetof(struct fpsimd_state, vregs[11])},
> +	{ "v12", 16, offsetof(struct fpsimd_state, vregs[12])},
> +	{ "v13", 16, offsetof(struct fpsimd_state, vregs[13])},
> +	{ "v14", 16, offsetof(struct fpsimd_state, vregs[14])},
> +	{ "v15", 16, offsetof(struct fpsimd_state, vregs[15])},
> +	{ "v16", 16, offsetof(struct fpsimd_state, vregs[16])},
> +	{ "v17", 16, offsetof(struct fpsimd_state, vregs[17])},
> +	{ "v18", 16, offsetof(struct fpsimd_state, vregs[18])},
> +	{ "v19", 16, offsetof(struct fpsimd_state, vregs[19])},
> +	{ "v20", 16, offsetof(struct fpsimd_state, vregs[20])},
> +	{ "v21", 16, offsetof(struct fpsimd_state, vregs[21])},
> +	{ "v22", 16, offsetof(struct fpsimd_state, vregs[22])},
> +	{ "v23", 16, offsetof(struct fpsimd_state, vregs[23])},
> +	{ "v24", 16, offsetof(struct fpsimd_state, vregs[24])},
> +	{ "v25", 16, offsetof(struct fpsimd_state, vregs[25])},
> +	{ "v26", 16, offsetof(struct fpsimd_state, vregs[26])},
> +	{ "v27", 16, offsetof(struct fpsimd_state, vregs[27])},
> +	{ "v28", 16, offsetof(struct fpsimd_state, vregs[28])},
> +	{ "v29", 16, offsetof(struct fpsimd_state, vregs[29])},
> +	{ "v30", 16, offsetof(struct fpsimd_state, vregs[30])},
> +	{ "v31", 16, offsetof(struct fpsimd_state, vregs[31])},
> +	{ "fpsr", 4, offsetof(struct fpsimd_state, fpsr)},
> +	{ "fpcr", 4, offsetof(struct fpsimd_state, fpcr)},
>  };
>  
>  char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
> @@ -100,11 +108,22 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>  	if (regno >= DBG_MAX_REG_NUM || regno < 0)
>  		return NULL;
>  
> -	if (dbg_reg_def[regno].offset != -1)
> +	if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>  		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
>  		       dbg_reg_def[regno].size);
> -	else
> +	else {
> +#ifdef CONFIG_KERNEL_MODE_NEON
> +		if (fpsimd_thread_state()) {
> +			fpsimd_save_state(&kgdb_fpsimd_regs);
> +			memcpy(mem, (void *)&kgdb_fpsimd_regs +
> +					dbg_reg_def[regno].offset,
> +					dbg_reg_def[regno].size);
> +		} else
> +			memset(mem, 0, dbg_reg_def[regno].size);
> +#else
>  		memset(mem, 0, dbg_reg_def[regno].size);

Can you use IS_ENABLED to tidy this up?

Will
Ard Biesheuvel Nov. 25, 2013, 7:03 p.m. UTC | #2
On 25 November 2013 19:52, Will Deacon <will.deacon@arm.com> wrote:
> On Mon, Nov 18, 2013 at 12:06:57PM +0000, vijay.kilari@gmail.com wrote:
>> From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>>
>> Add KGDB debug support for FP/SIMD processor.This support
>> only debugging of FP/SIMD in kernel mode.
>> With this patch one can view or set FP/SIMD registers in
>> kernel context
>>
>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>> ---
>>  arch/arm64/include/asm/fpsimd.h |    1 +
>>  arch/arm64/kernel/fpsimd.c      |    5 ++
>>  arch/arm64/kernel/kgdb.c        |  105 +++++++++++++++++++++++++--------------
>>  3 files changed, 73 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
>> index 609bc44..5039d6d 100644
>> --- a/arch/arm64/include/asm/fpsimd.h
>> +++ b/arch/arm64/include/asm/fpsimd.h
>> @@ -60,6 +60,7 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
>>  extern void fpsimd_thread_switch(struct task_struct *next);
>>  extern void fpsimd_flush_thread(void);
>>  extern void fpsimd_reload_fpstate(void);
>> +extern int  fpsimd_thread_state(void);
>>
>>  #endif
>>
>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> index 5b13c17..27604c1 100644
>> --- a/arch/arm64/kernel/fpsimd.c
>> +++ b/arch/arm64/kernel/fpsimd.c
>> @@ -124,6 +124,11 @@ void fpsimd_flush_thread(void)
>>       set_thread_flag(TIF_FOREIGN_FPSTATE);
>>  }
>>
>> +int fpsimd_thread_state(void)
>> +{
>> +     return (current->mm && test_thread_flag(TIF_FOREIGN_FPSTATE)) ? 1 : 0;
>> +}
>
> Erm...
>
>   1. `? 1 : 0' doesn't do anything
>   2. TIF_FOREIGN_FPSTATE doesn't seem to appear in my source tree (3.13-rc1)
>   3. Does it make sense for threads without an mm to have TIF_FOREIGN_FPSTATE set?
>

I think Vijay did mention at some point that his patches depend on my
patch 'arm64: defer reloading a task's FPSIMD state to userland
resume' which I posted here

http://marc.info/?l=linux-arm-kernel&m=138418879110655&w=2

(with you on cc, IIRC) but which did not evoke any response on the list.
Vijay Kilari Nov. 26, 2013, 11:57 a.m. UTC | #3
On Tue, Nov 26, 2013 at 12:22 AM, Will Deacon <will.deacon@arm.com> wrote:
> On Mon, Nov 18, 2013 at 12:06:57PM +0000, vijay.kilari@gmail.com wrote:
>> From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>>
>> Add KGDB debug support for FP/SIMD processor.This support
>> only debugging of FP/SIMD in kernel mode.
>> With this patch one can view or set FP/SIMD registers in
>> kernel context
>>
>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>> ---
>>  arch/arm64/include/asm/fpsimd.h |    1 +
>>  arch/arm64/kernel/fpsimd.c      |    5 ++
>>  arch/arm64/kernel/kgdb.c        |  105 +++++++++++++++++++++++++--------------
>>  3 files changed, 73 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
>> index 609bc44..5039d6d 100644
>> --- a/arch/arm64/include/asm/fpsimd.h
>> +++ b/arch/arm64/include/asm/fpsimd.h
>> @@ -60,6 +60,7 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
>>  extern void fpsimd_thread_switch(struct task_struct *next);
>>  extern void fpsimd_flush_thread(void);
>>  extern void fpsimd_reload_fpstate(void);
>> +extern int  fpsimd_thread_state(void);
>>
>>  #endif
>>
>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> index 5b13c17..27604c1 100644
>> --- a/arch/arm64/kernel/fpsimd.c
>> +++ b/arch/arm64/kernel/fpsimd.c
>> @@ -124,6 +124,11 @@ void fpsimd_flush_thread(void)
>>       set_thread_flag(TIF_FOREIGN_FPSTATE);
>>  }
>>
>> +int fpsimd_thread_state(void)
>> +{
>> +     return (current->mm && test_thread_flag(TIF_FOREIGN_FPSTATE)) ? 1 : 0;
>> +}
>
> Erm...
>
>   1. `? 1 : 0' doesn't do anything
     it worked for me. on true condition it returned 1 else 0. Any
thing specific?

>   2. TIF_FOREIGN_FPSTATE doesn't seem to appear in my source tree (3.13-rc1)
     As mentioned in cover patch, it is based on Ard's patch. I can
include his patch
     if required.

>   3. Does it make sense for threads without an mm to have TIF_FOREIGN_FPSTATE set?

      As per Ard's patch, this flag is set only if threads has mm. So
mm check would be
      redundant here.  Ard might have more inputs

      How about allowing to view FP/SIMD registers irrespective of
FP/SIMD is used
      by kernel & user-space?. it will help user to view FP registers
for debugging

>>  void fpsimd_reload_fpstate(void)
>>  {
>>       preempt_disable();
>> diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
>> index f10f2ba..5e34852 100644
>> --- a/arch/arm64/kernel/kgdb.c
>> +++ b/arch/arm64/kernel/kgdb.c
>> @@ -23,6 +23,14 @@
>>  #include <linux/kdebug.h>
>>  #include <linux/kgdb.h>
>>  #include <asm/traps.h>
>> +#include <asm/fpsimd.h>
>> +
>> +#ifdef CONFIG_KERNEL_MODE_NEON
>> +/*
>> + * Structure to hold FP/SIMD register contents
>> + */
>> +struct fpsimd_state kgdb_fpsimd_regs;
>> +#endif
>>
>>  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>       { "x0", 8, offsetof(struct pt_regs, regs[0])},
>> @@ -59,40 +67,40 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>       { "sp", 8, offsetof(struct pt_regs, sp)},
>>       { "pc", 8, offsetof(struct pt_regs, pc)},
>>       { "pstate", 4, offsetof(struct pt_regs, pstate)},
>> -     { "v0", 16, -1 },
>> -     { "v1", 16, -1 },
>> -     { "v2", 16, -1 },
>> -     { "v3", 16, -1 },
>> -     { "v4", 16, -1 },
>> -     { "v5", 16, -1 },
>> -     { "v6", 16, -1 },
>> -     { "v7", 16, -1 },
>> -     { "v8", 16, -1 },
>> -     { "v9", 16, -1 },
>> -     { "v10", 16, -1 },
>> -     { "v11", 16, -1 },
>> -     { "v12", 16, -1 },
>> -     { "v13", 16, -1 },
>> -     { "v14", 16, -1 },
>> -     { "v15", 16, -1 },
>> -     { "v16", 16, -1 },
>> -     { "v17", 16, -1 },
>> -     { "v18", 16, -1 },
>> -     { "v19", 16, -1 },
>> -     { "v20", 16, -1 },
>> -     { "v21", 16, -1 },
>> -     { "v22", 16, -1 },
>> -     { "v23", 16, -1 },
>> -     { "v24", 16, -1 },
>> -     { "v25", 16, -1 },
>> -     { "v26", 16, -1 },
>> -     { "v27", 16, -1 },
>> -     { "v28", 16, -1 },
>> -     { "v29", 16, -1 },
>> -     { "v30", 16, -1 },
>> -     { "v31", 16, -1 },
>> -     { "fpsr", 4, -1 },
>> -     { "fpcr", 4, -1 },
>> +     { "v0", 16, offsetof(struct fpsimd_state, vregs[0])},
>> +     { "v1", 16, offsetof(struct fpsimd_state, vregs[1])},
>> +     { "v2", 16, offsetof(struct fpsimd_state, vregs[2])},
>> +     { "v3", 16, offsetof(struct fpsimd_state, vregs[3])},
>> +     { "v4", 16, offsetof(struct fpsimd_state, vregs[4])},
>> +     { "v5", 16, offsetof(struct fpsimd_state, vregs[5])},
>> +     { "v6", 16, offsetof(struct fpsimd_state, vregs[6])},
>> +     { "v7", 16, offsetof(struct fpsimd_state, vregs[7])},
>> +     { "v8", 16, offsetof(struct fpsimd_state, vregs[8])},
>> +     { "v9", 16, offsetof(struct fpsimd_state, vregs[9])},
>> +     { "v10", 16, offsetof(struct fpsimd_state, vregs[10])},
>> +     { "v11", 16, offsetof(struct fpsimd_state, vregs[11])},
>> +     { "v12", 16, offsetof(struct fpsimd_state, vregs[12])},
>> +     { "v13", 16, offsetof(struct fpsimd_state, vregs[13])},
>> +     { "v14", 16, offsetof(struct fpsimd_state, vregs[14])},
>> +     { "v15", 16, offsetof(struct fpsimd_state, vregs[15])},
>> +     { "v16", 16, offsetof(struct fpsimd_state, vregs[16])},
>> +     { "v17", 16, offsetof(struct fpsimd_state, vregs[17])},
>> +     { "v18", 16, offsetof(struct fpsimd_state, vregs[18])},
>> +     { "v19", 16, offsetof(struct fpsimd_state, vregs[19])},
>> +     { "v20", 16, offsetof(struct fpsimd_state, vregs[20])},
>> +     { "v21", 16, offsetof(struct fpsimd_state, vregs[21])},
>> +     { "v22", 16, offsetof(struct fpsimd_state, vregs[22])},
>> +     { "v23", 16, offsetof(struct fpsimd_state, vregs[23])},
>> +     { "v24", 16, offsetof(struct fpsimd_state, vregs[24])},
>> +     { "v25", 16, offsetof(struct fpsimd_state, vregs[25])},
>> +     { "v26", 16, offsetof(struct fpsimd_state, vregs[26])},
>> +     { "v27", 16, offsetof(struct fpsimd_state, vregs[27])},
>> +     { "v28", 16, offsetof(struct fpsimd_state, vregs[28])},
>> +     { "v29", 16, offsetof(struct fpsimd_state, vregs[29])},
>> +     { "v30", 16, offsetof(struct fpsimd_state, vregs[30])},
>> +     { "v31", 16, offsetof(struct fpsimd_state, vregs[31])},
>> +     { "fpsr", 4, offsetof(struct fpsimd_state, fpsr)},
>> +     { "fpcr", 4, offsetof(struct fpsimd_state, fpcr)},
>>  };
>>
>>  char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>> @@ -100,11 +108,22 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>>       if (regno >= DBG_MAX_REG_NUM || regno < 0)
>>               return NULL;
>>
>> -     if (dbg_reg_def[regno].offset != -1)
>> +     if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>>               memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
>>                      dbg_reg_def[regno].size);
>> -     else
>> +     else {
>> +#ifdef CONFIG_KERNEL_MODE_NEON
>> +             if (fpsimd_thread_state()) {
>> +                     fpsimd_save_state(&kgdb_fpsimd_regs);
>> +                     memcpy(mem, (void *)&kgdb_fpsimd_regs +
>> +                                     dbg_reg_def[regno].offset,
>> +                                     dbg_reg_def[regno].size);
>> +             } else
>> +                     memset(mem, 0, dbg_reg_def[regno].size);
>> +#else
>>               memset(mem, 0, dbg_reg_def[regno].size);
>
> Can you use IS_ENABLED to tidy this up?
Yes, will take care in next version

>
> Will
Ard Biesheuvel Nov. 29, 2013, 8:53 a.m. UTC | #4
On 18 November 2013 13:06,  <vijay.kilari@gmail.com> wrote:
> From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>
> Add KGDB debug support for FP/SIMD processor.This support
> only debugging of FP/SIMD in kernel mode.
> With this patch one can view or set FP/SIMD registers in
> kernel context
>
> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
> ---
>  arch/arm64/include/asm/fpsimd.h |    1 +
>  arch/arm64/kernel/fpsimd.c      |    5 ++
>  arch/arm64/kernel/kgdb.c        |  105 +++++++++++++++++++++++++--------------
>  3 files changed, 73 insertions(+), 38 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 609bc44..5039d6d 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -60,6 +60,7 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
>  extern void fpsimd_thread_switch(struct task_struct *next);
>  extern void fpsimd_flush_thread(void);
>  extern void fpsimd_reload_fpstate(void);
> +extern int  fpsimd_thread_state(void);
>
>  #endif
>
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 5b13c17..27604c1 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -124,6 +124,11 @@ void fpsimd_flush_thread(void)
>         set_thread_flag(TIF_FOREIGN_FPSTATE);
>  }
>
> +int fpsimd_thread_state(void)

Please use a meaningful name for the function. Currently, it returns
true iff 'current' is a non-kernel thread whose FPSIMD state is /not/
present in the FPSIMD register file. You should choose a name that
reflects that purpose.

> +{
> +       return (current->mm && test_thread_flag(TIF_FOREIGN_FPSTATE)) ? 1 : 0;
> +}
> +

As Will pointed out, the expression before ? already evaluates to 1 or
0, so appending '? 1 : 0' is redundant.

>  void fpsimd_reload_fpstate(void)
>  {
>         preempt_disable();
> diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
> index f10f2ba..5e34852 100644
> --- a/arch/arm64/kernel/kgdb.c
> +++ b/arch/arm64/kernel/kgdb.c
> @@ -23,6 +23,14 @@
>  #include <linux/kdebug.h>
>  #include <linux/kgdb.h>
>  #include <asm/traps.h>
> +#include <asm/fpsimd.h>
> +
> +#ifdef CONFIG_KERNEL_MODE_NEON
> +/*
> + * Structure to hold FP/SIMD register contents
> + */
> +struct fpsimd_state kgdb_fpsimd_regs;

'static' if it's local to this source file

> +#endif
>
>  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>         { "x0", 8, offsetof(struct pt_regs, regs[0])},
> @@ -59,40 +67,40 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>         { "sp", 8, offsetof(struct pt_regs, sp)},
>         { "pc", 8, offsetof(struct pt_regs, pc)},
>         { "pstate", 4, offsetof(struct pt_regs, pstate)},
> -       { "v0", 16, -1 },
> -       { "v1", 16, -1 },
> -       { "v2", 16, -1 },
> -       { "v3", 16, -1 },
> -       { "v4", 16, -1 },
> -       { "v5", 16, -1 },
> -       { "v6", 16, -1 },
> -       { "v7", 16, -1 },
> -       { "v8", 16, -1 },
> -       { "v9", 16, -1 },
> -       { "v10", 16, -1 },
> -       { "v11", 16, -1 },
> -       { "v12", 16, -1 },
> -       { "v13", 16, -1 },
> -       { "v14", 16, -1 },
> -       { "v15", 16, -1 },
> -       { "v16", 16, -1 },
> -       { "v17", 16, -1 },
> -       { "v18", 16, -1 },
> -       { "v19", 16, -1 },
> -       { "v20", 16, -1 },
> -       { "v21", 16, -1 },
> -       { "v22", 16, -1 },
> -       { "v23", 16, -1 },
> -       { "v24", 16, -1 },
> -       { "v25", 16, -1 },
> -       { "v26", 16, -1 },
> -       { "v27", 16, -1 },
> -       { "v28", 16, -1 },
> -       { "v29", 16, -1 },
> -       { "v30", 16, -1 },
> -       { "v31", 16, -1 },
> -       { "fpsr", 4, -1 },
> -       { "fpcr", 4, -1 },
> +       { "v0", 16, offsetof(struct fpsimd_state, vregs[0])},
> +       { "v1", 16, offsetof(struct fpsimd_state, vregs[1])},
> +       { "v2", 16, offsetof(struct fpsimd_state, vregs[2])},
> +       { "v3", 16, offsetof(struct fpsimd_state, vregs[3])},
> +       { "v4", 16, offsetof(struct fpsimd_state, vregs[4])},
> +       { "v5", 16, offsetof(struct fpsimd_state, vregs[5])},
> +       { "v6", 16, offsetof(struct fpsimd_state, vregs[6])},
> +       { "v7", 16, offsetof(struct fpsimd_state, vregs[7])},
> +       { "v8", 16, offsetof(struct fpsimd_state, vregs[8])},
> +       { "v9", 16, offsetof(struct fpsimd_state, vregs[9])},
> +       { "v10", 16, offsetof(struct fpsimd_state, vregs[10])},
> +       { "v11", 16, offsetof(struct fpsimd_state, vregs[11])},
> +       { "v12", 16, offsetof(struct fpsimd_state, vregs[12])},
> +       { "v13", 16, offsetof(struct fpsimd_state, vregs[13])},
> +       { "v14", 16, offsetof(struct fpsimd_state, vregs[14])},
> +       { "v15", 16, offsetof(struct fpsimd_state, vregs[15])},
> +       { "v16", 16, offsetof(struct fpsimd_state, vregs[16])},
> +       { "v17", 16, offsetof(struct fpsimd_state, vregs[17])},
> +       { "v18", 16, offsetof(struct fpsimd_state, vregs[18])},
> +       { "v19", 16, offsetof(struct fpsimd_state, vregs[19])},
> +       { "v20", 16, offsetof(struct fpsimd_state, vregs[20])},
> +       { "v21", 16, offsetof(struct fpsimd_state, vregs[21])},
> +       { "v22", 16, offsetof(struct fpsimd_state, vregs[22])},
> +       { "v23", 16, offsetof(struct fpsimd_state, vregs[23])},
> +       { "v24", 16, offsetof(struct fpsimd_state, vregs[24])},
> +       { "v25", 16, offsetof(struct fpsimd_state, vregs[25])},
> +       { "v26", 16, offsetof(struct fpsimd_state, vregs[26])},
> +       { "v27", 16, offsetof(struct fpsimd_state, vregs[27])},
> +       { "v28", 16, offsetof(struct fpsimd_state, vregs[28])},
> +       { "v29", 16, offsetof(struct fpsimd_state, vregs[29])},
> +       { "v30", 16, offsetof(struct fpsimd_state, vregs[30])},
> +       { "v31", 16, offsetof(struct fpsimd_state, vregs[31])},
> +       { "fpsr", 4, offsetof(struct fpsimd_state, fpsr)},
> +       { "fpcr", 4, offsetof(struct fpsimd_state, fpcr)},
>  };
>
>  char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
> @@ -100,11 +108,22 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>                 return NULL;
>
> -       if (dbg_reg_def[regno].offset != -1)
> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>                 memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
>                        dbg_reg_def[regno].size);
> -       else
> +       else {
> +#ifdef CONFIG_KERNEL_MODE_NEON

else if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON)) {

> +               if (fpsimd_thread_state()) {
> +                       fpsimd_save_state(&kgdb_fpsimd_regs);
> +                       memcpy(mem, (void *)&kgdb_fpsimd_regs +
> +                                       dbg_reg_def[regno].offset,
> +                                       dbg_reg_def[regno].size);

Why do this conditionally? Shouldn't kgdb return whatever is currently
present in the register file, regardless of whether 'current' is a
kernel thread or has had its FPSIMD state swapped out?
Also, one could wonder why this is dependent on KERNEL_MODE_NEON being set

> +               } else
> +                       memset(mem, 0, dbg_reg_def[regno].size);
> +#else
>                 memset(mem, 0, dbg_reg_def[regno].size);
> +#endif
> +       }
>         return dbg_reg_def[regno].name;
>  }
>
> @@ -113,9 +132,19 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>                 return -EINVAL;
>
> -       if (dbg_reg_def[regno].offset != -1)
> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>                 memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
> -                      dbg_reg_def[regno].size);
> +                       dbg_reg_def[regno].size);
> +#ifdef CONFIG_KERNEL_MODE_NEON
> +       else {
> +               if (fpsimd_thread_state()) {

Again, why the conditional?

> +                       memcpy((void *)&kgdb_fpsimd_regs +
> +                                       dbg_reg_def[regno].offset,
> +                                       mem, dbg_reg_def[regno].size);
> +                       fpsimd_load_state(&kgdb_fpsimd_regs);

Are you sure it is safe to clobber all the registers here? Shouldn't
you save the state first, do the memcpy() and then load?

Regards,
Ard.




> +               }
> +       }
> +#endif
>         return 0;
>  }
>
> --
> 1.7.9.5
>
Vijay Kilari Nov. 29, 2013, 9:20 a.m. UTC | #5
On Fri, Nov 29, 2013 at 2:23 PM, Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
> On 18 November 2013 13:06,  <vijay.kilari@gmail.com> wrote:
>> From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>>
>> Add KGDB debug support for FP/SIMD processor.This support
>> only debugging of FP/SIMD in kernel mode.
>> With this patch one can view or set FP/SIMD registers in
>> kernel context
>>
>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>> ---
>>  arch/arm64/include/asm/fpsimd.h |    1 +
>>  arch/arm64/kernel/fpsimd.c      |    5 ++
>>  arch/arm64/kernel/kgdb.c        |  105 +++++++++++++++++++++++++--------------
>>  3 files changed, 73 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
>> index 609bc44..5039d6d 100644
>> --- a/arch/arm64/include/asm/fpsimd.h
>> +++ b/arch/arm64/include/asm/fpsimd.h
>> @@ -60,6 +60,7 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
>>  extern void fpsimd_thread_switch(struct task_struct *next);
>>  extern void fpsimd_flush_thread(void);
>>  extern void fpsimd_reload_fpstate(void);
>> +extern int  fpsimd_thread_state(void);
>>
>>  #endif
>>
>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>> index 5b13c17..27604c1 100644
>> --- a/arch/arm64/kernel/fpsimd.c
>> +++ b/arch/arm64/kernel/fpsimd.c
>> @@ -124,6 +124,11 @@ void fpsimd_flush_thread(void)
>>         set_thread_flag(TIF_FOREIGN_FPSTATE);
>>  }
>>
>> +int fpsimd_thread_state(void)
>
> Please use a meaningful name for the function. Currently, it returns
> true iff 'current' is a non-kernel thread whose FPSIMD state is /not/
> present in the FPSIMD register file. You should choose a name that
> reflects that purpose.
>
 do you have any reason why only TIF_FOREIGN_FPSTATE flag is
set only for non-kernel threads in kernel_neon_begin()?

>> +{
>> +       return (current->mm && test_thread_flag(TIF_FOREIGN_FPSTATE)) ? 1 : 0;
>> +}
>> +
>
> As Will pointed out, the expression before ? already evaluates to 1 or
> 0, so appending '? 1 : 0' is redundant.
>
>>  void fpsimd_reload_fpstate(void)
>>  {
>>         preempt_disable();
>> diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
>> index f10f2ba..5e34852 100644
>> --- a/arch/arm64/kernel/kgdb.c
>> +++ b/arch/arm64/kernel/kgdb.c
>> @@ -23,6 +23,14 @@
>>  #include <linux/kdebug.h>
>>  #include <linux/kgdb.h>
>>  #include <asm/traps.h>
>> +#include <asm/fpsimd.h>
>> +
>> +#ifdef CONFIG_KERNEL_MODE_NEON
>> +/*
>> + * Structure to hold FP/SIMD register contents
>> + */
>> +struct fpsimd_state kgdb_fpsimd_regs;
>
> 'static' if it's local to this source file
>
   OK
>> +#endif
>>
>>  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>         { "x0", 8, offsetof(struct pt_regs, regs[0])},
>> @@ -59,40 +67,40 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>         { "sp", 8, offsetof(struct pt_regs, sp)},
>>         { "pc", 8, offsetof(struct pt_regs, pc)},
>>         { "pstate", 4, offsetof(struct pt_regs, pstate)},
>> -       { "v0", 16, -1 },
>> -       { "v1", 16, -1 },
>> -       { "v2", 16, -1 },
>> -       { "v3", 16, -1 },
>> -       { "v4", 16, -1 },
>> -       { "v5", 16, -1 },
>> -       { "v6", 16, -1 },
>> -       { "v7", 16, -1 },
>> -       { "v8", 16, -1 },
>> -       { "v9", 16, -1 },
>> -       { "v10", 16, -1 },
>> -       { "v11", 16, -1 },
>> -       { "v12", 16, -1 },
>> -       { "v13", 16, -1 },
>> -       { "v14", 16, -1 },
>> -       { "v15", 16, -1 },
>> -       { "v16", 16, -1 },
>> -       { "v17", 16, -1 },
>> -       { "v18", 16, -1 },
>> -       { "v19", 16, -1 },
>> -       { "v20", 16, -1 },
>> -       { "v21", 16, -1 },
>> -       { "v22", 16, -1 },
>> -       { "v23", 16, -1 },
>> -       { "v24", 16, -1 },
>> -       { "v25", 16, -1 },
>> -       { "v26", 16, -1 },
>> -       { "v27", 16, -1 },
>> -       { "v28", 16, -1 },
>> -       { "v29", 16, -1 },
>> -       { "v30", 16, -1 },
>> -       { "v31", 16, -1 },
>> -       { "fpsr", 4, -1 },
>> -       { "fpcr", 4, -1 },
>> +       { "v0", 16, offsetof(struct fpsimd_state, vregs[0])},
>> +       { "v1", 16, offsetof(struct fpsimd_state, vregs[1])},
>> +       { "v2", 16, offsetof(struct fpsimd_state, vregs[2])},
>> +       { "v3", 16, offsetof(struct fpsimd_state, vregs[3])},
>> +       { "v4", 16, offsetof(struct fpsimd_state, vregs[4])},
>> +       { "v5", 16, offsetof(struct fpsimd_state, vregs[5])},
>> +       { "v6", 16, offsetof(struct fpsimd_state, vregs[6])},
>> +       { "v7", 16, offsetof(struct fpsimd_state, vregs[7])},
>> +       { "v8", 16, offsetof(struct fpsimd_state, vregs[8])},
>> +       { "v9", 16, offsetof(struct fpsimd_state, vregs[9])},
>> +       { "v10", 16, offsetof(struct fpsimd_state, vregs[10])},
>> +       { "v11", 16, offsetof(struct fpsimd_state, vregs[11])},
>> +       { "v12", 16, offsetof(struct fpsimd_state, vregs[12])},
>> +       { "v13", 16, offsetof(struct fpsimd_state, vregs[13])},
>> +       { "v14", 16, offsetof(struct fpsimd_state, vregs[14])},
>> +       { "v15", 16, offsetof(struct fpsimd_state, vregs[15])},
>> +       { "v16", 16, offsetof(struct fpsimd_state, vregs[16])},
>> +       { "v17", 16, offsetof(struct fpsimd_state, vregs[17])},
>> +       { "v18", 16, offsetof(struct fpsimd_state, vregs[18])},
>> +       { "v19", 16, offsetof(struct fpsimd_state, vregs[19])},
>> +       { "v20", 16, offsetof(struct fpsimd_state, vregs[20])},
>> +       { "v21", 16, offsetof(struct fpsimd_state, vregs[21])},
>> +       { "v22", 16, offsetof(struct fpsimd_state, vregs[22])},
>> +       { "v23", 16, offsetof(struct fpsimd_state, vregs[23])},
>> +       { "v24", 16, offsetof(struct fpsimd_state, vregs[24])},
>> +       { "v25", 16, offsetof(struct fpsimd_state, vregs[25])},
>> +       { "v26", 16, offsetof(struct fpsimd_state, vregs[26])},
>> +       { "v27", 16, offsetof(struct fpsimd_state, vregs[27])},
>> +       { "v28", 16, offsetof(struct fpsimd_state, vregs[28])},
>> +       { "v29", 16, offsetof(struct fpsimd_state, vregs[29])},
>> +       { "v30", 16, offsetof(struct fpsimd_state, vregs[30])},
>> +       { "v31", 16, offsetof(struct fpsimd_state, vregs[31])},
>> +       { "fpsr", 4, offsetof(struct fpsimd_state, fpsr)},
>> +       { "fpcr", 4, offsetof(struct fpsimd_state, fpcr)},
>>  };
>>
>>  char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>> @@ -100,11 +108,22 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>>                 return NULL;
>>
>> -       if (dbg_reg_def[regno].offset != -1)
>> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>>                 memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
>>                        dbg_reg_def[regno].size);
>> -       else
>> +       else {
>> +#ifdef CONFIG_KERNEL_MODE_NEON
>
> else if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON)) {
>
>> +               if (fpsimd_thread_state()) {
>> +                       fpsimd_save_state(&kgdb_fpsimd_regs);
>> +                       memcpy(mem, (void *)&kgdb_fpsimd_regs +
>> +                                       dbg_reg_def[regno].offset,
>> +                                       dbg_reg_def[regno].size);
>
> Why do this conditionally? Shouldn't kgdb return whatever is currently
> present in the register file, regardless of whether 'current' is a
> kernel thread or has had its FPSIMD state swapped out?
> Also, one could wonder why this is dependent on KERNEL_MODE_NEON being set
>

KGDB, being kernel debugger it was thought that allow
debugging only if kernel mode is supported.
In fact, I proposed (previous email)  to remove this condition and
allow to reading neon
registers always and allow write only if neon is in kernel mode.

But completely removing this condition check and allowing to modify
is user choice to ensure neon registers state.

Will, Please let me know your opinion?

>> +               } else
>> +                       memset(mem, 0, dbg_reg_def[regno].size);
>> +#else
>>                 memset(mem, 0, dbg_reg_def[regno].size);
>> +#endif
>> +       }
>>         return dbg_reg_def[regno].name;
>>  }
>>
>> @@ -113,9 +132,19 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
>>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>>                 return -EINVAL;
>>
>> -       if (dbg_reg_def[regno].offset != -1)
>> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>>                 memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
>> -                      dbg_reg_def[regno].size);
>> +                       dbg_reg_def[regno].size);
>> +#ifdef CONFIG_KERNEL_MODE_NEON
>> +       else {
>> +               if (fpsimd_thread_state()) {
>
> Again, why the conditional?
>
>> +                       memcpy((void *)&kgdb_fpsimd_regs +
>> +                                       dbg_reg_def[regno].offset,
>> +                                       mem, dbg_reg_def[regno].size);
>> +                       fpsimd_load_state(&kgdb_fpsimd_regs);
>
> Are you sure it is safe to clobber all the registers here? Shouldn't
> you save the state first, do the memcpy() and then load?
>
yes it is safe. gdb read registers after every command so all registers
are saved first before they are modified

> Regards,
> Ard.
>
>
>
>
>> +               }
>> +       }
>> +#endif
>>         return 0;
>>  }
>>
>> --
>> 1.7.9.5
>>
Ard Biesheuvel Nov. 29, 2013, 9:27 a.m. UTC | #6
On 29 November 2013 10:20, Vijay Kilari <vijay.kilari@gmail.com> wrote:
> On Fri, Nov 29, 2013 at 2:23 PM, Ard Biesheuvel
> <ard.biesheuvel@linaro.org> wrote:
>> On 18 November 2013 13:06,  <vijay.kilari@gmail.com> wrote:
>>> From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>>>
>>> Add KGDB debug support for FP/SIMD processor.This support
>>> only debugging of FP/SIMD in kernel mode.
>>> With this patch one can view or set FP/SIMD registers in
>>> kernel context
>>>
>>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>>> ---
>>>  arch/arm64/include/asm/fpsimd.h |    1 +
>>>  arch/arm64/kernel/fpsimd.c      |    5 ++
>>>  arch/arm64/kernel/kgdb.c        |  105 +++++++++++++++++++++++++--------------
>>>  3 files changed, 73 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
>>> index 609bc44..5039d6d 100644
>>> --- a/arch/arm64/include/asm/fpsimd.h
>>> +++ b/arch/arm64/include/asm/fpsimd.h
>>> @@ -60,6 +60,7 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
>>>  extern void fpsimd_thread_switch(struct task_struct *next);
>>>  extern void fpsimd_flush_thread(void);
>>>  extern void fpsimd_reload_fpstate(void);
>>> +extern int  fpsimd_thread_state(void);
>>>
>>>  #endif
>>>
>>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>>> index 5b13c17..27604c1 100644
>>> --- a/arch/arm64/kernel/fpsimd.c
>>> +++ b/arch/arm64/kernel/fpsimd.c
>>> @@ -124,6 +124,11 @@ void fpsimd_flush_thread(void)
>>>         set_thread_flag(TIF_FOREIGN_FPSTATE);
>>>  }
>>>
>>> +int fpsimd_thread_state(void)
>>
>> Please use a meaningful name for the function. Currently, it returns
>> true iff 'current' is a non-kernel thread whose FPSIMD state is /not/
>> present in the FPSIMD register file. You should choose a name that
>> reflects that purpose.
>>
>  do you have any reason why only TIF_FOREIGN_FPSTATE flag is
> set only for non-kernel threads in kernel_neon_begin()?
>

The purpose of the TIF_FOREIGN_FPSTATE flag is to convey that the
FPSIMD context should be restored before a task enters userland. A
kernel thread never enters userland by definition so the flag has no
meaning there.

>>> +{
>>> +       return (current->mm && test_thread_flag(TIF_FOREIGN_FPSTATE)) ? 1 : 0;
>>> +}
>>> +
>>
>> As Will pointed out, the expression before ? already evaluates to 1 or
>> 0, so appending '? 1 : 0' is redundant.
>>
>>>  void fpsimd_reload_fpstate(void)
>>>  {
>>>         preempt_disable();
>>> diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
>>> index f10f2ba..5e34852 100644
>>> --- a/arch/arm64/kernel/kgdb.c
>>> +++ b/arch/arm64/kernel/kgdb.c
>>> @@ -23,6 +23,14 @@
>>>  #include <linux/kdebug.h>
>>>  #include <linux/kgdb.h>
>>>  #include <asm/traps.h>
>>> +#include <asm/fpsimd.h>
>>> +
>>> +#ifdef CONFIG_KERNEL_MODE_NEON
>>> +/*
>>> + * Structure to hold FP/SIMD register contents
>>> + */
>>> +struct fpsimd_state kgdb_fpsimd_regs;
>>
>> 'static' if it's local to this source file
>>
>    OK
>>> +#endif
>>>
>>>  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>>         { "x0", 8, offsetof(struct pt_regs, regs[0])},
>>> @@ -59,40 +67,40 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>>         { "sp", 8, offsetof(struct pt_regs, sp)},
>>>         { "pc", 8, offsetof(struct pt_regs, pc)},
>>>         { "pstate", 4, offsetof(struct pt_regs, pstate)},
>>> -       { "v0", 16, -1 },
>>> -       { "v1", 16, -1 },
>>> -       { "v2", 16, -1 },
>>> -       { "v3", 16, -1 },
>>> -       { "v4", 16, -1 },
>>> -       { "v5", 16, -1 },
>>> -       { "v6", 16, -1 },
>>> -       { "v7", 16, -1 },
>>> -       { "v8", 16, -1 },
>>> -       { "v9", 16, -1 },
>>> -       { "v10", 16, -1 },
>>> -       { "v11", 16, -1 },
>>> -       { "v12", 16, -1 },
>>> -       { "v13", 16, -1 },
>>> -       { "v14", 16, -1 },
>>> -       { "v15", 16, -1 },
>>> -       { "v16", 16, -1 },
>>> -       { "v17", 16, -1 },
>>> -       { "v18", 16, -1 },
>>> -       { "v19", 16, -1 },
>>> -       { "v20", 16, -1 },
>>> -       { "v21", 16, -1 },
>>> -       { "v22", 16, -1 },
>>> -       { "v23", 16, -1 },
>>> -       { "v24", 16, -1 },
>>> -       { "v25", 16, -1 },
>>> -       { "v26", 16, -1 },
>>> -       { "v27", 16, -1 },
>>> -       { "v28", 16, -1 },
>>> -       { "v29", 16, -1 },
>>> -       { "v30", 16, -1 },
>>> -       { "v31", 16, -1 },
>>> -       { "fpsr", 4, -1 },
>>> -       { "fpcr", 4, -1 },
>>> +       { "v0", 16, offsetof(struct fpsimd_state, vregs[0])},
>>> +       { "v1", 16, offsetof(struct fpsimd_state, vregs[1])},
>>> +       { "v2", 16, offsetof(struct fpsimd_state, vregs[2])},
>>> +       { "v3", 16, offsetof(struct fpsimd_state, vregs[3])},
>>> +       { "v4", 16, offsetof(struct fpsimd_state, vregs[4])},
>>> +       { "v5", 16, offsetof(struct fpsimd_state, vregs[5])},
>>> +       { "v6", 16, offsetof(struct fpsimd_state, vregs[6])},
>>> +       { "v7", 16, offsetof(struct fpsimd_state, vregs[7])},
>>> +       { "v8", 16, offsetof(struct fpsimd_state, vregs[8])},
>>> +       { "v9", 16, offsetof(struct fpsimd_state, vregs[9])},
>>> +       { "v10", 16, offsetof(struct fpsimd_state, vregs[10])},
>>> +       { "v11", 16, offsetof(struct fpsimd_state, vregs[11])},
>>> +       { "v12", 16, offsetof(struct fpsimd_state, vregs[12])},
>>> +       { "v13", 16, offsetof(struct fpsimd_state, vregs[13])},
>>> +       { "v14", 16, offsetof(struct fpsimd_state, vregs[14])},
>>> +       { "v15", 16, offsetof(struct fpsimd_state, vregs[15])},
>>> +       { "v16", 16, offsetof(struct fpsimd_state, vregs[16])},
>>> +       { "v17", 16, offsetof(struct fpsimd_state, vregs[17])},
>>> +       { "v18", 16, offsetof(struct fpsimd_state, vregs[18])},
>>> +       { "v19", 16, offsetof(struct fpsimd_state, vregs[19])},
>>> +       { "v20", 16, offsetof(struct fpsimd_state, vregs[20])},
>>> +       { "v21", 16, offsetof(struct fpsimd_state, vregs[21])},
>>> +       { "v22", 16, offsetof(struct fpsimd_state, vregs[22])},
>>> +       { "v23", 16, offsetof(struct fpsimd_state, vregs[23])},
>>> +       { "v24", 16, offsetof(struct fpsimd_state, vregs[24])},
>>> +       { "v25", 16, offsetof(struct fpsimd_state, vregs[25])},
>>> +       { "v26", 16, offsetof(struct fpsimd_state, vregs[26])},
>>> +       { "v27", 16, offsetof(struct fpsimd_state, vregs[27])},
>>> +       { "v28", 16, offsetof(struct fpsimd_state, vregs[28])},
>>> +       { "v29", 16, offsetof(struct fpsimd_state, vregs[29])},
>>> +       { "v30", 16, offsetof(struct fpsimd_state, vregs[30])},
>>> +       { "v31", 16, offsetof(struct fpsimd_state, vregs[31])},
>>> +       { "fpsr", 4, offsetof(struct fpsimd_state, fpsr)},
>>> +       { "fpcr", 4, offsetof(struct fpsimd_state, fpcr)},
>>>  };
>>>
>>>  char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>>> @@ -100,11 +108,22 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>>>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>>>                 return NULL;
>>>
>>> -       if (dbg_reg_def[regno].offset != -1)
>>> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>>>                 memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
>>>                        dbg_reg_def[regno].size);
>>> -       else
>>> +       else {
>>> +#ifdef CONFIG_KERNEL_MODE_NEON
>>
>> else if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON)) {
>>
>>> +               if (fpsimd_thread_state()) {
>>> +                       fpsimd_save_state(&kgdb_fpsimd_regs);
>>> +                       memcpy(mem, (void *)&kgdb_fpsimd_regs +
>>> +                                       dbg_reg_def[regno].offset,
>>> +                                       dbg_reg_def[regno].size);
>>
>> Why do this conditionally? Shouldn't kgdb return whatever is currently
>> present in the register file, regardless of whether 'current' is a
>> kernel thread or has had its FPSIMD state swapped out?
>> Also, one could wonder why this is dependent on KERNEL_MODE_NEON being set
>>
>
> KGDB, being kernel debugger it was thought that allow
> debugging only if kernel mode is supported.
> In fact, I proposed (previous email)  to remove this condition and
> allow to reading neon
> registers always and allow write only if neon is in kernel mode.
>

I think KGDB is a special case here and should not considered as a
user of kernel mode NEON. So even writing the registers should
allowable regardless of this setting imo.

> But completely removing this condition check and allowing to modify
> is user choice to ensure neon registers state.
>
> Will, Please let me know your opinion?
>
>>> +               } else
>>> +                       memset(mem, 0, dbg_reg_def[regno].size);
>>> +#else
>>>                 memset(mem, 0, dbg_reg_def[regno].size);
>>> +#endif
>>> +       }
>>>         return dbg_reg_def[regno].name;
>>>  }
>>>
>>> @@ -113,9 +132,19 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
>>>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>>>                 return -EINVAL;
>>>
>>> -       if (dbg_reg_def[regno].offset != -1)
>>> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>>>                 memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
>>> -                      dbg_reg_def[regno].size);
>>> +                       dbg_reg_def[regno].size);
>>> +#ifdef CONFIG_KERNEL_MODE_NEON
>>> +       else {
>>> +               if (fpsimd_thread_state()) {
>>
>> Again, why the conditional?
>>
>>> +                       memcpy((void *)&kgdb_fpsimd_regs +
>>> +                                       dbg_reg_def[regno].offset,
>>> +                                       mem, dbg_reg_def[regno].size);
>>> +                       fpsimd_load_state(&kgdb_fpsimd_regs);
>>
>> Are you sure it is safe to clobber all the registers here? Shouldn't
>> you save the state first, do the memcpy() and then load?
>>
> yes it is safe. gdb read registers after every command so all registers
> are saved first before they are modified
>

Does that imply that -after every command- the whole NEON state (32*16
bytes) is saved to kgdb_fpsimd_regs 32 times, each time to access the
contents of a single register?
Vijay Kilari Nov. 29, 2013, 9:57 a.m. UTC | #7
On Fri, Nov 29, 2013 at 2:57 PM, Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
> On 29 November 2013 10:20, Vijay Kilari <vijay.kilari@gmail.com> wrote:
>> On Fri, Nov 29, 2013 at 2:23 PM, Ard Biesheuvel
>> <ard.biesheuvel@linaro.org> wrote:
>>> On 18 November 2013 13:06,  <vijay.kilari@gmail.com> wrote:
>>>> From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>>>>
>>>> Add KGDB debug support for FP/SIMD processor.This support
>>>> only debugging of FP/SIMD in kernel mode.
>>>> With this patch one can view or set FP/SIMD registers in
>>>> kernel context
>>>>
>>>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>>>> ---
>>>>  arch/arm64/include/asm/fpsimd.h |    1 +
>>>>  arch/arm64/kernel/fpsimd.c      |    5 ++
>>>>  arch/arm64/kernel/kgdb.c        |  105 +++++++++++++++++++++++++--------------
>>>>  3 files changed, 73 insertions(+), 38 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
>>>> index 609bc44..5039d6d 100644
>>>> --- a/arch/arm64/include/asm/fpsimd.h
>>>> +++ b/arch/arm64/include/asm/fpsimd.h
>>>> @@ -60,6 +60,7 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
>>>>  extern void fpsimd_thread_switch(struct task_struct *next);
>>>>  extern void fpsimd_flush_thread(void);
>>>>  extern void fpsimd_reload_fpstate(void);
>>>> +extern int  fpsimd_thread_state(void);
>>>>
>>>>  #endif
>>>>
>>>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>>>> index 5b13c17..27604c1 100644
>>>> --- a/arch/arm64/kernel/fpsimd.c
>>>> +++ b/arch/arm64/kernel/fpsimd.c
>>>> @@ -124,6 +124,11 @@ void fpsimd_flush_thread(void)
>>>>         set_thread_flag(TIF_FOREIGN_FPSTATE);
>>>>  }
>>>>
>>>> +int fpsimd_thread_state(void)
>>>
>>> Please use a meaningful name for the function. Currently, it returns
>>> true iff 'current' is a non-kernel thread whose FPSIMD state is /not/
>>> present in the FPSIMD register file. You should choose a name that
>>> reflects that purpose.
>>>
>>  do you have any reason why only TIF_FOREIGN_FPSTATE flag is
>> set only for non-kernel threads in kernel_neon_begin()?
>>
>
> The purpose of the TIF_FOREIGN_FPSTATE flag is to convey that the
> FPSIMD context should be restored before a task enters userland. A
> kernel thread never enters userland by definition so the flag has no
> meaning there.
>
>>>> +{
>>>> +       return (current->mm && test_thread_flag(TIF_FOREIGN_FPSTATE)) ? 1 : 0;
>>>> +}
>>>> +
>>>
>>> As Will pointed out, the expression before ? already evaluates to 1 or
>>> 0, so appending '? 1 : 0' is redundant.
>>>
>>>>  void fpsimd_reload_fpstate(void)
>>>>  {
>>>>         preempt_disable();
>>>> diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
>>>> index f10f2ba..5e34852 100644
>>>> --- a/arch/arm64/kernel/kgdb.c
>>>> +++ b/arch/arm64/kernel/kgdb.c
>>>> @@ -23,6 +23,14 @@
>>>>  #include <linux/kdebug.h>
>>>>  #include <linux/kgdb.h>
>>>>  #include <asm/traps.h>
>>>> +#include <asm/fpsimd.h>
>>>> +
>>>> +#ifdef CONFIG_KERNEL_MODE_NEON
>>>> +/*
>>>> + * Structure to hold FP/SIMD register contents
>>>> + */
>>>> +struct fpsimd_state kgdb_fpsimd_regs;
>>>
>>> 'static' if it's local to this source file
>>>
>>    OK
>>>> +#endif
>>>>
>>>>  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>>>         { "x0", 8, offsetof(struct pt_regs, regs[0])},
>>>> @@ -59,40 +67,40 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>>>         { "sp", 8, offsetof(struct pt_regs, sp)},
>>>>         { "pc", 8, offsetof(struct pt_regs, pc)},
>>>>         { "pstate", 4, offsetof(struct pt_regs, pstate)},
>>>> -       { "v0", 16, -1 },
>>>> -       { "v1", 16, -1 },
>>>> -       { "v2", 16, -1 },
>>>> -       { "v3", 16, -1 },
>>>> -       { "v4", 16, -1 },
>>>> -       { "v5", 16, -1 },
>>>> -       { "v6", 16, -1 },
>>>> -       { "v7", 16, -1 },
>>>> -       { "v8", 16, -1 },
>>>> -       { "v9", 16, -1 },
>>>> -       { "v10", 16, -1 },
>>>> -       { "v11", 16, -1 },
>>>> -       { "v12", 16, -1 },
>>>> -       { "v13", 16, -1 },
>>>> -       { "v14", 16, -1 },
>>>> -       { "v15", 16, -1 },
>>>> -       { "v16", 16, -1 },
>>>> -       { "v17", 16, -1 },
>>>> -       { "v18", 16, -1 },
>>>> -       { "v19", 16, -1 },
>>>> -       { "v20", 16, -1 },
>>>> -       { "v21", 16, -1 },
>>>> -       { "v22", 16, -1 },
>>>> -       { "v23", 16, -1 },
>>>> -       { "v24", 16, -1 },
>>>> -       { "v25", 16, -1 },
>>>> -       { "v26", 16, -1 },
>>>> -       { "v27", 16, -1 },
>>>> -       { "v28", 16, -1 },
>>>> -       { "v29", 16, -1 },
>>>> -       { "v30", 16, -1 },
>>>> -       { "v31", 16, -1 },
>>>> -       { "fpsr", 4, -1 },
>>>> -       { "fpcr", 4, -1 },
>>>> +       { "v0", 16, offsetof(struct fpsimd_state, vregs[0])},
>>>> +       { "v1", 16, offsetof(struct fpsimd_state, vregs[1])},
>>>> +       { "v2", 16, offsetof(struct fpsimd_state, vregs[2])},
>>>> +       { "v3", 16, offsetof(struct fpsimd_state, vregs[3])},
>>>> +       { "v4", 16, offsetof(struct fpsimd_state, vregs[4])},
>>>> +       { "v5", 16, offsetof(struct fpsimd_state, vregs[5])},
>>>> +       { "v6", 16, offsetof(struct fpsimd_state, vregs[6])},
>>>> +       { "v7", 16, offsetof(struct fpsimd_state, vregs[7])},
>>>> +       { "v8", 16, offsetof(struct fpsimd_state, vregs[8])},
>>>> +       { "v9", 16, offsetof(struct fpsimd_state, vregs[9])},
>>>> +       { "v10", 16, offsetof(struct fpsimd_state, vregs[10])},
>>>> +       { "v11", 16, offsetof(struct fpsimd_state, vregs[11])},
>>>> +       { "v12", 16, offsetof(struct fpsimd_state, vregs[12])},
>>>> +       { "v13", 16, offsetof(struct fpsimd_state, vregs[13])},
>>>> +       { "v14", 16, offsetof(struct fpsimd_state, vregs[14])},
>>>> +       { "v15", 16, offsetof(struct fpsimd_state, vregs[15])},
>>>> +       { "v16", 16, offsetof(struct fpsimd_state, vregs[16])},
>>>> +       { "v17", 16, offsetof(struct fpsimd_state, vregs[17])},
>>>> +       { "v18", 16, offsetof(struct fpsimd_state, vregs[18])},
>>>> +       { "v19", 16, offsetof(struct fpsimd_state, vregs[19])},
>>>> +       { "v20", 16, offsetof(struct fpsimd_state, vregs[20])},
>>>> +       { "v21", 16, offsetof(struct fpsimd_state, vregs[21])},
>>>> +       { "v22", 16, offsetof(struct fpsimd_state, vregs[22])},
>>>> +       { "v23", 16, offsetof(struct fpsimd_state, vregs[23])},
>>>> +       { "v24", 16, offsetof(struct fpsimd_state, vregs[24])},
>>>> +       { "v25", 16, offsetof(struct fpsimd_state, vregs[25])},
>>>> +       { "v26", 16, offsetof(struct fpsimd_state, vregs[26])},
>>>> +       { "v27", 16, offsetof(struct fpsimd_state, vregs[27])},
>>>> +       { "v28", 16, offsetof(struct fpsimd_state, vregs[28])},
>>>> +       { "v29", 16, offsetof(struct fpsimd_state, vregs[29])},
>>>> +       { "v30", 16, offsetof(struct fpsimd_state, vregs[30])},
>>>> +       { "v31", 16, offsetof(struct fpsimd_state, vregs[31])},
>>>> +       { "fpsr", 4, offsetof(struct fpsimd_state, fpsr)},
>>>> +       { "fpcr", 4, offsetof(struct fpsimd_state, fpcr)},
>>>>  };
>>>>
>>>>  char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>>>> @@ -100,11 +108,22 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>>>>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>>>>                 return NULL;
>>>>
>>>> -       if (dbg_reg_def[regno].offset != -1)
>>>> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>>>>                 memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
>>>>                        dbg_reg_def[regno].size);
>>>> -       else
>>>> +       else {
>>>> +#ifdef CONFIG_KERNEL_MODE_NEON
>>>
>>> else if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON)) {
>>>
>>>> +               if (fpsimd_thread_state()) {
>>>> +                       fpsimd_save_state(&kgdb_fpsimd_regs);
>>>> +                       memcpy(mem, (void *)&kgdb_fpsimd_regs +
>>>> +                                       dbg_reg_def[regno].offset,
>>>> +                                       dbg_reg_def[regno].size);
>>>
>>> Why do this conditionally? Shouldn't kgdb return whatever is currently
>>> present in the register file, regardless of whether 'current' is a
>>> kernel thread or has had its FPSIMD state swapped out?
>>> Also, one could wonder why this is dependent on KERNEL_MODE_NEON being set
>>>
>>
>> KGDB, being kernel debugger it was thought that allow
>> debugging only if kernel mode is supported.
>> In fact, I proposed (previous email)  to remove this condition and
>> allow to reading neon
>> registers always and allow write only if neon is in kernel mode.
>>
>
> I think KGDB is a special case here and should not considered as a
> user of kernel mode NEON. So even writing the registers should
> allowable regardless of this setting imo.
>
>> But completely removing this condition check and allowing to modify
>> is user choice to ensure neon registers state.
>>
>> Will, Please let me know your opinion?
>>
>>>> +               } else
>>>> +                       memset(mem, 0, dbg_reg_def[regno].size);
>>>> +#else
>>>>                 memset(mem, 0, dbg_reg_def[regno].size);
>>>> +#endif
>>>> +       }
>>>>         return dbg_reg_def[regno].name;
>>>>  }
>>>>
>>>> @@ -113,9 +132,19 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
>>>>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>>>>                 return -EINVAL;
>>>>
>>>> -       if (dbg_reg_def[regno].offset != -1)
>>>> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>>>>                 memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
>>>> -                      dbg_reg_def[regno].size);
>>>> +                       dbg_reg_def[regno].size);
>>>> +#ifdef CONFIG_KERNEL_MODE_NEON
>>>> +       else {
>>>> +               if (fpsimd_thread_state()) {
>>>
>>> Again, why the conditional?
>>>
>>>> +                       memcpy((void *)&kgdb_fpsimd_regs +
>>>> +                                       dbg_reg_def[regno].offset,
>>>> +                                       mem, dbg_reg_def[regno].size);
>>>> +                       fpsimd_load_state(&kgdb_fpsimd_regs);
>>>
>>> Are you sure it is safe to clobber all the registers here? Shouldn't
>>> you save the state first, do the memcpy() and then load?
>>>
>> yes it is safe. gdb read registers after every command so all registers
>> are saved first before they are modified
>>
>
> Does that imply that -after every command- the whole NEON state (32*16
> bytes) is saved to kgdb_fpsimd_regs 32 times, each time to access the
> contents of a single register?
>
Yes, I mentioned this overhead in cover letter. There is no single call to
architecture specific code to read all the registers at once for gdb
'register read'
command. The kgdb makes one call for every register read.

I can think of saving this registers for every debug exception call,
but that too is not optimized because gdb tool issues multiple step commands
for every 'next' command.
IMO, this little overhead is still ok as it is used only in step debugging

> --
> Ard.
Vijay Kilari Feb. 28, 2014, 11:08 a.m. UTC | #8
Hi Will,

On Fri, Nov 29, 2013 at 3:27 PM, Vijay Kilari <vijay.kilari@gmail.com> wrote:
> On Fri, Nov 29, 2013 at 2:57 PM, Ard Biesheuvel
> <ard.biesheuvel@linaro.org> wrote:
>> On 29 November 2013 10:20, Vijay Kilari <vijay.kilari@gmail.com> wrote:
>>> On Fri, Nov 29, 2013 at 2:23 PM, Ard Biesheuvel
>>> <ard.biesheuvel@linaro.org> wrote:
>>>> On 18 November 2013 13:06,  <vijay.kilari@gmail.com> wrote:
>>>>> From: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>>>>>
>>>>> Add KGDB debug support for FP/SIMD processor.This support
>>>>> only debugging of FP/SIMD in kernel mode.
>>>>> With this patch one can view or set FP/SIMD registers in
>>>>> kernel context
>>>>>
>>>>> Signed-off-by: Vijaya Kumar K <Vijaya.Kumar@caviumnetworks.com>
>>>>> ---
>>>>>  arch/arm64/include/asm/fpsimd.h |    1 +
>>>>>  arch/arm64/kernel/fpsimd.c      |    5 ++
>>>>>  arch/arm64/kernel/kgdb.c        |  105 +++++++++++++++++++++++++--------------
>>>>>  3 files changed, 73 insertions(+), 38 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
>>>>> index 609bc44..5039d6d 100644
>>>>> --- a/arch/arm64/include/asm/fpsimd.h
>>>>> +++ b/arch/arm64/include/asm/fpsimd.h
>>>>> @@ -60,6 +60,7 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
>>>>>  extern void fpsimd_thread_switch(struct task_struct *next);
>>>>>  extern void fpsimd_flush_thread(void);
>>>>>  extern void fpsimd_reload_fpstate(void);
>>>>> +extern int  fpsimd_thread_state(void);
>>>>>
>>>>>  #endif
>>>>>
>>>>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>>>>> index 5b13c17..27604c1 100644
>>>>> --- a/arch/arm64/kernel/fpsimd.c
>>>>> +++ b/arch/arm64/kernel/fpsimd.c
>>>>> @@ -124,6 +124,11 @@ void fpsimd_flush_thread(void)
>>>>>         set_thread_flag(TIF_FOREIGN_FPSTATE);
>>>>>  }
>>>>>
>>>>> +int fpsimd_thread_state(void)
>>>>
>>>> Please use a meaningful name for the function. Currently, it returns
>>>> true iff 'current' is a non-kernel thread whose FPSIMD state is /not/
>>>> present in the FPSIMD register file. You should choose a name that
>>>> reflects that purpose.
>>>>
>>>  do you have any reason why only TIF_FOREIGN_FPSTATE flag is
>>> set only for non-kernel threads in kernel_neon_begin()?
>>>
>>
>> The purpose of the TIF_FOREIGN_FPSTATE flag is to convey that the
>> FPSIMD context should be restored before a task enters userland. A
>> kernel thread never enters userland by definition so the flag has no
>> meaning there.
>>
>>>>> +{
>>>>> +       return (current->mm && test_thread_flag(TIF_FOREIGN_FPSTATE)) ? 1 : 0;
>>>>> +}
>>>>> +
>>>>
>>>> As Will pointed out, the expression before ? already evaluates to 1 or
>>>> 0, so appending '? 1 : 0' is redundant.
>>>>
>>>>>  void fpsimd_reload_fpstate(void)
>>>>>  {
>>>>>         preempt_disable();
>>>>> diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
>>>>> index f10f2ba..5e34852 100644
>>>>> --- a/arch/arm64/kernel/kgdb.c
>>>>> +++ b/arch/arm64/kernel/kgdb.c
>>>>> @@ -23,6 +23,14 @@
>>>>>  #include <linux/kdebug.h>
>>>>>  #include <linux/kgdb.h>
>>>>>  #include <asm/traps.h>
>>>>> +#include <asm/fpsimd.h>
>>>>> +
>>>>> +#ifdef CONFIG_KERNEL_MODE_NEON
>>>>> +/*
>>>>> + * Structure to hold FP/SIMD register contents
>>>>> + */
>>>>> +struct fpsimd_state kgdb_fpsimd_regs;
>>>>
>>>> 'static' if it's local to this source file
>>>>
>>>    OK
>>>>> +#endif
>>>>>
>>>>>  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>>>>         { "x0", 8, offsetof(struct pt_regs, regs[0])},
>>>>> @@ -59,40 +67,40 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
>>>>>         { "sp", 8, offsetof(struct pt_regs, sp)},
>>>>>         { "pc", 8, offsetof(struct pt_regs, pc)},
>>>>>         { "pstate", 4, offsetof(struct pt_regs, pstate)},
>>>>> -       { "v0", 16, -1 },
>>>>> -       { "v1", 16, -1 },
>>>>> -       { "v2", 16, -1 },
>>>>> -       { "v3", 16, -1 },
>>>>> -       { "v4", 16, -1 },
>>>>> -       { "v5", 16, -1 },
>>>>> -       { "v6", 16, -1 },
>>>>> -       { "v7", 16, -1 },
>>>>> -       { "v8", 16, -1 },
>>>>> -       { "v9", 16, -1 },
>>>>> -       { "v10", 16, -1 },
>>>>> -       { "v11", 16, -1 },
>>>>> -       { "v12", 16, -1 },
>>>>> -       { "v13", 16, -1 },
>>>>> -       { "v14", 16, -1 },
>>>>> -       { "v15", 16, -1 },
>>>>> -       { "v16", 16, -1 },
>>>>> -       { "v17", 16, -1 },
>>>>> -       { "v18", 16, -1 },
>>>>> -       { "v19", 16, -1 },
>>>>> -       { "v20", 16, -1 },
>>>>> -       { "v21", 16, -1 },
>>>>> -       { "v22", 16, -1 },
>>>>> -       { "v23", 16, -1 },
>>>>> -       { "v24", 16, -1 },
>>>>> -       { "v25", 16, -1 },
>>>>> -       { "v26", 16, -1 },
>>>>> -       { "v27", 16, -1 },
>>>>> -       { "v28", 16, -1 },
>>>>> -       { "v29", 16, -1 },
>>>>> -       { "v30", 16, -1 },
>>>>> -       { "v31", 16, -1 },
>>>>> -       { "fpsr", 4, -1 },
>>>>> -       { "fpcr", 4, -1 },
>>>>> +       { "v0", 16, offsetof(struct fpsimd_state, vregs[0])},
>>>>> +       { "v1", 16, offsetof(struct fpsimd_state, vregs[1])},
>>>>> +       { "v2", 16, offsetof(struct fpsimd_state, vregs[2])},
>>>>> +       { "v3", 16, offsetof(struct fpsimd_state, vregs[3])},
>>>>> +       { "v4", 16, offsetof(struct fpsimd_state, vregs[4])},
>>>>> +       { "v5", 16, offsetof(struct fpsimd_state, vregs[5])},
>>>>> +       { "v6", 16, offsetof(struct fpsimd_state, vregs[6])},
>>>>> +       { "v7", 16, offsetof(struct fpsimd_state, vregs[7])},
>>>>> +       { "v8", 16, offsetof(struct fpsimd_state, vregs[8])},
>>>>> +       { "v9", 16, offsetof(struct fpsimd_state, vregs[9])},
>>>>> +       { "v10", 16, offsetof(struct fpsimd_state, vregs[10])},
>>>>> +       { "v11", 16, offsetof(struct fpsimd_state, vregs[11])},
>>>>> +       { "v12", 16, offsetof(struct fpsimd_state, vregs[12])},
>>>>> +       { "v13", 16, offsetof(struct fpsimd_state, vregs[13])},
>>>>> +       { "v14", 16, offsetof(struct fpsimd_state, vregs[14])},
>>>>> +       { "v15", 16, offsetof(struct fpsimd_state, vregs[15])},
>>>>> +       { "v16", 16, offsetof(struct fpsimd_state, vregs[16])},
>>>>> +       { "v17", 16, offsetof(struct fpsimd_state, vregs[17])},
>>>>> +       { "v18", 16, offsetof(struct fpsimd_state, vregs[18])},
>>>>> +       { "v19", 16, offsetof(struct fpsimd_state, vregs[19])},
>>>>> +       { "v20", 16, offsetof(struct fpsimd_state, vregs[20])},
>>>>> +       { "v21", 16, offsetof(struct fpsimd_state, vregs[21])},
>>>>> +       { "v22", 16, offsetof(struct fpsimd_state, vregs[22])},
>>>>> +       { "v23", 16, offsetof(struct fpsimd_state, vregs[23])},
>>>>> +       { "v24", 16, offsetof(struct fpsimd_state, vregs[24])},
>>>>> +       { "v25", 16, offsetof(struct fpsimd_state, vregs[25])},
>>>>> +       { "v26", 16, offsetof(struct fpsimd_state, vregs[26])},
>>>>> +       { "v27", 16, offsetof(struct fpsimd_state, vregs[27])},
>>>>> +       { "v28", 16, offsetof(struct fpsimd_state, vregs[28])},
>>>>> +       { "v29", 16, offsetof(struct fpsimd_state, vregs[29])},
>>>>> +       { "v30", 16, offsetof(struct fpsimd_state, vregs[30])},
>>>>> +       { "v31", 16, offsetof(struct fpsimd_state, vregs[31])},
>>>>> +       { "fpsr", 4, offsetof(struct fpsimd_state, fpsr)},
>>>>> +       { "fpcr", 4, offsetof(struct fpsimd_state, fpcr)},
>>>>>  };
>>>>>
>>>>>  char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>>>>> @@ -100,11 +108,22 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
>>>>>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>>>>>                 return NULL;
>>>>>
>>>>> -       if (dbg_reg_def[regno].offset != -1)
>>>>> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>>>>>                 memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
>>>>>                        dbg_reg_def[regno].size);
>>>>> -       else
>>>>> +       else {
>>>>> +#ifdef CONFIG_KERNEL_MODE_NEON
>>>>
>>>> else if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON)) {
>>>>
>>>>> +               if (fpsimd_thread_state()) {
>>>>> +                       fpsimd_save_state(&kgdb_fpsimd_regs);
>>>>> +                       memcpy(mem, (void *)&kgdb_fpsimd_regs +
>>>>> +                                       dbg_reg_def[regno].offset,
>>>>> +                                       dbg_reg_def[regno].size);
>>>>
>>>> Why do this conditionally? Shouldn't kgdb return whatever is currently
>>>> present in the register file, regardless of whether 'current' is a
>>>> kernel thread or has had its FPSIMD state swapped out?
>>>> Also, one could wonder why this is dependent on KERNEL_MODE_NEON being set
>>>>
>>>
>>> KGDB, being kernel debugger it was thought that allow
>>> debugging only if kernel mode is supported.
>>> In fact, I proposed (previous email)  to remove this condition and
>>> allow to reading neon
>>> registers always and allow write only if neon is in kernel mode.
>>>
>>
>> I think KGDB is a special case here and should not considered as a
>> user of kernel mode NEON. So even writing the registers should
>> allowable regardless of this setting imo.
>>

 Can we avoid checking on NEON state and allow debugger to
access NEON registers irrespective of state to keep it simple?
and just compile config is enough?

>>> But completely removing this condition check and allowing to modify
>>> is user choice to ensure neon registers state.
>>>
>>> Will, Please let me know your opinion?
>>>
>>>>> +               } else
>>>>> +                       memset(mem, 0, dbg_reg_def[regno].size);
>>>>> +#else
>>>>>                 memset(mem, 0, dbg_reg_def[regno].size);
>>>>> +#endif
>>>>> +       }
>>>>>         return dbg_reg_def[regno].name;
>>>>>  }
>>>>>
>>>>> @@ -113,9 +132,19 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
>>>>>         if (regno >= DBG_MAX_REG_NUM || regno < 0)
>>>>>                 return -EINVAL;
>>>>>
>>>>> -       if (dbg_reg_def[regno].offset != -1)
>>>>> +       if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
>>>>>                 memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
>>>>> -                      dbg_reg_def[regno].size);
>>>>> +                       dbg_reg_def[regno].size);
>>>>> +#ifdef CONFIG_KERNEL_MODE_NEON
>>>>> +       else {
>>>>> +               if (fpsimd_thread_state()) {
>>>>
>>>> Again, why the conditional?
>>>>
>>>>> +                       memcpy((void *)&kgdb_fpsimd_regs +
>>>>> +                                       dbg_reg_def[regno].offset,
>>>>> +                                       mem, dbg_reg_def[regno].size);
>>>>> +                       fpsimd_load_state(&kgdb_fpsimd_regs);
>>>>
>>>> Are you sure it is safe to clobber all the registers here? Shouldn't
>>>> you save the state first, do the memcpy() and then load?
>>>>
>>> yes it is safe. gdb read registers after every command so all registers
>>> are saved first before they are modified
>>>
>>
>> Does that imply that -after every command- the whole NEON state (32*16
>> bytes) is saved to kgdb_fpsimd_regs 32 times, each time to access the
>> contents of a single register?
>>
> Yes, I mentioned this overhead in cover letter. There is no single call to
> architecture specific code to read all the registers at once for gdb
> 'register read'
> command. The kgdb makes one call for every register read.
>
> I can think of saving this registers for every debug exception call,
> but that too is not optimized because gdb tool issues multiple step commands
> for every 'next' command.
> IMO, this little overhead is still ok as it is used only in step debugging
>
>> --
>> Ard.
Will Deacon Feb. 28, 2014, 5:36 p.m. UTC | #9
On Fri, Feb 28, 2014 at 11:08:58AM +0000, Vijay Kilari wrote:
> On Fri, Nov 29, 2013 at 3:27 PM, Vijay Kilari <vijay.kilari@gmail.com> wrote:
> >>> KGDB, being kernel debugger it was thought that allow
> >>> debugging only if kernel mode is supported.
> >>> In fact, I proposed (previous email)  to remove this condition and
> >>> allow to reading neon
> >>> registers always and allow write only if neon is in kernel mode.
> >>>
> >>
> >> I think KGDB is a special case here and should not considered as a
> >> user of kernel mode NEON. So even writing the registers should
> >> allowable regardless of this setting imo.
> >>
> 
>  Can we avoid checking on NEON state and allow debugger to
> access NEON registers irrespective of state to keep it simple?
> and just compile config is enough?

I'm afraid I don't follow... I think we *do* need to do the relevant
flush/sync operations, I just don't think it should be predicated on
KERNEL_MODE_NEON.

Will
Ard Biesheuvel March 2, 2014, 1:08 p.m. UTC | #10
On 28 February 2014 18:36, Will Deacon <will.deacon@arm.com> wrote:
> On Fri, Feb 28, 2014 at 11:08:58AM +0000, Vijay Kilari wrote:
>> On Fri, Nov 29, 2013 at 3:27 PM, Vijay Kilari <vijay.kilari@gmail.com> wrote:
>> >>> KGDB, being kernel debugger it was thought that allow
>> >>> debugging only if kernel mode is supported.
>> >>> In fact, I proposed (previous email)  to remove this condition and
>> >>> allow to reading neon
>> >>> registers always and allow write only if neon is in kernel mode.
>> >>>
>> >>
>> >> I think KGDB is a special case here and should not considered as a
>> >> user of kernel mode NEON. So even writing the registers should
>> >> allowable regardless of this setting imo.
>> >>
>>
>>  Can we avoid checking on NEON state and allow debugger to
>> access NEON registers irrespective of state to keep it simple?
>> and just compile config is enough?
>
> I'm afraid I don't follow... I think we *do* need to do the relevant
> flush/sync operations, I just don't think it should be predicated on
> KERNEL_MODE_NEON.

Could you elaborate a bit on what you consider flush/sync operations?

The point I was making is that, from kgdb's point of view, it is
irrelevant whether the FPSIMD registers contain current's userland
FPSIMD state or something else. kgdb should return and/or manipulate
what the kernel itself sees when it accesses the FPSIMD registers
directly, regardless of whether those contents belong to current's
userland or not. (please refer to my pending series on kernel mode
NEON optimizations)

CONFIG_KERNEL_MODE_NEON is a 'def_bool y' on arm64, so whether it is
more correct to depend on it or not is not that relevant.

Regards,
Ard.
Will Deacon March 3, 2014, 11:43 a.m. UTC | #11
On Sun, Mar 02, 2014 at 01:08:56PM +0000, Ard Biesheuvel wrote:
> On 28 February 2014 18:36, Will Deacon <will.deacon@arm.com> wrote:
> > On Fri, Feb 28, 2014 at 11:08:58AM +0000, Vijay Kilari wrote:
> >> On Fri, Nov 29, 2013 at 3:27 PM, Vijay Kilari <vijay.kilari@gmail.com> wrote:
> >> >>> KGDB, being kernel debugger it was thought that allow
> >> >>> debugging only if kernel mode is supported.
> >> >>> In fact, I proposed (previous email)  to remove this condition and
> >> >>> allow to reading neon
> >> >>> registers always and allow write only if neon is in kernel mode.
> >> >>>
> >> >>
> >> >> I think KGDB is a special case here and should not considered as a
> >> >> user of kernel mode NEON. So even writing the registers should
> >> >> allowable regardless of this setting imo.
> >> >>
> >>
> >>  Can we avoid checking on NEON state and allow debugger to
> >> access NEON registers irrespective of state to keep it simple?
> >> and just compile config is enough?
> >
> > I'm afraid I don't follow... I think we *do* need to do the relevant
> > flush/sync operations, I just don't think it should be predicated on
> > KERNEL_MODE_NEON.
> 
> Could you elaborate a bit on what you consider flush/sync operations?
> 
> The point I was making is that, from kgdb's point of view, it is
> irrelevant whether the FPSIMD registers contain current's userland
> FPSIMD state or something else. kgdb should return and/or manipulate
> what the kernel itself sees when it accesses the FPSIMD registers
> directly, regardless of whether those contents belong to current's
> userland or not. (please refer to my pending series on kernel mode
> NEON optimizations)

Ok, point taken. So we just allow KGDB to access the hardware directly,
without any saving/restoring of state.

Will
diff mbox

Patch

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 609bc44..5039d6d 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -60,6 +60,7 @@  extern void fpsimd_load_state(struct fpsimd_state *state);
 extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
 extern void fpsimd_reload_fpstate(void);
+extern int  fpsimd_thread_state(void);
 
 #endif
 
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5b13c17..27604c1 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -124,6 +124,11 @@  void fpsimd_flush_thread(void)
 	set_thread_flag(TIF_FOREIGN_FPSTATE);
 }
 
+int fpsimd_thread_state(void)
+{
+	return (current->mm && test_thread_flag(TIF_FOREIGN_FPSTATE)) ? 1 : 0;
+}
+
 void fpsimd_reload_fpstate(void)
 {
 	preempt_disable();
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index f10f2ba..5e34852 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -23,6 +23,14 @@ 
 #include <linux/kdebug.h>
 #include <linux/kgdb.h>
 #include <asm/traps.h>
+#include <asm/fpsimd.h>
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+/*
+ * Structure to hold FP/SIMD register contents
+ */
+struct fpsimd_state kgdb_fpsimd_regs;
+#endif
 
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
 	{ "x0", 8, offsetof(struct pt_regs, regs[0])},
@@ -59,40 +67,40 @@  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
 	{ "sp", 8, offsetof(struct pt_regs, sp)},
 	{ "pc", 8, offsetof(struct pt_regs, pc)},
 	{ "pstate", 4, offsetof(struct pt_regs, pstate)},
-	{ "v0", 16, -1 },
-	{ "v1", 16, -1 },
-	{ "v2", 16, -1 },
-	{ "v3", 16, -1 },
-	{ "v4", 16, -1 },
-	{ "v5", 16, -1 },
-	{ "v6", 16, -1 },
-	{ "v7", 16, -1 },
-	{ "v8", 16, -1 },
-	{ "v9", 16, -1 },
-	{ "v10", 16, -1 },
-	{ "v11", 16, -1 },
-	{ "v12", 16, -1 },
-	{ "v13", 16, -1 },
-	{ "v14", 16, -1 },
-	{ "v15", 16, -1 },
-	{ "v16", 16, -1 },
-	{ "v17", 16, -1 },
-	{ "v18", 16, -1 },
-	{ "v19", 16, -1 },
-	{ "v20", 16, -1 },
-	{ "v21", 16, -1 },
-	{ "v22", 16, -1 },
-	{ "v23", 16, -1 },
-	{ "v24", 16, -1 },
-	{ "v25", 16, -1 },
-	{ "v26", 16, -1 },
-	{ "v27", 16, -1 },
-	{ "v28", 16, -1 },
-	{ "v29", 16, -1 },
-	{ "v30", 16, -1 },
-	{ "v31", 16, -1 },
-	{ "fpsr", 4, -1 },
-	{ "fpcr", 4, -1 },
+	{ "v0", 16, offsetof(struct fpsimd_state, vregs[0])},
+	{ "v1", 16, offsetof(struct fpsimd_state, vregs[1])},
+	{ "v2", 16, offsetof(struct fpsimd_state, vregs[2])},
+	{ "v3", 16, offsetof(struct fpsimd_state, vregs[3])},
+	{ "v4", 16, offsetof(struct fpsimd_state, vregs[4])},
+	{ "v5", 16, offsetof(struct fpsimd_state, vregs[5])},
+	{ "v6", 16, offsetof(struct fpsimd_state, vregs[6])},
+	{ "v7", 16, offsetof(struct fpsimd_state, vregs[7])},
+	{ "v8", 16, offsetof(struct fpsimd_state, vregs[8])},
+	{ "v9", 16, offsetof(struct fpsimd_state, vregs[9])},
+	{ "v10", 16, offsetof(struct fpsimd_state, vregs[10])},
+	{ "v11", 16, offsetof(struct fpsimd_state, vregs[11])},
+	{ "v12", 16, offsetof(struct fpsimd_state, vregs[12])},
+	{ "v13", 16, offsetof(struct fpsimd_state, vregs[13])},
+	{ "v14", 16, offsetof(struct fpsimd_state, vregs[14])},
+	{ "v15", 16, offsetof(struct fpsimd_state, vregs[15])},
+	{ "v16", 16, offsetof(struct fpsimd_state, vregs[16])},
+	{ "v17", 16, offsetof(struct fpsimd_state, vregs[17])},
+	{ "v18", 16, offsetof(struct fpsimd_state, vregs[18])},
+	{ "v19", 16, offsetof(struct fpsimd_state, vregs[19])},
+	{ "v20", 16, offsetof(struct fpsimd_state, vregs[20])},
+	{ "v21", 16, offsetof(struct fpsimd_state, vregs[21])},
+	{ "v22", 16, offsetof(struct fpsimd_state, vregs[22])},
+	{ "v23", 16, offsetof(struct fpsimd_state, vregs[23])},
+	{ "v24", 16, offsetof(struct fpsimd_state, vregs[24])},
+	{ "v25", 16, offsetof(struct fpsimd_state, vregs[25])},
+	{ "v26", 16, offsetof(struct fpsimd_state, vregs[26])},
+	{ "v27", 16, offsetof(struct fpsimd_state, vregs[27])},
+	{ "v28", 16, offsetof(struct fpsimd_state, vregs[28])},
+	{ "v29", 16, offsetof(struct fpsimd_state, vregs[29])},
+	{ "v30", 16, offsetof(struct fpsimd_state, vregs[30])},
+	{ "v31", 16, offsetof(struct fpsimd_state, vregs[31])},
+	{ "fpsr", 4, offsetof(struct fpsimd_state, fpsr)},
+	{ "fpcr", 4, offsetof(struct fpsimd_state, fpcr)},
 };
 
 char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
@@ -100,11 +108,22 @@  char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
 	if (regno >= DBG_MAX_REG_NUM || regno < 0)
 		return NULL;
 
-	if (dbg_reg_def[regno].offset != -1)
+	if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
 		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
 		       dbg_reg_def[regno].size);
-	else
+	else {
+#ifdef CONFIG_KERNEL_MODE_NEON
+		if (fpsimd_thread_state()) {
+			fpsimd_save_state(&kgdb_fpsimd_regs);
+			memcpy(mem, (void *)&kgdb_fpsimd_regs +
+					dbg_reg_def[regno].offset,
+					dbg_reg_def[regno].size);
+		} else
+			memset(mem, 0, dbg_reg_def[regno].size);
+#else
 		memset(mem, 0, dbg_reg_def[regno].size);
+#endif
+	}
 	return dbg_reg_def[regno].name;
 }
 
@@ -113,9 +132,19 @@  int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
 	if (regno >= DBG_MAX_REG_NUM || regno < 0)
 		return -EINVAL;
 
-	if (dbg_reg_def[regno].offset != -1)
+	if (dbg_reg_def[regno].offset != -1 && regno < _GP_REGS)
 		memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
-		       dbg_reg_def[regno].size);
+			dbg_reg_def[regno].size);
+#ifdef CONFIG_KERNEL_MODE_NEON
+	else {
+		if (fpsimd_thread_state()) {
+			memcpy((void *)&kgdb_fpsimd_regs +
+					dbg_reg_def[regno].offset,
+					mem, dbg_reg_def[regno].size);
+			fpsimd_load_state(&kgdb_fpsimd_regs);
+		}
+	}
+#endif
 	return 0;
 }