
[6/7] riscv: report misaligned accesses emulation to hwprobe

Message ID 20230926150316.1129648-7-cleger@rivosinc.com (mailing list archive)
State Superseded
Series Add support to handle misaligned accesses in S-mode

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next at HEAD 0bb80ecc33a8
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 5 and now 5
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig fail Failed to build the tree with this patch.
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig fail Failed to build the tree with this patch.
conchuod/build_rv32_defconfig fail Build failed
conchuod/dtb_warn_rv64 success Errors and warnings before: 29 this patch: 29
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch warning CHECK: Concatenated strings should use spaces between elements; CHECK: Consider using #include <linux/cpufeature.h> instead of <asm/cpufeature.h>; CHECK: Lines should not end with a '('
conchuod/build_rv64_nommu_k210_defconfig fail Build failed
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig fail Build failed
conchuod/vmtest-for-next-PR fail PR summary
conchuod/patch-6-test-1 fail .github/scripts/patches/build_rv32_defconfig.sh
conchuod/patch-6-test-2 fail .github/scripts/patches/build_rv64_clang_allmodconfig.sh
conchuod/patch-6-test-3 fail .github/scripts/patches/build_rv64_gcc_allmodconfig.sh
conchuod/patch-6-test-4 fail .github/scripts/patches/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-6-test-5 fail .github/scripts/patches/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-6-test-6 fail .github/scripts/patches/checkpatch.sh
conchuod/patch-6-test-7 success .github/scripts/patches/dtb_warn_rv64.sh
conchuod/patch-6-test-8 success .github/scripts/patches/header_inline.sh
conchuod/patch-6-test-9 success .github/scripts/patches/kdoc.sh
conchuod/patch-6-test-10 success .github/scripts/patches/maintainers_patterns.sh
conchuod/patch-6-test-11 success .github/scripts/patches/module_param.sh
conchuod/patch-6-test-12 success .github/scripts/patches/verify_fixes.sh
conchuod/patch-6-test-13 success .github/scripts/patches/verify_signedoff.sh

Commit Message

Clément Léger Sept. 26, 2023, 3:03 p.m. UTC
hwprobe provides a way to report whether misaligned accesses are
emulated. In order to correctly populate that feature, we can check
whether a misaligned access actually traps. This is checked using an
exception table entry, which will be used when the misaligned access
is done from kernel mode.

Signed-off-by: Clément Léger <cleger@rivosinc.com>
---
 arch/riscv/include/asm/cpufeature.h  |  6 +++
 arch/riscv/kernel/cpufeature.c       |  6 ++-
 arch/riscv/kernel/setup.c            |  1 +
 arch/riscv/kernel/traps_misaligned.c | 63 +++++++++++++++++++++++++++-
 4 files changed, 74 insertions(+), 2 deletions(-)

Comments

Evan Green Sept. 26, 2023, 9:57 p.m. UTC | #1
On Tue, Sep 26, 2023 at 8:03 AM Clément Léger <cleger@rivosinc.com> wrote:
>
> hwprobe provides a way to report if misaligned access are emulated. In
> order to correctly populate that feature, we can check if it actually
> traps when doing a misaligned access. This can be checked using an
> exception table entry which will actually be used when a misaligned
> access is done from kernel mode.
>
> Signed-off-by: Clément Léger <cleger@rivosinc.com>
> ---
>  arch/riscv/include/asm/cpufeature.h  |  6 +++
>  arch/riscv/kernel/cpufeature.c       |  6 ++-
>  arch/riscv/kernel/setup.c            |  1 +
>  arch/riscv/kernel/traps_misaligned.c | 63 +++++++++++++++++++++++++++-
>  4 files changed, 74 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index d0345bd659c9..c1f0ef02cd7d 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -8,6 +8,7 @@
>
>  #include <linux/bitmap.h>
>  #include <asm/hwcap.h>
> +#include <asm/hwprobe.h>
>
>  /*
>   * These are probed via a device_initcall(), via either the SBI or directly
> @@ -32,4 +33,9 @@ extern struct riscv_isainfo hart_isa[NR_CPUS];
>
>  void check_unaligned_access(int cpu);
>
> +bool unaligned_ctl_available(void);
> +
> +bool check_unaligned_access_emulated(int cpu);
> +void unaligned_emulation_finish(void);
> +
>  #endif
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 1cfbba65d11a..fbbde800bc21 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -568,6 +568,9 @@ void check_unaligned_access(int cpu)
>         void *src;
>         long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
>
> +       if (check_unaligned_access_emulated(cpu))

This spot (referenced below).

> +               return;
> +
>         page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
>         if (!page) {
>                 pr_warn("Can't alloc pages to measure memcpy performance");
> @@ -645,9 +648,10 @@ void check_unaligned_access(int cpu)
>         __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
>  }
>
> -static int check_unaligned_access_boot_cpu(void)
> +static int __init check_unaligned_access_boot_cpu(void)
>  {
>         check_unaligned_access(0);
> +       unaligned_emulation_finish();
>         return 0;
>  }
>
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index e600aab116a4..3af6ad4df7cf 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -26,6 +26,7 @@
>  #include <asm/acpi.h>
>  #include <asm/alternative.h>
>  #include <asm/cacheflush.h>
> +#include <asm/cpufeature.h>
>  #include <asm/cpu_ops.h>
>  #include <asm/early_ioremap.h>
>  #include <asm/pgtable.h>
> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> index b5fb1ff078e3..fa81f6952fa4 100644
> --- a/arch/riscv/kernel/traps_misaligned.c
> +++ b/arch/riscv/kernel/traps_misaligned.c
> @@ -9,11 +9,14 @@
>  #include <linux/perf_event.h>
>  #include <linux/irq.h>
>  #include <linux/stringify.h>
> +#include <linux/prctl.h>
>
>  #include <asm/processor.h>
>  #include <asm/ptrace.h>
>  #include <asm/csr.h>
>  #include <asm/entry-common.h>
> +#include <asm/hwprobe.h>
> +#include <asm/cpufeature.h>
>
>  #define INSN_MATCH_LB                  0x3
>  #define INSN_MASK_LB                   0x707f
> @@ -396,8 +399,10 @@ union reg_data {
>         u64 data_u64;
>  };
>
> +static bool unaligned_ctl __read_mostly;
> +
>  /* sysctl hooks */
> -int unaligned_enabled __read_mostly = 1;       /* Enabled by default */
> +int unaligned_enabled __read_mostly;
>
>  int handle_misaligned_load(struct pt_regs *regs)
>  {
> @@ -412,6 +417,9 @@ int handle_misaligned_load(struct pt_regs *regs)
>         if (!unaligned_enabled)
>                 return -1;
>
> +       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
> +               return -1;
> +
>         if (get_insn(regs, epc, &insn))
>                 return -1;
>
> @@ -511,6 +519,9 @@ int handle_misaligned_store(struct pt_regs *regs)
>         if (!unaligned_enabled)
>                 return -1;
>
> +       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
> +               return -1;
> +
>         if (get_insn(regs, epc, &insn))
>                 return -1;
>
> @@ -585,3 +596,53 @@ int handle_misaligned_store(struct pt_regs *regs)
>
>         return 0;
>  }
> +
> +bool check_unaligned_access_emulated(int cpu)
> +{
> +       unsigned long emulated = 1, tmp_var;
> +
> +       /* Use a fixup to detect if misaligned access triggered an exception */
> +       __asm__ __volatile__ (
> +               "1:\n"
> +               "       "REG_L" %[tmp], 1(%[ptr])\n"
> +               "       li %[emulated], 0\n"
> +               "2:\n"
> +               _ASM_EXTABLE(1b, 2b)
> +       : [emulated] "+r" (emulated), [tmp] "=r" (tmp_var)
> +       : [ptr] "r" (&tmp_var)
> +       : "memory");
> +
> +       if (!emulated)
> +               return false;
> +
> +       per_cpu(misaligned_access_speed, cpu) =
> +               RISCV_HWPROBE_MISALIGNED_EMULATED;

For tidiness, can we move the assignment of this per-cpu variable into
check_unaligned_access(), at the spot I referenced above. That way
people looking to see how this variable is set don't have to hunt
through multiple locations.
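
A minimal sketch of that shape (hypothetical, not the final code; it assumes
check_unaligned_access_emulated() is reduced to reporting whether the probe
access trapped):

	void check_unaligned_access(int cpu)
	{
		struct page *page;
		void *src;
		long speed = RISCV_HWPROBE_MISALIGNED_SLOW;

		if (check_unaligned_access_emulated(cpu)) {
			/* Record the hwprobe value here rather than in the probe. */
			per_cpu(misaligned_access_speed, cpu) =
				RISCV_HWPROBE_MISALIGNED_EMULATED;
			return;
		}

		/* ... existing memcpy timing measurement, unchanged ... */
	}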

> +
> +       return true;
> +}
> +
> +void __init unaligned_emulation_finish(void)
> +{
> +       int cpu;
> +
> +       /*
> +        * We can only support PR_UNALIGN controls if all CPUs have misaligned
> +        * accesses emulated since tasks requesting such control can run on any
> +        * CPU.
> +        */
> +       for_each_possible_cpu(cpu) {
> +               if (per_cpu(misaligned_access_speed, cpu) !=
> +                   RISCV_HWPROBE_MISALIGNED_EMULATED) {
> +                       goto out;
> +               }
> +       }
> +       unaligned_ctl = true;

This doesn't handle the case where a CPU is hotplugged later that
doesn't match with the others. You may want to add a patch that fails
the onlining of that new CPU if unaligned_ctl is true and
new_cpu.misaligned_access_speed != EMULATED.
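
A rough sketch of what such a guard could look like (entirely hypothetical --
the hook point, names and error code are made up for illustration):

	#include <linux/cpuhotplug.h>

	/* Reject CPUs that would break the PR_UNALIGN support already advertised. */
	static int riscv_unaligned_online_cpu(unsigned int cpu)
	{
		if (unaligned_ctl_available() &&
		    per_cpu(misaligned_access_speed, cpu) !=
		    RISCV_HWPROBE_MISALIGNED_EMULATED)
			return -EOPNOTSUPP;	/* an error here rolls the onlining back */

		return 0;
	}

	static int __init riscv_unaligned_hotplug_init(void)
	{
		int ret;

		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "riscv/unaligned:online",
					riscv_unaligned_online_cpu, NULL);
		return ret < 0 ? ret : 0;
	}
	late_initcall(riscv_unaligned_hotplug_init);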

-Evan
Clément Léger Sept. 28, 2023, 7:46 a.m. UTC | #2
On 26/09/2023 23:57, Evan Green wrote:
> On Tue, Sep 26, 2023 at 8:03 AM Clément Léger <cleger@rivosinc.com> wrote:
>>
>> hwprobe provides a way to report if misaligned access are emulated. In
>> order to correctly populate that feature, we can check if it actually
>> traps when doing a misaligned access. This can be checked using an
>> exception table entry which will actually be used when a misaligned
>> access is done from kernel mode.
>>
>> Signed-off-by: Clément Léger <cleger@rivosinc.com>
>> ---
>>  arch/riscv/include/asm/cpufeature.h  |  6 +++
>>  arch/riscv/kernel/cpufeature.c       |  6 ++-
>>  arch/riscv/kernel/setup.c            |  1 +
>>  arch/riscv/kernel/traps_misaligned.c | 63 +++++++++++++++++++++++++++-
>>  4 files changed, 74 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
>> index d0345bd659c9..c1f0ef02cd7d 100644
>> --- a/arch/riscv/include/asm/cpufeature.h
>> +++ b/arch/riscv/include/asm/cpufeature.h
>> @@ -8,6 +8,7 @@
>>
>>  #include <linux/bitmap.h>
>>  #include <asm/hwcap.h>
>> +#include <asm/hwprobe.h>
>>
>>  /*
>>   * These are probed via a device_initcall(), via either the SBI or directly
>> @@ -32,4 +33,9 @@ extern struct riscv_isainfo hart_isa[NR_CPUS];
>>
>>  void check_unaligned_access(int cpu);
>>
>> +bool unaligned_ctl_available(void);
>> +
>> +bool check_unaligned_access_emulated(int cpu);
>> +void unaligned_emulation_finish(void);
>> +
>>  #endif
>> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
>> index 1cfbba65d11a..fbbde800bc21 100644
>> --- a/arch/riscv/kernel/cpufeature.c
>> +++ b/arch/riscv/kernel/cpufeature.c
>> @@ -568,6 +568,9 @@ void check_unaligned_access(int cpu)
>>         void *src;
>>         long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
>>
>> +       if (check_unaligned_access_emulated(cpu))
> 
> This spot (referenced below).
> 
>> +               return;
>> +
>>         page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
>>         if (!page) {
>>                 pr_warn("Can't alloc pages to measure memcpy performance");
>> @@ -645,9 +648,10 @@ void check_unaligned_access(int cpu)
>>         __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
>>  }
>>
>> -static int check_unaligned_access_boot_cpu(void)
>> +static int __init check_unaligned_access_boot_cpu(void)
>>  {
>>         check_unaligned_access(0);
>> +       unaligned_emulation_finish();
>>         return 0;
>>  }
>>
>> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>> index e600aab116a4..3af6ad4df7cf 100644
>> --- a/arch/riscv/kernel/setup.c
>> +++ b/arch/riscv/kernel/setup.c
>> @@ -26,6 +26,7 @@
>>  #include <asm/acpi.h>
>>  #include <asm/alternative.h>
>>  #include <asm/cacheflush.h>
>> +#include <asm/cpufeature.h>
>>  #include <asm/cpu_ops.h>
>>  #include <asm/early_ioremap.h>
>>  #include <asm/pgtable.h>
>> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
>> index b5fb1ff078e3..fa81f6952fa4 100644
>> --- a/arch/riscv/kernel/traps_misaligned.c
>> +++ b/arch/riscv/kernel/traps_misaligned.c
>> @@ -9,11 +9,14 @@
>>  #include <linux/perf_event.h>
>>  #include <linux/irq.h>
>>  #include <linux/stringify.h>
>> +#include <linux/prctl.h>
>>
>>  #include <asm/processor.h>
>>  #include <asm/ptrace.h>
>>  #include <asm/csr.h>
>>  #include <asm/entry-common.h>
>> +#include <asm/hwprobe.h>
>> +#include <asm/cpufeature.h>
>>
>>  #define INSN_MATCH_LB                  0x3
>>  #define INSN_MASK_LB                   0x707f
>> @@ -396,8 +399,10 @@ union reg_data {
>>         u64 data_u64;
>>  };
>>
>> +static bool unaligned_ctl __read_mostly;
>> +
>>  /* sysctl hooks */
>> -int unaligned_enabled __read_mostly = 1;       /* Enabled by default */
>> +int unaligned_enabled __read_mostly;
>>
>>  int handle_misaligned_load(struct pt_regs *regs)
>>  {
>> @@ -412,6 +417,9 @@ int handle_misaligned_load(struct pt_regs *regs)
>>         if (!unaligned_enabled)
>>                 return -1;
>>
>> +       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
>> +               return -1;
>> +
>>         if (get_insn(regs, epc, &insn))
>>                 return -1;
>>
>> @@ -511,6 +519,9 @@ int handle_misaligned_store(struct pt_regs *regs)
>>         if (!unaligned_enabled)
>>                 return -1;
>>
>> +       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
>> +               return -1;
>> +
>>         if (get_insn(regs, epc, &insn))
>>                 return -1;
>>
>> @@ -585,3 +596,53 @@ int handle_misaligned_store(struct pt_regs *regs)
>>
>>         return 0;
>>  }
>> +
>> +bool check_unaligned_access_emulated(int cpu)
>> +{
>> +       unsigned long emulated = 1, tmp_var;
>> +
>> +       /* Use a fixup to detect if misaligned access triggered an exception */
>> +       __asm__ __volatile__ (
>> +               "1:\n"
>> +               "       "REG_L" %[tmp], 1(%[ptr])\n"
>> +               "       li %[emulated], 0\n"
>> +               "2:\n"
>> +               _ASM_EXTABLE(1b, 2b)
>> +       : [emulated] "+r" (emulated), [tmp] "=r" (tmp_var)
>> +       : [ptr] "r" (&tmp_var)
>> +       : "memory");
>> +
>> +       if (!emulated)
>> +               return false;
>> +
>> +       per_cpu(misaligned_access_speed, cpu) =
>> +               RISCV_HWPROBE_MISALIGNED_EMULATED;
> 
> For tidiness, can we move the assignment of this per-cpu variable into
> check_unaligned_access(), at the spot I referenced above. That way
> people looking to see how this variable is set don't have to hunt
> through multiple locations.

Agreed, that seems better.

> 
>> +
>> +       return true;
>> +}
>> +
>> +void __init unaligned_emulation_finish(void)
>> +{
>> +       int cpu;
>> +
>> +       /*
>> +        * We can only support PR_UNALIGN controls if all CPUs have misaligned
>> +        * accesses emulated since tasks requesting such control can run on any
>> +        * CPU.
>> +        */
>> +       for_each_possible_cpu(cpu) {
>> +               if (per_cpu(misaligned_access_speed, cpu) !=
>> +                   RISCV_HWPROBE_MISALIGNED_EMULATED) {
>> +                       goto out;
>> +               }
>> +       }
>> +       unaligned_ctl = true;
> 
> This doesn't handle the case where a CPU is hotplugged later that
> doesn't match with the others. You may want to add a patch that fails
> the onlining of that new CPU if unaligned_ctl is true and
> new_cpu.misaligned_access_speed != EMULATED.

So actually, this will require a bit more plumbing as I realize the
switch to disable misalignment support is global. This switch should
only be disabled at boot which means I won't be able to disable it at
runtime (while hotplugging a CPU) for CPU detection. There are multiple
ways to handle that:

1- Have a per-cpu switch for misalignment handling which would be
disabled only when detection is needed.

2- Assume that once detected at boot-time, emulation will not change.

Not sure which one is better though. Advice is welcome.
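
For reference, a very rough sketch of option 1 (names and placement are only
illustrative, not an actual implementation):

	/* Per-cpu replacement for the global unaligned_enabled switch. */
	static DEFINE_PER_CPU(bool, misaligned_emulation_enabled) = true;

	/* In handle_misaligned_load()/handle_misaligned_store(), before decoding: */
	if (!this_cpu_read(misaligned_emulation_enabled))
		return -1;	/* let the probe's exception table fixup run instead */

	/*
	 * Around the probe access in check_unaligned_access_emulated(), which
	 * must run on the CPU being probed:
	 */
	this_cpu_write(misaligned_emulation_enabled, false);
	/* ... extable-protected misaligned load as in this patch ... */
	this_cpu_write(misaligned_emulation_enabled, true);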

Clément

> 
> -Evan
Evan Green Sept. 28, 2023, 4:51 p.m. UTC | #3
On Thu, Sep 28, 2023 at 12:46 AM Clément Léger <cleger@rivosinc.com> wrote:
>
>
>
> On 26/09/2023 23:57, Evan Green wrote:
> > On Tue, Sep 26, 2023 at 8:03 AM Clément Léger <cleger@rivosinc.com> wrote:
> >>
> >> hwprobe provides a way to report if misaligned access are emulated. In
> >> order to correctly populate that feature, we can check if it actually
> >> traps when doing a misaligned access. This can be checked using an
> >> exception table entry which will actually be used when a misaligned
> >> access is done from kernel mode.
> >>
> >> Signed-off-by: Clément Léger <cleger@rivosinc.com>
> >> ---
> >>  arch/riscv/include/asm/cpufeature.h  |  6 +++
> >>  arch/riscv/kernel/cpufeature.c       |  6 ++-
> >>  arch/riscv/kernel/setup.c            |  1 +
> >>  arch/riscv/kernel/traps_misaligned.c | 63 +++++++++++++++++++++++++++-
> >>  4 files changed, 74 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> >> index d0345bd659c9..c1f0ef02cd7d 100644
> >> --- a/arch/riscv/include/asm/cpufeature.h
> >> +++ b/arch/riscv/include/asm/cpufeature.h
> >> @@ -8,6 +8,7 @@
> >>
> >>  #include <linux/bitmap.h>
> >>  #include <asm/hwcap.h>
> >> +#include <asm/hwprobe.h>
> >>
> >>  /*
> >>   * These are probed via a device_initcall(), via either the SBI or directly
> >> @@ -32,4 +33,9 @@ extern struct riscv_isainfo hart_isa[NR_CPUS];
> >>
> >>  void check_unaligned_access(int cpu);
> >>
> >> +bool unaligned_ctl_available(void);
> >> +
> >> +bool check_unaligned_access_emulated(int cpu);
> >> +void unaligned_emulation_finish(void);
> >> +
> >>  #endif
> >> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> >> index 1cfbba65d11a..fbbde800bc21 100644
> >> --- a/arch/riscv/kernel/cpufeature.c
> >> +++ b/arch/riscv/kernel/cpufeature.c
> >> @@ -568,6 +568,9 @@ void check_unaligned_access(int cpu)
> >>         void *src;
> >>         long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
> >>
> >> +       if (check_unaligned_access_emulated(cpu))
> >
> > This spot (referenced below).
> >
> >> +               return;
> >> +
> >>         page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
> >>         if (!page) {
> >>                 pr_warn("Can't alloc pages to measure memcpy performance");
> >> @@ -645,9 +648,10 @@ void check_unaligned_access(int cpu)
> >>         __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
> >>  }
> >>
> >> -static int check_unaligned_access_boot_cpu(void)
> >> +static int __init check_unaligned_access_boot_cpu(void)
> >>  {
> >>         check_unaligned_access(0);
> >> +       unaligned_emulation_finish();
> >>         return 0;
> >>  }
> >>
> >> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> >> index e600aab116a4..3af6ad4df7cf 100644
> >> --- a/arch/riscv/kernel/setup.c
> >> +++ b/arch/riscv/kernel/setup.c
> >> @@ -26,6 +26,7 @@
> >>  #include <asm/acpi.h>
> >>  #include <asm/alternative.h>
> >>  #include <asm/cacheflush.h>
> >> +#include <asm/cpufeature.h>
> >>  #include <asm/cpu_ops.h>
> >>  #include <asm/early_ioremap.h>
> >>  #include <asm/pgtable.h>
> >> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> >> index b5fb1ff078e3..fa81f6952fa4 100644
> >> --- a/arch/riscv/kernel/traps_misaligned.c
> >> +++ b/arch/riscv/kernel/traps_misaligned.c
> >> @@ -9,11 +9,14 @@
> >>  #include <linux/perf_event.h>
> >>  #include <linux/irq.h>
> >>  #include <linux/stringify.h>
> >> +#include <linux/prctl.h>
> >>
> >>  #include <asm/processor.h>
> >>  #include <asm/ptrace.h>
> >>  #include <asm/csr.h>
> >>  #include <asm/entry-common.h>
> >> +#include <asm/hwprobe.h>
> >> +#include <asm/cpufeature.h>
> >>
> >>  #define INSN_MATCH_LB                  0x3
> >>  #define INSN_MASK_LB                   0x707f
> >> @@ -396,8 +399,10 @@ union reg_data {
> >>         u64 data_u64;
> >>  };
> >>
> >> +static bool unaligned_ctl __read_mostly;
> >> +
> >>  /* sysctl hooks */
> >> -int unaligned_enabled __read_mostly = 1;       /* Enabled by default */
> >> +int unaligned_enabled __read_mostly;
> >>
> >>  int handle_misaligned_load(struct pt_regs *regs)
> >>  {
> >> @@ -412,6 +417,9 @@ int handle_misaligned_load(struct pt_regs *regs)
> >>         if (!unaligned_enabled)
> >>                 return -1;
> >>
> >> +       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
> >> +               return -1;
> >> +
> >>         if (get_insn(regs, epc, &insn))
> >>                 return -1;
> >>
> >> @@ -511,6 +519,9 @@ int handle_misaligned_store(struct pt_regs *regs)
> >>         if (!unaligned_enabled)
> >>                 return -1;
> >>
> >> +       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
> >> +               return -1;
> >> +
> >>         if (get_insn(regs, epc, &insn))
> >>                 return -1;
> >>
> >> @@ -585,3 +596,53 @@ int handle_misaligned_store(struct pt_regs *regs)
> >>
> >>         return 0;
> >>  }
> >> +
> >> +bool check_unaligned_access_emulated(int cpu)
> >> +{
> >> +       unsigned long emulated = 1, tmp_var;
> >> +
> >> +       /* Use a fixup to detect if misaligned access triggered an exception */
> >> +       __asm__ __volatile__ (
> >> +               "1:\n"
> >> +               "       "REG_L" %[tmp], 1(%[ptr])\n"
> >> +               "       li %[emulated], 0\n"
> >> +               "2:\n"
> >> +               _ASM_EXTABLE(1b, 2b)
> >> +       : [emulated] "+r" (emulated), [tmp] "=r" (tmp_var)
> >> +       : [ptr] "r" (&tmp_var)
> >> +       : "memory");
> >> +
> >> +       if (!emulated)
> >> +               return false;
> >> +
> >> +       per_cpu(misaligned_access_speed, cpu) =
> >> +               RISCV_HWPROBE_MISALIGNED_EMULATED;
> >
> > For tidiness, can we move the assignment of this per-cpu variable into
> > check_unaligned_access(), at the spot I referenced above. That way
> > people looking to see how this variable is set don't have to hunt
> > through multiple locations.
>
> Agreed, that seems better.
>
> >
> >> +
> >> +       return true;
> >> +}
> >> +
> >> +void __init unaligned_emulation_finish(void)
> >> +{
> >> +       int cpu;
> >> +
> >> +       /*
> >> +        * We can only support PR_UNALIGN controls if all CPUs have misaligned
> >> +        * accesses emulated since tasks requesting such control can run on any
> >> +        * CPU.
> >> +        */
> >> +       for_each_possible_cpu(cpu) {
> >> +               if (per_cpu(misaligned_access_speed, cpu) !=
> >> +                   RISCV_HWPROBE_MISALIGNED_EMULATED) {
> >> +                       goto out;
> >> +               }
> >> +       }
> >> +       unaligned_ctl = true;
> >
> > This doesn't handle the case where a CPU is hotplugged later that
> > doesn't match with the others. You may want to add a patch that fails
> > the onlining of that new CPU if unaligned_ctl is true and
> > new_cpu.misaligned_access_speed != EMULATED.
>
> So actually, this will require a bit more plumbing as I realize the
> switch to disable misalignment support is global. This switch should
> only be disabled at boot which means I won't be able to disable it at
> runtime (while hiotplugging a CPU) for CPU detection. There are multiple
> ways to handle that:
>
> 1- Have a per-cpu switch for misalignment handling which would be
> disabled only when detection is needed.
>
> 2- Assume that once detected at boot-time, emulation will not change.
>
> Not sure which one is better though. Advice are welcomed.

If I gaze into my own crystal ball, my hope is that the Venn diagram
of "systems that support hotplug" and "systems that still use software
assist for misaligned access" is just two circles not touching. If
people agree with that, then the safe thing to do is enforce it, by
failing to online new hotplugged CPUs that don't conform to
misaligned_access_speed == EMULATED if unaligned_ctl is true. We would
sacrifice some future flexibility by making this choice now though, so
it requires buy-in for this particular crystal ball vision.

-Evan
kernel test robot Sept. 29, 2023, 1:02 a.m. UTC | #4
Hi Clément,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.6-rc3 next-20230928]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Cl-ment-L-ger/riscv-remove-unused-functions-in-traps_misaligned-c/20230926-230654
base:   linus/master
patch link:    https://lore.kernel.org/r/20230926150316.1129648-7-cleger%40rivosinc.com
patch subject: [PATCH 6/7] riscv: report misaligned accesses emulation to hwprobe
config: riscv-randconfig-002-20230929 (https://download.01.org/0day-ci/archive/20230929/202309290842.Dk0K2nsp-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230929/202309290842.Dk0K2nsp-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309290842.Dk0K2nsp-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   arch/riscv/kernel/traps_misaligned.c: In function 'handle_misaligned_load':
   arch/riscv/kernel/traps_misaligned.c:420:48: error: 'struct thread_struct' has no member named 'align_ctl'
     420 |         if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
         |                                                ^
   arch/riscv/kernel/traps_misaligned.c: In function 'handle_misaligned_store':
   arch/riscv/kernel/traps_misaligned.c:522:48: error: 'struct thread_struct' has no member named 'align_ctl'
     522 |         if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
         |                                                ^
   arch/riscv/kernel/traps_misaligned.c: In function 'check_unaligned_access_emulated':
>> arch/riscv/kernel/traps_misaligned.c:610:17: error: expected ':' or ')' before '_ASM_EXTABLE'
     610 |                 _ASM_EXTABLE(1b, 2b)
         |                 ^~~~~~~~~~~~
>> arch/riscv/kernel/traps_misaligned.c:610:30: error: invalid suffix "b" on integer constant
     610 |                 _ASM_EXTABLE(1b, 2b)
         |                              ^~
   arch/riscv/kernel/traps_misaligned.c:610:34: error: invalid suffix "b" on integer constant
     610 |                 _ASM_EXTABLE(1b, 2b)
         |                                  ^~
>> arch/riscv/kernel/traps_misaligned.c:602:37: warning: unused variable 'tmp_var' [-Wunused-variable]
     602 |         unsigned long emulated = 1, tmp_var;
         |                                     ^~~~~~~


vim +610 arch/riscv/kernel/traps_misaligned.c

   599	
   600	bool check_unaligned_access_emulated(int cpu)
   601	{
 > 602		unsigned long emulated = 1, tmp_var;
   603	
   604		/* Use a fixup to detect if misaligned access triggered an exception */
   605		__asm__ __volatile__ (
   606			"1:\n"
   607			"	"REG_L" %[tmp], 1(%[ptr])\n"
   608			"	li %[emulated], 0\n"
   609			"2:\n"
 > 610			_ASM_EXTABLE(1b, 2b)
   611		: [emulated] "+r" (emulated), [tmp] "=r" (tmp_var)
   612		: [ptr] "r" (&tmp_var)
   613		: "memory");
   614	
   615		if (!emulated)
   616			return false;
   617	
   618		per_cpu(misaligned_access_speed, cpu) =
   619			RISCV_HWPROBE_MISALIGNED_EMULATED;
   620	
   621		return true;
   622	}
   623
Atish Kumar Patra Oct. 3, 2023, 9:50 a.m. UTC | #5
On Thu, Sep 28, 2023 at 9:52 AM Evan Green <evan@rivosinc.com> wrote:
>
> On Thu, Sep 28, 2023 at 12:46 AM Clément Léger <cleger@rivosinc.com> wrote:
> >
> >
> >
> > On 26/09/2023 23:57, Evan Green wrote:
> > > On Tue, Sep 26, 2023 at 8:03 AM Clément Léger <cleger@rivosinc.com> wrote:
> > >>
> > >> hwprobe provides a way to report if misaligned access are emulated. In
> > >> order to correctly populate that feature, we can check if it actually
> > >> traps when doing a misaligned access. This can be checked using an
> > >> exception table entry which will actually be used when a misaligned
> > >> access is done from kernel mode.
> > >>
> > >> Signed-off-by: Clément Léger <cleger@rivosinc.com>
> > >> ---
> > >>  arch/riscv/include/asm/cpufeature.h  |  6 +++
> > >>  arch/riscv/kernel/cpufeature.c       |  6 ++-
> > >>  arch/riscv/kernel/setup.c            |  1 +
> > >>  arch/riscv/kernel/traps_misaligned.c | 63 +++++++++++++++++++++++++++-
> > >>  4 files changed, 74 insertions(+), 2 deletions(-)
> > >>
> > >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> > >> index d0345bd659c9..c1f0ef02cd7d 100644
> > >> --- a/arch/riscv/include/asm/cpufeature.h
> > >> +++ b/arch/riscv/include/asm/cpufeature.h
> > >> @@ -8,6 +8,7 @@
> > >>
> > >>  #include <linux/bitmap.h>
> > >>  #include <asm/hwcap.h>
> > >> +#include <asm/hwprobe.h>
> > >>
> > >>  /*
> > >>   * These are probed via a device_initcall(), via either the SBI or directly
> > >> @@ -32,4 +33,9 @@ extern struct riscv_isainfo hart_isa[NR_CPUS];
> > >>
> > >>  void check_unaligned_access(int cpu);
> > >>
> > >> +bool unaligned_ctl_available(void);
> > >> +
> > >> +bool check_unaligned_access_emulated(int cpu);
> > >> +void unaligned_emulation_finish(void);
> > >> +
> > >>  #endif
> > >> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > >> index 1cfbba65d11a..fbbde800bc21 100644
> > >> --- a/arch/riscv/kernel/cpufeature.c
> > >> +++ b/arch/riscv/kernel/cpufeature.c
> > >> @@ -568,6 +568,9 @@ void check_unaligned_access(int cpu)
> > >>         void *src;
> > >>         long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
> > >>
> > >> +       if (check_unaligned_access_emulated(cpu))
> > >
> > > This spot (referenced below).
> > >
> > >> +               return;
> > >> +
> > >>         page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
> > >>         if (!page) {
> > >>                 pr_warn("Can't alloc pages to measure memcpy performance");
> > >> @@ -645,9 +648,10 @@ void check_unaligned_access(int cpu)
> > >>         __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
> > >>  }
> > >>
> > >> -static int check_unaligned_access_boot_cpu(void)
> > >> +static int __init check_unaligned_access_boot_cpu(void)
> > >>  {
> > >>         check_unaligned_access(0);
> > >> +       unaligned_emulation_finish();
> > >>         return 0;
> > >>  }
> > >>
> > >> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > >> index e600aab116a4..3af6ad4df7cf 100644
> > >> --- a/arch/riscv/kernel/setup.c
> > >> +++ b/arch/riscv/kernel/setup.c
> > >> @@ -26,6 +26,7 @@
> > >>  #include <asm/acpi.h>
> > >>  #include <asm/alternative.h>
> > >>  #include <asm/cacheflush.h>
> > >> +#include <asm/cpufeature.h>
> > >>  #include <asm/cpu_ops.h>
> > >>  #include <asm/early_ioremap.h>
> > >>  #include <asm/pgtable.h>
> > >> diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
> > >> index b5fb1ff078e3..fa81f6952fa4 100644
> > >> --- a/arch/riscv/kernel/traps_misaligned.c
> > >> +++ b/arch/riscv/kernel/traps_misaligned.c
> > >> @@ -9,11 +9,14 @@
> > >>  #include <linux/perf_event.h>
> > >>  #include <linux/irq.h>
> > >>  #include <linux/stringify.h>
> > >> +#include <linux/prctl.h>
> > >>
> > >>  #include <asm/processor.h>
> > >>  #include <asm/ptrace.h>
> > >>  #include <asm/csr.h>
> > >>  #include <asm/entry-common.h>
> > >> +#include <asm/hwprobe.h>
> > >> +#include <asm/cpufeature.h>
> > >>
> > >>  #define INSN_MATCH_LB                  0x3
> > >>  #define INSN_MASK_LB                   0x707f
> > >> @@ -396,8 +399,10 @@ union reg_data {
> > >>         u64 data_u64;
> > >>  };
> > >>
> > >> +static bool unaligned_ctl __read_mostly;
> > >> +
> > >>  /* sysctl hooks */
> > >> -int unaligned_enabled __read_mostly = 1;       /* Enabled by default */
> > >> +int unaligned_enabled __read_mostly;
> > >>
> > >>  int handle_misaligned_load(struct pt_regs *regs)
> > >>  {
> > >> @@ -412,6 +417,9 @@ int handle_misaligned_load(struct pt_regs *regs)
> > >>         if (!unaligned_enabled)
> > >>                 return -1;
> > >>
> > >> +       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
> > >> +               return -1;
> > >> +
> > >>         if (get_insn(regs, epc, &insn))
> > >>                 return -1;
> > >>
> > >> @@ -511,6 +519,9 @@ int handle_misaligned_store(struct pt_regs *regs)
> > >>         if (!unaligned_enabled)
> > >>                 return -1;
> > >>
> > >> +       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
> > >> +               return -1;
> > >> +
> > >>         if (get_insn(regs, epc, &insn))
> > >>                 return -1;
> > >>
> > >> @@ -585,3 +596,53 @@ int handle_misaligned_store(struct pt_regs *regs)
> > >>
> > >>         return 0;
> > >>  }
> > >> +
> > >> +bool check_unaligned_access_emulated(int cpu)
> > >> +{
> > >> +       unsigned long emulated = 1, tmp_var;
> > >> +
> > >> +       /* Use a fixup to detect if misaligned access triggered an exception */
> > >> +       __asm__ __volatile__ (
> > >> +               "1:\n"
> > >> +               "       "REG_L" %[tmp], 1(%[ptr])\n"
> > >> +               "       li %[emulated], 0\n"
> > >> +               "2:\n"
> > >> +               _ASM_EXTABLE(1b, 2b)
> > >> +       : [emulated] "+r" (emulated), [tmp] "=r" (tmp_var)
> > >> +       : [ptr] "r" (&tmp_var)
> > >> +       : "memory");
> > >> +
> > >> +       if (!emulated)
> > >> +               return false;
> > >> +
> > >> +       per_cpu(misaligned_access_speed, cpu) =
> > >> +               RISCV_HWPROBE_MISALIGNED_EMULATED;
> > >
> > > For tidiness, can we move the assignment of this per-cpu variable into
> > > check_unaligned_access(), at the spot I referenced above. That way
> > > people looking to see how this variable is set don't have to hunt
> > > through multiple locations.
> >
> > Agreed, that seems better.
> >
> > >
> > >> +
> > >> +       return true;
> > >> +}
> > >> +
> > >> +void __init unaligned_emulation_finish(void)
> > >> +{
> > >> +       int cpu;
> > >> +
> > >> +       /*
> > >> +        * We can only support PR_UNALIGN controls if all CPUs have misaligned
> > >> +        * accesses emulated since tasks requesting such control can run on any
> > >> +        * CPU.
> > >> +        */
> > >> +       for_each_possible_cpu(cpu) {
> > >> +               if (per_cpu(misaligned_access_speed, cpu) !=
> > >> +                   RISCV_HWPROBE_MISALIGNED_EMULATED) {
> > >> +                       goto out;
> > >> +               }
> > >> +       }
> > >> +       unaligned_ctl = true;
> > >

Note: You probably want to loop through the present cpu mask instead
of possible cpus.
The possible cpus list will have all the cpus listed in DT/ACPI. However,
not all of them may come up during boot.
Hardware errata, a different kernel configuration, or an incorrect DT are a
few examples of cases where the possible mask may not match the present cpumask.
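
i.e. something along these lines (illustrative only):

	for_each_present_cpu(cpu) {
		if (per_cpu(misaligned_access_speed, cpu) !=
		    RISCV_HWPROBE_MISALIGNED_EMULATED)
			goto out;
	}
	unaligned_ctl = true;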

> > > This doesn't handle the case where a CPU is hotplugged later that
> > > doesn't match with the others. You may want to add a patch that fails
> > > the onlining of that new CPU if unaligned_ctl is true and
> > > new_cpu.misaligned_access_speed != EMULATED.
> >
> > So actually, this will require a bit more plumbing as I realize the
> > switch to disable misalignment support is global. This switch should
> > only be disabled at boot which means I won't be able to disable it at
> > runtime (while hiotplugging a CPU) for CPU detection. There are multiple
> > ways to handle that:
> >
> > 1- Have a per-cpu switch for misalignment handling which would be
> > disabled only when detection is needed.
> >
> > 2- Assume that once detected at boot-time, emulation will not change.
> >
> > Not sure which one is better though. Advice are welcomed.
>
> If I gaze into my own crystal ball, my hope is that the Venn diagram
> of "systems that support hotplug" and "systems that still use software
> assist for misaligned access" is just two circles not touching. If
> people agree with that, then the safe thing to do is enforce it, by

In a sane world, this is probably true. But given that errata exist,
who knows what systems we may end up with.

> failing to online new hotplugged CPUs that don't conform to
> misaligned_access_speed == EMULATED if unaligned_ctl is true. We would
> sacrifice some future flexibility by making this choice now though, so
> it requires buy-in for this particular crystal ball vision.
>
> -Evan

Patch

diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index d0345bd659c9..c1f0ef02cd7d 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -8,6 +8,7 @@ 
 
 #include <linux/bitmap.h>
 #include <asm/hwcap.h>
+#include <asm/hwprobe.h>
 
 /*
  * These are probed via a device_initcall(), via either the SBI or directly
@@ -32,4 +33,9 @@  extern struct riscv_isainfo hart_isa[NR_CPUS];
 
 void check_unaligned_access(int cpu);
 
+bool unaligned_ctl_available(void);
+
+bool check_unaligned_access_emulated(int cpu);
+void unaligned_emulation_finish(void);
+
 #endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 1cfbba65d11a..fbbde800bc21 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -568,6 +568,9 @@  void check_unaligned_access(int cpu)
 	void *src;
 	long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
 
+	if (check_unaligned_access_emulated(cpu))
+		return;
+
 	page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
 	if (!page) {
 		pr_warn("Can't alloc pages to measure memcpy performance");
@@ -645,9 +648,10 @@  void check_unaligned_access(int cpu)
 	__free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
 }
 
-static int check_unaligned_access_boot_cpu(void)
+static int __init check_unaligned_access_boot_cpu(void)
 {
 	check_unaligned_access(0);
+	unaligned_emulation_finish();
 	return 0;
 }
 
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index e600aab116a4..3af6ad4df7cf 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -26,6 +26,7 @@ 
 #include <asm/acpi.h>
 #include <asm/alternative.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/early_ioremap.h>
 #include <asm/pgtable.h>
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index b5fb1ff078e3..fa81f6952fa4 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -9,11 +9,14 @@ 
 #include <linux/perf_event.h>
 #include <linux/irq.h>
 #include <linux/stringify.h>
+#include <linux/prctl.h>
 
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/csr.h>
 #include <asm/entry-common.h>
+#include <asm/hwprobe.h>
+#include <asm/cpufeature.h>
 
 #define INSN_MATCH_LB			0x3
 #define INSN_MASK_LB			0x707f
@@ -396,8 +399,10 @@  union reg_data {
 	u64 data_u64;
 };
 
+static bool unaligned_ctl __read_mostly;
+
 /* sysctl hooks */
-int unaligned_enabled __read_mostly = 1;	/* Enabled by default */
+int unaligned_enabled __read_mostly;
 
 int handle_misaligned_load(struct pt_regs *regs)
 {
@@ -412,6 +417,9 @@  int handle_misaligned_load(struct pt_regs *regs)
 	if (!unaligned_enabled)
 		return -1;
 
+	if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
+		return -1;
+
 	if (get_insn(regs, epc, &insn))
 		return -1;
 
@@ -511,6 +519,9 @@  int handle_misaligned_store(struct pt_regs *regs)
 	if (!unaligned_enabled)
 		return -1;
 
+	if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
+		return -1;
+
 	if (get_insn(regs, epc, &insn))
 		return -1;
 
@@ -585,3 +596,53 @@  int handle_misaligned_store(struct pt_regs *regs)
 
 	return 0;
 }
+
+bool check_unaligned_access_emulated(int cpu)
+{
+	unsigned long emulated = 1, tmp_var;
+
+	/* Use a fixup to detect if misaligned access triggered an exception */
+	__asm__ __volatile__ (
+		"1:\n"
+		"	"REG_L" %[tmp], 1(%[ptr])\n"
+		"	li %[emulated], 0\n"
+		"2:\n"
+		_ASM_EXTABLE(1b, 2b)
+	: [emulated] "+r" (emulated), [tmp] "=r" (tmp_var)
+	: [ptr] "r" (&tmp_var)
+	: "memory");
+
+	if (!emulated)
+		return false;
+
+	per_cpu(misaligned_access_speed, cpu) =
+		RISCV_HWPROBE_MISALIGNED_EMULATED;
+
+	return true;
+}
+
+void __init unaligned_emulation_finish(void)
+{
+	int cpu;
+
+	/*
+	 * We can only support PR_UNALIGN controls if all CPUs have misaligned
+	 * accesses emulated since tasks requesting such control can run on any
+	 * CPU.
+	 */
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(misaligned_access_speed, cpu) !=
+		    RISCV_HWPROBE_MISALIGNED_EMULATED) {
+			goto out;
+		}
+	}
+	unaligned_ctl = true;
+
+out:
+	unaligned_enabled = 1;
+}
+
+bool unaligned_ctl_available(void)
+{
+	return unaligned_ctl;
+}