[v14,2/5] riscv: Add static key for misaligned accesses

Message ID 20231227-optimize_checksum-v14-2-ddfd48016566@rivosinc.com (mailing list archive)
State Superseded
Series riscv: Add fine-tuned checksum functions

Checks

Context Check Description
conchuod/vmtest-for-next-PR fail PR summary
conchuod/patch-2-test-1 success .github/scripts/patches/build_rv32_defconfig.sh
conchuod/patch-2-test-2 success .github/scripts/patches/build_rv64_clang_allmodconfig.sh
conchuod/patch-2-test-3 success .github/scripts/patches/build_rv64_gcc_allmodconfig.sh
conchuod/patch-2-test-4 success .github/scripts/patches/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-2-test-5 success .github/scripts/patches/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-2-test-6 success .github/scripts/patches/checkpatch.sh
conchuod/patch-2-test-7 success .github/scripts/patches/dtb_warn_rv64.sh
conchuod/patch-2-test-8 success .github/scripts/patches/header_inline.sh
conchuod/patch-2-test-9 success .github/scripts/patches/kdoc.sh
conchuod/patch-2-test-10 success .github/scripts/patches/module_param.sh
conchuod/patch-2-test-11 success .github/scripts/patches/verify_fixes.sh
conchuod/patch-2-test-12 success .github/scripts/patches/verify_signedoff.sh

Commit Message

Charlie Jenkins Dec. 27, 2023, 5:38 p.m. UTC
Support static branches depending on the value of misaligned accesses.
This will be used by a later patch in the series. All online cpus must
be considered "fast" for this static branch to be flipped.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 arch/riscv/include/asm/cpufeature.h |  2 +
 arch/riscv/kernel/cpufeature.c      | 89 +++++++++++++++++++++++++++++++++++--
 2 files changed, 87 insertions(+), 4 deletions(-)
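
For reference, the key declared by this patch is meant to be consumed by a
later patch in the series through the usual static-branch helpers. A minimal,
hypothetical consumer might look like the sketch below; do_partial_sum() and
the two sum_*() helpers are made-up placeholders, not functions from this
series.

#include <linux/types.h>
#include <linux/jump_label.h>
#include <asm/cpufeature.h>

/*
 * Hypothetical caller: take the misalignment-tolerant path only when the
 * key has been flipped on, i.e. when every online CPU was measured "fast".
 */
static u32 do_partial_sum(const void *buf, size_t len)
{
	if (static_branch_likely(&fast_misaligned_access_speed_key))
		return sum_allow_misaligned(buf, len);	/* hypothetical fast path */

	return sum_aligned_only(buf, len);		/* hypothetical fallback */
}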

Comments

Evan Green Jan. 8, 2024, 7:22 p.m. UTC | #1
On Wed, Dec 27, 2023 at 9:38 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>
> Support static branches depending on the value of misaligned accesses.
> This will be used by a later patch in the series. All online cpus must
> be considered "fast" for this static branch to be flipped.
>
> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>

This is fancier than I would have gone for; I probably would have
punted on heterogeneous hotplug out of laziness for now. However, what
you've done looks smart, in that we'll basically flip the branch if at
any moment all the online CPUs are fast. I've got some nits below, but
won't withhold my review for them (making them optional I suppose :)).

Reviewed-by: Evan Green <evan@rivosinc.com>

> ---
>  arch/riscv/include/asm/cpufeature.h |  2 +
>  arch/riscv/kernel/cpufeature.c      | 89 +++++++++++++++++++++++++++++++++++--
>  2 files changed, 87 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index a418c3112cd6..7b129e5e2f07 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -133,4 +133,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
>         return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
>  }
>
> +DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> +
>  #endif
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index b3785ffc1570..dfd716b93565 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -8,8 +8,10 @@
>
>  #include <linux/acpi.h>
>  #include <linux/bitmap.h>
> +#include <linux/cpu.h>
>  #include <linux/cpuhotplug.h>
>  #include <linux/ctype.h>
> +#include <linux/jump_label.h>
>  #include <linux/log2.h>
>  #include <linux/memory.h>
>  #include <linux/module.h>
> @@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS];
>  /* Performance information */
>  DEFINE_PER_CPU(long, misaligned_access_speed);
>
> +static cpumask_t fast_misaligned_access;
> +
>  /**
>   * riscv_isa_extension_base() - Get base extension word
>   *
> @@ -643,6 +647,16 @@ static int check_unaligned_access(void *param)
>                 (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
>
>         per_cpu(misaligned_access_speed, cpu) = speed;
> +
> +       /*
> +        * Set the value of fast_misaligned_access of a CPU. These operations
> +        * are atomic to avoid race conditions.
> +        */
> +       if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
> +               cpumask_set_cpu(cpu, &fast_misaligned_access);
> +       else
> +               cpumask_clear_cpu(cpu, &fast_misaligned_access);
> +
>         return 0;
>  }
>
> @@ -655,13 +669,70 @@ static void check_unaligned_access_nonboot_cpu(void *param)
>                 check_unaligned_access(pages[cpu]);
>  }
>
> +DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> +
> +static int exclude_set_unaligned_access_static_branches(int cpu)
> +{
> +       /*
> +        * Same as set_unaligned_access_static_branches, except excludes the
> +        * given CPU from the result. When a CPU is hotplugged into an offline
> +        * state, this function is called before the CPU is set to offline in
> +        * the cpumask, and thus the CPU needs to be explicitly excluded.
> +        */
> +
> +       cpumask_t online_fast_misaligned_access;
> +
> +       cpumask_and(&online_fast_misaligned_access, &fast_misaligned_access, cpu_online_mask);
> +       cpumask_clear_cpu(cpu, &online_fast_misaligned_access);
> +
> +       if (cpumask_weight(&online_fast_misaligned_access) == (num_online_cpus() - 1))
> +               static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
> +       else
> +               static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
> +
> +       return 0;
> +}

A minor nit: the functions above and below are looking a little
copy/pasty, and they lead to multiple spots where the static branch
gets changed. You could make a third function that actually does the
setting with parameters; then these two could call it in different
ways. The return types also don't need to be int, since you always
return 0. Something like:

static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
{
        if (cpumask_weight(mask) == weight) {
               static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
        } else {
               static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
        }
}

static void set_unaligned_access_branches(void)
{
        cpumask_t fast_and_online;

        cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
        modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
}

static void set_unaligned_access_branches_except_cpu(unsigned int cpu)
{
        cpumask_t fast_except_me;

        cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
        cpumask_clear_cpu(cpu, &fast_except_me);
        modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
}
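
With helpers like these, the two hotplug callbacks below could then reduce
to roughly the following (a sketch of how the suggestion might be applied,
not the code actually posted):

static int riscv_online_cpu(unsigned int cpu)
{
	/* ... measure this CPU's misaligned access speed, as in the patch ... */
	set_unaligned_access_branches();
	return 0;
}

static int riscv_offline_cpu(unsigned int cpu)
{
	set_unaligned_access_branches_except_cpu(cpu);
	return 0;
}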

> +
> +static int set_unaligned_access_static_branches(void)
> +{
> +       /*
> +        * This will be called after check_unaligned_access_all_cpus so the
> +        * result of unaligned access speed for all CPUs will be available.
> +        *
> +        * To avoid the number of online cpus changing between reading
> +        * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
> +        * held before calling this function.
> +        */
> +       cpumask_t online_fast_misaligned_access;
> +
> +       cpumask_and(&online_fast_misaligned_access, &fast_misaligned_access, cpu_online_mask);
> +
> +       if (cpumask_weight(&online_fast_misaligned_access) == num_online_cpus())
> +               static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
> +       else
> +               static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
> +
> +       return 0;
> +}
> +
> +static int lock_and_set_unaligned_access_static_branch(void)
> +{
> +       cpus_read_lock();
> +       set_unaligned_access_static_branches();
> +       cpus_read_unlock();
> +
> +       return 0;
> +}
> +
> +arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
> +
>  static int riscv_online_cpu(unsigned int cpu)
>  {
>         static struct page *buf;
>
>         /* We are already set since the last check */
>         if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
> -               return 0;
> +               goto exit;
>
>         buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
>         if (!buf) {
> @@ -671,7 +742,14 @@ static int riscv_online_cpu(unsigned int cpu)
>
>         check_unaligned_access(buf);
>         __free_pages(buf, MISALIGNED_BUFFER_ORDER);
> -       return 0;
> +
> +exit:
> +       return set_unaligned_access_static_branches();
> +}
> +
> +static int riscv_offline_cpu(unsigned int cpu)
> +{
> +       return exclude_set_unaligned_access_static_branches(cpu);
>  }
>
>  /* Measure unaligned access on all CPUs present at boot in parallel. */
> @@ -705,9 +783,12 @@ static int check_unaligned_access_all_cpus(void)
>         /* Check core 0. */
>         smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
>
> -       /* Setup hotplug callback for any new CPUs that come online. */
> +       /*
> +        * Setup hotplug callbacks for any new CPUs that come online or go
> +        * offline.
> +        */
>         cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> -                                 riscv_online_cpu, NULL);
> +                                 riscv_online_cpu, riscv_offline_cpu);
>
>  out:
>         unaligned_emulation_finish();
>
> --
> 2.43.0
>
Charlie Jenkins Jan. 8, 2024, 9:37 p.m. UTC | #2
On Mon, Jan 08, 2024 at 11:22:34AM -0800, Evan Green wrote:
> On Wed, Dec 27, 2023 at 9:38 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >
> > Support static branches depending on the value of misaligned accesses.
> > This will be used by a later patch in the series. All online cpus must
> > be considered "fast" for this static branch to be flipped.
> >
> > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> 
> This is fancier than I would have gone for; I probably would have
> punted on heterogeneous hotplug out of laziness for now. However, what
> you've done looks smart, in that we'll basically flip the branch if at
> any moment all the online CPUs are fast. I've got some nits below, but
> won't withhold my review for them (making them optional I suppose :)).
> 
> Reviewed-by: Evan Green <evan@rivosinc.com>
> 

Thanks!

> > ---
> >  arch/riscv/include/asm/cpufeature.h |  2 +
> >  arch/riscv/kernel/cpufeature.c      | 89 +++++++++++++++++++++++++++++++++++--
> >  2 files changed, 87 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> > index a418c3112cd6..7b129e5e2f07 100644
> > --- a/arch/riscv/include/asm/cpufeature.h
> > +++ b/arch/riscv/include/asm/cpufeature.h
> > @@ -133,4 +133,6 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
> >         return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> >  }
> >
> > +DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> > +
> >  #endif
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index b3785ffc1570..dfd716b93565 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -8,8 +8,10 @@
> >
> >  #include <linux/acpi.h>
> >  #include <linux/bitmap.h>
> > +#include <linux/cpu.h>
> >  #include <linux/cpuhotplug.h>
> >  #include <linux/ctype.h>
> > +#include <linux/jump_label.h>
> >  #include <linux/log2.h>
> >  #include <linux/memory.h>
> >  #include <linux/module.h>
> > @@ -44,6 +46,8 @@ struct riscv_isainfo hart_isa[NR_CPUS];
> >  /* Performance information */
> >  DEFINE_PER_CPU(long, misaligned_access_speed);
> >
> > +static cpumask_t fast_misaligned_access;
> > +
> >  /**
> >   * riscv_isa_extension_base() - Get base extension word
> >   *
> > @@ -643,6 +647,16 @@ static int check_unaligned_access(void *param)
> >                 (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
> >
> >         per_cpu(misaligned_access_speed, cpu) = speed;
> > +
> > +       /*
> > +        * Set the value of fast_misaligned_access of a CPU. These operations
> > +        * are atomic to avoid race conditions.
> > +        */
> > +       if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
> > +               cpumask_set_cpu(cpu, &fast_misaligned_access);
> > +       else
> > +               cpumask_clear_cpu(cpu, &fast_misaligned_access);
> > +
> >         return 0;
> >  }
> >
> > @@ -655,13 +669,70 @@ static void check_unaligned_access_nonboot_cpu(void *param)
> >                 check_unaligned_access(pages[cpu]);
> >  }
> >
> > +DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> > +
> > +static int exclude_set_unaligned_access_static_branches(int cpu)
> > +{
> > +       /*
> > +        * Same as set_unaligned_access_static_branches, except excludes the
> > +        * given CPU from the result. When a CPU is hotplugged into an offline
> > +        * state, this function is called before the CPU is set to offline in
> > +        * the cpumask, and thus the CPU needs to be explicitly excluded.
> > +        */
> > +
> > +       cpumask_t online_fast_misaligned_access;
> > +
> > +       cpumask_and(&online_fast_misaligned_access, &fast_misaligned_access, cpu_online_mask);
> > +       cpumask_clear_cpu(cpu, &online_fast_misaligned_access);
> > +
> > +       if (cpumask_weight(&online_fast_misaligned_access) == (num_online_cpus() - 1))
> > +               static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
> > +       else
> > +               static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
> > +
> > +       return 0;
> > +}
> 
> A minor nit: the functions above and below are looking a little
> copy/pasty, and they lead to multiple spots where the static branch
> gets changed. You could make a third function that actually does the
> setting with parameters; then these two could call it in different
> ways. The return types also don't need to be int, since you always
> return 0. Something like:
> 
> static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
> {
>         if (cpumask_weight(mask) == weight) {
>                static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
>         } else {
>                static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
>         }
> }
> 
> static void set_unaligned_access_branches(void)
> {
>         cpumask_t fast_and_online;
> 
>         cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
>         modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
> }
> 
> static void set_unaligned_access_branches_except_cpu(unsigned int cpu)
> {
>         cpumask_t fast_except_me;
> 
>         cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
>         cpumask_clear_cpu(cpu, &fast_except_me);
>         modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
> }
> 

Great suggestions; I will apply these changes and send out a new
version.

- Charlie


Patch

diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index a418c3112cd6..7b129e5e2f07 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -133,4 +133,6 @@  static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
 	return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
 }
 
+DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
+
 #endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index b3785ffc1570..dfd716b93565 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -8,8 +8,10 @@ 
 
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
+#include <linux/cpu.h>
 #include <linux/cpuhotplug.h>
 #include <linux/ctype.h>
+#include <linux/jump_label.h>
 #include <linux/log2.h>
 #include <linux/memory.h>
 #include <linux/module.h>
@@ -44,6 +46,8 @@  struct riscv_isainfo hart_isa[NR_CPUS];
 /* Performance information */
 DEFINE_PER_CPU(long, misaligned_access_speed);
 
+static cpumask_t fast_misaligned_access;
+
 /**
  * riscv_isa_extension_base() - Get base extension word
  *
@@ -643,6 +647,16 @@  static int check_unaligned_access(void *param)
 		(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
 
 	per_cpu(misaligned_access_speed, cpu) = speed;
+
+	/*
+	 * Set the value of fast_misaligned_access of a CPU. These operations
+	 * are atomic to avoid race conditions.
+	 */
+	if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
+		cpumask_set_cpu(cpu, &fast_misaligned_access);
+	else
+		cpumask_clear_cpu(cpu, &fast_misaligned_access);
+
 	return 0;
 }
 
@@ -655,13 +669,70 @@  static void check_unaligned_access_nonboot_cpu(void *param)
 		check_unaligned_access(pages[cpu]);
 }
 
+DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
+
+static int exclude_set_unaligned_access_static_branches(int cpu)
+{
+	/*
+	 * Same as set_unaligned_access_static_branches, except excludes the
+	 * given CPU from the result. When a CPU is hotplugged into an offline
+	 * state, this function is called before the CPU is set to offline in
+	 * the cpumask, and thus the CPU needs to be explicitly excluded.
+	 */
+
+	cpumask_t online_fast_misaligned_access;
+
+	cpumask_and(&online_fast_misaligned_access, &fast_misaligned_access, cpu_online_mask);
+	cpumask_clear_cpu(cpu, &online_fast_misaligned_access);
+
+	if (cpumask_weight(&online_fast_misaligned_access) == (num_online_cpus() - 1))
+		static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
+	else
+		static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
+
+	return 0;
+}
+
+static int set_unaligned_access_static_branches(void)
+{
+	/*
+	 * This will be called after check_unaligned_access_all_cpus so the
+	 * result of unaligned access speed for all CPUs will be available.
+	 *
+	 * To avoid the number of online cpus changing between reading
+	 * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
+	 * held before calling this function.
+	 */
+	cpumask_t online_fast_misaligned_access;
+
+	cpumask_and(&online_fast_misaligned_access, &fast_misaligned_access, cpu_online_mask);
+
+	if (cpumask_weight(&online_fast_misaligned_access) == num_online_cpus())
+		static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
+	else
+		static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
+
+	return 0;
+}
+
+static int lock_and_set_unaligned_access_static_branch(void)
+{
+	cpus_read_lock();
+	set_unaligned_access_static_branches();
+	cpus_read_unlock();
+
+	return 0;
+}
+
+arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
+
 static int riscv_online_cpu(unsigned int cpu)
 {
 	static struct page *buf;
 
 	/* We are already set since the last check */
 	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
-		return 0;
+		goto exit;
 
 	buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
 	if (!buf) {
@@ -671,7 +742,14 @@  static int riscv_online_cpu(unsigned int cpu)
 
 	check_unaligned_access(buf);
 	__free_pages(buf, MISALIGNED_BUFFER_ORDER);
-	return 0;
+
+exit:
+	return set_unaligned_access_static_branches();
+}
+
+static int riscv_offline_cpu(unsigned int cpu)
+{
+	return exclude_set_unaligned_access_static_branches(cpu);
 }
 
 /* Measure unaligned access on all CPUs present at boot in parallel. */
@@ -705,9 +783,12 @@  static int check_unaligned_access_all_cpus(void)
 	/* Check core 0. */
 	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
 
-	/* Setup hotplug callback for any new CPUs that come online. */
+	/*
+	 * Setup hotplug callbacks for any new CPUs that come online or go
+	 * offline.
+	 */
 	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
-				  riscv_online_cpu, NULL);
+				  riscv_online_cpu, riscv_offline_cpu);
 
 out:
 	unaligned_emulation_finish();