diff mbox series

[v3,8/9] riscv: introduce interfaces to patch kernel code

Message ID d27d9e68491e1df67dbee6c22df6a72ff95bab18.1583772574.git.zong.li@sifive.com (mailing list archive)
State New, archived
Headers show
Series Support strict kernel memory permissions for security | expand

Commit Message

Zong Li March 9, 2020, 4:55 p.m. UTC
On strict kernel memory permission, we couldn't patch code without
writable permission. Preserve two holes in fixmap area, so we can map
the kernel code temporarily to fixmap area, then patch the instructions.

We need two pages here because we support the compressed instruction, so
the instruction might be align to 2 bytes. When patching the 32-bit
length instruction which is 2 bytes alignment, it will across two pages.

Introduce two interfaces to patch kernel code:
riscv_patch_text_nosync:
 - patch code without synchronization, it's caller's responsibility to
   synchronize all CPUs if needed.
riscv_patch_text:
 - patch code and always synchronize with stop_machine()

Signed-off-by: Zong Li <zong.li@sifive.com>
---
 arch/riscv/include/asm/fixmap.h |   2 +
 arch/riscv/include/asm/patch.h  |  12 ++++
 arch/riscv/kernel/Makefile      |   4 +-
 arch/riscv/kernel/patch.c       | 120 ++++++++++++++++++++++++++++++++
 4 files changed, 137 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/patch.h
 create mode 100644 arch/riscv/kernel/patch.c

Comments

Masami Hiramatsu (Google) March 31, 2020, 3:32 p.m. UTC | #1
Hi,

On Tue, 10 Mar 2020 00:55:43 +0800
Zong Li <zong.li@sifive.com> wrote:

> On strict kernel memory permission, we couldn't patch code without
> writable permission. Preserve two holes in fixmap area, so we can map
> the kernel code temporarily to fixmap area, then patch the instructions.
> 
> We need two pages here because we support the compressed instruction, so
> the instruction might be align to 2 bytes. When patching the 32-bit
> length instruction which is 2 bytes alignment, it will across two pages.
> 
> Introduce two interfaces to patch kernel code:
> riscv_patch_text_nosync:
>  - patch code without synchronization, it's caller's responsibility to
>    synchronize all CPUs if needed.
> riscv_patch_text:
>  - patch code and always synchronize with stop_machine()
> 
> Signed-off-by: Zong Li <zong.li@sifive.com>
> ---
>  arch/riscv/include/asm/fixmap.h |   2 +
>  arch/riscv/include/asm/patch.h  |  12 ++++
>  arch/riscv/kernel/Makefile      |   4 +-
>  arch/riscv/kernel/patch.c       | 120 ++++++++++++++++++++++++++++++++
>  4 files changed, 137 insertions(+), 1 deletion(-)
>  create mode 100644 arch/riscv/include/asm/patch.h
>  create mode 100644 arch/riscv/kernel/patch.c
> 
> diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> index 42d2c42f3cc9..2368d49eb4ef 100644
> --- a/arch/riscv/include/asm/fixmap.h
> +++ b/arch/riscv/include/asm/fixmap.h
> @@ -27,6 +27,8 @@ enum fixed_addresses {
>  	FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
>  	FIX_PTE,
>  	FIX_PMD,
> +	FIX_TEXT_POKE1,
> +	FIX_TEXT_POKE0,
>  	FIX_EARLYCON_MEM_BASE,
>  	__end_of_fixed_addresses
>  };
> diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
> new file mode 100644
> index 000000000000..b5918a6e0615
> --- /dev/null
> +++ b/arch/riscv/include/asm/patch.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2020 SiFive
> + */
> +
> +#ifndef _ASM_RISCV_PATCH_H
> +#define _ASM_RISCV_PATCH_H
> +
> +int riscv_patch_text_nosync(void *addr, const void *insns, size_t len);
> +int riscv_patch_text(void *addr, u32 insn);
> +
> +#endif /* _ASM_RISCV_PATCH_H */
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index f40205cb9a22..d189bd3d8501 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -4,7 +4,8 @@
>  #
>  
>  ifdef CONFIG_FTRACE
> -CFLAGS_REMOVE_ftrace.o = -pg
> +CFLAGS_REMOVE_ftrace.o	= -pg
> +CFLAGS_REMOVE_patch.o	= -pg
>  endif
>  
>  extra-y += head.o
> @@ -26,6 +27,7 @@ obj-y	+= traps.o
>  obj-y	+= riscv_ksyms.o
>  obj-y	+= stacktrace.o
>  obj-y	+= cacheinfo.o
> +obj-y	+= patch.o
>  obj-$(CONFIG_MMU) += vdso.o vdso/
>  
>  obj-$(CONFIG_RISCV_M_MODE)	+= clint.o
> diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> new file mode 100644
> index 000000000000..8a4fc65ee022
> --- /dev/null
> +++ b/arch/riscv/kernel/patch.c
> @@ -0,0 +1,120 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2020 SiFive
> + */
> +
> +#include <linux/spinlock.h>
> +#include <linux/mm.h>
> +#include <linux/uaccess.h>
> +#include <linux/stop_machine.h>
> +#include <asm/kprobes.h>
> +#include <asm/cacheflush.h>
> +#include <asm/fixmap.h>
> +
> +struct riscv_insn_patch {
> +	void *addr;
> +	u32 insn;
> +	atomic_t cpu_count;
> +};
> +
> +#ifdef CONFIG_MMU
> +static DEFINE_RAW_SPINLOCK(patch_lock);
> +
> +static void __kprobes *patch_map(void *addr, int fixmap)

Please use NOKPROBE_SYMBOL() instead of __kprobes. __kprobes is old style.

> +{
> +	uintptr_t uintaddr = (uintptr_t) addr;
> +	struct page *page;
> +
> +	if (core_kernel_text(uintaddr))
> +		page = phys_to_page(__pa_symbol(addr));
> +	else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
> +		page = vmalloc_to_page(addr);
> +	else
> +		return addr;
> +
> +	BUG_ON(!page);
> +
> +	return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
> +					 (uintaddr & ~PAGE_MASK));
> +}
> +
> +static void __kprobes patch_unmap(int fixmap)
> +{
> +	clear_fixmap(fixmap);
> +}
> +
> +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)

Why would you add "riscv_" prefix for those functions? It seems a bit odd.

> +{
> +	void *waddr = addr;
> +	bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> +	unsigned long flags = 0;
> +	int ret;
> +
> +	raw_spin_lock_irqsave(&patch_lock, flags);

This looks a bit odd since stop_machine() is protected by its own mutex,
and also the irq is already disabled here.

Thank you,

> +
> +	if (across_pages)
> +		patch_map(addr + len, FIX_TEXT_POKE1);
> +
> +	waddr = patch_map(addr, FIX_TEXT_POKE0);
> +
> +	ret = probe_kernel_write(waddr, insn, len);
> +
> +	patch_unmap(FIX_TEXT_POKE0);
> +
> +	if (across_pages)
> +		patch_unmap(FIX_TEXT_POKE1);
> +
> +	raw_spin_unlock_irqrestore(&patch_lock, flags);
> +
> +	return ret;
> +}
> +#else
> +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
> +{
> +	return probe_kernel_write(addr, insn, len);
> +}
> +#endif /* CONFIG_MMU */
> +
> +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len)
> +{
> +	u32 *tp = addr;
> +	int ret;
> +
> +	ret = riscv_insn_write(tp, insns, len);
> +
> +	if (!ret)
> +		flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> +
> +	return ret;
> +}
> +
> +static int __kprobes riscv_patch_text_cb(void *data)
> +{
> +	struct riscv_insn_patch *patch = data;
> +	int ret = 0;
> +
> +	if (atomic_inc_return(&patch->cpu_count) == 1) {
> +		ret =
> +		    riscv_patch_text_nosync(patch->addr, &patch->insn,
> +					    GET_INSN_LENGTH(patch->insn));
> +		atomic_inc(&patch->cpu_count);
> +	} else {
> +		while (atomic_read(&patch->cpu_count) <= num_online_cpus())
> +			cpu_relax();
> +		smp_mb();
> +	}
> +
> +	return ret;
> +}
> +
> +int __kprobes riscv_patch_text(void *addr, u32 insn)
> +{
> +	struct riscv_insn_patch patch = {
> +		.addr = addr,
> +		.insn = insn,
> +		.cpu_count = ATOMIC_INIT(0),
> +	};
> +
> +	return stop_machine_cpuslocked(riscv_patch_text_cb,
> +				       &patch, cpu_online_mask);
> +}
> -- 
> 2.25.1
>
Zong Li April 1, 2020, 7:42 a.m. UTC | #2
On Tue, Mar 31, 2020 at 11:32 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> Hi,
>
> On Tue, 10 Mar 2020 00:55:43 +0800
> Zong Li <zong.li@sifive.com> wrote:
>
> > On strict kernel memory permission, we couldn't patch code without
> > writable permission. Preserve two holes in fixmap area, so we can map
> > the kernel code temporarily to fixmap area, then patch the instructions.
> >
> > We need two pages here because we support the compressed instruction, so
> > the instruction might be align to 2 bytes. When patching the 32-bit
> > length instruction which is 2 bytes alignment, it will across two pages.
> >
> > Introduce two interfaces to patch kernel code:
> > riscv_patch_text_nosync:
> >  - patch code without synchronization, it's caller's responsibility to
> >    synchronize all CPUs if needed.
> > riscv_patch_text:
> >  - patch code and always synchronize with stop_machine()
> >
> > Signed-off-by: Zong Li <zong.li@sifive.com>
> > ---
> >  arch/riscv/include/asm/fixmap.h |   2 +
> >  arch/riscv/include/asm/patch.h  |  12 ++++
> >  arch/riscv/kernel/Makefile      |   4 +-
> >  arch/riscv/kernel/patch.c       | 120 ++++++++++++++++++++++++++++++++
> >  4 files changed, 137 insertions(+), 1 deletion(-)
> >  create mode 100644 arch/riscv/include/asm/patch.h
> >  create mode 100644 arch/riscv/kernel/patch.c
> >
> > diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> > index 42d2c42f3cc9..2368d49eb4ef 100644
> > --- a/arch/riscv/include/asm/fixmap.h
> > +++ b/arch/riscv/include/asm/fixmap.h
> > @@ -27,6 +27,8 @@ enum fixed_addresses {
> >       FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
> >       FIX_PTE,
> >       FIX_PMD,
> > +     FIX_TEXT_POKE1,
> > +     FIX_TEXT_POKE0,
> >       FIX_EARLYCON_MEM_BASE,
> >       __end_of_fixed_addresses
> >  };
> > diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
> > new file mode 100644
> > index 000000000000..b5918a6e0615
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/patch.h
> > @@ -0,0 +1,12 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Copyright (C) 2020 SiFive
> > + */
> > +
> > +#ifndef _ASM_RISCV_PATCH_H
> > +#define _ASM_RISCV_PATCH_H
> > +
> > +int riscv_patch_text_nosync(void *addr, const void *insns, size_t len);
> > +int riscv_patch_text(void *addr, u32 insn);
> > +
> > +#endif /* _ASM_RISCV_PATCH_H */
> > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > index f40205cb9a22..d189bd3d8501 100644
> > --- a/arch/riscv/kernel/Makefile
> > +++ b/arch/riscv/kernel/Makefile
> > @@ -4,7 +4,8 @@
> >  #
> >
> >  ifdef CONFIG_FTRACE
> > -CFLAGS_REMOVE_ftrace.o = -pg
> > +CFLAGS_REMOVE_ftrace.o       = -pg
> > +CFLAGS_REMOVE_patch.o        = -pg
> >  endif
> >
> >  extra-y += head.o
> > @@ -26,6 +27,7 @@ obj-y       += traps.o
> >  obj-y        += riscv_ksyms.o
> >  obj-y        += stacktrace.o
> >  obj-y        += cacheinfo.o
> > +obj-y        += patch.o
> >  obj-$(CONFIG_MMU) += vdso.o vdso/
> >
> >  obj-$(CONFIG_RISCV_M_MODE)   += clint.o
> > diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> > new file mode 100644
> > index 000000000000..8a4fc65ee022
> > --- /dev/null
> > +++ b/arch/riscv/kernel/patch.c
> > @@ -0,0 +1,120 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2020 SiFive
> > + */
> > +
> > +#include <linux/spinlock.h>
> > +#include <linux/mm.h>
> > +#include <linux/uaccess.h>
> > +#include <linux/stop_machine.h>
> > +#include <asm/kprobes.h>
> > +#include <asm/cacheflush.h>
> > +#include <asm/fixmap.h>
> > +
> > +struct riscv_insn_patch {
> > +     void *addr;
> > +     u32 insn;
> > +     atomic_t cpu_count;
> > +};
> > +
> > +#ifdef CONFIG_MMU
> > +static DEFINE_RAW_SPINLOCK(patch_lock);
> > +
> > +static void __kprobes *patch_map(void *addr, int fixmap)
>
> Please use NOKPROBE_SYMBOL() instead of __kprobes. __kprobes is old style.
>
> > +{
> > +     uintptr_t uintaddr = (uintptr_t) addr;
> > +     struct page *page;
> > +
> > +     if (core_kernel_text(uintaddr))
> > +             page = phys_to_page(__pa_symbol(addr));
> > +     else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
> > +             page = vmalloc_to_page(addr);
> > +     else
> > +             return addr;
> > +
> > +     BUG_ON(!page);
> > +
> > +     return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
> > +                                      (uintaddr & ~PAGE_MASK));
> > +}
> > +
> > +static void __kprobes patch_unmap(int fixmap)
> > +{
> > +     clear_fixmap(fixmap);
> > +}
> > +
> > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
>
> Why would you add "riscv_" prefix for those functions? It seems a bit odd.

There is no particular reason, I just was used to adding a prefix for
arch-related stuff. I have no preference here, it's OK to me to remove
the prefix of these functions, do you think we need to remove them?

>
> > +{
> > +     void *waddr = addr;
> > +     bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> > +     unsigned long flags = 0;
> > +     int ret;
> > +
> > +     raw_spin_lock_irqsave(&patch_lock, flags);
>
> This looks a bit odd since stop_machine() is protected by its own mutex,
> and also the irq is already disabled here.

We need it because we don't always enter the riscv_patch_text_nosync()
through stop_machine mechanism. If we call the
riscv_patch_text_nosync() directly, we need a lock to protect the
page.

>
> Thank you,
>
> > +
> > +     if (across_pages)
> > +             patch_map(addr + len, FIX_TEXT_POKE1);
> > +
> > +     waddr = patch_map(addr, FIX_TEXT_POKE0);
> > +
> > +     ret = probe_kernel_write(waddr, insn, len);
> > +
> > +     patch_unmap(FIX_TEXT_POKE0);
> > +
> > +     if (across_pages)
> > +             patch_unmap(FIX_TEXT_POKE1);
> > +
> > +     raw_spin_unlock_irqrestore(&patch_lock, flags);
> > +
> > +     return ret;
> > +}
> > +#else
> > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
> > +{
> > +     return probe_kernel_write(addr, insn, len);
> > +}
> > +#endif /* CONFIG_MMU */
> > +
> > +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len)
> > +{
> > +     u32 *tp = addr;
> > +     int ret;
> > +
> > +     ret = riscv_insn_write(tp, insns, len);
> > +
> > +     if (!ret)
> > +             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> > +
> > +     return ret;
> > +}
> > +
> > +static int __kprobes riscv_patch_text_cb(void *data)
> > +{
> > +     struct riscv_insn_patch *patch = data;
> > +     int ret = 0;
> > +
> > +     if (atomic_inc_return(&patch->cpu_count) == 1) {
> > +             ret =
> > +                 riscv_patch_text_nosync(patch->addr, &patch->insn,
> > +                                         GET_INSN_LENGTH(patch->insn));
> > +             atomic_inc(&patch->cpu_count);
> > +     } else {
> > +             while (atomic_read(&patch->cpu_count) <= num_online_cpus())
> > +                     cpu_relax();
> > +             smp_mb();
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +int __kprobes riscv_patch_text(void *addr, u32 insn)
> > +{
> > +     struct riscv_insn_patch patch = {
> > +             .addr = addr,
> > +             .insn = insn,
> > +             .cpu_count = ATOMIC_INIT(0),
> > +     };
> > +
> > +     return stop_machine_cpuslocked(riscv_patch_text_cb,
> > +                                    &patch, cpu_online_mask);
> > +}
> > --
> > 2.25.1
> >
>
>
> --
> Masami Hiramatsu <mhiramat@kernel.org>
Masami Hiramatsu (Google) April 2, 2020, 1:17 a.m. UTC | #3
Hi,

On Wed, 1 Apr 2020 15:42:30 +0800
Zong Li <zong.li@sifive.com> wrote:

> > > +
> > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
> >
> > Why would you add "riscv_" prefix for those functions? It seems a bit odd.
> 
> There is no particular reason, I just was used to adding a prefix for
> arch-related stuff. I have no preference here, it's OK to me to remove
> the prefix of these functions, do you think we need to remove them?

Yeah, it will be better, unless it can mixed up with arch-independent
functions.

> > > +{
> > > +     void *waddr = addr;
> > > +     bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> > > +     unsigned long flags = 0;
> > > +     int ret;
> > > +
> > > +     raw_spin_lock_irqsave(&patch_lock, flags);
> >
> > This looks a bit odd since stop_machine() is protected by its own mutex,
> > and also the irq is already disabled here.
> 
> We need it because we don't always enter the riscv_patch_text_nosync()
> through stop_machine mechanism. If we call the
> riscv_patch_text_nosync() directly, we need a lock to protect the
> page.

Oh, OK, but it leads another question. Is that safe to patch the
text without sync? Would you use it for UP system?
I think it is better to clarify "in what case user can call _nosync()"
and add a comment on it.

Thank you,

> 
> >
> > Thank you,
> >
> > > +
> > > +     if (across_pages)
> > > +             patch_map(addr + len, FIX_TEXT_POKE1);
> > > +
> > > +     waddr = patch_map(addr, FIX_TEXT_POKE0);
> > > +
> > > +     ret = probe_kernel_write(waddr, insn, len);
> > > +
> > > +     patch_unmap(FIX_TEXT_POKE0);
> > > +
> > > +     if (across_pages)
> > > +             patch_unmap(FIX_TEXT_POKE1);
> > > +
> > > +     raw_spin_unlock_irqrestore(&patch_lock, flags);
> > > +
> > > +     return ret;
> > > +}
> > > +#else
> > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
> > > +{
> > > +     return probe_kernel_write(addr, insn, len);
> > > +}
> > > +#endif /* CONFIG_MMU */
> > > +
> > > +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len)
> > > +{
> > > +     u32 *tp = addr;
> > > +     int ret;
> > > +
> > > +     ret = riscv_insn_write(tp, insns, len);
> > > +
> > > +     if (!ret)
> > > +             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> > > +
> > > +     return ret;
> > > +}
> > > +
> > > +static int __kprobes riscv_patch_text_cb(void *data)
> > > +{
> > > +     struct riscv_insn_patch *patch = data;
> > > +     int ret = 0;
> > > +
> > > +     if (atomic_inc_return(&patch->cpu_count) == 1) {
> > > +             ret =
> > > +                 riscv_patch_text_nosync(patch->addr, &patch->insn,
> > > +                                         GET_INSN_LENGTH(patch->insn));
> > > +             atomic_inc(&patch->cpu_count);
> > > +     } else {
> > > +             while (atomic_read(&patch->cpu_count) <= num_online_cpus())
> > > +                     cpu_relax();
> > > +             smp_mb();
> > > +     }
> > > +
> > > +     return ret;
> > > +}
> > > +
> > > +int __kprobes riscv_patch_text(void *addr, u32 insn)
> > > +{
> > > +     struct riscv_insn_patch patch = {
> > > +             .addr = addr,
> > > +             .insn = insn,
> > > +             .cpu_count = ATOMIC_INIT(0),
> > > +     };
> > > +
> > > +     return stop_machine_cpuslocked(riscv_patch_text_cb,
> > > +                                    &patch, cpu_online_mask);
> > > +}
> > > --
> > > 2.25.1
> > >
> >
> >
> > --
> > Masami Hiramatsu <mhiramat@kernel.org>
Zong Li April 3, 2020, 9:04 a.m. UTC | #4
On Thu, Apr 2, 2020 at 9:17 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> Hi,
>
> On Wed, 1 Apr 2020 15:42:30 +0800
> Zong Li <zong.li@sifive.com> wrote:
>
> > > > +
> > > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
> > >
> > > Why would you add "riscv_" prefix for those functions? It seems a bit odd.
> >
> > There is no particular reason, I just was used to adding a prefix for
> > arch-related stuff. I have no preference here, it's OK to me to remove
> > the prefix of these functions, do you think we need to remove them?
>
> Yeah, it will be better, unless it can mixed up with arch-independent
> functions.

OK. I'll remove it and use NOKPROBE_SYMBOL() instead of __kprobes annotation.

>
> > > > +{
> > > > +     void *waddr = addr;
> > > > +     bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> > > > +     unsigned long flags = 0;
> > > > +     int ret;
> > > > +
> > > > +     raw_spin_lock_irqsave(&patch_lock, flags);
> > >
> > > This looks a bit odd since stop_machine() is protected by its own mutex,
> > > and also the irq is already disabled here.
> >
> > We need it because we don't always enter the riscv_patch_text_nosync()
> > through stop_machine mechanism. If we call the
> > riscv_patch_text_nosync() directly, we need a lock to protect the
> > page.
>
> Oh, OK, but it leads another question. Is that safe to patch the
> text without sync? Would you use it for UP system?
> I think it is better to clarify "in what case user can call _nosync()"
> and add a comment on it.

The ftrace is one of the cases, as documentation of ftrace said, when
dynamic ftrace is initialized, it calls kstop_machine to make the
machine act like a uniprocessor so that it can freely modify code
without worrying about other processors executing that same code. So
the ftrace called the _nosync interface here directly.

>
> Thank you,
>
> >
> > >
> > > Thank you,
> > >
> > > > +
> > > > +     if (across_pages)
> > > > +             patch_map(addr + len, FIX_TEXT_POKE1);
> > > > +
> > > > +     waddr = patch_map(addr, FIX_TEXT_POKE0);
> > > > +
> > > > +     ret = probe_kernel_write(waddr, insn, len);
> > > > +
> > > > +     patch_unmap(FIX_TEXT_POKE0);
> > > > +
> > > > +     if (across_pages)
> > > > +             patch_unmap(FIX_TEXT_POKE1);
> > > > +
> > > > +     raw_spin_unlock_irqrestore(&patch_lock, flags);
> > > > +
> > > > +     return ret;
> > > > +}
> > > > +#else
> > > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
> > > > +{
> > > > +     return probe_kernel_write(addr, insn, len);
> > > > +}
> > > > +#endif /* CONFIG_MMU */
> > > > +
> > > > +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len)
> > > > +{
> > > > +     u32 *tp = addr;
> > > > +     int ret;
> > > > +
> > > > +     ret = riscv_insn_write(tp, insns, len);
> > > > +
> > > > +     if (!ret)
> > > > +             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> > > > +
> > > > +     return ret;
> > > > +}
> > > > +
> > > > +static int __kprobes riscv_patch_text_cb(void *data)
> > > > +{
> > > > +     struct riscv_insn_patch *patch = data;
> > > > +     int ret = 0;
> > > > +
> > > > +     if (atomic_inc_return(&patch->cpu_count) == 1) {
> > > > +             ret =
> > > > +                 riscv_patch_text_nosync(patch->addr, &patch->insn,
> > > > +                                         GET_INSN_LENGTH(patch->insn));
> > > > +             atomic_inc(&patch->cpu_count);
> > > > +     } else {
> > > > +             while (atomic_read(&patch->cpu_count) <= num_online_cpus())
> > > > +                     cpu_relax();
> > > > +             smp_mb();
> > > > +     }
> > > > +
> > > > +     return ret;
> > > > +}
> > > > +
> > > > +int __kprobes riscv_patch_text(void *addr, u32 insn)
> > > > +{
> > > > +     struct riscv_insn_patch patch = {
> > > > +             .addr = addr,
> > > > +             .insn = insn,
> > > > +             .cpu_count = ATOMIC_INIT(0),
> > > > +     };
> > > > +
> > > > +     return stop_machine_cpuslocked(riscv_patch_text_cb,
> > > > +                                    &patch, cpu_online_mask);
> > > > +}
> > > > --
> > > > 2.25.1
> > > >
> > >
> > >
> > > --
> > > Masami Hiramatsu <mhiramat@kernel.org>
>
>
> --
> Masami Hiramatsu <mhiramat@kernel.org>
Masami Hiramatsu (Google) April 4, 2020, 3:14 a.m. UTC | #5
Hi Zong,

On Fri, 3 Apr 2020 17:04:51 +0800
Zong Li <zong.li@sifive.com> wrote:

> > > > > +{
> > > > > +     void *waddr = addr;
> > > > > +     bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> > > > > +     unsigned long flags = 0;
> > > > > +     int ret;
> > > > > +
> > > > > +     raw_spin_lock_irqsave(&patch_lock, flags);
> > > >
> > > > This looks a bit odd since stop_machine() is protected by its own mutex,
> > > > and also the irq is already disabled here.
> > >
> > > We need it because we don't always enter the riscv_patch_text_nosync()
> > > through stop_machine mechanism. If we call the
> > > riscv_patch_text_nosync() directly, we need a lock to protect the
> > > page.
> >
> > Oh, OK, but it leads another question. Is that safe to patch the
> > text without sync? Would you use it for UP system?
> > I think it is better to clarify "in what case user can call _nosync()"
> > and add a comment on it.
> 
> The ftrace is one of the cases, as documentation of ftrace said, when
> dynamic ftrace is initialized, it calls kstop_machine to make the
> machine act like a uniprocessor so that it can freely modify code
> without worrying about other processors executing that same code. So
> the ftrace called the _nosync interface here directly.

Hmm, even though, since it already running under kstop_machine(), no
other thread will run. 
Could you consider to use text_mutex instead of that? The text_mutex
is already widely used in x86 and kernel/kprobes.c etc.

(Hmm, it seems except for x86, alternative code don't care about
 racing...)

Thank you,
Zong Li April 4, 2020, 12:12 p.m. UTC | #6
On Sat, Apr 4, 2020 at 11:14 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> Hi Zong,
>
> On Fri, 3 Apr 2020 17:04:51 +0800
> Zong Li <zong.li@sifive.com> wrote:
>
> > > > > > +{
> > > > > > +     void *waddr = addr;
> > > > > > +     bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> > > > > > +     unsigned long flags = 0;
> > > > > > +     int ret;
> > > > > > +
> > > > > > +     raw_spin_lock_irqsave(&patch_lock, flags);
> > > > >
> > > > > This looks a bit odd since stop_machine() is protected by its own mutex,
> > > > > and also the irq is already disabled here.
> > > >
> > > > We need it because we don't always enter the riscv_patch_text_nosync()
> > > > through stop_machine mechanism. If we call the
> > > > riscv_patch_text_nosync() directly, we need a lock to protect the
> > > > page.
> > >
> > > Oh, OK, but it leads another question. Is that safe to patch the
> > > text without sync? Would you use it for UP system?
> > > I think it is better to clarify "in what case user can call _nosync()"
> > > and add a comment on it.
> >
> > The ftrace is one of the cases, as documentation of ftrace said, when
> > dynamic ftrace is initialized, it calls kstop_machine to make the
> > machine act like a uniprocessor so that it can freely modify code
> > without worrying about other processors executing that same code. So
> > the ftrace called the _nosync interface here directly.
>
> Hmm, even though, since it already running under kstop_machine(), no
> other thread will run.
> Could you consider to use text_mutex instead of that? The text_mutex
> is already widely used in x86 and kernel/kprobes.c etc.
>
> (Hmm, it seems except for x86, alternative code don't care about
>  racing...)
>

Yes, text_mutex seems to be great. I'll change to use text_mutex in
the next version if it works fine after testing. Thanks.

> Thank you,
> --
> Masami Hiramatsu <mhiramat@kernel.org>
Zong Li April 6, 2020, 10:36 a.m. UTC | #7
On Sat, Apr 4, 2020 at 8:12 PM Zong Li <zong.li@sifive.com> wrote:
>
> On Sat, Apr 4, 2020 at 11:14 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> >
> > Hi Zong,
> >
> > On Fri, 3 Apr 2020 17:04:51 +0800
> > Zong Li <zong.li@sifive.com> wrote:
> >
> > > > > > > +{
> > > > > > > +     void *waddr = addr;
> > > > > > > +     bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> > > > > > > +     unsigned long flags = 0;
> > > > > > > +     int ret;
> > > > > > > +
> > > > > > > +     raw_spin_lock_irqsave(&patch_lock, flags);
> > > > > >
> > > > > > This looks a bit odd since stop_machine() is protected by its own mutex,
> > > > > > and also the irq is already disabled here.
> > > > >
> > > > > We need it because we don't always enter the riscv_patch_text_nosync()
> > > > > through stop_machine mechanism. If we call the
> > > > > riscv_patch_text_nosync() directly, we need a lock to protect the
> > > > > page.
> > > >
> > > > Oh, OK, but it leads another question. Is that safe to patch the
> > > > text without sync? Would you use it for UP system?
> > > > I think it is better to clarify "in what case user can call _nosync()"
> > > > and add a comment on it.
> > >
> > > The ftrace is one of the cases, as documentation of ftrace said, when
> > > dynamic ftrace is initialized, it calls kstop_machine to make the
> > > machine act like a uniprocessor so that it can freely modify code
> > > without worrying about other processors executing that same code. So
> > > the ftrace called the _nosync interface here directly.
> >
> > Hmm, even though, since it already running under kstop_machine(), no
> > other thread will run.
> > Could you consider to use text_mutex instead of that? The text_mutex
> > is already widely used in x86 and kernel/kprobes.c etc.
> >
> > (Hmm, it seems except for x86, alternative code don't care about
> >  racing...)
> >

The mutex_lock doesn't seem to work in ftrace context, I think it
might be the reason why other architectures didn't use text_mutex in
somewhere.

# echo function > current_tracer
[   28.198070] BUG: sleeping function called from invalid context at
kernel/locking/mutex.c:281
[   28.198663] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid:
11, name: migration/0
[   28.199491] CPU: 0 PID: 11 Comm: migration/0 Not tainted
5.6.0-00012-gd6f56a7a4be2-dirty #10
[   28.200330] Call Trace:
[   28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc
[   28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46
[   28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc
[   28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46
[   28.201898] [<ffffffe000d498b0>] dump_stack+0x76/0x90
[   28.202329] [<ffffffe00062c3f0>] ___might_sleep+0x100/0x10e
[   28.202720] [<ffffffe00062c448>] __might_sleep+0x4a/0x78
[   28.203033] [<ffffffe000d61622>] mutex_lock+0x2c/0x54
[   28.203397] [<ffffffe00060393e>] patch_insn_write+0x32/0xd8
[   28.203780] [<ffffffe000603a94>] patch_text_nosync+0x10/0x32
[   28.204139] [<ffffffe0006051b0>] __ftrace_modify_call+0x5c/0x6c
[   28.204497] [<ffffffe0006052c6>] ftrace_update_ftrace_func+0x20/0x4a
[   28.204919] [<ffffffe000697742>] ftrace_modify_all_code+0xa0/0x148
[   28.205378] [<ffffffe0006977fc>] __ftrace_modify_code+0x12/0x1c
[   28.205793] [<ffffffe0006924b6>] multi_cpu_stop+0xa2/0x158
[   28.206147] [<ffffffe0006921b0>] cpu_stopper_thread+0xa4/0x13a
[   28.206510] [<ffffffe000629f38>] smpboot_thread_fn+0xf8/0x1da
[   28.206868] [<ffffffe000625f36>] kthread+0xfa/0x12a
[   28.207201] [<ffffffe0006017e2>] ret_from_exception+0x0/0xc

>
> Yes, text_mutex seems to be great. I'll change to use text_mutex in
> the next version if it works fine after testing. Thanks.
>
> > Thank you,
> > --
> > Masami Hiramatsu <mhiramat@kernel.org>
Masami Hiramatsu (Google) April 7, 2020, 12:29 p.m. UTC | #8
On Mon, 6 Apr 2020 18:36:42 +0800
Zong Li <zong.li@sifive.com> wrote:

> On Sat, Apr 4, 2020 at 8:12 PM Zong Li <zong.li@sifive.com> wrote:
> >
> > On Sat, Apr 4, 2020 at 11:14 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > >
> > > Hi Zong,
> > >
> > > On Fri, 3 Apr 2020 17:04:51 +0800
> > > Zong Li <zong.li@sifive.com> wrote:
> > >
> > > > > > > > +{
> > > > > > > > +     void *waddr = addr;
> > > > > > > > +     bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> > > > > > > > +     unsigned long flags = 0;
> > > > > > > > +     int ret;
> > > > > > > > +
> > > > > > > > +     raw_spin_lock_irqsave(&patch_lock, flags);
> > > > > > >
> > > > > > > This looks a bit odd since stop_machine() is protected by its own mutex,
> > > > > > > and also the irq is already disabled here.
> > > > > >
> > > > > > We need it because we don't always enter the riscv_patch_text_nosync()
> > > > > > through stop_machine mechanism. If we call the
> > > > > > riscv_patch_text_nosync() directly, we need a lock to protect the
> > > > > > page.
> > > > >
> > > > > Oh, OK, but it leads another question. Is that safe to patch the
> > > > > text without sync? Would you use it for UP system?
> > > > > I think it is better to clarify "in what case user can call _nosync()"
> > > > > and add a comment on it.
> > > >
> > > > The ftrace is one of the cases, as documentation of ftrace said, when
> > > > dynamic ftrace is initialized, it calls kstop_machine to make the
> > > > machine act like a uniprocessor so that it can freely modify code
> > > > without worrying about other processors executing that same code. So
> > > > the ftrace called the _nosync interface here directly.
> > >
> > > Hmm, even though, since it already running under kstop_machine(), no
> > > other thread will run.
> > > Could you consider to use text_mutex instead of that? The text_mutex
> > > is already widely used in x86 and kernel/kprobes.c etc.
> > >
> > > (Hmm, it seems except for x86, alternative code don't care about
> > >  racing...)
> > >
> 
> The mutex_lock doesn't seem to work in ftrace context, I think it
> might be the reason why other architectures didn't use text_mutex in
> somewhere.

Yes, you need to implement ftrace_arch_code_modify_prepare() and
ftrace_arch_code_modify_post_process() in arch/riscv/kernel/ftrace.c.
Please see arch/x86/kernel/ftrace.c.

Thank you,

> 
> # echo function > current_tracer
> [   28.198070] BUG: sleeping function called from invalid context at
> kernel/locking/mutex.c:281
> [   28.198663] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid:
> 11, name: migration/0
> [   28.199491] CPU: 0 PID: 11 Comm: migration/0 Not tainted
> 5.6.0-00012-gd6f56a7a4be2-dirty #10
> [   28.200330] Call Trace:
> [   28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc
> [   28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46
> [   28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc
> [   28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46
> [   28.201898] [<ffffffe000d498b0>] dump_stack+0x76/0x90
> [   28.202329] [<ffffffe00062c3f0>] ___might_sleep+0x100/0x10e
> [   28.202720] [<ffffffe00062c448>] __might_sleep+0x4a/0x78
> [   28.203033] [<ffffffe000d61622>] mutex_lock+0x2c/0x54
> [   28.203397] [<ffffffe00060393e>] patch_insn_write+0x32/0xd8
> [   28.203780] [<ffffffe000603a94>] patch_text_nosync+0x10/0x32
> [   28.204139] [<ffffffe0006051b0>] __ftrace_modify_call+0x5c/0x6c
> [   28.204497] [<ffffffe0006052c6>] ftrace_update_ftrace_func+0x20/0x4a
> [   28.204919] [<ffffffe000697742>] ftrace_modify_all_code+0xa0/0x148
> [   28.205378] [<ffffffe0006977fc>] __ftrace_modify_code+0x12/0x1c
> [   28.205793] [<ffffffe0006924b6>] multi_cpu_stop+0xa2/0x158
> [   28.206147] [<ffffffe0006921b0>] cpu_stopper_thread+0xa4/0x13a
> [   28.206510] [<ffffffe000629f38>] smpboot_thread_fn+0xf8/0x1da
> [   28.206868] [<ffffffe000625f36>] kthread+0xfa/0x12a
> [   28.207201] [<ffffffe0006017e2>] ret_from_exception+0x0/0xc
> 
> >
> > Yes, text_mutex seems to be great. I'll change to use text_mutex in
> > the next version if it works fine after testing. Thanks.
> >
> > > Thank you,
> > > --
> > > Masami Hiramatsu <mhiramat@kernel.org>
Zong Li April 7, 2020, 1:06 p.m. UTC | #9
On Tue, Apr 7, 2020 at 8:29 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Mon, 6 Apr 2020 18:36:42 +0800
> Zong Li <zong.li@sifive.com> wrote:
>
> > On Sat, Apr 4, 2020 at 8:12 PM Zong Li <zong.li@sifive.com> wrote:
> > >
> > > On Sat, Apr 4, 2020 at 11:14 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > >
> > > > Hi Zong,
> > > >
> > > > On Fri, 3 Apr 2020 17:04:51 +0800
> > > > Zong Li <zong.li@sifive.com> wrote:
> > > >
> > > > > > > > > +{
> > > > > > > > > +     void *waddr = addr;
> > > > > > > > > +     bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> > > > > > > > > +     unsigned long flags = 0;
> > > > > > > > > +     int ret;
> > > > > > > > > +
> > > > > > > > > +     raw_spin_lock_irqsave(&patch_lock, flags);
> > > > > > > >
> > > > > > > > This looks a bit odd since stop_machine() is protected by its own mutex,
> > > > > > > > and also the irq is already disabled here.
> > > > > > >
> > > > > > > We need it because we don't always enter the riscv_patch_text_nosync()
> > > > > > > through stop_machine mechanism. If we call the
> > > > > > > riscv_patch_text_nosync() directly, we need a lock to protect the
> > > > > > > page.
> > > > > >
> > > > > > Oh, OK, but it leads another question. Is that safe to patch the
> > > > > > text without sync? Would you use it for UP system?
> > > > > > I think it is better to clarify "in what case user can call _nosync()"
> > > > > > and add a comment on it.
> > > > >
> > > > > The ftrace is one of the cases, as documentation of ftrace said, when
> > > > > dynamic ftrace is initialized, it calls kstop_machine to make the
> > > > > machine act like a uniprocessor so that it can freely modify code
> > > > > without worrying about other processors executing that same code. So
> > > > > the ftrace called the _nosync interface here directly.
> > > >
> > > > Hmm, even though, since it already running under kstop_machine(), no
> > > > other thread will run.
> > > > Could you consider to use text_mutex instead of that? The text_mutex
> > > > is already widely used in x86 and kernel/kprobes.c etc.
> > > >
> > > > (Hmm, it seems except for x86, alternative code don't care about
> > > >  racing...)
> > > >
> >
> > The mutex_lock doesn't seem to work in ftrace context, I think it
> > might be the reason why other architectures didn't use text_mutex in
> > somewhere.
>
> Yes, you need to implement ftrace_arch_code_modify_prepare() and
> ftrace_arch_code_modify_post_process() in arch/riscv/kernel/ftrace.c.
> Please see arch/x86/kernel/ftrace.c.
>

Oh ok, I misunderstood it before, I just use text_mutex instead of
patch_lock in patch.c. Thanks.

> Thank you,
>
> >
> > # echo function > current_tracer
> > [   28.198070] BUG: sleeping function called from invalid context at
> > kernel/locking/mutex.c:281
> > [   28.198663] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid:
> > 11, name: migration/0
> > [   28.199491] CPU: 0 PID: 11 Comm: migration/0 Not tainted
> > 5.6.0-00012-gd6f56a7a4be2-dirty #10
> > [   28.200330] Call Trace:
> > [   28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc
> > [   28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46
> > [   28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc
> > [   28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46
> > [   28.201898] [<ffffffe000d498b0>] dump_stack+0x76/0x90
> > [   28.202329] [<ffffffe00062c3f0>] ___might_sleep+0x100/0x10e
> > [   28.202720] [<ffffffe00062c448>] __might_sleep+0x4a/0x78
> > [   28.203033] [<ffffffe000d61622>] mutex_lock+0x2c/0x54
> > [   28.203397] [<ffffffe00060393e>] patch_insn_write+0x32/0xd8
> > [   28.203780] [<ffffffe000603a94>] patch_text_nosync+0x10/0x32
> > [   28.204139] [<ffffffe0006051b0>] __ftrace_modify_call+0x5c/0x6c
> > [   28.204497] [<ffffffe0006052c6>] ftrace_update_ftrace_func+0x20/0x4a
> > [   28.204919] [<ffffffe000697742>] ftrace_modify_all_code+0xa0/0x148
> > [   28.205378] [<ffffffe0006977fc>] __ftrace_modify_code+0x12/0x1c
> > [   28.205793] [<ffffffe0006924b6>] multi_cpu_stop+0xa2/0x158
> > [   28.206147] [<ffffffe0006921b0>] cpu_stopper_thread+0xa4/0x13a
> > [   28.206510] [<ffffffe000629f38>] smpboot_thread_fn+0xf8/0x1da
> > [   28.206868] [<ffffffe000625f36>] kthread+0xfa/0x12a
> > [   28.207201] [<ffffffe0006017e2>] ret_from_exception+0x0/0xc
> >
> > >
> > > Yes, text_mutex seems to be great. I'll change to use text_mutex in
> > > the next version if it works fine after testing. Thanks.
> > >
> > > > Thank you,
> > > > --
> > > > Masami Hiramatsu <mhiramat@kernel.org>
>
>
> --
> Masami Hiramatsu <mhiramat@kernel.org>
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 42d2c42f3cc9..2368d49eb4ef 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -27,6 +27,8 @@  enum fixed_addresses {
 	FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
 	FIX_PTE,
 	FIX_PMD,
+	FIX_TEXT_POKE1,
+	FIX_TEXT_POKE0,
 	FIX_EARLYCON_MEM_BASE,
 	__end_of_fixed_addresses
 };
diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
new file mode 100644
index 000000000000..b5918a6e0615
--- /dev/null
+++ b/arch/riscv/include/asm/patch.h
@@ -0,0 +1,12 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+
+#ifndef _ASM_RISCV_PATCH_H
+#define _ASM_RISCV_PATCH_H
+
+int riscv_patch_text_nosync(void *addr, const void *insns, size_t len);
+int riscv_patch_text(void *addr, u32 insn);
+
+#endif /* _ASM_RISCV_PATCH_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f40205cb9a22..d189bd3d8501 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -4,7 +4,8 @@ 
 #
 
 ifdef CONFIG_FTRACE
-CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_ftrace.o	= -pg
+CFLAGS_REMOVE_patch.o	= -pg
 endif
 
 extra-y += head.o
@@ -26,6 +27,7 @@  obj-y	+= traps.o
 obj-y	+= riscv_ksyms.o
 obj-y	+= stacktrace.o
 obj-y	+= cacheinfo.o
+obj-y	+= patch.o
 obj-$(CONFIG_MMU) += vdso.o vdso/
 
 obj-$(CONFIG_RISCV_M_MODE)	+= clint.o
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
new file mode 100644
index 000000000000..8a4fc65ee022
--- /dev/null
+++ b/arch/riscv/kernel/patch.c
@@ -0,0 +1,120 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 SiFive
+ */
+
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <asm/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+
+struct riscv_insn_patch {
+	void *addr;
+	u32 insn;
+	atomic_t cpu_count;
+};
+
+#ifdef CONFIG_MMU
+static DEFINE_RAW_SPINLOCK(patch_lock);
+
+static void __kprobes *patch_map(void *addr, int fixmap)
+{
+	uintptr_t uintaddr = (uintptr_t) addr;
+	struct page *page;
+
+	if (core_kernel_text(uintaddr))
+		page = phys_to_page(__pa_symbol(addr));
+	else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+		page = vmalloc_to_page(addr);
+	else
+		return addr;
+
+	BUG_ON(!page);
+
+	return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
+					 (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap)
+{
+	clear_fixmap(fixmap);
+}
+
+static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
+{
+	void *waddr = addr;
+	bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
+	unsigned long flags = 0;
+	int ret;
+
+	raw_spin_lock_irqsave(&patch_lock, flags);
+
+	if (across_pages)
+		patch_map(addr + len, FIX_TEXT_POKE1);
+
+	waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+	ret = probe_kernel_write(waddr, insn, len);
+
+	patch_unmap(FIX_TEXT_POKE0);
+
+	if (across_pages)
+		patch_unmap(FIX_TEXT_POKE1);
+
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+	return ret;
+}
+#else
+static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len)
+{
+	return probe_kernel_write(addr, insn, len);
+}
+#endif /* CONFIG_MMU */
+
+int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len)
+{
+	u32 *tp = addr;
+	int ret;
+
+	ret = riscv_insn_write(tp, insns, len);
+
+	if (!ret)
+		flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
+
+	return ret;
+}
+
+static int __kprobes riscv_patch_text_cb(void *data)
+{
+	struct riscv_insn_patch *patch = data;
+	int ret = 0;
+
+	if (atomic_inc_return(&patch->cpu_count) == 1) {
+		ret =
+		    riscv_patch_text_nosync(patch->addr, &patch->insn,
+					    GET_INSN_LENGTH(patch->insn));
+		atomic_inc(&patch->cpu_count);
+	} else {
+		while (atomic_read(&patch->cpu_count) <= num_online_cpus())
+			cpu_relax();
+		smp_mb();
+	}
+
+	return ret;
+}
+
+int __kprobes riscv_patch_text(void *addr, u32 insn)
+{
+	struct riscv_insn_patch patch = {
+		.addr = addr,
+		.insn = insn,
+		.cpu_count = ATOMIC_INIT(0),
+	};
+
+	return stop_machine_cpuslocked(riscv_patch_text_cb,
+				       &patch, cpu_online_mask);
+}