From patchwork Wed Jan 12 14:02:58 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Masami Hiramatsu (Google)" X-Patchwork-Id: 12711429 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A5580C433F5 for ; Wed, 12 Jan 2022 14:03:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353882AbiALODI (ORCPT ); Wed, 12 Jan 2022 09:03:08 -0500 Received: from ams.source.kernel.org ([145.40.68.75]:55790 "EHLO ams.source.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S240845AbiALODH (ORCPT ); Wed, 12 Jan 2022 09:03:07 -0500 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ams.source.kernel.org (Postfix) with ESMTPS id D5089B81ECF; Wed, 12 Jan 2022 14:03:05 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id C93BBC36AEB; Wed, 12 Jan 2022 14:03:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1641996184; bh=amKt34teMxxIOXBwhnAeme3pTGwo72TG1kvA6fCdAUc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=JM5Ek/Gkdgj1W6LkNO3zvguV7eQ/YgGUEFumexpV7OZALwD2h+HAdznF73CaJKfOU 66W5QtKh9dt4HpgTXBNJfH7z6f4e7lg0iR98TE1LRO+Y1d69DFHtMGgJXcVWQ5w7Z5 3fOXI6f8CyZ7+kd3xNMtErHqAxKChAJ6AxG6P25/B/81usdgh9ek6eJ8CZZNl9oyx2 /usOeVmqN1X17K4LL9/lCo6u+HN9tHAmusWKZxAlIP48IbcLKxI9bM0m6vQZIIdc20 a/rr9E1g2ikcSJR1/KBWyK33dgXlhXF95hIgW98qQ1+91gCeziKqsadfemfhXeRDSO tUU3yAswDHkAA== From: Masami Hiramatsu To: Jiri Olsa , Alexei Starovoitov Cc: Daniel Borkmann , Andrii Nakryiko , Masami Hiramatsu , netdev@vger.kernel.org, bpf@vger.kernel.org, lkml , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh 
, Steven Rostedt , "Naveen N . Rao" , Anil S Keshavamurthy , "David S . Miller" Subject: [RFC PATCH v2 1/8] ftrace: Add ftrace_set_filter_ips function Date: Wed, 12 Jan 2022 23:02:58 +0900 Message-Id: <164199617822.1247129.11776583613282139994.stgit@devnote2> X-Mailer: git-send-email 2.25.1 In-Reply-To: <164199616622.1247129.783024987490980883.stgit@devnote2> References: <164199616622.1247129.783024987490980883.stgit@devnote2> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-State: RFC From: Jiri Olsa Adding ftrace_set_filter_ips function to be able to set filter on multiple ip addresses at once. With the kprobe multi attach interface we have cases where we need to initialize ftrace_ops object with thousands of functions, so having single function diving into ftrace_hash_move_and_update_ops with ftrace_lock is faster. The functions ips are passed as unsigned long array with count. Signed-off-by: Jiri Olsa --- include/linux/ftrace.h | 3 +++ kernel/trace/ftrace.c | 53 ++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9999e29187de..60847cbce0da 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -512,6 +512,8 @@ struct dyn_ftrace { int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset); +int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips, + unsigned int cnt, int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, @@ -802,6 +804,7 @@ static inline unsigned long ftrace_location(unsigned long ip) #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) #define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; }) +#define 
ftrace_set_filter_ips(ops, ips, cnt, remove, reset) ({ -ENODEV; }) #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_free_filter(ops) do { } while (0) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 30bc880c3849..d38ae5063be3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4958,7 +4958,7 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, } static int -ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) +__ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) { struct ftrace_func_entry *entry; @@ -4976,9 +4976,25 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) return add_hash_entry(hash, ip); } +static int +ftrace_match_addr(struct ftrace_hash *hash, unsigned long *ips, + unsigned int cnt, int remove) +{ + unsigned int i; + int err; + + for (i = 0; i < cnt; i++) { + err = __ftrace_match_addr(hash, ips[i], remove); + if (err) + return err; + } + return 0; +} + static int ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, - unsigned long ip, int remove, int reset, int enable) + unsigned long *ips, unsigned int cnt, + int remove, int reset, int enable) { struct ftrace_hash **orig_hash; struct ftrace_hash *hash; @@ -5008,8 +5024,8 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, ret = -EINVAL; goto out_regex_unlock; } - if (ip) { - ret = ftrace_match_addr(hash, ip, remove); + if (ips) { + ret = ftrace_match_addr(hash, ips, cnt, remove); if (ret < 0) goto out_regex_unlock; } @@ -5026,10 +5042,10 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, } static int -ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, - int reset, int enable) +ftrace_set_addr(struct ftrace_ops *ops, unsigned long *ips, unsigned int cnt, + int remove, int reset, int enable) { - return ftrace_set_hash(ops, 
NULL, 0, ip, remove, reset, enable); + return ftrace_set_hash(ops, NULL, 0, ips, cnt, remove, reset, enable); } #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS @@ -5628,10 +5644,29 @@ int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset) { ftrace_ops_init(ops); - return ftrace_set_addr(ops, ip, remove, reset, 1); + return ftrace_set_addr(ops, &ip, 1, remove, reset, 1); } EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); +/** + * ftrace_set_filter_ips - set a functions to filter on in ftrace by addresses + * @ops - the ops to set the filter with + * @ips - the array of addresses to add to or remove from the filter. + * @cnt - the number of addresses in @ips + * @remove - non zero to remove ips from the filter + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled + * If @ips array or any ip specified within is NULL , it fails to update filter. + */ +int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips, + unsigned int cnt, int remove, int reset) +{ + ftrace_ops_init(ops); + return ftrace_set_addr(ops, ips, cnt, remove, reset, 1); +} +EXPORT_SYMBOL_GPL(ftrace_set_filter_ips); + /** * ftrace_ops_set_global_filter - setup ops to use global filters * @ops - the ops which will use the global filters @@ -5653,7 +5688,7 @@ static int ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, int reset, int enable) { - return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable); + return ftrace_set_hash(ops, buf, len, NULL, 0, 0, reset, enable); } /** From patchwork Wed Jan 12 14:03:10 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Masami Hiramatsu (Google)" X-Patchwork-Id: 12711438 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org 
[23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 62F04C433EF for ; Wed, 12 Jan 2022 14:03:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S242266AbiALODa (ORCPT ); Wed, 12 Jan 2022 09:03:30 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41222 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353895AbiALODX (ORCPT ); Wed, 12 Jan 2022 09:03:23 -0500 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AFF5BC061759; Wed, 12 Jan 2022 06:03:17 -0800 (PST) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 4103560C03; Wed, 12 Jan 2022 14:03:17 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id B141FC36AE5; Wed, 12 Jan 2022 14:03:12 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1641996196; bh=p7jQZb2SjvJICJ40fF0z8P2smX9k+bKbPIwO6eKJTos=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=sz5diZg2YG+Qm7gIUBqEUa8I2Xv/wABYjKFsqxnamV8b9Psxi7wtodDTryuHA2COo upKfSPDCP4RE9Ozbm2cfjjOt6EDrfKXqF3WB+P/dclCUV6PfDvfCQzv3KFhXw1qO8Q OLWoHkUxgw9Z2mboHdG7UERtqCmb9iUDLZdLdVDRNpvgdpwveQ7+cE32BKegmcMv+a FWzomW+0aSRh+LlVB9+LX5gFdAcxNEIQgOT6ntyAPMqBYqTOIXIWc3EuNlZ3ZIGtZc KoSlj/QwO22CIs9e4WgW+MDD2SzzJLJtHbAiWohCw2VE/TkZtScpFNmyq6MbB0WKrJ 0gKGYAy2XpZIg== From: Masami Hiramatsu To: Jiri Olsa , Alexei Starovoitov Cc: Daniel Borkmann , Andrii Nakryiko , Masami Hiramatsu , netdev@vger.kernel.org, bpf@vger.kernel.org, lkml , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Steven Rostedt , "Naveen N . Rao" , Anil S Keshavamurthy , "David S . 
Miller" Subject: [RFC PATCH v2 2/8] fprobe: Add ftrace based probe APIs Date: Wed, 12 Jan 2022 23:03:10 +0900 Message-Id: <164199619000.1247129.1034324609530250036.stgit@devnote2> X-Mailer: git-send-email 2.25.1 In-Reply-To: <164199616622.1247129.783024987490980883.stgit@devnote2> References: <164199616622.1247129.783024987490980883.stgit@devnote2> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-State: RFC The fprobe is a wrapper API for ftrace function tracer. Unlike kprobes, this probe only supports the function entry, but it can probe multiple functions by one fprobe. The usage is almost the same as the kprobe: the user will specify the function names by fprobe::syms, the number of syms by fprobe::nentry, and the user handler by fprobe::entry_handler. struct fprobe fp = { 0 }; const char *targets[] = { "func1", "func2", "func3"}; fp.entry_handler = user_handler; fp.nentry = ARRAY_SIZE(targets); fp.syms = targets; ret = register_fprobe(&fp); Signed-off-by: Masami Hiramatsu --- Changes in v2: - Remove fprobe_entry. - Do not sort the address array since there is no user private data. - Since there is only "fprobe", make filename and config name "fprobe". - Use ftrace_set_filter_ips() - Fix style warnings. 
--- include/linux/fprobe.h | 53 +++++++++++++++++++++++ kernel/trace/Kconfig | 10 ++++ kernel/trace/Makefile | 1 kernel/trace/fprobe.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 177 insertions(+) create mode 100644 include/linux/fprobe.h create mode 100644 kernel/trace/fprobe.c diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h new file mode 100644 index 000000000000..614d28d5828b --- /dev/null +++ b/include/linux/fprobe.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Simple ftrace probe wrapper */ +#ifndef _LINUX_FPROBE_H +#define _LINUX_FPROBE_H + +#include +#include + +struct fprobe { + const char **syms; + unsigned long *addrs; + unsigned int nentry; + + struct ftrace_ops ftrace; + unsigned long nmissed; + unsigned int flags; + void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); +}; + +#define FPROBE_FL_DISABLED 1 + +static inline bool fprobe_disabled(struct fprobe *fp) +{ + return (fp) ? fp->flags & FPROBE_FL_DISABLED : false; +} + +#ifdef CONFIG_FPROBE +int register_fprobe(struct fprobe *fp); +int unregister_fprobe(struct fprobe *fp); +#else +static inline int register_fprobe(struct fprobe *fp) +{ + return -EOPNOTSUPP; +} +static inline int unregister_fprobe(struct fprobe *fp) +{ + return -EOPNOTSUPP; +} +#endif + +static inline void disable_fprobe(struct fprobe *fp) +{ + if (fp) + fp->flags |= FPROBE_FL_DISABLED; +} + +static inline void enable_fprobe(struct fprobe *fp) +{ + if (fp) + fp->flags &= ~FPROBE_FL_DISABLED; +} + +#endif diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 420ff4bc67fd..6834b0272798 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -223,6 +223,16 @@ config DYNAMIC_FTRACE_WITH_ARGS depends on DYNAMIC_FTRACE depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS +config FPROBE + bool "Kernel Function Probe (fprobe)" + depends on FUNCTION_TRACER + depends on DYNAMIC_FTRACE_WITH_REGS + default n + help + This option enables kernel 
function probe feature, which is + similar to kprobes, but probes only for kernel function entries + and it can probe multiple functions by one fprobe. + config FUNCTION_PROFILER bool "Kernel function profiler" depends on FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index bedc5caceec7..79255f9de9a4 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -97,6 +97,7 @@ obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o +obj-$(CONFIG_FPROBE) += fprobe.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c new file mode 100644 index 000000000000..0247fc7d75e2 --- /dev/null +++ b/kernel/trace/fprobe.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fprobe - Simple ftrace probe wrapper for function entry. + */ +#define pr_fmt(fmt) "fprobe: " fmt + +#include +#include +#include +#include +#include + +static void fprobe_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct ftrace_regs *fregs) +{ + struct fprobe *fp; + int bit; + + fp = container_of(ops, struct fprobe, ftrace); + if (fprobe_disabled(fp)) + return; + + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) { + fp->nmissed++; + return; + } + + if (fp->entry_handler) + fp->entry_handler(fp, ip, ftrace_get_regs(fregs)); + + ftrace_test_recursion_unlock(bit); +} +NOKPROBE_SYMBOL(fprobe_handler); + +static int convert_func_addresses(struct fprobe *fp) +{ + unsigned int i; + + if (!fp->syms) + return 0; + + fp->addrs = kcalloc(fp->nentry, sizeof(*fp->addrs), GFP_KERNEL); + if (!fp->addrs) + return -ENOMEM; + + for (i = 0; i < fp->nentry; i++) { + + fp->addrs[i] = kallsyms_lookup_name(fp->syms[i]); + if (!fp->addrs[i]) + return -ENOENT; + } + + return 0; +} + +/** + * register_fprobe - Register fprobe to 
ftrace + * @fp: A fprobe data structure to be registered. + * + * This expects the user set @fp::entry_handler, @fp::syms or @fp:addrs, + * and @fp::nentry. + * Note that you do not set both of @fp::addrs and @fp::syms. + */ +int register_fprobe(struct fprobe *fp) +{ + int ret; + + if (!fp || !fp->nentry || (!fp->syms && !fp->addrs) || + (fp->syms && fp->addrs)) + return -EINVAL; + + ret = convert_func_addresses(fp); + if (ret < 0) + return ret; + + fp->nmissed = 0; + fp->ftrace.func = fprobe_handler; + fp->ftrace.flags = FTRACE_OPS_FL_SAVE_REGS; + + ret = ftrace_set_filter_ips(&fp->ftrace, fp->addrs, fp->nentry, 0, 0); + if (!ret) + ret = register_ftrace_function(&fp->ftrace); + + if (ret < 0 && fp->syms) { + kfree(fp->addrs); + fp->addrs = NULL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(register_fprobe); + +/** + * unregister_fprobe - Unregister fprobe from ftrace + * @fp: A fprobe data structure to be unregistered. + */ +int unregister_fprobe(struct fprobe *fp) +{ + int ret; + + if (!fp || !fp->nentry || !fp->addrs) + return -EINVAL; + + ret = unregister_ftrace_function(&fp->ftrace); + + if (!ret && fp->syms) { + kfree(fp->addrs); + fp->addrs = NULL; + } + return ret; +} +EXPORT_SYMBOL_GPL(unregister_fprobe); From patchwork Wed Jan 12 14:03:22 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Masami Hiramatsu (Google)" X-Patchwork-Id: 12711439 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id B8CF7C433F5 for ; Wed, 12 Jan 2022 14:03:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353915AbiALODd (ORCPT ); Wed, 12 Jan 2022 09:03:33 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41256 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org 
with ESMTP id S1353909AbiALOD3 (ORCPT ); Wed, 12 Jan 2022 09:03:29 -0500 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1DC67C06173F; Wed, 12 Jan 2022 06:03:29 -0800 (PST) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id B374C60BA3; Wed, 12 Jan 2022 14:03:28 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7AB0DC36AEA; Wed, 12 Jan 2022 14:03:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1641996208; bh=sex2KOvLrdt+nZq7x40xcM7odTt2DYtNVU6WKvOCNO8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=k47ZN9pDl6PclIUnDbEyEmiYwTfJcWO2wExR/UxhBzqrT2GgPc9/u/IHwnb7uWe5R a+L+MgyyjulwGHwFVCvFkxqamJ7p3eqDCPhr1uD+2N2uAEDXtL5dYt8PV8tEGP53VV wVsgeDzfGxC5/JtQpzQ1sjGrhQDtOX2WhR/7vP26TUAxRGJ2DuLo3EO9tWJASuHSY7 mXvz8sIRtfcC6XQ8lLUEeWsPOmOskY1WmVb3gv/9Tc5VQnGhRUG473xNqMOlqvYvN4 nPOpF9oA6UX53OAZZ0c0YOjMIeUTeGo0jgkEspNdTV/GJda/m83jGNsH5Qg/0CN4OL 7oQUx3xkQs51w== From: Masami Hiramatsu To: Jiri Olsa , Alexei Starovoitov Cc: Daniel Borkmann , Andrii Nakryiko , Masami Hiramatsu , netdev@vger.kernel.org, bpf@vger.kernel.org, lkml , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Steven Rostedt , "Naveen N . Rao" , Anil S Keshavamurthy , "David S . 
Miller" Subject: [RFC PATCH v2 3/8] rethook: Add a generic return hook Date: Wed, 12 Jan 2022 23:03:22 +0900 Message-Id: <164199620208.1247129.13021391608719523669.stgit@devnote2> X-Mailer: git-send-email 2.25.1 In-Reply-To: <164199616622.1247129.783024987490980883.stgit@devnote2> References: <164199616622.1247129.783024987490980883.stgit@devnote2> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-State: RFC Add a return hook framework which hooks the function return. Most of the idea came from the kretprobe, but this is independent from kretprobe. Note that this is expected to be used with other function entry hooking features, like ftrace, fprobe, and kprobes. Eventually this will replace the kretprobe (e.g. kprobe + rethook = kretprobe), but at this moment, this is just an additional hook. Signed-off-by: Masami Hiramatsu --- include/linux/rethook.h | 74 +++++++++++++++ include/linux/sched.h | 3 + kernel/exit.c | 2 kernel/fork.c | 3 + kernel/trace/Kconfig | 11 ++ kernel/trace/Makefile | 1 kernel/trace/rethook.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 320 insertions(+) create mode 100644 include/linux/rethook.h create mode 100644 kernel/trace/rethook.c diff --git a/include/linux/rethook.h b/include/linux/rethook.h new file mode 100644 index 000000000000..2622bcd5213a --- /dev/null +++ b/include/linux/rethook.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Return hooking with list-based shadow stack. 
+ */ +#ifndef _LINUX_RETHOOK_H +#define _LINUX_RETHOOK_H + +#include +#include +#include +#include +#include + +struct rethook_node; + +typedef void (*rethook_handler_t) (struct rethook_node *, void *, struct pt_regs *); + +struct rethook { + void *data; + rethook_handler_t handler; + struct freelist_head pool; + refcount_t ref; + struct rcu_head rcu; +}; + +struct rethook_node { + union { + struct freelist_node freelist; + struct rcu_head rcu; + }; + struct llist_node llist; + struct rethook *rethook; + unsigned long ret_addr; + unsigned long frame; +}; + +int rethook_node_init(struct rethook_node *node); + +struct rethook *rethook_alloc(void *data, rethook_handler_t handler); +void rethook_free(struct rethook *rh); +void rethook_add_node(struct rethook *rh, struct rethook_node *node); + +struct rethook_node *rethook_try_get(struct rethook *rh); +void rethook_node_recycle(struct rethook_node *node); +void rethook_hook_current(struct rethook_node *node, struct pt_regs *regs); + +unsigned long rethook_find_ret_addr(struct task_struct *tsk, unsigned long frame, + struct llist_node **cur); + +/* Arch dependent code must implement this and trampoline code */ +void arch_rethook_prepare(struct rethook_node *node, struct pt_regs *regs); +void arch_rethook_trampoline(void); + +static inline bool is_rethook_trampoline(unsigned long addr) +{ + return addr == (unsigned long)arch_rethook_trampoline; +} + +/* If the architecture needs to fix up the return address, implement it. 
*/ +void arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr); + +/* Generic trampoline handler, arch code must prepare asm stub */ +unsigned long rethook_trampoline_handler(struct pt_regs *regs, + unsigned long frame); + +#ifdef CONFIG_RETHOOK +void rethook_flush_task(struct task_struct *tk); +#else +#define rethook_flush_task(tsk) do { } while (0) +#endif + +#endif + diff --git a/include/linux/sched.h b/include/linux/sched.h index 78c351e35fec..2bfabf5355b7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1473,6 +1473,9 @@ struct task_struct { #ifdef CONFIG_KRETPROBES struct llist_head kretprobe_instances; #endif +#ifdef CONFIG_RETHOOK + struct llist_head rethooks; +#endif #ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH /* diff --git a/kernel/exit.c b/kernel/exit.c index f702a6a63686..a39a321c1f37 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -169,6 +170,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); kprobe_flush_task(tsk); + rethook_flush_task(tsk); perf_event_delayed_put(tsk); trace_sched_process_free(tsk); put_task_struct(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 3244cc56b697..ffae38be64c4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2282,6 +2282,9 @@ static __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_KRETPROBES p->kretprobe_instances.first = NULL; #endif +#ifdef CONFIG_RETHOOK + p->rethooks.first = NULL; +#endif /* * Ensure that the cgroup subsystem policies allow the new process to be diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6834b0272798..44c473ad9021 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -10,6 +10,17 @@ config USER_STACKTRACE_SUPPORT config NOP_TRACER bool +config HAVE_RETHOOK + bool + +config RETHOOK + bool + depends on HAVE_RETHOOK + help + Enable generic return 
hooking feature. This is an internal + API, which will be used by other function-entry hooking + feature like fprobe and kprobes. + config HAVE_FUNCTION_TRACER bool help diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 79255f9de9a4..c6f11a139eac 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -98,6 +98,7 @@ obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o obj-$(CONFIG_FPROBE) += fprobe.o +obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c new file mode 100644 index 000000000000..80c0584e8497 --- /dev/null +++ b/kernel/trace/rethook.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "rethook: " fmt + +#include +#include +#include +#include +#include +#include +#include + +/* Return hook list (shadow stack by list) */ + +void rethook_flush_task(struct task_struct *tk) +{ + struct rethook_node *rhn; + struct llist_node *node; + + preempt_disable(); + + node = __llist_del_all(&tk->rethooks); + while (node) { + rhn = container_of(node, struct rethook_node, llist); + node = node->next; + rethook_node_recycle(rhn); + } + + preempt_enable(); +} + +static void rethook_free_rcu(struct rcu_head *head) +{ + struct rethook *rh = container_of(head, struct rethook, rcu); + struct rethook_node *rhn; + struct freelist_node *node; + int count = 1; + + node = rh->pool.head; + while (node) { + rhn = container_of(node, struct rethook_node, freelist); + node = node->next; + kfree(rhn); + count++; + } + + /* The rh->ref is the number of pooled nodes + 1 */ + if (refcount_sub_and_test(count, &rh->ref)) + kfree(rh); +} + +void rethook_free(struct rethook *rh) +{ + rh->handler = NULL; + rh->data = NULL; + + call_rcu(&rh->rcu, rethook_free_rcu); +} + +/* + * @handler must not be NULL. 
@handler == NULL means this rethook is + * going to be freed. + */ +struct rethook *rethook_alloc(void *data, rethook_handler_t handler) +{ + struct rethook *rh = kzalloc(sizeof(struct rethook), GFP_KERNEL); + + if (!rh || !handler) + return NULL; + + rh->data = data; + rh->handler = handler; + rh->pool.head = NULL; + refcount_set(&rh->ref, 1); + + return rh; +} + +void rethook_add_node(struct rethook *rh, struct rethook_node *node) +{ + node->rethook = rh; + freelist_add(&node->freelist, &rh->pool); + refcount_inc(&rh->ref); +} + +static void free_rethook_node_rcu(struct rcu_head *head) +{ + struct rethook_node *node = container_of(head, struct rethook_node, rcu); + + if (refcount_dec_and_test(&node->rethook->ref)) + kfree(node->rethook); + kfree(node); +} + +void rethook_node_recycle(struct rethook_node *node) +{ + if (likely(READ_ONCE(node->rethook->handler))) + freelist_add(&node->freelist, &node->rethook->pool); + else + call_rcu(&node->rcu, free_rethook_node_rcu); +} + +struct rethook_node *rethook_try_get(struct rethook *rh) +{ + struct freelist_node *fn; + + /* Check whether @rh is going to be freed. */ + if (unlikely(!READ_ONCE(rh->handler))) + return NULL; + + fn = freelist_try_get(&rh->pool); + if (!fn) + return NULL; + + return container_of(fn, struct rethook_node, freelist); +} + +void rethook_hook_current(struct rethook_node *node, struct pt_regs *regs) +{ + arch_rethook_prepare(node, regs); + __llist_add(&node->llist, &current->rethooks); +} + +/* This assumes the 'tsk' is the current task or the task is not running. 
*/ +static unsigned long __rethook_find_ret_addr(struct task_struct *tsk, + struct llist_node **cur) +{ + struct rethook_node *rh = NULL; + struct llist_node *node = *cur; + + if (!node) + node = tsk->rethooks.first; + else + node = node->next; + + while (node) { + rh = container_of(node, struct rethook_node, llist); + if (rh->ret_addr != (unsigned long)arch_rethook_trampoline) { + *cur = node; + return rh->ret_addr; + } + node = node->next; + } + return 0; +} +NOKPROBE_SYMBOL(__rethook_find_ret_addr); + +/** + * rethook_find_ret_addr -- Find correct return address modified by rethook + * @tsk: Target task + * @frame: A frame pointer + * @cur: a storage of the loop cursor llist_node pointer for next call + * + * Find the correct return address modified by a rethook on @tsk in unsigned + * long type. If it finds the return address, this returns that address value, + * or this returns 0. + * The @tsk must be 'current' or a task which is not running. @frame is a hint + * to get the correct return address - which is compared with the + * rethook_node::frame field. The @cur is a loop cursor for searching the + * rethook return addresses on the @tsk. The '*@cur' should be NULL at the + * first call, but '@cur' itself must NOT be NULL. + */ +unsigned long rethook_find_ret_addr(struct task_struct *tsk, unsigned long frame, + struct llist_node **cur) +{ + struct rethook_node *rhn = NULL; + unsigned long ret; + + if (WARN_ON_ONCE(!cur)) + return 0; + + do { + ret = __rethook_find_ret_addr(tsk, cur); + if (!ret) + break; + rhn = container_of(*cur, struct rethook_node, llist); + } while (rhn->frame != frame); + + return ret; +} +NOKPROBE_SYMBOL(rethook_find_ret_addr); + +void __weak arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr) +{ + /* + * Do nothing by default. 
If the architecture which uses a + * frame pointer to record real return address on the stack, + * it should fill this function to fixup the return address + * so that stacktrace works from the rethook handler. + */ +} + +unsigned long rethook_trampoline_handler(struct pt_regs *regs, + unsigned long frame) +{ + struct rethook_node *rhn; + struct llist_node *first, *node = NULL; + unsigned long correct_ret_addr = __rethook_find_ret_addr(current, &node); + + if (!correct_ret_addr) { + pr_err("rethook: Return address not found! Maybe there is a bug in the kernel\n"); + BUG_ON(1); + } + + instruction_pointer_set(regs, correct_ret_addr); + arch_rethook_fixup_return(regs, correct_ret_addr); + + first = current->rethooks.first; + current->rethooks.first = node->next; + node->next = NULL; + + while (first) { + rhn = container_of(first, struct rethook_node, llist); + if (WARN_ON_ONCE(rhn->frame != frame)) + break; + if (rhn->rethook->handler) + rhn->rethook->handler(rhn, rhn->rethook->data, regs); + + first = first->next; + rethook_node_recycle(rhn); + } + + return correct_ret_addr; +} + From patchwork Wed Jan 12 14:03:33 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Masami Hiramatsu (Google)" X-Patchwork-Id: 12711440 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 32B36C433EF for ; Wed, 12 Jan 2022 14:03:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353892AbiALODw (ORCPT ); Wed, 12 Jan 2022 09:03:52 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41324 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353897AbiALODp (ORCPT ); Wed, 12 Jan 2022 09:03:45 -0500 Received: from ams.source.kernel.org (ams.source.kernel.org 
[IPv6:2604:1380:4601:e00::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C224FC061759; Wed, 12 Jan 2022 06:03:41 -0800 (PST) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ams.source.kernel.org (Postfix) with ESMTPS id 8BC1FB81EF6; Wed, 12 Jan 2022 14:03:40 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id E06E2C36AEA; Wed, 12 Jan 2022 14:03:35 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1641996219; bh=tzqau06Ug3zjELYtX9vo3t11Cd6xCmSb0rhMW9gNyRk=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=NSol9FqSp90Pt70uE+RZ4DXjYgV+L26W0oXWk7RZU0YlCrTkyeoxLQ1cLRIM/3WsQ bT5AcQSr72by6bM09S4eiJt9sWstug8p0C9R9w02GUE4zaOGr3+Kz9OjVmw2a/DhIk 9uV4ozdPpD93DcD7aai2loI59JM5uRO4o5EddLOMni0nsImUN/ZcJ0TgcRuLWwOaYa rGx5GABmIxJbuKoYt5Iy5VQflgLPV7m9wr8q44O9Kw7WkI8l4NBTkHVrEUMJEzkQ/K so9l7OCQ+NQU6p2ckMxEhwG5ckE6PweAVzJHB7IWYMn86SRayJJ9OfyAtwcAvmNqOX nkPrnP6ZhiMMQ== From: Masami Hiramatsu To: Jiri Olsa , Alexei Starovoitov Cc: Daniel Borkmann , Andrii Nakryiko , Masami Hiramatsu , netdev@vger.kernel.org, bpf@vger.kernel.org, lkml , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Steven Rostedt , "Naveen N . Rao" , Anil S Keshavamurthy , "David S . Miller" Subject: [RFC PATCH v2 4/8] rethook: x86: Add rethook x86 implementation Date: Wed, 12 Jan 2022 23:03:33 +0900 Message-Id: <164199621352.1247129.832357773512186691.stgit@devnote2> X-Mailer: git-send-email 2.25.1 In-Reply-To: <164199616622.1247129.783024987490980883.stgit@devnote2> References: <164199616622.1247129.783024987490980883.stgit@devnote2> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-State: RFC Add rethook for x86 implementation. Most of the code has been copied from kretprobes on x86. 
Signed-off-by: Masami Hiramatsu --- arch/x86/Kconfig | 1 arch/x86/kernel/Makefile | 1 arch/x86/kernel/rethook.c | 115 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 arch/x86/kernel/rethook.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7399327d1eff..939c4c897e63 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -219,6 +219,7 @@ config X86 select HAVE_KPROBES_ON_FTRACE select HAVE_FUNCTION_ERROR_INJECTION select HAVE_KRETPROBES + select HAVE_RETHOOK select HAVE_KVM select HAVE_LIVEPATCH if X86_64 select HAVE_MIXED_BREAKPOINTS_REGS diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 2ff3e600f426..66593d8c4d74 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_TRACING) += trace.o +obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_CRASH_CORE) += crash_core_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o diff --git a/arch/x86/kernel/rethook.c b/arch/x86/kernel/rethook.c new file mode 100644 index 000000000000..f2f3b9526e43 --- /dev/null +++ b/arch/x86/kernel/rethook.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * x86 implementation of rethook. Mostly copied from arch/x86/kernel/kprobes/core.c. 
+ */ +#include +#include +#include + +#include "kprobes/common.h" + +/* + * Called from arch_rethook_trampoline + */ +__used __visible void arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + unsigned long *frame_pointer; + + /* fixup registers */ + regs->cs = __KERNEL_CS; +#ifdef CONFIG_X86_32 + regs->gs = 0; +#endif + regs->ip = (unsigned long)&arch_rethook_trampoline; + regs->orig_ax = ~0UL; + regs->sp += sizeof(long); + frame_pointer = &regs->sp + 1; + + /* + * The return address at 'frame_pointer' is recovered by the + * arch_rethook_fixup_return() which is called from this + * rethook_trampoline_handler(). + */ + rethook_trampoline_handler(regs, (unsigned long)frame_pointer); + + /* + * Copy FLAGS to 'pt_regs::sp' so that arch_rethook_trampoline() + * can do RET right after POPF. + */ + regs->sp = regs->flags; +} +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +/* + * When a target function returns, this code saves registers and calls + * arch_rethook_trampoline_callback(), which calls the rethook handler. + */ +asm( + ".text\n" + ".global arch_rethook_trampoline\n" + ".type arch_rethook_trampoline, @function\n" + "arch_rethook_trampoline:\n" +#ifdef CONFIG_X86_64 + /* Push a fake return address to tell the unwinder it's a kretprobe. */ + " pushq $arch_rethook_trampoline\n" + UNWIND_HINT_FUNC + /* Save the 'sp - 8', this will be fixed later. */ + " pushq %rsp\n" + " pushfq\n" + SAVE_REGS_STRING + " movq %rsp, %rdi\n" + " call arch_rethook_trampoline_callback\n" + RESTORE_REGS_STRING + /* In the callback function, 'regs->flags' is copied to 'regs->sp'. */ + " addq $8, %rsp\n" + " popfq\n" +#else + /* Push a fake return address to tell the unwinder it's a kretprobe. */ + " pushl $arch_rethook_trampoline\n" + UNWIND_HINT_FUNC + /* Save the 'sp - 4', this will be fixed later.
*/ + " pushl %esp\n" + " pushfl\n" + SAVE_REGS_STRING + " movl %esp, %eax\n" + " call arch_rethook_trampoline_callback\n" + RESTORE_REGS_STRING + /* In the callback function, 'regs->flags' is copied to 'regs->sp'. */ + " addl $4, %esp\n" + " popfl\n" +#endif + " ret\n" + ".size arch_rethook_trampoline, .-arch_rethook_trampoline\n" +); +NOKPROBE_SYMBOL(arch_rethook_trampoline); +/* + * arch_rethook_trampoline() skips updating frame pointer. The frame pointer + * saved in arch_rethook_trampoline_callback() points to the real caller + * function's frame pointer. Thus the arch_rethook_trampoline() doesn't have + * a standard stack frame with CONFIG_FRAME_POINTER=y. + * Let's mark it as a non-standard function. Anyway, FP unwinder can correctly + * unwind without the hint. + */ +STACK_FRAME_NON_STANDARD_FP(arch_rethook_trampoline); + +/* This is called from rethook_trampoline_handler(). */ +void arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr) +{ + unsigned long *frame_pointer = &regs->sp + 1; + + /* Replace fake return address with real one.
*/ + *frame_pointer = correct_ret_addr; +} + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs) +{ + unsigned long *stack = (unsigned long *)regs->sp; + + rh->ret_addr = stack[0]; + rh->frame = regs->sp; + + /* Replace the return addr with trampoline addr */ + stack[0] = (unsigned long) arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); From patchwork Wed Jan 12 14:03:44 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Masami Hiramatsu (Google)" X-Patchwork-Id: 12711441 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 87B4DC43217 for ; Wed, 12 Jan 2022 14:04:00 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353901AbiALODz (ORCPT ); Wed, 12 Jan 2022 09:03:55 -0500 Received: from dfw.source.kernel.org ([139.178.84.217]:42620 "EHLO dfw.source.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353926AbiALODv (ORCPT ); Wed, 12 Jan 2022 09:03:51 -0500 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 0C7E460EC0; Wed, 12 Jan 2022 14:03:51 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id D1223C36AE5; Wed, 12 Jan 2022 14:03:46 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1641996230; bh=m2vdMMZ6B4QS0Bu4sOf4Vx/6iPWUkb5M1bNHPY/dO0w=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=oJm6dzTfse9i1CM/DS2dcJE0I5I4fH9RbxCXX0zGT7AiKU63EXduxn3qc2Wc6d7Zd RG+wFCrnKpLUeWs2PjKsjE0HJAfXDGjByVX1Jl0vwyb5yvO7mMjPPQ7EMp+SuCQvwd l4VImOdNK0OeWpP173AxHlUmDBKpNVExIVIgbbizlp/zrDtlw6xMzCdDk1oXVeMYPo 
WcFzm1DGj0YZLxtRNqk5IxLCpwcLFjQbf6q+cSW3boAG5W9ifhdcAlTA2dYbtr3rJQ xITPRdIdmSUwyP6tnFDsd624K72B3dND0ZBpZ3xQ+wX0YSTxgbFkNz1qniRvGG+KzB HzZGFMP77SswQ== From: Masami Hiramatsu To: Jiri Olsa , Alexei Starovoitov Cc: Daniel Borkmann , Andrii Nakryiko , Masami Hiramatsu , netdev@vger.kernel.org, bpf@vger.kernel.org, lkml , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Steven Rostedt , "Naveen N . Rao" , Anil S Keshavamurthy , "David S . Miller" Subject: [RFC PATCH v2 5/8] fprobe: Add exit_handler support Date: Wed, 12 Jan 2022 23:03:44 +0900 Message-Id: <164199622465.1247129.17452431908551186543.stgit@devnote2> X-Mailer: git-send-email 2.25.1 In-Reply-To: <164199616622.1247129.783024987490980883.stgit@devnote2> References: <164199616622.1247129.783024987490980883.stgit@devnote2> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-State: RFC Add exit_handler to fprobe. fprobe + rethook allows us to hook the kernel function return without the fgraph tracer. Eventually, the fgraph tracer will become a generic array-based return hooking mechanism, and fprobe may use it if the user requests it. Since both the array-based approach and the list-based approach have pros and cons (e.g. memory consumption vs. fewer missed events), it is better to keep both, but fprobe will provide the same exit-handler interface.
Signed-off-by: Masami Hiramatsu --- include/linux/fprobe.h | 4 +++ kernel/trace/Kconfig | 1 + kernel/trace/fprobe.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index 614d28d5828b..f566f59af760 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -5,6 +5,7 @@ #include #include +#include struct fprobe { const char **syms; @@ -14,7 +15,10 @@ struct fprobe { struct ftrace_ops ftrace; unsigned long nmissed; unsigned int flags; + struct rethook *rethook; + void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); + void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs); }; #define FPROBE_FL_DISABLED 1 diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 44c473ad9021..00bdd2a2f417 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -238,6 +238,7 @@ config FPROBE bool "Kernel Function Probe (fprobe)" depends on FUNCTION_TRACER depends on DYNAMIC_FTRACE_WITH_REGS + select RETHOOK default n help This option enables kernel function probe feature, which is diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 0247fc7d75e2..3333893e5217 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -7,12 +7,20 @@ #include #include #include +#include #include #include +struct fprobe_rethook_node { + struct rethook_node node; + unsigned long entry_ip; +}; + static void fprobe_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { + struct fprobe_rethook_node *fpr; + struct rethook_node *rh; struct fprobe *fp; int bit; @@ -29,10 +37,34 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip, if (fp->entry_handler) fp->entry_handler(fp, ip, ftrace_get_regs(fregs)); + if (fp->exit_handler) { + rh = rethook_try_get(fp->rethook); + if (!rh) { + fp->nmissed++; + goto out; + } + fpr = 
container_of(rh, struct fprobe_rethook_node, node); + fpr->entry_ip = ip; + rethook_hook_current(rh, ftrace_get_regs(fregs)); + } + +out: ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(fprobe_handler); +static void fprobe_exit_handler(struct rethook_node *rh, void *data, + struct pt_regs *regs) +{ + struct fprobe *fp = (struct fprobe *)data; + struct fprobe_rethook_node *fpr; + + fpr = container_of(rh, struct fprobe_rethook_node, node); + + fp->exit_handler(fp, fpr->entry_ip, regs); +} +NOKPROBE_SYMBOL(fprobe_exit_handler); + static int convert_func_addresses(struct fprobe *fp) { unsigned int i; @@ -64,6 +96,7 @@ static int convert_func_addresses(struct fprobe *fp) */ int register_fprobe(struct fprobe *fp) { + unsigned int i, size; int ret; if (!fp || !fp->nentry || (!fp->syms && !fp->addrs) || @@ -78,10 +111,29 @@ int register_fprobe(struct fprobe *fp) fp->ftrace.func = fprobe_handler; fp->ftrace.flags = FTRACE_OPS_FL_SAVE_REGS; + /* Initialize rethook if needed */ + if (fp->exit_handler) { + size = fp->nentry * num_possible_cpus() * 2; + fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler); + for (i = 0; i < size; i++) { + struct rethook_node *node; + + node = kzalloc(sizeof(struct fprobe_rethook_node), GFP_KERNEL); + if (!node) { + rethook_free(fp->rethook); + ret = -ENOMEM; + goto out; + } + rethook_add_node(fp->rethook, node); + } + } else + fp->rethook = NULL; + ret = ftrace_set_filter_ips(&fp->ftrace, fp->addrs, fp->nentry, 0, 0); if (!ret) ret = register_ftrace_function(&fp->ftrace); +out: if (ret < 0 && fp->syms) { kfree(fp->addrs); fp->addrs = NULL; @@ -104,9 +156,12 @@ int unregister_fprobe(struct fprobe *fp) ret = unregister_ftrace_function(&fp->ftrace); - if (!ret && fp->syms) { - kfree(fp->addrs); - fp->addrs = NULL; + if (!ret) { + rethook_free(fp->rethook); + if (fp->syms) { + kfree(fp->addrs); + fp->addrs = NULL; + } } return ret; } From patchwork Wed Jan 12 14:03:56 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Masami Hiramatsu (Google)" X-Patchwork-Id: 12711442 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 10460C4332F for ; Wed, 12 Jan 2022 14:04:26 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353909AbiALOEX (ORCPT ); Wed, 12 Jan 2022 09:04:23 -0500 Received: from dfw.source.kernel.org ([139.178.84.217]:42816 "EHLO dfw.source.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353897AbiALOED (ORCPT ); Wed, 12 Jan 2022 09:04:03 -0500 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id AFD2860ECE; Wed, 12 Jan 2022 14:04:02 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 47834C36AEA; Wed, 12 Jan 2022 14:03:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1641996242; bh=b5cSC0Voo0MtqAVukvSA69tutmiwlNEMrP3tolpDRq4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=fEfBpAL0fs4u2zH1JVob4yS11dT6illLKu2wHy3jXU1zrLD8vD4nZgcN3P/1Bhh3Q FO5EWszPAKnTczfrFwS2b2ub0qUQM3aGfT4zlwVVtFBKOFHAAkG7WmYKe2a8HIaK6I 3qs0SOgy97EGIDxar/VJep/NmJ32PKacy/DWH0q56DApgqCBv+zhAiCiO9m281jc98 WacrJGa+MxUxvnXKEYYMfrhrdRjOHrpAVDHEQfSSX69eyMLs9ye8ahhiq0nPvDscjg l6j527jqSRUG3cW61GRRqG6k/N2Os9MzYBUxr6BR0ItmYCUWJp+qpFspyal7eLzQVK aacdNj3WOG4Qw== From: Masami Hiramatsu To: Jiri Olsa , Alexei Starovoitov Cc: Daniel Borkmann , Andrii Nakryiko , Masami Hiramatsu , netdev@vger.kernel.org, bpf@vger.kernel.org, lkml , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Steven Rostedt , "Naveen N . Rao" , Anil S Keshavamurthy , "David S . 
Miller" Subject: [RFC PATCH v2 6/8] fprobe: Add sample program for fprobe Date: Wed, 12 Jan 2022 23:03:56 +0900 Message-Id: <164199623581.1247129.9124741655034911016.stgit@devnote2> X-Mailer: git-send-email 2.25.1 In-Reply-To: <164199616622.1247129.783024987490980883.stgit@devnote2> References: <164199616622.1247129.783024987490980883.stgit@devnote2> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-State: RFC Add a sample program for the fprobe. Signed-off-by: Masami Hiramatsu --- Changes in v2: - Fix infinite loop for multiple symbols. - Fix memory leaks for copied string and entry array. - Update for new fprobe APIs. - Fix style issues. --- samples/Kconfig | 7 +++ samples/Makefile | 1 samples/fprobe/Makefile | 3 + samples/fprobe/fprobe_example.c | 103 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 samples/fprobe/Makefile create mode 100644 samples/fprobe/fprobe_example.c diff --git a/samples/Kconfig b/samples/Kconfig index 43d2e9aa557f..e010c2c1256c 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -73,6 +73,13 @@ config SAMPLE_HW_BREAKPOINT help This builds kernel hardware breakpoint example modules. +config SAMPLE_FPROBE + tristate "Build fprobe examples -- loadable modules only" + depends on FPROBE && m + help + This builds a fprobe example module. This module has an option 'symbol'. + You can specify a probed symbol or symbols separated with ','.
+ config SAMPLE_KFIFO tristate "Build kfifo examples -- loadable modules only" depends on m diff --git a/samples/Makefile b/samples/Makefile index 4bcd6b93bffa..4f73fe7aa473 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/ subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/ +obj-$(CONFIG_SAMPLE_FPROBE) += fprobe/ diff --git a/samples/fprobe/Makefile b/samples/fprobe/Makefile new file mode 100644 index 000000000000..ecccbfa6e99b --- /dev/null +++ b/samples/fprobe/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_SAMPLE_FPROBE) += fprobe_example.o diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c new file mode 100644 index 000000000000..c28320537f98 --- /dev/null +++ b/samples/fprobe/fprobe_example.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Here's a sample kernel module showing the use of fprobe to dump a + * stack trace and selected registers when kernel_clone() is called. + * + * For more information on theory of operation of kprobes, see + * Documentation/trace/kprobes.rst + * + * You will see the trace data in /var/log/messages and on the console + * whenever kernel_clone() is invoked to create a new process. 
+ */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include + +#define MAX_SYMBOL_LEN 4096 +struct fprobe sample_probe; +static char symbol[MAX_SYMBOL_LEN] = "kernel_clone"; +module_param_string(symbol, symbol, sizeof(symbol), 0644); + +static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs) +{ + pr_info("Enter <%pS> ip = 0x%p\n", (void *)ip, (void *)ip); +} + +static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs) +{ + unsigned long rip = instruction_pointer(regs); + + pr_info("Return from <%pS> ip = 0x%p to rip = 0x%p (%pS)\n", + (void *)ip, (void *)ip, (void *)rip, (void *)rip); +} + +static char *symbuf; + +static int __init fprobe_init(void) +{ + const char **syms; + char *p; + int ret, count, i; + + sample_probe.entry_handler = sample_entry_handler; + sample_probe.exit_handler = sample_exit_handler; + + if (strchr(symbol, ',')) { + symbuf = kstrdup(symbol, GFP_KERNEL); + if (!symbuf) + return -ENOMEM; + p = symbuf; + count = 1; + while ((p = strchr(++p, ',')) != NULL) + count++; + } else { + count = 1; + symbuf = symbol; + } + pr_info("%d symbols found\n", count); + + syms = kcalloc(count, sizeof(char *), GFP_KERNEL); + if (!syms) { + ret = -ENOMEM; + goto error; + } + + p = symbuf; + for (i = 0; i < count; i++) + syms[i] = strsep(&p, ","); + + sample_probe.syms = syms; + sample_probe.nentry = count; + + ret = register_fprobe(&sample_probe); + if (ret < 0) { + pr_err("register_fprobe failed, returned %d\n", ret); + goto error; + } + pr_info("Planted fprobe at %s\n", symbol); + return 0; + +error: + if (symbuf != symbol) + kfree(symbuf); + return ret; +} + +static void __exit fprobe_exit(void) +{ + unregister_fprobe(&sample_probe); + + kfree(sample_probe.syms); + if (symbuf != symbol) + kfree(symbuf); + + pr_info("fprobe at %s unregistered\n", symbol); +} + +module_init(fprobe_init) +module_exit(fprobe_exit) +MODULE_LICENSE("GPL"); From patchwork Wed Jan 12 
14:04:07 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Masami Hiramatsu (Google)" X-Patchwork-Id: 12711443 X-Patchwork-Delegate: bpf@iogearbox.net Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 890C0C433F5 for ; Wed, 12 Jan 2022 14:04:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353967AbiALOE3 (ORCPT ); Wed, 12 Jan 2022 09:04:29 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41468 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353942AbiALOEP (ORCPT ); Wed, 12 Jan 2022 09:04:15 -0500 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DA42DC06175D; Wed, 12 Jan 2022 06:04:14 -0800 (PST) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 7A6B760BA0; Wed, 12 Jan 2022 14:04:14 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 29E4CC36AE5; Wed, 12 Jan 2022 14:04:09 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1641996253; bh=AQZsBKlvjmmS4Et2w8pOxGb2swLZ6ZWiHMtyT66wAnA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=cCVQnCbvtit0iQr5GqTTCREni+sECr0Wij1XIs9cgVzbhIFbV5OvWevMOOBqmsXuK Co+Lsfd1l9uunqZYLeo7UQ3rstj7+lz/R0MPNsxNpFpfcpCZVgEdkIs8RRZbjQm2eO AJg/MBMSqkjF/xrQHl+qOIcZYeHrfwGpPE7HLoFWaMrHV2RfmdGimNg3ke9KQm6RbH kpK5jh0w6NShnnvIBHn6l3W/b+UpLxE65KDPU8fKP9D1Xv5PRnCZxc5X/7AvzZI0qH n406gkS6BsXCTfkR0MK0JeQw7DrHHalcQ5o5Go79WyX8oiHXytdp2A/vMugbG4zjrU IiNKyJCUQ1dTA== From: Masami 
Hiramatsu To: Jiri Olsa , Alexei Starovoitov Cc: Daniel Borkmann , Andrii Nakryiko , Masami Hiramatsu , netdev@vger.kernel.org, bpf@vger.kernel.org, lkml , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Steven Rostedt , "Naveen N . Rao" , Anil S Keshavamurthy , "David S . Miller" Subject: [RFC PATCH v2 7/8] bpf: Add kprobe link for attaching raw kprobes Date: Wed, 12 Jan 2022 23:04:07 +0900 Message-Id: <164199624758.1247129.1566911813214393074.stgit@devnote2> X-Mailer: git-send-email 2.25.1 In-Reply-To: <164199616622.1247129.783024987490980883.stgit@devnote2> References: <164199616622.1247129.783024987490980883.stgit@devnote2> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-Delegate: bpf@iogearbox.net X-Patchwork-State: RFC From: Jiri Olsa Adding new link type BPF_LINK_TYPE_KPROBE to attach so-called "kprobes" directly through the fprobe API. Note that since using kprobes with many probe points sharing the same handler is not efficient, this uses fprobe, which natively supports multiple probe points for one handler but is limited to function entry and exit. Adding new attach type BPF_TRACE_RAW_KPROBE that enables such a link for a kprobe program. The new link allows creating a link with multiple kprobes by using the new link_create interface: struct { __aligned_u64 addrs; __u32 cnt; __u64 bpf_cookie; } kprobe; Plus new flag BPF_F_KPROBE_RETURN for link_create.flags to create a return probe.
Signed-off-by: Jiri Olsa Signed-off-by: Masami Hiramatsu --- include/linux/bpf_types.h | 1 include/uapi/linux/bpf.h | 12 ++ kernel/bpf/syscall.c | 195 +++++++++++++++++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 12 ++ 4 files changed, 215 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 48a91c51c015..a9000feab34e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -140,3 +140,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif +BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE, kprobe) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ba5af15e25f5..10e9b56a074e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -995,6 +995,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, + BPF_TRACE_RAW_KPROBE, __MAX_BPF_ATTACH_TYPE }; @@ -1009,6 +1010,7 @@ enum bpf_link_type { BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, + BPF_LINK_TYPE_KPROBE = 8, MAX_BPF_LINK_TYPE, }; @@ -1111,6 +1113,11 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* link_create flags used in LINK_CREATE command for BPF_TRACE_RAW_KPROBE + * attach type. 
+ */ +#define BPF_F_KPROBE_RETURN (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1463,6 +1470,11 @@ union bpf_attr { */ __u64 bpf_cookie; } perf_event; + struct { + __aligned_u64 addrs; + __u32 cnt; + __u64 bpf_cookie; + } kprobe; }; } link_create; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1033ee8c0caf..066fac3cfaa5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -31,6 +31,7 @@ #include #include #include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ @@ -3013,8 +3014,182 @@ static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *pro fput(perf_file); return err; } +#else +static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -ENOTSUPP; +} #endif /* CONFIG_PERF_EVENTS */ +#ifdef CONFIG_FPROBE + +/* Note that this is called 'kprobe_link' but using fprobe inside */ +struct bpf_kprobe_link { + struct bpf_link link; + struct fprobe fp; + bool is_return; + unsigned long *addrs; + u32 cnt; + u64 bpf_cookie; +}; + +static void bpf_kprobe_link_release(struct bpf_link *link) +{ + struct bpf_kprobe_link *kprobe_link; + + kprobe_link = container_of(link, struct bpf_kprobe_link, link); + + unregister_fprobe(&kprobe_link->fp); +} + +static void bpf_kprobe_link_dealloc(struct bpf_link *link) +{ + struct bpf_kprobe_link *kprobe_link; + + kprobe_link = container_of(link, struct bpf_kprobe_link, link); + kfree(kprobe_link->addrs); + kfree(kprobe_link); +} + +static const struct bpf_link_ops bpf_kprobe_link_lops = { + .release = bpf_kprobe_link_release, + .dealloc = bpf_kprobe_link_dealloc, +}; + +static int kprobe_link_prog_run(struct bpf_kprobe_link *kprobe_link, + struct pt_regs *regs) +{ + struct bpf_trace_run_ctx run_ctx; + struct bpf_run_ctx *old_run_ctx; + int err; + + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { 
+ err = 0; + goto out; + } + + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + run_ctx.bpf_cookie = kprobe_link->bpf_cookie; + + rcu_read_lock(); + migrate_disable(); + err = bpf_prog_run(kprobe_link->link.prog, regs); + migrate_enable(); + rcu_read_unlock(); + + bpf_reset_run_ctx(old_run_ctx); + + out: + __this_cpu_dec(bpf_prog_active); + return err; +} + +static void kprobe_link_entry_handler(struct fprobe *fp, unsigned long entry_ip, + struct pt_regs *regs) +{ + struct bpf_kprobe_link *kprobe_link; + + /* + * Because fprobe's regs->ip is set to the next instruction of + * dynamic-ftrace instruction, correct entry ip must be set, so + * that the bpf program can access entry address via regs the same + * way as kprobes. + */ + instruction_pointer_set(regs, entry_ip); + kprobe_link = container_of(fp, struct bpf_kprobe_link, fp); + kprobe_link_prog_run(kprobe_link, regs); +} + +static void kprobe_link_exit_handler(struct fprobe *fp, unsigned long entry_ip, + struct pt_regs *regs) +{ + struct bpf_kprobe_link *kprobe_link; + + kprobe_link = container_of(fp, struct bpf_kprobe_link, fp); + kprobe_link_prog_run(kprobe_link, regs); +} + +static int bpf_kprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct bpf_link_primer link_primer; + struct bpf_kprobe_link *link = NULL; + unsigned long *addrs; + u32 flags, cnt, size; + void __user *uaddrs; + u64 **tmp; + int err; + + flags = attr->link_create.flags; + if (flags & ~BPF_F_KPROBE_RETURN) + return -EINVAL; + + uaddrs = u64_to_user_ptr(attr->link_create.kprobe.addrs); + cnt = attr->link_create.kprobe.cnt; + size = cnt * sizeof(*tmp); + + tmp = kzalloc(size, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + if (copy_from_user(tmp, uaddrs, size)) { + err = -EFAULT; + goto error; + } + + /* TODO add extra copy for 32bit archs */ + if (sizeof(u64) != sizeof(void *)) { + err = -EINVAL; + goto error; + } + + addrs = (unsigned long *) tmp; + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) { + err =
-ENOMEM; + goto error; + } + + bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE, &bpf_kprobe_link_lops, prog); + + err = bpf_link_prime(&link->link, &link_primer); + if (err) + goto error; + + link->is_return = flags & BPF_F_KPROBE_RETURN; + link->addrs = addrs; + link->cnt = cnt; + link->bpf_cookie = attr->link_create.kprobe.bpf_cookie; + + link->fp.addrs = addrs; + link->fp.nentry = cnt; + + if (link->is_return) + link->fp.exit_handler = kprobe_link_exit_handler; + else + link->fp.entry_handler = kprobe_link_entry_handler; + + err = register_fprobe(&link->fp); + if (err) { + bpf_link_cleanup(&link_primer); + goto error; + } + + return bpf_link_settle(&link_primer); + +error: + kfree(link); + kfree(tmp); + + return err; +} +#else /* !CONFIG_FPROBE */ +static int bpf_kprobe_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -ENOTSUPP; +} +#endif + #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd static int bpf_raw_tracepoint_open(const union bpf_attr *attr) @@ -4241,7 +4416,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr, return -EINVAL; } -#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len +#define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe.bpf_cookie static int link_create(union bpf_attr *attr, bpfptr_t uattr) { enum bpf_prog_type ptype; @@ -4265,7 +4440,6 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) ret = tracing_bpf_link_attach(attr, uattr, prog); goto out; case BPF_PROG_TYPE_PERF_EVENT: - case BPF_PROG_TYPE_KPROBE: case BPF_PROG_TYPE_TRACEPOINT: if (attr->link_create.attach_type != BPF_PERF_EVENT) { ret = -EINVAL; @@ -4273,6 +4447,14 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) } ptype = prog->type; break; + case BPF_PROG_TYPE_KPROBE: + if (attr->link_create.attach_type != BPF_PERF_EVENT && + attr->link_create.attach_type != BPF_TRACE_RAW_KPROBE) { + ret = -EINVAL; + goto out; + } + ptype = prog->type; + break; default: ptype = 
attach_type_to_prog_type(attr->link_create.attach_type); if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) { @@ -4304,13 +4486,16 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) ret = bpf_xdp_link_attach(attr, prog); break; #endif -#ifdef CONFIG_PERF_EVENTS case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_TRACEPOINT: - case BPF_PROG_TYPE_KPROBE: ret = bpf_perf_link_attach(attr, prog); break; -#endif + case BPF_PROG_TYPE_KPROBE: + if (attr->link_create.attach_type == BPF_PERF_EVENT) + ret = bpf_perf_link_attach(attr, prog); + else + ret = bpf_kprobe_link_attach(attr, prog); + break; default: ret = -EINVAL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ba5af15e25f5..10e9b56a074e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -995,6 +995,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, + BPF_TRACE_RAW_KPROBE, __MAX_BPF_ATTACH_TYPE }; @@ -1009,6 +1010,7 @@ enum bpf_link_type { BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, + BPF_LINK_TYPE_KPROBE = 8, MAX_BPF_LINK_TYPE, }; @@ -1111,6 +1113,11 @@ enum bpf_link_type { */ #define BPF_F_SLEEPABLE (1U << 4) +/* link_create flags used in LINK_CREATE command for BPF_TRACE_RAW_KPROBE + * attach type. 
+ */ +#define BPF_F_KPROBE_RETURN (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1463,6 +1470,11 @@ union bpf_attr { */ __u64 bpf_cookie; } perf_event; + struct { + __aligned_u64 addrs; + __u32 cnt; + __u64 bpf_cookie; + } kprobe; }; } link_create; From patchwork Wed Jan 12 14:04:19 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Masami Hiramatsu (Google)" X-Patchwork-Id: 12711444 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1D271C433FE for ; Wed, 12 Jan 2022 14:05:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S242643AbiALOFC (ORCPT ); Wed, 12 Jan 2022 09:05:02 -0500 Received: from ams.source.kernel.org ([145.40.68.75]:56512 "EHLO ams.source.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S242273AbiALOE2 (ORCPT ); Wed, 12 Jan 2022 09:04:28 -0500 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ams.source.kernel.org (Postfix) with ESMTPS id C207CB81EF5; Wed, 12 Jan 2022 14:04:26 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id C72EDC36AEA; Wed, 12 Jan 2022 14:04:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1641996265; bh=OlhRkWarq2RHlk3uHOGFYdiuiqRk58njvjxQ42ZY9TU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=jJCFv1dUffxBIV/xLifBA4mJgvr7Z25LztkEtalF7iUNkLK6VHJG0FEz2tfnoL0Hv zwc2RTvaWneot0VR95TrQcyB8XB3SiYtPJMQKDPwk8aNFyNe2PlID7kcafDMK+c+OX ys6pI4ae2e7ymJUnBM7adwouoa7swHgAf6lUzjm9a+peUIrSgooyPZfAV0FBXc3UnY yhthtphMdeQRVl4Ari2FZFn3dQhWjJSQmw/vPSUaIsBoZt649cMWzP09dCS66YHnfI 
NBrZlU3W87jb3Oben9wegx6+fLoVEcirhYS3CaEzeGLAa6oob/7JAe4sF9I7Szzu1p 5fVt6B10MG+hA== From: Masami Hiramatsu To: Jiri Olsa , Alexei Starovoitov Cc: Daniel Borkmann , Andrii Nakryiko , Masami Hiramatsu , netdev@vger.kernel.org, bpf@vger.kernel.org, lkml , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Steven Rostedt , "Naveen N . Rao" , Anil S Keshavamurthy , "David S . Miller" Subject: [RFC PATCH v2 8/8] [DO NOT MERGE] Out-of-tree: Support wildcard symbol option to sample Date: Wed, 12 Jan 2022 23:04:19 +0900 Message-Id: <164199625932.1247129.8866493903208911902.stgit@devnote2> X-Mailer: git-send-email 2.25.1 In-Reply-To: <164199616622.1247129.783024987490980883.stgit@devnote2> References: <164199616622.1247129.783024987490980883.stgit@devnote2> User-Agent: StGit/0.19 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org X-Patchwork-State: RFC This is not intended to be merged into upstream code, since it exposes some kernel-internal functions just for the sake of an example. However, it is a good demonstration of how time-efficient fprobe is when registering a probe on thousands of functions. 
# time insmod fprobe_example.ko symbol='btrfs_*' [ 36.130947] fprobe_init: 1028 symbols found [ 36.177901] fprobe_init: Planted fprobe at btrfs_* real 0m 0.08s user 0m 0.00s sys 0m 0.07s Signed-off-by: Masami Hiramatsu --- kernel/kallsyms.c | 1 + kernel/trace/ftrace.c | 1 + samples/fprobe/fprobe_example.c | 69 ++++++++++++++++++++++++++++++++++----- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 3011bc33a5ba..d0c4073acbfd 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -246,6 +246,7 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, } return 0; } +EXPORT_SYMBOL_GPL(kallsyms_on_each_symbol); #endif /* CONFIG_LIVEPATCH */ static unsigned long get_symbol_pos(unsigned long addr, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d38ae5063be3..feb69ecc5d2c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1580,6 +1580,7 @@ unsigned long ftrace_location(unsigned long ip) { return ftrace_location_range(ip, ip); } +EXPORT_SYMBOL_GPL(ftrace_location); /** * ftrace_text_reserved - return true if range contains an ftrace location diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c index c28320537f98..df034e00661e 100644 --- a/samples/fprobe/fprobe_example.c +++ b/samples/fprobe/fprobe_example.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) "%s: " fmt, __func__ +#include #include #include #include @@ -37,16 +38,51 @@ static void sample_exit_handler(struct fprobe *fp, unsigned long ip, struct pt_r static char *symbuf; +struct sym_search_param { + unsigned long *addrs; + const char *pat; + int cnt; +}; + +#define MAX_FPROBE_ENTS (16 * 1024) + +static int wildcard_match(void *data, const char *symbol, struct module *mod, + unsigned long addr) +{ + struct sym_search_param *param = (struct sym_search_param *)data; + + if (glob_match(param->pat, symbol)) { + if (!ftrace_location(addr)) + return 0; + + if (param->addrs) + 
param->addrs[param->cnt] = addr; + param->cnt++; + if (param->cnt >= MAX_FPROBE_ENTS) + return -E2BIG; + } + return 0; +} + static int __init fprobe_init(void) { - const char **syms; + struct sym_search_param param = {.pat = symbol, .addrs = NULL, .cnt = 0}; + unsigned long *addrs = NULL; + const char **syms = NULL; char *p; int ret, count, i; + bool wildcard = false; sample_probe.entry_handler = sample_entry_handler; sample_probe.exit_handler = sample_exit_handler; - if (strchr(symbol, ',')) { + if (strchr(symbol, '*')) { + kallsyms_on_each_symbol(wildcard_match, &param); + count = param.cnt; + if (!count) + return -ENOENT; + wildcard = true; + } else if (strchr(symbol, ',')) { symbuf = kstrdup(symbol, GFP_KERNEL); if (!symbuf) return -ENOMEM; @@ -58,19 +94,31 @@ static int __init fprobe_init(void) count = 1; symbuf = symbol; } - pr_info("%d symbols found\n", count); - syms = kcalloc(count, sizeof(char *), GFP_KERNEL); - if (!syms) { + if (wildcard) + addrs = kcalloc(count, sizeof(unsigned long), GFP_KERNEL); + else + syms = kcalloc(count, sizeof(char *), GFP_KERNEL); + if (!syms && !addrs) { ret = -ENOMEM; goto error; } - p = symbuf; - for (i = 0; i < count; i++) - syms[i] = strsep(&p, ","); + if (wildcard) { + param.addrs = addrs; + param.cnt = 0; + + kallsyms_on_each_symbol(wildcard_match, &param); + count = param.cnt; + sample_probe.addrs = addrs; + } else { + p = symbuf; + for (i = 0; i < count; i++) + syms[i] = strsep(&p, ","); + sample_probe.syms = syms; + } + pr_info("%d symbols found\n", count); - sample_probe.syms = syms; sample_probe.nentry = count; ret = register_fprobe(&sample_probe); @@ -82,6 +130,8 @@ static int __init fprobe_init(void) return 0; error: + kfree(addrs); + kfree(syms); if (symbuf != symbol) kfree(symbuf); return ret; @@ -92,6 +142,7 @@ static void __exit fprobe_exit(void) unregister_fprobe(&sample_probe); kfree(sample_probe.syms); + kfree(sample_probe.addrs); if (symbuf != symbol) kfree(symbuf);