@@ -29,6 +29,19 @@ struct tracepoint_func {
int prio;
};
+/**
+ * enum tracepoint_flags - Tracepoint flags
+ * @TRACEPOINT_MAY_EXIST: On registration, don't warn if the tracepoint is
+ * already registered.
+ * @TRACEPOINT_MAY_FAULT: The tracepoint probe callback will be called with
+ * preemption enabled, and is allowed to take page
+ * faults.
+ */
+enum tracepoint_flags {
+ TRACEPOINT_MAY_EXIST = (1 << 0),
+ TRACEPOINT_MAY_FAULT = (1 << 1),
+};
+
struct tracepoint {
const char *name; /* Tracepoint name */
struct static_key key;
@@ -39,6 +52,7 @@ struct tracepoint {
int (*regfunc)(void);
void (*unregfunc)(void);
struct tracepoint_func __rcu *funcs;
+ unsigned int flags;
};
#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
@@ -17,6 +17,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
+#include <linux/rcupdate_trace.h>
#include <linux/tracepoint-defs.h>
#include <linux/static_call.h>
@@ -40,17 +41,10 @@ extern int
tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data,
int prio);
extern int
-tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data,
- int prio);
+tracepoint_probe_register_prio_flags(struct tracepoint *tp, void *probe, void *data,
+ int prio, unsigned int flags);
extern int
tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);
-static inline int
-tracepoint_probe_register_may_exist(struct tracepoint *tp, void *probe,
- void *data)
-{
- return tracepoint_probe_register_prio_may_exist(tp, probe, data,
- TRACEPOINT_DEFAULT_PRIO);
-}
extern void
for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
void *priv);
@@ -89,6 +83,7 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
#ifdef CONFIG_TRACEPOINTS
static inline void tracepoint_synchronize_unregister(void)
{
+ synchronize_rcu_tasks_trace();
synchronize_srcu(&tracepoint_srcu);
synchronize_rcu();
}
@@ -191,9 +186,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
* it_func[0] is never NULL because there is at least one element in the array
* when the array itself is non NULL.
*/
-#define __DO_TRACE(name, args, cond, rcuidle) \
+#define __DO_TRACE(name, args, cond, rcuidle, tp_flags) \
do { \
int __maybe_unused __idx = 0; \
+ bool mayfault = (tp_flags) & TRACEPOINT_MAY_FAULT; \
\
if (!(cond)) \
return; \
@@ -202,8 +198,12 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
"Bad RCU usage for tracepoint")) \
return; \
\
- /* keep srcu and sched-rcu usage consistent */ \
- preempt_disable_notrace(); \
+ if (mayfault) { \
+ rcu_read_lock_trace(); \
+ } else { \
+ /* keep srcu and sched-rcu usage consistent */ \
+ preempt_disable_notrace(); \
+ } \
\
/* \
* For rcuidle callers, use srcu since sched-rcu \
@@ -221,20 +221,23 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
} \
\
- preempt_enable_notrace(); \
+ if (mayfault) \
+ rcu_read_unlock_trace(); \
+ else \
+ preempt_enable_notrace(); \
} while (0)
#ifndef MODULE
-#define __DECLARE_TRACE_RCU(name, proto, args, cond) \
+#define __DECLARE_TRACE_RCU(name, proto, args, cond, tp_flags) \
static inline void trace_##name##_rcuidle(proto) \
{ \
if (static_key_false(&__tracepoint_##name.key)) \
__DO_TRACE(name, \
TP_ARGS(args), \
- TP_CONDITION(cond), 1); \
+ TP_CONDITION(cond), 1, tp_flags); \
}
#else
-#define __DECLARE_TRACE_RCU(name, proto, args, cond)
+#define __DECLARE_TRACE_RCU(name, proto, args, cond, tp_flags)
#endif
/*
@@ -248,7 +251,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
* site if it is not watching, as it will need to be active when the
* tracepoint is enabled.
*/
-#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, tp_flags) \
extern int __traceiter_##name(data_proto); \
DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \
extern struct tracepoint __tracepoint_##name; \
@@ -257,14 +260,16 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
if (static_key_false(&__tracepoint_##name.key)) \
__DO_TRACE(name, \
TP_ARGS(args), \
- TP_CONDITION(cond), 0); \
+ TP_CONDITION(cond), 0, tp_flags); \
if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
WARN_ONCE(!rcu_is_watching(), \
"RCU not watching for tracepoint"); \
} \
+ if ((tp_flags) & TRACEPOINT_MAY_FAULT) \
+ might_fault(); \
} \
__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \
- PARAMS(cond)) \
+ PARAMS(cond), tp_flags) \
static inline int \
register_trace_##name(void (*probe)(data_proto), void *data) \
{ \
@@ -279,6 +284,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
(void *)probe, data, prio); \
} \
static inline int \
+ register_trace_prio_flags_##name(void (*probe)(data_proto), void *data, \
+ int prio, unsigned int flags) \
+ { \
+ return tracepoint_probe_register_prio_flags(&__tracepoint_##name, \
+ (void *)probe, data, prio, flags); \
+ } \
+ static inline int \
unregister_trace_##name(void (*probe)(data_proto), void *data) \
{ \
return tracepoint_probe_unregister(&__tracepoint_##name,\
@@ -299,7 +311,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
* structures, so we create an array of pointers that will be used for iteration
* on the tracepoints.
*/
-#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \
+#define DEFINE_TRACE_FN_FLAGS(_name, _reg, _unreg, proto, args, tp_flags) \
static const char __tpstrtab_##_name[] \
__section("__tracepoints_strings") = #_name; \
extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \
@@ -315,7 +327,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
.probestub = &__probestub_##_name, \
.regfunc = _reg, \
.unregfunc = _unreg, \
- .funcs = NULL }; \
+ .funcs = NULL, \
+ .flags = (tp_flags), \
+ }; \
__TRACEPOINT_ENTRY(_name); \
int __traceiter_##_name(void *__data, proto) \
{ \
@@ -338,8 +352,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
} \
DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
+#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \
+ DEFINE_TRACE_FN_FLAGS(_name, _reg, _unreg, PARAMS(proto), PARAMS(args), 0)
+
#define DEFINE_TRACE(name, proto, args) \
- DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
+ DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args))
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \
EXPORT_SYMBOL_GPL(__tracepoint_##name); \
@@ -352,7 +369,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
#else /* !TRACEPOINTS_ENABLED */
-#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, tp_flags) \
static inline void trace_##name(proto) \
{ } \
static inline void trace_##name##_rcuidle(proto) \
@@ -364,6 +381,18 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
return -ENOSYS; \
} \
static inline int \
+ register_trace_prio_##name(void (*probe)(data_proto), \
+ void *data, int prio) \
+ { \
+ return -ENOSYS; \
+ } \
+ static inline int \
+ register_trace_prio_flags_##name(void (*probe)(data_proto), \
+ void *data, int prio, unsigned int flags) \
+ { \
+ return -ENOSYS; \
+ } \
+ static inline int \
unregister_trace_##name(void (*probe)(data_proto), \
void *data) \
{ \
@@ -378,6 +407,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
return false; \
}
+#define DEFINE_TRACE_FN_FLAGS(name, reg, unreg, proto, args, tp_flags)
#define DEFINE_TRACE_FN(name, reg, unreg, proto, args)
#define DEFINE_TRACE(name, proto, args)
#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
@@ -432,12 +462,17 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
#define DECLARE_TRACE(name, proto, args) \
__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \
cpu_online(raw_smp_processor_id()), \
- PARAMS(void *__data, proto))
+ PARAMS(void *__data, proto), 0)
+
+#define DECLARE_TRACE_MAY_FAULT(name, proto, args) \
+ __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \
+ cpu_online(raw_smp_processor_id()), \
+ PARAMS(void *__data, proto), TRACEPOINT_MAY_FAULT)
#define DECLARE_TRACE_CONDITION(name, proto, args, cond) \
__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \
cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
- PARAMS(void *__data, proto))
+ PARAMS(void *__data, proto), 0)
#define TRACE_EVENT_FLAGS(event, flag)
@@ -568,6 +603,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
#define TRACE_EVENT_FN(name, proto, args, struct, \
assign, print, reg, unreg) \
DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT_FN_MAY_FAULT(name, proto, args, struct, \
+ assign, print, reg, unreg) \
+ DECLARE_TRACE_MAY_FAULT(name, PARAMS(proto), PARAMS(args))
#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, \
assign, print, reg, unreg) \
DECLARE_TRACE_CONDITION(name, PARAMS(proto), \
@@ -41,6 +41,12 @@
assign, print, reg, unreg) \
DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args))
+#undef TRACE_EVENT_FN_MAY_FAULT
+#define TRACE_EVENT_FN_MAY_FAULT(name, proto, args, tstruct, \
+ assign, print, reg, unreg) \
+ DEFINE_TRACE_FN_FLAGS(name, reg, unreg, PARAMS(proto), \
+ PARAMS(args), TRACEPOINT_MAY_FAULT)
+
#undef TRACE_EVENT_FN_COND
#define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, \
assign, print, reg, unreg) \
@@ -106,6 +112,7 @@
#undef TRACE_EVENT
#undef TRACE_EVENT_FN
+#undef TRACE_EVENT_FN_MAY_FAULT
#undef TRACE_EVENT_FN_COND
#undef TRACE_EVENT_CONDITION
#undef TRACE_EVENT_NOP
@@ -77,6 +77,12 @@
TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \
PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \
+#undef TRACE_EVENT_FN_MAY_FAULT
+#define TRACE_EVENT_FN_MAY_FAULT(name, proto, args, tstruct, \
+ assign, print, reg, unreg) \
+ TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \
+ PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \
+
#undef TRACE_EVENT_FN_COND
#define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, \
assign, print, reg, unreg) \
@@ -1936,6 +1936,7 @@ config BINDGEN_VERSION_TEXT
#
config TRACEPOINTS
bool
+ select TASKS_TRACE_RCU
source "kernel/Kconfig.kexec"
@@ -2471,7 +2471,9 @@ int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *li
if (prog->aux->max_tp_access > btp->writable_size)
return -EINVAL;
- return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link);
+ return tracepoint_probe_register_prio_flags(tp, (void *)btp->bpf_func,
+ link, TRACEPOINT_DEFAULT_PRIO,
+ TRACEPOINT_MAY_EXIST);
}
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
@@ -705,8 +705,9 @@ static int __register_trace_fprobe(struct trace_fprobe *tf)
* At first, put __probestub_##TP function on the tracepoint
* and put a fprobe on the stub function.
*/
- ret = tracepoint_probe_register_prio_may_exist(tpoint,
- tpoint->probestub, NULL, 0);
+ ret = tracepoint_probe_register_prio_flags(tpoint,
+ tpoint->probestub, NULL, 0,
+ TRACEPOINT_MAY_EXIST);
if (ret < 0)
return ret;
return register_fprobe_ips(&tf->fp, &ip, 1);
@@ -111,11 +111,16 @@ static inline void *allocate_probes(int count)
return p == NULL ? NULL : p->probes;
}
-static void srcu_free_old_probes(struct rcu_head *head)
+static void rcu_tasks_trace_free_old_probes(struct rcu_head *head)
{
kfree(container_of(head, struct tp_probes, rcu));
}
+static void srcu_free_old_probes(struct rcu_head *head)
+{
+ call_rcu_tasks_trace(head, rcu_tasks_trace_free_old_probes);
+}
+
static void rcu_free_old_probes(struct rcu_head *head)
{
call_srcu(&tracepoint_srcu, head, srcu_free_old_probes);
@@ -136,7 +141,7 @@ static __init int release_early_probes(void)
return 0;
}
-/* SRCU is initialized at core_initcall */
+/* SRCU and Tasks Trace RCU are initialized at core_initcall */
postcore_initcall(release_early_probes);
static inline void release_probes(struct tracepoint_func *old)
@@ -146,8 +151,9 @@ static inline void release_probes(struct tracepoint_func *old)
struct tp_probes, probes[0]);
/*
- * We can't free probes if SRCU is not initialized yet.
- * Postpone the freeing till after SRCU is initialized.
+ * We can't free probes if SRCU and Tasks Trace RCU are not
+ * initialized yet. Postpone the freeing till after both are
+ * initialized.
*/
if (unlikely(!ok_to_free_tracepoints)) {
tp_probes->rcu.next = early_probes;
@@ -156,10 +162,9 @@ static inline void release_probes(struct tracepoint_func *old)
}
/*
- * Tracepoint probes are protected by both sched RCU and SRCU,
- * by calling the SRCU callback in the sched RCU callback we
- * cover both cases. So let us chain the SRCU and sched RCU
- * callbacks to wait for both grace periods.
+ * Tracepoint probes are protected by sched RCU, SRCU and
+ * Tasks Trace RCU by chaining the callbacks we cover all three
+ * cases and wait for all three grace periods.
*/
call_rcu(&tp_probes->rcu, rcu_free_old_probes);
}
@@ -460,30 +465,45 @@ static int tracepoint_remove_func(struct tracepoint *tp,
}
/**
- * tracepoint_probe_register_prio_may_exist - Connect a probe to a tracepoint with priority
+ * tracepoint_probe_register_prio_flags - Connect a probe to a tracepoint with priority and flags
* @tp: tracepoint
* @probe: probe handler
* @data: tracepoint data
* @prio: priority of this function over other registered functions
+ * @flags: tracepoint flags argument (enum tracepoint_flags bits)
*
- * Same as tracepoint_probe_register_prio() except that it will not warn
- * if the tracepoint is already registered.
+ * Returns 0 if ok, error value on error.
+ * Note: if @tp is within a module, the caller is responsible for
+ * unregistering the probe before the module is gone. This can be
+ * performed either with a tracepoint module going notifier, or from
+ * within module exit functions.
*/
-int tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe,
- void *data, int prio)
+int tracepoint_probe_register_prio_flags(struct tracepoint *tp, void *probe,
+ void *data, int prio, unsigned int flags)
{
struct tracepoint_func tp_func;
int ret;
+ /*
+ * For a probe to be registered to a tracepoint they must share the
+ * same MAY_FAULT flag value.
+ */
+ if ((tp->flags & TRACEPOINT_MAY_FAULT) != (flags & TRACEPOINT_MAY_FAULT))
+ return -EINVAL;
+
mutex_lock(&tracepoints_mutex);
tp_func.func = probe;
tp_func.data = data;
tp_func.prio = prio;
- ret = tracepoint_add_func(tp, &tp_func, prio, false);
+ /*
+ * When the MAY_EXIST flag is set, don't warn if the tracepoint is
+ * already registered.
+ */
+ ret = tracepoint_add_func(tp, &tp_func, prio, flags & TRACEPOINT_MAY_EXIST);
mutex_unlock(&tracepoints_mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio_may_exist);
+EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio_flags);
/**
* tracepoint_probe_register_prio - Connect a probe to a tracepoint with priority
@@ -501,16 +521,7 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio_may_exist);
int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
void *data, int prio)
{
- struct tracepoint_func tp_func;
- int ret;
-
- mutex_lock(&tracepoints_mutex);
- tp_func.func = probe;
- tp_func.data = data;
- tp_func.prio = prio;
- ret = tracepoint_add_func(tp, &tp_func, prio, true);
- mutex_unlock(&tracepoints_mutex);
- return ret;
+ return tracepoint_probe_register_prio_flags(tp, probe, data, prio, 0);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio);
@@ -520,6 +531,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio);
* @probe: probe handler
* @data: tracepoint data
*
+ * Non-faultable probes can only be registered on non-faultable tracepoints.
+ *
* Returns 0 if ok, error value on error.
* Note: if @tp is within a module, the caller is responsible for
* unregistering the probe before the module is gone. This can be
@@ -528,7 +541,7 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio);
*/
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
{
- return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO);
+ return tracepoint_probe_register_prio_flags(tp, probe, data, TRACEPOINT_DEFAULT_PRIO, 0);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);