tracing/probes: Add tracepoint support on fprobe_events
authorMasami Hiramatsu (Google) <mhiramat@kernel.org>
Tue, 6 Jun 2023 12:39:55 +0000 (21:39 +0900)
committerMasami Hiramatsu (Google) <mhiramat@kernel.org>
Tue, 6 Jun 2023 12:39:55 +0000 (21:39 +0900)
Allow fprobe_events to trace raw tracepoints so that user can trace
tracepoints which don't have traceevent wrappers. This new event is
always available if the fprobe_events is enabled (thus no kconfig),
because the fprobe_events depends on the trace-event and traceporint.

e.g.
 # echo 't sched_overutilized_tp' >> dynamic_events
 # echo 't 9p_client_req' >> dynamic_events
 # cat dynamic_events
t:tracepoints/sched_overutilized_tp sched_overutilized_tp
t:tracepoints/_9p_client_req 9p_client_req

The event name is based on the tracepoint name, but if it is started
with digit character, an underscore '_' will be added.

NOTE: to avoid further confusion, this renames TPARG_FL_TPOINT to
TPARG_FL_TEVENT because this flag is used for eprobe (trace-event probe).
And reuse TPARG_FL_TPOINT for this raw tracepoint probe.

Link: https://lore.kernel.org/all/168507471874.913472.17214624519622959593.stgit@mhiramat.roam.corp.google.com/
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/oe-kbuild-all/202305020453.afTJ3VVp-lkp@intel.com/
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
include/linux/tracepoint-defs.h
include/linux/tracepoint.h
kernel/trace/trace.c
kernel/trace/trace_eprobe.c
kernel/trace/trace_fprobe.c
kernel/trace/trace_probe.c
kernel/trace/trace_probe.h

index e7c2276be33ebebe2aa97aa5c3e551749cef9ba9..4dc4955f0fbfe6c00f686314ffb96592753479a2 100644 (file)
@@ -35,6 +35,7 @@ struct tracepoint {
        struct static_call_key *static_call_key;
        void *static_call_tramp;
        void *iterator;
+       void *probestub;
        int (*regfunc)(void);
        void (*unregfunc)(void);
        struct tracepoint_func __rcu *funcs;
index 6811e43c1b5c2afed0bad0a2e7469e8573916eb9..88c0ba623ee6bf5b59cdf0521961ab46c67e2829 100644 (file)
@@ -303,6 +303,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
        __section("__tracepoints_strings") = #_name;                    \
        extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \
        int __traceiter_##_name(void *__data, proto);                   \
+       void __probestub_##_name(void *__data, proto);                  \
        struct tracepoint __tracepoint_##_name  __used                  \
        __section("__tracepoints") = {                                  \
                .name = __tpstrtab_##_name,                             \
@@ -310,6 +311,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
                .static_call_key = &STATIC_CALL_KEY(tp_func_##_name),   \
                .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
                .iterator = &__traceiter_##_name,                       \
+               .probestub = &__probestub_##_name,                      \
                .regfunc = _reg,                                        \
                .unregfunc = _unreg,                                    \
                .funcs = NULL };                                        \
@@ -330,6 +332,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
                }                                                       \
                return 0;                                               \
        }                                                               \
+       void __probestub_##_name(void *__data, proto)                   \
+       {                                                               \
+       }                                                               \
        DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
 
 #define DEFINE_TRACE(name, proto, args)                \
index 755b0bf2e1acdaadd50d42708349e1644386dd2d..fa4e1a18da70bba18bb1ebed78bd73d21b0e7ee2 100644 (file)
@@ -5681,6 +5681,7 @@ static const char readme_msg[] =
 #endif
 #ifdef CONFIG_FPROBE_EVENTS
        "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
+       "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
 #endif
 #ifdef CONFIG_HIST_TRIGGERS
        "\t           s:[synthetic/]<event> <field> [<field>]\n"
index 67e854979d53ec5bbc90641919031484bc4c9502..fd64cd5d57451a6a82c304d317f97f116e9b15f4 100644 (file)
@@ -817,7 +817,7 @@ find_and_get_event(const char *system, const char *event_name)
 
 static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
 {
-       unsigned int flags = TPARG_FL_KERNEL | TPARG_FL_TPOINT;
+       unsigned int flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT;
        int ret;
 
        ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], flags);
index 48dbbc72b7dd57fc42803c5567e20761f753a710..aa71ccb4205c9003762ceea3d3b780b128cf404e 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/rculist.h>
 #include <linux/security.h>
+#include <linux/tracepoint.h>
 #include <linux/uaccess.h>
 
 #include "trace_dynevent.h"
@@ -17,6 +18,7 @@
 #include "trace_probe_tmpl.h"
 
 #define FPROBE_EVENT_SYSTEM "fprobes"
+#define TRACEPOINT_EVENT_SYSTEM "tracepoints"
 #define RETHOOK_MAXACTIVE_MAX 4096
 
 static int trace_fprobe_create(const char *raw_command);
@@ -41,6 +43,8 @@ struct trace_fprobe {
        struct dyn_event        devent;
        struct fprobe           fp;
        const char              *symbol;
+       struct tracepoint       *tpoint;
+       struct module           *mod;
        struct trace_probe      tp;
 };
 
@@ -68,6 +72,11 @@ static bool trace_fprobe_is_return(struct trace_fprobe *tf)
        return tf->fp.exit_handler != NULL;
 }
 
+static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf)
+{
+       return tf->tpoint != NULL;
+}
+
 static const char *trace_fprobe_symbol(struct trace_fprobe *tf)
 {
        return tf->symbol ? tf->symbol : "unknown";
@@ -668,6 +677,21 @@ static int __register_trace_fprobe(struct trace_fprobe *tf)
        else
                tf->fp.flags |= FPROBE_FL_DISABLED;
 
+       if (trace_fprobe_is_tracepoint(tf)) {
+               struct tracepoint *tpoint = tf->tpoint;
+               unsigned long ip = (unsigned long)tpoint->probestub;
+               /*
+                * Here, we do 2 steps to enable fprobe on a tracepoint.
+                * At first, put __probestub_##TP function on the tracepoint
+                * and put a fprobe on the stub function.
+                */
+               ret = tracepoint_probe_register_prio_may_exist(tpoint,
+                                       tpoint->probestub, NULL, 0);
+               if (ret < 0)
+                       return ret;
+               return register_fprobe_ips(&tf->fp, &ip, 1);
+       }
+
        /* TODO: handle filter, nofilter or symbol list */
        return register_fprobe(&tf->fp, tf->symbol, NULL);
 }
@@ -678,6 +702,12 @@ static void __unregister_trace_fprobe(struct trace_fprobe *tf)
        if (trace_fprobe_is_registered(tf)) {
                unregister_fprobe(&tf->fp);
                memset(&tf->fp, 0, sizeof(tf->fp));
+               if (trace_fprobe_is_tracepoint(tf)) {
+                       tracepoint_probe_unregister(tf->tpoint,
+                                       tf->tpoint->probestub, NULL);
+                       tf->tpoint = NULL;
+                       tf->mod = NULL;
+               }
        }
 }
 
@@ -741,7 +771,8 @@ static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to)
 {
        int ret;
 
-       if (trace_fprobe_is_return(tf) != trace_fprobe_is_return(to)) {
+       if (trace_fprobe_is_return(tf) != trace_fprobe_is_return(to) ||
+           trace_fprobe_is_tracepoint(tf) != trace_fprobe_is_tracepoint(to)) {
                trace_probe_log_set_index(0);
                trace_probe_log_err(0, DIFF_PROBE_TYPE);
                return -EEXIST;
@@ -811,6 +842,60 @@ end:
        return ret;
 }
 
+#ifdef CONFIG_MODULES
+static int __tracepoint_probe_module_cb(struct notifier_block *self,
+                                       unsigned long val, void *data)
+{
+       struct tp_module *tp_mod = data;
+       struct trace_fprobe *tf;
+       struct dyn_event *pos;
+
+       if (val != MODULE_STATE_GOING)
+               return NOTIFY_DONE;
+
+       mutex_lock(&event_mutex);
+       for_each_trace_fprobe(tf, pos) {
+               if (tp_mod->mod == tf->mod) {
+                       tracepoint_probe_unregister(tf->tpoint,
+                                       tf->tpoint->probestub, NULL);
+                       tf->tpoint = NULL;
+                       tf->mod = NULL;
+               }
+       }
+       mutex_unlock(&event_mutex);
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block tracepoint_module_nb = {
+       .notifier_call = __tracepoint_probe_module_cb,
+};
+#endif /* CONFIG_MODULES */
+
+struct __find_tracepoint_cb_data {
+       const char *tp_name;
+       struct tracepoint *tpoint;
+};
+
+static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
+{
+       struct __find_tracepoint_cb_data *data = priv;
+
+       if (!data->tpoint && !strcmp(data->tp_name, tp->name))
+               data->tpoint = tp;
+}
+
+static struct tracepoint *find_tracepoint(const char *tp_name)
+{
+       struct __find_tracepoint_cb_data data = {
+               .tp_name = tp_name,
+       };
+
+       for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
+
+       return data.tpoint;
+}
+
 static int __trace_fprobe_create(int argc, const char *argv[])
 {
        /*
@@ -819,6 +904,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
         *      f[:[GRP/][EVENT]] [MOD:]KSYM [FETCHARGS]
         *  - Add fexit probe:
         *      f[N][:[GRP/][EVENT]] [MOD:]KSYM%return [FETCHARGS]
+        *  - Add tracepoint probe:
+        *      t[:[GRP/][EVENT]] TRACEPOINT [FETCHARGS]
         *
         * Fetch args:
         *  $retval     : fetch return value
@@ -844,10 +931,16 @@ static int __trace_fprobe_create(int argc, const char *argv[])
        char buf[MAX_EVENT_NAME_LEN];
        char gbuf[MAX_EVENT_NAME_LEN];
        unsigned int flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE;
+       bool is_tracepoint = false;
 
-       if (argv[0][0] != 'f' || argc < 2)
+       if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2)
                return -ECANCELED;
 
+       if (argv[0][0] == 't') {
+               is_tracepoint = true;
+               group = TRACEPOINT_EVENT_SYSTEM;
+       }
+
        trace_probe_log_init("trace_fprobe", argc, argv);
 
        event = strchr(&argv[0][1], ':');
@@ -881,14 +974,14 @@ static int __trace_fprobe_create(int argc, const char *argv[])
 
        trace_probe_log_set_index(1);
 
-       /* a symbol specified */
+       /* a symbol(or tracepoint) must be specified */
        symbol = kstrdup(argv[1], GFP_KERNEL);
        if (!symbol)
                return -ENOMEM;
 
        tmp = strchr(symbol, '%');
        if (tmp) {
-               if (!strcmp(tmp, "%return")) {
+               if (!is_tracepoint && !strcmp(tmp, "%return")) {
                        *tmp = '\0';
                        is_return = true;
                } else {
@@ -907,6 +1000,9 @@ static int __trace_fprobe_create(int argc, const char *argv[])
        else
                flags |= TPARG_FL_FENTRY;
 
+       if (is_tracepoint)
+               flags |= TPARG_FL_TPOINT;
+
        trace_probe_log_set_index(0);
        if (event) {
                ret = traceprobe_parse_event_name(&event, &group, gbuf,
@@ -917,8 +1013,11 @@ static int __trace_fprobe_create(int argc, const char *argv[])
 
        if (!event) {
                /* Make a new event name */
-               snprintf(buf, MAX_EVENT_NAME_LEN, "%s__%s", symbol,
-                        is_return ? "exit" : "entry");
+               if (is_tracepoint)
+                       strscpy(buf, symbol, MAX_EVENT_NAME_LEN);
+               else
+                       snprintf(buf, MAX_EVENT_NAME_LEN, "%s__%s", symbol,
+                                is_return ? "exit" : "entry");
                sanitize_event_name(buf);
                event = buf;
        }
@@ -932,6 +1031,18 @@ static int __trace_fprobe_create(int argc, const char *argv[])
                WARN_ON_ONCE(ret != -ENOMEM);
                goto out;       /* We know tf is not allocated */
        }
+
+       if (is_tracepoint) {
+               tf->tpoint = find_tracepoint(tf->symbol);
+               if (!tf->tpoint) {
+                       trace_probe_log_set_index(1);
+                       trace_probe_log_err(0, NO_TRACEPOINT);
+                       goto parse_error;
+               }
+               tf->mod = __module_text_address(
+                               (unsigned long)tf->tpoint->probestub);
+       }
+
        argc -= 2; argv += 2;
 
        /* parse arguments */
@@ -991,7 +1102,10 @@ static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
        struct trace_fprobe *tf = to_trace_fprobe(ev);
        int i;
 
-       seq_putc(m, 'f');
+       if (trace_fprobe_is_tracepoint(tf))
+               seq_putc(m, 't');
+       else
+               seq_putc(m, 'f');
        if (trace_fprobe_is_return(tf) && tf->fp.nr_maxactive)
                seq_printf(m, "%d", tf->fp.nr_maxactive);
        seq_printf(m, ":%s/%s", trace_probe_group_name(&tf->tp),
@@ -1048,6 +1162,12 @@ static __init int init_fprobe_trace_early(void)
        if (ret)
                return ret;
 
+#ifdef CONFIG_MODULES
+       ret = register_tracepoint_module_notifier(&tracepoint_module_nb);
+       if (ret)
+               return ret;
+#endif
+
        return 0;
 }
 core_initcall(init_fprobe_trace_early);
index c39860fb2e41f37582d619f960bec39910a01ae2..798f18d24ebc756d894bb9473e05b4560bcdd51e 100644 (file)
@@ -292,7 +292,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
        int ret = 0;
        int len;
 
-       if (flags & TPARG_FL_TPOINT) {
+       if (flags & TPARG_FL_TEVENT) {
                if (code->data)
                        return -EFAULT;
                code->data = kstrdup(arg, GFP_KERNEL);
@@ -326,8 +326,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
        } else if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
                code->op = FETCH_OP_COMM;
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
-       } else if (((flags & TPARG_FL_MASK) ==
-                   (TPARG_FL_KERNEL | TPARG_FL_FENTRY)) &&
+       } else if (tparg_is_function_entry(flags) &&
                   (len = str_has_prefix(arg, "arg"))) {
                ret = kstrtoul(arg + len, 10, &param);
                if (ret) {
@@ -338,6 +337,12 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
                }
                code->op = FETCH_OP_ARG;
                code->param = (unsigned int)param - 1;
+               /*
+                * The tracepoint probe will probe a stub function, and the
+                * first parameter of the stub is a dummy and should be ignored.
+                */
+               if (flags & TPARG_FL_TPOINT)
+                       code->param++;
 #endif
        } else
                goto inval_var;
@@ -393,7 +398,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
                break;
 
        case '%':       /* named register */
-               if (flags & (TPARG_FL_TPOINT | TPARG_FL_FPROBE)) {
+               if (flags & (TPARG_FL_TEVENT | TPARG_FL_FPROBE)) {
                        /* eprobe and fprobe do not handle registers */
                        trace_probe_log_err(offs, BAD_VAR);
                        break;
@@ -633,7 +638,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
         * Since $comm and immediate string can not be dereferenced,
         * we can find those by strcmp. But ignore for eprobes.
         */
-       if (!(flags & TPARG_FL_TPOINT) &&
+       if (!(flags & TPARG_FL_TEVENT) &&
            (strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
             strncmp(arg, "\\\"", 2) == 0)) {
                /* The type of $comm must be "string", and not an array. */
index 8f4f23e8b2348656eb97eb1fdcdb63fa05840fe0..e6b94fcdb88681884caa44008f61065311c20805 100644 (file)
@@ -359,16 +359,24 @@ int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_a
 
 /*
  * The flags used for parsing trace_probe arguments.
- * TPARG_FL_RETURN, TPARG_FL_FENTRY and TPARG_FL_TPOINT are mutually exclusive.
+ * TPARG_FL_RETURN, TPARG_FL_FENTRY and TPARG_FL_TEVENT are mutually exclusive.
  * TPARG_FL_KERNEL and TPARG_FL_USER are also mutually exclusive.
+ * TPARG_FL_FPROBE and TPARG_FL_TPOINT are optional but it should be with
+ * TPARG_FL_KERNEL.
  */
 #define TPARG_FL_RETURN BIT(0)
 #define TPARG_FL_KERNEL BIT(1)
 #define TPARG_FL_FENTRY BIT(2)
-#define TPARG_FL_TPOINT BIT(3)
+#define TPARG_FL_TEVENT BIT(3)
 #define TPARG_FL_USER   BIT(4)
 #define TPARG_FL_FPROBE BIT(5)
-#define TPARG_FL_MASK  GENMASK(4, 0)
+#define TPARG_FL_TPOINT BIT(6)
+#define TPARG_FL_LOC_MASK      GENMASK(4, 0)
+
+static inline bool tparg_is_function_entry(unsigned int flags)
+{
+       return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_FENTRY);
+}
 
 extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
                                const char *argv, unsigned int flags);
@@ -415,6 +423,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
        C(MAXACT_TOO_BIG,       "Maxactive is too big"),                \
        C(BAD_PROBE_ADDR,       "Invalid probed address or symbol"),    \
        C(BAD_RETPROBE,         "Retprobe address must be an function entry"), \
+       C(NO_TRACEPOINT,        "Tracepoint is not found"),             \
        C(BAD_ADDR_SUFFIX,      "Invalid probed address suffix"), \
        C(NO_GROUP_NAME,        "Group name is not specified"),         \
        C(GROUP_TOO_LONG,       "Group name is too long"),              \