// SPDX-License-Identifier: GPL-2.0
/*
 * trace context switch
 *
 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
 */
8 #include <linux/module.h>
9 #include <linux/kallsyms.h>
10 #include <linux/uaccess.h>
11 #include <linux/kmemleak.h>
12 #include <linux/ftrace.h>
13 #include <trace/events/sched.h>
/* Bits passed to tracing_{start,stop}_sched_switch() selecting what to record */
#define RECORD_CMDLINE	1
#define RECORD_TGID	2	/* referenced below but lost from this span — restored */

/* Reference counts for the two recording modes; guarded by sched_register_mutex */
static int		sched_cmdline_ref;
static int		sched_tgid_ref;
static DEFINE_MUTEX(sched_register_mutex);
25 probe_sched_switch(void *ignore, bool preempt,
26 struct task_struct *prev, struct task_struct *next,
27 unsigned int prev_state)
31 flags = (RECORD_TGID * !!sched_tgid_ref) +
32 (RECORD_CMDLINE * !!sched_cmdline_ref);
36 tracing_record_taskinfo_sched_switch(prev, next, flags);
40 probe_sched_wakeup(void *ignore, struct task_struct *wakee)
44 flags = (RECORD_TGID * !!sched_tgid_ref) +
45 (RECORD_CMDLINE * !!sched_cmdline_ref);
49 tracing_record_taskinfo_sched_switch(current, wakee, flags);
52 static int tracing_sched_register(void)
56 ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
58 pr_info("wakeup trace: Couldn't activate tracepoint"
59 " probe to kernel_sched_wakeup\n");
63 ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
65 pr_info("wakeup trace: Couldn't activate tracepoint"
66 " probe to kernel_sched_wakeup_new\n");
70 ret = register_trace_sched_switch(probe_sched_switch, NULL);
72 pr_info("sched trace: Couldn't activate tracepoint"
73 " probe to kernel_sched_switch\n");
74 goto fail_deprobe_wake_new;
78 fail_deprobe_wake_new:
79 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
81 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
85 static void tracing_sched_unregister(void)
87 unregister_trace_sched_switch(probe_sched_switch, NULL);
88 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
89 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
92 static void tracing_start_sched_switch(int ops)
96 mutex_lock(&sched_register_mutex);
97 sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
109 if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
110 tracing_sched_register();
111 mutex_unlock(&sched_register_mutex);
114 static void tracing_stop_sched_switch(int ops)
116 mutex_lock(&sched_register_mutex);
128 if (!sched_cmdline_ref && !sched_tgid_ref)
129 tracing_sched_unregister();
130 mutex_unlock(&sched_register_mutex);
133 void tracing_start_cmdline_record(void)
135 tracing_start_sched_switch(RECORD_CMDLINE);
138 void tracing_stop_cmdline_record(void)
140 tracing_stop_sched_switch(RECORD_CMDLINE);
143 void tracing_start_tgid_record(void)
145 tracing_start_sched_switch(RECORD_TGID);
148 void tracing_stop_tgid_record(void)
150 tracing_stop_sched_switch(RECORD_TGID);
154 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
155 * is the tgid last observed corresponding to pid=i.
157 static int *tgid_map;
159 /* The maximum valid index into tgid_map. */
160 static size_t tgid_map_max;
162 #define SAVED_CMDLINES_DEFAULT 128
163 #define NO_CMDLINE_MAP UINT_MAX
165 * Preemption must be disabled before acquiring trace_cmdline_lock.
166 * The various trace_arrays' max_lock must be acquired in a context
167 * where interrupt is disabled.
169 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
170 struct saved_cmdlines_buffer {
171 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
172 unsigned *map_cmdline_to_pid;
173 unsigned cmdline_num;
175 char saved_cmdlines[];
177 static struct saved_cmdlines_buffer *savedcmd;
/* Per-slot footprint: one TASK_COMM_LEN comm plus one map_cmdline_to_pid entry */
#define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)			\
	(TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
183 static inline char *get_saved_cmdlines(int idx)
185 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
188 static inline void set_cmdline(int idx, const char *cmdline)
190 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
193 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
195 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
198 free_pages((unsigned long)s, order);
201 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
203 struct saved_cmdlines_buffer *s;
208 /* Figure out how much is needed to hold the given number of cmdlines */
209 orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
210 order = get_order(orig_size);
211 size = 1 << (order + PAGE_SHIFT);
212 page = alloc_pages(GFP_KERNEL, order);
216 s = page_address(page);
217 kmemleak_alloc(s, size, 1, GFP_KERNEL);
218 memset(s, 0, sizeof(*s));
220 /* Round up to actual allocation */
221 val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
222 s->cmdline_num = val;
224 /* Place map_cmdline_to_pid array right after saved_cmdlines */
225 s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
228 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
229 sizeof(s->map_pid_to_cmdline));
230 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
231 val * sizeof(*s->map_cmdline_to_pid));
236 int trace_create_savedcmd(void)
238 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
240 return savedcmd ? 0 : -ENOMEM;
243 int trace_save_cmdline(struct task_struct *tsk)
247 /* treat recording of idle task as a success */
251 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
254 * It's not the end of the world if we don't get
255 * the lock, but we also don't want to spin
256 * nor do we want to disable interrupts,
257 * so if we miss here, then better luck next time.
259 * This is called within the scheduler and wake up, so interrupts
260 * had better been disabled and run queue lock been held.
262 lockdep_assert_preemption_disabled();
263 if (!arch_spin_trylock(&trace_cmdline_lock))
266 idx = savedcmd->map_pid_to_cmdline[tpid];
267 if (idx == NO_CMDLINE_MAP) {
268 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
270 savedcmd->map_pid_to_cmdline[tpid] = idx;
271 savedcmd->cmdline_idx = idx;
274 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
275 set_cmdline(idx, tsk->comm);
277 arch_spin_unlock(&trace_cmdline_lock);
282 static void __trace_find_cmdline(int pid, char comm[])
288 strcpy(comm, "<idle>");
292 if (WARN_ON_ONCE(pid < 0)) {
293 strcpy(comm, "<XXX>");
297 tpid = pid & (PID_MAX_DEFAULT - 1);
298 map = savedcmd->map_pid_to_cmdline[tpid];
299 if (map != NO_CMDLINE_MAP) {
300 tpid = savedcmd->map_cmdline_to_pid[map];
302 strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
306 strcpy(comm, "<...>");
309 void trace_find_cmdline(int pid, char comm[])
312 arch_spin_lock(&trace_cmdline_lock);
314 __trace_find_cmdline(pid, comm);
316 arch_spin_unlock(&trace_cmdline_lock);
320 static int *trace_find_tgid_ptr(int pid)
323 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
324 * if we observe a non-NULL tgid_map then we also observe the correct
327 int *map = smp_load_acquire(&tgid_map);
329 if (unlikely(!map || pid > tgid_map_max))
/* Look up the tgid recorded for @pid; 0 means "not recorded". */
int trace_find_tgid(int pid)
{
	int *ptr = trace_find_tgid_ptr(pid);

	return ptr ? *ptr : 0;
}
342 static int trace_save_tgid(struct task_struct *tsk)
346 /* treat recording of idle task as a success */
350 ptr = trace_find_tgid_ptr(tsk->pid);
358 static bool tracing_record_taskinfo_skip(int flags)
360 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
362 if (!__this_cpu_read(trace_taskinfo_save))
368 * tracing_record_taskinfo - record the task info of a task
370 * @task: task to record
371 * @flags: TRACE_RECORD_CMDLINE for recording comm
372 * TRACE_RECORD_TGID for recording tgid
374 void tracing_record_taskinfo(struct task_struct *task, int flags)
378 if (tracing_record_taskinfo_skip(flags))
382 * Record as much task information as possible. If some fail, continue
383 * to try to record the others.
385 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
386 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
388 /* If recording any information failed, retry again soon. */
392 __this_cpu_write(trace_taskinfo_save, false);
396 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
398 * @prev: previous task during sched_switch
399 * @next: next task during sched_switch
400 * @flags: TRACE_RECORD_CMDLINE for recording comm
401 * TRACE_RECORD_TGID for recording tgid
403 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
404 struct task_struct *next, int flags)
408 if (tracing_record_taskinfo_skip(flags))
412 * Record as much task information as possible. If some fail, continue
413 * to try to record the others.
415 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
416 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
417 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
418 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
420 /* If recording any information failed, retry again soon. */
424 __this_cpu_write(trace_taskinfo_save, false);
427 /* Helpers to record a specific task information */
428 void tracing_record_cmdline(struct task_struct *task)
430 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
433 void tracing_record_tgid(struct task_struct *task)
435 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
438 int trace_alloc_tgid_map(void)
445 tgid_map_max = pid_max;
446 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
452 * Pairs with smp_load_acquire() in
453 * trace_find_tgid_ptr() to ensure that if it observes
454 * the tgid_map we just allocated then it also observes
455 * the corresponding tgid_map_max value.
457 smp_store_release(&tgid_map, map);
461 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
465 return trace_find_tgid_ptr(pid);
468 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
472 return trace_find_tgid_ptr(pid);
/* seq_file .stop: nothing to release — the tgid_map needs no locking here. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
479 static int saved_tgids_show(struct seq_file *m, void *v)
481 int *entry = (int *)v;
482 int pid = entry - tgid_map;
488 seq_printf(m, "%d %d\n", pid, tgid);
492 static const struct seq_operations tracing_saved_tgids_seq_ops = {
493 .start = saved_tgids_start,
494 .stop = saved_tgids_stop,
495 .next = saved_tgids_next,
496 .show = saved_tgids_show,
499 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
503 ret = tracing_check_open_get_tr(NULL);
507 return seq_open(filp, &tracing_saved_tgids_seq_ops);
511 const struct file_operations tracing_saved_tgids_fops = {
512 .open = tracing_saved_tgids_open,
515 .release = seq_release,
518 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
520 unsigned int *ptr = v;
522 if (*pos || m->count)
527 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
529 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
538 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
544 arch_spin_lock(&trace_cmdline_lock);
546 v = &savedcmd->map_cmdline_to_pid[0];
548 v = saved_cmdlines_next(m, v, &l);
556 static void saved_cmdlines_stop(struct seq_file *m, void *v)
558 arch_spin_unlock(&trace_cmdline_lock);
562 static int saved_cmdlines_show(struct seq_file *m, void *v)
564 char buf[TASK_COMM_LEN];
565 unsigned int *pid = v;
567 __trace_find_cmdline(*pid, buf);
568 seq_printf(m, "%d %s\n", *pid, buf);
572 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
573 .start = saved_cmdlines_start,
574 .next = saved_cmdlines_next,
575 .stop = saved_cmdlines_stop,
576 .show = saved_cmdlines_show,
579 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
583 ret = tracing_check_open_get_tr(NULL);
587 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
590 const struct file_operations tracing_saved_cmdlines_fops = {
591 .open = tracing_saved_cmdlines_open,
594 .release = seq_release,
598 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
599 size_t cnt, loff_t *ppos)
605 arch_spin_lock(&trace_cmdline_lock);
606 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
607 arch_spin_unlock(&trace_cmdline_lock);
610 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
613 void trace_free_saved_cmdlines_buffer(void)
615 free_saved_cmdlines_buffer(savedcmd);
618 static int tracing_resize_saved_cmdlines(unsigned int val)
620 struct saved_cmdlines_buffer *s, *savedcmd_temp;
622 s = allocate_cmdlines_buffer(val);
627 arch_spin_lock(&trace_cmdline_lock);
628 savedcmd_temp = savedcmd;
630 arch_spin_unlock(&trace_cmdline_lock);
632 free_saved_cmdlines_buffer(savedcmd_temp);
638 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
639 size_t cnt, loff_t *ppos)
644 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
648 /* must have at least 1 entry or less than PID_MAX_DEFAULT */
649 if (!val || val > PID_MAX_DEFAULT)
652 ret = tracing_resize_saved_cmdlines((unsigned int)val);
661 const struct file_operations tracing_saved_cmdlines_size_fops = {
662 .open = tracing_open_generic,
663 .read = tracing_saved_cmdlines_size_read,
664 .write = tracing_saved_cmdlines_size_write,