Merge tag 'firewire-fixes-6.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / kernel / trace / trace_sched_switch.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * trace context switch
4  *
5  * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
6  *
7  */
8 #include <linux/module.h>
9 #include <linux/kallsyms.h>
10 #include <linux/uaccess.h>
11 #include <linux/kmemleak.h>
12 #include <linux/ftrace.h>
13 #include <trace/events/sched.h>
14
15 #include "trace.h"
16
17 #define RECORD_CMDLINE  1
18 #define RECORD_TGID     2
19
20 static int              sched_cmdline_ref;
21 static int              sched_tgid_ref;
22 static DEFINE_MUTEX(sched_register_mutex);
23
24 static void
25 probe_sched_switch(void *ignore, bool preempt,
26                    struct task_struct *prev, struct task_struct *next,
27                    unsigned int prev_state)
28 {
29         int flags;
30
31         flags = (RECORD_TGID * !!sched_tgid_ref) +
32                 (RECORD_CMDLINE * !!sched_cmdline_ref);
33
34         if (!flags)
35                 return;
36         tracing_record_taskinfo_sched_switch(prev, next, flags);
37 }
38
39 static void
40 probe_sched_wakeup(void *ignore, struct task_struct *wakee)
41 {
42         int flags;
43
44         flags = (RECORD_TGID * !!sched_tgid_ref) +
45                 (RECORD_CMDLINE * !!sched_cmdline_ref);
46
47         if (!flags)
48                 return;
49         tracing_record_taskinfo_sched_switch(current, wakee, flags);
50 }
51
52 static int tracing_sched_register(void)
53 {
54         int ret;
55
56         ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
57         if (ret) {
58                 pr_info("wakeup trace: Couldn't activate tracepoint"
59                         " probe to kernel_sched_wakeup\n");
60                 return ret;
61         }
62
63         ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
64         if (ret) {
65                 pr_info("wakeup trace: Couldn't activate tracepoint"
66                         " probe to kernel_sched_wakeup_new\n");
67                 goto fail_deprobe;
68         }
69
70         ret = register_trace_sched_switch(probe_sched_switch, NULL);
71         if (ret) {
72                 pr_info("sched trace: Couldn't activate tracepoint"
73                         " probe to kernel_sched_switch\n");
74                 goto fail_deprobe_wake_new;
75         }
76
77         return ret;
78 fail_deprobe_wake_new:
79         unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
80 fail_deprobe:
81         unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
82         return ret;
83 }
84
/*
 * Detach all three sched probes, in the reverse order of registration
 * in tracing_sched_register().
 */
static void tracing_sched_unregister(void)
{
	unregister_trace_sched_switch(probe_sched_switch, NULL);
	unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
	unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
}
91
92 static void tracing_start_sched_switch(int ops)
93 {
94         bool sched_register;
95
96         mutex_lock(&sched_register_mutex);
97         sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
98
99         switch (ops) {
100         case RECORD_CMDLINE:
101                 sched_cmdline_ref++;
102                 break;
103
104         case RECORD_TGID:
105                 sched_tgid_ref++;
106                 break;
107         }
108
109         if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
110                 tracing_sched_register();
111         mutex_unlock(&sched_register_mutex);
112 }
113
114 static void tracing_stop_sched_switch(int ops)
115 {
116         mutex_lock(&sched_register_mutex);
117
118         switch (ops) {
119         case RECORD_CMDLINE:
120                 sched_cmdline_ref--;
121                 break;
122
123         case RECORD_TGID:
124                 sched_tgid_ref--;
125                 break;
126         }
127
128         if (!sched_cmdline_ref && !sched_tgid_ref)
129                 tracing_sched_unregister();
130         mutex_unlock(&sched_register_mutex);
131 }
132
/* Take a cmdline-recording reference; registers probes on first use. */
void tracing_start_cmdline_record(void)
{
	tracing_start_sched_switch(RECORD_CMDLINE);
}
137
/* Drop a cmdline-recording reference; unregisters probes on last put. */
void tracing_stop_cmdline_record(void)
{
	tracing_stop_sched_switch(RECORD_CMDLINE);
}
142
/* Take a tgid-recording reference; registers probes on first use. */
void tracing_start_tgid_record(void)
{
	tracing_start_sched_switch(RECORD_TGID);
}
147
/* Drop a tgid-recording reference; unregisters probes on last put. */
void tracing_stop_tgid_record(void)
{
	tracing_stop_sched_switch(RECORD_TGID);
}
152
153 /*
154  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
155  * is the tgid last observed corresponding to pid=i.
156  */
157 static int *tgid_map;
158
159 /* The maximum valid index into tgid_map. */
160 static size_t tgid_map_max;
161
162 #define SAVED_CMDLINES_DEFAULT 128
163 #define NO_CMDLINE_MAP UINT_MAX
164 /*
165  * Preemption must be disabled before acquiring trace_cmdline_lock.
166  * The various trace_arrays' max_lock must be acquired in a context
167  * where interrupt is disabled.
168  */
169 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
170 struct saved_cmdlines_buffer {
171         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
172         unsigned *map_cmdline_to_pid;
173         unsigned cmdline_num;
174         int cmdline_idx;
175         char saved_cmdlines[];
176 };
177 static struct saved_cmdlines_buffer *savedcmd;
178
179 /* Holds the size of a cmdline and pid element */
180 #define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s)                       \
181         (TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
182
183 static inline char *get_saved_cmdlines(int idx)
184 {
185         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
186 }
187
188 static inline void set_cmdline(int idx, const char *cmdline)
189 {
190         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
191 }
192
193 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
194 {
195         int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
196
197         kmemleak_free(s);
198         free_pages((unsigned long)s, order);
199 }
200
/*
 * Allocate a saved_cmdlines_buffer holding at least @val cmdline entries.
 * The buffer is one page-order allocation laid out as:
 *   struct header | val * TASK_COMM_LEN comm slots | val map_cmdline_to_pid
 * Returns NULL on allocation failure.
 */
static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
{
	struct saved_cmdlines_buffer *s;
	struct page *page;
	int orig_size, size;
	int order;

	/* Figure out how much is needed to hold the given number of cmdlines */
	orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
	order = get_order(orig_size);
	size = 1 << (order + PAGE_SHIFT);
	page = alloc_pages(GFP_KERNEL, order);
	if (!page)
		return NULL;

	s = page_address(page);
	/* Page allocations are invisible to kmemleak; register this one by hand. */
	kmemleak_alloc(s, size, 1, GFP_KERNEL);
	memset(s, 0, sizeof(*s));

	/* Round up to actual allocation */
	val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
	s->cmdline_num = val;

	/* Place map_cmdline_to_pid array right after saved_cmdlines */
	s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];

	s->cmdline_idx = 0;
	/* NO_CMDLINE_MAP is UINT_MAX, so filling with 0xff bytes sets every entry. */
	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
	       sizeof(s->map_pid_to_cmdline));
	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
	       val * sizeof(*s->map_cmdline_to_pid));

	return s;
}
235
236 int trace_create_savedcmd(void)
237 {
238         savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
239
240         return savedcmd ? 0 : -ENOMEM;
241 }
242
/*
 * Record @tsk's comm in the saved-cmdlines buffer, keyed by pid hashed
 * into PID_MAX_DEFAULT slots. Returns 1 on success (or for the idle
 * task), 0 if the lock could not be taken — the caller retries later.
 */
int trace_save_cmdline(struct task_struct *tsk)
{
	unsigned tpid, idx;

	/* treat recording of idle task as a success */
	if (!tsk->pid)
		return 1;

	/* Hash the pid into the bounded map_pid_to_cmdline index space. */
	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);

	/*
	 * It's not the end of the world if we don't get
	 * the lock, but we also don't want to spin
	 * nor do we want to disable interrupts,
	 * so if we miss here, then better luck next time.
	 *
	 * This is called within the scheduler and wake up, so interrupts
	 * had better been disabled and run queue lock been held.
	 */
	lockdep_assert_preemption_disabled();
	if (!arch_spin_trylock(&trace_cmdline_lock))
		return 0;

	idx = savedcmd->map_pid_to_cmdline[tpid];
	if (idx == NO_CMDLINE_MAP) {
		/* New pid slot: claim the next cmdline slot round-robin. */
		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;

		savedcmd->map_pid_to_cmdline[tpid] = idx;
		savedcmd->cmdline_idx = idx;
	}

	/* Store the reverse mapping so readers can detect hash collisions. */
	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
	set_cmdline(idx, tsk->comm);

	arch_spin_unlock(&trace_cmdline_lock);

	return 1;
}
281
/*
 * Look up the saved comm for @pid into @comm (at least TASK_COMM_LEN
 * bytes). Falls back to "<idle>", "<XXX>" (negative pid) or "<...>"
 * (not recorded / slot reused). Caller must hold trace_cmdline_lock.
 */
static void __trace_find_cmdline(int pid, char comm[])
{
	unsigned map;
	int tpid;

	if (!pid) {
		strcpy(comm, "<idle>");
		return;
	}

	if (WARN_ON_ONCE(pid < 0)) {
		strcpy(comm, "<XXX>");
		return;
	}

	tpid = pid & (PID_MAX_DEFAULT - 1);
	map = savedcmd->map_pid_to_cmdline[tpid];
	if (map != NO_CMDLINE_MAP) {
		/* Verify the slot still belongs to this pid (hash collision check). */
		tpid = savedcmd->map_cmdline_to_pid[map];
		if (tpid == pid) {
			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
			return;
		}
	}
	strcpy(comm, "<...>");
}
308
/*
 * Locked wrapper around __trace_find_cmdline(). Preemption must be
 * disabled before taking trace_cmdline_lock (see lock comment above).
 */
void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
319
/*
 * Return a pointer to the tgid_map entry for @pid, or NULL if the map
 * is not allocated or @pid is out of range.
 */
static int *trace_find_tgid_ptr(int pid)
{
	/*
	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
	 * if we observe a non-NULL tgid_map then we also observe the correct
	 * tgid_map_max.
	 */
	int *map = smp_load_acquire(&tgid_map);

	if (unlikely(!map || pid > tgid_map_max))
		return NULL;

	return &map[pid];
}
334
/* Return the tgid recorded for @pid, or 0 if none is known. */
int trace_find_tgid(int pid)
{
	int *entry = trace_find_tgid_ptr(pid);

	if (!entry)
		return 0;

	return *entry;
}
341
342 static int trace_save_tgid(struct task_struct *tsk)
343 {
344         int *ptr;
345
346         /* treat recording of idle task as a success */
347         if (!tsk->pid)
348                 return 1;
349
350         ptr = trace_find_tgid_ptr(tsk->pid);
351         if (!ptr)
352                 return 0;
353
354         *ptr = tsk->tgid;
355         return 1;
356 }
357
358 static bool tracing_record_taskinfo_skip(int flags)
359 {
360         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
361                 return true;
362         if (!__this_cpu_read(trace_taskinfo_save))
363                 return true;
364         return false;
365 }
366
367 /**
368  * tracing_record_taskinfo - record the task info of a task
369  *
370  * @task:  task to record
371  * @flags: TRACE_RECORD_CMDLINE for recording comm
372  *         TRACE_RECORD_TGID for recording tgid
373  */
374 void tracing_record_taskinfo(struct task_struct *task, int flags)
375 {
376         bool done;
377
378         if (tracing_record_taskinfo_skip(flags))
379                 return;
380
381         /*
382          * Record as much task information as possible. If some fail, continue
383          * to try to record the others.
384          */
385         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
386         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
387
388         /* If recording any information failed, retry again soon. */
389         if (!done)
390                 return;
391
392         __this_cpu_write(trace_taskinfo_save, false);
393 }
394
395 /**
396  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
397  *
398  * @prev: previous task during sched_switch
399  * @next: next task during sched_switch
400  * @flags: TRACE_RECORD_CMDLINE for recording comm
401  *         TRACE_RECORD_TGID for recording tgid
402  */
403 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
404                                           struct task_struct *next, int flags)
405 {
406         bool done;
407
408         if (tracing_record_taskinfo_skip(flags))
409                 return;
410
411         /*
412          * Record as much task information as possible. If some fail, continue
413          * to try to record the others.
414          */
415         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
416         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
417         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
418         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
419
420         /* If recording any information failed, retry again soon. */
421         if (!done)
422                 return;
423
424         __this_cpu_write(trace_taskinfo_save, false);
425 }
426
427 /* Helpers to record a specific task information */
/* Convenience wrapper: record only the comm of @task. */
void tracing_record_cmdline(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}
432
/* Convenience wrapper: record only the tgid of @task. */
void tracing_record_tgid(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
437
/*
 * Allocate the pid -> tgid map, sized by the current pid_max. Idempotent:
 * returns 0 immediately if the map already exists. Returns -ENOMEM on
 * allocation failure.
 */
int trace_alloc_tgid_map(void)
{
	int *map;

	if (tgid_map)
		return 0;

	tgid_map_max = pid_max;
	map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
		       GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	/*
	 * Pairs with smp_load_acquire() in
	 * trace_find_tgid_ptr() to ensure that if it observes
	 * the tgid_map we just allocated then it also observes
	 * the corresponding tgid_map_max value.
	 */
	smp_store_release(&tgid_map, map);
	return 0;
}
460
461 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
462 {
463         int pid = ++(*pos);
464
465         return trace_find_tgid_ptr(pid);
466 }
467
468 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
469 {
470         int pid = *pos;
471
472         return trace_find_tgid_ptr(pid);
473 }
474
/* Nothing to release: tgid iteration takes no locks. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
478
479 static int saved_tgids_show(struct seq_file *m, void *v)
480 {
481         int *entry = (int *)v;
482         int pid = entry - tgid_map;
483         int tgid = *entry;
484
485         if (tgid == 0)
486                 return SEQ_SKIP;
487
488         seq_printf(m, "%d %d\n", pid, tgid);
489         return 0;
490 }
491
/* seq_file iterator over recorded (pid, tgid) pairs in tgid_map. */
static const struct seq_operations tracing_saved_tgids_seq_ops = {
	.start		= saved_tgids_start,
	.stop		= saved_tgids_stop,
	.next		= saved_tgids_next,
	.show		= saved_tgids_show,
};
498
499 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
500 {
501         int ret;
502
503         ret = tracing_check_open_get_tr(NULL);
504         if (ret)
505                 return ret;
506
507         return seq_open(filp, &tracing_saved_tgids_seq_ops);
508 }
509
510
/* File operations for the tracefs "saved_tgids" file. */
const struct file_operations tracing_saved_tgids_fops = {
	.open		= tracing_saved_tgids_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
517
518 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
519 {
520         unsigned int *ptr = v;
521
522         if (*pos || m->count)
523                 ptr++;
524
525         (*pos)++;
526
527         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
528              ptr++) {
529                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
530                         continue;
531
532                 return ptr;
533         }
534
535         return NULL;
536 }
537
/*
 * Start iteration at the *pos'th used entry. Takes trace_cmdline_lock
 * (with preemption disabled) for the whole traversal; a NULL return
 * still leaves the lock held — seq_file guarantees ->stop() is called,
 * which releases it.
 */
static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	void *v;
	loff_t l = 0;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	v = &savedcmd->map_cmdline_to_pid[0];
	while (l <= *pos) {
		v = saved_cmdlines_next(m, v, &l);
		if (!v)
			return NULL;
	}

	return v;
}
555
/* Release the lock taken in saved_cmdlines_start(). */
static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
561
562 static int saved_cmdlines_show(struct seq_file *m, void *v)
563 {
564         char buf[TASK_COMM_LEN];
565         unsigned int *pid = v;
566
567         __trace_find_cmdline(*pid, buf);
568         seq_printf(m, "%d %s\n", *pid, buf);
569         return 0;
570 }
571
/* seq_file iterator over recorded (pid, comm) pairs in savedcmd. */
static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
	.start		= saved_cmdlines_start,
	.next		= saved_cmdlines_next,
	.stop		= saved_cmdlines_stop,
	.show		= saved_cmdlines_show,
};
578
579 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
580 {
581         int ret;
582
583         ret = tracing_check_open_get_tr(NULL);
584         if (ret)
585                 return ret;
586
587         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
588 }
589
/* File operations for the tracefs "saved_cmdlines" file. */
const struct file_operations tracing_saved_cmdlines_fops = {
	.open		= tracing_saved_cmdlines_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
596
597 static ssize_t
598 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
599                                  size_t cnt, loff_t *ppos)
600 {
601         char buf[64];
602         int r;
603
604         preempt_disable();
605         arch_spin_lock(&trace_cmdline_lock);
606         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
607         arch_spin_unlock(&trace_cmdline_lock);
608         preempt_enable();
609
610         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
611 }
612
/* Free the global savedcmd buffer (teardown path). */
void trace_free_saved_cmdlines_buffer(void)
{
	free_saved_cmdlines_buffer(savedcmd);
}
617
618 static int tracing_resize_saved_cmdlines(unsigned int val)
619 {
620         struct saved_cmdlines_buffer *s, *savedcmd_temp;
621
622         s = allocate_cmdlines_buffer(val);
623         if (!s)
624                 return -ENOMEM;
625
626         preempt_disable();
627         arch_spin_lock(&trace_cmdline_lock);
628         savedcmd_temp = savedcmd;
629         savedcmd = s;
630         arch_spin_unlock(&trace_cmdline_lock);
631         preempt_enable();
632         free_saved_cmdlines_buffer(savedcmd_temp);
633
634         return 0;
635 }
636
637 static ssize_t
638 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
639                                   size_t cnt, loff_t *ppos)
640 {
641         unsigned long val;
642         int ret;
643
644         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
645         if (ret)
646                 return ret;
647
648         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
649         if (!val || val > PID_MAX_DEFAULT)
650                 return -EINVAL;
651
652         ret = tracing_resize_saved_cmdlines((unsigned int)val);
653         if (ret < 0)
654                 return ret;
655
656         *ppos += cnt;
657
658         return cnt;
659 }
660
/* File operations for the tracefs "saved_cmdlines_size" file. */
const struct file_operations tracing_saved_cmdlines_size_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_saved_cmdlines_size_read,
	.write		= tracing_saved_cmdlines_size_write,
};