kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring-buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 static struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurs.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs, or to 2 to dump only
134  * the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * from "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281
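/*
 * Rounds to the nearest microsecond rather than truncating: adding 500
 * before the divide means, for example, ns2usecs(1499) == 1 while
 * ns2usecs(1500) == 2.
 */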
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354          * We are inserting export into the list but another
355          * CPU might be walking that list. We need to make sure
356          * the export->next pointer is valid before another CPU sees
357          * the export pointer included in the list.
358          * the export pointer included into the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
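
/*
 * Example usage (illustrative sketch only, not part of this file): a
 * module that mirrors function trace entries to its own backend could
 * register an export like the one below.  "my_export_write" and
 * "my_backend_push" are made-up names.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		my_backend_push(entry, size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	ret = register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */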
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr: pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
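
/*
 * Example pattern (illustrative sketch; "my_file_open" is a made-up
 * name): file open() handlers use this helper to do the lockdown and
 * tracing_disabled checks and take the trace_array reference in one
 * call, and then drop the reference with trace_array_put() from the
 * matching release() handler.
 *
 *	static int my_file_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *
 *		filp->private_data = tr;
 *		return 0;
 *	}
 */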
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
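
/*
 * Example (illustrative sketch): a sched_switch probe in the event pid
 * filtering code uses this to decide, at context switch time, whether
 * the incoming task should be ignored.  "my_pid_list", "my_no_pid_list"
 * and "my_ignore_pid" are made-up names standing in for the RCU
 * protected lists and the per-cpu flag that real callers keep.
 *
 *	pid_list = rcu_dereference_sched(my_pid_list);
 *	no_pid_list = rcu_dereference_sched(my_no_pid_list);
 *
 *	this_cpu_write(my_ignore_pid,
 *		       trace_ignore_this_task(pid_list, no_pid_list, next));
 */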
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
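
/*
 * Example (illustrative sketch): the three helpers above are meant to be
 * dropped into a seq_operations for a tracefs pid file.  "my_pid_start",
 * "my_pid_next", "my_pid_stop" and "my_filtered_pids" are made-up names;
 * a real user looks the pid list up under its own locking, and the stop
 * callback (omitted) releases whatever start took.
 *
 *	static void *my_pid_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_filtered_pids, pos);
 *	}
 *
 *	static void *my_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_filtered_pids, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_start,
 *		.next	= my_pid_next,
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */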
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * The write is an all or nothing operation: always create a
698          * new array when the user adds new pids, rather than modifying
699          * the existing one. If the operation fails, then the current
700          * list is left unmodified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0)
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 if (!trace_parser_loaded(&parser))
732                         break;
733
734                 ret = -EINVAL;
735                 if (kstrtoul(parser.buffer, 0, &val))
736                         break;
737
738                 pid = (pid_t)val;
739
740                 if (trace_pid_list_set(pid_list, pid) < 0) {
741                         ret = -1;
742                         break;
743                 }
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_pid_list_free(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_pid_list_free(pid_list);
759                 pid_list = NULL;
760         }
761
762         *new_pid_list = pid_list;
763
764         return read;
765 }
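
/*
 * Example (illustrative sketch): a tracefs write() handler built on top
 * of trace_pid_write().  "my_filtered_pids" and "my_mutex" are made-up
 * names; real callers publish the new list with rcu_assign_pointer()
 * and free the old one only after a grace period.
 *
 *	filtered_pids = rcu_dereference_protected(my_filtered_pids,
 *						  lockdep_is_held(&my_mutex));
 *
 *	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(my_filtered_pids, pid_list);
 *	synchronize_rcu();
 *	if (filtered_pids)
 *		trace_pid_list_free(filtered_pids);
 */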
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769         u64 ts;
770
771         /* Early boot up does not have a buffer yet */
772         if (!buf->buffer)
773                 return trace_clock_local();
774
775         ts = ring_buffer_time_stamp(buf->buffer);
776         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778         return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled", which is meant for fast paths such as
791  * the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797         /*
798          * For quick access (irqsoff uses this in fast path), just
799          * return the mirror variable of the state of the ring buffer.
800          * It's a little racy, but we don't really care.
801          */
802         smp_rmb();
803         return !global_trace.buffer_disabled;
804 }
805
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low number of 16384.
812  * If a dump on oops happens, it is much appreciated not to have
813  * to wait for all that output. In any case, this can be
814  * configured at both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer            *trace_types __read_mostly;
822
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829  * Serialize access to the ring buffer.
830  *
831  * The ring buffer serializes readers, but that is only low level protection.
832  * The validity of the events (which are returned by ring_buffer_peek(), etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow other processes to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be
839  *      rewritten by the events producer.
840  *   B) the page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different cpu
844  * ring buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 /* gain it for accessing the whole ring buffer. */
858                 down_write(&all_cpu_access_lock);
859         } else {
860                 /* gain it for accessing a cpu ring buffer. */
861
862                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863                 down_read(&all_cpu_access_lock);
864
865                 /* Secondly block other access to this @cpu ring buffer. */
866                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867         }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872         if (cpu == RING_BUFFER_ALL_CPUS) {
873                 up_write(&all_cpu_access_lock);
874         } else {
875                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876                 up_read(&all_cpu_access_lock);
877         }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882         int cpu;
883
884         for_each_possible_cpu(cpu)
885                 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894         (void)cpu;
895         mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900         (void)cpu;
901         mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
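
/*
 * Example (illustrative sketch): a consuming reader of a single cpu
 * buffer brackets the read with the helpers above, while an operation
 * that touches every cpu buffer passes RING_BUFFER_ALL_CPUS to take the
 * access lock exclusively.
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 */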
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912                                  unsigned int trace_ctx,
913                                  int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915                                       struct trace_buffer *buffer,
916                                       unsigned int trace_ctx,
917                                       int skip, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921                                         unsigned int trace_ctx,
922                                         int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926                                       struct trace_buffer *buffer,
927                                       unsigned int trace_ctx,
928                                       int skip, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936                   int type, unsigned int trace_ctx)
937 {
938         struct trace_entry *ent = ring_buffer_event_data(event);
939
940         tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945                           int type,
946                           unsigned long len,
947                           unsigned int trace_ctx)
948 {
949         struct ring_buffer_event *event;
950
951         event = ring_buffer_lock_reserve(buffer, len);
952         if (event != NULL)
953                 trace_event_setup(event, type, trace_ctx);
954
955         return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960         if (tr->array_buffer.buffer)
961                 ring_buffer_record_on(tr->array_buffer.buffer);
962         /*
963          * This flag is looked at when buffers haven't been allocated
964          * yet, or by some tracers (like irqsoff), that just want to
965          * know if the ring buffer has been disabled, but it can handle
966          * races where it gets disabled while we still do a record.
967          * As the check is in the fast path of the tracers, it is more
968          * important to be fast than accurate.
969          */
970         tr->buffer_disabled = 0;
971         /* Make the flag seen by readers */
972         smp_wmb();
973 }
974
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983         tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991         __this_cpu_write(trace_taskinfo_save, true);
992
993         /* If this is the temp buffer, we need to commit fully */
994         if (this_cpu_read(trace_buffered_event) == event) {
995                 /* Length is in event->array[0] */
996                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997                 /* Release the temp buffer */
998                 this_cpu_dec(trace_buffered_event_cnt);
999                 /* ring_buffer_unlock_commit() enables preemption */
1000                 preempt_enable_notrace();
1001         } else
1002                 ring_buffer_unlock_commit(buffer);
1003 }
1004
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:    The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013         struct ring_buffer_event *event;
1014         struct trace_buffer *buffer;
1015         struct print_entry *entry;
1016         unsigned int trace_ctx;
1017         int alloc;
1018
1019         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020                 return 0;
1021
1022         if (unlikely(tracing_selftest_running || tracing_disabled))
1023                 return 0;
1024
1025         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026
1027         trace_ctx = tracing_gen_ctx();
1028         buffer = global_trace.array_buffer.buffer;
1029         ring_buffer_nest_start(buffer);
1030         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031                                             trace_ctx);
1032         if (!event) {
1033                 size = 0;
1034                 goto out;
1035         }
1036
1037         entry = ring_buffer_event_data(event);
1038         entry->ip = ip;
1039
1040         memcpy(&entry->buf, str, size);
1041
1042         /* Add a newline if necessary */
1043         if (entry->buf[size - 1] != '\n') {
1044                 entry->buf[size] = '\n';
1045                 entry->buf[size + 1] = '\0';
1046         } else
1047                 entry->buf[size] = '\0';
1048
1049         __buffer_unlock_commit(buffer, event);
1050         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052         ring_buffer_nest_end(buffer);
1053         return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
1056
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:    The address of the caller
1060  * @str:   The constant string to write to the buffer
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064         struct ring_buffer_event *event;
1065         struct trace_buffer *buffer;
1066         struct bputs_entry *entry;
1067         unsigned int trace_ctx;
1068         int size = sizeof(struct bputs_entry);
1069         int ret = 0;
1070
1071         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072                 return 0;
1073
1074         if (unlikely(tracing_selftest_running || tracing_disabled))
1075                 return 0;
1076
1077         trace_ctx = tracing_gen_ctx();
1078         buffer = global_trace.array_buffer.buffer;
1079
1080         ring_buffer_nest_start(buffer);
1081         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082                                             trace_ctx);
1083         if (!event)
1084                 goto out;
1085
1086         entry = ring_buffer_event_data(event);
1087         entry->ip                       = ip;
1088         entry->str                      = str;
1089
1090         __buffer_unlock_commit(buffer, event);
1091         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092
1093         ret = 1;
1094  out:
1095         ring_buffer_nest_end(buffer);
1096         return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
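
/*
 * Note: callers normally use the trace_puts(str) macro rather than these
 * two functions directly.  When the string is a compile-time constant
 * the macro records only its address via __trace_bputs(); otherwise it
 * falls back to __trace_puts(_THIS_IP_, str, strlen(str)).  For example:
 *
 *	trace_puts("reached the fast path\n");
 */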
1099
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102                                            void *cond_data)
1103 {
1104         struct tracer *tracer = tr->current_trace;
1105         unsigned long flags;
1106
1107         if (in_nmi()) {
1108                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1110                 return;
1111         }
1112
1113         if (!tr->allocated_snapshot) {
1114                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115                 internal_trace_puts("*** stopping trace here!   ***\n");
1116                 tracing_off();
1117                 return;
1118         }
1119
1120         /* Note, the snapshot cannot be used while the tracer is using it */
1121         if (tracer->use_max_tr) {
1122                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124                 return;
1125         }
1126
1127         local_irq_save(flags);
1128         update_max_tr(tr, current, smp_processor_id(), cond_data);
1129         local_irq_restore(flags);
1130 }
1131
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134         tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot either with
1145  * tracing_snapshot_alloc(), or manually with:
1146  * echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, this will stop tracing,
1149  * basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153         struct trace_array *tr = &global_trace;
1154
1155         tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:         The tracing instance to snapshot
1162  * @cond_data:  The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174         tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:         The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already taken.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194         void *cond_data = NULL;
1195
1196         local_irq_disable();
1197         arch_spin_lock(&tr->max_lock);
1198
1199         if (tr->cond_snapshot)
1200                 cond_data = tr->cond_snapshot->cond_data;
1201
1202         arch_spin_unlock(&tr->max_lock);
1203         local_irq_enable();
1204
1205         return cond_data;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1208
1209 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1210                                         struct array_buffer *size_buf, int cpu_id);
1211 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1212
1213 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1214 {
1215         int ret;
1216
1217         if (!tr->allocated_snapshot) {
1218
1219                 /* allocate spare buffer */
1220                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1221                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1222                 if (ret < 0)
1223                         return ret;
1224
1225                 tr->allocated_snapshot = true;
1226         }
1227
1228         return 0;
1229 }
1230
1231 static void free_snapshot(struct trace_array *tr)
1232 {
1233         /*
1234          * We don't free the ring buffer; instead, we resize it because
1235          * the max_tr ring buffer has some state (e.g. ring->clock) and
1236          * we want to preserve it.
1237          */
1238         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1239         set_buffer_entries(&tr->max_buffer, 1);
1240         tracing_reset_online_cpus(&tr->max_buffer);
1241         tr->allocated_snapshot = false;
1242 }
1243
1244 /**
1245  * tracing_alloc_snapshot - allocate snapshot buffer.
1246  *
1247  * This only allocates the snapshot buffer if it isn't already
1248  * allocated - it doesn't also take a snapshot.
1249  *
1250  * This is meant to be used in cases where the snapshot buffer needs
1251  * to be set up for events that can't sleep but need to be able to
1252  * trigger a snapshot.
1253  */
1254 int tracing_alloc_snapshot(void)
1255 {
1256         struct trace_array *tr = &global_trace;
1257         int ret;
1258
1259         ret = tracing_alloc_snapshot_instance(tr);
1260         WARN_ON(ret < 0);
1261
1262         return ret;
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1265
1266 /**
1267  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1268  *
1269  * This is similar to tracing_snapshot(), but it will allocate the
1270  * snapshot buffer if it isn't already allocated. Use this only
1271  * where it is safe to sleep, as the allocation may sleep.
1272  *
1273  * This causes a swap between the snapshot buffer and the current live
1274  * tracing buffer. You can use this to take snapshots of the live
1275  * trace when some condition is triggered, but continue to trace.
1276  */
1277 void tracing_snapshot_alloc(void)
1278 {
1279         int ret;
1280
1281         ret = tracing_alloc_snapshot();
1282         if (ret < 0)
1283                 return;
1284
1285         tracing_snapshot();
1286 }
1287 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
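
/*
 * Example (illustrative sketch): code that wants to freeze the trace
 * around a rare event allocates the spare buffer once from sleepable
 * context, then takes the snapshot from the (possibly atomic) path that
 * detects the event:
 *
 *	tracing_alloc_snapshot();	// at init time, may sleep
 *	...
 *	tracing_snapshot();		// in the rare-event path
 *
 * The result is read back from the "snapshot" file in tracefs.
 */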
1288
1289 /**
1290  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1291  * @tr:         The tracing instance
1292  * @cond_data:  User data to associate with the snapshot
1293  * @update:     Implementation of the cond_snapshot update function
1294  *
1295  * Check whether the conditional snapshot for the given instance has
1296  * already been enabled, or if the current tracer is already using a
1297  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1298  * save the cond_data and update function inside.
1299  *
1300  * Returns 0 if successful, error otherwise.
1301  */
1302 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1303                                  cond_update_fn_t update)
1304 {
1305         struct cond_snapshot *cond_snapshot;
1306         int ret = 0;
1307
1308         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1309         if (!cond_snapshot)
1310                 return -ENOMEM;
1311
1312         cond_snapshot->cond_data = cond_data;
1313         cond_snapshot->update = update;
1314
1315         mutex_lock(&trace_types_lock);
1316
1317         ret = tracing_alloc_snapshot_instance(tr);
1318         if (ret)
1319                 goto fail_unlock;
1320
1321         if (tr->current_trace->use_max_tr) {
1322                 ret = -EBUSY;
1323                 goto fail_unlock;
1324         }
1325
1326         /*
1327          * The cond_snapshot can only change to NULL without the
1328          * trace_types_lock. We don't care if we race with it going
1329          * to NULL, but we want to make sure that it's not set to
1330          * something other than NULL when we get here, which we can
1331          * do safely with only holding the trace_types_lock and not
1332          * having to take the max_lock.
1333          */
1334         if (tr->cond_snapshot) {
1335                 ret = -EBUSY;
1336                 goto fail_unlock;
1337         }
1338
1339         local_irq_disable();
1340         arch_spin_lock(&tr->max_lock);
1341         tr->cond_snapshot = cond_snapshot;
1342         arch_spin_unlock(&tr->max_lock);
1343         local_irq_enable();
1344
1345         mutex_unlock(&trace_types_lock);
1346
1347         return ret;
1348
1349  fail_unlock:
1350         mutex_unlock(&trace_types_lock);
1351         kfree(cond_snapshot);
1352         return ret;
1353 }
1354 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
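
/*
 * Example (illustrative sketch): enabling a conditional snapshot with a
 * made-up update callback.  "my_update", "struct my_cond" and
 * "my_cond_state" are hypothetical; the callback just has to match
 * cond_update_fn_t and return true when the snapshot should be taken.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_cond *c = cond_data;
 *
 *		return ++c->hits >= c->threshold;
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, &my_cond_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_cond_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */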
1355
1356 /**
1357  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1358  * @tr:         The tracing instance
1359  *
1360  * Check whether the conditional snapshot for the given instance is
1361  * enabled; if so, free the cond_snapshot associated with it,
1362  * otherwise return -EINVAL.
1363  *
1364  * Returns 0 if successful, error otherwise.
1365  */
1366 int tracing_snapshot_cond_disable(struct trace_array *tr)
1367 {
1368         int ret = 0;
1369
1370         local_irq_disable();
1371         arch_spin_lock(&tr->max_lock);
1372
1373         if (!tr->cond_snapshot)
1374                 ret = -EINVAL;
1375         else {
1376                 kfree(tr->cond_snapshot);
1377                 tr->cond_snapshot = NULL;
1378         }
1379
1380         arch_spin_unlock(&tr->max_lock);
1381         local_irq_enable();
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405         /* Give warning */
1406         tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421         return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff), that just want to
1433          * know if the ring buffer has been disabled, but it can handle
1434          * races where it gets disabled while we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
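
/*
 * Example: a common debugging pattern is to let the ring buffer run and
 * then stop it the moment a suspect condition is seen, preserving the
 * events that led up to it (the in-kernel equivalent of
 * "echo 0 > tracing_on"):
 *
 *	if (suspect_condition) {
 *		trace_puts("hit the suspect condition\n");
 *		tracing_off();
 *	}
 *
 * "suspect_condition" is of course a stand-in for whatever is being
 * chased; tracing_is_on() can be checked afterwards to see whether the
 * buffers were already stopped.
 */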
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /*
1496          * nr_entries can not be zero and the startup
1497          * tests require some buffer space. Therefore
1498          * ensure we have at least 4096 bytes of buffer.
1499          */
1500         trace_buf_size = max(4096UL, buf_size);
1501         return 1;
1502 }
1503 __setup("trace_buf_size=", set_buf_size);
1504
1505 static int __init set_tracing_thresh(char *str)
1506 {
1507         unsigned long threshold;
1508         int ret;
1509
1510         if (!str)
1511                 return 0;
1512         ret = kstrtoul(str, 0, &threshold);
1513         if (ret < 0)
1514                 return 0;
1515         tracing_thresh = threshold * 1000;
1516         return 1;
1517 }
1518 __setup("tracing_thresh=", set_tracing_thresh);
1519
1520 unsigned long nsecs_to_usecs(unsigned long nsecs)
1521 {
1522         return nsecs / 1000;
1523 }
1524
1525 /*
1526  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1527  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1528  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1529  * of strings in the order that the evals (enum) were defined.
1530  */
1531 #undef C
1532 #define C(a, b) b
1533
1534 /* These must match the bit positions in trace_iterator_flags */
1535 static const char *trace_options[] = {
1536         TRACE_FLAGS
1537         NULL
1538 };
1539
1540 static struct {
1541         u64 (*func)(void);
1542         const char *name;
1543         int in_ns;              /* is this clock in nanoseconds? */
1544 } trace_clocks[] = {
1545         { trace_clock_local,            "local",        1 },
1546         { trace_clock_global,           "global",       1 },
1547         { trace_clock_counter,          "counter",      0 },
1548         { trace_clock_jiffies,          "uptime",       0 },
1549         { trace_clock,                  "perf",         1 },
1550         { ktime_get_mono_fast_ns,       "mono",         1 },
1551         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1552         { ktime_get_boot_fast_ns,       "boot",         1 },
1553         { ktime_get_tai_fast_ns,        "tai",          1 },
1554         ARCH_TRACE_CLOCKS
1555 };
1556
1557 bool trace_clock_in_ns(struct trace_array *tr)
1558 {
1559         if (trace_clocks[tr->clock_id].in_ns)
1560                 return true;
1561
1562         return false;
1563 }
1564
1565 /*
1566  * trace_parser_get_init - gets the buffer for trace parser
1567  */
1568 int trace_parser_get_init(struct trace_parser *parser, int size)
1569 {
1570         memset(parser, 0, sizeof(*parser));
1571
1572         parser->buffer = kmalloc(size, GFP_KERNEL);
1573         if (!parser->buffer)
1574                 return 1;
1575
1576         parser->size = size;
1577         return 0;
1578 }
1579
1580 /*
1581  * trace_parser_put - frees the buffer for trace parser
1582  */
1583 void trace_parser_put(struct trace_parser *parser)
1584 {
1585         kfree(parser->buffer);
1586         parser->buffer = NULL;
1587 }
1588
1589 /*
1590  * trace_get_user - reads the user input string separated by  space
1591  * (matched by isspace(ch))
1592  *
1593  * For each string found the 'struct trace_parser' is updated,
1594  * and the function returns.
1595  *
1596  * Returns number of bytes read.
1597  *
1598  * See kernel/trace/trace.h for 'struct trace_parser' details.
1599  */
1600 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1601         size_t cnt, loff_t *ppos)
1602 {
1603         char ch;
1604         size_t read = 0;
1605         ssize_t ret;
1606
1607         if (!*ppos)
1608                 trace_parser_clear(parser);
1609
1610         ret = get_user(ch, ubuf++);
1611         if (ret)
1612                 goto out;
1613
1614         read++;
1615         cnt--;
1616
1617         /*
1618          * If the parser did not finish with the last write,
1619          * continue reading the user input without skipping spaces.
1620          */
1621         if (!parser->cont) {
1622                 /* skip white space */
1623                 while (cnt && isspace(ch)) {
1624                         ret = get_user(ch, ubuf++);
1625                         if (ret)
1626                                 goto out;
1627                         read++;
1628                         cnt--;
1629                 }
1630
1631                 parser->idx = 0;
1632
1633                 /* only spaces were written */
1634                 if (isspace(ch) || !ch) {
1635                         *ppos += read;
1636                         ret = read;
1637                         goto out;
1638                 }
1639         }
1640
1641         /* read the non-space input */
1642         while (cnt && !isspace(ch) && ch) {
1643                 if (parser->idx < parser->size - 1)
1644                         parser->buffer[parser->idx++] = ch;
1645                 else {
1646                         ret = -EINVAL;
1647                         goto out;
1648                 }
1649                 ret = get_user(ch, ubuf++);
1650                 if (ret)
1651                         goto out;
1652                 read++;
1653                 cnt--;
1654         }
1655
1656         /* We either got finished input or we have to wait for another call. */
1657         if (isspace(ch) || !ch) {
1658                 parser->buffer[parser->idx] = 0;
1659                 parser->cont = false;
1660         } else if (parser->idx < parser->size - 1) {
1661                 parser->cont = true;
1662                 parser->buffer[parser->idx++] = ch;
1663                 /* Make sure the parsed string always terminates with '\0'. */
1664                 parser->buffer[parser->idx] = 0;
1665         } else {
1666                 ret = -EINVAL;
1667                 goto out;
1668         }
1669
1670         *ppos += read;
1671         ret = read;
1672
1673 out:
1674         return ret;
1675 }
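
/*
 * Example (editor's sketch, not part of the file): a write() handler that
 * consumes one space-separated token per call using the parser helpers
 * above. handle_token() is a hypothetical callback; the in-tree users are
 * the ftrace filter files (see ftrace_regex_write() in kernel/trace/ftrace.c),
 * and trace_parser_loaded() is defined in kernel/trace/trace.h.
 *
 *	ssize_t my_token_write(struct file *filp, const char __user *ubuf,
 *			       size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, PAGE_SIZE))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read >= 0 && trace_parser_loaded(&parser))
 *			handle_token(parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */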
1676
1677 /* TODO add a seq_buf_to_buffer() */
1678 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1679 {
1680         int len;
1681
1682         if (trace_seq_used(s) <= s->seq.readpos)
1683                 return -EBUSY;
1684
1685         len = trace_seq_used(s) - s->seq.readpos;
1686         if (cnt > len)
1687                 cnt = len;
1688         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1689
1690         s->seq.readpos += cnt;
1691         return cnt;
1692 }
1693
1694 unsigned long __read_mostly     tracing_thresh;
1695 static const struct file_operations tracing_max_lat_fops;
1696
1697 #ifdef LATENCY_FS_NOTIFY
1698
1699 static struct workqueue_struct *fsnotify_wq;
1700
1701 static void latency_fsnotify_workfn(struct work_struct *work)
1702 {
1703         struct trace_array *tr = container_of(work, struct trace_array,
1704                                               fsnotify_work);
1705         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1706 }
1707
1708 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1709 {
1710         struct trace_array *tr = container_of(iwork, struct trace_array,
1711                                               fsnotify_irqwork);
1712         queue_work(fsnotify_wq, &tr->fsnotify_work);
1713 }
1714
1715 static void trace_create_maxlat_file(struct trace_array *tr,
1716                                      struct dentry *d_tracer)
1717 {
1718         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1719         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1720         tr->d_max_latency = trace_create_file("tracing_max_latency",
1721                                               TRACE_MODE_WRITE,
1722                                               d_tracer, &tr->max_latency,
1723                                               &tracing_max_lat_fops);
1724 }
1725
1726 __init static int latency_fsnotify_init(void)
1727 {
1728         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1729                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1730         if (!fsnotify_wq) {
1731                 pr_err("Unable to allocate tr_max_lat_wq\n");
1732                 return -ENOMEM;
1733         }
1734         return 0;
1735 }
1736
1737 late_initcall_sync(latency_fsnotify_init);
1738
1739 void latency_fsnotify(struct trace_array *tr)
1740 {
1741         if (!fsnotify_wq)
1742                 return;
1743         /*
1744          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1745          * possible that we are called from __schedule() or do_idle(), which
1746          * could cause a deadlock.
1747          */
1748         irq_work_queue(&tr->fsnotify_irqwork);
1749 }
1750
1751 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1752         || defined(CONFIG_OSNOISE_TRACER)
1753
1754 #define trace_create_maxlat_file(tr, d_tracer)                          \
1755         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1756                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1757
1758 #else
1759 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1760 #endif
1761
1762 #ifdef CONFIG_TRACER_MAX_TRACE
1763 /*
1764  * Copy the new maximum trace into the separate maximum-trace
1765  * structure. (this way the maximum trace is permanently saved,
1766  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1767  */
1768 static void
1769 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1770 {
1771         struct array_buffer *trace_buf = &tr->array_buffer;
1772         struct array_buffer *max_buf = &tr->max_buffer;
1773         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1774         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1775
1776         max_buf->cpu = cpu;
1777         max_buf->time_start = data->preempt_timestamp;
1778
1779         max_data->saved_latency = tr->max_latency;
1780         max_data->critical_start = data->critical_start;
1781         max_data->critical_end = data->critical_end;
1782
1783         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1784         max_data->pid = tsk->pid;
1785         /*
1786          * If tsk == current, then use current_uid(), as that does not use
1787          * RCU. The irq tracer can be called out of RCU scope.
1788          */
1789         if (tsk == current)
1790                 max_data->uid = current_uid();
1791         else
1792                 max_data->uid = task_uid(tsk);
1793
1794         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1795         max_data->policy = tsk->policy;
1796         max_data->rt_priority = tsk->rt_priority;
1797
1798         /* record this task's comm */
1799         tracing_record_cmdline(tsk);
1800         latency_fsnotify(tr);
1801 }
1802
1803 /**
1804  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1805  * @tr: tracer
1806  * @tsk: the task with the latency
1807  * @cpu: The cpu that initiated the trace.
1808  * @cond_data: User data associated with a conditional snapshot
1809  *
1810  * Flip the buffers between the @tr and the max_tr and record information
1811  * about which task was the cause of this latency.
1812  */
1813 void
1814 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1815               void *cond_data)
1816 {
1817         if (tr->stop_count)
1818                 return;
1819
1820         WARN_ON_ONCE(!irqs_disabled());
1821
1822         if (!tr->allocated_snapshot) {
1823                 /* Only the nop tracer should hit this when disabling */
1824                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1825                 return;
1826         }
1827
1828         arch_spin_lock(&tr->max_lock);
1829
1830         /* Inherit the recordable setting from array_buffer */
1831         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1832                 ring_buffer_record_on(tr->max_buffer.buffer);
1833         else
1834                 ring_buffer_record_off(tr->max_buffer.buffer);
1835
1836 #ifdef CONFIG_TRACER_SNAPSHOT
1837         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1838                 goto out_unlock;
1839 #endif
1840         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1841
1842         __update_max_tr(tr, tsk, cpu);
1843
1844  out_unlock:
1845         arch_spin_unlock(&tr->max_lock);
1846 }
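
/*
 * Example (editor's sketch): a latency tracer typically calls update_max_tr()
 * right after it measures a new worst-case latency, roughly as the wakeup
 * tracer does. 'delta' and the locking around the comparison are the
 * caller's responsibility.
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */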
1847
1848 /**
1849  * update_max_tr_single - only copy one trace over, and reset the rest
1850  * @tr: tracer
1851  * @tsk: task with the latency
1852  * @cpu: the cpu of the buffer to copy.
1853  *
1854  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1855  */
1856 void
1857 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1858 {
1859         int ret;
1860
1861         if (tr->stop_count)
1862                 return;
1863
1864         WARN_ON_ONCE(!irqs_disabled());
1865         if (!tr->allocated_snapshot) {
1866                 /* Only the nop tracer should hit this when disabling */
1867                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1868                 return;
1869         }
1870
1871         arch_spin_lock(&tr->max_lock);
1872
1873         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1874
1875         if (ret == -EBUSY) {
1876                 /*
1877                  * We failed to swap the buffer due to a commit taking
1878                  * place on this CPU. We fail to record, but we reset
1879                  * the max trace buffer (no one writes directly to it)
1880                  * and flag that it failed.
1881                  */
1882                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1883                         "Failed to swap buffers due to commit in progress\n");
1884         }
1885
1886         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1887
1888         __update_max_tr(tr, tsk, cpu);
1889         arch_spin_unlock(&tr->max_lock);
1890 }
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892
1893 static int wait_on_pipe(struct trace_iterator *iter, int full)
1894 {
1895         /* Iterators are static, they should be filled or empty */
1896         if (trace_buffer_iter(iter, iter->cpu_file))
1897                 return 0;
1898
1899         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1900                                 full);
1901 }
1902
1903 #ifdef CONFIG_FTRACE_STARTUP_TEST
1904 static bool selftests_can_run;
1905
1906 struct trace_selftests {
1907         struct list_head                list;
1908         struct tracer                   *type;
1909 };
1910
1911 static LIST_HEAD(postponed_selftests);
1912
1913 static int save_selftest(struct tracer *type)
1914 {
1915         struct trace_selftests *selftest;
1916
1917         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1918         if (!selftest)
1919                 return -ENOMEM;
1920
1921         selftest->type = type;
1922         list_add(&selftest->list, &postponed_selftests);
1923         return 0;
1924 }
1925
1926 static int run_tracer_selftest(struct tracer *type)
1927 {
1928         struct trace_array *tr = &global_trace;
1929         struct tracer *saved_tracer = tr->current_trace;
1930         int ret;
1931
1932         if (!type->selftest || tracing_selftest_disabled)
1933                 return 0;
1934
1935         /*
1936          * If a tracer registers early in boot up (before scheduling is
1937          * initialized and such), then do not run its selftests yet.
1938          * Instead, run it a little later in the boot process.
1939          */
1940         if (!selftests_can_run)
1941                 return save_selftest(type);
1942
1943         if (!tracing_is_on()) {
1944                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1945                         type->name);
1946                 return 0;
1947         }
1948
1949         /*
1950          * Run a selftest on this tracer.
1951          * Here we reset the trace buffer, and set the current
1952          * tracer to be this tracer. The tracer can then run some
1953          * internal tracing to verify that everything is in order.
1954          * If we fail, we do not register this tracer.
1955          */
1956         tracing_reset_online_cpus(&tr->array_buffer);
1957
1958         tr->current_trace = type;
1959
1960 #ifdef CONFIG_TRACER_MAX_TRACE
1961         if (type->use_max_tr) {
1962                 /* If we expanded the buffers, make sure the max is expanded too */
1963                 if (ring_buffer_expanded)
1964                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1965                                            RING_BUFFER_ALL_CPUS);
1966                 tr->allocated_snapshot = true;
1967         }
1968 #endif
1969
1970         /* the test is responsible for initializing and enabling */
1971         pr_info("Testing tracer %s: ", type->name);
1972         ret = type->selftest(type, tr);
1973         /* the test is responsible for resetting too */
1974         tr->current_trace = saved_tracer;
1975         if (ret) {
1976                 printk(KERN_CONT "FAILED!\n");
1977                 /* Add the warning after printing 'FAILED' */
1978                 WARN_ON(1);
1979                 return -1;
1980         }
1981         /* Only reset on passing, to avoid touching corrupted buffers */
1982         tracing_reset_online_cpus(&tr->array_buffer);
1983
1984 #ifdef CONFIG_TRACER_MAX_TRACE
1985         if (type->use_max_tr) {
1986                 tr->allocated_snapshot = false;
1987
1988                 /* Shrink the max buffer again */
1989                 if (ring_buffer_expanded)
1990                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1991                                            RING_BUFFER_ALL_CPUS);
1992         }
1993 #endif
1994
1995         printk(KERN_CONT "PASSED\n");
1996         return 0;
1997 }
1998
1999 static __init int init_trace_selftests(void)
2000 {
2001         struct trace_selftests *p, *n;
2002         struct tracer *t, **last;
2003         int ret;
2004
2005         selftests_can_run = true;
2006
2007         mutex_lock(&trace_types_lock);
2008
2009         if (list_empty(&postponed_selftests))
2010                 goto out;
2011
2012         pr_info("Running postponed tracer tests:\n");
2013
2014         tracing_selftest_running = true;
2015         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2016                 /* This loop can take minutes when sanitizers are enabled, so
2017                  * let's make sure we allow RCU processing.
2018                  */
2019                 cond_resched();
2020                 ret = run_tracer_selftest(p->type);
2021                 /* If the test fails, then warn and remove from available_tracers */
2022                 if (ret < 0) {
2023                         WARN(1, "tracer: %s failed selftest, disabling\n",
2024                              p->type->name);
2025                         last = &trace_types;
2026                         for (t = trace_types; t; t = t->next) {
2027                                 if (t == p->type) {
2028                                         *last = t->next;
2029                                         break;
2030                                 }
2031                                 last = &t->next;
2032                         }
2033                 }
2034                 list_del(&p->list);
2035                 kfree(p);
2036         }
2037         tracing_selftest_running = false;
2038
2039  out:
2040         mutex_unlock(&trace_types_lock);
2041
2042         return 0;
2043 }
2044 core_initcall(init_trace_selftests);
2045 #else
2046 static inline int run_tracer_selftest(struct tracer *type)
2047 {
2048         return 0;
2049 }
2050 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2051
2052 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2053
2054 static void __init apply_trace_boot_options(void);
2055
2056 /**
2057  * register_tracer - register a tracer with the ftrace system.
2058  * @type: the plugin for the tracer
2059  *
2060  * Register a new plugin tracer.
2061  */
2062 int __init register_tracer(struct tracer *type)
2063 {
2064         struct tracer *t;
2065         int ret = 0;
2066
2067         if (!type->name) {
2068                 pr_info("Tracer must have a name\n");
2069                 return -1;
2070         }
2071
2072         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2073                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2074                 return -1;
2075         }
2076
2077         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2078                 pr_warn("Can not register tracer %s due to lockdown\n",
2079                            type->name);
2080                 return -EPERM;
2081         }
2082
2083         mutex_lock(&trace_types_lock);
2084
2085         tracing_selftest_running = true;
2086
2087         for (t = trace_types; t; t = t->next) {
2088                 if (strcmp(type->name, t->name) == 0) {
2089                         /* already found */
2090                         pr_info("Tracer %s already registered\n",
2091                                 type->name);
2092                         ret = -1;
2093                         goto out;
2094                 }
2095         }
2096
2097         if (!type->set_flag)
2098                 type->set_flag = &dummy_set_flag;
2099         if (!type->flags) {
2100                 /* allocate a dummy tracer_flags */
2101                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2102                 if (!type->flags) {
2103                         ret = -ENOMEM;
2104                         goto out;
2105                 }
2106                 type->flags->val = 0;
2107                 type->flags->opts = dummy_tracer_opt;
2108         } else
2109                 if (!type->flags->opts)
2110                         type->flags->opts = dummy_tracer_opt;
2111
2112         /* store the tracer for __set_tracer_option */
2113         type->flags->trace = type;
2114
2115         ret = run_tracer_selftest(type);
2116         if (ret < 0)
2117                 goto out;
2118
2119         type->next = trace_types;
2120         trace_types = type;
2121         add_tracer_options(&global_trace, type);
2122
2123  out:
2124         tracing_selftest_running = false;
2125         mutex_unlock(&trace_types_lock);
2126
2127         if (ret || !default_bootup_tracer)
2128                 goto out_unlock;
2129
2130         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2131                 goto out_unlock;
2132
2133         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2134         /* Do we want this tracer to start on bootup? */
2135         tracing_set_tracer(&global_trace, type->name);
2136         default_bootup_tracer = NULL;
2137
2138         apply_trace_boot_options();
2139
2140         /* disable other selftests, since this tracer will break them. */
2141         disable_tracing_selftest("running a tracer");
2142
2143  out_unlock:
2144         return ret;
2145 }
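
/*
 * Example (editor's sketch): a minimal tracer registration, modeled on the
 * in-tree tracers. The my_tracer_* names are hypothetical.
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */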
2146
2147 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2148 {
2149         struct trace_buffer *buffer = buf->buffer;
2150
2151         if (!buffer)
2152                 return;
2153
2154         ring_buffer_record_disable(buffer);
2155
2156         /* Make sure all commits have finished */
2157         synchronize_rcu();
2158         ring_buffer_reset_cpu(buffer, cpu);
2159
2160         ring_buffer_record_enable(buffer);
2161 }
2162
2163 void tracing_reset_online_cpus(struct array_buffer *buf)
2164 {
2165         struct trace_buffer *buffer = buf->buffer;
2166
2167         if (!buffer)
2168                 return;
2169
2170         ring_buffer_record_disable(buffer);
2171
2172         /* Make sure all commits have finished */
2173         synchronize_rcu();
2174
2175         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2176
2177         ring_buffer_reset_online_cpus(buffer);
2178
2179         ring_buffer_record_enable(buffer);
2180 }
2181
2182 /* Must have trace_types_lock held */
2183 void tracing_reset_all_online_cpus_unlocked(void)
2184 {
2185         struct trace_array *tr;
2186
2187         lockdep_assert_held(&trace_types_lock);
2188
2189         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2190                 if (!tr->clear_trace)
2191                         continue;
2192                 tr->clear_trace = false;
2193                 tracing_reset_online_cpus(&tr->array_buffer);
2194 #ifdef CONFIG_TRACER_MAX_TRACE
2195                 tracing_reset_online_cpus(&tr->max_buffer);
2196 #endif
2197         }
2198 }
2199
2200 void tracing_reset_all_online_cpus(void)
2201 {
2202         mutex_lock(&trace_types_lock);
2203         tracing_reset_all_online_cpus_unlocked();
2204         mutex_unlock(&trace_types_lock);
2205 }
2206
2207 /*
2208  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2209  * is the tgid last observed corresponding to pid=i.
2210  */
2211 static int *tgid_map;
2212
2213 /* The maximum valid index into tgid_map. */
2214 static size_t tgid_map_max;
2215
2216 #define SAVED_CMDLINES_DEFAULT 128
2217 #define NO_CMDLINE_MAP UINT_MAX
2218 /*
2219  * Preemption must be disabled before acquiring trace_cmdline_lock.
2220  * The various trace_arrays' max_lock must be acquired in a context
2221  * where interrupt is disabled.
2222  */
2223 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2224 struct saved_cmdlines_buffer {
2225         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2226         unsigned *map_cmdline_to_pid;
2227         unsigned cmdline_num;
2228         int cmdline_idx;
2229         char *saved_cmdlines;
2230 };
2231 static struct saved_cmdlines_buffer *savedcmd;
2232
2233 static inline char *get_saved_cmdlines(int idx)
2234 {
2235         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2236 }
2237
2238 static inline void set_cmdline(int idx, const char *cmdline)
2239 {
2240         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2241 }
2242
2243 static int allocate_cmdlines_buffer(unsigned int val,
2244                                     struct saved_cmdlines_buffer *s)
2245 {
2246         s->map_cmdline_to_pid = kmalloc_array(val,
2247                                               sizeof(*s->map_cmdline_to_pid),
2248                                               GFP_KERNEL);
2249         if (!s->map_cmdline_to_pid)
2250                 return -ENOMEM;
2251
2252         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2253         if (!s->saved_cmdlines) {
2254                 kfree(s->map_cmdline_to_pid);
2255                 return -ENOMEM;
2256         }
2257
2258         s->cmdline_idx = 0;
2259         s->cmdline_num = val;
2260         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2261                sizeof(s->map_pid_to_cmdline));
2262         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2263                val * sizeof(*s->map_cmdline_to_pid));
2264
2265         return 0;
2266 }
2267
2268 static int trace_create_savedcmd(void)
2269 {
2270         int ret;
2271
2272         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2273         if (!savedcmd)
2274                 return -ENOMEM;
2275
2276         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2277         if (ret < 0) {
2278                 kfree(savedcmd);
2279                 savedcmd = NULL;
2280                 return -ENOMEM;
2281         }
2282
2283         return 0;
2284 }
2285
2286 int is_tracing_stopped(void)
2287 {
2288         return global_trace.stop_count;
2289 }
2290
2291 /**
2292  * tracing_start - quick start of the tracer
2293  *
2294  * If tracing is enabled but was stopped by tracing_stop,
2295  * this will start the tracer back up.
2296  */
2297 void tracing_start(void)
2298 {
2299         struct trace_buffer *buffer;
2300         unsigned long flags;
2301
2302         if (tracing_disabled)
2303                 return;
2304
2305         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2306         if (--global_trace.stop_count) {
2307                 if (global_trace.stop_count < 0) {
2308                         /* Someone screwed up their debugging */
2309                         WARN_ON_ONCE(1);
2310                         global_trace.stop_count = 0;
2311                 }
2312                 goto out;
2313         }
2314
2315         /* Prevent the buffers from switching */
2316         arch_spin_lock(&global_trace.max_lock);
2317
2318         buffer = global_trace.array_buffer.buffer;
2319         if (buffer)
2320                 ring_buffer_record_enable(buffer);
2321
2322 #ifdef CONFIG_TRACER_MAX_TRACE
2323         buffer = global_trace.max_buffer.buffer;
2324         if (buffer)
2325                 ring_buffer_record_enable(buffer);
2326 #endif
2327
2328         arch_spin_unlock(&global_trace.max_lock);
2329
2330  out:
2331         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2332 }
2333
2334 static void tracing_start_tr(struct trace_array *tr)
2335 {
2336         struct trace_buffer *buffer;
2337         unsigned long flags;
2338
2339         if (tracing_disabled)
2340                 return;
2341
2342         /* If global, we need to also start the max tracer */
2343         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2344                 return tracing_start();
2345
2346         raw_spin_lock_irqsave(&tr->start_lock, flags);
2347
2348         if (--tr->stop_count) {
2349                 if (tr->stop_count < 0) {
2350                         /* Someone screwed up their debugging */
2351                         WARN_ON_ONCE(1);
2352                         tr->stop_count = 0;
2353                 }
2354                 goto out;
2355         }
2356
2357         buffer = tr->array_buffer.buffer;
2358         if (buffer)
2359                 ring_buffer_record_enable(buffer);
2360
2361  out:
2362         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2363 }
2364
2365 /**
2366  * tracing_stop - quick stop of the tracer
2367  *
2368  * Lightweight way to stop tracing. Use in conjunction with
2369  * tracing_start.
2370  */
2371 void tracing_stop(void)
2372 {
2373         struct trace_buffer *buffer;
2374         unsigned long flags;
2375
2376         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2377         if (global_trace.stop_count++)
2378                 goto out;
2379
2380         /* Prevent the buffers from switching */
2381         arch_spin_lock(&global_trace.max_lock);
2382
2383         buffer = global_trace.array_buffer.buffer;
2384         if (buffer)
2385                 ring_buffer_record_disable(buffer);
2386
2387 #ifdef CONFIG_TRACER_MAX_TRACE
2388         buffer = global_trace.max_buffer.buffer;
2389         if (buffer)
2390                 ring_buffer_record_disable(buffer);
2391 #endif
2392
2393         arch_spin_unlock(&global_trace.max_lock);
2394
2395  out:
2396         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2397 }
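
/*
 * Example (editor's sketch): tracing_stop()/tracing_start() are meant to
 * bracket a region where the buffers should be frozen for inspection, e.g.
 * from debugging code that has just detected an anomaly:
 *
 *	tracing_stop();
 *	... inspect the now-quiescent trace buffers ...
 *	tracing_start();
 */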
2398
2399 static void tracing_stop_tr(struct trace_array *tr)
2400 {
2401         struct trace_buffer *buffer;
2402         unsigned long flags;
2403
2404         /* If global, we need to also stop the max tracer */
2405         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2406                 return tracing_stop();
2407
2408         raw_spin_lock_irqsave(&tr->start_lock, flags);
2409         if (tr->stop_count++)
2410                 goto out;
2411
2412         buffer = tr->array_buffer.buffer;
2413         if (buffer)
2414                 ring_buffer_record_disable(buffer);
2415
2416  out:
2417         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2418 }
2419
2420 static int trace_save_cmdline(struct task_struct *tsk)
2421 {
2422         unsigned tpid, idx;
2423
2424         /* treat recording of idle task as a success */
2425         if (!tsk->pid)
2426                 return 1;
2427
2428         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2429
2430         /*
2431          * It's not the end of the world if we don't get
2432          * the lock, but we also don't want to spin
2433          * nor do we want to disable interrupts,
2434          * so if we miss here, then better luck next time.
2435          *
2436          * This is called from within the scheduler and the wakeup path, so
2437          * interrupts had better be disabled and the run queue lock held.
2438          */
2439         lockdep_assert_preemption_disabled();
2440         if (!arch_spin_trylock(&trace_cmdline_lock))
2441                 return 0;
2442
2443         idx = savedcmd->map_pid_to_cmdline[tpid];
2444         if (idx == NO_CMDLINE_MAP) {
2445                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2446
2447                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2448                 savedcmd->cmdline_idx = idx;
2449         }
2450
2451         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2452         set_cmdline(idx, tsk->comm);
2453
2454         arch_spin_unlock(&trace_cmdline_lock);
2455
2456         return 1;
2457 }
2458
2459 static void __trace_find_cmdline(int pid, char comm[])
2460 {
2461         unsigned map;
2462         int tpid;
2463
2464         if (!pid) {
2465                 strcpy(comm, "<idle>");
2466                 return;
2467         }
2468
2469         if (WARN_ON_ONCE(pid < 0)) {
2470                 strcpy(comm, "<XXX>");
2471                 return;
2472         }
2473
2474         tpid = pid & (PID_MAX_DEFAULT - 1);
2475         map = savedcmd->map_pid_to_cmdline[tpid];
2476         if (map != NO_CMDLINE_MAP) {
2477                 tpid = savedcmd->map_cmdline_to_pid[map];
2478                 if (tpid == pid) {
2479                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2480                         return;
2481                 }
2482         }
2483         strcpy(comm, "<...>");
2484 }
2485
2486 void trace_find_cmdline(int pid, char comm[])
2487 {
2488         preempt_disable();
2489         arch_spin_lock(&trace_cmdline_lock);
2490
2491         __trace_find_cmdline(pid, comm);
2492
2493         arch_spin_unlock(&trace_cmdline_lock);
2494         preempt_enable();
2495 }
2496
2497 static int *trace_find_tgid_ptr(int pid)
2498 {
2499         /*
2500          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2501          * if we observe a non-NULL tgid_map then we also observe the correct
2502          * tgid_map_max.
2503          */
2504         int *map = smp_load_acquire(&tgid_map);
2505
2506         if (unlikely(!map || pid > tgid_map_max))
2507                 return NULL;
2508
2509         return &map[pid];
2510 }
2511
2512 int trace_find_tgid(int pid)
2513 {
2514         int *ptr = trace_find_tgid_ptr(pid);
2515
2516         return ptr ? *ptr : 0;
2517 }
2518
2519 static int trace_save_tgid(struct task_struct *tsk)
2520 {
2521         int *ptr;
2522
2523         /* treat recording of idle task as a success */
2524         if (!tsk->pid)
2525                 return 1;
2526
2527         ptr = trace_find_tgid_ptr(tsk->pid);
2528         if (!ptr)
2529                 return 0;
2530
2531         *ptr = tsk->tgid;
2532         return 1;
2533 }
2534
2535 static bool tracing_record_taskinfo_skip(int flags)
2536 {
2537         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2538                 return true;
2539         if (!__this_cpu_read(trace_taskinfo_save))
2540                 return true;
2541         return false;
2542 }
2543
2544 /**
2545  * tracing_record_taskinfo - record the task info of a task
2546  *
2547  * @task:  task to record
2548  * @flags: TRACE_RECORD_CMDLINE for recording comm
2549  *         TRACE_RECORD_TGID for recording tgid
2550  */
2551 void tracing_record_taskinfo(struct task_struct *task, int flags)
2552 {
2553         bool done;
2554
2555         if (tracing_record_taskinfo_skip(flags))
2556                 return;
2557
2558         /*
2559          * Record as much task information as possible. If some fail, continue
2560          * to try to record the others.
2561          */
2562         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2563         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2564
2565         /* If recording any information failed, retry again soon. */
2566         if (!done)
2567                 return;
2568
2569         __this_cpu_write(trace_taskinfo_save, false);
2570 }
2571
2572 /**
2573  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2574  *
2575  * @prev: previous task during sched_switch
2576  * @next: next task during sched_switch
2577  * @flags: TRACE_RECORD_CMDLINE for recording comm
2578  *         TRACE_RECORD_TGID for recording tgid
2579  */
2580 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2581                                           struct task_struct *next, int flags)
2582 {
2583         bool done;
2584
2585         if (tracing_record_taskinfo_skip(flags))
2586                 return;
2587
2588         /*
2589          * Record as much task information as possible. If some fail, continue
2590          * to try to record the others.
2591          */
2592         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2593         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2594         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2595         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2596
2597         /* If recording any information failed, retry again soon. */
2598         if (!done)
2599                 return;
2600
2601         __this_cpu_write(trace_taskinfo_save, false);
2602 }
2603
2604 /* Helpers to record specific task information */
2605 void tracing_record_cmdline(struct task_struct *task)
2606 {
2607         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2608 }
2609
2610 void tracing_record_tgid(struct task_struct *task)
2611 {
2612         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2613 }
2614
2615 /*
2616  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2617  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2618  * simplifies those functions and keeps them in sync.
2619  */
2620 enum print_line_t trace_handle_return(struct trace_seq *s)
2621 {
2622         return trace_seq_has_overflowed(s) ?
2623                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2624 }
2625 EXPORT_SYMBOL_GPL(trace_handle_return);
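
/*
 * Example (editor's sketch): an event's output callback typically ends with
 * trace_handle_return(), so that an overflowed trace_seq is reported as a
 * partial line. 'struct my_entry' and its 'val' field are hypothetical.
 *
 *	static enum print_line_t my_event_output(struct trace_iterator *iter,
 *						 int flags,
 *						 struct trace_event *event)
 *	{
 *		struct my_entry *field = (struct my_entry *)iter->ent;
 *
 *		trace_seq_printf(&iter->seq, "my_event: val=%u\n", field->val);
 *		return trace_handle_return(&iter->seq);
 *	}
 */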
2626
2627 static unsigned short migration_disable_value(void)
2628 {
2629 #if defined(CONFIG_SMP)
2630         return current->migration_disabled;
2631 #else
2632         return 0;
2633 #endif
2634 }
2635
2636 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2637 {
2638         unsigned int trace_flags = irqs_status;
2639         unsigned int pc;
2640
2641         pc = preempt_count();
2642
2643         if (pc & NMI_MASK)
2644                 trace_flags |= TRACE_FLAG_NMI;
2645         if (pc & HARDIRQ_MASK)
2646                 trace_flags |= TRACE_FLAG_HARDIRQ;
2647         if (in_serving_softirq())
2648                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2649         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2650                 trace_flags |= TRACE_FLAG_BH_OFF;
2651
2652         if (tif_need_resched())
2653                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2654         if (test_preempt_need_resched())
2655                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2656         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2657                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2658 }
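
/*
 * Editor's note on the packed value returned above: bits 0-3 carry the
 * preempt count (capped at 15), bits 4-7 the migration-disable depth
 * (also capped at 15), and bits 16 and up carry the TRACE_FLAG_* bits
 * built from irqs_status and the checks above.
 */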
2659
2660 struct ring_buffer_event *
2661 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2662                           int type,
2663                           unsigned long len,
2664                           unsigned int trace_ctx)
2665 {
2666         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2667 }
2668
2669 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2670 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2671 static int trace_buffered_event_ref;
2672
2673 /**
2674  * trace_buffered_event_enable - enable buffering events
2675  *
2676  * When events are being filtered, it is quicker to use a temporary
2677  * buffer to write the event data into if there's a likely chance
2678  * that it will not be committed. The discard of the ring buffer
2679  * is not as fast as committing, and is much slower than copying
2680  * a commit.
2681  *
2682  * When an event is to be filtered, allocate per cpu buffers to
2683  * write the event data into, and if the event is filtered and discarded
2684  * it is simply dropped, otherwise, the entire data is to be committed
2685  * in one shot.
2686  */
2687 void trace_buffered_event_enable(void)
2688 {
2689         struct ring_buffer_event *event;
2690         struct page *page;
2691         int cpu;
2692
2693         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2694
2695         if (trace_buffered_event_ref++)
2696                 return;
2697
2698         for_each_tracing_cpu(cpu) {
2699                 page = alloc_pages_node(cpu_to_node(cpu),
2700                                         GFP_KERNEL | __GFP_NORETRY, 0);
2701                 if (!page)
2702                         goto failed;
2703
2704                 event = page_address(page);
2705                 memset(event, 0, sizeof(*event));
2706
2707                 per_cpu(trace_buffered_event, cpu) = event;
2708
2709                 preempt_disable();
2710                 if (cpu == smp_processor_id() &&
2711                     __this_cpu_read(trace_buffered_event) !=
2712                     per_cpu(trace_buffered_event, cpu))
2713                         WARN_ON_ONCE(1);
2714                 preempt_enable();
2715         }
2716
2717         return;
2718  failed:
2719         trace_buffered_event_disable();
2720 }
2721
2722 static void enable_trace_buffered_event(void *data)
2723 {
2724         /* Probably not needed, but do it anyway */
2725         smp_rmb();
2726         this_cpu_dec(trace_buffered_event_cnt);
2727 }
2728
2729 static void disable_trace_buffered_event(void *data)
2730 {
2731         this_cpu_inc(trace_buffered_event_cnt);
2732 }
2733
2734 /**
2735  * trace_buffered_event_disable - disable buffering events
2736  *
2737  * When a filter is removed, it is faster to not use the buffered
2738  * events, and to commit directly into the ring buffer. Free up
2739  * the temp buffers when there are no more users. This requires
2740  * special synchronization with current events.
2741  */
2742 void trace_buffered_event_disable(void)
2743 {
2744         int cpu;
2745
2746         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2747
2748         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2749                 return;
2750
2751         if (--trace_buffered_event_ref)
2752                 return;
2753
2754         preempt_disable();
2755         /* For each CPU, set the buffer as used. */
2756         smp_call_function_many(tracing_buffer_mask,
2757                                disable_trace_buffered_event, NULL, 1);
2758         preempt_enable();
2759
2760         /* Wait for all current users to finish */
2761         synchronize_rcu();
2762
2763         for_each_tracing_cpu(cpu) {
2764                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2765                 per_cpu(trace_buffered_event, cpu) = NULL;
2766         }
2767         /*
2768          * Make sure trace_buffered_event is NULL before clearing
2769          * trace_buffered_event_cnt.
2770          */
2771         smp_wmb();
2772
2773         preempt_disable();
2774         /* Do the work on each cpu */
2775         smp_call_function_many(tracing_buffer_mask,
2776                                enable_trace_buffered_event, NULL, 1);
2777         preempt_enable();
2778 }
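
/*
 * Example (editor's sketch): the two calls above are reference counted and
 * must pair up under event_mutex, roughly the way the event filter code
 * uses them:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// a filter was attached
 *	mutex_unlock(&event_mutex);
 *
 *	...
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();		// the filter was removed
 *	mutex_unlock(&event_mutex);
 */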
2779
2780 static struct trace_buffer *temp_buffer;
2781
2782 struct ring_buffer_event *
2783 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2784                           struct trace_event_file *trace_file,
2785                           int type, unsigned long len,
2786                           unsigned int trace_ctx)
2787 {
2788         struct ring_buffer_event *entry;
2789         struct trace_array *tr = trace_file->tr;
2790         int val;
2791
2792         *current_rb = tr->array_buffer.buffer;
2793
2794         if (!tr->no_filter_buffering_ref &&
2795             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2796                 preempt_disable_notrace();
2797                 /*
2798                  * Filtering is on, so try to use the per cpu buffer first.
2799                  * This buffer will simulate a ring_buffer_event,
2800                  * where the type_len is zero and the array[0] will
2801                  * hold the full length.
2802                  * (see include/linux/ring_buffer.h for details on
2803                  *  how the ring_buffer_event is structured).
2804                  *
2805                  * Using a temp buffer during filtering and copying it
2806                  * on a matched filter is quicker than writing directly
2807                  * into the ring buffer and then discarding it when
2808                  * it doesn't match. That is because the discard
2809                  * requires several atomic operations to get right.
2810                  * Copying on match and doing nothing on a failed match
2811                  * is still quicker than skipping the copy on a match but
2812                  * having to discard from the ring buffer on a failed match.
2813                  */
2814                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2815                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2816
2817                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2818
2819                         /*
2820                          * Preemption is disabled, but interrupts and NMIs
2821                          * can still come in now. If that happens after
2822                          * the above increment, then it will have to go
2823                          * back to the old method of allocating the event
2824                          * on the ring buffer, and if the filter fails, it
2825                          * will have to call ring_buffer_discard_commit()
2826                          * to remove it.
2827                          *
2828                          * Need to also check the unlikely case that the
2829                          * length is bigger than the temp buffer size.
2830                          * If that happens, then the reserve is pretty much
2831                          * guaranteed to fail, as the ring buffer currently
2832                          * only allows events less than a page. But that may
2833                          * change in the future, so let the ring buffer reserve
2834                          * handle the failure in that case.
2835                          */
2836                         if (val == 1 && likely(len <= max_len)) {
2837                                 trace_event_setup(entry, type, trace_ctx);
2838                                 entry->array[0] = len;
2839                                 /* Return with preemption disabled */
2840                                 return entry;
2841                         }
2842                         this_cpu_dec(trace_buffered_event_cnt);
2843                 }
2844                 /* __trace_buffer_lock_reserve() disables preemption */
2845                 preempt_enable_notrace();
2846         }
2847
2848         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2849                                             trace_ctx);
2850         /*
2851          * If tracing is off, but we have triggers enabled
2852          * we still need to look at the event data. Use the temp_buffer
2853          * to store the trace event for the trigger to use. It's recursion
2854          * safe and will not be recorded anywhere.
2855          */
2856         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2857                 *current_rb = temp_buffer;
2858                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2859                                                     trace_ctx);
2860         }
2861         return entry;
2862 }
2863 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2864
2865 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2866 static DEFINE_MUTEX(tracepoint_printk_mutex);
2867
2868 static void output_printk(struct trace_event_buffer *fbuffer)
2869 {
2870         struct trace_event_call *event_call;
2871         struct trace_event_file *file;
2872         struct trace_event *event;
2873         unsigned long flags;
2874         struct trace_iterator *iter = tracepoint_print_iter;
2875
2876         /* We should never get here if iter is NULL */
2877         if (WARN_ON_ONCE(!iter))
2878                 return;
2879
2880         event_call = fbuffer->trace_file->event_call;
2881         if (!event_call || !event_call->event.funcs ||
2882             !event_call->event.funcs->trace)
2883                 return;
2884
2885         file = fbuffer->trace_file;
2886         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2887             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2888              !filter_match_preds(file->filter, fbuffer->entry)))
2889                 return;
2890
2891         event = &fbuffer->trace_file->event_call->event;
2892
2893         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2894         trace_seq_init(&iter->seq);
2895         iter->ent = fbuffer->entry;
2896         event_call->event.funcs->trace(iter, 0, event);
2897         trace_seq_putc(&iter->seq, 0);
2898         printk("%s", iter->seq.buffer);
2899
2900         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2901 }
2902
2903 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2904                              void *buffer, size_t *lenp,
2905                              loff_t *ppos)
2906 {
2907         int save_tracepoint_printk;
2908         int ret;
2909
2910         mutex_lock(&tracepoint_printk_mutex);
2911         save_tracepoint_printk = tracepoint_printk;
2912
2913         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2914
2915         /*
2916          * This will force exiting early, as tracepoint_printk
2917          * is always zero when tracepoint_print_iter is not allocated.
2918          */
2919         if (!tracepoint_print_iter)
2920                 tracepoint_printk = 0;
2921
2922         if (save_tracepoint_printk == tracepoint_printk)
2923                 goto out;
2924
2925         if (tracepoint_printk)
2926                 static_key_enable(&tracepoint_printk_key.key);
2927         else
2928                 static_key_disable(&tracepoint_printk_key.key);
2929
2930  out:
2931         mutex_unlock(&tracepoint_printk_mutex);
2932
2933         return ret;
2934 }
2935
2936 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2937 {
2938         enum event_trigger_type tt = ETT_NONE;
2939         struct trace_event_file *file = fbuffer->trace_file;
2940
2941         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2942                         fbuffer->entry, &tt))
2943                 goto discard;
2944
2945         if (static_key_false(&tracepoint_printk_key.key))
2946                 output_printk(fbuffer);
2947
2948         if (static_branch_unlikely(&trace_event_exports_enabled))
2949                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2950
2951         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2952                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2953
2954 discard:
2955         if (tt)
2956                 event_triggers_post_call(file, tt);
2957
2958 }
2959 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2960
2961 /*
2962  * Skip 3:
2963  *
2964  *   trace_buffer_unlock_commit_regs()
2965  *   trace_event_buffer_commit()
2966  *   trace_event_raw_event_xxx()
2967  */
2968 # define STACK_SKIP 3
2969
2970 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2971                                      struct trace_buffer *buffer,
2972                                      struct ring_buffer_event *event,
2973                                      unsigned int trace_ctx,
2974                                      struct pt_regs *regs)
2975 {
2976         __buffer_unlock_commit(buffer, event);
2977
2978         /*
2979          * If regs is not set, then skip the necessary functions.
2980          * Note, we can still get here via blktrace, wakeup tracer
2981          * and mmiotrace, but that's ok if they lose a function or
2982          * two. They are not that meaningful.
2983          */
2984         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2985         ftrace_trace_userstack(tr, buffer, trace_ctx);
2986 }
2987
2988 /*
2989  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2990  */
2991 void
2992 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2993                                    struct ring_buffer_event *event)
2994 {
2995         __buffer_unlock_commit(buffer, event);
2996 }
2997
2998 void
2999 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3000                parent_ip, unsigned int trace_ctx)
3001 {
3002         struct trace_event_call *call = &event_function;
3003         struct trace_buffer *buffer = tr->array_buffer.buffer;
3004         struct ring_buffer_event *event;
3005         struct ftrace_entry *entry;
3006
3007         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3008                                             trace_ctx);
3009         if (!event)
3010                 return;
3011         entry   = ring_buffer_event_data(event);
3012         entry->ip                       = ip;
3013         entry->parent_ip                = parent_ip;
3014
3015         if (!call_filter_check_discard(call, entry, buffer, event)) {
3016                 if (static_branch_unlikely(&trace_function_exports_enabled))
3017                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3018                 __buffer_unlock_commit(buffer, event);
3019         }
3020 }
3021
3022 #ifdef CONFIG_STACKTRACE
3023
3024 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3025 #define FTRACE_KSTACK_NESTING   4
3026
3027 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3028
3029 struct ftrace_stack {
3030         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3031 };
3032
3033
3034 struct ftrace_stacks {
3035         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3036 };
3037
3038 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3039 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3040
3041 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3042                                  unsigned int trace_ctx,
3043                                  int skip, struct pt_regs *regs)
3044 {
3045         struct trace_event_call *call = &event_kernel_stack;
3046         struct ring_buffer_event *event;
3047         unsigned int size, nr_entries;
3048         struct ftrace_stack *fstack;
3049         struct stack_entry *entry;
3050         int stackidx;
3051
3052         /*
3053          * Add one, for this function and the call to stack_trace_save().
3054          * If regs is set, then these functions will not be in the way.
3055          */
3056 #ifndef CONFIG_UNWINDER_ORC
3057         if (!regs)
3058                 skip++;
3059 #endif
3060
3061         preempt_disable_notrace();
3062
3063         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3064
3065         /* This should never happen. If it does, yell once and skip */
3066         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3067                 goto out;
3068
3069         /*
3070          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3071          * interrupt will either see the value pre increment or post
3072          * increment. If the interrupt happens pre increment it will have
3073          * restored the counter when it returns.  We just need a barrier to
3074          * keep gcc from moving things around.
3075          */
3076         barrier();
3077
3078         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3079         size = ARRAY_SIZE(fstack->calls);
3080
3081         if (regs) {
3082                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3083                                                    size, skip);
3084         } else {
3085                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3086         }
3087
3088         size = nr_entries * sizeof(unsigned long);
3089         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3090                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3091                                     trace_ctx);
3092         if (!event)
3093                 goto out;
3094         entry = ring_buffer_event_data(event);
3095
3096         memcpy(&entry->caller, fstack->calls, size);
3097         entry->size = nr_entries;
3098
3099         if (!call_filter_check_discard(call, entry, buffer, event))
3100                 __buffer_unlock_commit(buffer, event);
3101
3102  out:
3103         /* Again, don't let gcc optimize things here */
3104         barrier();
3105         __this_cpu_dec(ftrace_stack_reserve);
3106         preempt_enable_notrace();
3107
3108 }
3109
3110 static inline void ftrace_trace_stack(struct trace_array *tr,
3111                                       struct trace_buffer *buffer,
3112                                       unsigned int trace_ctx,
3113                                       int skip, struct pt_regs *regs)
3114 {
3115         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3116                 return;
3117
3118         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3119 }
3120
3121 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3122                    int skip)
3123 {
3124         struct trace_buffer *buffer = tr->array_buffer.buffer;
3125
3126         if (rcu_is_watching()) {
3127                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3128                 return;
3129         }
3130
3131         /*
3132          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3133          * but if the above rcu_is_watching() failed, then the NMI
3134          * triggered someplace critical, and ct_irq_enter() should
3135          * not be called from NMI.
3136          */
3137         if (unlikely(in_nmi()))
3138                 return;
3139
3140         ct_irq_enter_irqson();
3141         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3142         ct_irq_exit_irqson();
3143 }
3144
3145 /**
3146  * trace_dump_stack - record a stack back trace in the trace buffer
3147  * @skip: Number of functions to skip (helper handlers)
3148  */
3149 void trace_dump_stack(int skip)
3150 {
3151         if (tracing_disabled || tracing_selftest_running)
3152                 return;
3153
3154 #ifndef CONFIG_UNWINDER_ORC
3155         /* Skip 1 to skip this function. */
3156         skip++;
3157 #endif
3158         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3159                              tracing_gen_ctx(), skip, NULL);
3160 }
3161 EXPORT_SYMBOL_GPL(trace_dump_stack);
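
/*
 * Example (editor's sketch): drop trace_dump_stack(0) into a suspicious code
 * path to record how it was reached. 'seen_bad_state' is hypothetical.
 *
 *	if (unlikely(seen_bad_state))
 *		trace_dump_stack(0);
 *
 * The recorded back trace shows up in the trace output as a kernel stack entry.
 */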
3162
3163 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3164 static DEFINE_PER_CPU(int, user_stack_count);
3165
3166 static void
3167 ftrace_trace_userstack(struct trace_array *tr,
3168                        struct trace_buffer *buffer, unsigned int trace_ctx)
3169 {
3170         struct trace_event_call *call = &event_user_stack;
3171         struct ring_buffer_event *event;
3172         struct userstack_entry *entry;
3173
3174         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3175                 return;
3176
3177         /*
3178          * NMIs can not handle page faults, even with fixups.
3179          * Saving the user stack can (and often does) fault.
3180          */
3181         if (unlikely(in_nmi()))
3182                 return;
3183
3184         /*
3185          * prevent recursion, since the user stack tracing may
3186          * trigger other kernel events.
3187          */
3188         preempt_disable();
3189         if (__this_cpu_read(user_stack_count))
3190                 goto out;
3191
3192         __this_cpu_inc(user_stack_count);
3193
3194         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3195                                             sizeof(*entry), trace_ctx);
3196         if (!event)
3197                 goto out_drop_count;
3198         entry   = ring_buffer_event_data(event);
3199
3200         entry->tgid             = current->tgid;
3201         memset(&entry->caller, 0, sizeof(entry->caller));
3202
3203         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3204         if (!call_filter_check_discard(call, entry, buffer, event))
3205                 __buffer_unlock_commit(buffer, event);
3206
3207  out_drop_count:
3208         __this_cpu_dec(user_stack_count);
3209  out:
3210         preempt_enable();
3211 }
3212 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3213 static void ftrace_trace_userstack(struct trace_array *tr,
3214                                    struct trace_buffer *buffer,
3215                                    unsigned int trace_ctx)
3216 {
3217 }
3218 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3219
3220 #endif /* CONFIG_STACKTRACE */
3221
3222 static inline void
3223 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3224                           unsigned long long delta)
3225 {
3226         entry->bottom_delta_ts = delta & U32_MAX;
3227         entry->top_delta_ts = (delta >> 32);
3228 }
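/*
 * The 64-bit delta is split to fit the two 32-bit halves of the entry;
 * e.g. a delta of 0x100000003 is stored as top_delta_ts = 1 and
 * bottom_delta_ts = 3, and a reader recombines it as
 * ((u64)top_delta_ts << 32) | bottom_delta_ts.
 */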
3229
3230 void trace_last_func_repeats(struct trace_array *tr,
3231                              struct trace_func_repeats *last_info,
3232                              unsigned int trace_ctx)
3233 {
3234         struct trace_buffer *buffer = tr->array_buffer.buffer;
3235         struct func_repeats_entry *entry;
3236         struct ring_buffer_event *event;
3237         u64 delta;
3238
3239         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3240                                             sizeof(*entry), trace_ctx);
3241         if (!event)
3242                 return;
3243
3244         delta = ring_buffer_event_time_stamp(buffer, event) -
3245                 last_info->ts_last_call;
3246
3247         entry = ring_buffer_event_data(event);
3248         entry->ip = last_info->ip;
3249         entry->parent_ip = last_info->parent_ip;
3250         entry->count = last_info->count;
3251         func_repeats_set_delta_ts(entry, delta);
3252
3253         __buffer_unlock_commit(buffer, event);
3254 }
3255
3256 /* created for use with alloc_percpu */
3257 struct trace_buffer_struct {
3258         int nesting;
3259         char buffer[4][TRACE_BUF_SIZE];
3260 };
3261
3262 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3263
3264 /*
3265  * This allows for lockless recording.  If we're nested too deeply, then
3266  * this returns NULL.
3267  */
3268 static char *get_trace_buf(void)
3269 {
3270         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3271
3272         if (!trace_percpu_buffer || buffer->nesting >= 4)
3273                 return NULL;
3274
3275         buffer->nesting++;
3276
3277         /* Interrupts must see nesting incremented before we use the buffer */
3278         barrier();
3279         return &buffer->buffer[buffer->nesting - 1][0];
3280 }
3281
3282 static void put_trace_buf(void)
3283 {
3284         /* Don't let the decrement of nesting leak before this */
3285         barrier();
3286         this_cpu_dec(trace_percpu_buffer->nesting);
3287 }
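/*
 * A minimal sketch of the intended usage pattern for the nesting-counted
 * per-CPU buffers above (mirroring trace_vbprintk() and
 * __trace_array_vprintk() below): preemption stays disabled between get
 * and put so the nesting count of the same CPU's buffer is balanced.
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		// format at most TRACE_BUF_SIZE bytes into buf
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */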
3288
3289 static int alloc_percpu_trace_buffer(void)
3290 {
3291         struct trace_buffer_struct __percpu *buffers;
3292
3293         if (trace_percpu_buffer)
3294                 return 0;
3295
3296         buffers = alloc_percpu(struct trace_buffer_struct);
3297         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3298                 return -ENOMEM;
3299
3300         trace_percpu_buffer = buffers;
3301         return 0;
3302 }
3303
3304 static int buffers_allocated;
3305
3306 void trace_printk_init_buffers(void)
3307 {
3308         if (buffers_allocated)
3309                 return;
3310
3311         if (alloc_percpu_trace_buffer())
3312                 return;
3313
3314         /* trace_printk() is for debug use only. Don't use it in production. */
3315
3316         pr_warn("\n");
3317         pr_warn("**********************************************************\n");
3318         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3319         pr_warn("**                                                      **\n");
3320         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3321         pr_warn("**                                                      **\n");
3322         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3323         pr_warn("** unsafe for production use.                           **\n");
3324         pr_warn("**                                                      **\n");
3325         pr_warn("** If you see this message and you are not debugging    **\n");
3326         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3327         pr_warn("**                                                      **\n");
3328         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3329         pr_warn("**********************************************************\n");
3330
3331         /* Expand the buffers to set size */
3332         tracing_update_buffers();
3333
3334         buffers_allocated = 1;
3335
3336         /*
3337          * trace_printk_init_buffers() can be called by modules.
3338          * If that happens, then we need to start cmdline recording
3339          * directly here. If the global_trace.buffer is already
3340          * allocated here, then this was called by module code.
3341          */
3342         if (global_trace.array_buffer.buffer)
3343                 tracing_start_cmdline_record();
3344 }
3345 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3346
3347 void trace_printk_start_comm(void)
3348 {
3349         /* Start tracing comms if trace printk is set */
3350         if (!buffers_allocated)
3351                 return;
3352         tracing_start_cmdline_record();
3353 }
3354
3355 static void trace_printk_start_stop_comm(int enabled)
3356 {
3357         if (!buffers_allocated)
3358                 return;
3359
3360         if (enabled)
3361                 tracing_start_cmdline_record();
3362         else
3363                 tracing_stop_cmdline_record();
3364 }
3365
3366 /**
3367  * trace_vbprintk - write binary msg to tracing buffer
3368  * @ip:    The address of the caller
3369  * @fmt:   The string format to write to the buffer
3370  * @args:  Arguments for @fmt
3371  */
3372 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3373 {
3374         struct trace_event_call *call = &event_bprint;
3375         struct ring_buffer_event *event;
3376         struct trace_buffer *buffer;
3377         struct trace_array *tr = &global_trace;
3378         struct bprint_entry *entry;
3379         unsigned int trace_ctx;
3380         char *tbuffer;
3381         int len = 0, size;
3382
3383         if (unlikely(tracing_selftest_running || tracing_disabled))
3384                 return 0;
3385
3386         /* Don't pollute graph traces with trace_vprintk internals */
3387         pause_graph_tracing();
3388
3389         trace_ctx = tracing_gen_ctx();
3390         preempt_disable_notrace();
3391
3392         tbuffer = get_trace_buf();
3393         if (!tbuffer) {
3394                 len = 0;
3395                 goto out_nobuffer;
3396         }
3397
3398         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3399
3400         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3401                 goto out_put;
3402
3403         size = sizeof(*entry) + sizeof(u32) * len;
3404         buffer = tr->array_buffer.buffer;
3405         ring_buffer_nest_start(buffer);
3406         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3407                                             trace_ctx);
3408         if (!event)
3409                 goto out;
3410         entry = ring_buffer_event_data(event);
3411         entry->ip                       = ip;
3412         entry->fmt                      = fmt;
3413
3414         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3415         if (!call_filter_check_discard(call, entry, buffer, event)) {
3416                 __buffer_unlock_commit(buffer, event);
3417                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3418         }
3419
3420 out:
3421         ring_buffer_nest_end(buffer);
3422 out_put:
3423         put_trace_buf();
3424
3425 out_nobuffer:
3426         preempt_enable_notrace();
3427         unpause_graph_tracing();
3428
3429         return len;
3430 }
3431 EXPORT_SYMBOL_GPL(trace_vbprintk);
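/*
 * Note: trace_vbprintk() stores only the binary arguments plus a pointer
 * to the format string (entry->fmt); the text is rendered when the buffer
 * is read. A format with, say, three %d arguments therefore reserves
 * about sizeof(struct bprint_entry) + 3 * sizeof(u32) bytes, regardless
 * of how long the rendered line would be.
 */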
3432
3433 __printf(3, 0)
3434 static int
3435 __trace_array_vprintk(struct trace_buffer *buffer,
3436                       unsigned long ip, const char *fmt, va_list args)
3437 {
3438         struct trace_event_call *call = &event_print;
3439         struct ring_buffer_event *event;
3440         int len = 0, size;
3441         struct print_entry *entry;
3442         unsigned int trace_ctx;
3443         char *tbuffer;
3444
3445         if (tracing_disabled || tracing_selftest_running)
3446                 return 0;
3447
3448         /* Don't pollute graph traces with trace_vprintk internals */
3449         pause_graph_tracing();
3450
3451         trace_ctx = tracing_gen_ctx();
3452         preempt_disable_notrace();
3453
3454
3455         tbuffer = get_trace_buf();
3456         if (!tbuffer) {
3457                 len = 0;
3458                 goto out_nobuffer;
3459         }
3460
3461         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3462
3463         size = sizeof(*entry) + len + 1;
3464         ring_buffer_nest_start(buffer);
3465         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3466                                             trace_ctx);
3467         if (!event)
3468                 goto out;
3469         entry = ring_buffer_event_data(event);
3470         entry->ip = ip;
3471
3472         memcpy(&entry->buf, tbuffer, len + 1);
3473         if (!call_filter_check_discard(call, entry, buffer, event)) {
3474                 __buffer_unlock_commit(buffer, event);
3475                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3476         }
3477
3478 out:
3479         ring_buffer_nest_end(buffer);
3480         put_trace_buf();
3481
3482 out_nobuffer:
3483         preempt_enable_notrace();
3484         unpause_graph_tracing();
3485
3486         return len;
3487 }
3488
3489 __printf(3, 0)
3490 int trace_array_vprintk(struct trace_array *tr,
3491                         unsigned long ip, const char *fmt, va_list args)
3492 {
3493         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3494 }
3495
3496 /**
3497  * trace_array_printk - Print a message to a specific instance
3498  * @tr: The instance trace_array descriptor
3499  * @ip: The instruction pointer that this is called from.
3500  * @fmt: The format to print (printf format)
3501  *
3502  * If a subsystem sets up its own instance, it has the right to
3503  * printk strings into its tracing instance buffer using this
3504  * function. Note, this function will not write into the top level
3505  * buffer (use trace_printk() for that), as the top level buffer
3506  * should only contain events that can be individually disabled.
3507  * trace_printk() is only used for debugging a kernel, and should
3508  * never be incorporated into normal use.
3509  *
3510  * trace_array_printk() can be used, as it will not add noise to the
3511  * top level tracing buffer.
3512  *
3513  * Note, trace_array_init_printk() must be called on @tr before this
3514  * can be used.
3515  */
3516 __printf(3, 0)
3517 int trace_array_printk(struct trace_array *tr,
3518                        unsigned long ip, const char *fmt, ...)
3519 {
3520         int ret;
3521         va_list ap;
3522
3523         if (!tr)
3524                 return -ENOENT;
3525
3526         /* This is only allowed for created instances */
3527         if (tr == &global_trace)
3528                 return 0;
3529
3530         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3531                 return 0;
3532
3533         va_start(ap, fmt);
3534         ret = trace_array_vprintk(tr, ip, fmt, ap);
3535         va_end(ap);
3536         return ret;
3537 }
3538 EXPORT_SYMBOL_GPL(trace_array_printk);
3539
3540 /**
3541  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3542  * @tr: The trace array to initialize the buffers for
3543  *
3544  * As trace_array_printk() only writes into instances, they are OK to
3545  * have in the kernel (unlike trace_printk()). This needs to be called
3546  * before trace_array_printk() can be used on a trace_array.
3547  */
3548 int trace_array_init_printk(struct trace_array *tr)
3549 {
3550         if (!tr)
3551                 return -ENOENT;
3552
3553         /* This is only allowed for created instances */
3554         if (tr == &global_trace)
3555                 return -EINVAL;
3556
3557         return alloc_percpu_trace_buffer();
3558 }
3559 EXPORT_SYMBOL_GPL(trace_array_init_printk);
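/*
 * Illustrative sketch (not part of this file) of how a subsystem might
 * use the two functions above on its own instance. The instance name
 * "my_subsys" and the surrounding code are hypothetical; it assumes the
 * trace_array_get_by_name() helper provided elsewhere in this file.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (!tr)
 *		return -ENOMEM;
 *	if (trace_array_init_printk(tr))
 *		return -ENOMEM;
 *	trace_array_printk(tr, _THIS_IP_, "probe done, status=%d\n", status);
 */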
3560
3561 __printf(3, 4)
3562 int trace_array_printk_buf(struct trace_buffer *buffer,
3563                            unsigned long ip, const char *fmt, ...)
3564 {
3565         int ret;
3566         va_list ap;
3567
3568         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3569                 return 0;
3570
3571         va_start(ap, fmt);
3572         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3573         va_end(ap);
3574         return ret;
3575 }
3576
3577 __printf(2, 0)
3578 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3579 {
3580         return trace_array_vprintk(&global_trace, ip, fmt, args);
3581 }
3582 EXPORT_SYMBOL_GPL(trace_vprintk);
3583
3584 static void trace_iterator_increment(struct trace_iterator *iter)
3585 {
3586         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3587
3588         iter->idx++;
3589         if (buf_iter)
3590                 ring_buffer_iter_advance(buf_iter);
3591 }
3592
3593 static struct trace_entry *
3594 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3595                 unsigned long *lost_events)
3596 {
3597         struct ring_buffer_event *event;
3598         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3599
3600         if (buf_iter) {
3601                 event = ring_buffer_iter_peek(buf_iter, ts);
3602                 if (lost_events)
3603                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3604                                 (unsigned long)-1 : 0;
3605         } else {
3606                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3607                                          lost_events);
3608         }
3609
3610         if (event) {
3611                 iter->ent_size = ring_buffer_event_length(event);
3612                 return ring_buffer_event_data(event);
3613         }
3614         iter->ent_size = 0;
3615         return NULL;
3616 }
3617
3618 static struct trace_entry *
3619 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3620                   unsigned long *missing_events, u64 *ent_ts)
3621 {
3622         struct trace_buffer *buffer = iter->array_buffer->buffer;
3623         struct trace_entry *ent, *next = NULL;
3624         unsigned long lost_events = 0, next_lost = 0;
3625         int cpu_file = iter->cpu_file;
3626         u64 next_ts = 0, ts;
3627         int next_cpu = -1;
3628         int next_size = 0;
3629         int cpu;
3630
3631         /*
3632          * If we are in a per_cpu trace file, don't bother iterating over
3633          * all CPUs; peek at that CPU directly.
3634          */
3635         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3636                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3637                         return NULL;
3638                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3639                 if (ent_cpu)
3640                         *ent_cpu = cpu_file;
3641
3642                 return ent;
3643         }
3644
3645         for_each_tracing_cpu(cpu) {
3646
3647                 if (ring_buffer_empty_cpu(buffer, cpu))
3648                         continue;
3649
3650                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3651
3652                 /*
3653                  * Pick the entry with the smallest timestamp:
3654                  */
3655                 if (ent && (!next || ts < next_ts)) {
3656                         next = ent;
3657                         next_cpu = cpu;
3658                         next_ts = ts;
3659                         next_lost = lost_events;
3660                         next_size = iter->ent_size;
3661                 }
3662         }
3663
3664         iter->ent_size = next_size;
3665
3666         if (ent_cpu)
3667                 *ent_cpu = next_cpu;
3668
3669         if (ent_ts)
3670                 *ent_ts = next_ts;
3671
3672         if (missing_events)
3673                 *missing_events = next_lost;
3674
3675         return next;
3676 }
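/*
 * Worked example for __find_next_entry(): if CPU0's next entry has
 * ts = 2050 and CPU1's has ts = 2010, the CPU1 entry is returned first
 * (with *ent_cpu = 1), so the per-CPU ring buffers are merged into one
 * stream ordered by timestamp.
 */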
3677
3678 #define STATIC_FMT_BUF_SIZE     128
3679 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3680
3681 static char *trace_iter_expand_format(struct trace_iterator *iter)
3682 {
3683         char *tmp;
3684
3685         /*
3686          * iter->tr is NULL when used with tp_printk, which means this
3687          * can get called where it is not safe to call krealloc().
3688          */
3689         if (!iter->tr || iter->fmt == static_fmt_buf)
3690                 return NULL;
3691
3692         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3693                        GFP_KERNEL);
3694         if (tmp) {
3695                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3696                 iter->fmt = tmp;
3697         }
3698
3699         return tmp;
3700 }
3701
3702 /* Returns true if the string is safe to dereference from an event */
3703 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3704                            bool star, int len)
3705 {
3706         unsigned long addr = (unsigned long)str;
3707         struct trace_event *trace_event;
3708         struct trace_event_call *event;
3709
3710         /* Ignore strings with no length */
3711         if (star && !len)
3712                 return true;
3713
3714         /* OK if part of the event data */
3715         if ((addr >= (unsigned long)iter->ent) &&
3716             (addr < (unsigned long)iter->ent + iter->ent_size))
3717                 return true;
3718
3719         /* OK if part of the temp seq buffer */
3720         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3721             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3722                 return true;
3723
3724         /* Core rodata cannot be freed */
3725         if (is_kernel_rodata(addr))
3726                 return true;
3727
3728         if (trace_is_tracepoint_string(str))
3729                 return true;
3730
3731         /*
3732          * Now this could be a module event, referencing core module
3733          * data, which is OK.
3734          */
3735         if (!iter->ent)
3736                 return false;
3737
3738         trace_event = ftrace_find_event(iter->ent->type);
3739         if (!trace_event)
3740                 return false;
3741
3742         event = container_of(trace_event, struct trace_event_call, event);
3743         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3744                 return false;
3745
3746         /* Would rather have rodata, but this will suffice */
3747         if (within_module_core(addr, event->module))
3748                 return true;
3749
3750         return false;
3751 }
3752
3753 static const char *show_buffer(struct trace_seq *s)
3754 {
3755         struct seq_buf *seq = &s->seq;
3756
3757         seq_buf_terminate(seq);
3758
3759         return seq->buffer;
3760 }
3761
3762 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3763
3764 static int test_can_verify_check(const char *fmt, ...)
3765 {
3766         char buf[16];
3767         va_list ap;
3768         int ret;
3769
3770         /*
3771          * The verifier depends on vsnprintf() modifying the va_list
3772          * passed to it, where it is sent as a reference. Some architectures
3773          * (like x86_32) pass it by value, which means that vsnprintf()
3774          * does not modify the va_list passed to it, and the verifier
3775          * would then need to be able to understand all the values that
3776          * vsnprintf can use. If it is passed by value, then the verifier
3777          * is disabled.
3778          */
3779         va_start(ap, fmt);
3780         vsnprintf(buf, 16, "%d", ap);
3781         ret = va_arg(ap, int);
3782         va_end(ap);
3783
3784         return ret;
3785 }
3786
3787 static void test_can_verify(void)
3788 {
3789         if (!test_can_verify_check("%d %d", 0, 1)) {
3790                 pr_info("trace event string verifier disabled\n");
3791                 static_branch_inc(&trace_no_verify);
3792         }
3793 }
3794
3795 /**
3796  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3797  * @iter: The iterator that holds the seq buffer and the event being printed
3798  * @fmt: The format used to print the event
3799  * @ap: The va_list holding the data to print from @fmt.
3800  *
3801  * This writes the data into the @iter->seq buffer using the data from
3802  * @fmt and @ap. If the format has a %s, then the source of the string
3803  * is examined to make sure it is safe to print, otherwise it will
3804  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3805  * pointer.
3806  */
3807 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3808                          va_list ap)
3809 {
3810         const char *p = fmt;
3811         const char *str;
3812         int i, j;
3813
3814         if (WARN_ON_ONCE(!fmt))
3815                 return;
3816
3817         if (static_branch_unlikely(&trace_no_verify))
3818                 goto print;
3819
3820         /* Don't bother checking when doing a ftrace_dump() */
3821         if (iter->fmt == static_fmt_buf)
3822                 goto print;
3823
3824         while (*p) {
3825                 bool star = false;
3826                 int len = 0;
3827
3828                 j = 0;
3829
3830                 /* We only care about %s and variants */
3831                 for (i = 0; p[i]; i++) {
3832                         if (i + 1 >= iter->fmt_size) {
3833                                 /*
3834                                  * If we can't expand the copy buffer,
3835                                  * just print it.
3836                                  */
3837                                 if (!trace_iter_expand_format(iter))
3838                                         goto print;
3839                         }
3840
3841                         if (p[i] == '\\' && p[i+1]) {
3842                                 i++;
3843                                 continue;
3844                         }
3845                         if (p[i] == '%') {
3846                                 /* Need to test cases like %08.*s */
3847                                 for (j = 1; p[i+j]; j++) {
3848                                         if (isdigit(p[i+j]) ||
3849                                             p[i+j] == '.')
3850                                                 continue;
3851                                         if (p[i+j] == '*') {
3852                                                 star = true;
3853                                                 continue;
3854                                         }
3855                                         break;
3856                                 }
3857                                 if (p[i+j] == 's')
3858                                         break;
3859                                 star = false;
3860                         }
3861                         j = 0;
3862                 }
3863                 /* If no %s found then just print normally */
3864                 if (!p[i])
3865                         break;
3866
3867                 /* Copy up to the %s, and print that */
3868                 strncpy(iter->fmt, p, i);
3869                 iter->fmt[i] = '\0';
3870                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3871
3872                 /*
3873                  * If iter->seq is full, the above call no longer guarantees
3874                  * that ap is in sync with fmt processing, and further calls
3875                  * to va_arg() can return wrong positional arguments.
3876                  *
3877                  * Ensure that ap is no longer used in this case.
3878                  */
3879                 if (iter->seq.full) {
3880                         p = "";
3881                         break;
3882                 }
3883
3884                 if (star)
3885                         len = va_arg(ap, int);
3886
3887                 /* The ap now points to the string data of the %s */
3888                 str = va_arg(ap, const char *);
3889
3890                 /*
3891                  * If you hit this warning, it is likely that the
3892                  * trace event in question used %s on a string that
3893                  * was saved at the time of the event, but may not be
3894                  * around when the trace is read. Use __string(),
3895                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3896                  * instead. See samples/trace_events/trace-events-sample.h
3897                  * for reference.
3898                  */
3899                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3900                               "fmt: '%s' current_buffer: '%s'",
3901                               fmt, show_buffer(&iter->seq))) {
3902                         int ret;
3903
3904                         /* Try to safely read the string */
3905                         if (star) {
3906                                 if (len + 1 > iter->fmt_size)
3907                                         len = iter->fmt_size - 1;
3908                                 if (len < 0)
3909                                         len = 0;
3910                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3911                                 iter->fmt[len] = 0;
3912                                 star = false;
3913                         } else {
3914                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3915                                                                   iter->fmt_size);
3916                         }
3917                         if (ret < 0)
3918                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3919                         else
3920                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3921                                                  str, iter->fmt);
3922                         str = "[UNSAFE-MEMORY]";
3923                         strcpy(iter->fmt, "%s");
3924                 } else {
3925                         strncpy(iter->fmt, p + i, j + 1);
3926                         iter->fmt[j+1] = '\0';
3927                 }
3928                 if (star)
3929                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3930                 else
3931                         trace_seq_printf(&iter->seq, iter->fmt, str);
3932
3933                 p += i + j + 1;
3934         }
3935  print:
3936         if (*p)
3937                 trace_seq_vprintf(&iter->seq, p, ap);
3938 }
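/*
 * When the check above fires, the raw pointer is still printed so the
 * event can be debugged; a dangling %s argument ends up rendered roughly
 * as "(0x<addr>:<whatever bytes could be copied>)[UNSAFE-MEMORY]", or
 * just "(0x<addr>)[UNSAFE-MEMORY]" if the memory cannot be read at all.
 */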
3939
3940 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3941 {
3942         const char *p, *new_fmt;
3943         char *q;
3944
3945         if (WARN_ON_ONCE(!fmt))
3946                 return fmt;
3947
3948         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3949                 return fmt;
3950
3951         p = fmt;
3952         new_fmt = q = iter->fmt;
3953         while (*p) {
3954                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3955                         if (!trace_iter_expand_format(iter))
3956                                 return fmt;
3957
3958                         q += iter->fmt - new_fmt;
3959                         new_fmt = iter->fmt;
3960                 }
3961
3962                 *q++ = *p++;
3963
3964                 /* Replace %p with %px */
3965                 if (p[-1] == '%') {
3966                         if (p[0] == '%') {
3967                                 *q++ = *p++;
3968                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3969                                 *q++ = *p++;
3970                                 *q++ = 'x';
3971                         }
3972                 }
3973         }
3974         *q = '\0';
3975
3976         return new_fmt;
3977 }
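/*
 * Example of the rewrite above: with pointer hashing disabled, a format
 * such as "req=%p name=%pS id=%d" becomes "req=%px name=%pS id=%d":
 * only a bare %p is converted, while typed specifiers like %pS (next
 * character is alphanumeric) and literal "%%" are left alone.
 */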
3978
3979 #define STATIC_TEMP_BUF_SIZE    128
3980 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3981
3982 /* Find the next real entry, without updating the iterator itself */
3983 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3984                                           int *ent_cpu, u64 *ent_ts)
3985 {
3986         /* __find_next_entry will reset ent_size */
3987         int ent_size = iter->ent_size;
3988         struct trace_entry *entry;
3989
3990         /*
3991          * If called from ftrace_dump(), then the iter->temp buffer
3992          * will be the static_temp_buf and not created from kmalloc.
3993          * If the entry size is greater than the buffer, we cannot
3994          * save it. Just return NULL in that case. This is only
3995          * used to add markers when two consecutive events' time
3996          * stamps have a large delta. See trace_print_lat_context().
3997          */
3998         if (iter->temp == static_temp_buf &&
3999             STATIC_TEMP_BUF_SIZE < ent_size)
4000                 return NULL;
4001
4002         /*
4003          * The __find_next_entry() may call peek_next_entry(), which may
4004          * call ring_buffer_peek(), which may make the contents of iter->ent
4005          * undefined. Need to copy iter->ent now.
4006          */
4007         if (iter->ent && iter->ent != iter->temp) {
4008                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4009                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4010                         void *temp;
4011                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4012                         if (!temp)
4013                                 return NULL;
4014                         kfree(iter->temp);
4015                         iter->temp = temp;
4016                         iter->temp_size = iter->ent_size;
4017                 }
4018                 memcpy(iter->temp, iter->ent, iter->ent_size);
4019                 iter->ent = iter->temp;
4020         }
4021         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4022         /* Put back the original ent_size */
4023         iter->ent_size = ent_size;
4024
4025         return entry;
4026 }
4027
4028 /* Find the next real entry, and increment the iterator to the next entry */
4029 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4030 {
4031         iter->ent = __find_next_entry(iter, &iter->cpu,
4032                                       &iter->lost_events, &iter->ts);
4033
4034         if (iter->ent)
4035                 trace_iterator_increment(iter);
4036
4037         return iter->ent ? iter : NULL;
4038 }
4039
4040 static void trace_consume(struct trace_iterator *iter)
4041 {
4042         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4043                             &iter->lost_events);
4044 }
4045
4046 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4047 {
4048         struct trace_iterator *iter = m->private;
4049         int i = (int)*pos;
4050         void *ent;
4051
4052         WARN_ON_ONCE(iter->leftover);
4053
4054         (*pos)++;
4055
4056         /* can't go backwards */
4057         if (iter->idx > i)
4058                 return NULL;
4059
4060         if (iter->idx < 0)
4061                 ent = trace_find_next_entry_inc(iter);
4062         else
4063                 ent = iter;
4064
4065         while (ent && iter->idx < i)
4066                 ent = trace_find_next_entry_inc(iter);
4067
4068         iter->pos = *pos;
4069
4070         return ent;
4071 }
4072
4073 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4074 {
4075         struct ring_buffer_iter *buf_iter;
4076         unsigned long entries = 0;
4077         u64 ts;
4078
4079         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4080
4081         buf_iter = trace_buffer_iter(iter, cpu);
4082         if (!buf_iter)
4083                 return;
4084
4085         ring_buffer_iter_reset(buf_iter);
4086
4087         /*
4088          * With the max latency tracers, a reset may never have taken
4089          * place on a CPU. This is evident when a timestamp is before
4090          * the start of the buffer.
4091          */
4092         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4093                 if (ts >= iter->array_buffer->time_start)
4094                         break;
4095                 entries++;
4096                 ring_buffer_iter_advance(buf_iter);
4097         }
4098
4099         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4100 }
4101
4102 /*
4103  * The current tracer is copied to avoid holding a global lock
4104  * all around.
4105  */
4106 static void *s_start(struct seq_file *m, loff_t *pos)
4107 {
4108         struct trace_iterator *iter = m->private;
4109         struct trace_array *tr = iter->tr;
4110         int cpu_file = iter->cpu_file;
4111         void *p = NULL;
4112         loff_t l = 0;
4113         int cpu;
4114
4115         /*
4116          * Copy the tracer to avoid using a global lock all around.
4117          * iter->trace is a copy of current_trace; the name pointer
4118          * may be compared instead of using strcmp(), as iter->trace->name
4119          * will point to the same string as current_trace->name.
4120          */
4121         mutex_lock(&trace_types_lock);
4122         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4123                 *iter->trace = *tr->current_trace;
4124         mutex_unlock(&trace_types_lock);
4125
4126 #ifdef CONFIG_TRACER_MAX_TRACE
4127         if (iter->snapshot && iter->trace->use_max_tr)
4128                 return ERR_PTR(-EBUSY);
4129 #endif
4130
4131         if (*pos != iter->pos) {
4132                 iter->ent = NULL;
4133                 iter->cpu = 0;
4134                 iter->idx = -1;
4135
4136                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4137                         for_each_tracing_cpu(cpu)
4138                                 tracing_iter_reset(iter, cpu);
4139                 } else
4140                         tracing_iter_reset(iter, cpu_file);
4141
4142                 iter->leftover = 0;
4143                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4144                         ;
4145
4146         } else {
4147                 /*
4148                  * If we overflowed the seq_file before, then we want
4149                  * to just reuse the trace_seq buffer again.
4150                  */
4151                 if (iter->leftover)
4152                         p = iter;
4153                 else {
4154                         l = *pos - 1;
4155                         p = s_next(m, p, &l);
4156                 }
4157         }
4158
4159         trace_event_read_lock();
4160         trace_access_lock(cpu_file);
4161         return p;
4162 }
4163
4164 static void s_stop(struct seq_file *m, void *p)
4165 {
4166         struct trace_iterator *iter = m->private;
4167
4168 #ifdef CONFIG_TRACER_MAX_TRACE
4169         if (iter->snapshot && iter->trace->use_max_tr)
4170                 return;
4171 #endif
4172
4173         trace_access_unlock(iter->cpu_file);
4174         trace_event_read_unlock();
4175 }
4176
4177 static void
4178 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4179                       unsigned long *entries, int cpu)
4180 {
4181         unsigned long count;
4182
4183         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4184         /*
4185          * If this buffer has skipped entries, then we hold all
4186          * entries for the trace and we need to ignore the
4187          * ones before the time stamp.
4188          */
4189         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4190                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4191                 /* total is the same as the entries */
4192                 *total = count;
4193         } else
4194                 *total = count +
4195                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4196         *entries = count;
4197 }
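/*
 * Worked example: if a CPU's buffer currently holds 800 entries and 200
 * older ones were overwritten (overrun), this reports *entries = 800 and
 * *total = 1000. When the buffer was marked as having skipped entries
 * (see tracing_iter_reset()), those are subtracted and total == entries.
 */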
4198
4199 static void
4200 get_total_entries(struct array_buffer *buf,
4201                   unsigned long *total, unsigned long *entries)
4202 {
4203         unsigned long t, e;
4204         int cpu;
4205
4206         *total = 0;
4207         *entries = 0;
4208
4209         for_each_tracing_cpu(cpu) {
4210                 get_total_entries_cpu(buf, &t, &e, cpu);
4211                 *total += t;
4212                 *entries += e;
4213         }
4214 }
4215
4216 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4217 {
4218         unsigned long total, entries;
4219
4220         if (!tr)
4221                 tr = &global_trace;
4222
4223         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4224
4225         return entries;
4226 }
4227
4228 unsigned long trace_total_entries(struct trace_array *tr)
4229 {
4230         unsigned long total, entries;
4231
4232         if (!tr)
4233                 tr = &global_trace;
4234
4235         get_total_entries(&tr->array_buffer, &total, &entries);
4236
4237         return entries;
4238 }
4239
4240 static void print_lat_help_header(struct seq_file *m)
4241 {
4242         seq_puts(m, "#                    _------=> CPU#            \n"
4243                     "#                   / _-----=> irqs-off/BH-disabled\n"
4244                     "#                  | / _----=> need-resched    \n"
4245                     "#                  || / _---=> hardirq/softirq \n"
4246                     "#                  ||| / _--=> preempt-depth   \n"
4247                     "#                  |||| / _-=> migrate-disable \n"
4248                     "#                  ||||| /     delay           \n"
4249                     "#  cmd     pid     |||||| time  |   caller     \n"
4250                     "#     \\   /        ||||||  \\    |    /       \n");
4251 }
4252
4253 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4254 {
4255         unsigned long total;
4256         unsigned long entries;
4257
4258         get_total_entries(buf, &total, &entries);
4259         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4260                    entries, total, num_online_cpus());
4261         seq_puts(m, "#\n");
4262 }
4263
4264 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4265                                    unsigned int flags)
4266 {
4267         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4268
4269         print_event_info(buf, m);
4270
4271         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4272         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4273 }
4274
4275 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4276                                        unsigned int flags)
4277 {
4278         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4279         static const char space[] = "            ";
4280         int prec = tgid ? 12 : 2;
4281
4282         print_event_info(buf, m);
4283
4284         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4285         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4286         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4287         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4288         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4289         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4290         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4291         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4292 }
4293
4294 void
4295 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4296 {
4297         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4298         struct array_buffer *buf = iter->array_buffer;
4299         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4300         struct tracer *type = iter->trace;
4301         unsigned long entries;
4302         unsigned long total;
4303         const char *name = type->name;
4304
4305         get_total_entries(buf, &total, &entries);
4306
4307         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4308                    name, UTS_RELEASE);
4309         seq_puts(m, "# -----------------------------------"
4310                  "---------------------------------\n");
4311         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4312                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4313                    nsecs_to_usecs(data->saved_latency),
4314                    entries,
4315                    total,
4316                    buf->cpu,
4317                    preempt_model_none()      ? "server" :
4318                    preempt_model_voluntary() ? "desktop" :
4319                    preempt_model_full()      ? "preempt" :
4320                    preempt_model_rt()        ? "preempt_rt" :
4321                    "unknown",
4322                    /* These are reserved for later use */
4323                    0, 0, 0, 0);
4324 #ifdef CONFIG_SMP
4325         seq_printf(m, " #P:%d)\n", num_online_cpus());
4326 #else
4327         seq_puts(m, ")\n");
4328 #endif
4329         seq_puts(m, "#    -----------------\n");
4330         seq_printf(m, "#    | task: %.16s-%d "
4331                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4332                    data->comm, data->pid,
4333                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4334                    data->policy, data->rt_priority);
4335         seq_puts(m, "#    -----------------\n");
4336
4337         if (data->critical_start) {
4338                 seq_puts(m, "#  => started at: ");
4339                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4340                 trace_print_seq(m, &iter->seq);
4341                 seq_puts(m, "\n#  => ended at:   ");
4342                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4343                 trace_print_seq(m, &iter->seq);
4344                 seq_puts(m, "\n#\n");
4345         }
4346
4347         seq_puts(m, "#\n");
4348 }
4349
4350 static void test_cpu_buff_start(struct trace_iterator *iter)
4351 {
4352         struct trace_seq *s = &iter->seq;
4353         struct trace_array *tr = iter->tr;
4354
4355         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4356                 return;
4357
4358         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4359                 return;
4360
4361         if (cpumask_available(iter->started) &&
4362             cpumask_test_cpu(iter->cpu, iter->started))
4363                 return;
4364
4365         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4366                 return;
4367
4368         if (cpumask_available(iter->started))
4369                 cpumask_set_cpu(iter->cpu, iter->started);
4370
4371         /* Don't print started cpu buffer for the first entry of the trace */
4372         if (iter->idx > 1)
4373                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4374                                 iter->cpu);
4375 }
4376
4377 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4378 {
4379         struct trace_array *tr = iter->tr;
4380         struct trace_seq *s = &iter->seq;
4381         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4382         struct trace_entry *entry;
4383         struct trace_event *event;
4384
4385         entry = iter->ent;
4386
4387         test_cpu_buff_start(iter);
4388
4389         event = ftrace_find_event(entry->type);
4390
4391         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4392                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4393                         trace_print_lat_context(iter);
4394                 else
4395                         trace_print_context(iter);
4396         }
4397
4398         if (trace_seq_has_overflowed(s))
4399                 return TRACE_TYPE_PARTIAL_LINE;
4400
4401         if (event)
4402                 return event->funcs->trace(iter, sym_flags, event);
4403
4404         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4405
4406         return trace_handle_return(s);
4407 }
4408
4409 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4410 {
4411         struct trace_array *tr = iter->tr;
4412         struct trace_seq *s = &iter->seq;
4413         struct trace_entry *entry;
4414         struct trace_event *event;
4415
4416         entry = iter->ent;
4417
4418         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4419                 trace_seq_printf(s, "%d %d %llu ",
4420                                  entry->pid, iter->cpu, iter->ts);
4421
4422         if (trace_seq_has_overflowed(s))
4423                 return TRACE_TYPE_PARTIAL_LINE;
4424
4425         event = ftrace_find_event(entry->type);
4426         if (event)
4427                 return event->funcs->raw(iter, 0, event);
4428
4429         trace_seq_printf(s, "%d ?\n", entry->type);
4430
4431         return trace_handle_return(s);
4432 }
4433
4434 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4435 {
4436         struct trace_array *tr = iter->tr;
4437         struct trace_seq *s = &iter->seq;
4438         unsigned char newline = '\n';
4439         struct trace_entry *entry;
4440         struct trace_event *event;
4441
4442         entry = iter->ent;
4443
4444         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4445                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4446                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4447                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4448                 if (trace_seq_has_overflowed(s))
4449                         return TRACE_TYPE_PARTIAL_LINE;
4450         }
4451
4452         event = ftrace_find_event(entry->type);
4453         if (event) {
4454                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4455                 if (ret != TRACE_TYPE_HANDLED)
4456                         return ret;
4457         }
4458
4459         SEQ_PUT_FIELD(s, newline);
4460
4461         return trace_handle_return(s);
4462 }
4463
4464 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4465 {
4466         struct trace_array *tr = iter->tr;
4467         struct trace_seq *s = &iter->seq;
4468         struct trace_entry *entry;
4469         struct trace_event *event;
4470
4471         entry = iter->ent;
4472
4473         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4474                 SEQ_PUT_FIELD(s, entry->pid);
4475                 SEQ_PUT_FIELD(s, iter->cpu);
4476                 SEQ_PUT_FIELD(s, iter->ts);
4477                 if (trace_seq_has_overflowed(s))
4478                         return TRACE_TYPE_PARTIAL_LINE;
4479         }
4480
4481         event = ftrace_find_event(entry->type);
4482         return event ? event->funcs->binary(iter, 0, event) :
4483                 TRACE_TYPE_HANDLED;
4484 }
4485
4486 int trace_empty(struct trace_iterator *iter)
4487 {
4488         struct ring_buffer_iter *buf_iter;
4489         int cpu;
4490
4491         /* If we are looking at one CPU buffer, only check that one */
4492         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4493                 cpu = iter->cpu_file;
4494                 buf_iter = trace_buffer_iter(iter, cpu);
4495                 if (buf_iter) {
4496                         if (!ring_buffer_iter_empty(buf_iter))
4497                                 return 0;
4498                 } else {
4499                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4500                                 return 0;
4501                 }
4502                 return 1;
4503         }
4504
4505         for_each_tracing_cpu(cpu) {
4506                 buf_iter = trace_buffer_iter(iter, cpu);
4507                 if (buf_iter) {
4508                         if (!ring_buffer_iter_empty(buf_iter))
4509                                 return 0;
4510                 } else {
4511                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4512                                 return 0;
4513                 }
4514         }
4515
4516         return 1;
4517 }
4518
4519 /*  Called with trace_event_read_lock() held. */
4520 enum print_line_t print_trace_line(struct trace_iterator *iter)
4521 {
4522         struct trace_array *tr = iter->tr;
4523         unsigned long trace_flags = tr->trace_flags;
4524         enum print_line_t ret;
4525
4526         if (iter->lost_events) {
4527                 if (iter->lost_events == (unsigned long)-1)
4528                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4529                                          iter->cpu);
4530                 else
4531                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4532                                          iter->cpu, iter->lost_events);
4533                 if (trace_seq_has_overflowed(&iter->seq))
4534                         return TRACE_TYPE_PARTIAL_LINE;
4535         }
4536
4537         if (iter->trace && iter->trace->print_line) {
4538                 ret = iter->trace->print_line(iter);
4539                 if (ret != TRACE_TYPE_UNHANDLED)
4540                         return ret;
4541         }
4542
4543         if (iter->ent->type == TRACE_BPUTS &&
4544                         trace_flags & TRACE_ITER_PRINTK &&
4545                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4546                 return trace_print_bputs_msg_only(iter);
4547
4548         if (iter->ent->type == TRACE_BPRINT &&
4549                         trace_flags & TRACE_ITER_PRINTK &&
4550                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4551                 return trace_print_bprintk_msg_only(iter);
4552
4553         if (iter->ent->type == TRACE_PRINT &&
4554                         trace_flags & TRACE_ITER_PRINTK &&
4555                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4556                 return trace_print_printk_msg_only(iter);
4557
4558         if (trace_flags & TRACE_ITER_BIN)
4559                 return print_bin_fmt(iter);
4560
4561         if (trace_flags & TRACE_ITER_HEX)
4562                 return print_hex_fmt(iter);
4563
4564         if (trace_flags & TRACE_ITER_RAW)
4565                 return print_raw_fmt(iter);
4566
4567         return print_trace_fmt(iter);
4568 }
4569
4570 void trace_latency_header(struct seq_file *m)
4571 {
4572         struct trace_iterator *iter = m->private;
4573         struct trace_array *tr = iter->tr;
4574
4575         /* print nothing if the buffers are empty */
4576         if (trace_empty(iter))
4577                 return;
4578
4579         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4580                 print_trace_header(m, iter);
4581
4582         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4583                 print_lat_help_header(m);
4584 }
4585
4586 void trace_default_header(struct seq_file *m)
4587 {
4588         struct trace_iterator *iter = m->private;
4589         struct trace_array *tr = iter->tr;
4590         unsigned long trace_flags = tr->trace_flags;
4591
4592         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4593                 return;
4594
4595         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4596                 /* print nothing if the buffers are empty */
4597                 if (trace_empty(iter))
4598                         return;
4599                 print_trace_header(m, iter);
4600                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4601                         print_lat_help_header(m);
4602         } else {
4603                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4604                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4605                                 print_func_help_header_irq(iter->array_buffer,
4606                                                            m, trace_flags);
4607                         else
4608                                 print_func_help_header(iter->array_buffer, m,
4609                                                        trace_flags);
4610                 }
4611         }
4612 }
4613
4614 static void test_ftrace_alive(struct seq_file *m)
4615 {
4616         if (!ftrace_is_dead())
4617                 return;
4618         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4619                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4620 }
4621
4622 #ifdef CONFIG_TRACER_MAX_TRACE
4623 static void show_snapshot_main_help(struct seq_file *m)
4624 {
4625         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4626                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4627                     "#                      Takes a snapshot of the main buffer.\n"
4628                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4629                     "#                      (Doesn't have to be '2' works with any number that\n"
4630                     "#                       is not a '0' or '1')\n");
4631 }
4632
4633 static void show_snapshot_percpu_help(struct seq_file *m)
4634 {
4635         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4636 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4637         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4638                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4639 #else
4640         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4641                     "#                     Must use main snapshot file to allocate.\n");
4642 #endif
4643         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4644                     "#                      (Doesn't have to be '2' works with any number that\n"
4645                     "#                       is not a '0' or '1')\n");
4646 }
4647
4648 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4649 {
4650         if (iter->tr->allocated_snapshot)
4651                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4652         else
4653                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4654
4655         seq_puts(m, "# Snapshot commands:\n");
4656         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4657                 show_snapshot_main_help(m);
4658         else
4659                 show_snapshot_percpu_help(m);
4660 }
4661 #else
4662 /* Should never be called */
4663 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4664 #endif
4665
4666 static int s_show(struct seq_file *m, void *v)
4667 {
4668         struct trace_iterator *iter = v;
4669         int ret;
4670
4671         if (iter->ent == NULL) {
4672                 if (iter->tr) {
4673                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4674                         seq_puts(m, "#\n");
4675                         test_ftrace_alive(m);
4676                 }
4677                 if (iter->snapshot && trace_empty(iter))
4678                         print_snapshot_help(m, iter);
4679                 else if (iter->trace && iter->trace->print_header)
4680                         iter->trace->print_header(m);
4681                 else
4682                         trace_default_header(m);
4683
4684         } else if (iter->leftover) {
4685                 /*
4686                  * If we filled the seq_file buffer earlier, we
4687                  * want to just show it now.
4688                  */
4689                 ret = trace_print_seq(m, &iter->seq);
4690
4691                 /* ret should this time be zero, but you never know */
4692                 iter->leftover = ret;
4693
4694         } else {
4695                 print_trace_line(iter);
4696                 ret = trace_print_seq(m, &iter->seq);
4697                 /*
4698                  * If we overflow the seq_file buffer, then it will
4699                  * ask us for this data again at start up.
4700                  * Use that instead.
4701                  *  ret is 0 if seq_file write succeeded.
4702                  *        -1 otherwise.
4703                  */
4704                 iter->leftover = ret;
4705         }
4706
4707         return 0;
4708 }
4709
4710 /*
4711  * Should be used after trace_array_get(); the trace_types_lock
4712  * ensures that i_cdev has already been initialized.
4713  */
4714 static inline int tracing_get_cpu(struct inode *inode)
4715 {
4716         if (inode->i_cdev) /* See trace_create_cpu_file() */
4717                 return (long)inode->i_cdev - 1;
4718         return RING_BUFFER_ALL_CPUS;
4719 }
4720
4721 static const struct seq_operations tracer_seq_ops = {
4722         .start          = s_start,
4723         .next           = s_next,
4724         .stop           = s_stop,
4725         .show           = s_show,
4726 };
4727
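/*
 * Set up a trace_iterator for reading the "trace" file: copy the current
 * tracer, select the main or max (snapshot) buffer, and prepare a ring
 * buffer iterator for each CPU that will be read.
 */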
4728 static struct trace_iterator *
4729 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4730 {
4731         struct trace_array *tr = inode->i_private;
4732         struct trace_iterator *iter;
4733         int cpu;
4734
4735         if (tracing_disabled)
4736                 return ERR_PTR(-ENODEV);
4737
4738         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4739         if (!iter)
4740                 return ERR_PTR(-ENOMEM);
4741
4742         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4743                                     GFP_KERNEL);
4744         if (!iter->buffer_iter)
4745                 goto release;
4746
4747         /*
4748          * trace_find_next_entry() may need to save off iter->ent.
4749          * It will place it into the iter->temp buffer. As most
4750          * events are smaller than 128 bytes, allocate a buffer of that size.
4751          * If one is greater, then trace_find_next_entry() will
4752          * allocate a new buffer to adjust for the bigger iter->ent.
4753          * It's not critical if it fails to get allocated here.
4754          */
4755         iter->temp = kmalloc(128, GFP_KERNEL);
4756         if (iter->temp)
4757                 iter->temp_size = 128;
4758
4759         /*
4760          * trace_event_printf() may need to modify the given format
4761          * string to replace %p with %px so that it shows the real address
4762          * instead of a hashed value. However, that is only needed for event
4763          * tracing; other tracers may not need it. Defer the allocation
4764          * until it is needed.
4765          */
4766         iter->fmt = NULL;
4767         iter->fmt_size = 0;
4768
4769         /*
4770          * We make a copy of the current tracer to avoid concurrent
4771          * changes on it while we are reading.
4772          */
4773         mutex_lock(&trace_types_lock);
4774         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4775         if (!iter->trace)
4776                 goto fail;
4777
4778         *iter->trace = *tr->current_trace;
4779
4780         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4781                 goto fail;
4782
4783         iter->tr = tr;
4784
4785 #ifdef CONFIG_TRACER_MAX_TRACE
4786         /* Currently only the top directory has a snapshot */
4787         if (tr->current_trace->print_max || snapshot)
4788                 iter->array_buffer = &tr->max_buffer;
4789         else
4790 #endif
4791                 iter->array_buffer = &tr->array_buffer;
4792         iter->snapshot = snapshot;
4793         iter->pos = -1;
4794         iter->cpu_file = tracing_get_cpu(inode);
4795         mutex_init(&iter->mutex);
4796
4797         /* Notify the tracer early; before we stop tracing. */
4798         if (iter->trace->open)
4799                 iter->trace->open(iter);
4800
4801         /* Annotate start of buffers if we had overruns */
4802         if (ring_buffer_overruns(iter->array_buffer->buffer))
4803                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4804
4805         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4806         if (trace_clocks[tr->clock_id].in_ns)
4807                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4808
4809         /*
4810          * If pause-on-trace is enabled, then stop the trace while
4811          * dumping, unless this is the "snapshot" file
4812          */
4813         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4814                 tracing_stop_tr(tr);
4815
4816         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4817                 for_each_tracing_cpu(cpu) {
4818                         iter->buffer_iter[cpu] =
4819                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4820                                                          cpu, GFP_KERNEL);
4821                 }
4822                 ring_buffer_read_prepare_sync();
4823                 for_each_tracing_cpu(cpu) {
4824                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4825                         tracing_iter_reset(iter, cpu);
4826                 }
4827         } else {
4828                 cpu = iter->cpu_file;
4829                 iter->buffer_iter[cpu] =
4830                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4831                                                  cpu, GFP_KERNEL);
4832                 ring_buffer_read_prepare_sync();
4833                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4834                 tracing_iter_reset(iter, cpu);
4835         }
4836
4837         mutex_unlock(&trace_types_lock);
4838
4839         return iter;
4840
4841  fail:
4842         mutex_unlock(&trace_types_lock);
4843         kfree(iter->trace);
4844         kfree(iter->temp);
4845         kfree(iter->buffer_iter);
4846 release:
4847         seq_release_private(inode, file);
4848         return ERR_PTR(-ENOMEM);
4849 }
4850
4851 int tracing_open_generic(struct inode *inode, struct file *filp)
4852 {
4853         int ret;
4854
4855         ret = tracing_check_open_get_tr(NULL);
4856         if (ret)
4857                 return ret;
4858
4859         filp->private_data = inode->i_private;
4860         return 0;
4861 }
4862
4863 bool tracing_is_disabled(void)
4864 {
4865         return (tracing_disabled) ? true : false;
4866 }
4867
4868 /*
4869  * Open and update trace_array ref count.
4870  * Must have the current trace_array passed to it.
4871  */
4872 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4873 {
4874         struct trace_array *tr = inode->i_private;
4875         int ret;
4876
4877         ret = tracing_check_open_get_tr(tr);
4878         if (ret)
4879                 return ret;
4880
4881         filp->private_data = inode->i_private;
4882
4883         return 0;
4884 }
4885
4886 static int tracing_mark_open(struct inode *inode, struct file *filp)
4887 {
4888         stream_open(inode, filp);
4889         return tracing_open_generic_tr(inode, filp);
4890 }
4891
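/*
 * Undo __tracing_open(): finish the per-CPU ring buffer iterators, give
 * the tracer a chance to clean up, and restart tracing if the open had
 * stopped it.
 */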
4892 static int tracing_release(struct inode *inode, struct file *file)
4893 {
4894         struct trace_array *tr = inode->i_private;
4895         struct seq_file *m = file->private_data;
4896         struct trace_iterator *iter;
4897         int cpu;
4898
4899         if (!(file->f_mode & FMODE_READ)) {
4900                 trace_array_put(tr);
4901                 return 0;
4902         }
4903
4904         /* Writes do not use seq_file */
4905         iter = m->private;
4906         mutex_lock(&trace_types_lock);
4907
4908         for_each_tracing_cpu(cpu) {
4909                 if (iter->buffer_iter[cpu])
4910                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4911         }
4912
4913         if (iter->trace && iter->trace->close)
4914                 iter->trace->close(iter);
4915
4916         if (!iter->snapshot && tr->stop_count)
4917                 /* reenable tracing if it was previously enabled */
4918                 tracing_start_tr(tr);
4919
4920         __trace_array_put(tr);
4921
4922         mutex_unlock(&trace_types_lock);
4923
4924         mutex_destroy(&iter->mutex);
4925         free_cpumask_var(iter->started);
4926         kfree(iter->fmt);
4927         kfree(iter->temp);
4928         kfree(iter->trace);
4929         kfree(iter->buffer_iter);
4930         seq_release_private(inode, file);
4931
4932         return 0;
4933 }
4934
4935 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4936 {
4937         struct trace_array *tr = inode->i_private;
4938
4939         trace_array_put(tr);
4940         return 0;
4941 }
4942
4943 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4944 {
4945         struct trace_array *tr = inode->i_private;
4946
4947         trace_array_put(tr);
4948
4949         return single_release(inode, file);
4950 }
4951
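/*
 * Open the "trace" file. An open for write with O_TRUNC clears the
 * buffer(s); an open for read builds the iterator via __tracing_open().
 */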
4952 static int tracing_open(struct inode *inode, struct file *file)
4953 {
4954         struct trace_array *tr = inode->i_private;
4955         struct trace_iterator *iter;
4956         int ret;
4957
4958         ret = tracing_check_open_get_tr(tr);
4959         if (ret)
4960                 return ret;
4961
4962         /* If this file was opened for write, then erase its contents */
4963         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4964                 int cpu = tracing_get_cpu(inode);
4965                 struct array_buffer *trace_buf = &tr->array_buffer;
4966
4967 #ifdef CONFIG_TRACER_MAX_TRACE
4968                 if (tr->current_trace->print_max)
4969                         trace_buf = &tr->max_buffer;
4970 #endif
4971
4972                 if (cpu == RING_BUFFER_ALL_CPUS)
4973                         tracing_reset_online_cpus(trace_buf);
4974                 else
4975                         tracing_reset_cpu(trace_buf, cpu);
4976         }
4977
4978         if (file->f_mode & FMODE_READ) {
4979                 iter = __tracing_open(inode, file, false);
4980                 if (IS_ERR(iter))
4981                         ret = PTR_ERR(iter);
4982                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4983                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4984         }
4985
4986         if (ret < 0)
4987                 trace_array_put(tr);
4988
4989         return ret;
4990 }
4991
4992 /*
4993  * Some tracers are not suitable for instance buffers.
4994  * A tracer is always available for the global array (toplevel)
4995  * or if it explicitly states that it is.
4996  */
4997 static bool
4998 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4999 {
5000         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5001 }
5002
5003 /* Find the next tracer that this trace array may use */
5004 static struct tracer *
5005 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5006 {
5007         while (t && !trace_ok_for_array(t, tr))
5008                 t = t->next;
5009
5010         return t;
5011 }
5012
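/*
 * seq_file iteration over the registered tracers, used to list the
 * tracers that this trace array may use (the available_tracers file).
 */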
5013 static void *
5014 t_next(struct seq_file *m, void *v, loff_t *pos)
5015 {
5016         struct trace_array *tr = m->private;
5017         struct tracer *t = v;
5018
5019         (*pos)++;
5020
5021         if (t)
5022                 t = get_tracer_for_array(tr, t->next);
5023
5024         return t;
5025 }
5026
5027 static void *t_start(struct seq_file *m, loff_t *pos)
5028 {
5029         struct trace_array *tr = m->private;
5030         struct tracer *t;
5031         loff_t l = 0;
5032
5033         mutex_lock(&trace_types_lock);
5034
5035         t = get_tracer_for_array(tr, trace_types);
5036         for (; t && l < *pos; t = t_next(m, t, &l))
5037                         ;
5038
5039         return t;
5040 }
5041
5042 static void t_stop(struct seq_file *m, void *p)
5043 {
5044         mutex_unlock(&trace_types_lock);
5045 }
5046
5047 static int t_show(struct seq_file *m, void *v)
5048 {
5049         struct tracer *t = v;
5050
5051         if (!t)
5052                 return 0;
5053
5054         seq_puts(m, t->name);
5055         if (t->next)
5056                 seq_putc(m, ' ');
5057         else
5058                 seq_putc(m, '\n');
5059
5060         return 0;
5061 }
5062
5063 static const struct seq_operations show_traces_seq_ops = {
5064         .start          = t_start,
5065         .next           = t_next,
5066         .stop           = t_stop,
5067         .show           = t_show,
5068 };
5069
5070 static int show_traces_open(struct inode *inode, struct file *file)
5071 {
5072         struct trace_array *tr = inode->i_private;
5073         struct seq_file *m;
5074         int ret;
5075
5076         ret = tracing_check_open_get_tr(tr);
5077         if (ret)
5078                 return ret;
5079
5080         ret = seq_open(file, &show_traces_seq_ops);
5081         if (ret) {
5082                 trace_array_put(tr);
5083                 return ret;
5084         }
5085
5086         m = file->private_data;
5087         m->private = tr;
5088
5089         return 0;
5090 }
5091
5092 static int show_traces_release(struct inode *inode, struct file *file)
5093 {
5094         struct trace_array *tr = inode->i_private;
5095
5096         trace_array_put(tr);
5097         return seq_release(inode, file);
5098 }
5099
5100 static ssize_t
5101 tracing_write_stub(struct file *filp, const char __user *ubuf,
5102                    size_t count, loff_t *ppos)
5103 {
5104         return count;
5105 }
5106
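/*
 * lseek for tracing files: readers go through seq_lseek(); writers have
 * nothing to seek over, so their file position is simply reset to zero.
 */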
5107 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5108 {
5109         int ret;
5110
5111         if (file->f_mode & FMODE_READ)
5112                 ret = seq_lseek(file, offset, whence);
5113         else
5114                 file->f_pos = ret = 0;
5115
5116         return ret;
5117 }
5118
5119 static const struct file_operations tracing_fops = {
5120         .open           = tracing_open,
5121         .read           = seq_read,
5122         .write          = tracing_write_stub,
5123         .llseek         = tracing_lseek,
5124         .release        = tracing_release,
5125 };
5126
5127 static const struct file_operations show_traces_fops = {
5128         .open           = show_traces_open,
5129         .read           = seq_read,
5130         .llseek         = seq_lseek,
5131         .release        = show_traces_release,
5132 };
5133
5134 static ssize_t
5135 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5136                      size_t count, loff_t *ppos)
5137 {
5138         struct trace_array *tr = file_inode(filp)->i_private;
5139         char *mask_str;
5140         int len;
5141
5142         len = snprintf(NULL, 0, "%*pb\n",
5143                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5144         mask_str = kmalloc(len, GFP_KERNEL);
5145         if (!mask_str)
5146                 return -ENOMEM;
5147
5148         len = snprintf(mask_str, len, "%*pb\n",
5149                        cpumask_pr_args(tr->tracing_cpumask));
5150         if (len >= count) {
5151                 count = -EINVAL;
5152                 goto out_err;
5153         }
5154         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5155
5156 out_err:
5157         kfree(mask_str);
5158
5159         return count;
5160 }
5161
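/*
 * tracing_set_cpumask - limit which CPUs @tr records events on
 *
 * For each CPU whose bit changes, adjust the per-CPU disabled counter and
 * the ring buffer record enable/disable state before copying the new mask
 * into tr->tracing_cpumask.
 */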
5162 int tracing_set_cpumask(struct trace_array *tr,
5163                         cpumask_var_t tracing_cpumask_new)
5164 {
5165         int cpu;
5166
5167         if (!tr)
5168                 return -EINVAL;
5169
5170         local_irq_disable();
5171         arch_spin_lock(&tr->max_lock);
5172         for_each_tracing_cpu(cpu) {
5173                 /*
5174                  * Increase/decrease the disabled counter if we are
5175                  * about to flip a bit in the cpumask:
5176                  */
5177                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5178                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5179                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5180                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5181                 }
5182                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5183                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5184                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5185                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5186                 }
5187         }
5188         arch_spin_unlock(&tr->max_lock);
5189         local_irq_enable();
5190
5191         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5192
5193         return 0;
5194 }
5195
5196 static ssize_t
5197 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5198                       size_t count, loff_t *ppos)
5199 {
5200         struct trace_array *tr = file_inode(filp)->i_private;
5201         cpumask_var_t tracing_cpumask_new;
5202         int err;
5203
5204         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5205                 return -ENOMEM;
5206
5207         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5208         if (err)
5209                 goto err_free;
5210
5211         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5212         if (err)
5213                 goto err_free;
5214
5215         free_cpumask_var(tracing_cpumask_new);
5216
5217         return count;
5218
5219 err_free:
5220         free_cpumask_var(tracing_cpumask_new);
5221
5222         return err;
5223 }
5224
5225 static const struct file_operations tracing_cpumask_fops = {
5226         .open           = tracing_open_generic_tr,
5227         .read           = tracing_cpumask_read,
5228         .write          = tracing_cpumask_write,
5229         .release        = tracing_release_generic_tr,
5230         .llseek         = generic_file_llseek,
5231 };
5232
5233 static int tracing_trace_options_show(struct seq_file *m, void *v)
5234 {
5235         struct tracer_opt *trace_opts;
5236         struct trace_array *tr = m->private;
5237         u32 tracer_flags;
5238         int i;
5239
5240         mutex_lock(&trace_types_lock);
5241         tracer_flags = tr->current_trace->flags->val;
5242         trace_opts = tr->current_trace->flags->opts;
5243
5244         for (i = 0; trace_options[i]; i++) {
5245                 if (tr->trace_flags & (1 << i))
5246                         seq_printf(m, "%s\n", trace_options[i]);
5247                 else
5248                         seq_printf(m, "no%s\n", trace_options[i]);
5249         }
5250
5251         for (i = 0; trace_opts[i].name; i++) {
5252                 if (tracer_flags & trace_opts[i].bit)
5253                         seq_printf(m, "%s\n", trace_opts[i].name);
5254                 else
5255                         seq_printf(m, "no%s\n", trace_opts[i].name);
5256         }
5257         mutex_unlock(&trace_types_lock);
5258
5259         return 0;
5260 }
5261
5262 static int __set_tracer_option(struct trace_array *tr,
5263                                struct tracer_flags *tracer_flags,
5264                                struct tracer_opt *opts, int neg)
5265 {
5266         struct tracer *trace = tracer_flags->trace;
5267         int ret;
5268
5269         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5270         if (ret)
5271                 return ret;
5272
5273         if (neg)
5274                 tracer_flags->val &= ~opts->bit;
5275         else
5276                 tracer_flags->val |= opts->bit;
5277         return 0;
5278 }
5279
5280 /* Try to assign a tracer specific option */
5281 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5282 {
5283         struct tracer *trace = tr->current_trace;
5284         struct tracer_flags *tracer_flags = trace->flags;
5285         struct tracer_opt *opts = NULL;
5286         int i;
5287
5288         for (i = 0; tracer_flags->opts[i].name; i++) {
5289                 opts = &tracer_flags->opts[i];
5290
5291                 if (strcmp(cmp, opts->name) == 0)
5292                         return __set_tracer_option(tr, trace->flags, opts, neg);
5293         }
5294
5295         return -EINVAL;
5296 }
5297
5298 /* Some tracers require overwrite to stay enabled */
5299 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5300 {
5301         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5302                 return -1;
5303
5304         return 0;
5305 }
5306
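/*
 * Set or clear a single TRACE_ITER_* flag on @tr. The current tracer may
 * veto the change, and side effects (cmdline/tgid recording, fork
 * following, overwrite mode, trace_printk) are propagated here.
 */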
5307 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5308 {
5309         int *map;
5310
5311         if ((mask == TRACE_ITER_RECORD_TGID) ||
5312             (mask == TRACE_ITER_RECORD_CMD))
5313                 lockdep_assert_held(&event_mutex);
5314
5315         /* do nothing if the flag already has the requested state */
5316         if (!!(tr->trace_flags & mask) == !!enabled)
5317                 return 0;
5318
5319         /* Give the tracer a chance to approve the change */
5320         if (tr->current_trace->flag_changed)
5321                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5322                         return -EINVAL;
5323
5324         if (enabled)
5325                 tr->trace_flags |= mask;
5326         else
5327                 tr->trace_flags &= ~mask;
5328
5329         if (mask == TRACE_ITER_RECORD_CMD)
5330                 trace_event_enable_cmd_record(enabled);
5331
5332         if (mask == TRACE_ITER_RECORD_TGID) {
5333                 if (!tgid_map) {
5334                         tgid_map_max = pid_max;
5335                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5336                                        GFP_KERNEL);
5337
5338                         /*
5339                          * Pairs with smp_load_acquire() in
5340                          * trace_find_tgid_ptr() to ensure that if it observes
5341                          * the tgid_map we just allocated then it also observes
5342                          * the corresponding tgid_map_max value.
5343                          */
5344                         smp_store_release(&tgid_map, map);
5345                 }
5346                 if (!tgid_map) {
5347                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5348                         return -ENOMEM;
5349                 }
5350
5351                 trace_event_enable_tgid_record(enabled);
5352         }
5353
5354         if (mask == TRACE_ITER_EVENT_FORK)
5355                 trace_event_follow_fork(tr, enabled);
5356
5357         if (mask == TRACE_ITER_FUNC_FORK)
5358                 ftrace_pid_follow_fork(tr, enabled);
5359
5360         if (mask == TRACE_ITER_OVERWRITE) {
5361                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5362 #ifdef CONFIG_TRACER_MAX_TRACE
5363                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5364 #endif
5365         }
5366
5367         if (mask == TRACE_ITER_PRINTK) {
5368                 trace_printk_start_stop_comm(enabled);
5369                 trace_printk_control(enabled);
5370         }
5371
5372         return 0;
5373 }
5374
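/*
 * Apply one option name as written to the trace_options file. A "no"
 * prefix clears the option (for example "sym-offset" sets the flag and
 * "nosym-offset" clears it); names that do not match a global option are
 * tried as tracer-specific options.
 */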
5375 int trace_set_options(struct trace_array *tr, char *option)
5376 {
5377         char *cmp;
5378         int neg = 0;
5379         int ret;
5380         size_t orig_len = strlen(option);
5381         int len;
5382
5383         cmp = strstrip(option);
5384
5385         len = str_has_prefix(cmp, "no");
5386         if (len)
5387                 neg = 1;
5388
5389         cmp += len;
5390
5391         mutex_lock(&event_mutex);
5392         mutex_lock(&trace_types_lock);
5393
5394         ret = match_string(trace_options, -1, cmp);
5395         /* If it is not a global trace option, test the tracer-specific options */
5396         if (ret < 0)
5397                 ret = set_tracer_option(tr, cmp, neg);
5398         else
5399                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5400
5401         mutex_unlock(&trace_types_lock);
5402         mutex_unlock(&event_mutex);
5403
5404         /*
5405          * If the first trailing whitespace is replaced with '\0' by strstrip,
5406          * turn it back into a space.
5407          */
5408         if (orig_len > strlen(option))
5409                 option[strlen(option)] = ' ';
5410
5411         return ret;
5412 }
5413
5414 static void __init apply_trace_boot_options(void)
5415 {
5416         char *buf = trace_boot_options_buf;
5417         char *option;
5418
5419         while (true) {
5420                 option = strsep(&buf, ",");
5421
5422                 if (!option)
5423                         break;
5424
5425                 if (*option)
5426                         trace_set_options(&global_trace, option);
5427
5428                 /* Put back the comma to allow this to be called again */
5429                 if (buf)
5430                         *(buf - 1) = ',';
5431         }
5432 }
5433
5434 static ssize_t
5435 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5436                         size_t cnt, loff_t *ppos)
5437 {
5438         struct seq_file *m = filp->private_data;
5439         struct trace_array *tr = m->private;
5440         char buf[64];
5441         int ret;
5442
5443         if (cnt >= sizeof(buf))
5444                 return -EINVAL;
5445
5446         if (copy_from_user(buf, ubuf, cnt))
5447                 return -EFAULT;
5448
5449         buf[cnt] = 0;
5450
5451         ret = trace_set_options(tr, buf);
5452         if (ret < 0)
5453                 return ret;
5454
5455         *ppos += cnt;
5456
5457         return cnt;
5458 }
5459
5460 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5461 {
5462         struct trace_array *tr = inode->i_private;
5463         int ret;
5464
5465         ret = tracing_check_open_get_tr(tr);
5466         if (ret)
5467                 return ret;
5468
5469         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5470         if (ret < 0)
5471                 trace_array_put(tr);
5472
5473         return ret;
5474 }
5475
5476 static const struct file_operations tracing_iter_fops = {
5477         .open           = tracing_trace_options_open,
5478         .read           = seq_read,
5479         .llseek         = seq_lseek,
5480         .release        = tracing_single_release_tr,
5481         .write          = tracing_trace_options_write,
5482 };
5483
5484 static const char readme_msg[] =
5485         "tracing mini-HOWTO:\n\n"
5486         "# echo 0 > tracing_on : quick way to disable tracing\n"
5487         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5488         " Important files:\n"
5489         "  trace\t\t\t- The static contents of the buffer\n"
5490         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5491         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5492         "  current_tracer\t- function and latency tracers\n"
5493         "  available_tracers\t- list of configured tracers for current_tracer\n"
5494         "  error_log\t- error log for failed commands (that support it)\n"
5495         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5496         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5497         "  trace_clock\t\t- change the clock used to order events\n"
5498         "       local:   Per cpu clock but may not be synced across CPUs\n"
5499         "      global:   Synced across CPUs but slows tracing down.\n"
5500         "     counter:   Not a clock, but just an increment\n"
5501         "      uptime:   Jiffy counter from time of boot\n"
5502         "        perf:   Same clock that perf events use\n"
5503 #ifdef CONFIG_X86_64
5504         "     x86-tsc:   TSC cycle counter\n"
5505 #endif
5506         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5507         "       delta:   Delta difference against a buffer-wide timestamp\n"
5508         "    absolute:   Absolute (standalone) timestamp\n"
5509         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5510         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5511         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5512         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5513         "\t\t\t  Remove sub-buffer with rmdir\n"
5514         "  trace_options\t\t- Set format or modify how tracing happens\n"
5515         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5516         "\t\t\t  option name\n"
5517         "  saved_cmdlines_size\t- echo a number in here to set the size of the saved comm-pid list\n"
5518 #ifdef CONFIG_DYNAMIC_FTRACE
5519         "\n  available_filter_functions - list of functions that can be filtered on\n"
5520         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5521         "\t\t\t  functions\n"
5522         "\t     accepts: func_full_name or glob-matching-pattern\n"
5523         "\t     modules: Can select a group via module\n"
5524         "\t      Format: :mod:<module-name>\n"
5525         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5526         "\t    triggers: a command to perform when function is hit\n"
5527         "\t      Format: <function>:<trigger>[:count]\n"
5528         "\t     trigger: traceon, traceoff\n"
5529         "\t\t      enable_event:<system>:<event>\n"
5530         "\t\t      disable_event:<system>:<event>\n"
5531 #ifdef CONFIG_STACKTRACE
5532         "\t\t      stacktrace\n"
5533 #endif
5534 #ifdef CONFIG_TRACER_SNAPSHOT
5535         "\t\t      snapshot\n"
5536 #endif
5537         "\t\t      dump\n"
5538         "\t\t      cpudump\n"
5539         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5540         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5541         "\t     The first one will disable tracing every time do_fault is hit\n"
5542         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5543         "\t       The first time do_trap is hit and it disables tracing, the\n"
5544         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5545         "\t       the counter will not decrement. It only decrements when the\n"
5546         "\t       trigger did work\n"
5547         "\t     To remove trigger without count:\n"
5548         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5549         "\t     To remove trigger with a count:\n"
5550         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5551         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5552         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5553         "\t    modules: Can select a group via module command :mod:\n"
5554         "\t    Does not accept triggers\n"
5555 #endif /* CONFIG_DYNAMIC_FTRACE */
5556 #ifdef CONFIG_FUNCTION_TRACER
5557         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5558         "\t\t    (function)\n"
5559         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5560         "\t\t    (function)\n"
5561 #endif
5562 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5563         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5564         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5565         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5566 #endif
5567 #ifdef CONFIG_TRACER_SNAPSHOT
5568         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5569         "\t\t\t  snapshot buffer. Read the contents for more\n"
5570         "\t\t\t  information\n"
5571 #endif
5572 #ifdef CONFIG_STACK_TRACER
5573         "  stack_trace\t\t- Shows the max stack trace when active\n"
5574         "  stack_max_size\t- Shows current max stack size that was traced\n"
5575         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5576         "\t\t\t  new trace)\n"
5577 #ifdef CONFIG_DYNAMIC_FTRACE
5578         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5579         "\t\t\t  traces\n"
5580 #endif
5581 #endif /* CONFIG_STACK_TRACER */
5582 #ifdef CONFIG_DYNAMIC_EVENTS
5583         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5584         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5585 #endif
5586 #ifdef CONFIG_KPROBE_EVENTS
5587         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5588         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5589 #endif
5590 #ifdef CONFIG_UPROBE_EVENTS
5591         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5592         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5593 #endif
5594 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5595         "\t  accepts: event-definitions (one definition per line)\n"
5596         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5597         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5598 #ifdef CONFIG_HIST_TRIGGERS
5599         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5600 #endif
5601         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5602         "\t           -:[<group>/][<event>]\n"
5603 #ifdef CONFIG_KPROBE_EVENTS
5604         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5605   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5606 #endif
5607 #ifdef CONFIG_UPROBE_EVENTS
5608   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5609 #endif
5610         "\t     args: <name>=fetcharg[:type]\n"
5611         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5612 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5613         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5614 #else
5615         "\t           $stack<index>, $stack, $retval, $comm,\n"
5616 #endif
5617         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5618         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5619         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5620         "\t           <type>\\[<array-size>\\]\n"
5621 #ifdef CONFIG_HIST_TRIGGERS
5622         "\t    field: <stype> <name>;\n"
5623         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5624         "\t           [unsigned] char/int/long\n"
5625 #endif
5626         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5627         "\t            of the <attached-group>/<attached-event>.\n"
5628 #endif
5629         "  events/\t\t- Directory containing all trace event subsystems:\n"
5630         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5631         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5632         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5633         "\t\t\t  events\n"
5634         "      filter\t\t- If set, only events passing filter are traced\n"
5635         "  events/<system>/<event>/\t- Directory containing control files for\n"
5636         "\t\t\t  <event>:\n"
5637         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5638         "      filter\t\t- If set, only events passing filter are traced\n"
5639         "      trigger\t\t- If set, a command to perform when event is hit\n"
5640         "\t    Format: <trigger>[:count][if <filter>]\n"
5641         "\t   trigger: traceon, traceoff\n"
5642         "\t            enable_event:<system>:<event>\n"
5643         "\t            disable_event:<system>:<event>\n"
5644 #ifdef CONFIG_HIST_TRIGGERS
5645         "\t            enable_hist:<system>:<event>\n"
5646         "\t            disable_hist:<system>:<event>\n"
5647 #endif
5648 #ifdef CONFIG_STACKTRACE
5649         "\t\t    stacktrace\n"
5650 #endif
5651 #ifdef CONFIG_TRACER_SNAPSHOT
5652         "\t\t    snapshot\n"
5653 #endif
5654 #ifdef CONFIG_HIST_TRIGGERS
5655         "\t\t    hist (see below)\n"
5656 #endif
5657         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5658         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5659         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5660         "\t                  events/block/block_unplug/trigger\n"
5661         "\t   The first disables tracing every time block_unplug is hit.\n"
5662         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5663         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5664         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5665         "\t   Like function triggers, the counter is only decremented if it\n"
5666         "\t    enabled or disabled tracing.\n"
5667         "\t   To remove a trigger without a count:\n"
5668         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5669         "\t   To remove a trigger with a count:\n"
5670         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5671         "\t   Filters can be ignored when removing a trigger.\n"
5672 #ifdef CONFIG_HIST_TRIGGERS
5673         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5674         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5675         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5676         "\t            [:values=<field1[,field2,...]>]\n"
5677         "\t            [:sort=<field1[,field2,...]>]\n"
5678         "\t            [:size=#entries]\n"
5679         "\t            [:pause][:continue][:clear]\n"
5680         "\t            [:name=histname1]\n"
5681         "\t            [:<handler>.<action>]\n"
5682         "\t            [if <filter>]\n\n"
5683         "\t    Note, special fields can be used as well:\n"
5684         "\t            common_timestamp - to record current timestamp\n"
5685         "\t            common_cpu - to record the CPU the event happened on\n"
5686         "\n"
5687         "\t    A hist trigger variable can be:\n"
5688         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5689         "\t        - a reference to another variable e.g. y=$x,\n"
5690         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5691         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5692         "\n"
5693         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5694         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5695         "\t    variable reference, field or numeric literal.\n"
5696         "\n"
5697         "\t    When a matching event is hit, an entry is added to a hash\n"
5698         "\t    table using the key(s) and value(s) named, and the value of a\n"
5699         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5700         "\t    correspond to fields in the event's format description.  Keys\n"
5701         "\t    can be any field, or the special string 'stacktrace'.\n"
5702         "\t    Compound keys consisting of up to two fields can be specified\n"
5703         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5704         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5705         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5706         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5707         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5708         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5709         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5710         "\t    its histogram data will be shared with other triggers of the\n"
5711         "\t    same name, and trigger hits will update this common data.\n\n"
5712         "\t    Reading the 'hist' file for the event will dump the hash\n"
5713         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5714         "\t    triggers attached to an event, there will be a table for each\n"
5715         "\t    trigger in the output.  The table displayed for a named\n"
5716         "\t    trigger will be the same as any other instance having the\n"
5717         "\t    same name.  The default format used to display a given field\n"
5718         "\t    can be modified by appending any of the following modifiers\n"
5719         "\t    to the field name, as applicable:\n\n"
5720         "\t            .hex        display a number as a hex value\n"
5721         "\t            .sym        display an address as a symbol\n"
5722         "\t            .sym-offset display an address as a symbol and offset\n"
5723         "\t            .execname   display a common_pid as a program name\n"
5724         "\t            .syscall    display a syscall id as a syscall name\n"
5725         "\t            .log2       display log2 value rather than raw number\n"
5726         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5727         "\t            .usecs      display a common_timestamp in microseconds\n"
5728         "\t            .percent    display a number as a percentage value\n"
5729         "\t            .graph      display a bar-graph of a value\n\n"
5730         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5731         "\t    trigger or to start a hist trigger but not log any events\n"
5732         "\t    until told to do so.  'continue' can be used to start or\n"
5733         "\t    restart a paused hist trigger.\n\n"
5734         "\t    The 'clear' parameter will clear the contents of a running\n"
5735         "\t    hist trigger and leave its current paused/active state\n"
5736         "\t    unchanged.\n\n"
5737         "\t    The enable_hist and disable_hist triggers can be used to\n"
5738         "\t    have one event conditionally start and stop another event's\n"
5739         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5740         "\t    the enable_event and disable_event triggers.\n\n"
5741         "\t    Hist trigger handlers and actions are executed whenever a\n"
5742         "\t    histogram entry is added or updated.  They take the form:\n\n"
5743         "\t        <handler>.<action>\n\n"
5744         "\t    The available handlers are:\n\n"
5745         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5746         "\t        onmax(var)               - invoke if var exceeds current max\n"
5747         "\t        onchange(var)            - invoke action if var changes\n\n"
5748         "\t    The available actions are:\n\n"
5749         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5750         "\t        save(field,...)                      - save current event fields\n"
5751 #ifdef CONFIG_TRACER_SNAPSHOT
5752         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5753 #endif
5754 #ifdef CONFIG_SYNTH_EVENTS
5755         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5756         "\t  Write into this file to define/undefine new synthetic events.\n"
5757         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5758 #endif
5759 #endif
5760 ;
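
/*
 * Illustrative hist trigger combining the field modifiers documented
 * above. The event and field names here are examples only; any event
 * with a numeric field can be used in the same way:
 *
 *   echo 'hist:keys=call_site.sym:values=bytes_req.graph' > \
 *           events/kmem/kmalloc/trigger
 */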
5761
5762 static ssize_t
5763 tracing_readme_read(struct file *filp, char __user *ubuf,
5764                        size_t cnt, loff_t *ppos)
5765 {
5766         return simple_read_from_buffer(ubuf, cnt, ppos,
5767                                         readme_msg, strlen(readme_msg));
5768 }
5769
5770 static const struct file_operations tracing_readme_fops = {
5771         .open           = tracing_open_generic,
5772         .read           = tracing_readme_read,
5773         .llseek         = generic_file_llseek,
5774 };
5775
5776 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5777 {
5778         int pid = ++(*pos);
5779
5780         return trace_find_tgid_ptr(pid);
5781 }
5782
5783 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5784 {
5785         int pid = *pos;
5786
5787         return trace_find_tgid_ptr(pid);
5788 }
5789
5790 static void saved_tgids_stop(struct seq_file *m, void *v)
5791 {
5792 }
5793
5794 static int saved_tgids_show(struct seq_file *m, void *v)
5795 {
5796         int *entry = (int *)v;
5797         int pid = entry - tgid_map;
5798         int tgid = *entry;
5799
5800         if (tgid == 0)
5801                 return SEQ_SKIP;
5802
5803         seq_printf(m, "%d %d\n", pid, tgid);
5804         return 0;
5805 }
5806
5807 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5808         .start          = saved_tgids_start,
5809         .stop           = saved_tgids_stop,
5810         .next           = saved_tgids_next,
5811         .show           = saved_tgids_show,
5812 };
5813
5814 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5815 {
5816         int ret;
5817
5818         ret = tracing_check_open_get_tr(NULL);
5819         if (ret)
5820                 return ret;
5821
5822         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5823 }
5824
5825
5826 static const struct file_operations tracing_saved_tgids_fops = {
5827         .open           = tracing_saved_tgids_open,
5828         .read           = seq_read,
5829         .llseek         = seq_lseek,
5830         .release        = seq_release,
5831 };
5832
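/*
 * seq_file iteration for the saved_cmdlines file: walk the cmdline-to-pid
 * map and skip slots that are not in use.
 */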
5833 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5834 {
5835         unsigned int *ptr = v;
5836
5837         if (*pos || m->count)
5838                 ptr++;
5839
5840         (*pos)++;
5841
5842         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5843              ptr++) {
5844                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5845                         continue;
5846
5847                 return ptr;
5848         }
5849
5850         return NULL;
5851 }
5852
5853 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5854 {
5855         void *v;
5856         loff_t l = 0;
5857
5858         preempt_disable();
5859         arch_spin_lock(&trace_cmdline_lock);
5860
5861         v = &savedcmd->map_cmdline_to_pid[0];
5862         while (l <= *pos) {
5863                 v = saved_cmdlines_next(m, v, &l);
5864                 if (!v)
5865                         return NULL;
5866         }
5867
5868         return v;
5869 }
5870
5871 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5872 {
5873         arch_spin_unlock(&trace_cmdline_lock);
5874         preempt_enable();
5875 }
5876
5877 static int saved_cmdlines_show(struct seq_file *m, void *v)
5878 {
5879         char buf[TASK_COMM_LEN];
5880         unsigned int *pid = v;
5881
5882         __trace_find_cmdline(*pid, buf);
5883         seq_printf(m, "%d %s\n", *pid, buf);
5884         return 0;
5885 }
5886
5887 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5888         .start          = saved_cmdlines_start,
5889         .next           = saved_cmdlines_next,
5890         .stop           = saved_cmdlines_stop,
5891         .show           = saved_cmdlines_show,
5892 };
5893
5894 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5895 {
5896         int ret;
5897
5898         ret = tracing_check_open_get_tr(NULL);
5899         if (ret)
5900                 return ret;
5901
5902         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5903 }
5904
5905 static const struct file_operations tracing_saved_cmdlines_fops = {
5906         .open           = tracing_saved_cmdlines_open,
5907         .read           = seq_read,
5908         .llseek         = seq_lseek,
5909         .release        = seq_release,
5910 };
5911
5912 static ssize_t
5913 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5914                                  size_t cnt, loff_t *ppos)
5915 {
5916         char buf[64];
5917         int r;
5918
5919         preempt_disable();
5920         arch_spin_lock(&trace_cmdline_lock);
5921         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5922         arch_spin_unlock(&trace_cmdline_lock);
5923         preempt_enable();
5924
5925         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5926 }
5927
5928 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5929 {
5930         kfree(s->saved_cmdlines);
5931         kfree(s->map_cmdline_to_pid);
5932         kfree(s);
5933 }
5934
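/*
 * Replace the saved_cmdlines buffer with one that holds @val entries:
 * allocate the new buffer first, swap it in under trace_cmdline_lock,
 * then free the old buffer.
 */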
5935 static int tracing_resize_saved_cmdlines(unsigned int val)
5936 {
5937         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5938
5939         s = kmalloc(sizeof(*s), GFP_KERNEL);
5940         if (!s)
5941                 return -ENOMEM;
5942
5943         if (allocate_cmdlines_buffer(val, s) < 0) {
5944                 kfree(s);
5945                 return -ENOMEM;
5946         }
5947
5948         preempt_disable();
5949         arch_spin_lock(&trace_cmdline_lock);
5950         savedcmd_temp = savedcmd;
5951         savedcmd = s;
5952         arch_spin_unlock(&trace_cmdline_lock);
5953         preempt_enable();
5954         free_saved_cmdlines_buffer(savedcmd_temp);
5955
5956         return 0;
5957 }
5958
5959 static ssize_t
5960 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5961                                   size_t cnt, loff_t *ppos)
5962 {
5963         unsigned long val;
5964         int ret;
5965
5966         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5967         if (ret)
5968                 return ret;
5969
5970         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5971         if (!val || val > PID_MAX_DEFAULT)
5972                 return -EINVAL;
5973
5974         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5975         if (ret < 0)
5976                 return ret;
5977
5978         *ppos += cnt;
5979
5980         return cnt;
5981 }
5982
5983 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5984         .open           = tracing_open_generic,
5985         .read           = tracing_saved_cmdlines_size_read,
5986         .write          = tracing_saved_cmdlines_size_write,
5987 };
5988
5989 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5990 static union trace_eval_map_item *
5991 update_eval_map(union trace_eval_map_item *ptr)
5992 {
5993         if (!ptr->map.eval_string) {
5994                 if (ptr->tail.next) {
5995                         ptr = ptr->tail.next;
5996                         /* Set ptr to the next real item (skip head) */
5997                         ptr++;
5998                 } else
5999                         return NULL;
6000         }
6001         return ptr;
6002 }
6003
6004 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6005 {
6006         union trace_eval_map_item *ptr = v;
6007
6008         /*
6009          * Paranoid! If ptr points to end, we don't want to increment past it.
6010          * This really should never happen.
6011          */
6012         (*pos)++;
6013         ptr = update_eval_map(ptr);
6014         if (WARN_ON_ONCE(!ptr))
6015                 return NULL;
6016
6017         ptr++;
6018         ptr = update_eval_map(ptr);
6019
6020         return ptr;
6021 }
6022
6023 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6024 {
6025         union trace_eval_map_item *v;
6026         loff_t l = 0;
6027
6028         mutex_lock(&trace_eval_mutex);
6029
6030         v = trace_eval_maps;
6031         if (v)
6032                 v++;
6033
6034         while (v && l < *pos) {
6035                 v = eval_map_next(m, v, &l);
6036         }
6037
6038         return v;
6039 }
6040
6041 static void eval_map_stop(struct seq_file *m, void *v)
6042 {
6043         mutex_unlock(&trace_eval_mutex);
6044 }
6045
6046 static int eval_map_show(struct seq_file *m, void *v)
6047 {
6048         union trace_eval_map_item *ptr = v;
6049
6050         seq_printf(m, "%s %ld (%s)\n",
6051                    ptr->map.eval_string, ptr->map.eval_value,
6052                    ptr->map.system);
6053
6054         return 0;
6055 }
6056
6057 static const struct seq_operations tracing_eval_map_seq_ops = {
6058         .start          = eval_map_start,
6059         .next           = eval_map_next,
6060         .stop           = eval_map_stop,
6061         .show           = eval_map_show,
6062 };
6063
6064 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6065 {
6066         int ret;
6067
6068         ret = tracing_check_open_get_tr(NULL);
6069         if (ret)
6070                 return ret;
6071
6072         return seq_open(filp, &tracing_eval_map_seq_ops);
6073 }
6074
6075 static const struct file_operations tracing_eval_map_fops = {
6076         .open           = tracing_eval_map_open,
6077         .read           = seq_read,
6078         .llseek         = seq_lseek,
6079         .release        = seq_release,
6080 };
6081
6082 static inline union trace_eval_map_item *
6083 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6084 {
6085         /* Return tail of array given the head */
6086         return ptr + ptr->head.length + 1;
6087 }
6088
6089 static void
6090 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6091                            int len)
6092 {
6093         struct trace_eval_map **stop;
6094         struct trace_eval_map **map;
6095         union trace_eval_map_item *map_array;
6096         union trace_eval_map_item *ptr;
6097
6098         stop = start + len;
6099
6100         /*
6101          * The trace_eval_maps contains the map plus a head and tail item,
6102          * where the head holds the module and the length of the array, and the
6103          * tail holds a pointer to the next list.
6104          */
6105         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6106         if (!map_array) {
6107                 pr_warn("Unable to allocate trace eval mapping\n");
6108                 return;
6109         }
6110
6111         mutex_lock(&trace_eval_mutex);
6112
6113         if (!trace_eval_maps)
6114                 trace_eval_maps = map_array;
6115         else {
6116                 ptr = trace_eval_maps;
6117                 for (;;) {
6118                         ptr = trace_eval_jmp_to_tail(ptr);
6119                         if (!ptr->tail.next)
6120                                 break;
6121                         ptr = ptr->tail.next;
6122
6123                 }
6124                 ptr->tail.next = map_array;
6125         }
6126         map_array->head.mod = mod;
6127         map_array->head.length = len;
6128         map_array++;
6129
6130         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6131                 map_array->map = **map;
6132                 map_array++;
6133         }
6134         memset(map_array, 0, sizeof(*map_array));
6135
6136         mutex_unlock(&trace_eval_mutex);
6137 }
6138
6139 static void trace_create_eval_file(struct dentry *d_tracer)
6140 {
6141         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6142                           NULL, &tracing_eval_map_fops);
6143 }
6144
6145 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6146 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6147 static inline void trace_insert_eval_map_file(struct module *mod,
6148                               struct trace_eval_map **start, int len) { }
6149 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6150
6151 static void trace_insert_eval_map(struct module *mod,
6152                                   struct trace_eval_map **start, int len)
6153 {
6154         struct trace_eval_map **map;
6155
6156         if (len <= 0)
6157                 return;
6158
6159         map = start;
6160
6161         trace_event_eval_update(map, len);
6162
6163         trace_insert_eval_map_file(mod, start, len);
6164 }
6165
6166 static ssize_t
6167 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6168                        size_t cnt, loff_t *ppos)
6169 {
6170         struct trace_array *tr = filp->private_data;
6171         char buf[MAX_TRACER_SIZE+2];
6172         int r;
6173
6174         mutex_lock(&trace_types_lock);
6175         r = sprintf(buf, "%s\n", tr->current_trace->name);
6176         mutex_unlock(&trace_types_lock);
6177
6178         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6179 }
6180
6181 int tracer_init(struct tracer *t, struct trace_array *tr)
6182 {
6183         tracing_reset_online_cpus(&tr->array_buffer);
6184         return t->init(tr);
6185 }
6186
6187 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6188 {
6189         int cpu;
6190
6191         for_each_tracing_cpu(cpu)
6192                 per_cpu_ptr(buf->data, cpu)->entries = val;
6193 }
6194
6195 #ifdef CONFIG_TRACER_MAX_TRACE
6196 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6197 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6198                                         struct array_buffer *size_buf, int cpu_id)
6199 {
6200         int cpu, ret = 0;
6201
6202         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6203                 for_each_tracing_cpu(cpu) {
6204                         ret = ring_buffer_resize(trace_buf->buffer,
6205                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6206                         if (ret < 0)
6207                                 break;
6208                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6209                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6210                 }
6211         } else {
6212                 ret = ring_buffer_resize(trace_buf->buffer,
6213                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6214                 if (ret == 0)
6215                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6216                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6217         }
6218
6219         return ret;
6220 }
6221 #endif /* CONFIG_TRACER_MAX_TRACE */
6222
6223 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6224                                         unsigned long size, int cpu)
6225 {
6226         int ret;
6227
6228         /*
6229          * If kernel or user changes the size of the ring buffer
6230          * we use the size that was given, and we can forget about
6231          * expanding it later.
6232          */
6233         ring_buffer_expanded = true;
6234
6235         /* May be called before buffers are initialized */
6236         if (!tr->array_buffer.buffer)
6237                 return 0;
6238
6239         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6240         if (ret < 0)
6241                 return ret;
6242
6243 #ifdef CONFIG_TRACER_MAX_TRACE
6244         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6245             !tr->current_trace->use_max_tr)
6246                 goto out;
6247
6248         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6249         if (ret < 0) {
6250                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6251                                                      &tr->array_buffer, cpu);
6252                 if (r < 0) {
6253                         /*
6255                          * AARGH! We are left with a max buffer of a
6256                          * different size!
6257                          * The max buffer is our "snapshot" buffer.
6258                          * When a tracer needs a snapshot (one of the
6259                          * latency tracers), it swaps the max buffer
6260                          * with the saved snapshot. We succeeded in
6261                          * updating the size of the main buffer, but
6262                          * failed to update the size of the max buffer.
6263                          * Then, resetting the main buffer back to its
6264                          * original size failed as well. This is very
6265                          * unlikely to happen, but if it does, warn and
6266                          * kill all tracing.
6266                          */
6267                         WARN_ON(1);
6268                         tracing_disabled = 1;
6269                 }
6270                 return ret;
6271         }
6272
6273         if (cpu == RING_BUFFER_ALL_CPUS)
6274                 set_buffer_entries(&tr->max_buffer, size);
6275         else
6276                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6277
6278  out:
6279 #endif /* CONFIG_TRACER_MAX_TRACE */
6280
6281         if (cpu == RING_BUFFER_ALL_CPUS)
6282                 set_buffer_entries(&tr->array_buffer, size);
6283         else
6284                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6285
6286         return ret;
6287 }
6288
6289 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6290                                   unsigned long size, int cpu_id)
6291 {
6292         int ret;
6293
6294         mutex_lock(&trace_types_lock);
6295
6296         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6297                 /* make sure this cpu is enabled in the mask */
6298                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6299                         ret = -EINVAL;
6300                         goto out;
6301                 }
6302         }
6303
6304         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6305         if (ret < 0)
6306                 ret = -ENOMEM;
6307
6308 out:
6309         mutex_unlock(&trace_types_lock);
6310
6311         return ret;
6312 }
6313
6314
6315 /**
6316  * tracing_update_buffers - used by tracing facility to expand ring buffers
6317  *
6318  * To save memory on systems where tracing is configured in but never
6319  * used, the ring buffers start out at a minimum size. Once a user
6320  * starts to use the tracing facility, they need to grow to their
6321  * default size.
6322  *
6323  * This function is to be called when a tracer is about to be used.
6324  */
6325 int tracing_update_buffers(void)
6326 {
6327         int ret = 0;
6328
6329         mutex_lock(&trace_types_lock);
6330         if (!ring_buffer_expanded)
6331                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6332                                                 RING_BUFFER_ALL_CPUS);
6333         mutex_unlock(&trace_types_lock);
6334
6335         return ret;
6336 }
6337
6338 struct trace_option_dentry;
6339
6340 static void
6341 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6342
6343 /*
6344  * Used to clear out the tracer before deletion of an instance.
6345  * Must have trace_types_lock held.
6346  */
6347 static void tracing_set_nop(struct trace_array *tr)
6348 {
6349         if (tr->current_trace == &nop_trace)
6350                 return;
6351
6352         tr->current_trace->enabled--;
6353
6354         if (tr->current_trace->reset)
6355                 tr->current_trace->reset(tr);
6356
6357         tr->current_trace = &nop_trace;
6358 }
6359
6360 static bool tracer_options_updated;
6361
6362 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6363 {
6364         /* Only enable if the directory has been created already. */
6365         if (!tr->dir)
6366                 return;
6367
6368         /* Only create trace option files after update_tracer_options finishes */
6369         if (!tracer_options_updated)
6370                 return;
6371
6372         create_trace_option_files(tr, t);
6373 }
6374
6375 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6376 {
6377         struct tracer *t;
6378 #ifdef CONFIG_TRACER_MAX_TRACE
6379         bool had_max_tr;
6380 #endif
6381         int ret = 0;
6382
6383         mutex_lock(&trace_types_lock);
6384
6385         if (!ring_buffer_expanded) {
6386                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6387                                                 RING_BUFFER_ALL_CPUS);
6388                 if (ret < 0)
6389                         goto out;
6390                 ret = 0;
6391         }
6392
6393         for (t = trace_types; t; t = t->next) {
6394                 if (strcmp(t->name, buf) == 0)
6395                         break;
6396         }
6397         if (!t) {
6398                 ret = -EINVAL;
6399                 goto out;
6400         }
6401         if (t == tr->current_trace)
6402                 goto out;
6403
6404 #ifdef CONFIG_TRACER_SNAPSHOT
6405         if (t->use_max_tr) {
6406                 local_irq_disable();
6407                 arch_spin_lock(&tr->max_lock);
6408                 if (tr->cond_snapshot)
6409                         ret = -EBUSY;
6410                 arch_spin_unlock(&tr->max_lock);
6411                 local_irq_enable();
6412                 if (ret)
6413                         goto out;
6414         }
6415 #endif
6416         /* Some tracers won't work on the kernel command line */
6417         if (system_state < SYSTEM_RUNNING && t->noboot) {
6418                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6419                         t->name);
6420                 goto out;
6421         }
6422
6423         /* Some tracers are only allowed for the top level buffer */
6424         if (!trace_ok_for_array(t, tr)) {
6425                 ret = -EINVAL;
6426                 goto out;
6427         }
6428
6429         /* If trace pipe files are being read, we can't change the tracer */
6430         if (tr->trace_ref) {
6431                 ret = -EBUSY;
6432                 goto out;
6433         }
6434
6435         trace_branch_disable();
6436
6437         tr->current_trace->enabled--;
6438
6439         if (tr->current_trace->reset)
6440                 tr->current_trace->reset(tr);
6441
6442 #ifdef CONFIG_TRACER_MAX_TRACE
6443         had_max_tr = tr->current_trace->use_max_tr;
6444
6445         /* Current trace needs to be nop_trace before synchronize_rcu */
6446         tr->current_trace = &nop_trace;
6447
6448         if (had_max_tr && !t->use_max_tr) {
6449                 /*
6450                  * We need to make sure that the update_max_tr sees that
6451                  * current_trace changed to nop_trace to keep it from
6452                  * swapping the buffers after we resize it.
6453                  * The update_max_tr is called with interrupts disabled,
6454                  * so a synchronize_rcu() is sufficient.
6455                  */
6456                 synchronize_rcu();
6457                 free_snapshot(tr);
6458         }
6459
6460         if (t->use_max_tr && !tr->allocated_snapshot) {
6461                 ret = tracing_alloc_snapshot_instance(tr);
6462                 if (ret < 0)
6463                         goto out;
6464         }
6465 #else
6466         tr->current_trace = &nop_trace;
6467 #endif
6468
6469         if (t->init) {
6470                 ret = tracer_init(t, tr);
6471                 if (ret)
6472                         goto out;
6473         }
6474
6475         tr->current_trace = t;
6476         tr->current_trace->enabled++;
6477         trace_branch_enable(tr);
6478  out:
6479         mutex_unlock(&trace_types_lock);
6480
6481         return ret;
6482 }
6483
6484 static ssize_t
6485 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6486                         size_t cnt, loff_t *ppos)
6487 {
6488         struct trace_array *tr = filp->private_data;
6489         char buf[MAX_TRACER_SIZE+1];
6490         char *name;
6491         size_t ret;
6492         int err;
6493
6494         ret = cnt;
6495
6496         if (cnt > MAX_TRACER_SIZE)
6497                 cnt = MAX_TRACER_SIZE;
6498
6499         if (copy_from_user(buf, ubuf, cnt))
6500                 return -EFAULT;
6501
6502         buf[cnt] = 0;
6503
6504         name = strim(buf);
6505
6506         err = tracing_set_tracer(tr, name);
6507         if (err)
6508                 return err;
6509
6510         *ppos += ret;
6511
6512         return ret;
6513 }
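
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file) of the interface implemented by tracing_set_trace_read() and
 * tracing_set_trace_write() above: writing a tracer name to the
 * "current_tracer" file selects that tracer (trailing whitespace is
 * stripped), and reading it returns the name of the current one.  The
 * tracefs mount point below is an assumption.
 */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/kernel/tracing/current_tracer", "r+");
        char name[64];

        if (!f)
                return 1;

        fputs("nop\n", f);      /* the nop tracer is always registered */
        fflush(f);
        rewind(f);

        if (fgets(name, sizeof(name), f))
                printf("current tracer: %s", name);

        fclose(f);
        return 0;
}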
6514
6515 static ssize_t
6516 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6517                    size_t cnt, loff_t *ppos)
6518 {
6519         char buf[64];
6520         int r;
6521
6522         r = snprintf(buf, sizeof(buf), "%ld\n",
6523                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6524         if (r > sizeof(buf))
6525                 r = sizeof(buf);
6526         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6527 }
6528
6529 static ssize_t
6530 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6531                     size_t cnt, loff_t *ppos)
6532 {
6533         unsigned long val;
6534         int ret;
6535
6536         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6537         if (ret)
6538                 return ret;
6539
6540         *ptr = val * 1000;
6541
6542         return cnt;
6543 }
6544
6545 static ssize_t
6546 tracing_thresh_read(struct file *filp, char __user *ubuf,
6547                     size_t cnt, loff_t *ppos)
6548 {
6549         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6550 }
6551
6552 static ssize_t
6553 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6554                      size_t cnt, loff_t *ppos)
6555 {
6556         struct trace_array *tr = filp->private_data;
6557         int ret;
6558
6559         mutex_lock(&trace_types_lock);
6560         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6561         if (ret < 0)
6562                 goto out;
6563
6564         if (tr->current_trace->update_thresh) {
6565                 ret = tr->current_trace->update_thresh(tr);
6566                 if (ret < 0)
6567                         goto out;
6568         }
6569
6570         ret = cnt;
6571 out:
6572         mutex_unlock(&trace_types_lock);
6573
6574         return ret;
6575 }
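
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file): the "tracing_thresh" file takes a value in microseconds;
 * tracing_nsecs_write() above multiplies it by 1000 before storing it, and
 * tracing_nsecs_read() converts it back on read.  Writing "100" therefore
 * sets a 100 usec (100000 ns) threshold, and 0 disables it.  The tracefs
 * path is an assumption.
 */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/kernel/tracing/tracing_thresh", O_WRONLY);

        if (fd < 0)
                return 1;

        /* threshold in microseconds */
        if (write(fd, "100", 3) != 3) {
                close(fd);
                return 1;
        }

        close(fd);
        return 0;
}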
6576
6577 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6578
6579 static ssize_t
6580 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6581                      size_t cnt, loff_t *ppos)
6582 {
6583         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6584 }
6585
6586 static ssize_t
6587 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6588                       size_t cnt, loff_t *ppos)
6589 {
6590         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6591 }
6592
6593 #endif
6594
6595 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6596 {
6597         struct trace_array *tr = inode->i_private;
6598         struct trace_iterator *iter;
6599         int ret;
6600
6601         ret = tracing_check_open_get_tr(tr);
6602         if (ret)
6603                 return ret;
6604
6605         mutex_lock(&trace_types_lock);
6606
6607         /* create a buffer to store the information to pass to userspace */
6608         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6609         if (!iter) {
6610                 ret = -ENOMEM;
6611                 __trace_array_put(tr);
6612                 goto out;
6613         }
6614
6615         trace_seq_init(&iter->seq);
6616         iter->trace = tr->current_trace;
6617
6618         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6619                 ret = -ENOMEM;
6620                 goto fail;
6621         }
6622
6623         /* trace pipe does not show start of buffer */
6624         cpumask_setall(iter->started);
6625
6626         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6627                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6628
6629         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6630         if (trace_clocks[tr->clock_id].in_ns)
6631                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6632
6633         iter->tr = tr;
6634         iter->array_buffer = &tr->array_buffer;
6635         iter->cpu_file = tracing_get_cpu(inode);
6636         mutex_init(&iter->mutex);
6637         filp->private_data = iter;
6638
6639         if (iter->trace->pipe_open)
6640                 iter->trace->pipe_open(iter);
6641
6642         nonseekable_open(inode, filp);
6643
6644         tr->trace_ref++;
6645 out:
6646         mutex_unlock(&trace_types_lock);
6647         return ret;
6648
6649 fail:
6650         kfree(iter);
6651         __trace_array_put(tr);
6652         mutex_unlock(&trace_types_lock);
6653         return ret;
6654 }
6655
6656 static int tracing_release_pipe(struct inode *inode, struct file *file)
6657 {
6658         struct trace_iterator *iter = file->private_data;
6659         struct trace_array *tr = inode->i_private;
6660
6661         mutex_lock(&trace_types_lock);
6662
6663         tr->trace_ref--;
6664
6665         if (iter->trace->pipe_close)
6666                 iter->trace->pipe_close(iter);
6667
6668         mutex_unlock(&trace_types_lock);
6669
6670         free_cpumask_var(iter->started);
6671         kfree(iter->fmt);
6672         mutex_destroy(&iter->mutex);
6673         kfree(iter);
6674
6675         trace_array_put(tr);
6676
6677         return 0;
6678 }
6679
6680 static __poll_t
6681 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6682 {
6683         struct trace_array *tr = iter->tr;
6684
6685         /* Iterators are static; they should be either filled or empty */
6686         if (trace_buffer_iter(iter, iter->cpu_file))
6687                 return EPOLLIN | EPOLLRDNORM;
6688
6689         if (tr->trace_flags & TRACE_ITER_BLOCK)
6690                 /*
6691                  * Always select as readable when in blocking mode
6692                  */
6693                 return EPOLLIN | EPOLLRDNORM;
6694         else
6695                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6696                                              filp, poll_table, iter->tr->buffer_percent);
6697 }
6698
6699 static __poll_t
6700 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6701 {
6702         struct trace_iterator *iter = filp->private_data;
6703
6704         return trace_poll(iter, filp, poll_table);
6705 }
6706
6707 /* Must be called with iter->mutex held. */
6708 static int tracing_wait_pipe(struct file *filp)
6709 {
6710         struct trace_iterator *iter = filp->private_data;
6711         int ret;
6712
6713         while (trace_empty(iter)) {
6714
6715                 if ((filp->f_flags & O_NONBLOCK)) {
6716                         return -EAGAIN;
6717                 }
6718
6719                 /*
6720                  * We block until we read something and tracing is disabled.
6721                  * Even if tracing is disabled, we keep blocking as long as
6722                  * we have not read anything yet. This allows a user to cat
6723                  * this file, and then enable tracing. But after we have read
6724                  * something, we give an EOF when tracing is disabled again.
6725                  *
6726                  * iter->pos will be 0 if we haven't read anything.
6727                  */
6728                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6729                         break;
6730
6731                 mutex_unlock(&iter->mutex);
6732
6733                 ret = wait_on_pipe(iter, 0);
6734
6735                 mutex_lock(&iter->mutex);
6736
6737                 if (ret)
6738                         return ret;
6739         }
6740
6741         return 1;
6742 }
6743
6744 /*
6745  * Consumer reader.
6746  */
6747 static ssize_t
6748 tracing_read_pipe(struct file *filp, char __user *ubuf,
6749                   size_t cnt, loff_t *ppos)
6750 {
6751         struct trace_iterator *iter = filp->private_data;
6752         ssize_t sret;
6753
6754         /*
6755          * Avoid more than one consumer on a single file descriptor.
6756          * This is just a matter of trace coherency; the ring buffer itself
6757          * is protected.
6758          */
6759         mutex_lock(&iter->mutex);
6760
6761         /* return any leftover data */
6762         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6763         if (sret != -EBUSY)
6764                 goto out;
6765
6766         trace_seq_init(&iter->seq);
6767
6768         if (iter->trace->read) {
6769                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6770                 if (sret)
6771                         goto out;
6772         }
6773
6774 waitagain:
6775         sret = tracing_wait_pipe(filp);
6776         if (sret <= 0)
6777                 goto out;
6778
6779         /* stop when tracing is finished */
6780         if (trace_empty(iter)) {
6781                 sret = 0;
6782                 goto out;
6783         }
6784
6785         if (cnt >= PAGE_SIZE)
6786                 cnt = PAGE_SIZE - 1;
6787
6788         /* reset all but tr, trace, and overruns */
6789         trace_iterator_reset(iter);
6790         cpumask_clear(iter->started);
6791         trace_seq_init(&iter->seq);
6792
6793         trace_event_read_lock();
6794         trace_access_lock(iter->cpu_file);
6795         while (trace_find_next_entry_inc(iter) != NULL) {
6796                 enum print_line_t ret;
6797                 int save_len = iter->seq.seq.len;
6798
6799                 ret = print_trace_line(iter);
6800                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6801                         /* don't print partial lines */
6802                         iter->seq.seq.len = save_len;
6803                         break;
6804                 }
6805                 if (ret != TRACE_TYPE_NO_CONSUME)
6806                         trace_consume(iter);
6807
6808                 if (trace_seq_used(&iter->seq) >= cnt)
6809                         break;
6810
6811                 /*
6812                  * The full flag being set means we reached the trace_seq buffer
6813                  * size and should have left via the partial-line condition above.
6814                  * One of the trace_seq_* functions is not being used properly.
6815                  */
6816                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6817                           iter->ent->type);
6818         }
6819         trace_access_unlock(iter->cpu_file);
6820         trace_event_read_unlock();
6821
6822         /* Now copy what we have to the user */
6823         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6824         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6825                 trace_seq_init(&iter->seq);
6826
6827         /*
6828          * If there was nothing to send to user, in spite of consuming trace
6829          * entries, go back to wait for more entries.
6830          */
6831         if (sret == -EBUSY)
6832                 goto waitagain;
6833
6834 out:
6835         mutex_unlock(&iter->mutex);
6836
6837         return sret;
6838 }
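
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file): "trace_pipe" is the consuming reader implemented by
 * tracing_read_pipe() above.  Reads block while the buffer is empty (unless
 * O_NONBLOCK is set), each entry is consumed as it is read, and the loop
 * below streams until it is interrupted.  The tracefs path is an
 * assumption.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);

        if (fd < 0)
                return 1;

        /* blocks until trace data is available, then consumes it */
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);

        close(fd);
        return 0;
}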
6839
6840 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6841                                      unsigned int idx)
6842 {
6843         __free_page(spd->pages[idx]);
6844 }
6845
6846 static size_t
6847 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6848 {
6849         size_t count;
6850         int save_len;
6851         int ret;
6852
6853         /* Seq buffer is page-sized, exactly what we need. */
6854         for (;;) {
6855                 save_len = iter->seq.seq.len;
6856                 ret = print_trace_line(iter);
6857
6858                 if (trace_seq_has_overflowed(&iter->seq)) {
6859                         iter->seq.seq.len = save_len;
6860                         break;
6861                 }
6862
6863                 /*
6864                  * This should not be hit, because it should only
6865                  * be set if the iter->seq overflowed. But check it
6866                  * anyway to be safe.
6867                  */
6868                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6869                         iter->seq.seq.len = save_len;
6870                         break;
6871                 }
6872
6873                 count = trace_seq_used(&iter->seq) - save_len;
6874                 if (rem < count) {
6875                         rem = 0;
6876                         iter->seq.seq.len = save_len;
6877                         break;
6878                 }
6879
6880                 if (ret != TRACE_TYPE_NO_CONSUME)
6881                         trace_consume(iter);
6882                 rem -= count;
6883                 if (!trace_find_next_entry_inc(iter))   {
6884                         rem = 0;
6885                         iter->ent = NULL;
6886                         break;
6887                 }
6888         }
6889
6890         return rem;
6891 }
6892
6893 static ssize_t tracing_splice_read_pipe(struct file *filp,
6894                                         loff_t *ppos,
6895                                         struct pipe_inode_info *pipe,
6896                                         size_t len,
6897                                         unsigned int flags)
6898 {
6899         struct page *pages_def[PIPE_DEF_BUFFERS];
6900         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6901         struct trace_iterator *iter = filp->private_data;
6902         struct splice_pipe_desc spd = {
6903                 .pages          = pages_def,
6904                 .partial        = partial_def,
6905                 .nr_pages       = 0, /* This gets updated below. */
6906                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6907                 .ops            = &default_pipe_buf_ops,
6908                 .spd_release    = tracing_spd_release_pipe,
6909         };
6910         ssize_t ret;
6911         size_t rem;
6912         unsigned int i;
6913
6914         if (splice_grow_spd(pipe, &spd))
6915                 return -ENOMEM;
6916
6917         mutex_lock(&iter->mutex);
6918
6919         if (iter->trace->splice_read) {
6920                 ret = iter->trace->splice_read(iter, filp,
6921                                                ppos, pipe, len, flags);
6922                 if (ret)
6923                         goto out_err;
6924         }
6925
6926         ret = tracing_wait_pipe(filp);
6927         if (ret <= 0)
6928                 goto out_err;
6929
6930         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6931                 ret = -EFAULT;
6932                 goto out_err;
6933         }
6934
6935         trace_event_read_lock();
6936         trace_access_lock(iter->cpu_file);
6937
6938         /* Fill as many pages as possible. */
6939         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6940                 spd.pages[i] = alloc_page(GFP_KERNEL);
6941                 if (!spd.pages[i])
6942                         break;
6943
6944                 rem = tracing_fill_pipe_page(rem, iter);
6945
6946                 /* Copy the data into the page, so we can start over. */
6947                 ret = trace_seq_to_buffer(&iter->seq,
6948                                           page_address(spd.pages[i]),
6949                                           trace_seq_used(&iter->seq));
6950                 if (ret < 0) {
6951                         __free_page(spd.pages[i]);
6952                         break;
6953                 }
6954                 spd.partial[i].offset = 0;
6955                 spd.partial[i].len = trace_seq_used(&iter->seq);
6956
6957                 trace_seq_init(&iter->seq);
6958         }
6959
6960         trace_access_unlock(iter->cpu_file);
6961         trace_event_read_unlock();
6962         mutex_unlock(&iter->mutex);
6963
6964         spd.nr_pages = i;
6965
6966         if (i)
6967                 ret = splice_to_pipe(pipe, &spd);
6968         else
6969                 ret = 0;
6970 out:
6971         splice_shrink_spd(&spd);
6972         return ret;
6973
6974 out_err:
6975         mutex_unlock(&iter->mutex);
6976         goto out;
6977 }
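
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file): because trace_pipe implements ->splice_read via
 * tracing_splice_read_pipe() above, trace data can be moved into a pipe and
 * on to a file without copying it through user space, one page-sized chunk
 * at a time.  The loop streams until interrupted; the tracefs path and the
 * output file name are assumptions.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int pipefd[2];
        int in = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
        int out = open("trace.out", O_WRONLY | O_CREAT | O_TRUNC, 0644);

        if (in < 0 || out < 0 || pipe(pipefd) < 0)
                return 1;

        for (;;) {
                /* move trace data into the pipe without a user-space copy */
                ssize_t n = splice(in, NULL, pipefd[1], NULL, 4096, 0);

                if (n <= 0)
                        break;
                /* drain the pipe into the output file, again without copying */
                if (splice(pipefd[0], NULL, out, NULL, n, 0) < 0)
                        break;
        }

        close(in);
        close(out);
        close(pipefd[0]);
        close(pipefd[1]);
        return 0;
}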
6978
6979 static ssize_t
6980 tracing_entries_read(struct file *filp, char __user *ubuf,
6981                      size_t cnt, loff_t *ppos)
6982 {
6983         struct inode *inode = file_inode(filp);
6984         struct trace_array *tr = inode->i_private;
6985         int cpu = tracing_get_cpu(inode);
6986         char buf[64];
6987         int r = 0;
6988         ssize_t ret;
6989
6990         mutex_lock(&trace_types_lock);
6991
6992         if (cpu == RING_BUFFER_ALL_CPUS) {
6993                 int cpu, buf_size_same;
6994                 unsigned long size;
6995
6996                 size = 0;
6997                 buf_size_same = 1;
6998                 /* check if all cpu sizes are same */
6999                 for_each_tracing_cpu(cpu) {
7000                         /* fill in the size from first enabled cpu */
7001                         if (size == 0)
7002                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7003                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7004                                 buf_size_same = 0;
7005                                 break;
7006                         }
7007                 }
7008
7009                 if (buf_size_same) {
7010                         if (!ring_buffer_expanded)
7011                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7012                                             size >> 10,
7013                                             trace_buf_size >> 10);
7014                         else
7015                                 r = sprintf(buf, "%lu\n", size >> 10);
7016                 } else
7017                         r = sprintf(buf, "X\n");
7018         } else
7019                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7020
7021         mutex_unlock(&trace_types_lock);
7022
7023         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7024         return ret;
7025 }
7026
7027 static ssize_t
7028 tracing_entries_write(struct file *filp, const char __user *ubuf,
7029                       size_t cnt, loff_t *ppos)
7030 {
7031         struct inode *inode = file_inode(filp);
7032         struct trace_array *tr = inode->i_private;
7033         unsigned long val;
7034         int ret;
7035
7036         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7037         if (ret)
7038                 return ret;
7039
7040         /* must have at least 1 entry */
7041         if (!val)
7042                 return -EINVAL;
7043
7044         /* value is in KB */
7045         val <<= 10;
7046         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7047         if (ret < 0)
7048                 return ret;
7049
7050         *ppos += cnt;
7051
7052         return cnt;
7053 }
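
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file): "buffer_size_kb" is handled by tracing_entries_read() and
 * tracing_entries_write() above.  The value is in KiB per CPU, writing it
 * resizes the ring buffer, and a read of "X" means the per-CPU sizes
 * differ.  The tracefs path is an assumption.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[64];
        ssize_t n;
        int fd = open("/sys/kernel/tracing/buffer_size_kb", O_RDWR);

        if (fd < 0)
                return 1;

        /* ask for 4 MiB per CPU */
        if (write(fd, "4096", 4) != 4) {
                close(fd);
                return 1;
        }

        /* read back the size that was actually set */
        n = pread(fd, buf, sizeof(buf) - 1, 0);
        if (n > 0) {
                buf[n] = '\0';
                printf("buffer_size_kb: %s", buf);
        }

        close(fd);
        return 0;
}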
7054
7055 static ssize_t
7056 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7057                                 size_t cnt, loff_t *ppos)
7058 {
7059         struct trace_array *tr = filp->private_data;
7060         char buf[64];
7061         int r, cpu;
7062         unsigned long size = 0, expanded_size = 0;
7063
7064         mutex_lock(&trace_types_lock);
7065         for_each_tracing_cpu(cpu) {
7066                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7067                 if (!ring_buffer_expanded)
7068                         expanded_size += trace_buf_size >> 10;
7069         }
7070         if (ring_buffer_expanded)
7071                 r = sprintf(buf, "%lu\n", size);
7072         else
7073                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7074         mutex_unlock(&trace_types_lock);
7075
7076         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7077 }
7078
7079 static ssize_t
7080 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7081                           size_t cnt, loff_t *ppos)
7082 {
7083         /*
7084          * There is no need to read what the user has written; this function
7085          * just makes sure that "echo" does not return an error.
7086          */
7087
7088         *ppos += cnt;
7089
7090         return cnt;
7091 }
7092
7093 static int
7094 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7095 {
7096         struct trace_array *tr = inode->i_private;
7097
7098         /* disable tracing ? */
7099         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7100                 tracer_tracing_off(tr);
7101         /* resize the ring buffer to 0 */
7102         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7103
7104         trace_array_put(tr);
7105
7106         return 0;
7107 }
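
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file): as implemented by tracing_free_buffer_write() and
 * tracing_free_buffer_release() above, anything written to "free_buffer"
 * is ignored; the work happens when the file is closed, which shrinks the
 * ring buffer to zero (and stops tracing first if the stop-on-free option
 * is set).  The tracefs path is an assumption.
 */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);

        if (fd < 0)
                return 1;

        if (write(fd, "1", 1) != 1) {   /* the content is irrelevant */
                close(fd);
                return 1;
        }

        close(fd);      /* this is what actually frees the buffer */
        return 0;
}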
7108
7109 static ssize_t
7110 tracing_mark_write(struct file *filp, const char __user *ubuf,
7111                                         size_t cnt, loff_t *fpos)
7112 {
7113         struct trace_array *tr = filp->private_data;
7114         struct ring_buffer_event *event;
7115         enum event_trigger_type tt = ETT_NONE;
7116         struct trace_buffer *buffer;
7117         struct print_entry *entry;
7118         ssize_t written;
7119         int size;
7120         int len;
7121
7122 /* Used in tracing_mark_raw_write() as well */
7123 #define FAULTED_STR "<faulted>"
7124 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7125
7126         if (tracing_disabled)
7127                 return -EINVAL;
7128
7129         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7130                 return -EINVAL;
7131
7132         if (cnt > TRACE_BUF_SIZE)
7133                 cnt = TRACE_BUF_SIZE;
7134
7135         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7136
7137         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7138
7139         /* If less than "<faulted>", then make sure we can still add that */
7140         if (cnt < FAULTED_SIZE)
7141                 size += FAULTED_SIZE - cnt;
7142
7143         buffer = tr->array_buffer.buffer;
7144         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7145                                             tracing_gen_ctx());
7146         if (unlikely(!event))
7147                 /* Ring buffer disabled, return as if not open for write */
7148                 return -EBADF;
7149
7150         entry = ring_buffer_event_data(event);
7151         entry->ip = _THIS_IP_;
7152
7153         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7154         if (len) {
7155                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7156                 cnt = FAULTED_SIZE;
7157                 written = -EFAULT;
7158         } else
7159                 written = cnt;
7160
7161         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7162                 /* do not add \n before testing triggers, but add \0 */
7163                 entry->buf[cnt] = '\0';
7164                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7165         }
7166
7167         if (entry->buf[cnt - 1] != '\n') {
7168                 entry->buf[cnt] = '\n';
7169                 entry->buf[cnt + 1] = '\0';
7170         } else
7171                 entry->buf[cnt] = '\0';
7172
7173         if (static_branch_unlikely(&trace_marker_exports_enabled))
7174                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7175         __buffer_unlock_commit(buffer, event);
7176
7177         if (tt)
7178                 event_triggers_post_call(tr->trace_marker_file, tt);
7179
7180         return written;
7181 }
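
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file): "trace_marker", as implemented by tracing_mark_write() above,
 * injects a free-form string into the trace as a TRACE_PRINT event.  A
 * trailing newline is added if missing and oversized writes are truncated
 * to TRACE_BUF_SIZE.  The tracefs path is an assumption.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *msg = "hello from user space\n";
        int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

        if (fd < 0)
                return 1;

        if (write(fd, msg, strlen(msg)) < 0) {
                close(fd);
                return 1;
        }

        close(fd);
        return 0;
}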
7182
7183 /* Limit it for now to 3K (including tag) */
7184 #define RAW_DATA_MAX_SIZE (1024*3)
7185
7186 static ssize_t
7187 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7188                                         size_t cnt, loff_t *fpos)
7189 {
7190         struct trace_array *tr = filp->private_data;
7191         struct ring_buffer_event *event;
7192         struct trace_buffer *buffer;
7193         struct raw_data_entry *entry;
7194         ssize_t written;
7195         int size;
7196         int len;
7197
7198 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7199
7200         if (tracing_disabled)
7201                 return -EINVAL;
7202
7203         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7204                 return -EINVAL;
7205
7206         /* The marker must at least have a tag id */
7207         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7208                 return -EINVAL;
7209
7210         if (cnt > TRACE_BUF_SIZE)
7211                 cnt = TRACE_BUF_SIZE;
7212
7213         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7214
7215         size = sizeof(*entry) + cnt;
7216         if (cnt < FAULT_SIZE_ID)
7217                 size += FAULT_SIZE_ID - cnt;
7218
7219         buffer = tr->array_buffer.buffer;
7220         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7221                                             tracing_gen_ctx());
7222         if (!event)
7223                 /* Ring buffer disabled, return as if not open for write */
7224                 return -EBADF;
7225
7226         entry = ring_buffer_event_data(event);
7227
7228         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7229         if (len) {
7230                 entry->id = -1;
7231                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7232                 written = -EFAULT;
7233         } else
7234                 written = cnt;
7235
7236         __buffer_unlock_commit(buffer, event);
7237
7238         return written;
7239 }
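
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file): "trace_marker_raw", as implemented by
 * tracing_mark_raw_write() above, expects binary data that starts with an
 * unsigned int tag id followed by an arbitrary payload, up to
 * RAW_DATA_MAX_SIZE bytes in total.  The id value and payload below are
 * made up, and the tracefs path is an assumption.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        unsigned char buf[sizeof(unsigned int) + 8];
        unsigned int id = 42;   /* arbitrary tag understood by the consumer */
        int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

        if (fd < 0)
                return 1;

        memcpy(buf, &id, sizeof(id));
        memset(buf + sizeof(id), 0xab, 8);      /* example payload */

        if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf)) {
                close(fd);
                return 1;
        }

        close(fd);
        return 0;
}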
7240
7241 static int tracing_clock_show(struct seq_file *m, void *v)
7242 {
7243         struct trace_array *tr = m->private;
7244         int i;
7245
7246         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7247                 seq_printf(m,
7248                         "%s%s%s%s", i ? " " : "",
7249                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7250                         i == tr->clock_id ? "]" : "");
7251         seq_putc(m, '\n');
7252
7253         return 0;
7254 }
7255
7256 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7257 {
7258         int i;
7259
7260         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7261                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7262                         break;
7263         }
7264         if (i == ARRAY_SIZE(trace_clocks))
7265                 return -EINVAL;
7266
7267         mutex_lock(&trace_types_lock);
7268
7269         tr->clock_id = i;
7270
7271         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7272
7273         /*
7274          * New clock may not be consistent with the previous clock.
7275          * Reset the buffer so that it doesn't have incomparable timestamps.
7276          */
7277         tracing_reset_online_cpus(&tr->array_buffer);
7278
7279 #ifdef CONFIG_TRACER_MAX_TRACE
7280         if (tr->max_buffer.buffer)
7281                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7282         tracing_reset_online_cpus(&tr->max_buffer);
7283 #endif
7284
7285         mutex_unlock(&trace_types_lock);
7286
7287         return 0;
7288 }
7289
7290 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7291                                    size_t cnt, loff_t *fpos)
7292 {
7293         struct seq_file *m = filp->private_data;
7294         struct trace_array *tr = m->private;
7295         char buf[64];
7296         const char *clockstr;
7297         int ret;
7298
7299         if (cnt >= sizeof(buf))
7300                 return -EINVAL;
7301
7302         if (copy_from_user(buf, ubuf, cnt))
7303                 return -EFAULT;
7304
7305         buf[cnt] = 0;
7306
7307         clockstr = strstrip(buf);
7308
7309         ret = tracing_set_clock(tr, clockstr);
7310         if (ret)
7311                 return ret;
7312
7313         *fpos += cnt;
7314
7315         return cnt;
7316 }
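
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file): "trace_clock" lists the available clocks with the active one
 * in brackets, and writing a clock name (handled by tracing_clock_write()
 * above) selects it and resets the ring buffer.  The clock name "global"
 * and the tracefs path are assumptions about the running kernel.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char line[256];
        ssize_t n;
        int fd = open("/sys/kernel/tracing/trace_clock", O_RDWR);

        if (fd < 0)
                return 1;

        n = read(fd, line, sizeof(line) - 1);
        if (n > 0) {
                line[n] = '\0';
                printf("clocks: %s", line);     /* active clock is in brackets */
        }

        /* switch clocks; this also resets the ring buffer */
        if (write(fd, "global", 6) != 6) {
                close(fd);
                return 1;
        }

        close(fd);
        return 0;
}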
7317
7318 static int tracing_clock_open(struct inode *inode, struct file *file)
7319 {
7320         struct trace_array *tr = inode->i_private;
7321         int ret;
7322
7323         ret = tracing_check_open_get_tr(tr);
7324         if (ret)
7325                 return ret;
7326
7327         ret = single_open(file, tracing_clock_show, inode->i_private);
7328         if (ret < 0)
7329                 trace_array_put(tr);
7330
7331         return ret;
7332 }
7333
7334 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7335 {
7336         struct trace_array *tr = m->private;
7337
7338         mutex_lock(&trace_types_lock);
7339
7340         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7341                 seq_puts(m, "delta [absolute]\n");
7342         else
7343                 seq_puts(m, "[delta] absolute\n");
7344
7345         mutex_unlock(&trace_types_lock);
7346
7347         return 0;
7348 }
7349
7350 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7351 {
7352         struct trace_array *tr = inode->i_private;
7353         int ret;
7354
7355         ret = tracing_check_open_get_tr(tr);
7356         if (ret)
7357                 return ret;
7358
7359         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7360         if (ret < 0)
7361                 trace_array_put(tr);
7362
7363         return ret;
7364 }
7365
7366 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7367 {
7368         if (rbe == this_cpu_read(trace_buffered_event))
7369                 return ring_buffer_time_stamp(buffer);
7370
7371         return ring_buffer_event_time_stamp(buffer, rbe);
7372 }
7373
7374 /*
7375  * Set or disable using the per CPU trace_buffered_event when possible.
7376  */
7377 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7378 {
7379         int ret = 0;
7380
7381         mutex_lock(&trace_types_lock);
7382
7383         if (set && tr->no_filter_buffering_ref++)
7384                 goto out;
7385
7386         if (!set) {
7387                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7388                         ret = -EINVAL;
7389                         goto out;
7390                 }
7391
7392                 --tr->no_filter_buffering_ref;
7393         }
7394  out:
7395         mutex_unlock(&trace_types_lock);
7396
7397         return ret;
7398 }
7399
7400 struct ftrace_buffer_info {
7401         struct trace_iterator   iter;
7402         void                    *spare;
7403         unsigned int            spare_cpu;
7404         unsigned int            read;
7405 };
7406
7407 #ifdef CONFIG_TRACER_SNAPSHOT
7408 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7409 {
7410         struct trace_array *tr = inode->i_private;
7411         struct trace_iterator *iter;
7412         struct seq_file *m;
7413         int ret;
7414
7415         ret = tracing_check_open_get_tr(tr);
7416         if (ret)
7417                 return ret;
7418
7419         if (file->f_mode & FMODE_READ) {
7420                 iter = __tracing_open(inode, file, true);
7421                 if (IS_ERR(iter))
7422                         ret = PTR_ERR(iter);
7423         } else {
7424                 /* Writes still need the seq_file to hold the private data */
7425                 ret = -ENOMEM;
7426                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7427                 if (!m)
7428                         goto out;
7429                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7430                 if (!iter) {
7431                         kfree(m);
7432                         goto out;
7433                 }
7434                 ret = 0;
7435
7436                 iter->tr = tr;
7437                 iter->array_buffer = &tr->max_buffer;
7438                 iter->cpu_file = tracing_get_cpu(inode);
7439                 m->private = iter;
7440                 file->private_data = m;
7441         }
7442 out:
7443         if (ret < 0)
7444                 trace_array_put(tr);
7445
7446         return ret;
7447 }
7448
7449 static ssize_t
7450 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7451                        loff_t *ppos)
7452 {
7453         struct seq_file *m = filp->private_data;
7454         struct trace_iterator *iter = m->private;
7455         struct trace_array *tr = iter->tr;
7456         unsigned long val;
7457         int ret;
7458
7459         ret = tracing_update_buffers();
7460         if (ret < 0)
7461                 return ret;
7462
7463         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7464         if (ret)
7465                 return ret;
7466
7467         mutex_lock(&trace_types_lock);
7468
7469         if (tr->current_trace->use_max_tr) {
7470                 ret = -EBUSY;
7471                 goto out;
7472         }
7473
7474         local_irq_disable();
7475         arch_spin_lock(&tr->max_lock);
7476         if (tr->cond_snapshot)
7477                 ret = -EBUSY;
7478         arch_spin_unlock(&tr->max_lock);
7479         local_irq_enable();
7480         if (ret)
7481                 goto out;
7482
7483         switch (val) {
7484         case 0:
7485                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7486                         ret = -EINVAL;
7487                         break;
7488                 }
7489                 if (tr->allocated_snapshot)
7490                         free_snapshot(tr);
7491                 break;
7492         case 1:
7493 /* Only allow per-cpu swap if the ring buffer supports it */
7494 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7495                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7496                         ret = -EINVAL;
7497                         break;
7498                 }
7499 #endif
7500                 if (tr->allocated_snapshot)
7501                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7502                                         &tr->array_buffer, iter->cpu_file);
7503                 else
7504                         ret = tracing_alloc_snapshot_instance(tr);
7505                 if (ret < 0)
7506                         break;
7507                 local_irq_disable();
7508                 /* Now, we're going to swap */
7509                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7510                         update_max_tr(tr, current, smp_processor_id(), NULL);
7511                 else
7512                         update_max_tr_single(tr, current, iter->cpu_file);
7513                 local_irq_enable();
7514                 break;
7515         default:
7516                 if (tr->allocated_snapshot) {
7517                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7518                                 tracing_reset_online_cpus(&tr->max_buffer);
7519                         else
7520                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7521                 }
7522                 break;
7523         }
7524
7525         if (ret >= 0) {
7526                 *ppos += cnt;
7527                 ret = cnt;
7528         }
7529 out:
7530         mutex_unlock(&trace_types_lock);
7531         return ret;
7532 }
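
/*
 * Illustrative user-space sketch (built as a separate program, not part of
 * this file): the "snapshot" file is driven by tracing_snapshot_write()
 * above.  Writing 0 frees the snapshot buffer, 1 allocates it if needed and
 * swaps it with the live buffer, and any other value clears the snapshot;
 * reading shows what was captured.  The tracefs path is an assumption.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        int fd = open("/sys/kernel/tracing/snapshot", O_RDWR);

        if (fd < 0)
                return 1;

        /* take a snapshot of the current trace buffer */
        if (write(fd, "1", 1) != 1) {
                close(fd);
                return 1;
        }

        /* dump what was captured */
        lseek(fd, 0, SEEK_SET);
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);

        close(fd);
        return 0;
}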
7533
7534 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7535 {
7536         struct seq_file *m = file->private_data;
7537         int ret;
7538
7539         ret = tracing_release(inode, file);
7540
7541         if (file->f_mode & FMODE_READ)
7542                 return ret;
7543
7544         /* If write only, the seq_file is just a stub */
7545         if (m)
7546                 kfree(m->private);
7547         kfree(m);
7548
7549         return 0;
7550 }
7551
7552 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7553 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7554                                     size_t count, loff_t *ppos);
7555 static int tracing_buffers_release(struct inode *inode, struct file *file);
7556 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7557                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7558
7559 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7560 {
7561         struct ftrace_buffer_info *info;
7562         int ret;
7563
7564         /* The following checks for tracefs lockdown */
7565         ret = tracing_buffers_open(inode, filp);
7566         if (ret < 0)
7567                 return ret;
7568
7569         info = filp->private_data;
7570
7571         if (info->iter.trace->use_max_tr) {
7572                 tracing_buffers_release(inode, filp);
7573                 return -EBUSY;
7574         }
7575
7576         info->iter.snapshot = true;
7577         info->iter.array_buffer = &info->iter.tr->max_buffer;
7578
7579         return ret;
7580 }
7581
7582 #endif /* CONFIG_TRACER_SNAPSHOT */
7583
7584
7585 static const struct file_operations tracing_thresh_fops = {
7586         .open           = tracing_open_generic,
7587         .read           = tracing_thresh_read,
7588         .write          = tracing_thresh_write,
7589         .llseek         = generic_file_llseek,
7590 };
7591
7592 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7593 static const struct file_operations tracing_max_lat_fops = {
7594         .open           = tracing_open_generic,
7595         .read           = tracing_max_lat_read,
7596         .write          = tracing_max_lat_write,
7597         .llseek         = generic_file_llseek,
7598 };
7599 #endif
7600
7601 static const struct file_operations set_tracer_fops = {
7602         .open           = tracing_open_generic,
7603         .read           = tracing_set_trace_read,
7604         .write          = tracing_set_trace_write,
7605         .llseek         = generic_file_llseek,
7606 };
7607
7608 static const struct file_operations tracing_pipe_fops = {
7609         .open           = tracing_open_pipe,
7610         .poll           = tracing_poll_pipe,
7611         .read           = tracing_read_pipe,
7612         .splice_read    = tracing_splice_read_pipe,
7613         .release        = tracing_release_pipe,
7614         .llseek         = no_llseek,
7615 };
7616
7617 static const struct file_operations tracing_entries_fops = {
7618         .open           = tracing_open_generic_tr,
7619         .read           = tracing_entries_read,
7620         .write          = tracing_entries_write,
7621         .llseek         = generic_file_llseek,
7622         .release        = tracing_release_generic_tr,
7623 };
7624
7625 static const struct file_operations tracing_total_entries_fops = {
7626         .open           = tracing_open_generic_tr,
7627         .read           = tracing_total_entries_read,
7628         .llseek         = generic_file_llseek,
7629         .release        = tracing_release_generic_tr,
7630 };
7631
7632 static const struct file_operations tracing_free_buffer_fops = {
7633         .open           = tracing_open_generic_tr,
7634         .write          = tracing_free_buffer_write,
7635         .release        = tracing_free_buffer_release,
7636 };
7637
7638 static const struct file_operations tracing_mark_fops = {
7639         .open           = tracing_mark_open,
7640         .write          = tracing_mark_write,
7641         .release        = tracing_release_generic_tr,
7642 };
7643
7644 static const struct file_operations tracing_mark_raw_fops = {
7645         .open           = tracing_mark_open,
7646         .write          = tracing_mark_raw_write,
7647         .release        = tracing_release_generic_tr,
7648 };
7649
7650 static const struct file_operations trace_clock_fops = {
7651         .open           = tracing_clock_open,
7652         .read           = seq_read,
7653         .llseek         = seq_lseek,
7654         .release        = tracing_single_release_tr,
7655         .write          = tracing_clock_write,
7656 };
7657
7658 static const struct file_operations trace_time_stamp_mode_fops = {
7659         .open           = tracing_time_stamp_mode_open,
7660         .read           = seq_read,
7661         .llseek         = seq_lseek,
7662         .release        = tracing_single_release_tr,
7663 };
7664
7665 #ifdef CONFIG_TRACER_SNAPSHOT
7666 static const struct file_operations snapshot_fops = {
7667         .open           = tracing_snapshot_open,
7668         .read           = seq_read,
7669         .write          = tracing_snapshot_write,
7670         .llseek         = tracing_lseek,
7671         .release        = tracing_snapshot_release,
7672 };
7673
7674 static const struct file_operations snapshot_raw_fops = {
7675         .open           = snapshot_raw_open,
7676         .read           = tracing_buffers_read,
7677         .release        = tracing_buffers_release,
7678         .splice_read    = tracing_buffers_splice_read,
7679         .llseek         = no_llseek,
7680 };
7681
7682 #endif /* CONFIG_TRACER_SNAPSHOT */
7683
7684 /*
7685  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7686  * @filp: The active open file structure
7687  * @ubuf: The user space buffer containing the value to write
7688  * @cnt: The maximum number of bytes to write
7689  * @ppos: The current "file" position
7690  *
7691  * This function implements the write interface for a struct trace_min_max_param.
7692  * The filp->private_data must point to a trace_min_max_param structure that
7693  * defines where to write the value, the min and the max acceptable values,
7694  * and a lock to protect the write.
7695  */
7696 static ssize_t
7697 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7698 {
7699         struct trace_min_max_param *param = filp->private_data;
7700         u64 val;
7701         int err;
7702
7703         if (!param)
7704                 return -EFAULT;
7705
7706         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7707         if (err)
7708                 return err;
7709
7710         if (param->lock)
7711                 mutex_lock(param->lock);
7712
7713         if (param->min && val < *param->min)
7714                 err = -EINVAL;
7715
7716         if (param->max && val > *param->max)
7717                 err = -EINVAL;
7718
7719         if (!err)
7720                 *param->val = val;
7721
7722         if (param->lock)
7723                 mutex_unlock(param->lock);
7724
7725         if (err)
7726                 return err;
7727
7728         return cnt;
7729 }
7730
7731 /*
7732  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7733  * @filp: The active open file structure
7734  * @ubuf: The userspace provided buffer to read value into
7735  * @cnt: The maximum number of bytes to read
7736  * @ppos: The current "file" position
7737  *
7738  * This function implements the read interface for a struct trace_min_max_param.
7739  * The filp->private_data must point to a trace_min_max_param struct with valid
7740  * data.
7741  */
7742 static ssize_t
7743 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7744 {
7745         struct trace_min_max_param *param = filp->private_data;
7746         char buf[U64_STR_SIZE];
7747         int len;
7748         u64 val;
7749
7750         if (!param)
7751                 return -EFAULT;
7752
7753         val = *param->val;
7754
7755         if (cnt > sizeof(buf))
7756                 cnt = sizeof(buf);
7757
7758         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7759
7760         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7761 }
7762
7763 const struct file_operations trace_min_max_fops = {
7764         .open           = tracing_open_generic,
7765         .read           = trace_min_max_read,
7766         .write          = trace_min_max_write,
7767 };
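
/*
 * Illustrative sketch (hypothetical, not used anywhere in this file): a
 * clamped u64 control file can be created by pointing the file's data at a
 * trace_min_max_param and registering it with trace_min_max_fops above.
 * The variable names, the file name and the [1, 100] range are made up for
 * the example.
 */
static DEFINE_MUTEX(example_lock);
static u64 example_val = 50;
static u64 example_min = 1;
static u64 example_max = 100;

static struct trace_min_max_param example_param = {
        .lock   = &example_lock,        /* serializes writes to example_val */
        .val    = &example_val,
        .min    = &example_min,
        .max    = &example_max,
};

static void example_create_min_max_file(struct dentry *d_tracer)
{
        /* values outside [1, 100] are rejected with -EINVAL */
        trace_create_file("example_limit", TRACE_MODE_WRITE, d_tracer,
                          &example_param, &trace_min_max_fops);
}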
7768
7769 #define TRACING_LOG_ERRS_MAX    8
7770 #define TRACING_LOG_LOC_MAX     128
7771
7772 #define CMD_PREFIX "  Command: "
7773
7774 struct err_info {
7775         const char      **errs; /* ptr to loc-specific array of err strings */
7776         u8              type;   /* index into errs -> specific err string */
7777         u16             pos;    /* caret position */
7778         u64             ts;
7779 };
7780
7781 struct tracing_log_err {
7782         struct list_head        list;
7783         struct err_info         info;
7784         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7785         char                    *cmd;                     /* what caused err */
7786 };
7787
7788 static DEFINE_MUTEX(tracing_err_log_lock);
7789
7790 static struct tracing_log_err *alloc_tracing_log_err(int len)
7791 {
7792         struct tracing_log_err *err;
7793
7794         err = kzalloc(sizeof(*err), GFP_KERNEL);
7795         if (!err)
7796                 return ERR_PTR(-ENOMEM);
7797
7798         err->cmd = kzalloc(len, GFP_KERNEL);
7799         if (!err->cmd) {
7800                 kfree(err);
7801                 return ERR_PTR(-ENOMEM);
7802         }
7803
7804         return err;
7805 }
7806
7807 static void free_tracing_log_err(struct tracing_log_err *err)
7808 {
7809         kfree(err->cmd);
7810         kfree(err);
7811 }
7812
7813 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7814                                                    int len)
7815 {
7816         struct tracing_log_err *err;
7817         char *cmd;
7818
7819         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7820                 err = alloc_tracing_log_err(len);
7821                 if (PTR_ERR(err) != -ENOMEM)
7822                         tr->n_err_log_entries++;
7823
7824                 return err;
7825         }
7826         cmd = kzalloc(len, GFP_KERNEL);
7827         if (!cmd)
7828                 return ERR_PTR(-ENOMEM);
7829         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7830         kfree(err->cmd);
7831         err->cmd = cmd;
7832         list_del(&err->list);
7833
7834         return err;
7835 }
7836
7837 /**
7838  * err_pos - find the position of a string within a command for error careting
7839  * @cmd: The tracing command that caused the error
7840  * @str: The string to position the caret at within @cmd
7841  *
7842  * Finds the position of the first occurrence of @str within @cmd.  The
7843  * return value can be passed to tracing_log_err() for caret placement
7844  * within @cmd.
7845  *
7846  * Returns the index within @cmd of the first occurrence of @str or 0
7847  * if @str was not found.
7848  */
7849 unsigned int err_pos(char *cmd, const char *str)
7850 {
7851         char *found;
7852
7853         if (WARN_ON(!strlen(cmd)))
7854                 return 0;
7855
7856         found = strstr(cmd, str);
7857         if (found)
7858                 return found - cmd;
7859
7860         return 0;
7861 }
7862
7863 /**
7864  * tracing_log_err - write an error to the tracing error log
7865  * @tr: The associated trace array for the error (NULL for top level array)
7866  * @loc: A string describing where the error occurred
7867  * @cmd: The tracing command that caused the error
7868  * @errs: The array of loc-specific static error strings
7869  * @type: The index into errs[], which produces the specific static err string
7870  * @pos: The position the caret should be placed in the cmd
7871  *
7872  * Writes an error into tracing/error_log of the form:
7873  *
7874  * <loc>: error: <text>
7875  *   Command: <cmd>
7876  *              ^
7877  *
7878  * tracing/error_log is a small log file containing the last
7879  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7880  * unless there has been a tracing error.  The error log can be
7881  * cleared, and its memory freed, by writing the empty string to it
7882  * in truncation mode, i.e. echo > tracing/error_log.
7883  *
7884  * NOTE: the @errs array along with the @type param are used to
7885  * produce a static error string - this string is not copied and saved
7886  * when the error is logged - only a pointer to it is saved.  See
7887  * existing callers for examples of how static strings are typically
7888  * defined for use with tracing_log_err().
7889  */
7890 void tracing_log_err(struct trace_array *tr,
7891                      const char *loc, const char *cmd,
7892                      const char **errs, u8 type, u16 pos)
7893 {
7894         struct tracing_log_err *err;
7895         int len = 0;
7896
7897         if (!tr)
7898                 tr = &global_trace;
7899
7900         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7901
7902         mutex_lock(&tracing_err_log_lock);
7903         err = get_tracing_log_err(tr, len);
7904         if (PTR_ERR(err) == -ENOMEM) {
7905                 mutex_unlock(&tracing_err_log_lock);
7906                 return;
7907         }
7908
7909         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7910         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7911
7912         err->info.errs = errs;
7913         err->info.type = type;
7914         err->info.pos = pos;
7915         err->info.ts = local_clock();
7916
7917         list_add_tail(&err->list, &tr->err_log);
7918         mutex_unlock(&tracing_err_log_lock);
7919 }
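/*
 * Illustrative sketch (hypothetical names; see the existing in-tree
 * callers for real usage): pairing a static error-string array with
 * tracing_log_err() and err_pos().
 *
 *	static const char *foo_errs[] = {
 *		"Unknown field requested",	// FOO_ERR_BAD_FIELD == 0
 *		"Too many arguments",		// FOO_ERR_TOO_MANY  == 1
 *	};
 *
 *	// Logs "foo: error: Unknown field requested" plus the command,
 *	// with the caret under the first "bad_field" within cmd:
 *	tracing_log_err(tr, "foo", cmd, foo_errs, FOO_ERR_BAD_FIELD,
 *			err_pos(cmd, "bad_field"));
 */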
7920
7921 static void clear_tracing_err_log(struct trace_array *tr)
7922 {
7923         struct tracing_log_err *err, *next;
7924
7925         mutex_lock(&tracing_err_log_lock);
7926         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7927                 list_del(&err->list);
7928                 free_tracing_log_err(err);
7929         }
7930
7931         tr->n_err_log_entries = 0;
7932         mutex_unlock(&tracing_err_log_lock);
7933 }
7934
7935 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7936 {
7937         struct trace_array *tr = m->private;
7938
7939         mutex_lock(&tracing_err_log_lock);
7940
7941         return seq_list_start(&tr->err_log, *pos);
7942 }
7943
7944 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7945 {
7946         struct trace_array *tr = m->private;
7947
7948         return seq_list_next(v, &tr->err_log, pos);
7949 }
7950
7951 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7952 {
7953         mutex_unlock(&tracing_err_log_lock);
7954 }
7955
7956 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7957 {
7958         u16 i;
7959
7960         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7961                 seq_putc(m, ' ');
7962         for (i = 0; i < pos; i++)
7963                 seq_putc(m, ' ');
7964         seq_puts(m, "^\n");
7965 }
7966
7967 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7968 {
7969         struct tracing_log_err *err = v;
7970
7971         if (err) {
7972                 const char *err_text = err->info.errs[err->info.type];
7973                 u64 sec = err->info.ts;
7974                 u32 nsec;
7975
7976                 nsec = do_div(sec, NSEC_PER_SEC);
7977                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7978                            err->loc, err_text);
7979                 seq_printf(m, "%s", err->cmd);
7980                 tracing_err_log_show_pos(m, err->info.pos);
7981         }
7982
7983         return 0;
7984 }
7985
7986 static const struct seq_operations tracing_err_log_seq_ops = {
7987         .start  = tracing_err_log_seq_start,
7988         .next   = tracing_err_log_seq_next,
7989         .stop   = tracing_err_log_seq_stop,
7990         .show   = tracing_err_log_seq_show
7991 };
7992
7993 static int tracing_err_log_open(struct inode *inode, struct file *file)
7994 {
7995         struct trace_array *tr = inode->i_private;
7996         int ret = 0;
7997
7998         ret = tracing_check_open_get_tr(tr);
7999         if (ret)
8000                 return ret;
8001
8002         /* If this file was opened for write, then erase contents */
8003         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8004                 clear_tracing_err_log(tr);
8005
8006         if (file->f_mode & FMODE_READ) {
8007                 ret = seq_open(file, &tracing_err_log_seq_ops);
8008                 if (!ret) {
8009                         struct seq_file *m = file->private_data;
8010                         m->private = tr;
8011                 } else {
8012                         trace_array_put(tr);
8013                 }
8014         }
8015         return ret;
8016 }
8017
8018 static ssize_t tracing_err_log_write(struct file *file,
8019                                      const char __user *buffer,
8020                                      size_t count, loff_t *ppos)
8021 {
8022         return count;
8023 }
8024
8025 static int tracing_err_log_release(struct inode *inode, struct file *file)
8026 {
8027         struct trace_array *tr = inode->i_private;
8028
8029         trace_array_put(tr);
8030
8031         if (file->f_mode & FMODE_READ)
8032                 seq_release(inode, file);
8033
8034         return 0;
8035 }
8036
8037 static const struct file_operations tracing_err_log_fops = {
8038         .open           = tracing_err_log_open,
8039         .write          = tracing_err_log_write,
8040         .read           = seq_read,
8041         .llseek         = seq_lseek,
8042         .release        = tracing_err_log_release,
8043 };
8044
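/*
 * The tracing_buffers_*() functions below back the per-CPU trace_pipe_raw
 * files (see tracing_init_tracefs_percpu()).  They hand raw ring-buffer
 * pages to user space, either by copying the swapped-out "spare" page on
 * read() or by handing refcounted page references to a pipe on splice().
 */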
8045 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8046 {
8047         struct trace_array *tr = inode->i_private;
8048         struct ftrace_buffer_info *info;
8049         int ret;
8050
8051         ret = tracing_check_open_get_tr(tr);
8052         if (ret)
8053                 return ret;
8054
8055         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8056         if (!info) {
8057                 trace_array_put(tr);
8058                 return -ENOMEM;
8059         }
8060
8061         mutex_lock(&trace_types_lock);
8062
8063         info->iter.tr           = tr;
8064         info->iter.cpu_file     = tracing_get_cpu(inode);
8065         info->iter.trace        = tr->current_trace;
8066         info->iter.array_buffer = &tr->array_buffer;
8067         info->spare             = NULL;
8068         /* Force reading ring buffer for first read */
8069         info->read              = (unsigned int)-1;
8070
8071         filp->private_data = info;
8072
8073         tr->trace_ref++;
8074
8075         mutex_unlock(&trace_types_lock);
8076
8077         ret = nonseekable_open(inode, filp);
8078         if (ret < 0)
8079                 trace_array_put(tr);
8080
8081         return ret;
8082 }
8083
8084 static __poll_t
8085 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8086 {
8087         struct ftrace_buffer_info *info = filp->private_data;
8088         struct trace_iterator *iter = &info->iter;
8089
8090         return trace_poll(iter, filp, poll_table);
8091 }
8092
8093 static ssize_t
8094 tracing_buffers_read(struct file *filp, char __user *ubuf,
8095                      size_t count, loff_t *ppos)
8096 {
8097         struct ftrace_buffer_info *info = filp->private_data;
8098         struct trace_iterator *iter = &info->iter;
8099         ssize_t ret = 0;
8100         ssize_t size;
8101
8102         if (!count)
8103                 return 0;
8104
8105 #ifdef CONFIG_TRACER_MAX_TRACE
8106         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8107                 return -EBUSY;
8108 #endif
8109
8110         if (!info->spare) {
8111                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8112                                                           iter->cpu_file);
8113                 if (IS_ERR(info->spare)) {
8114                         ret = PTR_ERR(info->spare);
8115                         info->spare = NULL;
8116                 } else {
8117                         info->spare_cpu = iter->cpu_file;
8118                 }
8119         }
8120         if (!info->spare)
8121                 return ret;
8122
8123         /* Do we have previous read data to read? */
8124         if (info->read < PAGE_SIZE)
8125                 goto read;
8126
8127  again:
8128         trace_access_lock(iter->cpu_file);
8129         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8130                                     &info->spare,
8131                                     count,
8132                                     iter->cpu_file, 0);
8133         trace_access_unlock(iter->cpu_file);
8134
8135         if (ret < 0) {
8136                 if (trace_empty(iter)) {
8137                         if ((filp->f_flags & O_NONBLOCK))
8138                                 return -EAGAIN;
8139
8140                         ret = wait_on_pipe(iter, 0);
8141                         if (ret)
8142                                 return ret;
8143
8144                         goto again;
8145                 }
8146                 return 0;
8147         }
8148
8149         info->read = 0;
8150  read:
8151         size = PAGE_SIZE - info->read;
8152         if (size > count)
8153                 size = count;
8154
8155         ret = copy_to_user(ubuf, info->spare + info->read, size);
8156         if (ret == size)
8157                 return -EFAULT;
8158
8159         size -= ret;
8160
8161         *ppos += size;
8162         info->read += size;
8163
8164         return size;
8165 }
8166
8167 static int tracing_buffers_release(struct inode *inode, struct file *file)
8168 {
8169         struct ftrace_buffer_info *info = file->private_data;
8170         struct trace_iterator *iter = &info->iter;
8171
8172         mutex_lock(&trace_types_lock);
8173
8174         iter->tr->trace_ref--;
8175
8176         __trace_array_put(iter->tr);
8177
8178         iter->wait_index++;
8179         /* Make sure the waiters see the new wait_index */
8180         smp_wmb();
8181
8182         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8183
8184         if (info->spare)
8185                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8186                                            info->spare_cpu, info->spare);
8187         kvfree(info);
8188
8189         mutex_unlock(&trace_types_lock);
8190
8191         return 0;
8192 }
8193
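/*
 * A buffer_ref is a refcounted handle to a ring-buffer read page that has
 * been handed to a pipe by tracing_buffers_splice_read().  When the last
 * reference is dropped, buffer_ref_release() returns the page to the ring
 * buffer via ring_buffer_free_read_page().
 */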
8194 struct buffer_ref {
8195         struct trace_buffer     *buffer;
8196         void                    *page;
8197         int                     cpu;
8198         refcount_t              refcount;
8199 };
8200
8201 static void buffer_ref_release(struct buffer_ref *ref)
8202 {
8203         if (!refcount_dec_and_test(&ref->refcount))
8204                 return;
8205         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8206         kfree(ref);
8207 }
8208
8209 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8210                                     struct pipe_buffer *buf)
8211 {
8212         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8213
8214         buffer_ref_release(ref);
8215         buf->private = 0;
8216 }
8217
8218 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8219                                 struct pipe_buffer *buf)
8220 {
8221         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8222
8223         if (refcount_read(&ref->refcount) > INT_MAX/2)
8224                 return false;
8225
8226         refcount_inc(&ref->refcount);
8227         return true;
8228 }
8229
8230 /* Pipe buffer operations for a buffer. */
8231 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8232         .release                = buffer_pipe_buf_release,
8233         .get                    = buffer_pipe_buf_get,
8234 };
8235
8236 /*
8237  * Callback from splice_to_pipe(), used to release any pages left in
8238  * the spd if we errored out while filling the pipe.
8239  */
8240 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8241 {
8242         struct buffer_ref *ref =
8243                 (struct buffer_ref *)spd->partial[i].private;
8244
8245         buffer_ref_release(ref);
8246         spd->partial[i].private = 0;
8247 }
8248
8249 static ssize_t
8250 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8251                             struct pipe_inode_info *pipe, size_t len,
8252                             unsigned int flags)
8253 {
8254         struct ftrace_buffer_info *info = file->private_data;
8255         struct trace_iterator *iter = &info->iter;
8256         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8257         struct page *pages_def[PIPE_DEF_BUFFERS];
8258         struct splice_pipe_desc spd = {
8259                 .pages          = pages_def,
8260                 .partial        = partial_def,
8261                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8262                 .ops            = &buffer_pipe_buf_ops,
8263                 .spd_release    = buffer_spd_release,
8264         };
8265         struct buffer_ref *ref;
8266         int entries, i;
8267         ssize_t ret = 0;
8268
8269 #ifdef CONFIG_TRACER_MAX_TRACE
8270         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8271                 return -EBUSY;
8272 #endif
8273
8274         if (*ppos & (PAGE_SIZE - 1))
8275                 return -EINVAL;
8276
8277         if (len & (PAGE_SIZE - 1)) {
8278                 if (len < PAGE_SIZE)
8279                         return -EINVAL;
8280                 len &= PAGE_MASK;
8281         }
8282
8283         if (splice_grow_spd(pipe, &spd))
8284                 return -ENOMEM;
8285
8286  again:
8287         trace_access_lock(iter->cpu_file);
8288         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8289
8290         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8291                 struct page *page;
8292                 int r;
8293
8294                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8295                 if (!ref) {
8296                         ret = -ENOMEM;
8297                         break;
8298                 }
8299
8300                 refcount_set(&ref->refcount, 1);
8301                 ref->buffer = iter->array_buffer->buffer;
8302                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8303                 if (IS_ERR(ref->page)) {
8304                         ret = PTR_ERR(ref->page);
8305                         ref->page = NULL;
8306                         kfree(ref);
8307                         break;
8308                 }
8309                 ref->cpu = iter->cpu_file;
8310
8311                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8312                                           len, iter->cpu_file, 1);
8313                 if (r < 0) {
8314                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8315                                                    ref->page);
8316                         kfree(ref);
8317                         break;
8318                 }
8319
8320                 page = virt_to_page(ref->page);
8321
8322                 spd.pages[i] = page;
8323                 spd.partial[i].len = PAGE_SIZE;
8324                 spd.partial[i].offset = 0;
8325                 spd.partial[i].private = (unsigned long)ref;
8326                 spd.nr_pages++;
8327                 *ppos += PAGE_SIZE;
8328
8329                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8330         }
8331
8332         trace_access_unlock(iter->cpu_file);
8333         spd.nr_pages = i;
8334
8335         /* did we read anything? */
8336         if (!spd.nr_pages) {
8337                 long wait_index;
8338
8339                 if (ret)
8340                         goto out;
8341
8342                 ret = -EAGAIN;
8343                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8344                         goto out;
8345
8346                 wait_index = READ_ONCE(iter->wait_index);
8347
8348                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8349                 if (ret)
8350                         goto out;
8351
8352                 /* No need to wait after waking up when tracing is off */
8353                 if (!tracer_tracing_is_on(iter->tr))
8354                         goto out;
8355
8356                 /* Make sure we see the new wait_index */
8357                 smp_rmb();
8358                 if (wait_index != iter->wait_index)
8359                         goto out;
8360
8361                 goto again;
8362         }
8363
8364         ret = splice_to_pipe(pipe, &spd);
8365 out:
8366         splice_shrink_spd(&spd);
8367
8368         return ret;
8369 }
8370
8371 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8372 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8373 {
8374         struct ftrace_buffer_info *info = file->private_data;
8375         struct trace_iterator *iter = &info->iter;
8376
8377         if (cmd)
8378                 return -ENOIOCTLCMD;
8379
8380         mutex_lock(&trace_types_lock);
8381
8382         iter->wait_index++;
8383         /* Make sure the waiters see the new wait_index */
8384         smp_wmb();
8385
8386         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8387
8388         mutex_unlock(&trace_types_lock);
8389         return 0;
8390 }
8391
8392 static const struct file_operations tracing_buffers_fops = {
8393         .open           = tracing_buffers_open,
8394         .read           = tracing_buffers_read,
8395         .poll           = tracing_buffers_poll,
8396         .release        = tracing_buffers_release,
8397         .splice_read    = tracing_buffers_splice_read,
8398         .unlocked_ioctl = tracing_buffers_ioctl,
8399         .llseek         = no_llseek,
8400 };
8401
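/*
 * Backs the per_cpu/cpu<N>/stats files (see tracing_init_tracefs_percpu()):
 * per-CPU entry, overrun, byte, timestamp, dropped and read-event counts
 * rendered through a trace_seq.
 */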
8402 static ssize_t
8403 tracing_stats_read(struct file *filp, char __user *ubuf,
8404                    size_t count, loff_t *ppos)
8405 {
8406         struct inode *inode = file_inode(filp);
8407         struct trace_array *tr = inode->i_private;
8408         struct array_buffer *trace_buf = &tr->array_buffer;
8409         int cpu = tracing_get_cpu(inode);
8410         struct trace_seq *s;
8411         unsigned long cnt;
8412         unsigned long long t;
8413         unsigned long usec_rem;
8414
8415         s = kmalloc(sizeof(*s), GFP_KERNEL);
8416         if (!s)
8417                 return -ENOMEM;
8418
8419         trace_seq_init(s);
8420
8421         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8422         trace_seq_printf(s, "entries: %ld\n", cnt);
8423
8424         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8425         trace_seq_printf(s, "overrun: %ld\n", cnt);
8426
8427         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8428         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8429
8430         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8431         trace_seq_printf(s, "bytes: %ld\n", cnt);
8432
8433         if (trace_clocks[tr->clock_id].in_ns) {
8434                 /* local or global for trace_clock */
8435                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8436                 usec_rem = do_div(t, USEC_PER_SEC);
8437                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8438                                                                 t, usec_rem);
8439
8440                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8441                 usec_rem = do_div(t, USEC_PER_SEC);
8442                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8443         } else {
8444                 /* counter or tsc mode for trace_clock */
8445                 trace_seq_printf(s, "oldest event ts: %llu\n",
8446                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8447
8448                 trace_seq_printf(s, "now ts: %llu\n",
8449                                 ring_buffer_time_stamp(trace_buf->buffer));
8450         }
8451
8452         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8453         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8454
8455         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8456         trace_seq_printf(s, "read events: %ld\n", cnt);
8457
8458         count = simple_read_from_buffer(ubuf, count, ppos,
8459                                         s->buffer, trace_seq_used(s));
8460
8461         kfree(s);
8462
8463         return count;
8464 }
8465
8466 static const struct file_operations tracing_stats_fops = {
8467         .open           = tracing_open_generic_tr,
8468         .read           = tracing_stats_read,
8469         .llseek         = generic_file_llseek,
8470         .release        = tracing_release_generic_tr,
8471 };
8472
8473 #ifdef CONFIG_DYNAMIC_FTRACE
8474
8475 static ssize_t
8476 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8477                   size_t cnt, loff_t *ppos)
8478 {
8479         ssize_t ret;
8480         char *buf;
8481         int r;
8482
8483         /* 256 should be plenty to hold the amount needed */
8484         buf = kmalloc(256, GFP_KERNEL);
8485         if (!buf)
8486                 return -ENOMEM;
8487
8488         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8489                       ftrace_update_tot_cnt,
8490                       ftrace_number_of_pages,
8491                       ftrace_number_of_groups);
8492
8493         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8494         kfree(buf);
8495         return ret;
8496 }
8497
8498 static const struct file_operations tracing_dyn_info_fops = {
8499         .open           = tracing_open_generic,
8500         .read           = tracing_read_dyn_info,
8501         .llseek         = generic_file_llseek,
8502 };
8503 #endif /* CONFIG_DYNAMIC_FTRACE */
8504
8505 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8506 static void
8507 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8508                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8509                 void *data)
8510 {
8511         tracing_snapshot_instance(tr);
8512 }
8513
8514 static void
8515 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8516                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8517                       void *data)
8518 {
8519         struct ftrace_func_mapper *mapper = data;
8520         long *count = NULL;
8521
8522         if (mapper)
8523                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8524
8525         if (count) {
8526
8527                 if (*count <= 0)
8528                         return;
8529
8530                 (*count)--;
8531         }
8532
8533         tracing_snapshot_instance(tr);
8534 }
8535
8536 static int
8537 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8538                       struct ftrace_probe_ops *ops, void *data)
8539 {
8540         struct ftrace_func_mapper *mapper = data;
8541         long *count = NULL;
8542
8543         seq_printf(m, "%ps:", (void *)ip);
8544
8545         seq_puts(m, "snapshot");
8546
8547         if (mapper)
8548                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8549
8550         if (count)
8551                 seq_printf(m, ":count=%ld\n", *count);
8552         else
8553                 seq_puts(m, ":unlimited\n");
8554
8555         return 0;
8556 }
8557
8558 static int
8559 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8560                      unsigned long ip, void *init_data, void **data)
8561 {
8562         struct ftrace_func_mapper *mapper = *data;
8563
8564         if (!mapper) {
8565                 mapper = allocate_ftrace_func_mapper();
8566                 if (!mapper)
8567                         return -ENOMEM;
8568                 *data = mapper;
8569         }
8570
8571         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8572 }
8573
8574 static void
8575 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8576                      unsigned long ip, void *data)
8577 {
8578         struct ftrace_func_mapper *mapper = data;
8579
8580         if (!ip) {
8581                 if (!mapper)
8582                         return;
8583                 free_ftrace_func_mapper(mapper, NULL);
8584                 return;
8585         }
8586
8587         ftrace_func_mapper_remove_ip(mapper, ip);
8588 }
8589
8590 static struct ftrace_probe_ops snapshot_probe_ops = {
8591         .func                   = ftrace_snapshot,
8592         .print                  = ftrace_snapshot_print,
8593 };
8594
8595 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8596         .func                   = ftrace_count_snapshot,
8597         .print                  = ftrace_snapshot_print,
8598         .init                   = ftrace_snapshot_init,
8599         .free                   = ftrace_snapshot_free,
8600 };
8601
8602 static int
8603 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8604                                char *glob, char *cmd, char *param, int enable)
8605 {
8606         struct ftrace_probe_ops *ops;
8607         void *count = (void *)-1;
8608         char *number;
8609         int ret;
8610
8611         if (!tr)
8612                 return -ENODEV;
8613
8614         /* hash funcs only work with set_ftrace_filter */
8615         if (!enable)
8616                 return -EINVAL;
8617
8618         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8619
8620         if (glob[0] == '!')
8621                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8622
8623         if (!param)
8624                 goto out_reg;
8625
8626         number = strsep(&param, ":");
8627
8628         if (!strlen(number))
8629                 goto out_reg;
8630
8631         /*
8632          * We use the callback data field (which is a pointer)
8633          * as our counter.
8634          */
8635         ret = kstrtoul(number, 0, (unsigned long *)&count);
8636         if (ret)
8637                 return ret;
8638
8639  out_reg:
8640         ret = tracing_alloc_snapshot_instance(tr);
8641         if (ret < 0)
8642                 goto out;
8643
8644         ret = register_ftrace_function_probe(glob, tr, ops, count);
8645
8646  out:
8647         return ret < 0 ? ret : 0;
8648 }
8649
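/*
 * Usage sketch for the "snapshot" command registered below (the function
 * name is only an example; any traceable function can be used):
 *
 *	echo 'do_sys_open:snapshot' > set_ftrace_filter    # snapshot on every hit
 *	echo 'do_sys_open:snapshot:5' > set_ftrace_filter  # only the first 5 hits
 *	echo '!do_sys_open:snapshot' > set_ftrace_filter   # remove the first form
 */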
8650 static struct ftrace_func_command ftrace_snapshot_cmd = {
8651         .name                   = "snapshot",
8652         .func                   = ftrace_trace_snapshot_callback,
8653 };
8654
8655 static __init int register_snapshot_cmd(void)
8656 {
8657         return register_ftrace_command(&ftrace_snapshot_cmd);
8658 }
8659 #else
8660 static inline __init int register_snapshot_cmd(void) { return 0; }
8661 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8662
8663 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8664 {
8665         if (WARN_ON(!tr->dir))
8666                 return ERR_PTR(-ENODEV);
8667
8668         /* Top directory uses NULL as the parent */
8669         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8670                 return NULL;
8671
8672         /* All sub buffers have a descriptor */
8673         return tr->dir;
8674 }
8675
8676 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8677 {
8678         struct dentry *d_tracer;
8679
8680         if (tr->percpu_dir)
8681                 return tr->percpu_dir;
8682
8683         d_tracer = tracing_get_dentry(tr);
8684         if (IS_ERR(d_tracer))
8685                 return NULL;
8686
8687         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8688
8689         MEM_FAIL(!tr->percpu_dir,
8690                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8691
8692         return tr->percpu_dir;
8693 }
8694
8695 static struct dentry *
8696 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8697                       void *data, long cpu, const struct file_operations *fops)
8698 {
8699         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8700
8701         if (ret) /* See tracing_get_cpu() */
8702                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8703         return ret;
8704 }
8705
8706 static void
8707 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8708 {
8709         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8710         struct dentry *d_cpu;
8711         char cpu_dir[30]; /* 30 characters should be more than enough */
8712
8713         if (!d_percpu)
8714                 return;
8715
8716         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8717         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8718         if (!d_cpu) {
8719                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8720                 return;
8721         }
8722
8723         /* per cpu trace_pipe */
8724         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8725                                 tr, cpu, &tracing_pipe_fops);
8726
8727         /* per cpu trace */
8728         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8729                                 tr, cpu, &tracing_fops);
8730
8731         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8732                                 tr, cpu, &tracing_buffers_fops);
8733
8734         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8735                                 tr, cpu, &tracing_stats_fops);
8736
8737         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8738                                 tr, cpu, &tracing_entries_fops);
8739
8740 #ifdef CONFIG_TRACER_SNAPSHOT
8741         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8742                                 tr, cpu, &snapshot_fops);
8743
8744         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8745                                 tr, cpu, &snapshot_raw_fops);
8746 #endif
8747 }
8748
8749 #ifdef CONFIG_FTRACE_SELFTEST
8750 /* Let selftest have access to static functions in this file */
8751 #include "trace_selftest.c"
8752 #endif
8753
8754 static ssize_t
8755 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8756                         loff_t *ppos)
8757 {
8758         struct trace_option_dentry *topt = filp->private_data;
8759         char *buf;
8760
8761         if (topt->flags->val & topt->opt->bit)
8762                 buf = "1\n";
8763         else
8764                 buf = "0\n";
8765
8766         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8767 }
8768
8769 static ssize_t
8770 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8771                          loff_t *ppos)
8772 {
8773         struct trace_option_dentry *topt = filp->private_data;
8774         unsigned long val;
8775         int ret;
8776
8777         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8778         if (ret)
8779                 return ret;
8780
8781         if (val != 0 && val != 1)
8782                 return -EINVAL;
8783
8784         if (!!(topt->flags->val & topt->opt->bit) != val) {
8785                 mutex_lock(&trace_types_lock);
8786                 ret = __set_tracer_option(topt->tr, topt->flags,
8787                                           topt->opt, !val);
8788                 mutex_unlock(&trace_types_lock);
8789                 if (ret)
8790                         return ret;
8791         }
8792
8793         *ppos += cnt;
8794
8795         return cnt;
8796 }
8797
8798
8799 static const struct file_operations trace_options_fops = {
8800         .open = tracing_open_generic,
8801         .read = trace_options_read,
8802         .write = trace_options_write,
8803         .llseek = generic_file_llseek,
8804 };
8805
8806 /*
8807  * In order to pass in both the trace_array descriptor as well as the index
8808  * to the flag that the trace option file represents, the trace_array
8809  * has a character array of trace_flags_index[], which holds the index
8810  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8811  * The address of this character array is passed to the flag option file
8812  * read/write callbacks.
8813  *
8814  * In order to extract both the index and the trace_array descriptor,
8815  * get_tr_index() uses the following algorithm.
8816  *
8817  *   idx = *ptr;
8818  *
8819  * Dereferencing the pointer yields the index, because the array was
8820  * initialized so that index[i] == i (e.g. index[1] == 1).
8821  *
8822  * Then, subtracting that index from the pointer gives the start of the
8823  * array itself:
8824  *
8825  *   ptr - idx == &index[0]
8826  *
8827  * A simple container_of() on that pointer then yields the trace_array
8828  * descriptor.
8829  */
8830 static void get_tr_index(void *data, struct trace_array **ptr,
8831                          unsigned int *pindex)
8832 {
8833         *pindex = *(unsigned char *)data;
8834
8835         *ptr = container_of(data - *pindex, struct trace_array,
8836                             trace_flags_index);
8837 }
8838
8839 static ssize_t
8840 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8841                         loff_t *ppos)
8842 {
8843         void *tr_index = filp->private_data;
8844         struct trace_array *tr;
8845         unsigned int index;
8846         char *buf;
8847
8848         get_tr_index(tr_index, &tr, &index);
8849
8850         if (tr->trace_flags & (1 << index))
8851                 buf = "1\n";
8852         else
8853                 buf = "0\n";
8854
8855         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8856 }
8857
8858 static ssize_t
8859 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8860                          loff_t *ppos)
8861 {
8862         void *tr_index = filp->private_data;
8863         struct trace_array *tr;
8864         unsigned int index;
8865         unsigned long val;
8866         int ret;
8867
8868         get_tr_index(tr_index, &tr, &index);
8869
8870         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8871         if (ret)
8872                 return ret;
8873
8874         if (val != 0 && val != 1)
8875                 return -EINVAL;
8876
8877         mutex_lock(&event_mutex);
8878         mutex_lock(&trace_types_lock);
8879         ret = set_tracer_flag(tr, 1 << index, val);
8880         mutex_unlock(&trace_types_lock);
8881         mutex_unlock(&event_mutex);
8882
8883         if (ret < 0)
8884                 return ret;
8885
8886         *ppos += cnt;
8887
8888         return cnt;
8889 }
8890
8891 static const struct file_operations trace_options_core_fops = {
8892         .open = tracing_open_generic,
8893         .read = trace_options_core_read,
8894         .write = trace_options_core_write,
8895         .llseek = generic_file_llseek,
8896 };
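/*
 * The core trace options each get a file under the instance's options/
 * directory (created by create_trace_options_dir() below) backed by these
 * fops.  Writing "1" or "0" to such a file sets or clears the corresponding
 * flag, e.g.
 *
 *	echo 1 > options/<option-name>
 */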
8897
8898 struct dentry *trace_create_file(const char *name,
8899                                  umode_t mode,
8900                                  struct dentry *parent,
8901                                  void *data,
8902                                  const struct file_operations *fops)
8903 {
8904         struct dentry *ret;
8905
8906         ret = tracefs_create_file(name, mode, parent, data, fops);
8907         if (!ret)
8908                 pr_warn("Could not create tracefs '%s' entry\n", name);
8909
8910         return ret;
8911 }
8912
8913
8914 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8915 {
8916         struct dentry *d_tracer;
8917
8918         if (tr->options)
8919                 return tr->options;
8920
8921         d_tracer = tracing_get_dentry(tr);
8922         if (IS_ERR(d_tracer))
8923                 return NULL;
8924
8925         tr->options = tracefs_create_dir("options", d_tracer);
8926         if (!tr->options) {
8927                 pr_warn("Could not create tracefs directory 'options'\n");
8928                 return NULL;
8929         }
8930
8931         return tr->options;
8932 }
8933
8934 static void
8935 create_trace_option_file(struct trace_array *tr,
8936                          struct trace_option_dentry *topt,
8937                          struct tracer_flags *flags,
8938                          struct tracer_opt *opt)
8939 {
8940         struct dentry *t_options;
8941
8942         t_options = trace_options_init_dentry(tr);
8943         if (!t_options)
8944                 return;
8945
8946         topt->flags = flags;
8947         topt->opt = opt;
8948         topt->tr = tr;
8949
8950         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8951                                         t_options, topt, &trace_options_fops);
8952
8953 }
8954
8955 static void
8956 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8957 {
8958         struct trace_option_dentry *topts;
8959         struct trace_options *tr_topts;
8960         struct tracer_flags *flags;
8961         struct tracer_opt *opts;
8962         int cnt;
8963         int i;
8964
8965         if (!tracer)
8966                 return;
8967
8968         flags = tracer->flags;
8969
8970         if (!flags || !flags->opts)
8971                 return;
8972
8973         /*
8974          * If this is an instance, only create flags for tracers
8975          * the instance may have.
8976          */
8977         if (!trace_ok_for_array(tracer, tr))
8978                 return;
8979
8980         for (i = 0; i < tr->nr_topts; i++) {
8981                 /* Make sure there are no duplicate flags. */
8982                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8983                         return;
8984         }
8985
8986         opts = flags->opts;
8987
8988         for (cnt = 0; opts[cnt].name; cnt++)
8989                 ;
8990
8991         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8992         if (!topts)
8993                 return;
8994
8995         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8996                             GFP_KERNEL);
8997         if (!tr_topts) {
8998                 kfree(topts);
8999                 return;
9000         }
9001
9002         tr->topts = tr_topts;
9003         tr->topts[tr->nr_topts].tracer = tracer;
9004         tr->topts[tr->nr_topts].topts = topts;
9005         tr->nr_topts++;
9006
9007         for (cnt = 0; opts[cnt].name; cnt++) {
9008                 create_trace_option_file(tr, &topts[cnt], flags,
9009                                          &opts[cnt]);
9010                 MEM_FAIL(topts[cnt].entry == NULL,
9011                           "Failed to create trace option: %s",
9012                           opts[cnt].name);
9013         }
9014 }
9015
9016 static struct dentry *
9017 create_trace_option_core_file(struct trace_array *tr,
9018                               const char *option, long index)
9019 {
9020         struct dentry *t_options;
9021
9022         t_options = trace_options_init_dentry(tr);
9023         if (!t_options)
9024                 return NULL;
9025
9026         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9027                                  (void *)&tr->trace_flags_index[index],
9028                                  &trace_options_core_fops);
9029 }
9030
9031 static void create_trace_options_dir(struct trace_array *tr)
9032 {
9033         struct dentry *t_options;
9034         bool top_level = tr == &global_trace;
9035         int i;
9036
9037         t_options = trace_options_init_dentry(tr);
9038         if (!t_options)
9039                 return;
9040
9041         for (i = 0; trace_options[i]; i++) {
9042                 if (top_level ||
9043                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9044                         create_trace_option_core_file(tr, trace_options[i], i);
9045         }
9046 }
9047
9048 static ssize_t
9049 rb_simple_read(struct file *filp, char __user *ubuf,
9050                size_t cnt, loff_t *ppos)
9051 {
9052         struct trace_array *tr = filp->private_data;
9053         char buf[64];
9054         int r;
9055
9056         r = tracer_tracing_is_on(tr);
9057         r = sprintf(buf, "%d\n", r);
9058
9059         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9060 }
9061
9062 static ssize_t
9063 rb_simple_write(struct file *filp, const char __user *ubuf,
9064                 size_t cnt, loff_t *ppos)
9065 {
9066         struct trace_array *tr = filp->private_data;
9067         struct trace_buffer *buffer = tr->array_buffer.buffer;
9068         unsigned long val;
9069         int ret;
9070
9071         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9072         if (ret)
9073                 return ret;
9074
9075         if (buffer) {
9076                 mutex_lock(&trace_types_lock);
9077                 if (!!val == tracer_tracing_is_on(tr)) {
9078                         val = 0; /* do nothing */
9079                 } else if (val) {
9080                         tracer_tracing_on(tr);
9081                         if (tr->current_trace->start)
9082                                 tr->current_trace->start(tr);
9083                 } else {
9084                         tracer_tracing_off(tr);
9085                         if (tr->current_trace->stop)
9086                                 tr->current_trace->stop(tr);
9087                         /* Wake up any waiters */
9088                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9089                 }
9090                 mutex_unlock(&trace_types_lock);
9091         }
9092
9093         (*ppos)++;
9094
9095         return cnt;
9096 }
9097
9098 static const struct file_operations rb_simple_fops = {
9099         .open           = tracing_open_generic_tr,
9100         .read           = rb_simple_read,
9101         .write          = rb_simple_write,
9102         .release        = tracing_release_generic_tr,
9103         .llseek         = default_llseek,
9104 };
9105
9106 static ssize_t
9107 buffer_percent_read(struct file *filp, char __user *ubuf,
9108                     size_t cnt, loff_t *ppos)
9109 {
9110         struct trace_array *tr = filp->private_data;
9111         char buf[64];
9112         int r;
9113
9114         r = tr->buffer_percent;
9115         r = sprintf(buf, "%d\n", r);
9116
9117         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9118 }
9119
9120 static ssize_t
9121 buffer_percent_write(struct file *filp, const char __user *ubuf,
9122                      size_t cnt, loff_t *ppos)
9123 {
9124         struct trace_array *tr = filp->private_data;
9125         unsigned long val;
9126         int ret;
9127
9128         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9129         if (ret)
9130                 return ret;
9131
9132         if (val > 100)
9133                 return -EINVAL;
9134
9135         if (!val)
9136                 val = 1;
9137
9138         tr->buffer_percent = val;
9139
9140         (*ppos)++;
9141
9142         return cnt;
9143 }
9144
9145 static const struct file_operations buffer_percent_fops = {
9146         .open           = tracing_open_generic_tr,
9147         .read           = buffer_percent_read,
9148         .write          = buffer_percent_write,
9149         .release        = tracing_release_generic_tr,
9150         .llseek         = default_llseek,
9151 };
9152
9153 static struct dentry *trace_instance_dir;
9154
9155 static void
9156 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9157
9158 static int
9159 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9160 {
9161         enum ring_buffer_flags rb_flags;
9162
9163         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9164
9165         buf->tr = tr;
9166
9167         buf->buffer = ring_buffer_alloc(size, rb_flags);
9168         if (!buf->buffer)
9169                 return -ENOMEM;
9170
9171         buf->data = alloc_percpu(struct trace_array_cpu);
9172         if (!buf->data) {
9173                 ring_buffer_free(buf->buffer);
9174                 buf->buffer = NULL;
9175                 return -ENOMEM;
9176         }
9177
9178         /* Allocate the first page for all buffers */
9179         set_buffer_entries(&tr->array_buffer,
9180                            ring_buffer_size(tr->array_buffer.buffer, 0));
9181
9182         return 0;
9183 }
9184
9185 static void free_trace_buffer(struct array_buffer *buf)
9186 {
9187         if (buf->buffer) {
9188                 ring_buffer_free(buf->buffer);
9189                 buf->buffer = NULL;
9190                 free_percpu(buf->data);
9191                 buf->data = NULL;
9192         }
9193 }
9194
9195 static int allocate_trace_buffers(struct trace_array *tr, int size)
9196 {
9197         int ret;
9198
9199         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9200         if (ret)
9201                 return ret;
9202
9203 #ifdef CONFIG_TRACER_MAX_TRACE
9204         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9205                                     allocate_snapshot ? size : 1);
9206         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9207                 free_trace_buffer(&tr->array_buffer);
9208                 return -ENOMEM;
9209         }
9210         tr->allocated_snapshot = allocate_snapshot;
9211
9212         /*
9213          * Only the top level trace array gets its snapshot allocated
9214          * from the kernel command line.
9215          */
9216         allocate_snapshot = false;
9217 #endif
9218
9219         return 0;
9220 }
9221
9222 static void free_trace_buffers(struct trace_array *tr)
9223 {
9224         if (!tr)
9225                 return;
9226
9227         free_trace_buffer(&tr->array_buffer);
9228
9229 #ifdef CONFIG_TRACER_MAX_TRACE
9230         free_trace_buffer(&tr->max_buffer);
9231 #endif
9232 }
9233
9234 static void init_trace_flags_index(struct trace_array *tr)
9235 {
9236         int i;
9237
9238         /* Used by the trace options files */
9239         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9240                 tr->trace_flags_index[i] = i;
9241 }
9242
9243 static void __update_tracer_options(struct trace_array *tr)
9244 {
9245         struct tracer *t;
9246
9247         for (t = trace_types; t; t = t->next)
9248                 add_tracer_options(tr, t);
9249 }
9250
9251 static void update_tracer_options(struct trace_array *tr)
9252 {
9253         mutex_lock(&trace_types_lock);
9254         tracer_options_updated = true;
9255         __update_tracer_options(tr);
9256         mutex_unlock(&trace_types_lock);
9257 }
9258
9259 /* Must have trace_types_lock held */
9260 struct trace_array *trace_array_find(const char *instance)
9261 {
9262         struct trace_array *tr, *found = NULL;
9263
9264         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9265                 if (tr->name && strcmp(tr->name, instance) == 0) {
9266                         found = tr;
9267                         break;
9268                 }
9269         }
9270
9271         return found;
9272 }
9273
9274 struct trace_array *trace_array_find_get(const char *instance)
9275 {
9276         struct trace_array *tr;
9277
9278         mutex_lock(&trace_types_lock);
9279         tr = trace_array_find(instance);
9280         if (tr)
9281                 tr->ref++;
9282         mutex_unlock(&trace_types_lock);
9283
9284         return tr;
9285 }
9286
9287 static int trace_array_create_dir(struct trace_array *tr)
9288 {
9289         int ret;
9290
9291         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9292         if (!tr->dir)
9293                 return -EINVAL;
9294
9295         ret = event_trace_add_tracer(tr->dir, tr);
9296         if (ret) {
9297                 tracefs_remove(tr->dir);
9298                 return ret;
9299         }
9300
9301         init_tracer_tracefs(tr, tr->dir);
9302         __update_tracer_options(tr);
9303
9304         return ret;
9305 }
9306
9307 static struct trace_array *trace_array_create(const char *name)
9308 {
9309         struct trace_array *tr;
9310         int ret;
9311
9312         ret = -ENOMEM;
9313         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9314         if (!tr)
9315                 return ERR_PTR(ret);
9316
9317         tr->name = kstrdup(name, GFP_KERNEL);
9318         if (!tr->name)
9319                 goto out_free_tr;
9320
9321         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9322                 goto out_free_tr;
9323
9324         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9325
9326         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9327
9328         raw_spin_lock_init(&tr->start_lock);
9329
9330         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9331
9332         tr->current_trace = &nop_trace;
9333
9334         INIT_LIST_HEAD(&tr->systems);
9335         INIT_LIST_HEAD(&tr->events);
9336         INIT_LIST_HEAD(&tr->hist_vars);
9337         INIT_LIST_HEAD(&tr->err_log);
9338
9339         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9340                 goto out_free_tr;
9341
9342         if (ftrace_allocate_ftrace_ops(tr) < 0)
9343                 goto out_free_tr;
9344
9345         ftrace_init_trace_array(tr);
9346
9347         init_trace_flags_index(tr);
9348
9349         if (trace_instance_dir) {
9350                 ret = trace_array_create_dir(tr);
9351                 if (ret)
9352                         goto out_free_tr;
9353         } else
9354                 __trace_early_add_events(tr);
9355
9356         list_add(&tr->list, &ftrace_trace_arrays);
9357
9358         tr->ref++;
9359
9360         return tr;
9361
9362  out_free_tr:
9363         ftrace_free_ftrace_ops(tr);
9364         free_trace_buffers(tr);
9365         free_cpumask_var(tr->tracing_cpumask);
9366         kfree(tr->name);
9367         kfree(tr);
9368
9369         return ERR_PTR(ret);
9370 }
9371
9372 static int instance_mkdir(const char *name)
9373 {
9374         struct trace_array *tr;
9375         int ret;
9376
9377         mutex_lock(&event_mutex);
9378         mutex_lock(&trace_types_lock);
9379
9380         ret = -EEXIST;
9381         if (trace_array_find(name))
9382                 goto out_unlock;
9383
9384         tr = trace_array_create(name);
9385
9386         ret = PTR_ERR_OR_ZERO(tr);
9387
9388 out_unlock:
9389         mutex_unlock(&trace_types_lock);
9390         mutex_unlock(&event_mutex);
9391         return ret;
9392 }
9393
9394 /**
9395  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9396  * @name: The name of the trace array to be looked up/created.
9397  *
9398  * Returns a pointer to the trace array with the given name, or
9399  * NULL if it cannot be created.
9400  *
9401  * NOTE: This function increments the reference counter associated with the
9402  * trace array returned. This makes sure it cannot be freed while in use.
9403  * Use trace_array_put() once the trace array is no longer needed.
9404  * If the trace_array is to be freed, trace_array_destroy() needs to
9405  * be called after the trace_array_put(), or simply let user space delete
9406  * it from the tracefs instances directory. But until the
9407  * trace_array_put() is called, user space cannot delete it.
9408  *
9409  */
9410 struct trace_array *trace_array_get_by_name(const char *name)
9411 {
9412         struct trace_array *tr;
9413
9414         mutex_lock(&event_mutex);
9415         mutex_lock(&trace_types_lock);
9416
9417         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9418                 if (tr->name && strcmp(tr->name, name) == 0)
9419                         goto out_unlock;
9420         }
9421
9422         tr = trace_array_create(name);
9423
9424         if (IS_ERR(tr))
9425                 tr = NULL;
9426 out_unlock:
9427         if (tr)
9428                 tr->ref++;
9429
9430         mutex_unlock(&trace_types_lock);
9431         mutex_unlock(&event_mutex);
9432         return tr;
9433 }
9434 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
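/*
 * Illustrative sketch (hypothetical module code) of the life cycle described
 * in the comment above:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);		// drop the reference taken above
 *	trace_array_destroy(tr);	// optionally remove the instance too
 */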
9435
9436 static int __remove_instance(struct trace_array *tr)
9437 {
9438         int i;
9439
9440         /* Reference counter for a newly created trace array = 1. */
9441         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9442                 return -EBUSY;
9443
9444         list_del(&tr->list);
9445
9446         /* Disable all the flags that were enabled coming in */
9447         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9448                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9449                         set_tracer_flag(tr, 1 << i, 0);
9450         }
9451
9452         tracing_set_nop(tr);
9453         clear_ftrace_function_probes(tr);
9454         event_trace_del_tracer(tr);
9455         ftrace_clear_pids(tr);
9456         ftrace_destroy_function_files(tr);
9457         tracefs_remove(tr->dir);
9458         free_percpu(tr->last_func_repeats);
9459         free_trace_buffers(tr);
9460
9461         for (i = 0; i < tr->nr_topts; i++) {
9462                 kfree(tr->topts[i].topts);
9463         }
9464         kfree(tr->topts);
9465
9466         free_cpumask_var(tr->tracing_cpumask);
9467         kfree(tr->name);
9468         kfree(tr);
9469
9470         return 0;
9471 }
9472
9473 int trace_array_destroy(struct trace_array *this_tr)
9474 {
9475         struct trace_array *tr;
9476         int ret;
9477
9478         if (!this_tr)
9479                 return -EINVAL;
9480
9481         mutex_lock(&event_mutex);
9482         mutex_lock(&trace_types_lock);
9483
9484         ret = -ENODEV;
9485
9486         /* Make sure the trace array exists before destroying it. */
9487         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9488                 if (tr == this_tr) {
9489                         ret = __remove_instance(tr);
9490                         break;
9491                 }
9492         }
9493
9494         mutex_unlock(&trace_types_lock);
9495         mutex_unlock(&event_mutex);
9496
9497         return ret;
9498 }
9499 EXPORT_SYMBOL_GPL(trace_array_destroy);
9500
9501 static int instance_rmdir(const char *name)
9502 {
9503         struct trace_array *tr;
9504         int ret;
9505
9506         mutex_lock(&event_mutex);
9507         mutex_lock(&trace_types_lock);
9508
9509         ret = -ENODEV;
9510         tr = trace_array_find(name);
9511         if (tr)
9512                 ret = __remove_instance(tr);
9513
9514         mutex_unlock(&trace_types_lock);
9515         mutex_unlock(&event_mutex);
9516
9517         return ret;
9518 }
9519
9520 static __init void create_trace_instances(struct dentry *d_tracer)
9521 {
9522         struct trace_array *tr;
9523
9524         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9525                                                          instance_mkdir,
9526                                                          instance_rmdir);
9527         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9528                 return;
9529
9530         mutex_lock(&event_mutex);
9531         mutex_lock(&trace_types_lock);
9532
9533         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9534                 if (!tr->name)
9535                         continue;
9536                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9537                              "Failed to create instance directory\n"))
9538                         break;
9539         }
9540
9541         mutex_unlock(&trace_types_lock);
9542         mutex_unlock(&event_mutex);
9543 }
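
/*
 * For reference, user space reaches instance_mkdir()/instance_rmdir()
 * through the "instances" directory registered above. A sketch of the
 * user-space side (the mount point and instance name are only examples):
 *
 *	mkdir("/sys/kernel/tracing/instances/foo", 0755);  maps to instance_mkdir("foo")
 *	rmdir("/sys/kernel/tracing/instances/foo");         maps to instance_rmdir("foo")
 */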
9544
9545 static void
9546 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9547 {
9548         struct trace_event_file *file;
9549         int cpu;
9550
9551         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9552                         tr, &show_traces_fops);
9553
9554         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9555                         tr, &set_tracer_fops);
9556
9557         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9558                           tr, &tracing_cpumask_fops);
9559
9560         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9561                           tr, &tracing_iter_fops);
9562
9563         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9564                           tr, &tracing_fops);
9565
9566         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9567                           tr, &tracing_pipe_fops);
9568
9569         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9570                           tr, &tracing_entries_fops);
9571
9572         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9573                           tr, &tracing_total_entries_fops);
9574
9575         trace_create_file("free_buffer", 0200, d_tracer,
9576                           tr, &tracing_free_buffer_fops);
9577
9578         trace_create_file("trace_marker", 0220, d_tracer,
9579                           tr, &tracing_mark_fops);
9580
9581         file = __find_event_file(tr, "ftrace", "print");
9582         if (file && file->dir)
9583                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9584                                   file, &event_trigger_fops);
9585         tr->trace_marker_file = file;
9586
9587         trace_create_file("trace_marker_raw", 0220, d_tracer,
9588                           tr, &tracing_mark_raw_fops);
9589
9590         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9591                           &trace_clock_fops);
9592
9593         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9594                           tr, &rb_simple_fops);
9595
9596         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9597                           &trace_time_stamp_mode_fops);
9598
9599         tr->buffer_percent = 50;
9600
9601         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9602                         tr, &buffer_percent_fops);
9603
9604         create_trace_options_dir(tr);
9605
9606         trace_create_maxlat_file(tr, d_tracer);
9607
9608         if (ftrace_create_function_files(tr, d_tracer))
9609                 MEM_FAIL(1, "Could not allocate function filter files");
9610
9611 #ifdef CONFIG_TRACER_SNAPSHOT
9612         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9613                           tr, &snapshot_fops);
9614 #endif
9615
9616         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9617                           tr, &tracing_err_log_fops);
9618
9619         for_each_tracing_cpu(cpu)
9620                 tracing_init_tracefs_percpu(tr, cpu);
9621
9622         ftrace_init_tracefs(tr, d_tracer);
9623 }
9624
9625 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9626 {
9627         struct vfsmount *mnt;
9628         struct file_system_type *type;
9629
9630         /*
9631          * To maintain backward compatibility for tools that mount
9632          * debugfs to get to the tracing facility, tracefs is automatically
9633          * mounted to the debugfs/tracing directory.
9634          */
9635         type = get_fs_type("tracefs");
9636         if (!type)
9637                 return NULL;
9638         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9639         put_filesystem(type);
9640         if (IS_ERR(mnt))
9641                 return NULL;
9642         mntget(mnt);
9643
9644         return mnt;
9645 }
9646
9647 /**
9648  * tracing_init_dentry - initialize top level trace array
9649  *
9650  * This is called when creating files or directories in the tracing
9651  * directory. It is called via fs_initcall() by any of the boot up code
9652  * and returns 0 on success, or a negative error code on failure.
9653  */
9654 int tracing_init_dentry(void)
9655 {
9656         struct trace_array *tr = &global_trace;
9657
9658         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9659                 pr_warn("Tracing disabled due to lockdown\n");
9660                 return -EPERM;
9661         }
9662
9663         /* The top level trace array uses NULL as parent */
9664         if (tr->dir)
9665                 return 0;
9666
9667         if (WARN_ON(!tracefs_initialized()))
9668                 return -ENODEV;
9669
9670         /*
9671          * As there may still be users that expect the tracing
9672          * files to exist in debugfs/tracing, we must automount
9673          * the tracefs file system there, so older tools still
9674          * work with the newer kernel.
9675          */
9676         tr->dir = debugfs_create_automount("tracing", NULL,
9677                                            trace_automount, NULL);
9678
9679         return 0;
9680 }
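
/*
 * The net effect of the automount above, seen from user space (the exact
 * paths depend on where debugfs and tracefs are mounted on a system):
 *
 *	/sys/kernel/tracing/trace		native tracefs mount
 *	/sys/kernel/debug/tracing/trace		same files via the automount
 */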
9681
9682 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9683 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9684
9685 static struct workqueue_struct *eval_map_wq __initdata;
9686 static struct work_struct eval_map_work __initdata;
9687 static struct work_struct tracerfs_init_work __initdata;
9688
9689 static void __init eval_map_work_func(struct work_struct *work)
9690 {
9691         int len;
9692
9693         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9694         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9695 }
9696
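/*
 * Populating the eval maps for all built-in trace events is deferred to a
 * workqueue so that it does not hold up the rest of boot; trace_eval_sync()
 * below makes sure the updates have finished by late_initcall_sync time.
 */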
9697 static int __init trace_eval_init(void)
9698 {
9699         INIT_WORK(&eval_map_work, eval_map_work_func);
9700
9701         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9702         if (!eval_map_wq) {
9703                 pr_err("Unable to allocate eval_map_wq\n");
9704                 /* Do work here */
9705                 eval_map_work_func(&eval_map_work);
9706                 return -ENOMEM;
9707         }
9708
9709         queue_work(eval_map_wq, &eval_map_work);
9710         return 0;
9711 }
9712
9713 subsys_initcall(trace_eval_init);
9714
9715 static int __init trace_eval_sync(void)
9716 {
9717         /* Make sure the eval map updates are finished */
9718         if (eval_map_wq)
9719                 destroy_workqueue(eval_map_wq);
9720         return 0;
9721 }
9722
9723 late_initcall_sync(trace_eval_sync);
9724
9725
9726 #ifdef CONFIG_MODULES
9727 static void trace_module_add_evals(struct module *mod)
9728 {
9729         if (!mod->num_trace_evals)
9730                 return;
9731
9732         /*
9733          * Modules with bad taint do not have events created; do
9734          * not bother with their enums either.
9735          */
9736         if (trace_module_has_bad_taint(mod))
9737                 return;
9738
9739         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9740 }
9741
9742 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9743 static void trace_module_remove_evals(struct module *mod)
9744 {
9745         union trace_eval_map_item *map;
9746         union trace_eval_map_item **last = &trace_eval_maps;
9747
9748         if (!mod->num_trace_evals)
9749                 return;
9750
9751         mutex_lock(&trace_eval_mutex);
9752
9753         map = trace_eval_maps;
9754
9755         while (map) {
9756                 if (map->head.mod == mod)
9757                         break;
9758                 map = trace_eval_jmp_to_tail(map);
9759                 last = &map->tail.next;
9760                 map = map->tail.next;
9761         }
9762         if (!map)
9763                 goto out;
9764
9765         *last = trace_eval_jmp_to_tail(map)->tail.next;
9766         kfree(map);
9767  out:
9768         mutex_unlock(&trace_eval_mutex);
9769 }
9770 #else
9771 static inline void trace_module_remove_evals(struct module *mod) { }
9772 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9773
9774 static int trace_module_notify(struct notifier_block *self,
9775                                unsigned long val, void *data)
9776 {
9777         struct module *mod = data;
9778
9779         switch (val) {
9780         case MODULE_STATE_COMING:
9781                 trace_module_add_evals(mod);
9782                 break;
9783         case MODULE_STATE_GOING:
9784                 trace_module_remove_evals(mod);
9785                 break;
9786         }
9787
9788         return NOTIFY_OK;
9789 }
9790
9791 static struct notifier_block trace_module_nb = {
9792         .notifier_call = trace_module_notify,
9793         .priority = 0,
9794 };
9795 #endif /* CONFIG_MODULES */
9796
9797 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9798 {
9799
9800         event_trace_init();
9801
9802         init_tracer_tracefs(&global_trace, NULL);
9803         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9804
9805         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9806                         &global_trace, &tracing_thresh_fops);
9807
9808         trace_create_file("README", TRACE_MODE_READ, NULL,
9809                         NULL, &tracing_readme_fops);
9810
9811         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9812                         NULL, &tracing_saved_cmdlines_fops);
9813
9814         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9815                           NULL, &tracing_saved_cmdlines_size_fops);
9816
9817         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9818                         NULL, &tracing_saved_tgids_fops);
9819
9820         trace_create_eval_file(NULL);
9821
9822 #ifdef CONFIG_MODULES
9823         register_module_notifier(&trace_module_nb);
9824 #endif
9825
9826 #ifdef CONFIG_DYNAMIC_FTRACE
9827         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9828                         NULL, &tracing_dyn_info_fops);
9829 #endif
9830
9831         create_trace_instances(NULL);
9832
9833         update_tracer_options(&global_trace);
9834 }
9835
9836 static __init int tracer_init_tracefs(void)
9837 {
9838         int ret;
9839
9840         trace_access_lock_init();
9841
9842         ret = tracing_init_dentry();
9843         if (ret)
9844                 return 0;
9845
9846         if (eval_map_wq) {
9847                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9848                 queue_work(eval_map_wq, &tracerfs_init_work);
9849         } else {
9850                 tracer_init_tracefs_work_func(NULL);
9851         }
9852
9853         rv_init_interface();
9854
9855         return 0;
9856 }
9857
9858 fs_initcall(tracer_init_tracefs);
9859
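/*
 * The panic and die notifiers below dump the ring buffer to the console
 * when the ftrace_dump_on_oops command line option (or sysctl) is set;
 * its value selects the dump mode passed to ftrace_dump().
 */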
9860 static int trace_panic_handler(struct notifier_block *this,
9861                                unsigned long event, void *unused)
9862 {
9863         if (ftrace_dump_on_oops)
9864                 ftrace_dump(ftrace_dump_on_oops);
9865         return NOTIFY_OK;
9866 }
9867
9868 static struct notifier_block trace_panic_notifier = {
9869         .notifier_call  = trace_panic_handler,
9870         .next           = NULL,
9871         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9872 };
9873
9874 static int trace_die_handler(struct notifier_block *self,
9875                              unsigned long val,
9876                              void *data)
9877 {
9878         switch (val) {
9879         case DIE_OOPS:
9880                 if (ftrace_dump_on_oops)
9881                         ftrace_dump(ftrace_dump_on_oops);
9882                 break;
9883         default:
9884                 break;
9885         }
9886         return NOTIFY_OK;
9887 }
9888
9889 static struct notifier_block trace_die_notifier = {
9890         .notifier_call = trace_die_handler,
9891         .priority = 200
9892 };
9893
9894 /*
9895  * printk is limited to a max of 1024 characters; we really don't need it that big.
9896  * Nothing should be printing 1000 characters anyway.
9897  */
9898 #define TRACE_MAX_PRINT         1000
9899
9900 /*
9901  * Define here KERN_TRACE so that we have one place to modify
9902  * it if we decide to change what log level the ftrace dump
9903  * should be at.
9904  */
9905 #define KERN_TRACE              KERN_EMERG
9906
9907 void
9908 trace_printk_seq(struct trace_seq *s)
9909 {
9910         /* Probably should print a warning here. */
9911         if (s->seq.len >= TRACE_MAX_PRINT)
9912                 s->seq.len = TRACE_MAX_PRINT;
9913
9914         /*
9915          * More paranoid code. Although the buffer size is set to
9916          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9917          * an extra layer of protection.
9918          */
9919         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9920                 s->seq.len = s->seq.size - 1;
9921
9922         /* Should already be NUL-terminated, but we are paranoid. */
9923         s->buffer[s->seq.len] = 0;
9924
9925         printk(KERN_TRACE "%s", s->buffer);
9926
9927         trace_seq_init(s);
9928 }
9929
9930 void trace_init_global_iter(struct trace_iterator *iter)
9931 {
9932         iter->tr = &global_trace;
9933         iter->trace = iter->tr->current_trace;
9934         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9935         iter->array_buffer = &global_trace.array_buffer;
9936
9937         if (iter->trace && iter->trace->open)
9938                 iter->trace->open(iter);
9939
9940         /* Annotate start of buffers if we had overruns */
9941         if (ring_buffer_overruns(iter->array_buffer->buffer))
9942                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9943
9944         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9945         if (trace_clocks[iter->tr->clock_id].in_ns)
9946                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9947
9948         /* Can not use kmalloc for iter.temp and iter.fmt */
9949         iter->temp = static_temp_buf;
9950         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9951         iter->fmt = static_fmt_buf;
9952         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9953 }
9954
9955 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9956 {
9957         /* use static because iter can be a bit big for the stack */
9958         static struct trace_iterator iter;
9959         static atomic_t dump_running;
9960         struct trace_array *tr = &global_trace;
9961         unsigned int old_userobj;
9962         unsigned long flags;
9963         int cnt = 0, cpu;
9964
9965         /* Only allow one dump user at a time. */
9966         if (atomic_inc_return(&dump_running) != 1) {
9967                 atomic_dec(&dump_running);
9968                 return;
9969         }
9970
9971         /*
9972          * Always turn off tracing when we dump.
9973          * We don't need to show trace output of what happens
9974          * between multiple crashes.
9975          *
9976          * If the user does a sysrq-z, then they can re-enable
9977          * tracing with echo 1 > tracing_on.
9978          */
9979         tracing_off();
9980
9981         local_irq_save(flags);
9982
9983         /* Simulate the iterator */
9984         trace_init_global_iter(&iter);
9985
9986         for_each_tracing_cpu(cpu) {
9987                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9988         }
9989
9990         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9991
9992         /* don't look at user memory in panic mode */
9993         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9994
9995         switch (oops_dump_mode) {
9996         case DUMP_ALL:
9997                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9998                 break;
9999         case DUMP_ORIG:
10000                 iter.cpu_file = raw_smp_processor_id();
10001                 break;
10002         case DUMP_NONE:
10003                 goto out_enable;
10004         default:
10005                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10006                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10007         }
10008
10009         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10010
10011         /* Did function tracer already get disabled? */
10012         if (ftrace_is_dead()) {
10013                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10014                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10015         }
10016
10017         /*
10018          * We need to stop all tracing on all CPUs to read
10019          * the next buffer. This is a bit expensive, but is
10020          * not done often. We print everything we can read,
10021          * and then release the locks again.
10022          */
10023
10024         while (!trace_empty(&iter)) {
10025
10026                 if (!cnt)
10027                         printk(KERN_TRACE "---------------------------------\n");
10028
10029                 cnt++;
10030
10031                 trace_iterator_reset(&iter);
10032                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10033
10034                 if (trace_find_next_entry_inc(&iter) != NULL) {
10035                         int ret;
10036
10037                         ret = print_trace_line(&iter);
10038                         if (ret != TRACE_TYPE_NO_CONSUME)
10039                                 trace_consume(&iter);
10040                 }
10041                 touch_nmi_watchdog();
10042
10043                 trace_printk_seq(&iter.seq);
10044         }
10045
10046         if (!cnt)
10047                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10048         else
10049                 printk(KERN_TRACE "---------------------------------\n");
10050
10051  out_enable:
10052         tr->trace_flags |= old_userobj;
10053
10054         for_each_tracing_cpu(cpu) {
10055                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10056         }
10057         atomic_dec(&dump_running);
10058         local_irq_restore(flags);
10059 }
10060 EXPORT_SYMBOL_GPL(ftrace_dump);
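
/*
 * ftrace_dump() can also be called directly from kernel code while
 * debugging. A hedged sketch (the error condition is illustrative):
 *
 *	if (WARN_ON(something_went_wrong))
 *		ftrace_dump(DUMP_ALL);
 *
 * Note that the dump turns tracing off; re-enable it with
 * "echo 1 > tracing_on" afterwards if needed.
 */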
10061
10062 #define WRITE_BUFSIZE  4096
10063
10064 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10065                                 size_t count, loff_t *ppos,
10066                                 int (*createfn)(const char *))
10067 {
10068         char *kbuf, *buf, *tmp;
10069         int ret = 0;
10070         size_t done = 0;
10071         size_t size;
10072
10073         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10074         if (!kbuf)
10075                 return -ENOMEM;
10076
10077         while (done < count) {
10078                 size = count - done;
10079
10080                 if (size >= WRITE_BUFSIZE)
10081                         size = WRITE_BUFSIZE - 1;
10082
10083                 if (copy_from_user(kbuf, buffer + done, size)) {
10084                         ret = -EFAULT;
10085                         goto out;
10086                 }
10087                 kbuf[size] = '\0';
10088                 buf = kbuf;
10089                 do {
10090                         tmp = strchr(buf, '\n');
10091                         if (tmp) {
10092                                 *tmp = '\0';
10093                                 size = tmp - buf + 1;
10094                         } else {
10095                                 size = strlen(buf);
10096                                 if (done + size < count) {
10097                                         if (buf != kbuf)
10098                                                 break;
10099                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10100                                         pr_warn("Line length is too long: Should be less than %d\n",
10101                                                 WRITE_BUFSIZE - 2);
10102                                         ret = -EINVAL;
10103                                         goto out;
10104                                 }
10105                         }
10106                         done += size;
10107
10108                         /* Remove comments */
10109                         tmp = strchr(buf, '#');
10110
10111                         if (tmp)
10112                                 *tmp = '\0';
10113
10114                         ret = createfn(buf);
10115                         if (ret)
10116                                 goto out;
10117                         buf += size;
10118
10119                 } while (done < count);
10120         }
10121         ret = done;
10122
10123 out:
10124         kfree(kbuf);
10125
10126         return ret;
10127 }
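
/*
 * Example of how a write reaches @createfn (the probe definition is only
 * illustrative):
 *
 *	echo 'p:myprobe do_sys_open  # comment' > kprobe_events
 *
 * arrives here as one user buffer; it is split on '\n', anything after a
 * '#' is stripped, and createfn() is invoked once per remaining line.
 */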
10128
10129 __init static int tracer_alloc_buffers(void)
10130 {
10131         int ring_buf_size;
10132         int ret = -ENOMEM;
10133
10134
10135         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10136                 pr_warn("Tracing disabled due to lockdown\n");
10137                 return -EPERM;
10138         }
10139
10140         /*
10141          * Make sure we don't accidentally add more trace options
10142          * than we have bits for.
10143          */
10144         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10145
10146         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10147                 goto out;
10148
10149         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10150                 goto out_free_buffer_mask;
10151
10152         /* Only allocate trace_printk buffers if a trace_printk exists */
10153         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10154                 /* Must be called before global_trace.buffer is allocated */
10155                 trace_printk_init_buffers();
10156
10157         /* To save memory, keep the ring buffer size to its minimum */
10158         if (ring_buffer_expanded)
10159                 ring_buf_size = trace_buf_size;
10160         else
10161                 ring_buf_size = 1;
10162
10163         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10164         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10165
10166         raw_spin_lock_init(&global_trace.start_lock);
10167
10168         /*
10169          * The prepare callback allocates some memory for the ring buffer. We
10170          * don't free the buffer if the CPU goes down. If we were to free
10171          * the buffer, then the user would lose any trace that was in the
10172          * buffer. The memory will be removed once the "instance" is removed.
10173          */
10174         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10175                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10176                                       NULL);
10177         if (ret < 0)
10178                 goto out_free_cpumask;
10179         /* Used for event triggers */
10180         ret = -ENOMEM;
10181         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10182         if (!temp_buffer)
10183                 goto out_rm_hp_state;
10184
10185         if (trace_create_savedcmd() < 0)
10186                 goto out_free_temp_buffer;
10187
10188         /* TODO: make the number of buffers hot pluggable with CPUS */
10189         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10190                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10191                 goto out_free_savedcmd;
10192         }
10193
10194         if (global_trace.buffer_disabled)
10195                 tracing_off();
10196
10197         if (trace_boot_clock) {
10198                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10199                 if (ret < 0)
10200                         pr_warn("Trace clock %s not defined, going back to default\n",
10201                                 trace_boot_clock);
10202         }
10203
10204         /*
10205          * register_tracer() might reference current_trace, so it
10206          * needs to be set before we register anything. This is
10207          * just a bootstrap of current_trace anyway.
10208          */
10209         global_trace.current_trace = &nop_trace;
10210
10211         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10212
10213         ftrace_init_global_array_ops(&global_trace);
10214
10215         init_trace_flags_index(&global_trace);
10216
10217         register_tracer(&nop_trace);
10218
10219         /* Function tracing may start here (via kernel command line) */
10220         init_function_trace();
10221
10222         /* All seems OK, enable tracing */
10223         tracing_disabled = 0;
10224
10225         atomic_notifier_chain_register(&panic_notifier_list,
10226                                        &trace_panic_notifier);
10227
10228         register_die_notifier(&trace_die_notifier);
10229
10230         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10231
10232         INIT_LIST_HEAD(&global_trace.systems);
10233         INIT_LIST_HEAD(&global_trace.events);
10234         INIT_LIST_HEAD(&global_trace.hist_vars);
10235         INIT_LIST_HEAD(&global_trace.err_log);
10236         list_add(&global_trace.list, &ftrace_trace_arrays);
10237
10238         apply_trace_boot_options();
10239
10240         register_snapshot_cmd();
10241
10242         test_can_verify();
10243
10244         return 0;
10245
10246 out_free_savedcmd:
10247         free_saved_cmdlines_buffer(savedcmd);
10248 out_free_temp_buffer:
10249         ring_buffer_free(temp_buffer);
10250 out_rm_hp_state:
10251         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10252 out_free_cpumask:
10253         free_cpumask_var(global_trace.tracing_cpumask);
10254 out_free_buffer_mask:
10255         free_cpumask_var(tracing_buffer_mask);
10256 out:
10257         return ret;
10258 }
10259
10260 void __init ftrace_boot_snapshot(void)
10261 {
10262         if (snapshot_at_boot) {
10263                 tracing_snapshot();
10264                 internal_trace_puts("** Boot snapshot taken **\n");
10265         }
10266 }
10267
10268 void __init early_trace_init(void)
10269 {
10270         if (tracepoint_printk) {
10271                 tracepoint_print_iter =
10272                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10273                 if (MEM_FAIL(!tracepoint_print_iter,
10274                              "Failed to allocate trace iterator\n"))
10275                         tracepoint_printk = 0;
10276                 else
10277                         static_key_enable(&tracepoint_printk_key.key);
10278         }
10279         tracer_alloc_buffers();
10280 }
10281
10282 void __init trace_init(void)
10283 {
10284         trace_event_init();
10285 }
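
/*
 * Boot ordering, for reference: early_trace_init() and trace_init() are
 * called from start_kernel(), tracer_init_tracefs() runs as an
 * fs_initcall() once tracefs is available, and late_trace_init() runs at
 * late_initcall_sync time.
 */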
10286
10287 __init static void clear_boot_tracer(void)
10288 {
10289         /*
10290          * The default bootup tracer name is stored in an init section.
10291          * This function is called at late init. If we did not
10292          * find the boot tracer, then clear it out, to prevent
10293          * later registration from accessing the buffer that is
10294          * about to be freed.
10295          */
10296         if (!default_bootup_tracer)
10297                 return;
10298
10299         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10300                default_bootup_tracer);
10301         default_bootup_tracer = NULL;
10302 }
10303
10304 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10305 __init static void tracing_set_default_clock(void)
10306 {
10307         /* sched_clock_stable() is determined in late_initcall */
10308         if (!trace_boot_clock && !sched_clock_stable()) {
10309                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10310                         pr_warn("Can not set tracing clock due to lockdown\n");
10311                         return;
10312                 }
10313
10314                 printk(KERN_WARNING
10315                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10316                        "If you want to keep using the local clock, then add:\n"
10317                        "  \"trace_clock=local\"\n"
10318                        "on the kernel command line\n");
10319                 tracing_set_clock(&global_trace, "global");
10320         }
10321 }
10322 #else
10323 static inline void tracing_set_default_clock(void) { }
10324 #endif
10325
10326 __init static int late_trace_init(void)
10327 {
10328         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10329                 static_key_disable(&tracepoint_printk_key.key);
10330                 tracepoint_printk = 0;
10331         }
10332
10333         tracing_set_default_clock();
10334         clear_boot_tracer();
10335         return 0;
10336 }
10337
10338 late_initcall_sync(late_trace_init);