1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will look into the ring-buffer to count the
67  * entries inserted during the selftest, although concurrent
68  * insertions into the ring-buffer, such as trace_printk(), could occur
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78
79 void __init disable_tracing_selftest(const char *reason)
80 {
81         if (!tracing_selftest_disabled) {
82                 tracing_selftest_disabled = true;
83                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84         }
85 }
86 #else
87 #define tracing_selftest_running        0
88 #define tracing_selftest_disabled       0
89 #endif
90
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99         { }
100 };
101
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105         return 0;
106 }
107
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will turn to zero if the initialization
118  * of the tracer is successful. But that is the only place that sets
119  * this back to zero.
120  */
121 static int tracing_disabled = 1;
122
123 cpumask_var_t __read_mostly     tracing_buffer_mask;
124
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputting them to a
132  * serial console.
133  *
134  * It is off by default, but you can enable it either by specifying
135  * "ftrace_dump_on_oops" on the kernel command line, or by setting
136  * /proc/sys/kernel/ftrace_dump_on_oops.
137  * Set it to 1 to dump the buffers of all CPUs.
138  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
139  */
140
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149         struct module                   *mod;
150         unsigned long                   length;
151 };
152
153 union trace_eval_map_item;
154
155 struct trace_eval_map_tail {
156         /*
157          * "end" is first and points to NULL as it must be different
158          * from "mod" or "eval_string"
159          */
160         union trace_eval_map_item       *next;
161         const char                      *end;   /* points to NULL */
162 };
163
164 static DEFINE_MUTEX(trace_eval_mutex);
165
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174         struct trace_eval_map           map;
175         struct trace_eval_map_head      head;
176         struct trace_eval_map_tail      tail;
177 };
178
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
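/*
 * Illustrative layout sketch (a reading aid for the comment above, not a
 * definition): a saved block of N eval maps is laid out as
 *
 *   [0]      head: .mod = owning module (NULL if built in), .length = N
 *   [1..N]   map:  the N trace_eval_map entries being saved
 *   [N+1]    tail: .next = next saved block (or NULL), .end = NULL
 */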
181
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184                                    struct trace_buffer *buffer,
185                                    unsigned int trace_ctx);
186
187 #define MAX_TRACER_SIZE         100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199
200 static int __init set_cmdline_ftrace(char *str)
201 {
202         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203         default_bootup_tracer = bootup_tracer_buf;
204         /* We are using ftrace early, expand it */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212         if (*str++ != '=' || !*str || !strcmp("1", str)) {
213                 ftrace_dump_on_oops = DUMP_ALL;
214                 return 1;
215         }
216
217         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218                 ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
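/*
 * Example (command line sketch) matching the parsing above:
 *
 *   ftrace_dump_on_oops            same as "=1": dump the buffers of all CPUs
 *   ftrace_dump_on_oops=orig_cpu   same as "=2": dump only the buffer of the
 *                                  CPU that triggered the oops
 *
 * The same values may be written to /proc/sys/kernel/ftrace_dump_on_oops
 * at run time.
 */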
225
226 static int __init stop_trace_on_warning(char *str)
227 {
228         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229                 __disable_trace_on_warning = 1;
230         return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233
234 static int __init boot_alloc_snapshot(char *str)
235 {
236         char *slot = boot_snapshot_info + boot_snapshot_index;
237         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238         int ret;
239
240         if (str[0] == '=') {
241                 str++;
242                 if (strlen(str) >= left)
243                         return -1;
244
245                 ret = snprintf(slot, left, "%s\t", str);
246                 boot_snapshot_index += ret;
247         } else {
248                 allocate_snapshot = true;
249                 /* We also need the main ring buffer expanded */
250                 ring_buffer_expanded = true;
251         }
252         return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255
256
257 static int __init boot_snapshot(char *str)
258 {
259         snapshot_at_boot = true;
260         boot_alloc_snapshot(str);
261         return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264
265
266 static int __init boot_instance(char *str)
267 {
268         char *slot = boot_instance_info + boot_instance_index;
269         int left = sizeof(boot_instance_info) - boot_instance_index;
270         int ret;
271
272         if (strlen(str) >= left)
273                 return -1;
274
275         ret = snprintf(slot, left, "%s\t", str);
276         boot_instance_index += ret;
277
278         return 1;
279 }
280 __setup("trace_instance=", boot_instance);
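/*
 * Example (sketch, hedged): boot_instance() above only copies whatever
 * follows "trace_instance=" into boot_instance_info for later parsing, so
 * a minimal command line use is simply naming an instance to create:
 *
 *   trace_instance=foo
 *
 * Any richer per-instance syntax is interpreted later from
 * boot_instance_info; see the kernel's boot-parameter documentation for
 * the exact format.
 */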
281
282
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284
285 static int __init set_trace_boot_options(char *str)
286 {
287         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288         return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294
295 static int __init set_trace_boot_clock(char *str)
296 {
297         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298         trace_boot_clock = trace_boot_clock_buf;
299         return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302
303 static int __init set_tracepoint_printk(char *str)
304 {
305         /* Ignore the "tp_printk_stop_on_boot" param */
306         if (*str == '_')
307                 return 0;
308
309         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310                 tracepoint_printk = 1;
311         return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317         tracepoint_printk_stop_on_boot = true;
318         return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321
322 unsigned long long ns2usecs(u64 nsec)
323 {
324         nsec += 500;
325         do_div(nsec, 1000);
326         return nsec;
327 }
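/*
 * Worked example of the rounding above: ns2usecs(1499) yields
 * (1499 + 500) / 1000 = 1, while ns2usecs(1500) yields
 * (1500 + 500) / 1000 = 2, i.e. nanoseconds are rounded to the
 * nearest microsecond rather than truncated.
 */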
328
329 static void
330 trace_process_export(struct trace_export *export,
331                struct ring_buffer_event *event, int flag)
332 {
333         struct trace_entry *entry;
334         unsigned int size = 0;
335
336         if (export->flags & flag) {
337                 entry = ring_buffer_event_data(event);
338                 size = ring_buffer_event_length(event);
339                 export->write(export, entry, size);
340         }
341 }
342
343 static DEFINE_MUTEX(ftrace_export_lock);
344
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353         if (export->flags & TRACE_EXPORT_FUNCTION)
354                 static_branch_inc(&trace_function_exports_enabled);
355
356         if (export->flags & TRACE_EXPORT_EVENT)
357                 static_branch_inc(&trace_event_exports_enabled);
358
359         if (export->flags & TRACE_EXPORT_MARKER)
360                 static_branch_inc(&trace_marker_exports_enabled);
361 }
362
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365         if (export->flags & TRACE_EXPORT_FUNCTION)
366                 static_branch_dec(&trace_function_exports_enabled);
367
368         if (export->flags & TRACE_EXPORT_EVENT)
369                 static_branch_dec(&trace_event_exports_enabled);
370
371         if (export->flags & TRACE_EXPORT_MARKER)
372                 static_branch_dec(&trace_marker_exports_enabled);
373 }
374
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377         struct trace_export *export;
378
379         preempt_disable_notrace();
380
381         export = rcu_dereference_raw_check(ftrace_exports_list);
382         while (export) {
383                 trace_process_export(export, event, flag);
384                 export = rcu_dereference_raw_check(export->next);
385         }
386
387         preempt_enable_notrace();
388 }
389
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393         rcu_assign_pointer(export->next, *list);
394         /*
395          * We are inserting the export into the list, but another
396          * CPU might be walking that list. We need to make sure
397          * the export->next pointer is valid before another CPU sees
398          * the export pointer itself in the list.
399          */
400         rcu_assign_pointer(*list, export);
401 }
402
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406         struct trace_export **p;
407
408         for (p = list; *p != NULL; p = &(*p)->next)
409                 if (*p == export)
410                         break;
411
412         if (*p != export)
413                 return -1;
414
415         rcu_assign_pointer(*p, (*p)->next);
416
417         return 0;
418 }
419
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423         ftrace_exports_enable(export);
424
425         add_trace_export(list, export);
426 }
427
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431         int ret;
432
433         ret = rm_trace_export(list, export);
434         ftrace_exports_disable(export);
435
436         return ret;
437 }
438
439 int register_ftrace_export(struct trace_export *export)
440 {
441         if (WARN_ON_ONCE(!export->write))
442                 return -1;
443
444         mutex_lock(&ftrace_export_lock);
445
446         add_ftrace_export(&ftrace_exports_list, export);
447
448         mutex_unlock(&ftrace_export_lock);
449
450         return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456         int ret;
457
458         mutex_lock(&ftrace_export_lock);
459
460         ret = rm_ftrace_export(&ftrace_exports_list, export);
461
462         mutex_unlock(&ftrace_export_lock);
463
464         return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
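/*
 * Usage sketch (hypothetical caller, e.g. a module; not part of this file).
 * A trace export supplies a ->write() callback plus a flags mask selecting
 * which records it receives (function, event and/or marker records):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... forward the @size bytes at @entry somewhere out of band ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 * The module would then call register_ftrace_export(&my_export) on init and
 * unregister_ftrace_export(&my_export) on exit.
 */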
467
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS                                             \
470         (FUNCTION_DEFAULT_FLAGS |                                       \
471          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
472          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
473          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
474          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
475          TRACE_ITER_HASH_PTR)
476
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
479                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490         .trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492
493 LIST_HEAD(ftrace_trace_arrays);
494
495 int trace_array_get(struct trace_array *this_tr)
496 {
497         struct trace_array *tr;
498         int ret = -ENODEV;
499
500         mutex_lock(&trace_types_lock);
501         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502                 if (tr == this_tr) {
503                         tr->ref++;
504                         ret = 0;
505                         break;
506                 }
507         }
508         mutex_unlock(&trace_types_lock);
509
510         return ret;
511 }
512
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515         WARN_ON(!this_tr->ref);
516         this_tr->ref--;
517 }
518
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530         if (!this_tr)
531                 return;
532
533         mutex_lock(&trace_types_lock);
534         __trace_array_put(this_tr);
535         mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
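/*
 * Pairing sketch (hypothetical caller code, not part of this file): a
 * reference obtained with trace_array_get_by_name() (or trace_array_get())
 * must be dropped with trace_array_put() when no longer needed:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		trace_array_printk(tr, _THIS_IP_, "hello\n");
 *		trace_array_put(tr);
 *	}
 *
 * "my_instance" and the printk are purely illustrative.
 */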
538
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541         int ret;
542
543         ret = security_locked_down(LOCKDOWN_TRACEFS);
544         if (ret)
545                 return ret;
546
547         if (tracing_disabled)
548                 return -ENODEV;
549
550         if (tr && trace_array_get(tr) < 0)
551                 return -ENODEV;
552
553         return 0;
554 }
555
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557                               struct trace_buffer *buffer,
558                               struct ring_buffer_event *event)
559 {
560         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561             !filter_match_preds(call->filter, rec)) {
562                 __trace_event_discard_commit(buffer, event);
563                 return 1;
564         }
565
566         return 0;
567 }
568
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579         return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594                        struct trace_pid_list *filtered_no_pids,
595                        struct task_struct *task)
596 {
597         /*
598          * If filtered_no_pids is not empty, and the task's pid is listed
599          * in filtered_no_pids, then return true.
600          * Otherwise, if filtered_pids is empty, that means we can
601          * trace all tasks. If it has content, then only trace pids
602          * within filtered_pids.
603          */
604
605         return (filtered_pids &&
606                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
607                 (filtered_no_pids &&
608                  trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
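/*
 * Worked cases for the expression above, using pid 42 as an example:
 *   - both lists NULL:                              trace  -> returns false
 *   - filtered_pids set, 42 not listed:             ignore -> returns true
 *   - filtered_no_pids set, 42 listed there:        ignore -> returns true
 *   - filtered_pids set with 42 listed,
 *     filtered_no_pids NULL:                        trace  -> returns false
 */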
610
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624                                   struct task_struct *self,
625                                   struct task_struct *task)
626 {
627         if (!pid_list)
628                 return;
629
630         /* For forks, we only add if the forking task is listed */
631         if (self) {
632                 if (!trace_find_filtered_pid(pid_list, self->pid))
633                         return;
634         }
635
636         /* "self" is set for forks, and NULL for exits */
637         if (self)
638                 trace_pid_list_set(pid_list, task->pid);
639         else
640                 trace_pid_list_clear(pid_list, task->pid);
641 }
642
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657         long pid = (unsigned long)v;
658         unsigned int next;
659
660         (*pos)++;
661
662         /* pid already is +1 of the actual previous bit */
663         if (trace_pid_list_next(pid_list, pid, &next) < 0)
664                 return NULL;
665
666         pid = next;
667
668         /* Return pid + 1 to allow zero to be represented */
669         return (void *)(pid + 1);
670 }
671
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685         unsigned long pid;
686         unsigned int first;
687         loff_t l = 0;
688
689         if (trace_pid_list_first(pid_list, &first) < 0)
690                 return NULL;
691
692         pid = first;
693
694         /* Return pid + 1 so that zero can be the exit value */
695         for (pid++; pid && l < *pos;
696              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697                 ;
698         return (void *)pid;
699 }
700
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711         unsigned long pid = (unsigned long)v - 1;
712
713         seq_printf(m, "%lu\n", pid);
714         return 0;
715 }
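/*
 * Encoding example for the seq_file helpers above: a pid of 0 is passed
 * around as (void *)1 so that it cannot be mistaken for the NULL that
 * stops the iteration; trace_pid_show() subtracts one and prints "0".
 */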
716
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE            127
719
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721                     struct trace_pid_list **new_pid_list,
722                     const char __user *ubuf, size_t cnt)
723 {
724         struct trace_pid_list *pid_list;
725         struct trace_parser parser;
726         unsigned long val;
727         int nr_pids = 0;
728         ssize_t read = 0;
729         ssize_t ret;
730         loff_t pos;
731         pid_t pid;
732
733         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734                 return -ENOMEM;
735
736         /*
737          * Always recreate a new array. The write is an all or nothing
738          * operation. Always create a new array when adding new pids by
739          * the user. If the operation fails, then the current list is
740          * not modified.
741          */
742         pid_list = trace_pid_list_alloc();
743         if (!pid_list) {
744                 trace_parser_put(&parser);
745                 return -ENOMEM;
746         }
747
748         if (filtered_pids) {
749                 /* copy the current bits to the new max */
750                 ret = trace_pid_list_first(filtered_pids, &pid);
751                 while (!ret) {
752                         trace_pid_list_set(pid_list, pid);
753                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754                         nr_pids++;
755                 }
756         }
757
758         ret = 0;
759         while (cnt > 0) {
760
761                 pos = 0;
762
763                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
764                 if (ret < 0)
765                         break;
766
767                 read += ret;
768                 ubuf += ret;
769                 cnt -= ret;
770
771                 if (!trace_parser_loaded(&parser))
772                         break;
773
774                 ret = -EINVAL;
775                 if (kstrtoul(parser.buffer, 0, &val))
776                         break;
777
778                 pid = (pid_t)val;
779
780                 if (trace_pid_list_set(pid_list, pid) < 0) {
781                         ret = -1;
782                         break;
783                 }
784                 nr_pids++;
785
786                 trace_parser_clear(&parser);
787                 ret = 0;
788         }
789         trace_parser_put(&parser);
790
791         if (ret < 0) {
792                 trace_pid_list_free(pid_list);
793                 return ret;
794         }
795
796         if (!nr_pids) {
797                 /* Cleared the list of pids */
798                 trace_pid_list_free(pid_list);
799                 pid_list = NULL;
800         }
801
802         *new_pid_list = pid_list;
803
804         return read;
805 }
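/*
 * Caller sketch (simplified assumption based on how the pid filter files
 * use this helper): the caller passes its current list and receives a
 * freshly built one to publish, typically with an RCU-style pointer
 * switch under its own locking:
 *
 *	struct trace_pid_list *new_list = NULL;
 *	ssize_t ret;
 *
 *	ret = trace_pid_write(current_list, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *	... publish new_list, free the old list once readers are done ...
 */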
806
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809         u64 ts;
810
811         /* Early boot up does not have a buffer yet */
812         if (!buf->buffer)
813                 return trace_clock_local();
814
815         ts = ring_buffer_time_stamp(buf->buffer);
816         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817
818         return ts;
819 }
820
821 u64 ftrace_now(int cpu)
822 {
823         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled" to be used in fast paths such as for
831  * the irqsoff tracer. But it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on() which is a little
833  * slower, but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837         /*
838          * For quick access (irqsoff uses this in fast path), just
839          * return the mirror variable of the state of the ring buffer.
840          * It's a little racy, but we don't really care.
841          */
842         smp_rmb();
843         return !global_trace.buffer_disabled;
844 }
845
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low value of 16384.
852  * If a dump on oops happens, it is much appreciated not to have
853  * to wait for all that output. In any case, this is configurable
854  * at both boot time and run time.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
857
858 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer            *trace_types __read_mostly;
862
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867
868 /*
869  * Serialize access to the ring buffer.
870  *
871  * The ring buffer serializes readers, but that is only low-level protection.
872  * The validity of the events (returned by ring_buffer_peek() etc.)
873  * is not protected by the ring buffer.
874  *
875  * The content of events may become garbage if we allow other processes to
876  * consume these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not a reader page) in the ring buffer, and this page will be
879  *      rewritten by the event producer.
880  *   B) the page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to the system.
882  *
883  * These primitives allow multiple processes to access different CPU ring
884  * buffers concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multiple read-only accesses are also serialized.
888  */
889
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893
894 static inline void trace_access_lock(int cpu)
895 {
896         if (cpu == RING_BUFFER_ALL_CPUS) {
897                 /* gain it for accessing the whole ring buffer. */
898                 down_write(&all_cpu_access_lock);
899         } else {
900                 /* gain it for accessing a cpu ring buffer. */
901
902                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903                 down_read(&all_cpu_access_lock);
904
905                 /* Secondly block other access to this @cpu ring buffer. */
906                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
907         }
908 }
909
910 static inline void trace_access_unlock(int cpu)
911 {
912         if (cpu == RING_BUFFER_ALL_CPUS) {
913                 up_write(&all_cpu_access_lock);
914         } else {
915                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916                 up_read(&all_cpu_access_lock);
917         }
918 }
919
920 static inline void trace_access_lock_init(void)
921 {
922         int cpu;
923
924         for_each_possible_cpu(cpu)
925                 mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927
928 #else
929
930 static DEFINE_MUTEX(access_lock);
931
932 static inline void trace_access_lock(int cpu)
933 {
934         (void)cpu;
935         mutex_lock(&access_lock);
936 }
937
938 static inline void trace_access_unlock(int cpu)
939 {
940         (void)cpu;
941         mutex_unlock(&access_lock);
942 }
943
944 static inline void trace_access_lock_init(void)
945 {
946 }
947
948 #endif
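/*
 * Usage pattern (sketch): a consuming reader brackets its access to one
 * CPU buffer, or to all of them, with the helpers above:
 *
 *	trace_access_lock(cpu_file);
 *	... peek at or consume events for @cpu_file here ...
 *	trace_access_unlock(cpu_file);
 *
 * where cpu_file is either a CPU number or RING_BUFFER_ALL_CPUS.
 */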
949
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952                                  unsigned int trace_ctx,
953                                  int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955                                       struct trace_buffer *buffer,
956                                       unsigned int trace_ctx,
957                                       int skip, struct pt_regs *regs);
958
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961                                         unsigned int trace_ctx,
962                                         int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966                                       struct trace_buffer *buffer,
967                                       unsigned long trace_ctx,
968                                       int skip, struct pt_regs *regs)
969 {
970 }
971
972 #endif
973
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976                   int type, unsigned int trace_ctx)
977 {
978         struct trace_entry *ent = ring_buffer_event_data(event);
979
980         tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985                           int type,
986                           unsigned long len,
987                           unsigned int trace_ctx)
988 {
989         struct ring_buffer_event *event;
990
991         event = ring_buffer_lock_reserve(buffer, len);
992         if (event != NULL)
993                 trace_event_setup(event, type, trace_ctx);
994
995         return event;
996 }
997
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000         if (tr->array_buffer.buffer)
1001                 ring_buffer_record_on(tr->array_buffer.buffer);
1002         /*
1003          * This flag is looked at when buffers haven't been allocated
1004          * yet, or by some tracers (like irqsoff) that just want to
1005          * know if the ring buffer has been disabled, but can handle
1006          * races where it gets disabled while we still do a record.
1007          * As the check is in the fast path of the tracers, it is more
1008          * important to be fast than accurate.
1009          */
1010         tr->buffer_disabled = 0;
1011         /* Make the flag seen by readers */
1012         smp_wmb();
1013 }
1014
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023         tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
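/*
 * Debugging sketch (hypothetical caller elsewhere in the kernel):
 * tracing_on()/tracing_off() are commonly used to capture a narrow window
 * around a problem so the interesting events are not overwritten:
 *
 *	tracing_on();
 *	do_something_suspicious();	(hypothetical function)
 *	tracing_off();
 */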
1026
1027
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031         __this_cpu_write(trace_taskinfo_save, true);
1032
1033         /* If this is the temp buffer, we need to commit fully */
1034         if (this_cpu_read(trace_buffered_event) == event) {
1035                 /* Length is in event->array[0] */
1036                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037                 /* Release the temp buffer */
1038                 this_cpu_dec(trace_buffered_event_cnt);
1039                 /* ring_buffer_unlock_commit() enables preemption */
1040                 preempt_enable_notrace();
1041         } else
1042                 ring_buffer_unlock_commit(buffer);
1043 }
1044
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046                        const char *str, int size)
1047 {
1048         struct ring_buffer_event *event;
1049         struct trace_buffer *buffer;
1050         struct print_entry *entry;
1051         unsigned int trace_ctx;
1052         int alloc;
1053
1054         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055                 return 0;
1056
1057         if (unlikely(tracing_selftest_running && tr == &global_trace))
1058                 return 0;
1059
1060         if (unlikely(tracing_disabled))
1061                 return 0;
1062
1063         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064
1065         trace_ctx = tracing_gen_ctx();
1066         buffer = tr->array_buffer.buffer;
1067         ring_buffer_nest_start(buffer);
1068         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069                                             trace_ctx);
1070         if (!event) {
1071                 size = 0;
1072                 goto out;
1073         }
1074
1075         entry = ring_buffer_event_data(event);
1076         entry->ip = ip;
1077
1078         memcpy(&entry->buf, str, size);
1079
1080         /* Add a newline if necessary */
1081         if (entry->buf[size - 1] != '\n') {
1082                 entry->buf[size] = '\n';
1083                 entry->buf[size + 1] = '\0';
1084         } else
1085                 entry->buf[size] = '\0';
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090         ring_buffer_nest_end(buffer);
1091         return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:    The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103         return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:    The address of the caller
1110  * @str:   The constant string to write to the buffer to
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114         struct ring_buffer_event *event;
1115         struct trace_buffer *buffer;
1116         struct bputs_entry *entry;
1117         unsigned int trace_ctx;
1118         int size = sizeof(struct bputs_entry);
1119         int ret = 0;
1120
1121         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122                 return 0;
1123
1124         if (unlikely(tracing_selftest_running || tracing_disabled))
1125                 return 0;
1126
1127         trace_ctx = tracing_gen_ctx();
1128         buffer = global_trace.array_buffer.buffer;
1129
1130         ring_buffer_nest_start(buffer);
1131         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132                                             trace_ctx);
1133         if (!event)
1134                 goto out;
1135
1136         entry = ring_buffer_event_data(event);
1137         entry->ip                       = ip;
1138         entry->str                      = str;
1139
1140         __buffer_unlock_commit(buffer, event);
1141         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142
1143         ret = 1;
1144  out:
1145         ring_buffer_nest_end(buffer);
1146         return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
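/*
 * Usage sketch: kernel code normally reaches these helpers through the
 * trace_puts() macro rather than calling them directly; roughly, that
 * macro hands build-time constant strings to __trace_bputs() (only the
 * pointer is recorded) and everything else to __trace_puts():
 *
 *	trace_puts("reached the interesting branch\n");
 */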
1149
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152                                            void *cond_data)
1153 {
1154         struct tracer *tracer = tr->current_trace;
1155         unsigned long flags;
1156
1157         if (in_nmi()) {
1158                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160                 return;
1161         }
1162
1163         if (!tr->allocated_snapshot) {
1164                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166                 tracer_tracing_off(tr);
1167                 return;
1168         }
1169
1170         /* Note, snapshot can not be used when the tracer uses it */
1171         if (tracer->use_max_tr) {
1172                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174                 return;
1175         }
1176
1177         local_irq_save(flags);
1178         update_max_tr(tr, current, smp_processor_id(), cond_data);
1179         local_irq_restore(flags);
1180 }
1181
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184         tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot with either
1195  * a tracing_snapshot_alloc(), or by doing it manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing.
1199  * Basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203         struct trace_array *tr = &global_trace;
1204
1205         tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208
1209 /**
1210  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1211  * @tr:         The tracing instance to snapshot
1212  * @cond_data:  The data to be tested conditionally, and possibly saved
1213  *
1214  * This is the same as tracing_snapshot() except that the snapshot is
1215  * conditional - the snapshot will only happen if the
1216  * cond_snapshot.update() implementation receiving the cond_data
1217  * returns true, which means that the trace array's cond_snapshot
1218  * update() operation used the cond_data to determine whether the
1219  * snapshot should be taken, and if it was, presumably saved it along
1220  * with the snapshot.
1221  */
1222 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1223 {
1224         tracing_snapshot_instance_cond(tr, cond_data);
1225 }
1226 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:         The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already done.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244         void *cond_data = NULL;
1245
1246         local_irq_disable();
1247         arch_spin_lock(&tr->max_lock);
1248
1249         if (tr->cond_snapshot)
1250                 cond_data = tr->cond_snapshot->cond_data;
1251
1252         arch_spin_unlock(&tr->max_lock);
1253         local_irq_enable();
1254
1255         return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260                                         struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265         int ret;
1266
1267         if (!tr->allocated_snapshot) {
1268
1269                 /* allocate spare buffer */
1270                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272                 if (ret < 0)
1273                         return ret;
1274
1275                 tr->allocated_snapshot = true;
1276         }
1277
1278         return 0;
1279 }
1280
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283         /*
1284          * We don't free the ring buffer; instead, we resize it because
1285          * the max_tr ring buffer has some state (e.g. ring->clock) that
1286          * we want to preserve.
1287          */
1288         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289         set_buffer_entries(&tr->max_buffer, 1);
1290         tracing_reset_online_cpus(&tr->max_buffer);
1291         tr->allocated_snapshot = false;
1292 }
1293
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306         struct trace_array *tr = &global_trace;
1307         int ret;
1308
1309         ret = tracing_alloc_snapshot_instance(tr);
1310         WARN_ON(ret < 0);
1311
1312         return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315
1316 /**
1317  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1318  *
1319  * This is similar to tracing_snapshot(), but it will allocate the
1320  * snapshot buffer if it isn't already allocated. Use this only
1321  * where it is safe to sleep, as the allocation may sleep.
1322  *
1323  * This causes a swap between the snapshot buffer and the current live
1324  * tracing buffer. You can use this to take snapshots of the live
1325  * trace when some condition is triggered, but continue to trace.
1326  */
1327 void tracing_snapshot_alloc(void)
1328 {
1329         int ret;
1330
1331         ret = tracing_alloc_snapshot();
1332         if (ret < 0)
1333                 return;
1334
1335         tracing_snapshot();
1336 }
1337 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
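/*
 * Usage sketch (hypothetical caller): allocate the snapshot buffer once
 * from a context that may sleep, then take snapshots at the point of
 * interest, which may not be able to sleep:
 *
 *	(during setup)
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	(later, when the interesting condition hits)
 *	tracing_snapshot();
 *
 * The equivalent from user space is: echo 1 > /sys/kernel/tracing/snapshot
 */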
1338
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:         The tracing instance
1342  * @cond_data:  User data to associate with the snapshot
1343  * @update:     Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353                                  cond_update_fn_t update)
1354 {
1355         struct cond_snapshot *cond_snapshot;
1356         int ret = 0;
1357
1358         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359         if (!cond_snapshot)
1360                 return -ENOMEM;
1361
1362         cond_snapshot->cond_data = cond_data;
1363         cond_snapshot->update = update;
1364
1365         mutex_lock(&trace_types_lock);
1366
1367         ret = tracing_alloc_snapshot_instance(tr);
1368         if (ret)
1369                 goto fail_unlock;
1370
1371         if (tr->current_trace->use_max_tr) {
1372                 ret = -EBUSY;
1373                 goto fail_unlock;
1374         }
1375
1376         /*
1377          * The cond_snapshot can only change to NULL without the
1378          * trace_types_lock. We don't care if we race with it going
1379          * to NULL, but we want to make sure that it's not set to
1380          * something other than NULL when we get here, which we can
1381          * do safely with only holding the trace_types_lock and not
1382          * having to take the max_lock.
1383          */
1384         if (tr->cond_snapshot) {
1385                 ret = -EBUSY;
1386                 goto fail_unlock;
1387         }
1388
1389         local_irq_disable();
1390         arch_spin_lock(&tr->max_lock);
1391         tr->cond_snapshot = cond_snapshot;
1392         arch_spin_unlock(&tr->max_lock);
1393         local_irq_enable();
1394
1395         mutex_unlock(&trace_types_lock);
1396
1397         return ret;
1398
1399  fail_unlock:
1400         mutex_unlock(&trace_types_lock);
1401         kfree(cond_snapshot);
1402         return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1405
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:         The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418         int ret = 0;
1419
1420         local_irq_disable();
1421         arch_spin_lock(&tr->max_lock);
1422
1423         if (!tr->cond_snapshot)
1424                 ret = -EINVAL;
1425         else {
1426                 kfree(tr->cond_snapshot);
1427                 tr->cond_snapshot = NULL;
1428         }
1429
1430         arch_spin_unlock(&tr->max_lock);
1431         local_irq_enable();
1432
1433         return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
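/*
 * Conditional snapshot sketch (hypothetical caller; cond_update_fn_t is
 * the callback type expected by tracing_snapshot_cond_enable()). The
 * callback decides, based on cond_data, whether a snapshot is taken:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return *(int *)cond_data > 0;	(illustrative condition)
 *	}
 *
 * The caller would then use tracing_snapshot_cond_enable(tr, &my_data,
 * my_update) once, tracing_snapshot_cond(tr, &my_data) at each point of
 * interest, and tracing_snapshot_cond_disable(tr) when done.
 */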
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450         return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455         /* Give warning */
1456         tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461         return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466         return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471         return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)       do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479         if (tr->array_buffer.buffer)
1480                 ring_buffer_record_off(tr->array_buffer.buffer);
1481         /*
1482          * This flag is looked at when buffers haven't been allocated
1483          * yet, or by some tracers (like irqsoff), that just want to
1484          * yet, or by some tracers (like irqsoff) that just want to
1485          * know if the ring buffer has been disabled, but can handle
1486          * races where it gets disabled while we still do a record.
1487          * important to be fast than accurate.
1488          */
1489         tr->buffer_disabled = 1;
1490         /* Make the flag seen by readers */
1491         smp_wmb();
1492 }
1493
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504         tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507
1508 void disable_trace_on_warning(void)
1509 {
1510         if (__disable_trace_on_warning) {
1511                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512                         "Disabling tracing due to warning\n");
1513                 tracing_off();
1514         }
1515 }
1516
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525         if (tr->array_buffer.buffer)
1526                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527         return !tr->buffer_disabled;
1528 }
1529
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535         return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
1538
1539 static int __init set_buf_size(char *str)
1540 {
1541         unsigned long buf_size;
1542
1543         if (!str)
1544                 return 0;
1545         buf_size = memparse(str, &str);
1546         /*
1547          * nr_entries can not be zero and the startup
1548          * tests require some buffer space. Therefore
1549          * ensure we have at least 4096 bytes of buffer.
1550          */
1551         trace_buf_size = max(4096UL, buf_size);
1552         return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
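/*
 * Example (sketch): memparse() above accepts the usual size suffixes, so
 * the per-CPU buffer size can be set from the boot command line as e.g.:
 *
 *	trace_buf_size=10M
 *	trace_buf_size=1441792
 */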
1555
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558         unsigned long threshold;
1559         int ret;
1560
1561         if (!str)
1562                 return 0;
1563         ret = kstrtoul(str, 0, &threshold);
1564         if (ret < 0)
1565                 return 0;
1566         tracing_thresh = threshold * 1000;
1567         return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570
1571 unsigned long nsecs_to_usecs(unsigned long nsecs)
1572 {
1573         return nsecs / 1000;
1574 }
1575
1576 /*
1577  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1578  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1579  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1580  * of strings in the order that the evals (enum) were defined.
1581  */
1582 #undef C
1583 #define C(a, b) b
1584
1585 /* These must match the bit positions in trace_iterator_flags */
1586 static const char *trace_options[] = {
1587         TRACE_FLAGS
1588         NULL
1589 };
1590
1591 static struct {
1592         u64 (*func)(void);
1593         const char *name;
1594         int in_ns;              /* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596         { trace_clock_local,            "local",        1 },
1597         { trace_clock_global,           "global",       1 },
1598         { trace_clock_counter,          "counter",      0 },
1599         { trace_clock_jiffies,          "uptime",       0 },
1600         { trace_clock,                  "perf",         1 },
1601         { ktime_get_mono_fast_ns,       "mono",         1 },
1602         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1603         { ktime_get_boot_fast_ns,       "boot",         1 },
1604         { ktime_get_tai_fast_ns,        "tai",          1 },
1605         ARCH_TRACE_CLOCKS
1606 };
1607
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610         if (trace_clocks[tr->clock_id].in_ns)
1611                 return true;
1612
1613         return false;
1614 }
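/*
 * Example (sketch): the names in trace_clocks[] are what user space selects
 * through the tracefs "trace_clock" file, e.g.:
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 *
 * Per the table above, "counter" and "uptime" are the two clocks that do
 * not count in nanoseconds.
 */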
1615
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621         memset(parser, 0, sizeof(*parser));
1622
1623         parser->buffer = kmalloc(size, GFP_KERNEL);
1624         if (!parser->buffer)
1625                 return 1;
1626
1627         parser->size = size;
1628         return 0;
1629 }
1630
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636         kfree(parser->buffer);
1637         parser->buffer = NULL;
1638 }
1639
1640 /*
1641  * trace_get_user - reads the user input string separated by space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652         size_t cnt, loff_t *ppos)
1653 {
1654         char ch;
1655         size_t read = 0;
1656         ssize_t ret;
1657
1658         if (!*ppos)
1659                 trace_parser_clear(parser);
1660
1661         ret = get_user(ch, ubuf++);
1662         if (ret)
1663                 goto out;
1664
1665         read++;
1666         cnt--;
1667
1668         /*
1669          * The parser is not finished with the last write,
1670          * continue reading the user input without skipping spaces.
1671          */
1672         if (!parser->cont) {
1673                 /* skip white space */
1674                 while (cnt && isspace(ch)) {
1675                         ret = get_user(ch, ubuf++);
1676                         if (ret)
1677                                 goto out;
1678                         read++;
1679                         cnt--;
1680                 }
1681
1682                 parser->idx = 0;
1683
1684                 /* only spaces were written */
1685                 if (isspace(ch) || !ch) {
1686                         *ppos += read;
1687                         ret = read;
1688                         goto out;
1689                 }
1690         }
1691
1692         /* read the non-space input */
1693         while (cnt && !isspace(ch) && ch) {
1694                 if (parser->idx < parser->size - 1)
1695                         parser->buffer[parser->idx++] = ch;
1696                 else {
1697                         ret = -EINVAL;
1698                         goto out;
1699                 }
1700                 ret = get_user(ch, ubuf++);
1701                 if (ret)
1702                         goto out;
1703                 read++;
1704                 cnt--;
1705         }
1706
1707         /* Either the input is complete or we need to wait for another call. */
1708         if (isspace(ch) || !ch) {
1709                 parser->buffer[parser->idx] = 0;
1710                 parser->cont = false;
1711         } else if (parser->idx < parser->size - 1) {
1712                 parser->cont = true;
1713                 parser->buffer[parser->idx++] = ch;
1714                 /* Make sure the parsed string always terminates with '\0'. */
1715                 parser->buffer[parser->idx] = 0;
1716         } else {
1717                 ret = -EINVAL;
1718                 goto out;
1719         }
1720
1721         *ppos += read;
1722         ret = read;
1723
1724 out:
1725         return ret;
1726 }
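/*
 * Illustration of the intended use (assuming a caller like the ftrace filter
 * write handlers): a write of "sched_switch sched_wakeup" is consumed one
 * token per call.  The first call copies "sched_switch" into parser->buffer,
 * NUL-terminates it and clears parser->cont; since the returned byte count is
 * short, user space normally re-issues the remainder and the next call yields
 * "sched_wakeup".  A write that stops in the middle of a token leaves
 * parser->cont set so a later call can append to the same token.
 */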
1727
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731         int len;
1732
1733         if (trace_seq_used(s) <= s->seq.readpos)
1734                 return -EBUSY;
1735
1736         len = trace_seq_used(s) - s->seq.readpos;
1737         if (cnt > len)
1738                 cnt = len;
1739         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740
1741         s->seq.readpos += cnt;
1742         return cnt;
1743 }
1744
1745 unsigned long __read_mostly     tracing_thresh;
1746
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749
1750 #ifdef LATENCY_FS_NOTIFY
1751
1752 static struct workqueue_struct *fsnotify_wq;
1753
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756         struct trace_array *tr = container_of(work, struct trace_array,
1757                                               fsnotify_work);
1758         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763         struct trace_array *tr = container_of(iwork, struct trace_array,
1764                                               fsnotify_irqwork);
1765         queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769                                      struct dentry *d_tracer)
1770 {
1771         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773         tr->d_max_latency = trace_create_file("tracing_max_latency",
1774                                               TRACE_MODE_WRITE,
1775                                               d_tracer, &tr->max_latency,
1776                                               &tracing_max_lat_fops);
1777 }
1778
1779 __init static int latency_fsnotify_init(void)
1780 {
1781         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1783         if (!fsnotify_wq) {
1784                 pr_err("Unable to allocate tr_max_lat_wq\n");
1785                 return -ENOMEM;
1786         }
1787         return 0;
1788 }
1789
1790 late_initcall_sync(latency_fsnotify_init);
1791
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794         if (!fsnotify_wq)
1795                 return;
1796         /*
1797          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798          * possible that we are called from __schedule() or do_idle(), which
1799          * could cause a deadlock.
1800          */
1801         irq_work_queue(&tr->fsnotify_irqwork);
1802 }
1803
1804 #else /* !LATENCY_FS_NOTIFY */
1805
1806 #define trace_create_maxlat_file(tr, d_tracer)                          \
1807         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1808                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1809
1810 #endif
1811
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (This way the maximum trace is permanently saved
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820         struct array_buffer *trace_buf = &tr->array_buffer;
1821         struct array_buffer *max_buf = &tr->max_buffer;
1822         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824
1825         max_buf->cpu = cpu;
1826         max_buf->time_start = data->preempt_timestamp;
1827
1828         max_data->saved_latency = tr->max_latency;
1829         max_data->critical_start = data->critical_start;
1830         max_data->critical_end = data->critical_end;
1831
1832         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833         max_data->pid = tsk->pid;
1834         /*
1835          * If tsk == current, then use current_uid(), as that does not use
1836          * RCU. The irq tracer can be called out of RCU scope.
1837          */
1838         if (tsk == current)
1839                 max_data->uid = current_uid();
1840         else
1841                 max_data->uid = task_uid(tsk);
1842
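        /* Equivalent to PRIO_TO_NICE(tsk->static_prio) */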
1843         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844         max_data->policy = tsk->policy;
1845         max_data->rt_priority = tsk->rt_priority;
1846
1847         /* record this task's comm */
1848         tracing_record_cmdline(tsk);
1849         latency_fsnotify(tr);
1850 }
1851
1852 /**
1853  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1854  * @tr: tracer
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864               void *cond_data)
1865 {
1866         if (tr->stop_count)
1867                 return;
1868
1869         WARN_ON_ONCE(!irqs_disabled());
1870
1871         if (!tr->allocated_snapshot) {
1872                 /* Only the nop tracer should hit this when disabling */
1873                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874                 return;
1875         }
1876
1877         arch_spin_lock(&tr->max_lock);
1878
1879         /* Inherit the recordable setting from array_buffer */
1880         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881                 ring_buffer_record_on(tr->max_buffer.buffer);
1882         else
1883                 ring_buffer_record_off(tr->max_buffer.buffer);
1884
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887                 arch_spin_unlock(&tr->max_lock);
1888                 return;
1889         }
1890 #endif
1891         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892
1893         __update_max_tr(tr, tsk, cpu);
1894
1895         arch_spin_unlock(&tr->max_lock);
1896 }
1897
1898 /**
1899  * update_max_tr_single - only copy one trace over, and reset the rest
1900  * @tr: tracer
1901  * @tsk: task with the latency
1902  * @cpu: the cpu of the buffer to copy.
1903  *
1904  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1905  */
1906 void
1907 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1908 {
1909         int ret;
1910
1911         if (tr->stop_count)
1912                 return;
1913
1914         WARN_ON_ONCE(!irqs_disabled());
1915         if (!tr->allocated_snapshot) {
1916                 /* Only the nop tracer should hit this when disabling */
1917                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1918                 return;
1919         }
1920
1921         arch_spin_lock(&tr->max_lock);
1922
1923         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1924
1925         if (ret == -EBUSY) {
1926                 /*
1927                  * We failed to swap the buffer due to a commit taking
1928                  * place on this CPU. We fail to record, but we reset
1929                  * the max trace buffer (no one writes directly to it)
1930                  * and flag that it failed.
1931                  */
1932                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1933                         "Failed to swap buffers due to commit in progress\n");
1934         }
1935
1936         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1937
1938         __update_max_tr(tr, tsk, cpu);
1939         arch_spin_unlock(&tr->max_lock);
1940 }
1941
1942 #endif /* CONFIG_TRACER_MAX_TRACE */
1943
1944 static int wait_on_pipe(struct trace_iterator *iter, int full)
1945 {
1946         /* Iterators are static, they should be filled or empty */
1947         if (trace_buffer_iter(iter, iter->cpu_file))
1948                 return 0;
1949
1950         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1951                                 full);
1952 }
1953
1954 #ifdef CONFIG_FTRACE_STARTUP_TEST
1955 static bool selftests_can_run;
1956
1957 struct trace_selftests {
1958         struct list_head                list;
1959         struct tracer                   *type;
1960 };
1961
1962 static LIST_HEAD(postponed_selftests);
1963
1964 static int save_selftest(struct tracer *type)
1965 {
1966         struct trace_selftests *selftest;
1967
1968         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1969         if (!selftest)
1970                 return -ENOMEM;
1971
1972         selftest->type = type;
1973         list_add(&selftest->list, &postponed_selftests);
1974         return 0;
1975 }
1976
1977 static int run_tracer_selftest(struct tracer *type)
1978 {
1979         struct trace_array *tr = &global_trace;
1980         struct tracer *saved_tracer = tr->current_trace;
1981         int ret;
1982
1983         if (!type->selftest || tracing_selftest_disabled)
1984                 return 0;
1985
1986         /*
1987          * If a tracer registers early in boot up (before scheduling is
1988          * initialized and such), then do not run its selftests yet.
1989          * Instead, run it a little later in the boot process.
1990          */
1991         if (!selftests_can_run)
1992                 return save_selftest(type);
1993
1994         if (!tracing_is_on()) {
1995                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1996                         type->name);
1997                 return 0;
1998         }
1999
2000         /*
2001          * Run a selftest on this tracer.
2002          * Here we reset the trace buffer, and set the current
2003          * tracer to be this tracer. The tracer can then run some
2004          * internal tracing to verify that everything is in order.
2005          * If we fail, we do not register this tracer.
2006          */
2007         tracing_reset_online_cpus(&tr->array_buffer);
2008
2009         tr->current_trace = type;
2010
2011 #ifdef CONFIG_TRACER_MAX_TRACE
2012         if (type->use_max_tr) {
2013                 /* If we expanded the buffers, make sure the max is expanded too */
2014                 if (ring_buffer_expanded)
2015                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2016                                            RING_BUFFER_ALL_CPUS);
2017                 tr->allocated_snapshot = true;
2018         }
2019 #endif
2020
2021         /* the test is responsible for initializing and enabling */
2022         pr_info("Testing tracer %s: ", type->name);
2023         ret = type->selftest(type, tr);
2024         /* the test is responsible for resetting too */
2025         tr->current_trace = saved_tracer;
2026         if (ret) {
2027                 printk(KERN_CONT "FAILED!\n");
2028                 /* Add the warning after printing 'FAILED' */
2029                 WARN_ON(1);
2030                 return -1;
2031         }
2032         /* Only reset on passing, to avoid touching corrupted buffers */
2033         tracing_reset_online_cpus(&tr->array_buffer);
2034
2035 #ifdef CONFIG_TRACER_MAX_TRACE
2036         if (type->use_max_tr) {
2037                 tr->allocated_snapshot = false;
2038
2039                 /* Shrink the max buffer again */
2040                 if (ring_buffer_expanded)
2041                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2042                                            RING_BUFFER_ALL_CPUS);
2043         }
2044 #endif
2045
2046         printk(KERN_CONT "PASSED\n");
2047         return 0;
2048 }
2049
2050 static int do_run_tracer_selftest(struct tracer *type)
2051 {
2052         int ret;
2053
2054         /*
2055          * Tests can take a long time, especially if they are run one after the
2056          * other, as does happen during bootup when all the tracers are
2057          * registered. This could cause the soft lockup watchdog to trigger.
2058          */
2059         cond_resched();
2060
2061         tracing_selftest_running = true;
2062         ret = run_tracer_selftest(type);
2063         tracing_selftest_running = false;
2064
2065         return ret;
2066 }
2067
2068 static __init int init_trace_selftests(void)
2069 {
2070         struct trace_selftests *p, *n;
2071         struct tracer *t, **last;
2072         int ret;
2073
2074         selftests_can_run = true;
2075
2076         mutex_lock(&trace_types_lock);
2077
2078         if (list_empty(&postponed_selftests))
2079                 goto out;
2080
2081         pr_info("Running postponed tracer tests:\n");
2082
2083         tracing_selftest_running = true;
2084         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2085                 /* This loop can take minutes when sanitizers are enabled, so
2086                  * let's make sure we allow RCU processing.
2087                  */
2088                 cond_resched();
2089                 ret = run_tracer_selftest(p->type);
2090                 /* If the test fails, then warn and remove from available_tracers */
2091                 if (ret < 0) {
2092                         WARN(1, "tracer: %s failed selftest, disabling\n",
2093                              p->type->name);
2094                         last = &trace_types;
2095                         for (t = trace_types; t; t = t->next) {
2096                                 if (t == p->type) {
2097                                         *last = t->next;
2098                                         break;
2099                                 }
2100                                 last = &t->next;
2101                         }
2102                 }
2103                 list_del(&p->list);
2104                 kfree(p);
2105         }
2106         tracing_selftest_running = false;
2107
2108  out:
2109         mutex_unlock(&trace_types_lock);
2110
2111         return 0;
2112 }
2113 core_initcall(init_trace_selftests);
2114 #else
2115 static inline int run_tracer_selftest(struct tracer *type)
2116 {
2117         return 0;
2118 }
2119 static inline int do_run_tracer_selftest(struct tracer *type)
2120 {
2121         return 0;
2122 }
2123 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2124
2125 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2126
2127 static void __init apply_trace_boot_options(void);
2128
2129 /**
2130  * register_tracer - register a tracer with the ftrace system.
2131  * @type: the plugin for the tracer
2132  *
2133  * Register a new plugin tracer.
2134  */
2135 int __init register_tracer(struct tracer *type)
2136 {
2137         struct tracer *t;
2138         int ret = 0;
2139
2140         if (!type->name) {
2141                 pr_info("Tracer must have a name\n");
2142                 return -1;
2143         }
2144
2145         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2146                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2147                 return -1;
2148         }
2149
2150         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2151                 pr_warn("Can not register tracer %s due to lockdown\n",
2152                            type->name);
2153                 return -EPERM;
2154         }
2155
2156         mutex_lock(&trace_types_lock);
2157
2158         for (t = trace_types; t; t = t->next) {
2159                 if (strcmp(type->name, t->name) == 0) {
2160                         /* already found */
2161                         pr_info("Tracer %s already registered\n",
2162                                 type->name);
2163                         ret = -1;
2164                         goto out;
2165                 }
2166         }
2167
2168         if (!type->set_flag)
2169                 type->set_flag = &dummy_set_flag;
2170         if (!type->flags) {
2171                 /* allocate a dummy tracer_flags */
2172                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2173                 if (!type->flags) {
2174                         ret = -ENOMEM;
2175                         goto out;
2176                 }
2177                 type->flags->val = 0;
2178                 type->flags->opts = dummy_tracer_opt;
2179         } else
2180                 if (!type->flags->opts)
2181                         type->flags->opts = dummy_tracer_opt;
2182
2183         /* store the tracer for __set_tracer_option */
2184         type->flags->trace = type;
2185
2186         ret = do_run_tracer_selftest(type);
2187         if (ret < 0)
2188                 goto out;
2189
2190         type->next = trace_types;
2191         trace_types = type;
2192         add_tracer_options(&global_trace, type);
2193
2194  out:
2195         mutex_unlock(&trace_types_lock);
2196
2197         if (ret || !default_bootup_tracer)
2198                 goto out_unlock;
2199
2200         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2201                 goto out_unlock;
2202
2203         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2204         /* Do we want this tracer to start on bootup? */
2205         tracing_set_tracer(&global_trace, type->name);
2206         default_bootup_tracer = NULL;
2207
2208         apply_trace_boot_options();
2209
2210         /* Disable other selftests, since running this tracer would break them. */
2211         disable_tracing_selftest("running a tracer");
2212
2213  out_unlock:
2214         return ret;
2215 }
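/*
 * Minimal registration sketch (hypothetical tracer, illustration only):
 *
 *        static int mytrace_init(struct trace_array *tr)
 *        {
 *                return 0;
 *        }
 *
 *        static struct tracer mytrace __read_mostly = {
 *                .name   = "mytrace",
 *                .init   = mytrace_init,
 *        };
 *
 *        static __init int mytrace_register(void)
 *        {
 *                return register_tracer(&mytrace);
 *        }
 *        core_initcall(mytrace_register);
 *
 * Callbacks left NULL (set_flag, flags, selftest, ...) get the defaults set
 * up above; the name must be unique and shorter than MAX_TRACER_SIZE.
 */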
2216
2217 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2218 {
2219         struct trace_buffer *buffer = buf->buffer;
2220
2221         if (!buffer)
2222                 return;
2223
2224         ring_buffer_record_disable(buffer);
2225
2226         /* Make sure all commits have finished */
2227         synchronize_rcu();
2228         ring_buffer_reset_cpu(buffer, cpu);
2229
2230         ring_buffer_record_enable(buffer);
2231 }
2232
2233 void tracing_reset_online_cpus(struct array_buffer *buf)
2234 {
2235         struct trace_buffer *buffer = buf->buffer;
2236
2237         if (!buffer)
2238                 return;
2239
2240         ring_buffer_record_disable(buffer);
2241
2242         /* Make sure all commits have finished */
2243         synchronize_rcu();
2244
2245         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2246
2247         ring_buffer_reset_online_cpus(buffer);
2248
2249         ring_buffer_record_enable(buffer);
2250 }
2251
2252 /* Must have trace_types_lock held */
2253 void tracing_reset_all_online_cpus_unlocked(void)
2254 {
2255         struct trace_array *tr;
2256
2257         lockdep_assert_held(&trace_types_lock);
2258
2259         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2260                 if (!tr->clear_trace)
2261                         continue;
2262                 tr->clear_trace = false;
2263                 tracing_reset_online_cpus(&tr->array_buffer);
2264 #ifdef CONFIG_TRACER_MAX_TRACE
2265                 tracing_reset_online_cpus(&tr->max_buffer);
2266 #endif
2267         }
2268 }
2269
2270 void tracing_reset_all_online_cpus(void)
2271 {
2272         mutex_lock(&trace_types_lock);
2273         tracing_reset_all_online_cpus_unlocked();
2274         mutex_unlock(&trace_types_lock);
2275 }
2276
2277 /*
2278  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2279  * is the tgid last observed corresponding to pid=i.
2280  */
2281 static int *tgid_map;
2282
2283 /* The maximum valid index into tgid_map. */
2284 static size_t tgid_map_max;
2285
2286 #define SAVED_CMDLINES_DEFAULT 128
2287 #define NO_CMDLINE_MAP UINT_MAX
2288 /*
2289  * Preemption must be disabled before acquiring trace_cmdline_lock.
2290  * The various trace_arrays' max_lock must be acquired in a context
2291  * where interrupt is disabled.
2292  */
2293 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2294 struct saved_cmdlines_buffer {
2295         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2296         unsigned *map_cmdline_to_pid;
2297         unsigned cmdline_num;
2298         int cmdline_idx;
2299         char *saved_cmdlines;
2300 };
2301 static struct saved_cmdlines_buffer *savedcmd;
2302
2303 static inline char *get_saved_cmdlines(int idx)
2304 {
2305         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2306 }
2307
2308 static inline void set_cmdline(int idx, const char *cmdline)
2309 {
2310         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2311 }
2312
2313 static int allocate_cmdlines_buffer(unsigned int val,
2314                                     struct saved_cmdlines_buffer *s)
2315 {
2316         s->map_cmdline_to_pid = kmalloc_array(val,
2317                                               sizeof(*s->map_cmdline_to_pid),
2318                                               GFP_KERNEL);
2319         if (!s->map_cmdline_to_pid)
2320                 return -ENOMEM;
2321
2322         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2323         if (!s->saved_cmdlines) {
2324                 kfree(s->map_cmdline_to_pid);
2325                 return -ENOMEM;
2326         }
2327
2328         s->cmdline_idx = 0;
2329         s->cmdline_num = val;
2330         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2331                sizeof(s->map_pid_to_cmdline));
2332         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2333                val * sizeof(*s->map_cmdline_to_pid));
2334
2335         return 0;
2336 }
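/*
 * Sizing illustration: with the default of 128 saved cmdlines and
 * TASK_COMM_LEN == 16, saved_cmdlines is a flat 128 * 16 byte array where
 * slot idx lives at &saved_cmdlines[idx * 16] (see get_saved_cmdlines()
 * above), map_pid_to_cmdline[] maps a (masked) pid to such a slot, and
 * map_cmdline_to_pid[] records which pid currently owns the slot.
 */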
2337
2338 static int trace_create_savedcmd(void)
2339 {
2340         int ret;
2341
2342         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2343         if (!savedcmd)
2344                 return -ENOMEM;
2345
2346         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2347         if (ret < 0) {
2348                 kfree(savedcmd);
2349                 savedcmd = NULL;
2350                 return -ENOMEM;
2351         }
2352
2353         return 0;
2354 }
2355
2356 int is_tracing_stopped(void)
2357 {
2358         return global_trace.stop_count;
2359 }
2360
2361 /**
2362  * tracing_start - quick start of the tracer
2363  *
2364  * If tracing is enabled but was stopped by tracing_stop,
2365  * this will start the tracer back up.
2366  */
2367 void tracing_start(void)
2368 {
2369         struct trace_buffer *buffer;
2370         unsigned long flags;
2371
2372         if (tracing_disabled)
2373                 return;
2374
2375         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2376         if (--global_trace.stop_count) {
2377                 if (global_trace.stop_count < 0) {
2378                         /* Someone screwed up their debugging */
2379                         WARN_ON_ONCE(1);
2380                         global_trace.stop_count = 0;
2381                 }
2382                 goto out;
2383         }
2384
2385         /* Prevent the buffers from switching */
2386         arch_spin_lock(&global_trace.max_lock);
2387
2388         buffer = global_trace.array_buffer.buffer;
2389         if (buffer)
2390                 ring_buffer_record_enable(buffer);
2391
2392 #ifdef CONFIG_TRACER_MAX_TRACE
2393         buffer = global_trace.max_buffer.buffer;
2394         if (buffer)
2395                 ring_buffer_record_enable(buffer);
2396 #endif
2397
2398         arch_spin_unlock(&global_trace.max_lock);
2399
2400  out:
2401         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2402 }
2403
2404 static void tracing_start_tr(struct trace_array *tr)
2405 {
2406         struct trace_buffer *buffer;
2407         unsigned long flags;
2408
2409         if (tracing_disabled)
2410                 return;
2411
2412         /* If global, we need to also start the max tracer */
2413         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2414                 return tracing_start();
2415
2416         raw_spin_lock_irqsave(&tr->start_lock, flags);
2417
2418         if (--tr->stop_count) {
2419                 if (tr->stop_count < 0) {
2420                         /* Someone screwed up their debugging */
2421                         WARN_ON_ONCE(1);
2422                         tr->stop_count = 0;
2423                 }
2424                 goto out;
2425         }
2426
2427         buffer = tr->array_buffer.buffer;
2428         if (buffer)
2429                 ring_buffer_record_enable(buffer);
2430
2431  out:
2432         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2433 }
2434
2435 /**
2436  * tracing_stop - quick stop of the tracer
2437  *
2438  * Light weight way to stop tracing. Use in conjunction with
2439  * tracing_start.
2440  */
2441 void tracing_stop(void)
2442 {
2443         struct trace_buffer *buffer;
2444         unsigned long flags;
2445
2446         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2447         if (global_trace.stop_count++)
2448                 goto out;
2449
2450         /* Prevent the buffers from switching */
2451         arch_spin_lock(&global_trace.max_lock);
2452
2453         buffer = global_trace.array_buffer.buffer;
2454         if (buffer)
2455                 ring_buffer_record_disable(buffer);
2456
2457 #ifdef CONFIG_TRACER_MAX_TRACE
2458         buffer = global_trace.max_buffer.buffer;
2459         if (buffer)
2460                 ring_buffer_record_disable(buffer);
2461 #endif
2462
2463         arch_spin_unlock(&global_trace.max_lock);
2464
2465  out:
2466         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2467 }
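/*
 * Typical pairing (illustrative): debugging code may freeze the buffers with
 *
 *        tracing_stop();
 *        ... inspect or dump the trace ...
 *        tracing_start();
 *
 * Since stop_count is a counter, nested stop/start pairs balance correctly.
 */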
2468
2469 static void tracing_stop_tr(struct trace_array *tr)
2470 {
2471         struct trace_buffer *buffer;
2472         unsigned long flags;
2473
2474         /* If global, we need to also stop the max tracer */
2475         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2476                 return tracing_stop();
2477
2478         raw_spin_lock_irqsave(&tr->start_lock, flags);
2479         if (tr->stop_count++)
2480                 goto out;
2481
2482         buffer = tr->array_buffer.buffer;
2483         if (buffer)
2484                 ring_buffer_record_disable(buffer);
2485
2486  out:
2487         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2488 }
2489
2490 static int trace_save_cmdline(struct task_struct *tsk)
2491 {
2492         unsigned tpid, idx;
2493
2494         /* treat recording of idle task as a success */
2495         if (!tsk->pid)
2496                 return 1;
2497
2498         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2499
2500         /*
2501          * It's not the end of the world if we don't get
2502          * the lock, but we also don't want to spin
2503          * nor do we want to disable interrupts,
2504          * so if we miss here, then better luck next time.
2505          *
2506          * This is called from within the scheduler and wakeup paths, so
2507          * interrupts had better be disabled and the run queue lock held.
2508          */
2509         lockdep_assert_preemption_disabled();
2510         if (!arch_spin_trylock(&trace_cmdline_lock))
2511                 return 0;
2512
2513         idx = savedcmd->map_pid_to_cmdline[tpid];
2514         if (idx == NO_CMDLINE_MAP) {
2515                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2516
2517                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2518                 savedcmd->cmdline_idx = idx;
2519         }
2520
2521         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2522         set_cmdline(idx, tsk->comm);
2523
2524         arch_spin_unlock(&trace_cmdline_lock);
2525
2526         return 1;
2527 }
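/*
 * Illustration: with PID_MAX_DEFAULT at its usual value of 0x8000, pid 40000
 * and pid 7232 both map to tpid 7232 (40000 & 0x7fff == 7232), so they share
 * one map_pid_to_cmdline[] slot; the map_cmdline_to_pid[] check in
 * __trace_find_cmdline() below is what tells a reader whether the saved comm
 * really belongs to the pid it asked about.
 */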
2528
2529 static void __trace_find_cmdline(int pid, char comm[])
2530 {
2531         unsigned map;
2532         int tpid;
2533
2534         if (!pid) {
2535                 strcpy(comm, "<idle>");
2536                 return;
2537         }
2538
2539         if (WARN_ON_ONCE(pid < 0)) {
2540                 strcpy(comm, "<XXX>");
2541                 return;
2542         }
2543
2544         tpid = pid & (PID_MAX_DEFAULT - 1);
2545         map = savedcmd->map_pid_to_cmdline[tpid];
2546         if (map != NO_CMDLINE_MAP) {
2547                 tpid = savedcmd->map_cmdline_to_pid[map];
2548                 if (tpid == pid) {
2549                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2550                         return;
2551                 }
2552         }
2553         strcpy(comm, "<...>");
2554 }
2555
2556 void trace_find_cmdline(int pid, char comm[])
2557 {
2558         preempt_disable();
2559         arch_spin_lock(&trace_cmdline_lock);
2560
2561         __trace_find_cmdline(pid, comm);
2562
2563         arch_spin_unlock(&trace_cmdline_lock);
2564         preempt_enable();
2565 }
2566
2567 static int *trace_find_tgid_ptr(int pid)
2568 {
2569         /*
2570          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2571          * if we observe a non-NULL tgid_map then we also observe the correct
2572          * tgid_map_max.
2573          */
2574         int *map = smp_load_acquire(&tgid_map);
2575
2576         if (unlikely(!map || pid > tgid_map_max))
2577                 return NULL;
2578
2579         return &map[pid];
2580 }
2581
2582 int trace_find_tgid(int pid)
2583 {
2584         int *ptr = trace_find_tgid_ptr(pid);
2585
2586         return ptr ? *ptr : 0;
2587 }
2588
2589 static int trace_save_tgid(struct task_struct *tsk)
2590 {
2591         int *ptr;
2592
2593         /* treat recording of idle task as a success */
2594         if (!tsk->pid)
2595                 return 1;
2596
2597         ptr = trace_find_tgid_ptr(tsk->pid);
2598         if (!ptr)
2599                 return 0;
2600
2601         *ptr = tsk->tgid;
2602         return 1;
2603 }
2604
2605 static bool tracing_record_taskinfo_skip(int flags)
2606 {
2607         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2608                 return true;
2609         if (!__this_cpu_read(trace_taskinfo_save))
2610                 return true;
2611         return false;
2612 }
2613
2614 /**
2615  * tracing_record_taskinfo - record the task info of a task
2616  *
2617  * @task:  task to record
2618  * @flags: TRACE_RECORD_CMDLINE for recording comm
2619  *         TRACE_RECORD_TGID for recording tgid
2620  */
2621 void tracing_record_taskinfo(struct task_struct *task, int flags)
2622 {
2623         bool done;
2624
2625         if (tracing_record_taskinfo_skip(flags))
2626                 return;
2627
2628         /*
2629          * Record as much task information as possible. If some fail, continue
2630          * to try to record the others.
2631          */
2632         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2633         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2634
2635         /* If recording any information failed, try again soon. */
2636         if (!done)
2637                 return;
2638
2639         __this_cpu_write(trace_taskinfo_save, false);
2640 }
2641
2642 /**
2643  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2644  *
2645  * @prev: previous task during sched_switch
2646  * @next: next task during sched_switch
2647  * @flags: TRACE_RECORD_CMDLINE for recording comm
2648  *         TRACE_RECORD_TGID for recording tgid
2649  */
2650 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2651                                           struct task_struct *next, int flags)
2652 {
2653         bool done;
2654
2655         if (tracing_record_taskinfo_skip(flags))
2656                 return;
2657
2658         /*
2659          * Record as much task information as possible. If some fail, continue
2660          * to try to record the others.
2661          */
2662         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2663         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2664         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2665         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2666
2667         /* If recording any information failed, try again soon. */
2668         if (!done)
2669                 return;
2670
2671         __this_cpu_write(trace_taskinfo_save, false);
2672 }
2673
2674 /* Helpers to record a specific task information */
2675 void tracing_record_cmdline(struct task_struct *task)
2676 {
2677         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2678 }
2679
2680 void tracing_record_tgid(struct task_struct *task)
2681 {
2682         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2683 }
2684
2685 /*
2686  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2687  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2688  * simplifies those functions and keeps them in sync.
2689  */
2690 enum print_line_t trace_handle_return(struct trace_seq *s)
2691 {
2692         return trace_seq_has_overflowed(s) ?
2693                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2694 }
2695 EXPORT_SYMBOL_GPL(trace_handle_return);
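/*
 * Typical use in an event's output callback (illustrative sketch):
 *
 *        static enum print_line_t
 *        my_event_print(struct trace_iterator *iter, int flags,
 *                       struct trace_event *event)
 *        {
 *                trace_seq_printf(&iter->seq, "...\n");
 *                return trace_handle_return(&iter->seq);
 *        }
 */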
2696
2697 static unsigned short migration_disable_value(void)
2698 {
2699 #if defined(CONFIG_SMP)
2700         return current->migration_disabled;
2701 #else
2702         return 0;
2703 #endif
2704 }
2705
2706 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2707 {
2708         unsigned int trace_flags = irqs_status;
2709         unsigned int pc;
2710
2711         pc = preempt_count();
2712
2713         if (pc & NMI_MASK)
2714                 trace_flags |= TRACE_FLAG_NMI;
2715         if (pc & HARDIRQ_MASK)
2716                 trace_flags |= TRACE_FLAG_HARDIRQ;
2717         if (in_serving_softirq())
2718                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2719         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2720                 trace_flags |= TRACE_FLAG_BH_OFF;
2721
2722         if (tif_need_resched())
2723                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2724         if (test_preempt_need_resched())
2725                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2726         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2727                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2728 }
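/*
 * Layout of the packed value: bits 0-3 carry the preempt-disable depth
 * (saturated at 15), bits 4-7 the migrate-disable depth (also saturated),
 * and bits 16 and up the TRACE_FLAG_* bits computed above; bits 8-15 are
 * currently unused.  E.g. a preempt count of 20 is recorded as 15.
 */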
2729
2730 struct ring_buffer_event *
2731 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2732                           int type,
2733                           unsigned long len,
2734                           unsigned int trace_ctx)
2735 {
2736         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2737 }
2738
2739 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2740 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2741 static int trace_buffered_event_ref;
2742
2743 /**
2744  * trace_buffered_event_enable - enable buffering events
2745  *
2746  * When events are being filtered, it is quicker to use a temporary
2747  * buffer to write the event data into if there's a likely chance
2748  * that it will not be committed. Discarding an event from the ring
2749  * buffer is not as fast as committing one, and is much slower than
2750  * copying the data and committing the copy.
2751  *
2752  * When an event is to be filtered, allocate per cpu buffers to
2753  * write the event data into, and if the event is filtered and discarded
2754  * it is simply dropped, otherwise, the entire data is to be committed
2755  * in one shot.
2756  */
2757 void trace_buffered_event_enable(void)
2758 {
2759         struct ring_buffer_event *event;
2760         struct page *page;
2761         int cpu;
2762
2763         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2764
2765         if (trace_buffered_event_ref++)
2766                 return;
2767
2768         for_each_tracing_cpu(cpu) {
2769                 page = alloc_pages_node(cpu_to_node(cpu),
2770                                         GFP_KERNEL | __GFP_NORETRY, 0);
2771                 if (!page)
2772                         goto failed;
2773
2774                 event = page_address(page);
2775                 memset(event, 0, sizeof(*event));
2776
2777                 per_cpu(trace_buffered_event, cpu) = event;
2778
2779                 preempt_disable();
2780                 if (cpu == smp_processor_id() &&
2781                     __this_cpu_read(trace_buffered_event) !=
2782                     per_cpu(trace_buffered_event, cpu))
2783                         WARN_ON_ONCE(1);
2784                 preempt_enable();
2785         }
2786
2787         return;
2788  failed:
2789         trace_buffered_event_disable();
2790 }
2791
2792 static void enable_trace_buffered_event(void *data)
2793 {
2794         /* Probably not needed, but do it anyway */
2795         smp_rmb();
2796         this_cpu_dec(trace_buffered_event_cnt);
2797 }
2798
2799 static void disable_trace_buffered_event(void *data)
2800 {
2801         this_cpu_inc(trace_buffered_event_cnt);
2802 }
2803
2804 /**
2805  * trace_buffered_event_disable - disable buffering events
2806  *
2807  * When a filter is removed, it is faster to not use the buffered
2808  * events, and to commit directly into the ring buffer. Free up
2809  * the temp buffers when there are no more users. This requires
2810  * special synchronization with current events.
2811  */
2812 void trace_buffered_event_disable(void)
2813 {
2814         int cpu;
2815
2816         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2817
2818         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2819                 return;
2820
2821         if (--trace_buffered_event_ref)
2822                 return;
2823
2824         preempt_disable();
2825         /* For each CPU, set the buffer as used. */
2826         smp_call_function_many(tracing_buffer_mask,
2827                                disable_trace_buffered_event, NULL, 1);
2828         preempt_enable();
2829
2830         /* Wait for all current users to finish */
2831         synchronize_rcu();
2832
2833         for_each_tracing_cpu(cpu) {
2834                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2835                 per_cpu(trace_buffered_event, cpu) = NULL;
2836         }
2837         /*
2838          * Make sure trace_buffered_event is NULL before clearing
2839          * trace_buffered_event_cnt.
2840          */
2841         smp_wmb();
2842
2843         preempt_disable();
2844         /* Do the work on each cpu */
2845         smp_call_function_many(tracing_buffer_mask,
2846                                enable_trace_buffered_event, NULL, 1);
2847         preempt_enable();
2848 }
2849
2850 static struct trace_buffer *temp_buffer;
2851
2852 struct ring_buffer_event *
2853 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2854                           struct trace_event_file *trace_file,
2855                           int type, unsigned long len,
2856                           unsigned int trace_ctx)
2857 {
2858         struct ring_buffer_event *entry;
2859         struct trace_array *tr = trace_file->tr;
2860         int val;
2861
2862         *current_rb = tr->array_buffer.buffer;
2863
2864         if (!tr->no_filter_buffering_ref &&
2865             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2866                 preempt_disable_notrace();
2867                 /*
2868                  * Filtering is on, so try to use the per cpu buffer first.
2869                  * This buffer will simulate a ring_buffer_event,
2870                  * where the type_len is zero and the array[0] will
2871                  * hold the full length.
2872                  * (see include/linux/ring_buffer.h for details on
2873                  *  how the ring_buffer_event is structured).
2874                  *
2875                  * Using a temp buffer during filtering and copying it
2876                  * on a matched filter is quicker than writing directly
2877                  * into the ring buffer and then discarding it when
2878                  * it doesn't match. That is because the discard
2879                  * requires several atomic operations to get right.
2880                  * Copying on match and doing nothing on a failed match
2881                  * is still quicker than no copy on match, but having
2882                  * is still quicker than skipping the copy on a match but
2883                  * having to discard out of the ring buffer on a failed match.
2884                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2885                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2886
2887                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2888
2889                         /*
2890                          * Preemption is disabled, but interrupts and NMIs
2891                          * can still come in now. If that happens after
2892                          * the above increment, then it will have to go
2893                          * back to the old method of allocating the event
2894                          * on the ring buffer, and if the filter fails, it
2895                          * will have to call ring_buffer_discard_commit()
2896                          * to remove it.
2897                          *
2898                          * Need to also check the unlikely case that the
2899                          * length is bigger than the temp buffer size.
2900                          * If that happens, then the reserve is pretty much
2901                          * guaranteed to fail, as the ring buffer currently
2902                          * only allows events less than a page. But that may
2903                          * change in the future, so let the ring buffer reserve
2904                          * handle the failure in that case.
2905                          */
2906                         if (val == 1 && likely(len <= max_len)) {
2907                                 trace_event_setup(entry, type, trace_ctx);
2908                                 entry->array[0] = len;
2909                                 /* Return with preemption disabled */
2910                                 return entry;
2911                         }
2912                         this_cpu_dec(trace_buffered_event_cnt);
2913                 }
2914                 /* __trace_buffer_lock_reserve() disables preemption */
2915                 preempt_enable_notrace();
2916         }
2917
2918         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2919                                             trace_ctx);
2920         /*
2921          * If tracing is off, but we have triggers enabled
2922          * we still need to look at the event data. Use the temp_buffer
2923          * to store the trace event for the trigger to use. It's recursion
2924          * safe and will not be recorded anywhere.
2925          */
2926         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2927                 *current_rb = temp_buffer;
2928                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2929                                                     trace_ctx);
2930         }
2931         return entry;
2932 }
2933 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
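/*
 * Simplified sketch of how the generated trace_event_raw_event_*() handlers
 * pair this reserve with trace_event_buffer_commit():
 *
 *        entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *                                           sizeof(*entry) + data_len);
 *        if (!entry)
 *                return;
 *        entry->field = ...;
 *        trace_event_buffer_commit(&fbuffer);
 *
 * trace_event_buffer_reserve() is the wrapper that ends up here.
 */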
2934
2935 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2936 static DEFINE_MUTEX(tracepoint_printk_mutex);
2937
2938 static void output_printk(struct trace_event_buffer *fbuffer)
2939 {
2940         struct trace_event_call *event_call;
2941         struct trace_event_file *file;
2942         struct trace_event *event;
2943         unsigned long flags;
2944         struct trace_iterator *iter = tracepoint_print_iter;
2945
2946         /* We should never get here if iter is NULL */
2947         if (WARN_ON_ONCE(!iter))
2948                 return;
2949
2950         event_call = fbuffer->trace_file->event_call;
2951         if (!event_call || !event_call->event.funcs ||
2952             !event_call->event.funcs->trace)
2953                 return;
2954
2955         file = fbuffer->trace_file;
2956         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2957             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2958              !filter_match_preds(file->filter, fbuffer->entry)))
2959                 return;
2960
2961         event = &fbuffer->trace_file->event_call->event;
2962
2963         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2964         trace_seq_init(&iter->seq);
2965         iter->ent = fbuffer->entry;
2966         event_call->event.funcs->trace(iter, 0, event);
2967         trace_seq_putc(&iter->seq, 0);
2968         printk("%s", iter->seq.buffer);
2969
2970         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2971 }
2972
2973 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2974                              void *buffer, size_t *lenp,
2975                              loff_t *ppos)
2976 {
2977         int save_tracepoint_printk;
2978         int ret;
2979
2980         mutex_lock(&tracepoint_printk_mutex);
2981         save_tracepoint_printk = tracepoint_printk;
2982
2983         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2984
2985         /*
2986          * This will force exiting early, as tracepoint_printk
2987          * is always zero when tracepoint_print_iter is not allocated.
2988          */
2989         if (!tracepoint_print_iter)
2990                 tracepoint_printk = 0;
2991
2992         if (save_tracepoint_printk == tracepoint_printk)
2993                 goto out;
2994
2995         if (tracepoint_printk)
2996                 static_key_enable(&tracepoint_printk_key.key);
2997         else
2998                 static_key_disable(&tracepoint_printk_key.key);
2999
3000  out:
3001         mutex_unlock(&tracepoint_printk_mutex);
3002
3003         return ret;
3004 }
3005
3006 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3007 {
3008         enum event_trigger_type tt = ETT_NONE;
3009         struct trace_event_file *file = fbuffer->trace_file;
3010
3011         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3012                         fbuffer->entry, &tt))
3013                 goto discard;
3014
3015         if (static_key_false(&tracepoint_printk_key.key))
3016                 output_printk(fbuffer);
3017
3018         if (static_branch_unlikely(&trace_event_exports_enabled))
3019                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3020
3021         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3022                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3023
3024 discard:
3025         if (tt)
3026                 event_triggers_post_call(file, tt);
3027
3028 }
3029 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3030
3031 /*
3032  * Skip 3:
3033  *
3034  *   trace_buffer_unlock_commit_regs()
3035  *   trace_event_buffer_commit()
3036  *   trace_event_raw_event_xxx()
3037  */
3038 # define STACK_SKIP 3
3039
3040 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3041                                      struct trace_buffer *buffer,
3042                                      struct ring_buffer_event *event,
3043                                      unsigned int trace_ctx,
3044                                      struct pt_regs *regs)
3045 {
3046         __buffer_unlock_commit(buffer, event);
3047
3048         /*
3049          * If regs is not set, then skip the necessary functions.
3050          * Note, we can still get here via blktrace, wakeup tracer
3051          * and mmiotrace, but that's ok if they lose a function or
3052          * two. They are not that meaningful.
3053          */
3054         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3055         ftrace_trace_userstack(tr, buffer, trace_ctx);
3056 }
3057
3058 /*
3059  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3060  */
3061 void
3062 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3063                                    struct ring_buffer_event *event)
3064 {
3065         __buffer_unlock_commit(buffer, event);
3066 }
3067
3068 void
3069 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3070                parent_ip, unsigned int trace_ctx)
3071 {
3072         struct trace_event_call *call = &event_function;
3073         struct trace_buffer *buffer = tr->array_buffer.buffer;
3074         struct ring_buffer_event *event;
3075         struct ftrace_entry *entry;
3076
3077         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3078                                             trace_ctx);
3079         if (!event)
3080                 return;
3081         entry   = ring_buffer_event_data(event);
3082         entry->ip                       = ip;
3083         entry->parent_ip                = parent_ip;
3084
3085         if (!call_filter_check_discard(call, entry, buffer, event)) {
3086                 if (static_branch_unlikely(&trace_function_exports_enabled))
3087                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3088                 __buffer_unlock_commit(buffer, event);
3089         }
3090 }
3091
3092 #ifdef CONFIG_STACKTRACE
3093
3094 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3095 #define FTRACE_KSTACK_NESTING   4
3096
3097 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3098
3099 struct ftrace_stack {
3100         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3101 };
3102
3103
3104 struct ftrace_stacks {
3105         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3106 };
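/*
 * On a 64-bit system with 4K pages this works out to 1024 entries per
 * nesting level, i.e. 4 * 1024 * sizeof(long) == 32K of per-CPU storage for
 * the normal, softirq, irq and NMI contexts combined.
 */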
3107
3108 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3109 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3110
3111 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3112                                  unsigned int trace_ctx,
3113                                  int skip, struct pt_regs *regs)
3114 {
3115         struct trace_event_call *call = &event_kernel_stack;
3116         struct ring_buffer_event *event;
3117         unsigned int size, nr_entries;
3118         struct ftrace_stack *fstack;
3119         struct stack_entry *entry;
3120         int stackidx;
3121
3122         /*
3123          * Add one, for this function and the call to stack_trace_save().
3124          * If regs is set, then these functions will not be in the way.
3125          */
3126 #ifndef CONFIG_UNWINDER_ORC
3127         if (!regs)
3128                 skip++;
3129 #endif
3130
3131         preempt_disable_notrace();
3132
3133         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3134
3135         /* This should never happen. If it does, yell once and skip */
3136         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3137                 goto out;
3138
3139         /*
3140          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3141          * interrupt will either see the value pre increment or post
3142          * increment. If the interrupt happens pre increment it will have
3143          * restored the counter when it returns.  We just need a barrier to
3144          * keep gcc from moving things around.
3145          */
3146         barrier();
3147
3148         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3149         size = ARRAY_SIZE(fstack->calls);
3150
3151         if (regs) {
3152                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3153                                                    size, skip);
3154         } else {
3155                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3156         }
3157
3158         size = nr_entries * sizeof(unsigned long);
3159         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3160                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3161                                     trace_ctx);
3162         if (!event)
3163                 goto out;
3164         entry = ring_buffer_event_data(event);
3165
3166         memcpy(&entry->caller, fstack->calls, size);
3167         entry->size = nr_entries;
3168
3169         if (!call_filter_check_discard(call, entry, buffer, event))
3170                 __buffer_unlock_commit(buffer, event);
3171
3172  out:
3173         /* Again, don't let gcc optimize things here */
3174         barrier();
3175         __this_cpu_dec(ftrace_stack_reserve);
3176         preempt_enable_notrace();
3177
3178 }
3179
3180 static inline void ftrace_trace_stack(struct trace_array *tr,
3181                                       struct trace_buffer *buffer,
3182                                       unsigned int trace_ctx,
3183                                       int skip, struct pt_regs *regs)
3184 {
3185         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3186                 return;
3187
3188         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3189 }
3190
3191 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3192                    int skip)
3193 {
3194         struct trace_buffer *buffer = tr->array_buffer.buffer;
3195
3196         if (rcu_is_watching()) {
3197                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3198                 return;
3199         }
3200
3201         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3202                 return;
3203
3204         /*
3205          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3206          * but if the above rcu_is_watching() failed, then the NMI
3207          * triggered someplace critical, and ct_irq_enter() should
3208          * not be called from NMI.
3209          */
3210         if (unlikely(in_nmi()))
3211                 return;
3212
3213         ct_irq_enter_irqson();
3214         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3215         ct_irq_exit_irqson();
3216 }
3217
3218 /**
3219  * trace_dump_stack - record a stack back trace in the trace buffer
3220  * @skip: Number of functions to skip (helper handlers)
3221  */
3222 void trace_dump_stack(int skip)
3223 {
3224         if (tracing_disabled || tracing_selftest_running)
3225                 return;
3226
3227 #ifndef CONFIG_UNWINDER_ORC
3228         /* Skip 1 to skip this function. */
3229         skip++;
3230 #endif
3231         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3232                              tracing_gen_ctx(), skip, NULL);
3233 }
3234 EXPORT_SYMBOL_GPL(trace_dump_stack);
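
/*
 * A minimal usage sketch for trace_dump_stack(): a caller that wants the
 * current backtrace recorded in the top level trace buffer (the condition
 * name below is hypothetical; a skip of 0 starts the trace at the caller):
 *
 *	if (unlikely(suspect_condition))
 *		trace_dump_stack(0);
 */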
3235
3236 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3237 static DEFINE_PER_CPU(int, user_stack_count);
3238
3239 static void
3240 ftrace_trace_userstack(struct trace_array *tr,
3241                        struct trace_buffer *buffer, unsigned int trace_ctx)
3242 {
3243         struct trace_event_call *call = &event_user_stack;
3244         struct ring_buffer_event *event;
3245         struct userstack_entry *entry;
3246
3247         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3248                 return;
3249
3250         /*
3251          * NMIs cannot handle page faults, even with fixups.
3252          * Saving the user stack can (and often does) fault.
3253          */
3254         if (unlikely(in_nmi()))
3255                 return;
3256
3257         /*
3258          * prevent recursion, since the user stack tracing may
3259          * trigger other kernel events.
3260          */
3261         preempt_disable();
3262         if (__this_cpu_read(user_stack_count))
3263                 goto out;
3264
3265         __this_cpu_inc(user_stack_count);
3266
3267         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3268                                             sizeof(*entry), trace_ctx);
3269         if (!event)
3270                 goto out_drop_count;
3271         entry   = ring_buffer_event_data(event);
3272
3273         entry->tgid             = current->tgid;
3274         memset(&entry->caller, 0, sizeof(entry->caller));
3275
3276         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3277         if (!call_filter_check_discard(call, entry, buffer, event))
3278                 __buffer_unlock_commit(buffer, event);
3279
3280  out_drop_count:
3281         __this_cpu_dec(user_stack_count);
3282  out:
3283         preempt_enable();
3284 }
3285 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3286 static void ftrace_trace_userstack(struct trace_array *tr,
3287                                    struct trace_buffer *buffer,
3288                                    unsigned int trace_ctx)
3289 {
3290 }
3291 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3292
3293 #endif /* CONFIG_STACKTRACE */
3294
3295 static inline void
3296 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3297                           unsigned long long delta)
3298 {
3299         entry->bottom_delta_ts = delta & U32_MAX;
3300         entry->top_delta_ts = (delta >> 32);
3301 }
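
/*
 * The 64-bit delta is split across two 32-bit fields above; a reader that
 * wants the original value back would reassemble it as (sketch mirroring
 * the split):
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */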
3302
3303 void trace_last_func_repeats(struct trace_array *tr,
3304                              struct trace_func_repeats *last_info,
3305                              unsigned int trace_ctx)
3306 {
3307         struct trace_buffer *buffer = tr->array_buffer.buffer;
3308         struct func_repeats_entry *entry;
3309         struct ring_buffer_event *event;
3310         u64 delta;
3311
3312         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3313                                             sizeof(*entry), trace_ctx);
3314         if (!event)
3315                 return;
3316
3317         delta = ring_buffer_event_time_stamp(buffer, event) -
3318                 last_info->ts_last_call;
3319
3320         entry = ring_buffer_event_data(event);
3321         entry->ip = last_info->ip;
3322         entry->parent_ip = last_info->parent_ip;
3323         entry->count = last_info->count;
3324         func_repeats_set_delta_ts(entry, delta);
3325
3326         __buffer_unlock_commit(buffer, event);
3327 }
3328
3329 /* created for use with alloc_percpu */
3330 struct trace_buffer_struct {
3331         int nesting;
3332         char buffer[4][TRACE_BUF_SIZE];
3333 };
3334
3335 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3336
3337 /*
3338  * This allows for lockless recording.  If we're nested too deeply, then
3339  * this returns NULL.
3340  */
3341 static char *get_trace_buf(void)
3342 {
3343         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3344
3345         if (!trace_percpu_buffer || buffer->nesting >= 4)
3346                 return NULL;
3347
3348         buffer->nesting++;
3349
3350         /* Interrupts must see nesting incremented before we use the buffer */
3351         barrier();
3352         return &buffer->buffer[buffer->nesting - 1][0];
3353 }
3354
3355 static void put_trace_buf(void)
3356 {
3357         /* Don't let the decrement of nesting leak before this */
3358         barrier();
3359         this_cpu_dec(trace_percpu_buffer->nesting);
3360 }
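
/*
 * get_trace_buf()/put_trace_buf() are always used as a pair, with
 * preemption disabled so the per-CPU nesting count stays consistent.
 * A minimal sketch of the expected pattern (error handling elided):
 *
 *	char *buf;
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... fill at most TRACE_BUF_SIZE bytes of buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */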
3361
3362 static int alloc_percpu_trace_buffer(void)
3363 {
3364         struct trace_buffer_struct __percpu *buffers;
3365
3366         if (trace_percpu_buffer)
3367                 return 0;
3368
3369         buffers = alloc_percpu(struct trace_buffer_struct);
3370         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3371                 return -ENOMEM;
3372
3373         trace_percpu_buffer = buffers;
3374         return 0;
3375 }
3376
3377 static int buffers_allocated;
3378
3379 void trace_printk_init_buffers(void)
3380 {
3381         if (buffers_allocated)
3382                 return;
3383
3384         if (alloc_percpu_trace_buffer())
3385                 return;
3386
3387         /* trace_printk() is for debug use only. Don't use it in production. */
3388
3389         pr_warn("\n");
3390         pr_warn("**********************************************************\n");
3391         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3392         pr_warn("**                                                      **\n");
3393         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3394         pr_warn("**                                                      **\n");
3395         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3396         pr_warn("** unsafe for production use.                           **\n");
3397         pr_warn("**                                                      **\n");
3398         pr_warn("** If you see this message and you are not debugging    **\n");
3399         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3400         pr_warn("**                                                      **\n");
3401         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3402         pr_warn("**********************************************************\n");
3403
3404         /* Expand the buffers to their set size */
3405         tracing_update_buffers();
3406
3407         buffers_allocated = 1;
3408
3409         /*
3410          * trace_printk_init_buffers() can be called by modules.
3411          * If that happens, then we need to start cmdline recording
3412          * directly here. If global_trace.array_buffer.buffer is already
3413          * allocated at this point, then this was called by module code.
3414          */
3415         if (global_trace.array_buffer.buffer)
3416                 tracing_start_cmdline_record();
3417 }
3418 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3419
3420 void trace_printk_start_comm(void)
3421 {
3422         /* Start tracing comms if trace printk is set */
3423         if (!buffers_allocated)
3424                 return;
3425         tracing_start_cmdline_record();
3426 }
3427
3428 static void trace_printk_start_stop_comm(int enabled)
3429 {
3430         if (!buffers_allocated)
3431                 return;
3432
3433         if (enabled)
3434                 tracing_start_cmdline_record();
3435         else
3436                 tracing_stop_cmdline_record();
3437 }
3438
3439 /**
3440  * trace_vbprintk - write binary msg to tracing buffer
3441  * @ip:    The address of the caller
3442  * @fmt:   The string format to write to the buffer
3443  * @args:  Arguments for @fmt
3444  */
3445 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3446 {
3447         struct trace_event_call *call = &event_bprint;
3448         struct ring_buffer_event *event;
3449         struct trace_buffer *buffer;
3450         struct trace_array *tr = &global_trace;
3451         struct bprint_entry *entry;
3452         unsigned int trace_ctx;
3453         char *tbuffer;
3454         int len = 0, size;
3455
3456         if (unlikely(tracing_selftest_running || tracing_disabled))
3457                 return 0;
3458
3459         /* Don't pollute graph traces with trace_vprintk internals */
3460         pause_graph_tracing();
3461
3462         trace_ctx = tracing_gen_ctx();
3463         preempt_disable_notrace();
3464
3465         tbuffer = get_trace_buf();
3466         if (!tbuffer) {
3467                 len = 0;
3468                 goto out_nobuffer;
3469         }
3470
3471         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3472
3473         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3474                 goto out_put;
3475
3476         size = sizeof(*entry) + sizeof(u32) * len;
3477         buffer = tr->array_buffer.buffer;
3478         ring_buffer_nest_start(buffer);
3479         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3480                                             trace_ctx);
3481         if (!event)
3482                 goto out;
3483         entry = ring_buffer_event_data(event);
3484         entry->ip                       = ip;
3485         entry->fmt                      = fmt;
3486
3487         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3488         if (!call_filter_check_discard(call, entry, buffer, event)) {
3489                 __buffer_unlock_commit(buffer, event);
3490                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3491         }
3492
3493 out:
3494         ring_buffer_nest_end(buffer);
3495 out_put:
3496         put_trace_buf();
3497
3498 out_nobuffer:
3499         preempt_enable_notrace();
3500         unpause_graph_tracing();
3501
3502         return len;
3503 }
3504 EXPORT_SYMBOL_GPL(trace_vbprintk);
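
/*
 * trace_vbprintk() takes a va_list, so a caller with a variadic format
 * would typically reach it through a wrapper like this (hypothetical
 * helper, shown only to illustrate the calling convention):
 *
 *	int my_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 */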
3505
3506 __printf(3, 0)
3507 static int
3508 __trace_array_vprintk(struct trace_buffer *buffer,
3509                       unsigned long ip, const char *fmt, va_list args)
3510 {
3511         struct trace_event_call *call = &event_print;
3512         struct ring_buffer_event *event;
3513         int len = 0, size;
3514         struct print_entry *entry;
3515         unsigned int trace_ctx;
3516         char *tbuffer;
3517
3518         if (tracing_disabled)
3519                 return 0;
3520
3521         /* Don't pollute graph traces with trace_vprintk internals */
3522         pause_graph_tracing();
3523
3524         trace_ctx = tracing_gen_ctx();
3525         preempt_disable_notrace();
3526
3527
3528         tbuffer = get_trace_buf();
3529         if (!tbuffer) {
3530                 len = 0;
3531                 goto out_nobuffer;
3532         }
3533
3534         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3535
3536         size = sizeof(*entry) + len + 1;
3537         ring_buffer_nest_start(buffer);
3538         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3539                                             trace_ctx);
3540         if (!event)
3541                 goto out;
3542         entry = ring_buffer_event_data(event);
3543         entry->ip = ip;
3544
3545         memcpy(&entry->buf, tbuffer, len + 1);
3546         if (!call_filter_check_discard(call, entry, buffer, event)) {
3547                 __buffer_unlock_commit(buffer, event);
3548                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3549         }
3550
3551 out:
3552         ring_buffer_nest_end(buffer);
3553         put_trace_buf();
3554
3555 out_nobuffer:
3556         preempt_enable_notrace();
3557         unpause_graph_tracing();
3558
3559         return len;
3560 }
3561
3562 __printf(3, 0)
3563 int trace_array_vprintk(struct trace_array *tr,
3564                         unsigned long ip, const char *fmt, va_list args)
3565 {
3566         if (tracing_selftest_running && tr == &global_trace)
3567                 return 0;
3568
3569         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3570 }
3571
3572 /**
3573  * trace_array_printk - Print a message to a specific instance
3574  * @tr: The instance trace_array descriptor
3575  * @ip: The instruction pointer that this is called from.
3576  * @fmt: The format to print (printf format)
3577  *
3578  * If a subsystem sets up its own instance, it may use this function
3579  * to printk strings into its tracing instance buffer.
3580  * Note, this function will not write into the top level
3581  * buffer (use trace_printk() for that), as the top level buffer
3582  * should only contain events that can be individually disabled.
3583  * trace_printk() is only meant for debugging a kernel, and should
3584  * never be used in production code.
3585  *
3586  * trace_array_printk() can be used instead, as it will not add noise
3587  * to the top level tracing buffer.
3588  *
3589  * Note, trace_array_init_printk() must be called on @tr before this
3590  * can be used.
3591  */
3592 __printf(3, 0)
3593 int trace_array_printk(struct trace_array *tr,
3594                        unsigned long ip, const char *fmt, ...)
3595 {
3596         int ret;
3597         va_list ap;
3598
3599         if (!tr)
3600                 return -ENOENT;
3601
3602         /* This is only allowed for created instances */
3603         if (tr == &global_trace)
3604                 return 0;
3605
3606         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3607                 return 0;
3608
3609         va_start(ap, fmt);
3610         ret = trace_array_vprintk(tr, ip, fmt, ap);
3611         va_end(ap);
3612         return ret;
3613 }
3614 EXPORT_SYMBOL_GPL(trace_array_printk);
3615
3616 /**
3617  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3618  * @tr: The trace array to initialize the buffers for
3619  *
3620  * As trace_array_printk() only writes into instances, calls to it are
3621  * OK to keep in the kernel (unlike trace_printk()). This needs to be
3622  * called before trace_array_printk() can be used on a trace_array.
3623  */
3624 int trace_array_init_printk(struct trace_array *tr)
3625 {
3626         if (!tr)
3627                 return -ENOENT;
3628
3629         /* This is only allowed for created instances */
3630         if (tr == &global_trace)
3631                 return -EINVAL;
3632
3633         return alloc_percpu_trace_buffer();
3634 }
3635 EXPORT_SYMBOL_GPL(trace_array_init_printk);
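
/*
 * Putting the two exports above together, a subsystem that owns its own
 * instance would print into it roughly like this (the instance name
 * "my_instance" is hypothetical):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr) {
 *		if (!trace_array_init_printk(tr))
 *			trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 *		trace_array_put(tr);
 *	}
 */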
3636
3637 __printf(3, 4)
3638 int trace_array_printk_buf(struct trace_buffer *buffer,
3639                            unsigned long ip, const char *fmt, ...)
3640 {
3641         int ret;
3642         va_list ap;
3643
3644         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3645                 return 0;
3646
3647         va_start(ap, fmt);
3648         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3649         va_end(ap);
3650         return ret;
3651 }
3652
3653 __printf(2, 0)
3654 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3655 {
3656         return trace_array_vprintk(&global_trace, ip, fmt, args);
3657 }
3658 EXPORT_SYMBOL_GPL(trace_vprintk);
3659
3660 static void trace_iterator_increment(struct trace_iterator *iter)
3661 {
3662         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3663
3664         iter->idx++;
3665         if (buf_iter)
3666                 ring_buffer_iter_advance(buf_iter);
3667 }
3668
3669 static struct trace_entry *
3670 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3671                 unsigned long *lost_events)
3672 {
3673         struct ring_buffer_event *event;
3674         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3675
3676         if (buf_iter) {
3677                 event = ring_buffer_iter_peek(buf_iter, ts);
3678                 if (lost_events)
3679                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3680                                 (unsigned long)-1 : 0;
3681         } else {
3682                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3683                                          lost_events);
3684         }
3685
3686         if (event) {
3687                 iter->ent_size = ring_buffer_event_length(event);
3688                 return ring_buffer_event_data(event);
3689         }
3690         iter->ent_size = 0;
3691         return NULL;
3692 }
3693
3694 static struct trace_entry *
3695 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3696                   unsigned long *missing_events, u64 *ent_ts)
3697 {
3698         struct trace_buffer *buffer = iter->array_buffer->buffer;
3699         struct trace_entry *ent, *next = NULL;
3700         unsigned long lost_events = 0, next_lost = 0;
3701         int cpu_file = iter->cpu_file;
3702         u64 next_ts = 0, ts;
3703         int next_cpu = -1;
3704         int next_size = 0;
3705         int cpu;
3706
3707         /*
3708          * If we are in a per_cpu trace file, don't bother iterating over
3709          * all CPUs; just peek at the requested CPU directly.
3710          */
3711         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3712                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3713                         return NULL;
3714                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3715                 if (ent_cpu)
3716                         *ent_cpu = cpu_file;
3717
3718                 return ent;
3719         }
3720
3721         for_each_tracing_cpu(cpu) {
3722
3723                 if (ring_buffer_empty_cpu(buffer, cpu))
3724                         continue;
3725
3726                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3727
3728                 /*
3729                  * Pick the entry with the smallest timestamp:
3730                  */
3731                 if (ent && (!next || ts < next_ts)) {
3732                         next = ent;
3733                         next_cpu = cpu;
3734                         next_ts = ts;
3735                         next_lost = lost_events;
3736                         next_size = iter->ent_size;
3737                 }
3738         }
3739
3740         iter->ent_size = next_size;
3741
3742         if (ent_cpu)
3743                 *ent_cpu = next_cpu;
3744
3745         if (ent_ts)
3746                 *ent_ts = next_ts;
3747
3748         if (missing_events)
3749                 *missing_events = next_lost;
3750
3751         return next;
3752 }
3753
3754 #define STATIC_FMT_BUF_SIZE     128
3755 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3756
3757 char *trace_iter_expand_format(struct trace_iterator *iter)
3758 {
3759         char *tmp;
3760
3761         /*
3762          * iter->tr is NULL when used with tp_printk, which means
3763          * this can be called where it is not safe to call krealloc().
3764          */
3765         if (!iter->tr || iter->fmt == static_fmt_buf)
3766                 return NULL;
3767
3768         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3769                        GFP_KERNEL);
3770         if (tmp) {
3771                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3772                 iter->fmt = tmp;
3773         }
3774
3775         return tmp;
3776 }
3777
3778 /* Returns true if the string is safe to dereference from an event */
3779 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3780                            bool star, int len)
3781 {
3782         unsigned long addr = (unsigned long)str;
3783         struct trace_event *trace_event;
3784         struct trace_event_call *event;
3785
3786         /* Ignore strings with no length */
3787         if (star && !len)
3788                 return true;
3789
3790         /* OK if part of the event data */
3791         if ((addr >= (unsigned long)iter->ent) &&
3792             (addr < (unsigned long)iter->ent + iter->ent_size))
3793                 return true;
3794
3795         /* OK if part of the temp seq buffer */
3796         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3797             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3798                 return true;
3799
3800         /* Core rodata cannot be freed */
3801         if (is_kernel_rodata(addr))
3802                 return true;
3803
3804         if (trace_is_tracepoint_string(str))
3805                 return true;
3806
3807         /*
3808          * Now this could be a module event, referencing core module
3809          * data, which is OK.
3810          */
3811         if (!iter->ent)
3812                 return false;
3813
3814         trace_event = ftrace_find_event(iter->ent->type);
3815         if (!trace_event)
3816                 return false;
3817
3818         event = container_of(trace_event, struct trace_event_call, event);
3819         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3820                 return false;
3821
3822         /* Would rather have rodata, but this will suffice */
3823         if (within_module_core(addr, event->module))
3824                 return true;
3825
3826         return false;
3827 }
3828
3829 static const char *show_buffer(struct trace_seq *s)
3830 {
3831         struct seq_buf *seq = &s->seq;
3832
3833         seq_buf_terminate(seq);
3834
3835         return seq->buffer;
3836 }
3837
3838 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3839
3840 static int test_can_verify_check(const char *fmt, ...)
3841 {
3842         char buf[16];
3843         va_list ap;
3844         int ret;
3845
3846         /*
3847          * The verifier depends on vsnprintf() modifying the va_list
3848          * passed to it, which it can only do when the va_list is passed
3849          * by reference. Some architectures (like x86_32) pass it by
3850          * value, which means that vsnprintf() does not modify the
3851          * caller's va_list, and the verifier would then need to understand
3852          * all the values that vsnprintf can consume. If it is passed by
3853          * value, the verifier is disabled.
3854          */
3855         va_start(ap, fmt);
3856         vsnprintf(buf, 16, "%d", ap);
3857         ret = va_arg(ap, int);
3858         va_end(ap);
3859
3860         return ret;
3861 }
3862
3863 static void test_can_verify(void)
3864 {
3865         if (!test_can_verify_check("%d %d", 0, 1)) {
3866                 pr_info("trace event string verifier disabled\n");
3867                 static_branch_inc(&trace_no_verify);
3868         }
3869 }
3870
3871 /**
3872  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3873  * @iter: The iterator that holds the seq buffer and the event being printed
3874  * @fmt: The format used to print the event
3875  * @ap: The va_list holding the data to print from @fmt.
3876  *
3877  * This writes the data into the @iter->seq buffer using the data from
3878  * @fmt and @ap. If the format has a %s, then the source of the string
3879  * is examined to make sure it is safe to print, otherwise it will
3880  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3881  * pointer.
3882  */
3883 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3884                          va_list ap)
3885 {
3886         const char *p = fmt;
3887         const char *str;
3888         int i, j;
3889
3890         if (WARN_ON_ONCE(!fmt))
3891                 return;
3892
3893         if (static_branch_unlikely(&trace_no_verify))
3894                 goto print;
3895
3896         /* Don't bother checking when doing a ftrace_dump() */
3897         if (iter->fmt == static_fmt_buf)
3898                 goto print;
3899
3900         while (*p) {
3901                 bool star = false;
3902                 int len = 0;
3903
3904                 j = 0;
3905
3906                 /* We only care about %s and variants */
3907                 for (i = 0; p[i]; i++) {
3908                         if (i + 1 >= iter->fmt_size) {
3909                                 /*
3910                                  * If we can't expand the copy buffer,
3911                                  * just print it.
3912                                  */
3913                                 if (!trace_iter_expand_format(iter))
3914                                         goto print;
3915                         }
3916
3917                         if (p[i] == '\\' && p[i+1]) {
3918                                 i++;
3919                                 continue;
3920                         }
3921                         if (p[i] == '%') {
3922                                 /* Need to test cases like %08.*s */
3923                                 for (j = 1; p[i+j]; j++) {
3924                                         if (isdigit(p[i+j]) ||
3925                                             p[i+j] == '.')
3926                                                 continue;
3927                                         if (p[i+j] == '*') {
3928                                                 star = true;
3929                                                 continue;
3930                                         }
3931                                         break;
3932                                 }
3933                                 if (p[i+j] == 's')
3934                                         break;
3935                                 star = false;
3936                         }
3937                         j = 0;
3938                 }
3939                 /* If no %s found then just print normally */
3940                 if (!p[i])
3941                         break;
3942
3943                 /* Copy up to the %s, and print that */
3944                 strncpy(iter->fmt, p, i);
3945                 iter->fmt[i] = '\0';
3946                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3947
3948                 /*
3949                  * If iter->seq is full, the above call no longer guarantees
3950                  * that ap is in sync with fmt processing, and further calls
3951                  * to va_arg() can return wrong positional arguments.
3952                  *
3953                  * Ensure that ap is no longer used in this case.
3954                  */
3955                 if (iter->seq.full) {
3956                         p = "";
3957                         break;
3958                 }
3959
3960                 if (star)
3961                         len = va_arg(ap, int);
3962
3963                 /* The ap now points to the string data of the %s */
3964                 str = va_arg(ap, const char *);
3965
3966                 /*
3967                  * If you hit this warning, it is likely that the
3968                  * trace event in question used %s on a string that
3969                  * was saved at the time of the event, but may not be
3970                  * around when the trace is read. Use __string(),
3971                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3972                  * instead. See samples/trace_events/trace-events-sample.h
3973                  * for reference.
3974                  */
3975                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3976                               "fmt: '%s' current_buffer: '%s'",
3977                               fmt, show_buffer(&iter->seq))) {
3978                         int ret;
3979
3980                         /* Try to safely read the string */
3981                         if (star) {
3982                                 if (len + 1 > iter->fmt_size)
3983                                         len = iter->fmt_size - 1;
3984                                 if (len < 0)
3985                                         len = 0;
3986                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3987                                 iter->fmt[len] = 0;
3988                                 star = false;
3989                         } else {
3990                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3991                                                                   iter->fmt_size);
3992                         }
3993                         if (ret < 0)
3994                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3995                         else
3996                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3997                                                  str, iter->fmt);
3998                         str = "[UNSAFE-MEMORY]";
3999                         strcpy(iter->fmt, "%s");
4000                 } else {
4001                         strncpy(iter->fmt, p + i, j + 1);
4002                         iter->fmt[j+1] = '\0';
4003                 }
4004                 if (star)
4005                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
4006                 else
4007                         trace_seq_printf(&iter->seq, iter->fmt, str);
4008
4009                 p += i + j + 1;
4010         }
4011  print:
4012         if (*p)
4013                 trace_seq_vprintf(&iter->seq, p, ap);
4014 }
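
/*
 * The WARN_ONCE() above points users at the __string()/__assign_str()/
 * __get_str() helpers; a hypothetical event using them would look roughly
 * like this, normally defined in a trace header (see
 * samples/trace_events/trace-events-sample.h for the full reference):
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name, int value),
 *		TP_ARGS(name, value),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *			__field(int, value)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *			__entry->value = value;
 *		),
 *		TP_printk("name=%s value=%d", __get_str(name), __entry->value)
 *	);
 */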
4015
4016 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4017 {
4018         const char *p, *new_fmt;
4019         char *q;
4020
4021         if (WARN_ON_ONCE(!fmt))
4022                 return fmt;
4023
4024         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4025                 return fmt;
4026
4027         p = fmt;
4028         new_fmt = q = iter->fmt;
4029         while (*p) {
4030                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4031                         if (!trace_iter_expand_format(iter))
4032                                 return fmt;
4033
4034                         q += iter->fmt - new_fmt;
4035                         new_fmt = iter->fmt;
4036                 }
4037
4038                 *q++ = *p++;
4039
4040                 /* Replace %p with %px */
4041                 if (p[-1] == '%') {
4042                         if (p[0] == '%') {
4043                                 *q++ = *p++;
4044                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4045                                 *q++ = *p++;
4046                                 *q++ = 'x';
4047                         }
4048                 }
4049         }
4050         *q = '\0';
4051
4052         return new_fmt;
4053 }
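
/*
 * Example of the rewrite performed by trace_event_format() when pointer
 * hashing is turned off: a format such as "ip=%p flags=%d" is emitted as
 * "ip=%px flags=%d", while "%%p" and extended forms like "%pS" are left
 * untouched (the character after 'p' is alphanumeric in the latter case).
 */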
4054
4055 #define STATIC_TEMP_BUF_SIZE    128
4056 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4057
4058 /* Find the next real entry, without updating the iterator itself */
4059 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4060                                           int *ent_cpu, u64 *ent_ts)
4061 {
4062         /* __find_next_entry will reset ent_size */
4063         int ent_size = iter->ent_size;
4064         struct trace_entry *entry;
4065
4066         /*
4067          * If called from ftrace_dump(), then the iter->temp buffer
4068          * will be the static_temp_buf and not created from kmalloc.
4069          * If the entry size is greater than the buffer, we cannot
4070          * save it. Just return NULL in that case. This is only
4071          * used to add markers when two consecutive events' time
4072          * stamps have a large delta. See trace_print_lat_context().
4073          */
4074         if (iter->temp == static_temp_buf &&
4075             STATIC_TEMP_BUF_SIZE < ent_size)
4076                 return NULL;
4077
4078         /*
4079          * The __find_next_entry() may call peek_next_entry(), which may
4080          * call ring_buffer_peek() that may make the contents of iter->ent
4081          * undefined. Need to copy iter->ent now.
4082          */
4083         if (iter->ent && iter->ent != iter->temp) {
4084                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4085                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4086                         void *temp;
4087                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4088                         if (!temp)
4089                                 return NULL;
4090                         kfree(iter->temp);
4091                         iter->temp = temp;
4092                         iter->temp_size = iter->ent_size;
4093                 }
4094                 memcpy(iter->temp, iter->ent, iter->ent_size);
4095                 iter->ent = iter->temp;
4096         }
4097         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4098         /* Put back the original ent_size */
4099         iter->ent_size = ent_size;
4100
4101         return entry;
4102 }
4103
4104 /* Find the next real entry, and increment the iterator to the next entry */
4105 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4106 {
4107         iter->ent = __find_next_entry(iter, &iter->cpu,
4108                                       &iter->lost_events, &iter->ts);
4109
4110         if (iter->ent)
4111                 trace_iterator_increment(iter);
4112
4113         return iter->ent ? iter : NULL;
4114 }
4115
4116 static void trace_consume(struct trace_iterator *iter)
4117 {
4118         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4119                             &iter->lost_events);
4120 }
4121
4122 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4123 {
4124         struct trace_iterator *iter = m->private;
4125         int i = (int)*pos;
4126         void *ent;
4127
4128         WARN_ON_ONCE(iter->leftover);
4129
4130         (*pos)++;
4131
4132         /* can't go backwards */
4133         if (iter->idx > i)
4134                 return NULL;
4135
4136         if (iter->idx < 0)
4137                 ent = trace_find_next_entry_inc(iter);
4138         else
4139                 ent = iter;
4140
4141         while (ent && iter->idx < i)
4142                 ent = trace_find_next_entry_inc(iter);
4143
4144         iter->pos = *pos;
4145
4146         return ent;
4147 }
4148
4149 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4150 {
4151         struct ring_buffer_iter *buf_iter;
4152         unsigned long entries = 0;
4153         u64 ts;
4154
4155         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4156
4157         buf_iter = trace_buffer_iter(iter, cpu);
4158         if (!buf_iter)
4159                 return;
4160
4161         ring_buffer_iter_reset(buf_iter);
4162
4163         /*
4164          * With the max latency tracers, we could have the case
4165          * that a reset never took place on a cpu. This is evidenced
4166          * by the timestamp being before the start of the buffer.
4167          */
4168         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4169                 if (ts >= iter->array_buffer->time_start)
4170                         break;
4171                 entries++;
4172                 ring_buffer_iter_advance(buf_iter);
4173         }
4174
4175         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4176 }
4177
4178 /*
4179  * The current tracer is copied to avoid taking a global lock
4180  * around the whole iteration.
4181  */
4182 static void *s_start(struct seq_file *m, loff_t *pos)
4183 {
4184         struct trace_iterator *iter = m->private;
4185         struct trace_array *tr = iter->tr;
4186         int cpu_file = iter->cpu_file;
4187         void *p = NULL;
4188         loff_t l = 0;
4189         int cpu;
4190
4191         /*
4192          * Copy the tracer to avoid using a global lock all around.
4193          * iter->trace is a copy of current_trace; the pointer to the
4194          * name may be used instead of a strcmp(), as iter->trace->name
4195          * will point to the same string as current_trace->name.
4196          */
4197         mutex_lock(&trace_types_lock);
4198         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4199                 *iter->trace = *tr->current_trace;
4200         mutex_unlock(&trace_types_lock);
4201
4202 #ifdef CONFIG_TRACER_MAX_TRACE
4203         if (iter->snapshot && iter->trace->use_max_tr)
4204                 return ERR_PTR(-EBUSY);
4205 #endif
4206
4207         if (*pos != iter->pos) {
4208                 iter->ent = NULL;
4209                 iter->cpu = 0;
4210                 iter->idx = -1;
4211
4212                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4213                         for_each_tracing_cpu(cpu)
4214                                 tracing_iter_reset(iter, cpu);
4215                 } else
4216                         tracing_iter_reset(iter, cpu_file);
4217
4218                 iter->leftover = 0;
4219                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4220                         ;
4221
4222         } else {
4223                 /*
4224                  * If we overflowed the seq_file before, then we want
4225                  * to just reuse the trace_seq buffer.
4226                  */
4227                 if (iter->leftover)
4228                         p = iter;
4229                 else {
4230                         l = *pos - 1;
4231                         p = s_next(m, p, &l);
4232                 }
4233         }
4234
4235         trace_event_read_lock();
4236         trace_access_lock(cpu_file);
4237         return p;
4238 }
4239
4240 static void s_stop(struct seq_file *m, void *p)
4241 {
4242         struct trace_iterator *iter = m->private;
4243
4244 #ifdef CONFIG_TRACER_MAX_TRACE
4245         if (iter->snapshot && iter->trace->use_max_tr)
4246                 return;
4247 #endif
4248
4249         trace_access_unlock(iter->cpu_file);
4250         trace_event_read_unlock();
4251 }
4252
4253 static void
4254 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4255                       unsigned long *entries, int cpu)
4256 {
4257         unsigned long count;
4258
4259         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4260         /*
4261          * If this buffer has skipped entries, then we hold all
4262          * entries for the trace and we need to ignore the
4263          * ones before the time stamp.
4264          */
4265         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4266                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4267                 /* total is the same as the entries */
4268                 *total = count;
4269         } else
4270                 *total = count +
4271                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4272         *entries = count;
4273 }
4274
4275 static void
4276 get_total_entries(struct array_buffer *buf,
4277                   unsigned long *total, unsigned long *entries)
4278 {
4279         unsigned long t, e;
4280         int cpu;
4281
4282         *total = 0;
4283         *entries = 0;
4284
4285         for_each_tracing_cpu(cpu) {
4286                 get_total_entries_cpu(buf, &t, &e, cpu);
4287                 *total += t;
4288                 *entries += e;
4289         }
4290 }
4291
4292 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4293 {
4294         unsigned long total, entries;
4295
4296         if (!tr)
4297                 tr = &global_trace;
4298
4299         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4300
4301         return entries;
4302 }
4303
4304 unsigned long trace_total_entries(struct trace_array *tr)
4305 {
4306         unsigned long total, entries;
4307
4308         if (!tr)
4309                 tr = &global_trace;
4310
4311         get_total_entries(&tr->array_buffer, &total, &entries);
4312
4313         return entries;
4314 }
4315
4316 static void print_lat_help_header(struct seq_file *m)
4317 {
4318         seq_puts(m, "#                    _------=> CPU#            \n"
4319                     "#                   / _-----=> irqs-off/BH-disabled\n"
4320                     "#                  | / _----=> need-resched    \n"
4321                     "#                  || / _---=> hardirq/softirq \n"
4322                     "#                  ||| / _--=> preempt-depth   \n"
4323                     "#                  |||| / _-=> migrate-disable \n"
4324                     "#                  ||||| /     delay           \n"
4325                     "#  cmd     pid     |||||| time  |   caller     \n"
4326                     "#     \\   /        ||||||  \\    |    /       \n");
4327 }
4328
4329 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4330 {
4331         unsigned long total;
4332         unsigned long entries;
4333
4334         get_total_entries(buf, &total, &entries);
4335         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4336                    entries, total, num_online_cpus());
4337         seq_puts(m, "#\n");
4338 }
4339
4340 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4341                                    unsigned int flags)
4342 {
4343         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4344
4345         print_event_info(buf, m);
4346
4347         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4348         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4349 }
4350
4351 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4352                                        unsigned int flags)
4353 {
4354         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4355         static const char space[] = "            ";
4356         int prec = tgid ? 12 : 2;
4357
4358         print_event_info(buf, m);
4359
4360         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4361         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4362         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4363         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4364         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4365         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4366         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4367         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4368 }
4369
4370 void
4371 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4372 {
4373         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4374         struct array_buffer *buf = iter->array_buffer;
4375         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4376         struct tracer *type = iter->trace;
4377         unsigned long entries;
4378         unsigned long total;
4379         const char *name = type->name;
4380
4381         get_total_entries(buf, &total, &entries);
4382
4383         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4384                    name, UTS_RELEASE);
4385         seq_puts(m, "# -----------------------------------"
4386                  "---------------------------------\n");
4387         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4388                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4389                    nsecs_to_usecs(data->saved_latency),
4390                    entries,
4391                    total,
4392                    buf->cpu,
4393                    preempt_model_none()      ? "server" :
4394                    preempt_model_voluntary() ? "desktop" :
4395                    preempt_model_full()      ? "preempt" :
4396                    preempt_model_rt()        ? "preempt_rt" :
4397                    "unknown",
4398                    /* These are reserved for later use */
4399                    0, 0, 0, 0);
4400 #ifdef CONFIG_SMP
4401         seq_printf(m, " #P:%d)\n", num_online_cpus());
4402 #else
4403         seq_puts(m, ")\n");
4404 #endif
4405         seq_puts(m, "#    -----------------\n");
4406         seq_printf(m, "#    | task: %.16s-%d "
4407                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4408                    data->comm, data->pid,
4409                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4410                    data->policy, data->rt_priority);
4411         seq_puts(m, "#    -----------------\n");
4412
4413         if (data->critical_start) {
4414                 seq_puts(m, "#  => started at: ");
4415                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4416                 trace_print_seq(m, &iter->seq);
4417                 seq_puts(m, "\n#  => ended at:   ");
4418                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4419                 trace_print_seq(m, &iter->seq);
4420                 seq_puts(m, "\n#\n");
4421         }
4422
4423         seq_puts(m, "#\n");
4424 }
4425
4426 static void test_cpu_buff_start(struct trace_iterator *iter)
4427 {
4428         struct trace_seq *s = &iter->seq;
4429         struct trace_array *tr = iter->tr;
4430
4431         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4432                 return;
4433
4434         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4435                 return;
4436
4437         if (cpumask_available(iter->started) &&
4438             cpumask_test_cpu(iter->cpu, iter->started))
4439                 return;
4440
4441         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4442                 return;
4443
4444         if (cpumask_available(iter->started))
4445                 cpumask_set_cpu(iter->cpu, iter->started);
4446
4447         /* Don't print started cpu buffer for the first entry of the trace */
4448         if (iter->idx > 1)
4449                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4450                                 iter->cpu);
4451 }
4452
4453 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4454 {
4455         struct trace_array *tr = iter->tr;
4456         struct trace_seq *s = &iter->seq;
4457         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4458         struct trace_entry *entry;
4459         struct trace_event *event;
4460
4461         entry = iter->ent;
4462
4463         test_cpu_buff_start(iter);
4464
4465         event = ftrace_find_event(entry->type);
4466
4467         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4468                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4469                         trace_print_lat_context(iter);
4470                 else
4471                         trace_print_context(iter);
4472         }
4473
4474         if (trace_seq_has_overflowed(s))
4475                 return TRACE_TYPE_PARTIAL_LINE;
4476
4477         if (event) {
4478                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4479                         return print_event_fields(iter, event);
4480                 return event->funcs->trace(iter, sym_flags, event);
4481         }
4482
4483         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4484
4485         return trace_handle_return(s);
4486 }
4487
4488 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4489 {
4490         struct trace_array *tr = iter->tr;
4491         struct trace_seq *s = &iter->seq;
4492         struct trace_entry *entry;
4493         struct trace_event *event;
4494
4495         entry = iter->ent;
4496
4497         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4498                 trace_seq_printf(s, "%d %d %llu ",
4499                                  entry->pid, iter->cpu, iter->ts);
4500
4501         if (trace_seq_has_overflowed(s))
4502                 return TRACE_TYPE_PARTIAL_LINE;
4503
4504         event = ftrace_find_event(entry->type);
4505         if (event)
4506                 return event->funcs->raw(iter, 0, event);
4507
4508         trace_seq_printf(s, "%d ?\n", entry->type);
4509
4510         return trace_handle_return(s);
4511 }
4512
4513 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4514 {
4515         struct trace_array *tr = iter->tr;
4516         struct trace_seq *s = &iter->seq;
4517         unsigned char newline = '\n';
4518         struct trace_entry *entry;
4519         struct trace_event *event;
4520
4521         entry = iter->ent;
4522
4523         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4524                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4525                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4526                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4527                 if (trace_seq_has_overflowed(s))
4528                         return TRACE_TYPE_PARTIAL_LINE;
4529         }
4530
4531         event = ftrace_find_event(entry->type);
4532         if (event) {
4533                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4534                 if (ret != TRACE_TYPE_HANDLED)
4535                         return ret;
4536         }
4537
4538         SEQ_PUT_FIELD(s, newline);
4539
4540         return trace_handle_return(s);
4541 }
4542
4543 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4544 {
4545         struct trace_array *tr = iter->tr;
4546         struct trace_seq *s = &iter->seq;
4547         struct trace_entry *entry;
4548         struct trace_event *event;
4549
4550         entry = iter->ent;
4551
4552         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4553                 SEQ_PUT_FIELD(s, entry->pid);
4554                 SEQ_PUT_FIELD(s, iter->cpu);
4555                 SEQ_PUT_FIELD(s, iter->ts);
4556                 if (trace_seq_has_overflowed(s))
4557                         return TRACE_TYPE_PARTIAL_LINE;
4558         }
4559
4560         event = ftrace_find_event(entry->type);
4561         return event ? event->funcs->binary(iter, 0, event) :
4562                 TRACE_TYPE_HANDLED;
4563 }
4564
4565 int trace_empty(struct trace_iterator *iter)
4566 {
4567         struct ring_buffer_iter *buf_iter;
4568         int cpu;
4569
4570         /* If we are looking at one CPU buffer, only check that one */
4571         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4572                 cpu = iter->cpu_file;
4573                 buf_iter = trace_buffer_iter(iter, cpu);
4574                 if (buf_iter) {
4575                         if (!ring_buffer_iter_empty(buf_iter))
4576                                 return 0;
4577                 } else {
4578                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4579                                 return 0;
4580                 }
4581                 return 1;
4582         }
4583
4584         for_each_tracing_cpu(cpu) {
4585                 buf_iter = trace_buffer_iter(iter, cpu);
4586                 if (buf_iter) {
4587                         if (!ring_buffer_iter_empty(buf_iter))
4588                                 return 0;
4589                 } else {
4590                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4591                                 return 0;
4592                 }
4593         }
4594
4595         return 1;
4596 }
4597
4598 /*  Called with trace_event_read_lock() held. */
4599 enum print_line_t print_trace_line(struct trace_iterator *iter)
4600 {
4601         struct trace_array *tr = iter->tr;
4602         unsigned long trace_flags = tr->trace_flags;
4603         enum print_line_t ret;
4604
4605         if (iter->lost_events) {
4606                 if (iter->lost_events == (unsigned long)-1)
4607                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4608                                          iter->cpu);
4609                 else
4610                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4611                                          iter->cpu, iter->lost_events);
4612                 if (trace_seq_has_overflowed(&iter->seq))
4613                         return TRACE_TYPE_PARTIAL_LINE;
4614         }
4615
4616         if (iter->trace && iter->trace->print_line) {
4617                 ret = iter->trace->print_line(iter);
4618                 if (ret != TRACE_TYPE_UNHANDLED)
4619                         return ret;
4620         }
4621
4622         if (iter->ent->type == TRACE_BPUTS &&
4623                         trace_flags & TRACE_ITER_PRINTK &&
4624                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4625                 return trace_print_bputs_msg_only(iter);
4626
4627         if (iter->ent->type == TRACE_BPRINT &&
4628                         trace_flags & TRACE_ITER_PRINTK &&
4629                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4630                 return trace_print_bprintk_msg_only(iter);
4631
4632         if (iter->ent->type == TRACE_PRINT &&
4633                         trace_flags & TRACE_ITER_PRINTK &&
4634                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4635                 return trace_print_printk_msg_only(iter);
4636
4637         if (trace_flags & TRACE_ITER_BIN)
4638                 return print_bin_fmt(iter);
4639
4640         if (trace_flags & TRACE_ITER_HEX)
4641                 return print_hex_fmt(iter);
4642
4643         if (trace_flags & TRACE_ITER_RAW)
4644                 return print_raw_fmt(iter);
4645
4646         return print_trace_fmt(iter);
4647 }
4648
4649 void trace_latency_header(struct seq_file *m)
4650 {
4651         struct trace_iterator *iter = m->private;
4652         struct trace_array *tr = iter->tr;
4653
4654         /* print nothing if the buffers are empty */
4655         if (trace_empty(iter))
4656                 return;
4657
4658         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4659                 print_trace_header(m, iter);
4660
4661         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4662                 print_lat_help_header(m);
4663 }
4664
4665 void trace_default_header(struct seq_file *m)
4666 {
4667         struct trace_iterator *iter = m->private;
4668         struct trace_array *tr = iter->tr;
4669         unsigned long trace_flags = tr->trace_flags;
4670
4671         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4672                 return;
4673
4674         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4675                 /* print nothing if the buffers are empty */
4676                 if (trace_empty(iter))
4677                         return;
4678                 print_trace_header(m, iter);
4679                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4680                         print_lat_help_header(m);
4681         } else {
4682                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4683                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4684                                 print_func_help_header_irq(iter->array_buffer,
4685                                                            m, trace_flags);
4686                         else
4687                                 print_func_help_header(iter->array_buffer, m,
4688                                                        trace_flags);
4689                 }
4690         }
4691 }
4692
4693 static void test_ftrace_alive(struct seq_file *m)
4694 {
4695         if (!ftrace_is_dead())
4696                 return;
4697         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4698                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4699 }
4700
4701 #ifdef CONFIG_TRACER_MAX_TRACE
4702 static void show_snapshot_main_help(struct seq_file *m)
4703 {
4704         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4705                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4706                     "#                      Takes a snapshot of the main buffer.\n"
4707                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4708                     "#                      (Doesn't have to be '2'; works with any number that\n"
4709                     "#                       is not a '0' or '1')\n");
4710 }
4711
4712 static void show_snapshot_percpu_help(struct seq_file *m)
4713 {
4714         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4715 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4716         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4717                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4718 #else
4719         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4720                     "#                     Must use main snapshot file to allocate.\n");
4721 #endif
4722         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4723                     "#                      (Doesn't have to be '2'; works with any number that\n"
4724                     "#                       is not a '0' or '1')\n");
4725 }
4726
4727 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4728 {
4729         if (iter->tr->allocated_snapshot)
4730                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4731         else
4732                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4733
4734         seq_puts(m, "# Snapshot commands:\n");
4735         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4736                 show_snapshot_main_help(m);
4737         else
4738                 show_snapshot_percpu_help(m);
4739 }
4740 #else
4741 /* Should never be called */
4742 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4743 #endif
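/*
 * Illustrative userspace sketch of the snapshot semantics described in the
 * help text above (the /sys/kernel/tracing path and the lack of error
 * handling are assumptions for the example only):
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	write(fd, "1", 1);	// allocate if needed and take a snapshot
 *	write(fd, "2", 1);	// clear the snapshot, keep the allocation
 *	write(fd, "0", 1);	// free the snapshot buffer
 *	close(fd);
 */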
4744
4745 static int s_show(struct seq_file *m, void *v)
4746 {
4747         struct trace_iterator *iter = v;
4748         int ret;
4749
4750         if (iter->ent == NULL) {
4751                 if (iter->tr) {
4752                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4753                         seq_puts(m, "#\n");
4754                         test_ftrace_alive(m);
4755                 }
4756                 if (iter->snapshot && trace_empty(iter))
4757                         print_snapshot_help(m, iter);
4758                 else if (iter->trace && iter->trace->print_header)
4759                         iter->trace->print_header(m);
4760                 else
4761                         trace_default_header(m);
4762
4763         } else if (iter->leftover) {
4764                 /*
4765                  * If we filled the seq_file buffer earlier, we
4766                  * want to just show it now.
4767                  */
4768                 ret = trace_print_seq(m, &iter->seq);
4769
4770                 /* ret should this time be zero, but you never know */
4771                 iter->leftover = ret;
4772
4773         } else {
4774                 print_trace_line(iter);
4775                 ret = trace_print_seq(m, &iter->seq);
4776                 /*
4777                  * If we overflow the seq_file buffer, then it will
4778                  * ask us for this data again at start up.
4779                  * Use that instead.
4780                  *  ret is 0 if seq_file write succeeded.
4781                  *        -1 otherwise.
4782                  */
4783                 iter->leftover = ret;
4784         }
4785
4786         return 0;
4787 }
4788
4789 /*
4790  * Should be used after trace_array_get(); trace_types_lock
4791  * ensures that i_cdev was already initialized.
4792  */
4793 static inline int tracing_get_cpu(struct inode *inode)
4794 {
4795         if (inode->i_cdev) /* See trace_create_cpu_file() */
4796                 return (long)inode->i_cdev - 1;
4797         return RING_BUFFER_ALL_CPUS;
4798 }
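/*
 * Worked example of the encoding above: per-CPU trace files stash (cpu + 1)
 * in i_cdev so that a NULL i_cdev can still mean "all CPUs".  The file for
 * CPU 2 therefore stores 3 and tracing_get_cpu() returns 3 - 1 = 2, while a
 * file with no per-CPU binding yields RING_BUFFER_ALL_CPUS.
 */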
4799
4800 static const struct seq_operations tracer_seq_ops = {
4801         .start          = s_start,
4802         .next           = s_next,
4803         .stop           = s_stop,
4804         .show           = s_show,
4805 };
4806
4807 static struct trace_iterator *
4808 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4809 {
4810         struct trace_array *tr = inode->i_private;
4811         struct trace_iterator *iter;
4812         int cpu;
4813
4814         if (tracing_disabled)
4815                 return ERR_PTR(-ENODEV);
4816
4817         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4818         if (!iter)
4819                 return ERR_PTR(-ENOMEM);
4820
4821         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4822                                     GFP_KERNEL);
4823         if (!iter->buffer_iter)
4824                 goto release;
4825
4826         /*
4827          * trace_find_next_entry() may need to save off iter->ent.
4828          * It will place it into the iter->temp buffer. As most
4829          * events are less than 128 bytes, allocate a buffer of that size.
4830          * If one is greater, then trace_find_next_entry() will
4831          * allocate a new buffer to adjust for the bigger iter->ent.
4832          * It's not critical if it fails to get allocated here.
4833          */
4834         iter->temp = kmalloc(128, GFP_KERNEL);
4835         if (iter->temp)
4836                 iter->temp_size = 128;
4837
4838         /*
4839          * trace_event_printf() may need to modify given format
4840          * string to replace %p with %px so that it shows real address
4841          * instead of a hashed value. However, that is only needed for event
4842          * tracing; other tracers may not need it. Defer the allocation
4843          * until it is needed.
4844          */
4845         iter->fmt = NULL;
4846         iter->fmt_size = 0;
4847
4848         /*
4849          * We make a copy of the current tracer to avoid concurrent
4850          * changes on it while we are reading.
4851          */
4852         mutex_lock(&trace_types_lock);
4853         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4854         if (!iter->trace)
4855                 goto fail;
4856
4857         *iter->trace = *tr->current_trace;
4858
4859         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4860                 goto fail;
4861
4862         iter->tr = tr;
4863
4864 #ifdef CONFIG_TRACER_MAX_TRACE
4865         /* Currently only the top directory has a snapshot */
4866         if (tr->current_trace->print_max || snapshot)
4867                 iter->array_buffer = &tr->max_buffer;
4868         else
4869 #endif
4870                 iter->array_buffer = &tr->array_buffer;
4871         iter->snapshot = snapshot;
4872         iter->pos = -1;
4873         iter->cpu_file = tracing_get_cpu(inode);
4874         mutex_init(&iter->mutex);
4875
4876         /* Notify the tracer early; before we stop tracing. */
4877         if (iter->trace->open)
4878                 iter->trace->open(iter);
4879
4880         /* Annotate start of buffers if we had overruns */
4881         if (ring_buffer_overruns(iter->array_buffer->buffer))
4882                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4883
4884         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4885         if (trace_clocks[tr->clock_id].in_ns)
4886                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4887
4888         /*
4889          * If pause-on-trace is enabled, then stop the trace while
4890          * dumping, unless this is the "snapshot" file
4891          */
4892         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4893                 tracing_stop_tr(tr);
4894
4895         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4896                 for_each_tracing_cpu(cpu) {
4897                         iter->buffer_iter[cpu] =
4898                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4899                                                          cpu, GFP_KERNEL);
4900                 }
4901                 ring_buffer_read_prepare_sync();
4902                 for_each_tracing_cpu(cpu) {
4903                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4904                         tracing_iter_reset(iter, cpu);
4905                 }
4906         } else {
4907                 cpu = iter->cpu_file;
4908                 iter->buffer_iter[cpu] =
4909                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4910                                                  cpu, GFP_KERNEL);
4911                 ring_buffer_read_prepare_sync();
4912                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4913                 tracing_iter_reset(iter, cpu);
4914         }
4915
4916         mutex_unlock(&trace_types_lock);
4917
4918         return iter;
4919
4920  fail:
4921         mutex_unlock(&trace_types_lock);
4922         kfree(iter->trace);
4923         kfree(iter->temp);
4924         kfree(iter->buffer_iter);
4925 release:
4926         seq_release_private(inode, file);
4927         return ERR_PTR(-ENOMEM);
4928 }
4929
4930 int tracing_open_generic(struct inode *inode, struct file *filp)
4931 {
4932         int ret;
4933
4934         ret = tracing_check_open_get_tr(NULL);
4935         if (ret)
4936                 return ret;
4937
4938         filp->private_data = inode->i_private;
4939         return 0;
4940 }
4941
4942 bool tracing_is_disabled(void)
4943 {
4944         return (tracing_disabled) ? true : false;
4945 }
4946
4947 /*
4948  * Open and update trace_array ref count.
4949  * Must have the current trace_array passed to it.
4950  */
4951 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4952 {
4953         struct trace_array *tr = inode->i_private;
4954         int ret;
4955
4956         ret = tracing_check_open_get_tr(tr);
4957         if (ret)
4958                 return ret;
4959
4960         filp->private_data = inode->i_private;
4961
4962         return 0;
4963 }
4964
4965 static int tracing_mark_open(struct inode *inode, struct file *filp)
4966 {
4967         stream_open(inode, filp);
4968         return tracing_open_generic_tr(inode, filp);
4969 }
4970
4971 static int tracing_release(struct inode *inode, struct file *file)
4972 {
4973         struct trace_array *tr = inode->i_private;
4974         struct seq_file *m = file->private_data;
4975         struct trace_iterator *iter;
4976         int cpu;
4977
4978         if (!(file->f_mode & FMODE_READ)) {
4979                 trace_array_put(tr);
4980                 return 0;
4981         }
4982
4983         /* Writes do not use seq_file */
4984         iter = m->private;
4985         mutex_lock(&trace_types_lock);
4986
4987         for_each_tracing_cpu(cpu) {
4988                 if (iter->buffer_iter[cpu])
4989                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4990         }
4991
4992         if (iter->trace && iter->trace->close)
4993                 iter->trace->close(iter);
4994
4995         if (!iter->snapshot && tr->stop_count)
4996                 /* reenable tracing if it was previously enabled */
4997                 tracing_start_tr(tr);
4998
4999         __trace_array_put(tr);
5000
5001         mutex_unlock(&trace_types_lock);
5002
5003         mutex_destroy(&iter->mutex);
5004         free_cpumask_var(iter->started);
5005         kfree(iter->fmt);
5006         kfree(iter->temp);
5007         kfree(iter->trace);
5008         kfree(iter->buffer_iter);
5009         seq_release_private(inode, file);
5010
5011         return 0;
5012 }
5013
5014 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5015 {
5016         struct trace_array *tr = inode->i_private;
5017
5018         trace_array_put(tr);
5019         return 0;
5020 }
5021
5022 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5023 {
5024         struct trace_array *tr = inode->i_private;
5025
5026         trace_array_put(tr);
5027
5028         return single_release(inode, file);
5029 }
5030
5031 static int tracing_open(struct inode *inode, struct file *file)
5032 {
5033         struct trace_array *tr = inode->i_private;
5034         struct trace_iterator *iter;
5035         int ret;
5036
5037         ret = tracing_check_open_get_tr(tr);
5038         if (ret)
5039                 return ret;
5040
5041         /* If this file was opened for write, then erase the contents */
5042         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5043                 int cpu = tracing_get_cpu(inode);
5044                 struct array_buffer *trace_buf = &tr->array_buffer;
5045
5046 #ifdef CONFIG_TRACER_MAX_TRACE
5047                 if (tr->current_trace->print_max)
5048                         trace_buf = &tr->max_buffer;
5049 #endif
5050
5051                 if (cpu == RING_BUFFER_ALL_CPUS)
5052                         tracing_reset_online_cpus(trace_buf);
5053                 else
5054                         tracing_reset_cpu(trace_buf, cpu);
5055         }
5056
5057         if (file->f_mode & FMODE_READ) {
5058                 iter = __tracing_open(inode, file, false);
5059                 if (IS_ERR(iter))
5060                         ret = PTR_ERR(iter);
5061                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5062                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5063         }
5064
5065         if (ret < 0)
5066                 trace_array_put(tr);
5067
5068         return ret;
5069 }
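/*
 * Minimal userspace sketch of the O_TRUNC path above, i.e. what
 * "echo > trace" does (the path is an assumption; tracefs is commonly
 * mounted at /sys/kernel/tracing):
 *
 *	int fd = open("/sys/kernel/tracing/trace", O_WRONLY | O_TRUNC);
 *	close(fd);
 *
 * The buffer is reset in this open path; the write itself is handled by
 * the stub tracing_write_stub() further below.
 */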
5070
5071 /*
5072  * Some tracers are not suitable for instance buffers.
5073  * A tracer is always available for the global array (toplevel)
5074  * or if it explicitly states that it is.
5075  */
5076 static bool
5077 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5078 {
5079         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5080 }
5081
5082 /* Find the next tracer that this trace array may use */
5083 static struct tracer *
5084 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5085 {
5086         while (t && !trace_ok_for_array(t, tr))
5087                 t = t->next;
5088
5089         return t;
5090 }
5091
5092 static void *
5093 t_next(struct seq_file *m, void *v, loff_t *pos)
5094 {
5095         struct trace_array *tr = m->private;
5096         struct tracer *t = v;
5097
5098         (*pos)++;
5099
5100         if (t)
5101                 t = get_tracer_for_array(tr, t->next);
5102
5103         return t;
5104 }
5105
5106 static void *t_start(struct seq_file *m, loff_t *pos)
5107 {
5108         struct trace_array *tr = m->private;
5109         struct tracer *t;
5110         loff_t l = 0;
5111
5112         mutex_lock(&trace_types_lock);
5113
5114         t = get_tracer_for_array(tr, trace_types);
5115         for (; t && l < *pos; t = t_next(m, t, &l))
5116                         ;
5117
5118         return t;
5119 }
5120
5121 static void t_stop(struct seq_file *m, void *p)
5122 {
5123         mutex_unlock(&trace_types_lock);
5124 }
5125
5126 static int t_show(struct seq_file *m, void *v)
5127 {
5128         struct tracer *t = v;
5129
5130         if (!t)
5131                 return 0;
5132
5133         seq_puts(m, t->name);
5134         if (t->next)
5135                 seq_putc(m, ' ');
5136         else
5137                 seq_putc(m, '\n');
5138
5139         return 0;
5140 }
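/*
 * Example of the available_tracers output produced by t_show() above; the
 * exact set and order depend on the kernel configuration:
 *
 *	# cat available_tracers
 *	function_graph function nop
 *
 * Names are space-separated and the last entry is followed by a newline.
 */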
5141
5142 static const struct seq_operations show_traces_seq_ops = {
5143         .start          = t_start,
5144         .next           = t_next,
5145         .stop           = t_stop,
5146         .show           = t_show,
5147 };
5148
5149 static int show_traces_open(struct inode *inode, struct file *file)
5150 {
5151         struct trace_array *tr = inode->i_private;
5152         struct seq_file *m;
5153         int ret;
5154
5155         ret = tracing_check_open_get_tr(tr);
5156         if (ret)
5157                 return ret;
5158
5159         ret = seq_open(file, &show_traces_seq_ops);
5160         if (ret) {
5161                 trace_array_put(tr);
5162                 return ret;
5163         }
5164
5165         m = file->private_data;
5166         m->private = tr;
5167
5168         return 0;
5169 }
5170
5171 static int show_traces_release(struct inode *inode, struct file *file)
5172 {
5173         struct trace_array *tr = inode->i_private;
5174
5175         trace_array_put(tr);
5176         return seq_release(inode, file);
5177 }
5178
5179 static ssize_t
5180 tracing_write_stub(struct file *filp, const char __user *ubuf,
5181                    size_t count, loff_t *ppos)
5182 {
5183         return count;
5184 }
5185
5186 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5187 {
5188         int ret;
5189
5190         if (file->f_mode & FMODE_READ)
5191                 ret = seq_lseek(file, offset, whence);
5192         else
5193                 file->f_pos = ret = 0;
5194
5195         return ret;
5196 }
5197
5198 static const struct file_operations tracing_fops = {
5199         .open           = tracing_open,
5200         .read           = seq_read,
5201         .read_iter      = seq_read_iter,
5202         .splice_read    = generic_file_splice_read,
5203         .write          = tracing_write_stub,
5204         .llseek         = tracing_lseek,
5205         .release        = tracing_release,
5206 };
5207
5208 static const struct file_operations show_traces_fops = {
5209         .open           = show_traces_open,
5210         .read           = seq_read,
5211         .llseek         = seq_lseek,
5212         .release        = show_traces_release,
5213 };
5214
5215 static ssize_t
5216 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5217                      size_t count, loff_t *ppos)
5218 {
5219         struct trace_array *tr = file_inode(filp)->i_private;
5220         char *mask_str;
5221         int len;
5222
5223         len = snprintf(NULL, 0, "%*pb\n",
5224                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5225         mask_str = kmalloc(len, GFP_KERNEL);
5226         if (!mask_str)
5227                 return -ENOMEM;
5228
5229         len = snprintf(mask_str, len, "%*pb\n",
5230                        cpumask_pr_args(tr->tracing_cpumask));
5231         if (len >= count) {
5232                 count = -EINVAL;
5233                 goto out_err;
5234         }
5235         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5236
5237 out_err:
5238         kfree(mask_str);
5239
5240         return count;
5241 }
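/*
 * The "%*pb" specifier prints the cpumask as a hex bitmap.  For example, on
 * an 8-CPU machine with every CPU traced, reading tracing_cpumask returns
 * "ff"; a mask covering only CPUs 0-3 would read back as "0f".
 */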
5242
5243 int tracing_set_cpumask(struct trace_array *tr,
5244                         cpumask_var_t tracing_cpumask_new)
5245 {
5246         int cpu;
5247
5248         if (!tr)
5249                 return -EINVAL;
5250
5251         local_irq_disable();
5252         arch_spin_lock(&tr->max_lock);
5253         for_each_tracing_cpu(cpu) {
5254                 /*
5255                  * Increase/decrease the disabled counter if we are
5256                  * about to flip a bit in the cpumask:
5257                  */
5258                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5259                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5260                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5261                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5262                 }
5263                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5264                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5265                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5266                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5267                 }
5268         }
5269         arch_spin_unlock(&tr->max_lock);
5270         local_irq_enable();
5271
5272         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5273
5274         return 0;
5275 }
5276
5277 static ssize_t
5278 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5279                       size_t count, loff_t *ppos)
5280 {
5281         struct trace_array *tr = file_inode(filp)->i_private;
5282         cpumask_var_t tracing_cpumask_new;
5283         int err;
5284
5285         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5286                 return -ENOMEM;
5287
5288         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5289         if (err)
5290                 goto err_free;
5291
5292         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5293         if (err)
5294                 goto err_free;
5295
5296         free_cpumask_var(tracing_cpumask_new);
5297
5298         return count;
5299
5300 err_free:
5301         free_cpumask_var(tracing_cpumask_new);
5302
5303         return err;
5304 }
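/*
 * Illustrative write-side sketch: the mask is parsed as hex by
 * cpumask_parse_user(), matching the read format above (the path is an
 * assumption for the example):
 *
 *	int fd = open("/sys/kernel/tracing/tracing_cpumask", O_WRONLY);
 *
 *	write(fd, "3", 1);	// limit tracing to CPUs 0 and 1
 *	close(fd);
 */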
5305
5306 static const struct file_operations tracing_cpumask_fops = {
5307         .open           = tracing_open_generic_tr,
5308         .read           = tracing_cpumask_read,
5309         .write          = tracing_cpumask_write,
5310         .release        = tracing_release_generic_tr,
5311         .llseek         = generic_file_llseek,
5312 };
5313
5314 static int tracing_trace_options_show(struct seq_file *m, void *v)
5315 {
5316         struct tracer_opt *trace_opts;
5317         struct trace_array *tr = m->private;
5318         u32 tracer_flags;
5319         int i;
5320
5321         mutex_lock(&trace_types_lock);
5322         tracer_flags = tr->current_trace->flags->val;
5323         trace_opts = tr->current_trace->flags->opts;
5324
5325         for (i = 0; trace_options[i]; i++) {
5326                 if (tr->trace_flags & (1 << i))
5327                         seq_printf(m, "%s\n", trace_options[i]);
5328                 else
5329                         seq_printf(m, "no%s\n", trace_options[i]);
5330         }
5331
5332         for (i = 0; trace_opts[i].name; i++) {
5333                 if (tracer_flags & trace_opts[i].bit)
5334                         seq_printf(m, "%s\n", trace_opts[i].name);
5335                 else
5336                         seq_printf(m, "no%s\n", trace_opts[i].name);
5337         }
5338         mutex_unlock(&trace_types_lock);
5339
5340         return 0;
5341 }
5342
5343 static int __set_tracer_option(struct trace_array *tr,
5344                                struct tracer_flags *tracer_flags,
5345                                struct tracer_opt *opts, int neg)
5346 {
5347         struct tracer *trace = tracer_flags->trace;
5348         int ret;
5349
5350         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5351         if (ret)
5352                 return ret;
5353
5354         if (neg)
5355                 tracer_flags->val &= ~opts->bit;
5356         else
5357                 tracer_flags->val |= opts->bit;
5358         return 0;
5359 }
5360
5361 /* Try to assign a tracer specific option */
5362 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5363 {
5364         struct tracer *trace = tr->current_trace;
5365         struct tracer_flags *tracer_flags = trace->flags;
5366         struct tracer_opt *opts = NULL;
5367         int i;
5368
5369         for (i = 0; tracer_flags->opts[i].name; i++) {
5370                 opts = &tracer_flags->opts[i];
5371
5372                 if (strcmp(cmp, opts->name) == 0)
5373                         return __set_tracer_option(tr, trace->flags, opts, neg);
5374         }
5375
5376         return -EINVAL;
5377 }
5378
5379 /* Some tracers require overwrite to stay enabled */
5380 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5381 {
5382         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5383                 return -1;
5384
5385         return 0;
5386 }
5387
5388 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5389 {
5390         int *map;
5391
5392         if ((mask == TRACE_ITER_RECORD_TGID) ||
5393             (mask == TRACE_ITER_RECORD_CMD))
5394                 lockdep_assert_held(&event_mutex);
5395
5396         /* do nothing if the flag is already in the requested state */
5397         if (!!(tr->trace_flags & mask) == !!enabled)
5398                 return 0;
5399
5400         /* Give the tracer a chance to approve the change */
5401         if (tr->current_trace->flag_changed)
5402                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5403                         return -EINVAL;
5404
5405         if (enabled)
5406                 tr->trace_flags |= mask;
5407         else
5408                 tr->trace_flags &= ~mask;
5409
5410         if (mask == TRACE_ITER_RECORD_CMD)
5411                 trace_event_enable_cmd_record(enabled);
5412
5413         if (mask == TRACE_ITER_RECORD_TGID) {
5414                 if (!tgid_map) {
5415                         tgid_map_max = pid_max;
5416                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5417                                        GFP_KERNEL);
5418
5419                         /*
5420                          * Pairs with smp_load_acquire() in
5421                          * trace_find_tgid_ptr() to ensure that if it observes
5422                          * the tgid_map we just allocated then it also observes
5423                          * the corresponding tgid_map_max value.
5424                          */
5425                         smp_store_release(&tgid_map, map);
5426                 }
5427                 if (!tgid_map) {
5428                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5429                         return -ENOMEM;
5430                 }
5431
5432                 trace_event_enable_tgid_record(enabled);
5433         }
5434
5435         if (mask == TRACE_ITER_EVENT_FORK)
5436                 trace_event_follow_fork(tr, enabled);
5437
5438         if (mask == TRACE_ITER_FUNC_FORK)
5439                 ftrace_pid_follow_fork(tr, enabled);
5440
5441         if (mask == TRACE_ITER_OVERWRITE) {
5442                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5443 #ifdef CONFIG_TRACER_MAX_TRACE
5444                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5445 #endif
5446         }
5447
5448         if (mask == TRACE_ITER_PRINTK) {
5449                 trace_printk_start_stop_comm(enabled);
5450                 trace_printk_control(enabled);
5451         }
5452
5453         return 0;
5454 }
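/*
 * Sketch of the reader side that the smp_store_release() above pairs with
 * (see trace_find_tgid_ptr() for the real lookup):
 *
 *	int *map = smp_load_acquire(&tgid_map);
 *
 *	if (!map || pid > tgid_map_max)
 *		return NULL;
 *	return &map[pid];
 *
 * The acquire guarantees that a reader seeing the new tgid_map pointer also
 * sees the tgid_map_max value written before the release.
 */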
5455
5456 int trace_set_options(struct trace_array *tr, char *option)
5457 {
5458         char *cmp;
5459         int neg = 0;
5460         int ret;
5461         size_t orig_len = strlen(option);
5462         int len;
5463
5464         cmp = strstrip(option);
5465
5466         len = str_has_prefix(cmp, "no");
5467         if (len)
5468                 neg = 1;
5469
5470         cmp += len;
5471
5472         mutex_lock(&event_mutex);
5473         mutex_lock(&trace_types_lock);
5474
5475         ret = match_string(trace_options, -1, cmp);
5476         /* If it is not a global option, try the tracer-specific options */
5477         if (ret < 0)
5478                 ret = set_tracer_option(tr, cmp, neg);
5479         else
5480                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5481
5482         mutex_unlock(&trace_types_lock);
5483         mutex_unlock(&event_mutex);
5484
5485         /*
5486          * If the first trailing whitespace is replaced with '\0' by strstrip,
5487          * turn it back into a space.
5488          */
5489         if (orig_len > strlen(option))
5490                 option[strlen(option)] = ' ';
5491
5492         return ret;
5493 }
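/*
 * Example inputs handled above: "print-parent" sets the global
 * TRACE_ITER_PRINT_PARENT flag and "noprint-parent" clears it, while a name
 * that is not in trace_options[] (for instance a tracer-specific flag such
 * as the function tracer's "func_stack_trace") falls through to
 * set_tracer_option().
 */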
5494
5495 static void __init apply_trace_boot_options(void)
5496 {
5497         char *buf = trace_boot_options_buf;
5498         char *option;
5499
5500         while (true) {
5501                 option = strsep(&buf, ",");
5502
5503                 if (!option)
5504                         break;
5505
5506                 if (*option)
5507                         trace_set_options(&global_trace, option);
5508
5509                 /* Put back the comma to allow this to be called again */
5510                 if (buf)
5511                         *(buf - 1) = ',';
5512         }
5513 }
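/*
 * Worked example: with trace_boot_options_buf = "sym-addr,stacktrace",
 * strsep() first yields "sym-addr" by replacing the ',' with '\0'; once the
 * option has been applied, the ',' is written back so the buffer stays
 * intact and could be parsed again.
 */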
5514
5515 static ssize_t
5516 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5517                         size_t cnt, loff_t *ppos)
5518 {
5519         struct seq_file *m = filp->private_data;
5520         struct trace_array *tr = m->private;
5521         char buf[64];
5522         int ret;
5523
5524         if (cnt >= sizeof(buf))
5525                 return -EINVAL;
5526
5527         if (copy_from_user(buf, ubuf, cnt))
5528                 return -EFAULT;
5529
5530         buf[cnt] = 0;
5531
5532         ret = trace_set_options(tr, buf);
5533         if (ret < 0)
5534                 return ret;
5535
5536         *ppos += cnt;
5537
5538         return cnt;
5539 }
5540
5541 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5542 {
5543         struct trace_array *tr = inode->i_private;
5544         int ret;
5545
5546         ret = tracing_check_open_get_tr(tr);
5547         if (ret)
5548                 return ret;
5549
5550         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5551         if (ret < 0)
5552                 trace_array_put(tr);
5553
5554         return ret;
5555 }
5556
5557 static const struct file_operations tracing_iter_fops = {
5558         .open           = tracing_trace_options_open,
5559         .read           = seq_read,
5560         .llseek         = seq_lseek,
5561         .release        = tracing_single_release_tr,
5562         .write          = tracing_trace_options_write,
5563 };
5564
5565 static const char readme_msg[] =
5566         "tracing mini-HOWTO:\n\n"
5567         "# echo 0 > tracing_on : quick way to disable tracing\n"
5568         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5569         " Important files:\n"
5570         "  trace\t\t\t- The static contents of the buffer\n"
5571         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5572         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5573         "  current_tracer\t- function and latency tracers\n"
5574         "  available_tracers\t- list of configured tracers for current_tracer\n"
5575         "  error_log\t- error log for failed commands (that support it)\n"
5576         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5577         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5578         "  trace_clock\t\t- change the clock used to order events\n"
5579         "       local:   Per cpu clock but may not be synced across CPUs\n"
5580         "      global:   Synced across CPUs but slows tracing down.\n"
5581         "     counter:   Not a clock, but just an increment\n"
5582         "      uptime:   Jiffy counter from time of boot\n"
5583         "        perf:   Same clock that perf events use\n"
5584 #ifdef CONFIG_X86_64
5585         "     x86-tsc:   TSC cycle counter\n"
5586 #endif
5587         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5588         "       delta:   Delta difference against a buffer-wide timestamp\n"
5589         "    absolute:   Absolute (standalone) timestamp\n"
5590         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5591         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5592         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5593         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5594         "\t\t\t  Remove sub-buffer with rmdir\n"
5595         "  trace_options\t\t- Set format or modify how tracing happens\n"
5596         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5597         "\t\t\t  option name\n"
5598         "  saved_cmdlines_size\t- echo a number in here to set how many comm-pid mappings are stored\n"
5599 #ifdef CONFIG_DYNAMIC_FTRACE
5600         "\n  available_filter_functions - list of functions that can be filtered on\n"
5601         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5602         "\t\t\t  functions\n"
5603         "\t     accepts: func_full_name or glob-matching-pattern\n"
5604         "\t     modules: Can select a group via module\n"
5605         "\t      Format: :mod:<module-name>\n"
5606         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5607         "\t    triggers: a command to perform when function is hit\n"
5608         "\t      Format: <function>:<trigger>[:count]\n"
5609         "\t     trigger: traceon, traceoff\n"
5610         "\t\t      enable_event:<system>:<event>\n"
5611         "\t\t      disable_event:<system>:<event>\n"
5612 #ifdef CONFIG_STACKTRACE
5613         "\t\t      stacktrace\n"
5614 #endif
5615 #ifdef CONFIG_TRACER_SNAPSHOT
5616         "\t\t      snapshot\n"
5617 #endif
5618         "\t\t      dump\n"
5619         "\t\t      cpudump\n"
5620         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5621         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5622         "\t     The first one will disable tracing every time do_fault is hit\n"
5623         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5624         "\t       The first time do_trap is hit and it disables tracing, the\n"
5625         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5626         "\t       the counter will not decrement. It only decrements when the\n"
5627         "\t       trigger did work\n"
5628         "\t     To remove trigger without count:\n"
5629         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5630         "\t     To remove trigger with a count:\n"
5631         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5632         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5633         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5634         "\t    modules: Can select a group via module command :mod:\n"
5635         "\t    Does not accept triggers\n"
5636 #endif /* CONFIG_DYNAMIC_FTRACE */
5637 #ifdef CONFIG_FUNCTION_TRACER
5638         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5639         "\t\t    (function)\n"
5640         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5641         "\t\t    (function)\n"
5642 #endif
5643 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5644         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5645         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5646         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5647 #endif
5648 #ifdef CONFIG_TRACER_SNAPSHOT
5649         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5650         "\t\t\t  snapshot buffer. Read the contents for more\n"
5651         "\t\t\t  information\n"
5652 #endif
5653 #ifdef CONFIG_STACK_TRACER
5654         "  stack_trace\t\t- Shows the max stack trace when active\n"
5655         "  stack_max_size\t- Shows current max stack size that was traced\n"
5656         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5657         "\t\t\t  new trace)\n"
5658 #ifdef CONFIG_DYNAMIC_FTRACE
5659         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5660         "\t\t\t  traces\n"
5661 #endif
5662 #endif /* CONFIG_STACK_TRACER */
5663 #ifdef CONFIG_DYNAMIC_EVENTS
5664         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5665         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5666 #endif
5667 #ifdef CONFIG_KPROBE_EVENTS
5668         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5669         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5670 #endif
5671 #ifdef CONFIG_UPROBE_EVENTS
5672         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5673         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5674 #endif
5675 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5676         "\t  accepts: event-definitions (one definition per line)\n"
5677         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5678         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5679 #ifdef CONFIG_HIST_TRIGGERS
5680         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5681 #endif
5682         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5683         "\t           -:[<group>/][<event>]\n"
5684 #ifdef CONFIG_KPROBE_EVENTS
5685         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5686   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5687 #endif
5688 #ifdef CONFIG_UPROBE_EVENTS
5689   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5690 #endif
5691         "\t     args: <name>=fetcharg[:type]\n"
5692         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5693 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5694         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5695 #else
5696         "\t           $stack<index>, $stack, $retval, $comm,\n"
5697 #endif
5698         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5699         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5700         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5701         "\t           symstr, <type>\\[<array-size>\\]\n"
5702 #ifdef CONFIG_HIST_TRIGGERS
5703         "\t    field: <stype> <name>;\n"
5704         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5705         "\t           [unsigned] char/int/long\n"
5706 #endif
5707         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5708         "\t            of the <attached-group>/<attached-event>.\n"
5709 #endif
5710         "  events/\t\t- Directory containing all trace event subsystems:\n"
5711         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5712         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5713         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5714         "\t\t\t  events\n"
5715         "      filter\t\t- If set, only events passing filter are traced\n"
5716         "  events/<system>/<event>/\t- Directory containing control files for\n"
5717         "\t\t\t  <event>:\n"
5718         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5719         "      filter\t\t- If set, only events passing filter are traced\n"
5720         "      trigger\t\t- If set, a command to perform when event is hit\n"
5721         "\t    Format: <trigger>[:count][if <filter>]\n"
5722         "\t   trigger: traceon, traceoff\n"
5723         "\t            enable_event:<system>:<event>\n"
5724         "\t            disable_event:<system>:<event>\n"
5725 #ifdef CONFIG_HIST_TRIGGERS
5726         "\t            enable_hist:<system>:<event>\n"
5727         "\t            disable_hist:<system>:<event>\n"
5728 #endif
5729 #ifdef CONFIG_STACKTRACE
5730         "\t\t    stacktrace\n"
5731 #endif
5732 #ifdef CONFIG_TRACER_SNAPSHOT
5733         "\t\t    snapshot\n"
5734 #endif
5735 #ifdef CONFIG_HIST_TRIGGERS
5736         "\t\t    hist (see below)\n"
5737 #endif
5738         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5739         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5740         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5741         "\t                  events/block/block_unplug/trigger\n"
5742         "\t   The first disables tracing every time block_unplug is hit.\n"
5743         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5744         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5745         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5746         "\t   Like function triggers, the counter is only decremented if it\n"
5747         "\t    enabled or disabled tracing.\n"
5748         "\t   To remove a trigger without a count:\n"
5749         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5750         "\t   To remove a trigger with a count:\n"
5751         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5752         "\t   Filters can be ignored when removing a trigger.\n"
5753 #ifdef CONFIG_HIST_TRIGGERS
5754         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5755         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5756         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5757         "\t            [:values=<field1[,field2,...]>]\n"
5758         "\t            [:sort=<field1[,field2,...]>]\n"
5759         "\t            [:size=#entries]\n"
5760         "\t            [:pause][:continue][:clear]\n"
5761         "\t            [:name=histname1]\n"
5762         "\t            [:nohitcount]\n"
5763         "\t            [:<handler>.<action>]\n"
5764         "\t            [if <filter>]\n\n"
5765         "\t    Note, special fields can be used as well:\n"
5766         "\t            common_timestamp - to record current timestamp\n"
5767         "\t            common_cpu - to record the CPU the event happened on\n"
5768         "\n"
5769         "\t    A hist trigger variable can be:\n"
5770         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5771         "\t        - a reference to another variable e.g. y=$x,\n"
5772         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5773         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5774         "\n"
5775         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5776         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5777         "\t    variable reference, field or numeric literal.\n"
5778         "\n"
5779         "\t    When a matching event is hit, an entry is added to a hash\n"
5780         "\t    table using the key(s) and value(s) named, and the value of a\n"
5781         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5782         "\t    correspond to fields in the event's format description.  Keys\n"
5783         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5784         "\t    Compound keys consisting of up to two fields can be specified\n"
5785         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5786         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5787         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5788         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5789         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5790         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5791         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5792         "\t    its histogram data will be shared with other triggers of the\n"
5793         "\t    same name, and trigger hits will update this common data.\n\n"
5794         "\t    Reading the 'hist' file for the event will dump the hash\n"
5795         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5796         "\t    triggers attached to an event, there will be a table for each\n"
5797         "\t    trigger in the output.  The table displayed for a named\n"
5798         "\t    trigger will be the same as any other instance having the\n"
5799         "\t    same name.  The default format used to display a given field\n"
5800         "\t    can be modified by appending any of the following modifiers\n"
5801         "\t    to the field name, as applicable:\n\n"
5802         "\t            .hex        display a number as a hex value\n"
5803         "\t            .sym        display an address as a symbol\n"
5804         "\t            .sym-offset display an address as a symbol and offset\n"
5805         "\t            .execname   display a common_pid as a program name\n"
5806         "\t            .syscall    display a syscall id as a syscall name\n"
5807         "\t            .log2       display log2 value rather than raw number\n"
5808         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5809         "\t            .usecs      display a common_timestamp in microseconds\n"
5810         "\t            .percent    display a number as a percentage value\n"
5811         "\t            .graph      display a bar-graph of a value\n\n"
5812         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5813         "\t    trigger or to start a hist trigger but not log any events\n"
5814         "\t    until told to do so.  'continue' can be used to start or\n"
5815         "\t    restart a paused hist trigger.\n\n"
5816         "\t    The 'clear' parameter will clear the contents of a running\n"
5817         "\t    hist trigger and leave its current paused/active state\n"
5818         "\t    unchanged.\n\n"
5819         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5820         "\t    raw hitcount in the histogram.\n\n"
5821         "\t    The enable_hist and disable_hist triggers can be used to\n"
5822         "\t    have one event conditionally start and stop another event's\n"
5823         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5824         "\t    the enable_event and disable_event triggers.\n\n"
5825         "\t    Hist trigger handlers and actions are executed whenever a\n"
5826         "\t    histogram entry is added or updated.  They take the form:\n\n"
5827         "\t        <handler>.<action>\n\n"
5828         "\t    The available handlers are:\n\n"
5829         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5830         "\t        onmax(var)               - invoke if var exceeds current max\n"
5831         "\t        onchange(var)            - invoke action if var changes\n\n"
5832         "\t    The available actions are:\n\n"
5833         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5834         "\t        save(field,...)                      - save current event fields\n"
5835 #ifdef CONFIG_TRACER_SNAPSHOT
5836         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5837 #endif
5838 #ifdef CONFIG_SYNTH_EVENTS
5839         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5840         "\t  Write into this file to define/undefine new synthetic events.\n"
5841         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5842 #endif
5843 #endif
5844 ;
5845
5846 static ssize_t
5847 tracing_readme_read(struct file *filp, char __user *ubuf,
5848                        size_t cnt, loff_t *ppos)
5849 {
5850         return simple_read_from_buffer(ubuf, cnt, ppos,
5851                                         readme_msg, strlen(readme_msg));
5852 }
5853
5854 static const struct file_operations tracing_readme_fops = {
5855         .open           = tracing_open_generic,
5856         .read           = tracing_readme_read,
5857         .llseek         = generic_file_llseek,
5858 };
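/*
 * The mini-HOWTO above is what userspace sees in the README file; a minimal
 * reader sketch (the path is assumed to be the usual tracefs mount point):
 *
 *	char buf[4096];
 *	ssize_t n;
 *	int fd = open("/sys/kernel/tracing/README", O_RDONLY);
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		write(STDOUT_FILENO, buf, n);
 *	close(fd);
 */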
5859
5860 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5861 {
5862         int pid = ++(*pos);
5863
5864         return trace_find_tgid_ptr(pid);
5865 }
5866
5867 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5868 {
5869         int pid = *pos;
5870
5871         return trace_find_tgid_ptr(pid);
5872 }
5873
5874 static void saved_tgids_stop(struct seq_file *m, void *v)
5875 {
5876 }
5877
5878 static int saved_tgids_show(struct seq_file *m, void *v)
5879 {
5880         int *entry = (int *)v;
5881         int pid = entry - tgid_map;
5882         int tgid = *entry;
5883
5884         if (tgid == 0)
5885                 return SEQ_SKIP;
5886
5887         seq_printf(m, "%d %d\n", pid, tgid);
5888         return 0;
5889 }
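/*
 * Each saved_tgids line is "<pid> <tgid>"; for example a thread 1234 whose
 * thread-group leader is 1200 is shown as:
 *
 *	1234 1200
 *
 * Entries whose recorded tgid is still 0 are skipped via SEQ_SKIP above.
 */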
5890
5891 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5892         .start          = saved_tgids_start,
5893         .stop           = saved_tgids_stop,
5894         .next           = saved_tgids_next,
5895         .show           = saved_tgids_show,
5896 };
5897
5898 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5899 {
5900         int ret;
5901
5902         ret = tracing_check_open_get_tr(NULL);
5903         if (ret)
5904                 return ret;
5905
5906         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5907 }
5908
5909
5910 static const struct file_operations tracing_saved_tgids_fops = {
5911         .open           = tracing_saved_tgids_open,
5912         .read           = seq_read,
5913         .llseek         = seq_lseek,
5914         .release        = seq_release,
5915 };
5916
5917 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5918 {
5919         unsigned int *ptr = v;
5920
5921         if (*pos || m->count)
5922                 ptr++;
5923
5924         (*pos)++;
5925
5926         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5927              ptr++) {
5928                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5929                         continue;
5930
5931                 return ptr;
5932         }
5933
5934         return NULL;
5935 }
5936
5937 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5938 {
5939         void *v;
5940         loff_t l = 0;
5941
5942         preempt_disable();
5943         arch_spin_lock(&trace_cmdline_lock);
5944
5945         v = &savedcmd->map_cmdline_to_pid[0];
5946         while (l <= *pos) {
5947                 v = saved_cmdlines_next(m, v, &l);
5948                 if (!v)
5949                         return NULL;
5950         }
5951
5952         return v;
5953 }
5954
5955 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5956 {
5957         arch_spin_unlock(&trace_cmdline_lock);
5958         preempt_enable();
5959 }
5960
5961 static int saved_cmdlines_show(struct seq_file *m, void *v)
5962 {
5963         char buf[TASK_COMM_LEN];
5964         unsigned int *pid = v;
5965
5966         __trace_find_cmdline(*pid, buf);
5967         seq_printf(m, "%d %s\n", *pid, buf);
5968         return 0;
5969 }
5970
5971 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5972         .start          = saved_cmdlines_start,
5973         .next           = saved_cmdlines_next,
5974         .stop           = saved_cmdlines_stop,
5975         .show           = saved_cmdlines_show,
5976 };
5977
5978 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5979 {
5980         int ret;
5981
5982         ret = tracing_check_open_get_tr(NULL);
5983         if (ret)
5984                 return ret;
5985
5986         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5987 }
5988
5989 static const struct file_operations tracing_saved_cmdlines_fops = {
5990         .open           = tracing_saved_cmdlines_open,
5991         .read           = seq_read,
5992         .llseek         = seq_lseek,
5993         .release        = seq_release,
5994 };
5995
5996 static ssize_t
5997 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5998                                  size_t cnt, loff_t *ppos)
5999 {
6000         char buf[64];
6001         int r;
6002
6003         preempt_disable();
6004         arch_spin_lock(&trace_cmdline_lock);
6005         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6006         arch_spin_unlock(&trace_cmdline_lock);
6007         preempt_enable();
6008
6009         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6010 }
6011
6012 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6013 {
6014         kfree(s->saved_cmdlines);
6015         kfree(s->map_cmdline_to_pid);
6016         kfree(s);
6017 }
6018
6019 static int tracing_resize_saved_cmdlines(unsigned int val)
6020 {
6021         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6022
6023         s = kmalloc(sizeof(*s), GFP_KERNEL);
6024         if (!s)
6025                 return -ENOMEM;
6026
6027         if (allocate_cmdlines_buffer(val, s) < 0) {
6028                 kfree(s);
6029                 return -ENOMEM;
6030         }
6031
6032         preempt_disable();
6033         arch_spin_lock(&trace_cmdline_lock);
6034         savedcmd_temp = savedcmd;
6035         savedcmd = s;
6036         arch_spin_unlock(&trace_cmdline_lock);
6037         preempt_enable();
6038         free_saved_cmdlines_buffer(savedcmd_temp);
6039
6040         return 0;
6041 }
6042
6043 static ssize_t
6044 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6045                                   size_t cnt, loff_t *ppos)
6046 {
6047         unsigned long val;
6048         int ret;
6049
6050         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6051         if (ret)
6052                 return ret;
6053
6054         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6055         if (!val || val > PID_MAX_DEFAULT)
6056                 return -EINVAL;
6057
6058         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6059         if (ret < 0)
6060                 return ret;
6061
6062         *ppos += cnt;
6063
6064         return cnt;
6065 }
6066
6067 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6068         .open           = tracing_open_generic,
6069         .read           = tracing_saved_cmdlines_size_read,
6070         .write          = tracing_saved_cmdlines_size_write,
6071 };
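/*
 * Illustrative resize of the saved cmdline cache (sketch only; the value
 * must be between 1 and PID_MAX_DEFAULT, and the path is an assumption):
 *
 *	int fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
 *
 *	write(fd, "1024", 4);	// keep the last 1024 comm<->pid mappings
 *	close(fd);
 */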
6072
6073 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6074 static union trace_eval_map_item *
6075 update_eval_map(union trace_eval_map_item *ptr)
6076 {
6077         if (!ptr->map.eval_string) {
6078                 if (ptr->tail.next) {
6079                         ptr = ptr->tail.next;
6080                         /* Set ptr to the next real item (skip head) */
6081                         ptr++;
6082                 } else
6083                         return NULL;
6084         }
6085         return ptr;
6086 }
6087
6088 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6089 {
6090         union trace_eval_map_item *ptr = v;
6091
6092         /*
6093          * Paranoid! If ptr points to end, we don't want to increment past it.
6094          * This really should never happen.
6095          */
6096         (*pos)++;
6097         ptr = update_eval_map(ptr);
6098         if (WARN_ON_ONCE(!ptr))
6099                 return NULL;
6100
6101         ptr++;
6102         ptr = update_eval_map(ptr);
6103
6104         return ptr;
6105 }
6106
6107 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6108 {
6109         union trace_eval_map_item *v;
6110         loff_t l = 0;
6111
6112         mutex_lock(&trace_eval_mutex);
6113
6114         v = trace_eval_maps;
6115         if (v)
6116                 v++;
6117
6118         while (v && l < *pos) {
6119                 v = eval_map_next(m, v, &l);
6120         }
6121
6122         return v;
6123 }
6124
6125 static void eval_map_stop(struct seq_file *m, void *v)
6126 {
6127         mutex_unlock(&trace_eval_mutex);
6128 }
6129
6130 static int eval_map_show(struct seq_file *m, void *v)
6131 {
6132         union trace_eval_map_item *ptr = v;
6133
6134         seq_printf(m, "%s %ld (%s)\n",
6135                    ptr->map.eval_string, ptr->map.eval_value,
6136                    ptr->map.system);
6137
6138         return 0;
6139 }
6140
6141 static const struct seq_operations tracing_eval_map_seq_ops = {
6142         .start          = eval_map_start,
6143         .next           = eval_map_next,
6144         .stop           = eval_map_stop,
6145         .show           = eval_map_show,
6146 };
6147
6148 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6149 {
6150         int ret;
6151
6152         ret = tracing_check_open_get_tr(NULL);
6153         if (ret)
6154                 return ret;
6155
6156         return seq_open(filp, &tracing_eval_map_seq_ops);
6157 }
6158
6159 static const struct file_operations tracing_eval_map_fops = {
6160         .open           = tracing_eval_map_open,
6161         .read           = seq_read,
6162         .llseek         = seq_lseek,
6163         .release        = seq_release,
6164 };
6165
6166 static inline union trace_eval_map_item *
6167 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6168 {
6169         /* Return tail of array given the head */
6170         return ptr + ptr->head.length + 1;
6171 }
6172
6173 static void
6174 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6175                            int len)
6176 {
6177         struct trace_eval_map **stop;
6178         struct trace_eval_map **map;
6179         union trace_eval_map_item *map_array;
6180         union trace_eval_map_item *ptr;
6181
6182         stop = start + len;
6183
6184         /*
6185          * The trace_eval_maps contains the map plus a head and tail item,
6186          * where the head holds the module and length of array, and the
6187          * tail holds a pointer to the next list.
6188          */
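        /*
         * Illustrative layout of one chunk (a sketch derived from the
         * code below, not an additional data structure):
         *
         *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
         *
         * trace_eval_jmp_to_tail() returns head + length + 1, i.e. the
         * tail slot, whose ->tail.next chains to the next module's chunk.
         */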
6189         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6190         if (!map_array) {
6191                 pr_warn("Unable to allocate trace eval mapping\n");
6192                 return;
6193         }
6194
6195         mutex_lock(&trace_eval_mutex);
6196
6197         if (!trace_eval_maps)
6198                 trace_eval_maps = map_array;
6199         else {
6200                 ptr = trace_eval_maps;
6201                 for (;;) {
6202                         ptr = trace_eval_jmp_to_tail(ptr);
6203                         if (!ptr->tail.next)
6204                                 break;
6205                         ptr = ptr->tail.next;
6206
6208                 ptr->tail.next = map_array;
6209         }
6210         map_array->head.mod = mod;
6211         map_array->head.length = len;
6212         map_array++;
6213
6214         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6215                 map_array->map = **map;
6216                 map_array++;
6217         }
6218         memset(map_array, 0, sizeof(*map_array));
6219
6220         mutex_unlock(&trace_eval_mutex);
6221 }
6222
6223 static void trace_create_eval_file(struct dentry *d_tracer)
6224 {
6225         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6226                           NULL, &tracing_eval_map_fops);
6227 }
6228
6229 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6230 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6231 static inline void trace_insert_eval_map_file(struct module *mod,
6232                               struct trace_eval_map **start, int len) { }
6233 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6234
6235 static void trace_insert_eval_map(struct module *mod,
6236                                   struct trace_eval_map **start, int len)
6237 {
6238         struct trace_eval_map **map;
6239
6240         if (len <= 0)
6241                 return;
6242
6243         map = start;
6244
6245         trace_event_eval_update(map, len);
6246
6247         trace_insert_eval_map_file(mod, start, len);
6248 }
6249
6250 static ssize_t
6251 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6252                        size_t cnt, loff_t *ppos)
6253 {
6254         struct trace_array *tr = filp->private_data;
6255         char buf[MAX_TRACER_SIZE+2];
6256         int r;
6257
6258         mutex_lock(&trace_types_lock);
6259         r = sprintf(buf, "%s\n", tr->current_trace->name);
6260         mutex_unlock(&trace_types_lock);
6261
6262         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6263 }
6264
6265 int tracer_init(struct tracer *t, struct trace_array *tr)
6266 {
6267         tracing_reset_online_cpus(&tr->array_buffer);
6268         return t->init(tr);
6269 }
6270
6271 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6272 {
6273         int cpu;
6274
6275         for_each_tracing_cpu(cpu)
6276                 per_cpu_ptr(buf->data, cpu)->entries = val;
6277 }
6278
6279 #ifdef CONFIG_TRACER_MAX_TRACE
6280 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6281 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6282                                         struct array_buffer *size_buf, int cpu_id)
6283 {
6284         int cpu, ret = 0;
6285
6286         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6287                 for_each_tracing_cpu(cpu) {
6288                         ret = ring_buffer_resize(trace_buf->buffer,
6289                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6290                         if (ret < 0)
6291                                 break;
6292                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6293                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6294                 }
6295         } else {
6296                 ret = ring_buffer_resize(trace_buf->buffer,
6297                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6298                 if (ret == 0)
6299                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6300                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6301         }
6302
6303         return ret;
6304 }
6305 #endif /* CONFIG_TRACER_MAX_TRACE */
6306
6307 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6308                                         unsigned long size, int cpu)
6309 {
6310         int ret;
6311
6312         /*
6313          * If kernel or user changes the size of the ring buffer
6314          * we use the size that was given, and we can forget about
6315          * expanding it later.
6316          */
6317         ring_buffer_expanded = true;
6318
6319         /* May be called before buffers are initialized */
6320         if (!tr->array_buffer.buffer)
6321                 return 0;
6322
6323         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6324         if (ret < 0)
6325                 return ret;
6326
6327 #ifdef CONFIG_TRACER_MAX_TRACE
6328         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6329             !tr->current_trace->use_max_tr)
6330                 goto out;
6331
6332         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6333         if (ret < 0) {
6334                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6335                                                      &tr->array_buffer, cpu);
6336                 if (r < 0) {
6337                         /*
6338                          * AARGH! We are left with a different
6339                          * size max buffer!!!!
6340                          * The max buffer is our "snapshot" buffer.
6341                          * When a tracer needs a snapshot (one of the
6342                          * latency tracers), it swaps the max buffer
6343                          * with the saved snapshot. We succeeded in
6344                          * updating the size of the main buffer, but
6345                          * failed to update the size of the max buffer.
6346                          * Then, when we tried to reset the main buffer
6347                          * to its original size, that failed too. This is
6348                          * very unlikely to happen, but if it does, warn
6349                          * and kill all tracing.
6350                          */
6351                         WARN_ON(1);
6352                         tracing_disabled = 1;
6353                 }
6354                 return ret;
6355         }
6356
6357         if (cpu == RING_BUFFER_ALL_CPUS)
6358                 set_buffer_entries(&tr->max_buffer, size);
6359         else
6360                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6361
6362  out:
6363 #endif /* CONFIG_TRACER_MAX_TRACE */
6364
6365         if (cpu == RING_BUFFER_ALL_CPUS)
6366                 set_buffer_entries(&tr->array_buffer, size);
6367         else
6368                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6369
6370         return ret;
6371 }
6372
6373 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6374                                   unsigned long size, int cpu_id)
6375 {
6376         int ret;
6377
6378         mutex_lock(&trace_types_lock);
6379
6380         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6381                 /* make sure this cpu is enabled in the mask */
6382                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6383                         ret = -EINVAL;
6384                         goto out;
6385                 }
6386         }
6387
6388         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6389         if (ret < 0)
6390                 ret = -ENOMEM;
6391
6392 out:
6393         mutex_unlock(&trace_types_lock);
6394
6395         return ret;
6396 }
6397
6398
6399 /**
6400  * tracing_update_buffers - used by tracing facility to expand ring buffers
6401  *
6402  * To save memory when tracing is configured in but never used, the ring
6403  * buffers are initially set to a minimum size. Once a user starts to use
6404  * the tracing facility, the buffers need to grow to their default size.
6406  *
6407  * This function is to be called when a tracer is about to be used.
6408  */
6409 int tracing_update_buffers(void)
6410 {
6411         int ret = 0;
6412
6413         mutex_lock(&trace_types_lock);
6414         if (!ring_buffer_expanded)
6415                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6416                                                 RING_BUFFER_ALL_CPUS);
6417         mutex_unlock(&trace_types_lock);
6418
6419         return ret;
6420 }
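
/*
 * Typical call pattern from code about to enable tracing (an illustrative
 * sketch, not a required API sequence):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	(buffers are now at full size; the tracer or event can be enabled)
 */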
6421
6422 struct trace_option_dentry;
6423
6424 static void
6425 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6426
6427 /*
6428  * Used to clear out the tracer before deletion of an instance.
6429  * Must have trace_types_lock held.
6430  */
6431 static void tracing_set_nop(struct trace_array *tr)
6432 {
6433         if (tr->current_trace == &nop_trace)
6434                 return;
6435
6436         tr->current_trace->enabled--;
6437
6438         if (tr->current_trace->reset)
6439                 tr->current_trace->reset(tr);
6440
6441         tr->current_trace = &nop_trace;
6442 }
6443
6444 static bool tracer_options_updated;
6445
6446 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6447 {
6448         /* Only enable if the directory has been created already. */
6449         if (!tr->dir)
6450                 return;
6451
6452         /* Only create trace option files after update_tracer_options() finishes */
6453         if (!tracer_options_updated)
6454                 return;
6455
6456         create_trace_option_files(tr, t);
6457 }
6458
6459 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6460 {
6461         struct tracer *t;
6462 #ifdef CONFIG_TRACER_MAX_TRACE
6463         bool had_max_tr;
6464 #endif
6465         int ret = 0;
6466
6467         mutex_lock(&trace_types_lock);
6468
6469         if (!ring_buffer_expanded) {
6470                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6471                                                 RING_BUFFER_ALL_CPUS);
6472                 if (ret < 0)
6473                         goto out;
6474                 ret = 0;
6475         }
6476
6477         for (t = trace_types; t; t = t->next) {
6478                 if (strcmp(t->name, buf) == 0)
6479                         break;
6480         }
6481         if (!t) {
6482                 ret = -EINVAL;
6483                 goto out;
6484         }
6485         if (t == tr->current_trace)
6486                 goto out;
6487
6488 #ifdef CONFIG_TRACER_SNAPSHOT
6489         if (t->use_max_tr) {
6490                 local_irq_disable();
6491                 arch_spin_lock(&tr->max_lock);
6492                 if (tr->cond_snapshot)
6493                         ret = -EBUSY;
6494                 arch_spin_unlock(&tr->max_lock);
6495                 local_irq_enable();
6496                 if (ret)
6497                         goto out;
6498         }
6499 #endif
6500         /* Some tracers won't work on kernel command line */
6501         if (system_state < SYSTEM_RUNNING && t->noboot) {
6502                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6503                         t->name);
6504                 goto out;
6505         }
6506
6507         /* Some tracers are only allowed for the top level buffer */
6508         if (!trace_ok_for_array(t, tr)) {
6509                 ret = -EINVAL;
6510                 goto out;
6511         }
6512
6513         /* If trace pipe files are being read, we can't change the tracer */
6514         if (tr->trace_ref) {
6515                 ret = -EBUSY;
6516                 goto out;
6517         }
6518
6519         trace_branch_disable();
6520
6521         tr->current_trace->enabled--;
6522
6523         if (tr->current_trace->reset)
6524                 tr->current_trace->reset(tr);
6525
6526 #ifdef CONFIG_TRACER_MAX_TRACE
6527         had_max_tr = tr->current_trace->use_max_tr;
6528
6529         /* Current trace needs to be nop_trace before synchronize_rcu */
6530         tr->current_trace = &nop_trace;
6531
6532         if (had_max_tr && !t->use_max_tr) {
6533                 /*
6534                  * We need to make sure that the update_max_tr sees that
6535                  * current_trace changed to nop_trace to keep it from
6536                  * swapping the buffers after we resize it.
6537                  * update_max_tr() is called with interrupts disabled,
6538                  * so a synchronize_rcu() is sufficient.
6539                  */
6540                 synchronize_rcu();
6541                 free_snapshot(tr);
6542         }
6543
6544         if (t->use_max_tr && !tr->allocated_snapshot) {
6545                 ret = tracing_alloc_snapshot_instance(tr);
6546                 if (ret < 0)
6547                         goto out;
6548         }
6549 #else
6550         tr->current_trace = &nop_trace;
6551 #endif
6552
6553         if (t->init) {
6554                 ret = tracer_init(t, tr);
6555                 if (ret)
6556                         goto out;
6557         }
6558
6559         tr->current_trace = t;
6560         tr->current_trace->enabled++;
6561         trace_branch_enable(tr);
6562  out:
6563         mutex_unlock(&trace_types_lock);
6564
6565         return ret;
6566 }
6567
6568 static ssize_t
6569 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6570                         size_t cnt, loff_t *ppos)
6571 {
6572         struct trace_array *tr = filp->private_data;
6573         char buf[MAX_TRACER_SIZE+1];
6574         char *name;
6575         size_t ret;
6576         int err;
6577
6578         ret = cnt;
6579
6580         if (cnt > MAX_TRACER_SIZE)
6581                 cnt = MAX_TRACER_SIZE;
6582
6583         if (copy_from_user(buf, ubuf, cnt))
6584                 return -EFAULT;
6585
6586         buf[cnt] = 0;
6587
6588         name = strim(buf);
6589
6590         err = tracing_set_tracer(tr, name);
6591         if (err)
6592                 return err;
6593
6594         *ppos += ret;
6595
6596         return ret;
6597 }
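
/*
 * The write handler above typically backs the "current_tracer" file in
 * tracefs (the exact mount path can vary), so switching tracers from user
 * space looks roughly like:
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *	echo nop > /sys/kernel/tracing/current_tracer
 */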
6598
6599 static ssize_t
6600 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6601                    size_t cnt, loff_t *ppos)
6602 {
6603         char buf[64];
6604         int r;
6605
6606         r = snprintf(buf, sizeof(buf), "%ld\n",
6607                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6608         if (r > sizeof(buf))
6609                 r = sizeof(buf);
6610         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6611 }
6612
6613 static ssize_t
6614 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6615                     size_t cnt, loff_t *ppos)
6616 {
6617         unsigned long val;
6618         int ret;
6619
6620         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6621         if (ret)
6622                 return ret;
6623
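        /* Input is taken in microseconds; store it internally in nanoseconds. */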
6624         *ptr = val * 1000;
6625
6626         return cnt;
6627 }
6628
6629 static ssize_t
6630 tracing_thresh_read(struct file *filp, char __user *ubuf,
6631                     size_t cnt, loff_t *ppos)
6632 {
6633         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6634 }
6635
6636 static ssize_t
6637 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6638                      size_t cnt, loff_t *ppos)
6639 {
6640         struct trace_array *tr = filp->private_data;
6641         int ret;
6642
6643         mutex_lock(&trace_types_lock);
6644         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6645         if (ret < 0)
6646                 goto out;
6647
6648         if (tr->current_trace->update_thresh) {
6649                 ret = tr->current_trace->update_thresh(tr);
6650                 if (ret < 0)
6651                         goto out;
6652         }
6653
6654         ret = cnt;
6655 out:
6656         mutex_unlock(&trace_types_lock);
6657
6658         return ret;
6659 }
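
/*
 * tracing_thresh is typically exposed as the "tracing_thresh" file; the
 * value is written in microseconds (see tracing_nsecs_write() above) and
 * is consumed by tracers that implement ->update_thresh().
 */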
6660
6661 #ifdef CONFIG_TRACER_MAX_TRACE
6662
6663 static ssize_t
6664 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6665                      size_t cnt, loff_t *ppos)
6666 {
6667         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6668 }
6669
6670 static ssize_t
6671 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6672                       size_t cnt, loff_t *ppos)
6673 {
6674         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6675 }
6676
6677 #endif
6678
6679 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6680 {
6681         struct trace_array *tr = inode->i_private;
6682         struct trace_iterator *iter;
6683         int ret;
6684
6685         ret = tracing_check_open_get_tr(tr);
6686         if (ret)
6687                 return ret;
6688
6689         mutex_lock(&trace_types_lock);
6690
6691         /* create a buffer to store the information to pass to userspace */
6692         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6693         if (!iter) {
6694                 ret = -ENOMEM;
6695                 __trace_array_put(tr);
6696                 goto out;
6697         }
6698
6699         trace_seq_init(&iter->seq);
6700         iter->trace = tr->current_trace;
6701
6702         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6703                 ret = -ENOMEM;
6704                 goto fail;
6705         }
6706
6707         /* trace pipe does not show start of buffer */
6708         cpumask_setall(iter->started);
6709
6710         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6711                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6712
6713         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6714         if (trace_clocks[tr->clock_id].in_ns)
6715                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6716
6717         iter->tr = tr;
6718         iter->array_buffer = &tr->array_buffer;
6719         iter->cpu_file = tracing_get_cpu(inode);
6720         mutex_init(&iter->mutex);
6721         filp->private_data = iter;
6722
6723         if (iter->trace->pipe_open)
6724                 iter->trace->pipe_open(iter);
6725
6726         nonseekable_open(inode, filp);
6727
6728         tr->trace_ref++;
6729 out:
6730         mutex_unlock(&trace_types_lock);
6731         return ret;
6732
6733 fail:
6734         kfree(iter);
6735         __trace_array_put(tr);
6736         mutex_unlock(&trace_types_lock);
6737         return ret;
6738 }
6739
6740 static int tracing_release_pipe(struct inode *inode, struct file *file)
6741 {
6742         struct trace_iterator *iter = file->private_data;
6743         struct trace_array *tr = inode->i_private;
6744
6745         mutex_lock(&trace_types_lock);
6746
6747         tr->trace_ref--;
6748
6749         if (iter->trace->pipe_close)
6750                 iter->trace->pipe_close(iter);
6751
6752         mutex_unlock(&trace_types_lock);
6753
6754         free_cpumask_var(iter->started);
6755         kfree(iter->fmt);
6756         mutex_destroy(&iter->mutex);
6757         kfree(iter);
6758
6759         trace_array_put(tr);
6760
6761         return 0;
6762 }
6763
6764 static __poll_t
6765 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6766 {
6767         struct trace_array *tr = iter->tr;
6768
6769         /* Iterators are static; they should be either filled or empty */
6770         if (trace_buffer_iter(iter, iter->cpu_file))
6771                 return EPOLLIN | EPOLLRDNORM;
6772
6773         if (tr->trace_flags & TRACE_ITER_BLOCK)
6774                 /*
6775                  * Always select as readable when in blocking mode
6776                  */
6777                 return EPOLLIN | EPOLLRDNORM;
6778         else
6779                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6780                                              filp, poll_table, iter->tr->buffer_percent);
6781 }
6782
6783 static __poll_t
6784 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6785 {
6786         struct trace_iterator *iter = filp->private_data;
6787
6788         return trace_poll(iter, filp, poll_table);
6789 }
6790
6791 /* Must be called with iter->mutex held. */
6792 static int tracing_wait_pipe(struct file *filp)
6793 {
6794         struct trace_iterator *iter = filp->private_data;
6795         int ret;
6796
6797         while (trace_empty(iter)) {
6798
6799                 if ((filp->f_flags & O_NONBLOCK)) {
6800                         return -EAGAIN;
6801                 }
6802
6803                 /*
6804                  * We block until there is something to read. We keep
6805                  * blocking even if tracing is disabled, as long as we have
6806                  * never read anything. This allows a user to cat this file, and
6807                  * then enable tracing. But after we have read something,
6808                  * we give an EOF when tracing is again disabled.
6809                  *
6810                  * iter->pos will be 0 if we haven't read anything.
6811                  */
6812                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6813                         break;
6814
6815                 mutex_unlock(&iter->mutex);
6816
6817                 ret = wait_on_pipe(iter, 0);
6818
6819                 mutex_lock(&iter->mutex);
6820
6821                 if (ret)
6822                         return ret;
6823         }
6824
6825         return 1;
6826 }
6827
6828 /*
6829  * Consumer reader.
6830  */
6831 static ssize_t
6832 tracing_read_pipe(struct file *filp, char __user *ubuf,
6833                   size_t cnt, loff_t *ppos)
6834 {
6835         struct trace_iterator *iter = filp->private_data;
6836         ssize_t sret;
6837
6838         /*
6839          * Avoid more than one consumer on a single file descriptor
6840          * This is just a matter of trace coherency; the ring buffer itself
6841          * is protected.
6842          */
6843         mutex_lock(&iter->mutex);
6844
6845         /* return any leftover data */
6846         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6847         if (sret != -EBUSY)
6848                 goto out;
6849
6850         trace_seq_init(&iter->seq);
6851
6852         if (iter->trace->read) {
6853                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6854                 if (sret)
6855                         goto out;
6856         }
6857
6858 waitagain:
6859         sret = tracing_wait_pipe(filp);
6860         if (sret <= 0)
6861                 goto out;
6862
6863         /* stop when tracing is finished */
6864         if (trace_empty(iter)) {
6865                 sret = 0;
6866                 goto out;
6867         }
6868
6869         if (cnt >= PAGE_SIZE)
6870                 cnt = PAGE_SIZE - 1;
6871
6872         /* reset all but tr, trace, and overruns */
6873         trace_iterator_reset(iter);
6874         cpumask_clear(iter->started);
6875         trace_seq_init(&iter->seq);
6876
6877         trace_event_read_lock();
6878         trace_access_lock(iter->cpu_file);
6879         while (trace_find_next_entry_inc(iter) != NULL) {
6880                 enum print_line_t ret;
6881                 int save_len = iter->seq.seq.len;
6882
6883                 ret = print_trace_line(iter);
6884                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6885                         /*
6886                          * If one print_trace_line() fills the entire trace_seq in one
6887                          * shot, trace_seq_to_user() will return -EBUSY because save_len
6888                          * == 0. In this case we must consume the event; otherwise the
6889                          * loop will peek it again next time, causing an infinite loop.
6890                          */
6891                         if (save_len == 0) {
6892                                 iter->seq.full = 0;
6893                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6894                                 trace_consume(iter);
6895                                 break;
6896                         }
6897
6898                         /* In other cases, don't print partial lines */
6899                         iter->seq.seq.len = save_len;
6900                         break;
6901                 }
6902                 if (ret != TRACE_TYPE_NO_CONSUME)
6903                         trace_consume(iter);
6904
6905                 if (trace_seq_used(&iter->seq) >= cnt)
6906                         break;
6907
6908                 /*
6909                  * Setting the full flag means we reached the trace_seq buffer
6910                  * size and should have left via the partial-output condition
6911                  * above. One of the trace_seq_*() functions was not used properly.
6912                  */
6913                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6914                           iter->ent->type);
6915         }
6916         trace_access_unlock(iter->cpu_file);
6917         trace_event_read_unlock();
6918
6919         /* Now copy what we have to the user */
6920         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6921         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6922                 trace_seq_init(&iter->seq);
6923
6924         /*
6925          * If there was nothing to send to user, in spite of consuming trace
6926          * entries, go back to wait for more entries.
6927          */
6928         if (sret == -EBUSY)
6929                 goto waitagain;
6930
6931 out:
6932         mutex_unlock(&iter->mutex);
6933
6934         return sret;
6935 }
6936
6937 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6938                                      unsigned int idx)
6939 {
6940         __free_page(spd->pages[idx]);
6941 }
6942
6943 static size_t
6944 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6945 {
6946         size_t count;
6947         int save_len;
6948         int ret;
6949
6950         /* Seq buffer is page-sized, exactly what we need. */
6951         for (;;) {
6952                 save_len = iter->seq.seq.len;
6953                 ret = print_trace_line(iter);
6954
6955                 if (trace_seq_has_overflowed(&iter->seq)) {
6956                         iter->seq.seq.len = save_len;
6957                         break;
6958                 }
6959
6960                 /*
6961                  * This should not be hit, because it should only
6962                  * be set if the iter->seq overflowed. But check it
6963                  * anyway to be safe.
6964                  */
6965                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6966                         iter->seq.seq.len = save_len;
6967                         break;
6968                 }
6969
6970                 count = trace_seq_used(&iter->seq) - save_len;
6971                 if (rem < count) {
6972                         rem = 0;
6973                         iter->seq.seq.len = save_len;
6974                         break;
6975                 }
6976
6977                 if (ret != TRACE_TYPE_NO_CONSUME)
6978                         trace_consume(iter);
6979                 rem -= count;
6980                 if (!trace_find_next_entry_inc(iter))   {
6981                         rem = 0;
6982                         iter->ent = NULL;
6983                         break;
6984                 }
6985         }
6986
6987         return rem;
6988 }
6989
6990 static ssize_t tracing_splice_read_pipe(struct file *filp,
6991                                         loff_t *ppos,
6992                                         struct pipe_inode_info *pipe,
6993                                         size_t len,
6994                                         unsigned int flags)
6995 {
6996         struct page *pages_def[PIPE_DEF_BUFFERS];
6997         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6998         struct trace_iterator *iter = filp->private_data;
6999         struct splice_pipe_desc spd = {
7000                 .pages          = pages_def,
7001                 .partial        = partial_def,
7002                 .nr_pages       = 0, /* This gets updated below. */
7003                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7004                 .ops            = &default_pipe_buf_ops,
7005                 .spd_release    = tracing_spd_release_pipe,
7006         };
7007         ssize_t ret;
7008         size_t rem;
7009         unsigned int i;
7010
7011         if (splice_grow_spd(pipe, &spd))
7012                 return -ENOMEM;
7013
7014         mutex_lock(&iter->mutex);
7015
7016         if (iter->trace->splice_read) {
7017                 ret = iter->trace->splice_read(iter, filp,
7018                                                ppos, pipe, len, flags);
7019                 if (ret)
7020                         goto out_err;
7021         }
7022
7023         ret = tracing_wait_pipe(filp);
7024         if (ret <= 0)
7025                 goto out_err;
7026
7027         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7028                 ret = -EFAULT;
7029                 goto out_err;
7030         }
7031
7032         trace_event_read_lock();
7033         trace_access_lock(iter->cpu_file);
7034
7035         /* Fill as many pages as possible. */
7036         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7037                 spd.pages[i] = alloc_page(GFP_KERNEL);
7038                 if (!spd.pages[i])
7039                         break;
7040
7041                 rem = tracing_fill_pipe_page(rem, iter);
7042
7043                 /* Copy the data into the page, so we can start over. */
7044                 ret = trace_seq_to_buffer(&iter->seq,
7045                                           page_address(spd.pages[i]),
7046                                           trace_seq_used(&iter->seq));
7047                 if (ret < 0) {
7048                         __free_page(spd.pages[i]);
7049                         break;
7050                 }
7051                 spd.partial[i].offset = 0;
7052                 spd.partial[i].len = trace_seq_used(&iter->seq);
7053
7054                 trace_seq_init(&iter->seq);
7055         }
7056
7057         trace_access_unlock(iter->cpu_file);
7058         trace_event_read_unlock();
7059         mutex_unlock(&iter->mutex);
7060
7061         spd.nr_pages = i;
7062
7063         if (i)
7064                 ret = splice_to_pipe(pipe, &spd);
7065         else
7066                 ret = 0;
7067 out:
7068         splice_shrink_spd(&spd);
7069         return ret;
7070
7071 out_err:
7072         mutex_unlock(&iter->mutex);
7073         goto out;
7074 }
7075
7076 static ssize_t
7077 tracing_entries_read(struct file *filp, char __user *ubuf,
7078                      size_t cnt, loff_t *ppos)
7079 {
7080         struct inode *inode = file_inode(filp);
7081         struct trace_array *tr = inode->i_private;
7082         int cpu = tracing_get_cpu(inode);
7083         char buf[64];
7084         int r = 0;
7085         ssize_t ret;
7086
7087         mutex_lock(&trace_types_lock);
7088
7089         if (cpu == RING_BUFFER_ALL_CPUS) {
7090                 int cpu, buf_size_same;
7091                 unsigned long size;
7092
7093                 size = 0;
7094                 buf_size_same = 1;
7095                 /* check if all per-cpu buffer sizes are the same */
7096                 for_each_tracing_cpu(cpu) {
7097                         /* fill in the size from first enabled cpu */
7098                         if (size == 0)
7099                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7100                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7101                                 buf_size_same = 0;
7102                                 break;
7103                         }
7104                 }
7105
7106                 if (buf_size_same) {
7107                         if (!ring_buffer_expanded)
7108                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7109                                             size >> 10,
7110                                             trace_buf_size >> 10);
7111                         else
7112                                 r = sprintf(buf, "%lu\n", size >> 10);
7113                 } else
7114                         r = sprintf(buf, "X\n");
7115         } else
7116                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7117
7118         mutex_unlock(&trace_types_lock);
7119
7120         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7121         return ret;
7122 }
7123
7124 static ssize_t
7125 tracing_entries_write(struct file *filp, const char __user *ubuf,
7126                       size_t cnt, loff_t *ppos)
7127 {
7128         struct inode *inode = file_inode(filp);
7129         struct trace_array *tr = inode->i_private;
7130         unsigned long val;
7131         int ret;
7132
7133         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7134         if (ret)
7135                 return ret;
7136
7137         /* must have at least 1 entry */
7138         if (!val)
7139                 return -EINVAL;
7140
7141         /* value is in KB */
7142         val <<= 10;
7143         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7144         if (ret < 0)
7145                 return ret;
7146
7147         *ppos += cnt;
7148
7149         return cnt;
7150 }
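
/*
 * tracing_entries_write() typically backs the "buffer_size_kb" file (the
 * exact name/path may vary); the value written is interpreted as KB per
 * CPU, e.g.:
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb
 */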
7151
7152 static ssize_t
7153 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7154                                 size_t cnt, loff_t *ppos)
7155 {
7156         struct trace_array *tr = filp->private_data;
7157         char buf[64];
7158         int r, cpu;
7159         unsigned long size = 0, expanded_size = 0;
7160
7161         mutex_lock(&trace_types_lock);
7162         for_each_tracing_cpu(cpu) {
7163                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7164                 if (!ring_buffer_expanded)
7165                         expanded_size += trace_buf_size >> 10;
7166         }
7167         if (ring_buffer_expanded)
7168                 r = sprintf(buf, "%lu\n", size);
7169         else
7170                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7171         mutex_unlock(&trace_types_lock);
7172
7173         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7174 }
7175
7176 static ssize_t
7177 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7178                           size_t cnt, loff_t *ppos)
7179 {
7180         /*
7181          * There is no need to read what the user has written; this function
7182          * exists only so that "echo" does not report an error when used.
7183          */
7184
7185         *ppos += cnt;
7186
7187         return cnt;
7188 }
7189
7190 static int
7191 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7192 {
7193         struct trace_array *tr = inode->i_private;
7194
7195         /* disable tracing ? */
7196         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7197                 tracer_tracing_off(tr);
7198         /* resize the ring buffer to 0 */
7199         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7200
7201         trace_array_put(tr);
7202
7203         return 0;
7204 }
7205
7206 static ssize_t
7207 tracing_mark_write(struct file *filp, const char __user *ubuf,
7208                                         size_t cnt, loff_t *fpos)
7209 {
7210         struct trace_array *tr = filp->private_data;
7211         struct ring_buffer_event *event;
7212         enum event_trigger_type tt = ETT_NONE;
7213         struct trace_buffer *buffer;
7214         struct print_entry *entry;
7215         ssize_t written;
7216         int size;
7217         int len;
7218
7219 /* Used in tracing_mark_raw_write() as well */
7220 #define FAULTED_STR "<faulted>"
7221 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7222
7223         if (tracing_disabled)
7224                 return -EINVAL;
7225
7226         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7227                 return -EINVAL;
7228
7229         if (cnt > TRACE_BUF_SIZE)
7230                 cnt = TRACE_BUF_SIZE;
7231
7232         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7233
7234         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7235
7236         /* If less than "<faulted>", then make sure we can still add that */
7237         if (cnt < FAULTED_SIZE)
7238                 size += FAULTED_SIZE - cnt;
7239
7240         buffer = tr->array_buffer.buffer;
7241         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7242                                             tracing_gen_ctx());
7243         if (unlikely(!event))
7244                 /* Ring buffer disabled, return as if not open for write */
7245                 return -EBADF;
7246
7247         entry = ring_buffer_event_data(event);
7248         entry->ip = _THIS_IP_;
7249
7250         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7251         if (len) {
7252                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7253                 cnt = FAULTED_SIZE;
7254                 written = -EFAULT;
7255         } else
7256                 written = cnt;
7257
7258         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7259                 /* do not add \n before testing triggers, but add \0 */
7260                 entry->buf[cnt] = '\0';
7261                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7262         }
7263
7264         if (entry->buf[cnt - 1] != '\n') {
7265                 entry->buf[cnt] = '\n';
7266                 entry->buf[cnt + 1] = '\0';
7267         } else
7268                 entry->buf[cnt] = '\0';
7269
7270         if (static_branch_unlikely(&trace_marker_exports_enabled))
7271                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7272         __buffer_unlock_commit(buffer, event);
7273
7274         if (tt)
7275                 event_triggers_post_call(tr->trace_marker_file, tt);
7276
7277         return written;
7278 }
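
/*
 * tracing_mark_write() is what user space reaches when writing to the
 * "trace_marker" file (name as commonly exposed in tracefs), e.g.:
 *
 *	echo "hit interesting point" > /sys/kernel/tracing/trace_marker
 *
 * The "<faulted>" string above is substituted when the user buffer cannot
 * be copied atomically.
 */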
7279
7280 /* Limit it for now to 3K (including tag) */
7281 #define RAW_DATA_MAX_SIZE (1024*3)
7282
7283 static ssize_t
7284 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7285                                         size_t cnt, loff_t *fpos)
7286 {
7287         struct trace_array *tr = filp->private_data;
7288         struct ring_buffer_event *event;
7289         struct trace_buffer *buffer;
7290         struct raw_data_entry *entry;
7291         ssize_t written;
7292         int size;
7293         int len;
7294
7295 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7296
7297         if (tracing_disabled)
7298                 return -EINVAL;
7299
7300         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7301                 return -EINVAL;
7302
7303         /* The marker must at least have a tag id */
7304         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7305                 return -EINVAL;
7306
7307         if (cnt > TRACE_BUF_SIZE)
7308                 cnt = TRACE_BUF_SIZE;
7309
7310         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7311
7312         size = sizeof(*entry) + cnt;
7313         if (cnt < FAULT_SIZE_ID)
7314                 size += FAULT_SIZE_ID - cnt;
7315
7316         buffer = tr->array_buffer.buffer;
7317         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7318                                             tracing_gen_ctx());
7319         if (!event)
7320                 /* Ring buffer disabled, return as if not open for write */
7321                 return -EBADF;
7322
7323         entry = ring_buffer_event_data(event);
7324
7325         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7326         if (len) {
7327                 entry->id = -1;
7328                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7329                 written = -EFAULT;
7330         } else
7331                 written = cnt;
7332
7333         __buffer_unlock_commit(buffer, event);
7334
7335         return written;
7336 }
7337
7338 static int tracing_clock_show(struct seq_file *m, void *v)
7339 {
7340         struct trace_array *tr = m->private;
7341         int i;
7342
7343         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7344                 seq_printf(m,
7345                         "%s%s%s%s", i ? " " : "",
7346                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7347                         i == tr->clock_id ? "]" : "");
7348         seq_putc(m, '\n');
7349
7350         return 0;
7351 }
7352
7353 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7354 {
7355         int i;
7356
7357         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7358                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7359                         break;
7360         }
7361         if (i == ARRAY_SIZE(trace_clocks))
7362                 return -EINVAL;
7363
7364         mutex_lock(&trace_types_lock);
7365
7366         tr->clock_id = i;
7367
7368         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7369
7370         /*
7371          * New clock may not be consistent with the previous clock.
7372          * Reset the buffer so that it doesn't have incomparable timestamps.
7373          */
7374         tracing_reset_online_cpus(&tr->array_buffer);
7375
7376 #ifdef CONFIG_TRACER_MAX_TRACE
7377         if (tr->max_buffer.buffer)
7378                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7379         tracing_reset_online_cpus(&tr->max_buffer);
7380 #endif
7381
7382         mutex_unlock(&trace_types_lock);
7383
7384         return 0;
7385 }
7386
7387 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7388                                    size_t cnt, loff_t *fpos)
7389 {
7390         struct seq_file *m = filp->private_data;
7391         struct trace_array *tr = m->private;
7392         char buf[64];
7393         const char *clockstr;
7394         int ret;
7395
7396         if (cnt >= sizeof(buf))
7397                 return -EINVAL;
7398
7399         if (copy_from_user(buf, ubuf, cnt))
7400                 return -EFAULT;
7401
7402         buf[cnt] = 0;
7403
7404         clockstr = strstrip(buf);
7405
7406         ret = tracing_set_clock(tr, clockstr);
7407         if (ret)
7408                 return ret;
7409
7410         *fpos += cnt;
7411
7412         return cnt;
7413 }
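
/*
 * The show/write pair above typically backs the "trace_clock" file.
 * Reading it lists the available clocks with the active one in brackets;
 * writing a listed name switches clocks (and resets the buffers, as noted
 * in tracing_set_clock()), e.g.:
 *
 *	cat /sys/kernel/tracing/trace_clock
 *	echo global > /sys/kernel/tracing/trace_clock
 */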
7414
7415 static int tracing_clock_open(struct inode *inode, struct file *file)
7416 {
7417         struct trace_array *tr = inode->i_private;
7418         int ret;
7419
7420         ret = tracing_check_open_get_tr(tr);
7421         if (ret)
7422                 return ret;
7423
7424         ret = single_open(file, tracing_clock_show, inode->i_private);
7425         if (ret < 0)
7426                 trace_array_put(tr);
7427
7428         return ret;
7429 }
7430
7431 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7432 {
7433         struct trace_array *tr = m->private;
7434
7435         mutex_lock(&trace_types_lock);
7436
7437         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7438                 seq_puts(m, "delta [absolute]\n");
7439         else
7440                 seq_puts(m, "[delta] absolute\n");
7441
7442         mutex_unlock(&trace_types_lock);
7443
7444         return 0;
7445 }
7446
7447 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7448 {
7449         struct trace_array *tr = inode->i_private;
7450         int ret;
7451
7452         ret = tracing_check_open_get_tr(tr);
7453         if (ret)
7454                 return ret;
7455
7456         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7457         if (ret < 0)
7458                 trace_array_put(tr);
7459
7460         return ret;
7461 }
7462
7463 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7464 {
7465         if (rbe == this_cpu_read(trace_buffered_event))
7466                 return ring_buffer_time_stamp(buffer);
7467
7468         return ring_buffer_event_time_stamp(buffer, rbe);
7469 }
7470
7471 /*
7472  * Set or disable using the per-CPU trace_buffered_event when possible.
7473  */
7474 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7475 {
7476         int ret = 0;
7477
7478         mutex_lock(&trace_types_lock);
7479
7480         if (set && tr->no_filter_buffering_ref++)
7481                 goto out;
7482
7483         if (!set) {
7484                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7485                         ret = -EINVAL;
7486                         goto out;
7487                 }
7488
7489                 --tr->no_filter_buffering_ref;
7490         }
7491  out:
7492         mutex_unlock(&trace_types_lock);
7493
7494         return ret;
7495 }
7496
7497 struct ftrace_buffer_info {
7498         struct trace_iterator   iter;
7499         void                    *spare;
7500         unsigned int            spare_cpu;
7501         unsigned int            read;
7502 };
7503
7504 #ifdef CONFIG_TRACER_SNAPSHOT
7505 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7506 {
7507         struct trace_array *tr = inode->i_private;
7508         struct trace_iterator *iter;
7509         struct seq_file *m;
7510         int ret;
7511
7512         ret = tracing_check_open_get_tr(tr);
7513         if (ret)
7514                 return ret;
7515
7516         if (file->f_mode & FMODE_READ) {
7517                 iter = __tracing_open(inode, file, true);
7518                 if (IS_ERR(iter))
7519                         ret = PTR_ERR(iter);
7520         } else {
7521                 /* Writes still need the seq_file to hold the private data */
7522                 ret = -ENOMEM;
7523                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7524                 if (!m)
7525                         goto out;
7526                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7527                 if (!iter) {
7528                         kfree(m);
7529                         goto out;
7530                 }
7531                 ret = 0;
7532
7533                 iter->tr = tr;
7534                 iter->array_buffer = &tr->max_buffer;
7535                 iter->cpu_file = tracing_get_cpu(inode);
7536                 m->private = iter;
7537                 file->private_data = m;
7538         }
7539 out:
7540         if (ret < 0)
7541                 trace_array_put(tr);
7542
7543         return ret;
7544 }
7545
7546 static ssize_t
7547 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7548                        loff_t *ppos)
7549 {
7550         struct seq_file *m = filp->private_data;
7551         struct trace_iterator *iter = m->private;
7552         struct trace_array *tr = iter->tr;
7553         unsigned long val;
7554         int ret;
7555
7556         ret = tracing_update_buffers();
7557         if (ret < 0)
7558                 return ret;
7559
7560         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7561         if (ret)
7562                 return ret;
7563
7564         mutex_lock(&trace_types_lock);
7565
7566         if (tr->current_trace->use_max_tr) {
7567                 ret = -EBUSY;
7568                 goto out;
7569         }
7570
7571         local_irq_disable();
7572         arch_spin_lock(&tr->max_lock);
7573         if (tr->cond_snapshot)
7574                 ret = -EBUSY;
7575         arch_spin_unlock(&tr->max_lock);
7576         local_irq_enable();
7577         if (ret)
7578                 goto out;
7579
7580         switch (val) {
7581         case 0:
7582                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7583                         ret = -EINVAL;
7584                         break;
7585                 }
7586                 if (tr->allocated_snapshot)
7587                         free_snapshot(tr);
7588                 break;
7589         case 1:
7590 /* Only allow per-cpu swap if the ring buffer supports it */
7591 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7592                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7593                         ret = -EINVAL;
7594                         break;
7595                 }
7596 #endif
7597                 if (tr->allocated_snapshot)
7598                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7599                                         &tr->array_buffer, iter->cpu_file);
7600                 else
7601                         ret = tracing_alloc_snapshot_instance(tr);
7602                 if (ret < 0)
7603                         break;
7604                 local_irq_disable();
7605                 /* Now, we're going to swap */
7606                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7607                         update_max_tr(tr, current, smp_processor_id(), NULL);
7608                 else
7609                         update_max_tr_single(tr, current, iter->cpu_file);
7610                 local_irq_enable();
7611                 break;
7612         default:
7613                 if (tr->allocated_snapshot) {
7614                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7615                                 tracing_reset_online_cpus(&tr->max_buffer);
7616                         else
7617                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7618                 }
7619                 break;
7620         }
7621
7622         if (ret >= 0) {
7623                 *ppos += cnt;
7624                 ret = cnt;
7625         }
7626 out:
7627         mutex_unlock(&trace_types_lock);
7628         return ret;
7629 }
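
/*
 * Values accepted by the write above (mirroring the switch statement):
 *   0     - free the snapshot buffer (all-CPU file only)
 *   1     - allocate the snapshot buffer if needed and swap it with the
 *           live buffer (i.e. take a snapshot)
 *   other - clear the snapshot buffer contents
 * This is what user space typically reaches through the "snapshot" file.
 */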
7630
7631 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7632 {
7633         struct seq_file *m = file->private_data;
7634         int ret;
7635
7636         ret = tracing_release(inode, file);
7637
7638         if (file->f_mode & FMODE_READ)
7639                 return ret;
7640
7641         /* If write only, the seq_file is just a stub */
7642         if (m)
7643                 kfree(m->private);
7644         kfree(m);
7645
7646         return 0;
7647 }
7648
7649 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7650 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7651                                     size_t count, loff_t *ppos);
7652 static int tracing_buffers_release(struct inode *inode, struct file *file);
7653 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7654                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7655
7656 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7657 {
7658         struct ftrace_buffer_info *info;
7659         int ret;
7660
7661         /* The following checks for tracefs lockdown */
7662         ret = tracing_buffers_open(inode, filp);
7663         if (ret < 0)
7664                 return ret;
7665
7666         info = filp->private_data;
7667
7668         if (info->iter.trace->use_max_tr) {
7669                 tracing_buffers_release(inode, filp);
7670                 return -EBUSY;
7671         }
7672
7673         info->iter.snapshot = true;
7674         info->iter.array_buffer = &info->iter.tr->max_buffer;
7675
7676         return ret;
7677 }
7678
7679 #endif /* CONFIG_TRACER_SNAPSHOT */
7680
7681
7682 static const struct file_operations tracing_thresh_fops = {
7683         .open           = tracing_open_generic,
7684         .read           = tracing_thresh_read,
7685         .write          = tracing_thresh_write,
7686         .llseek         = generic_file_llseek,
7687 };
7688
7689 #ifdef CONFIG_TRACER_MAX_TRACE
7690 static const struct file_operations tracing_max_lat_fops = {
7691         .open           = tracing_open_generic,
7692         .read           = tracing_max_lat_read,
7693         .write          = tracing_max_lat_write,
7694         .llseek         = generic_file_llseek,
7695 };
7696 #endif
7697
7698 static const struct file_operations set_tracer_fops = {
7699         .open           = tracing_open_generic,
7700         .read           = tracing_set_trace_read,
7701         .write          = tracing_set_trace_write,
7702         .llseek         = generic_file_llseek,
7703 };
7704
7705 static const struct file_operations tracing_pipe_fops = {
7706         .open           = tracing_open_pipe,
7707         .poll           = tracing_poll_pipe,
7708         .read           = tracing_read_pipe,
7709         .splice_read    = tracing_splice_read_pipe,
7710         .release        = tracing_release_pipe,
7711         .llseek         = no_llseek,
7712 };
7713
7714 static const struct file_operations tracing_entries_fops = {
7715         .open           = tracing_open_generic_tr,
7716         .read           = tracing_entries_read,
7717         .write          = tracing_entries_write,
7718         .llseek         = generic_file_llseek,
7719         .release        = tracing_release_generic_tr,
7720 };
7721
7722 static const struct file_operations tracing_total_entries_fops = {
7723         .open           = tracing_open_generic_tr,
7724         .read           = tracing_total_entries_read,
7725         .llseek         = generic_file_llseek,
7726         .release        = tracing_release_generic_tr,
7727 };
7728
7729 static const struct file_operations tracing_free_buffer_fops = {
7730         .open           = tracing_open_generic_tr,
7731         .write          = tracing_free_buffer_write,
7732         .release        = tracing_free_buffer_release,
7733 };
7734
7735 static const struct file_operations tracing_mark_fops = {
7736         .open           = tracing_mark_open,
7737         .write          = tracing_mark_write,
7738         .release        = tracing_release_generic_tr,
7739 };
7740
7741 static const struct file_operations tracing_mark_raw_fops = {
7742         .open           = tracing_mark_open,
7743         .write          = tracing_mark_raw_write,
7744         .release        = tracing_release_generic_tr,
7745 };
7746
7747 static const struct file_operations trace_clock_fops = {
7748         .open           = tracing_clock_open,
7749         .read           = seq_read,
7750         .llseek         = seq_lseek,
7751         .release        = tracing_single_release_tr,
7752         .write          = tracing_clock_write,
7753 };
7754
7755 static const struct file_operations trace_time_stamp_mode_fops = {
7756         .open           = tracing_time_stamp_mode_open,
7757         .read           = seq_read,
7758         .llseek         = seq_lseek,
7759         .release        = tracing_single_release_tr,
7760 };
7761
7762 #ifdef CONFIG_TRACER_SNAPSHOT
7763 static const struct file_operations snapshot_fops = {
7764         .open           = tracing_snapshot_open,
7765         .read           = seq_read,
7766         .write          = tracing_snapshot_write,
7767         .llseek         = tracing_lseek,
7768         .release        = tracing_snapshot_release,
7769 };
7770
7771 static const struct file_operations snapshot_raw_fops = {
7772         .open           = snapshot_raw_open,
7773         .read           = tracing_buffers_read,
7774         .release        = tracing_buffers_release,
7775         .splice_read    = tracing_buffers_splice_read,
7776         .llseek         = no_llseek,
7777 };
7778
7779 #endif /* CONFIG_TRACER_SNAPSHOT */
7780
7781 /*
7782  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7783  * @filp: The active open file structure
7784  * @ubuf: The user-space buffer containing the value to write
7785  * @cnt: The maximum number of bytes to write
7786  * @ppos: The current "file" position
7787  *
7788  * This function implements the write interface for a struct trace_min_max_param.
7789  * The filp->private_data must point to a trace_min_max_param structure that
7790  * defines where to write the value, the min and the max acceptable values,
7791  * and a lock to protect the write.
7792  */
7793 static ssize_t
7794 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7795 {
7796         struct trace_min_max_param *param = filp->private_data;
7797         u64 val;
7798         int err;
7799
7800         if (!param)
7801                 return -EFAULT;
7802
7803         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7804         if (err)
7805                 return err;
7806
7807         if (param->lock)
7808                 mutex_lock(param->lock);
7809
7810         if (param->min && val < *param->min)
7811                 err = -EINVAL;
7812
7813         if (param->max && val > *param->max)
7814                 err = -EINVAL;
7815
7816         if (!err)
7817                 *param->val = val;
7818
7819         if (param->lock)
7820                 mutex_unlock(param->lock);
7821
7822         if (err)
7823                 return err;
7824
7825         return cnt;
7826 }
7827
7828 /*
7829  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7830  * @filp: The active open file structure
7831  * @ubuf: The userspace provided buffer to read value into
7832  * @cnt: The maximum number of bytes to read
7833  * @ppos: The current "file" position
7834  *
7835  * This function implements the read interface for a struct trace_min_max_param.
7836  * The filp->private_data must point to a trace_min_max_param struct with valid
7837  * data.
7838  */
7839 static ssize_t
7840 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7841 {
7842         struct trace_min_max_param *param = filp->private_data;
7843         char buf[U64_STR_SIZE];
7844         int len;
7845         u64 val;
7846
7847         if (!param)
7848                 return -EFAULT;
7849
7850         val = *param->val;
7851
7852         if (cnt > sizeof(buf))
7853                 cnt = sizeof(buf);
7854
7855         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7856
7857         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7858 }
7859
7860 const struct file_operations trace_min_max_fops = {
7861         .open           = tracing_open_generic,
7862         .read           = trace_min_max_read,
7863         .write          = trace_min_max_write,
7864 };
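
/*
 * Illustrative sketch (not code from this file; the names are hypothetical):
 * a user of trace_min_max_fops defines a struct trace_min_max_param that
 * points at the value to expose and its optional bounds and lock, then
 * wires it up with trace_create_file():
 *
 *	static u64 my_val, my_min, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 *
 * Reads then return "<my_val>\n", and writes are range checked against
 * my_min/my_max under my_lock, as implemented above.
 */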
7865
7866 #define TRACING_LOG_ERRS_MAX    8
7867 #define TRACING_LOG_LOC_MAX     128
7868
7869 #define CMD_PREFIX "  Command: "
7870
7871 struct err_info {
7872         const char      **errs; /* ptr to loc-specific array of err strings */
7873         u8              type;   /* index into errs -> specific err string */
7874         u16             pos;    /* caret position */
7875         u64             ts;
7876 };
7877
7878 struct tracing_log_err {
7879         struct list_head        list;
7880         struct err_info         info;
7881         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7882         char                    *cmd;                     /* what caused err */
7883 };
7884
7885 static DEFINE_MUTEX(tracing_err_log_lock);
7886
7887 static struct tracing_log_err *alloc_tracing_log_err(int len)
7888 {
7889         struct tracing_log_err *err;
7890
7891         err = kzalloc(sizeof(*err), GFP_KERNEL);
7892         if (!err)
7893                 return ERR_PTR(-ENOMEM);
7894
7895         err->cmd = kzalloc(len, GFP_KERNEL);
7896         if (!err->cmd) {
7897                 kfree(err);
7898                 return ERR_PTR(-ENOMEM);
7899         }
7900
7901         return err;
7902 }
7903
7904 static void free_tracing_log_err(struct tracing_log_err *err)
7905 {
7906         kfree(err->cmd);
7907         kfree(err);
7908 }
7909
7910 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7911                                                    int len)
7912 {
7913         struct tracing_log_err *err;
7914         char *cmd;
7915
7916         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7917                 err = alloc_tracing_log_err(len);
7918                 if (PTR_ERR(err) != -ENOMEM)
7919                         tr->n_err_log_entries++;
7920
7921                 return err;
7922         }
7923         cmd = kzalloc(len, GFP_KERNEL);
7924         if (!cmd)
7925                 return ERR_PTR(-ENOMEM);
7926         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7927         kfree(err->cmd);
7928         err->cmd = cmd;
7929         list_del(&err->list);
7930
7931         return err;
7932 }
7933
7934 /**
7935  * err_pos - find the position of a string within a command for error careting
7936  * @cmd: The tracing command that caused the error
7937  * @str: The string to position the caret at within @cmd
7938  *
7939  * Finds the position of the first occurrence of @str within @cmd.  The
7940  * return value can be passed to tracing_log_err() for caret placement
7941  * within @cmd.
7942  *
7943  * Returns the index within @cmd of the first occurrence of @str or 0
7944  * if @str was not found.
7945  */
7946 unsigned int err_pos(char *cmd, const char *str)
7947 {
7948         char *found;
7949
7950         if (WARN_ON(!strlen(cmd)))
7951                 return 0;
7952
7953         found = strstr(cmd, str);
7954         if (found)
7955                 return found - cmd;
7956
7957         return 0;
7958 }
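
/*
 * For example (illustrative only): err_pos("snapshot:bad", "bad") returns 9,
 * the offset of "bad" within the command, which tracing_log_err() below then
 * uses to place the '^' caret under the offending token.
 */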
7959
7960 /**
7961  * tracing_log_err - write an error to the tracing error log
7962  * @tr: The associated trace array for the error (NULL for top level array)
7963  * @loc: A string describing where the error occurred
7964  * @cmd: The tracing command that caused the error
7965  * @errs: The array of loc-specific static error strings
7966  * @type: The index into errs[], which produces the specific static err string
7967  * @pos: The position the caret should be placed in the cmd
7968  *
7969  * Writes an error into tracing/error_log of the form:
7970  *
7971  * <loc>: error: <text>
7972  *   Command: <cmd>
7973  *              ^
7974  *
7975  * tracing/error_log is a small log file containing the last
7976  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7977  * unless there has been a tracing error, and the error log can be
7978  * cleared and have its memory freed by writing the empty string in
7979  * truncation mode to it, i.e. echo > tracing/error_log.
7980  *
7981  * NOTE: the @errs array along with the @type param are used to
7982  * produce a static error string - this string is not copied and saved
7983  * when the error is logged - only a pointer to it is saved.  See
7984  * existing callers for examples of how static strings are typically
7985  * defined for use with tracing_log_err().
7986  */
7987 void tracing_log_err(struct trace_array *tr,
7988                      const char *loc, const char *cmd,
7989                      const char **errs, u8 type, u16 pos)
7990 {
7991         struct tracing_log_err *err;
7992         int len = 0;
7993
7994         if (!tr)
7995                 tr = &global_trace;
7996
7997         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7998
7999         mutex_lock(&tracing_err_log_lock);
8000         err = get_tracing_log_err(tr, len);
8001         if (PTR_ERR(err) == -ENOMEM) {
8002                 mutex_unlock(&tracing_err_log_lock);
8003                 return;
8004         }
8005
8006         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8007         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8008
8009         err->info.errs = errs;
8010         err->info.type = type;
8011         err->info.pos = pos;
8012         err->info.ts = local_clock();
8013
8014         list_add_tail(&err->list, &tr->err_log);
8015         mutex_unlock(&tracing_err_log_lock);
8016 }
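
/*
 * Illustrative call (names are hypothetical, not from this file):
 *
 *	static const char *my_errs[] = { "Invalid argument", "Duplicate name" };
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs, 0,
 *			err_pos(cmd, "bad_field"));
 *
 * appends an entry of the form shown in the kernel-doc above to
 * tracing/error_log, with errs[0] ("Invalid argument") as the error text and
 * the caret placed under "bad_field" in the echoed command.
 */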
8017
8018 static void clear_tracing_err_log(struct trace_array *tr)
8019 {
8020         struct tracing_log_err *err, *next;
8021
8022         mutex_lock(&tracing_err_log_lock);
8023         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8024                 list_del(&err->list);
8025                 free_tracing_log_err(err);
8026         }
8027
8028         tr->n_err_log_entries = 0;
8029         mutex_unlock(&tracing_err_log_lock);
8030 }
8031
8032 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8033 {
8034         struct trace_array *tr = m->private;
8035
8036         mutex_lock(&tracing_err_log_lock);
8037
8038         return seq_list_start(&tr->err_log, *pos);
8039 }
8040
8041 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8042 {
8043         struct trace_array *tr = m->private;
8044
8045         return seq_list_next(v, &tr->err_log, pos);
8046 }
8047
8048 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8049 {
8050         mutex_unlock(&tracing_err_log_lock);
8051 }
8052
8053 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8054 {
8055         u16 i;
8056
8057         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8058                 seq_putc(m, ' ');
8059         for (i = 0; i < pos; i++)
8060                 seq_putc(m, ' ');
8061         seq_puts(m, "^\n");
8062 }
8063
8064 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8065 {
8066         struct tracing_log_err *err = v;
8067
8068         if (err) {
8069                 const char *err_text = err->info.errs[err->info.type];
8070                 u64 sec = err->info.ts;
8071                 u32 nsec;
8072
8073                 nsec = do_div(sec, NSEC_PER_SEC);
8074                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8075                            err->loc, err_text);
8076                 seq_printf(m, "%s", err->cmd);
8077                 tracing_err_log_show_pos(m, err->info.pos);
8078         }
8079
8080         return 0;
8081 }
8082
8083 static const struct seq_operations tracing_err_log_seq_ops = {
8084         .start  = tracing_err_log_seq_start,
8085         .next   = tracing_err_log_seq_next,
8086         .stop   = tracing_err_log_seq_stop,
8087         .show   = tracing_err_log_seq_show
8088 };
8089
8090 static int tracing_err_log_open(struct inode *inode, struct file *file)
8091 {
8092         struct trace_array *tr = inode->i_private;
8093         int ret = 0;
8094
8095         ret = tracing_check_open_get_tr(tr);
8096         if (ret)
8097                 return ret;
8098
8099         /* If this file was opened for write, then erase contents */
8100         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8101                 clear_tracing_err_log(tr);
8102
8103         if (file->f_mode & FMODE_READ) {
8104                 ret = seq_open(file, &tracing_err_log_seq_ops);
8105                 if (!ret) {
8106                         struct seq_file *m = file->private_data;
8107                         m->private = tr;
8108                 } else {
8109                         trace_array_put(tr);
8110                 }
8111         }
8112         return ret;
8113 }
8114
8115 static ssize_t tracing_err_log_write(struct file *file,
8116                                      const char __user *buffer,
8117                                      size_t count, loff_t *ppos)
8118 {
8119         return count;
8120 }
8121
8122 static int tracing_err_log_release(struct inode *inode, struct file *file)
8123 {
8124         struct trace_array *tr = inode->i_private;
8125
8126         trace_array_put(tr);
8127
8128         if (file->f_mode & FMODE_READ)
8129                 seq_release(inode, file);
8130
8131         return 0;
8132 }
8133
8134 static const struct file_operations tracing_err_log_fops = {
8135         .open           = tracing_err_log_open,
8136         .write          = tracing_err_log_write,
8137         .read           = seq_read,
8138         .llseek         = seq_lseek,
8139         .release        = tracing_err_log_release,
8140 };
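
/*
 * From user space the log is read and cleared through tracefs, e.g.
 * (paths assume tracefs is mounted at /sys/kernel/tracing):
 *
 *	cat /sys/kernel/tracing/error_log
 *	echo > /sys/kernel/tracing/error_log
 *
 * The write handler above deliberately ignores the written data; it is the
 * O_TRUNC open performed by the shell redirection that frees the entries,
 * via clear_tracing_err_log() in tracing_err_log_open().
 */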
8141
8142 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8143 {
8144         struct trace_array *tr = inode->i_private;
8145         struct ftrace_buffer_info *info;
8146         int ret;
8147
8148         ret = tracing_check_open_get_tr(tr);
8149         if (ret)
8150                 return ret;
8151
8152         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8153         if (!info) {
8154                 trace_array_put(tr);
8155                 return -ENOMEM;
8156         }
8157
8158         mutex_lock(&trace_types_lock);
8159
8160         info->iter.tr           = tr;
8161         info->iter.cpu_file     = tracing_get_cpu(inode);
8162         info->iter.trace        = tr->current_trace;
8163         info->iter.array_buffer = &tr->array_buffer;
8164         info->spare             = NULL;
8165         /* Force reading ring buffer for first read */
8166         info->read              = (unsigned int)-1;
8167
8168         filp->private_data = info;
8169
8170         tr->trace_ref++;
8171
8172         mutex_unlock(&trace_types_lock);
8173
8174         ret = nonseekable_open(inode, filp);
8175         if (ret < 0)
8176                 trace_array_put(tr);
8177
8178         return ret;
8179 }
8180
8181 static __poll_t
8182 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8183 {
8184         struct ftrace_buffer_info *info = filp->private_data;
8185         struct trace_iterator *iter = &info->iter;
8186
8187         return trace_poll(iter, filp, poll_table);
8188 }
8189
8190 static ssize_t
8191 tracing_buffers_read(struct file *filp, char __user *ubuf,
8192                      size_t count, loff_t *ppos)
8193 {
8194         struct ftrace_buffer_info *info = filp->private_data;
8195         struct trace_iterator *iter = &info->iter;
8196         ssize_t ret = 0;
8197         ssize_t size;
8198
8199         if (!count)
8200                 return 0;
8201
8202 #ifdef CONFIG_TRACER_MAX_TRACE
8203         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8204                 return -EBUSY;
8205 #endif
8206
8207         if (!info->spare) {
8208                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8209                                                           iter->cpu_file);
8210                 if (IS_ERR(info->spare)) {
8211                         ret = PTR_ERR(info->spare);
8212                         info->spare = NULL;
8213                 } else {
8214                         info->spare_cpu = iter->cpu_file;
8215                 }
8216         }
8217         if (!info->spare)
8218                 return ret;
8219
8220         /* Do we have previous read data to read? */
8221         if (info->read < PAGE_SIZE)
8222                 goto read;
8223
8224  again:
8225         trace_access_lock(iter->cpu_file);
8226         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8227                                     &info->spare,
8228                                     count,
8229                                     iter->cpu_file, 0);
8230         trace_access_unlock(iter->cpu_file);
8231
8232         if (ret < 0) {
8233                 if (trace_empty(iter)) {
8234                         if ((filp->f_flags & O_NONBLOCK))
8235                                 return -EAGAIN;
8236
8237                         ret = wait_on_pipe(iter, 0);
8238                         if (ret)
8239                                 return ret;
8240
8241                         goto again;
8242                 }
8243                 return 0;
8244         }
8245
8246         info->read = 0;
8247  read:
8248         size = PAGE_SIZE - info->read;
8249         if (size > count)
8250                 size = count;
8251
8252         ret = copy_to_user(ubuf, info->spare + info->read, size);
8253         if (ret == size)
8254                 return -EFAULT;
8255
8256         size -= ret;
8257
8258         *ppos += size;
8259         info->read += size;
8260
8261         return size;
8262 }
8263
8264 static int tracing_buffers_release(struct inode *inode, struct file *file)
8265 {
8266         struct ftrace_buffer_info *info = file->private_data;
8267         struct trace_iterator *iter = &info->iter;
8268
8269         mutex_lock(&trace_types_lock);
8270
8271         iter->tr->trace_ref--;
8272
8273         __trace_array_put(iter->tr);
8274
8275         iter->wait_index++;
8276         /* Make sure the waiters see the new wait_index */
8277         smp_wmb();
8278
8279         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8280
8281         if (info->spare)
8282                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8283                                            info->spare_cpu, info->spare);
8284         kvfree(info);
8285
8286         mutex_unlock(&trace_types_lock);
8287
8288         return 0;
8289 }
8290
8291 struct buffer_ref {
8292         struct trace_buffer     *buffer;
8293         void                    *page;
8294         int                     cpu;
8295         refcount_t              refcount;
8296 };
8297
8298 static void buffer_ref_release(struct buffer_ref *ref)
8299 {
8300         if (!refcount_dec_and_test(&ref->refcount))
8301                 return;
8302         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8303         kfree(ref);
8304 }
8305
8306 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8307                                     struct pipe_buffer *buf)
8308 {
8309         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8310
8311         buffer_ref_release(ref);
8312         buf->private = 0;
8313 }
8314
8315 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8316                                 struct pipe_buffer *buf)
8317 {
8318         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8319
8320         if (refcount_read(&ref->refcount) > INT_MAX/2)
8321                 return false;
8322
8323         refcount_inc(&ref->refcount);
8324         return true;
8325 }
8326
8327 /* Pipe buffer operations for a buffer. */
8328 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8329         .release                = buffer_pipe_buf_release,
8330         .get                    = buffer_pipe_buf_get,
8331 };
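
/*
 * Lifecycle note: each buffer_ref is created with a refcount of 1 in
 * tracing_buffers_splice_read() below.  Every extra pipe-buffer reference
 * takes a get() above, and each release() (or buffer_spd_release() on a
 * splice error path) drops one; the ring-buffer page is handed back to the
 * ring buffer only when the last reference goes away.
 */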
8332
8333 /*
8334  * Callback from splice_to_pipe(), if we need to release some pages
8335  * at the end of the spd in case we errored out while filling the pipe.
8336  */
8337 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8338 {
8339         struct buffer_ref *ref =
8340                 (struct buffer_ref *)spd->partial[i].private;
8341
8342         buffer_ref_release(ref);
8343         spd->partial[i].private = 0;
8344 }
8345
8346 static ssize_t
8347 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8348                             struct pipe_inode_info *pipe, size_t len,
8349                             unsigned int flags)
8350 {
8351         struct ftrace_buffer_info *info = file->private_data;
8352         struct trace_iterator *iter = &info->iter;
8353         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8354         struct page *pages_def[PIPE_DEF_BUFFERS];
8355         struct splice_pipe_desc spd = {
8356                 .pages          = pages_def,
8357                 .partial        = partial_def,
8358                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8359                 .ops            = &buffer_pipe_buf_ops,
8360                 .spd_release    = buffer_spd_release,
8361         };
8362         struct buffer_ref *ref;
8363         int entries, i;
8364         ssize_t ret = 0;
8365
8366 #ifdef CONFIG_TRACER_MAX_TRACE
8367         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8368                 return -EBUSY;
8369 #endif
8370
8371         if (*ppos & (PAGE_SIZE - 1))
8372                 return -EINVAL;
8373
8374         if (len & (PAGE_SIZE - 1)) {
8375                 if (len < PAGE_SIZE)
8376                         return -EINVAL;
8377                 len &= PAGE_MASK;
8378         }
8379
8380         if (splice_grow_spd(pipe, &spd))
8381                 return -ENOMEM;
8382
8383  again:
8384         trace_access_lock(iter->cpu_file);
8385         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8386
8387         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8388                 struct page *page;
8389                 int r;
8390
8391                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8392                 if (!ref) {
8393                         ret = -ENOMEM;
8394                         break;
8395                 }
8396
8397                 refcount_set(&ref->refcount, 1);
8398                 ref->buffer = iter->array_buffer->buffer;
8399                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8400                 if (IS_ERR(ref->page)) {
8401                         ret = PTR_ERR(ref->page);
8402                         ref->page = NULL;
8403                         kfree(ref);
8404                         break;
8405                 }
8406                 ref->cpu = iter->cpu_file;
8407
8408                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8409                                           len, iter->cpu_file, 1);
8410                 if (r < 0) {
8411                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8412                                                    ref->page);
8413                         kfree(ref);
8414                         break;
8415                 }
8416
8417                 page = virt_to_page(ref->page);
8418
8419                 spd.pages[i] = page;
8420                 spd.partial[i].len = PAGE_SIZE;
8421                 spd.partial[i].offset = 0;
8422                 spd.partial[i].private = (unsigned long)ref;
8423                 spd.nr_pages++;
8424                 *ppos += PAGE_SIZE;
8425
8426                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8427         }
8428
8429         trace_access_unlock(iter->cpu_file);
8430         spd.nr_pages = i;
8431
8432         /* did we read anything? */
8433         if (!spd.nr_pages) {
8434                 long wait_index;
8435
8436                 if (ret)
8437                         goto out;
8438
8439                 ret = -EAGAIN;
8440                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8441                         goto out;
8442
8443                 wait_index = READ_ONCE(iter->wait_index);
8444
8445                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8446                 if (ret)
8447                         goto out;
8448
8449                 /* No need to wait after waking up when tracing is off */
8450                 if (!tracer_tracing_is_on(iter->tr))
8451                         goto out;
8452
8453                 /* Make sure we see the new wait_index */
8454                 smp_rmb();
8455                 if (wait_index != iter->wait_index)
8456                         goto out;
8457
8458                 goto again;
8459         }
8460
8461         ret = splice_to_pipe(pipe, &spd);
8462 out:
8463         splice_shrink_spd(&spd);
8464
8465         return ret;
8466 }
8467
8468 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8469 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8470 {
8471         struct ftrace_buffer_info *info = file->private_data;
8472         struct trace_iterator *iter = &info->iter;
8473
8474         if (cmd)
8475                 return -ENOIOCTLCMD;
8476
8477         mutex_lock(&trace_types_lock);
8478
8479         iter->wait_index++;
8480         /* Make sure the waiters see the new wait_index */
8481         smp_wmb();
8482
8483         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8484
8485         mutex_unlock(&trace_types_lock);
8486         return 0;
8487 }
8488
8489 static const struct file_operations tracing_buffers_fops = {
8490         .open           = tracing_buffers_open,
8491         .read           = tracing_buffers_read,
8492         .poll           = tracing_buffers_poll,
8493         .release        = tracing_buffers_release,
8494         .splice_read    = tracing_buffers_splice_read,
8495         .unlocked_ioctl = tracing_buffers_ioctl,
8496         .llseek         = no_llseek,
8497 };
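
/*
 * These operations back the per_cpu/cpuN/trace_pipe_raw files created in
 * tracing_init_tracefs_percpu() below.  A rough user-space sketch (error
 * handling omitted, PAGE_SIZE assumed to be 4096):
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	char buf[4096];
 *
 *	read(fd, buf, sizeof(buf));	// raw ring-buffer sub-buffer data
 *	ioctl(fd, 0, 0);		// cmd 0: wake up any blocked readers
 */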
8498
8499 static ssize_t
8500 tracing_stats_read(struct file *filp, char __user *ubuf,
8501                    size_t count, loff_t *ppos)
8502 {
8503         struct inode *inode = file_inode(filp);
8504         struct trace_array *tr = inode->i_private;
8505         struct array_buffer *trace_buf = &tr->array_buffer;
8506         int cpu = tracing_get_cpu(inode);
8507         struct trace_seq *s;
8508         unsigned long cnt;
8509         unsigned long long t;
8510         unsigned long usec_rem;
8511
8512         s = kmalloc(sizeof(*s), GFP_KERNEL);
8513         if (!s)
8514                 return -ENOMEM;
8515
8516         trace_seq_init(s);
8517
8518         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8519         trace_seq_printf(s, "entries: %ld\n", cnt);
8520
8521         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8522         trace_seq_printf(s, "overrun: %ld\n", cnt);
8523
8524         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8525         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8526
8527         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8528         trace_seq_printf(s, "bytes: %ld\n", cnt);
8529
8530         if (trace_clocks[tr->clock_id].in_ns) {
8531                 /* local or global for trace_clock */
8532                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8533                 usec_rem = do_div(t, USEC_PER_SEC);
8534                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8535                                                                 t, usec_rem);
8536
8537                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8538                 usec_rem = do_div(t, USEC_PER_SEC);
8539                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8540         } else {
8541                 /* counter or tsc mode for trace_clock */
8542                 trace_seq_printf(s, "oldest event ts: %llu\n",
8543                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8544
8545                 trace_seq_printf(s, "now ts: %llu\n",
8546                                 ring_buffer_time_stamp(trace_buf->buffer));
8547         }
8548
8549         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8550         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8551
8552         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8553         trace_seq_printf(s, "read events: %ld\n", cnt);
8554
8555         count = simple_read_from_buffer(ubuf, count, ppos,
8556                                         s->buffer, trace_seq_used(s));
8557
8558         kfree(s);
8559
8560         return count;
8561 }
8562
8563 static const struct file_operations tracing_stats_fops = {
8564         .open           = tracing_open_generic_tr,
8565         .read           = tracing_stats_read,
8566         .llseek         = generic_file_llseek,
8567         .release        = tracing_release_generic_tr,
8568 };
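
/*
 * Reading per_cpu/cpuN/stats produces output along these lines (the values
 * are illustrative):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 65536
 *	oldest event ts:    75.123456
 *	now ts:    80.654321
 *	dropped events: 0
 *	read events: 512
 */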
8569
8570 #ifdef CONFIG_DYNAMIC_FTRACE
8571
8572 static ssize_t
8573 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8574                   size_t cnt, loff_t *ppos)
8575 {
8576         ssize_t ret;
8577         char *buf;
8578         int r;
8579
8580         /* 256 should be plenty to hold the amount needed */
8581         buf = kmalloc(256, GFP_KERNEL);
8582         if (!buf)
8583                 return -ENOMEM;
8584
8585         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8586                       ftrace_update_tot_cnt,
8587                       ftrace_number_of_pages,
8588                       ftrace_number_of_groups);
8589
8590         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8591         kfree(buf);
8592         return ret;
8593 }
8594
8595 static const struct file_operations tracing_dyn_info_fops = {
8596         .open           = tracing_open_generic,
8597         .read           = tracing_read_dyn_info,
8598         .llseek         = generic_file_llseek,
8599 };
8600 #endif /* CONFIG_DYNAMIC_FTRACE */
8601
8602 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8603 static void
8604 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8605                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8606                 void *data)
8607 {
8608         tracing_snapshot_instance(tr);
8609 }
8610
8611 static void
8612 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8613                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8614                       void *data)
8615 {
8616         struct ftrace_func_mapper *mapper = data;
8617         long *count = NULL;
8618
8619         if (mapper)
8620                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8621
8622         if (count) {
8623
8624                 if (*count <= 0)
8625                         return;
8626
8627                 (*count)--;
8628         }
8629
8630         tracing_snapshot_instance(tr);
8631 }
8632
8633 static int
8634 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8635                       struct ftrace_probe_ops *ops, void *data)
8636 {
8637         struct ftrace_func_mapper *mapper = data;
8638         long *count = NULL;
8639
8640         seq_printf(m, "%ps:", (void *)ip);
8641
8642         seq_puts(m, "snapshot");
8643
8644         if (mapper)
8645                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8646
8647         if (count)
8648                 seq_printf(m, ":count=%ld\n", *count);
8649         else
8650                 seq_puts(m, ":unlimited\n");
8651
8652         return 0;
8653 }
8654
8655 static int
8656 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8657                      unsigned long ip, void *init_data, void **data)
8658 {
8659         struct ftrace_func_mapper *mapper = *data;
8660
8661         if (!mapper) {
8662                 mapper = allocate_ftrace_func_mapper();
8663                 if (!mapper)
8664                         return -ENOMEM;
8665                 *data = mapper;
8666         }
8667
8668         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8669 }
8670
8671 static void
8672 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8673                      unsigned long ip, void *data)
8674 {
8675         struct ftrace_func_mapper *mapper = data;
8676
8677         if (!ip) {
8678                 if (!mapper)
8679                         return;
8680                 free_ftrace_func_mapper(mapper, NULL);
8681                 return;
8682         }
8683
8684         ftrace_func_mapper_remove_ip(mapper, ip);
8685 }
8686
8687 static struct ftrace_probe_ops snapshot_probe_ops = {
8688         .func                   = ftrace_snapshot,
8689         .print                  = ftrace_snapshot_print,
8690 };
8691
8692 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8693         .func                   = ftrace_count_snapshot,
8694         .print                  = ftrace_snapshot_print,
8695         .init                   = ftrace_snapshot_init,
8696         .free                   = ftrace_snapshot_free,
8697 };
8698
8699 static int
8700 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8701                                char *glob, char *cmd, char *param, int enable)
8702 {
8703         struct ftrace_probe_ops *ops;
8704         void *count = (void *)-1;
8705         char *number;
8706         int ret;
8707
8708         if (!tr)
8709                 return -ENODEV;
8710
8711         /* hash funcs only work with set_ftrace_filter */
8712         if (!enable)
8713                 return -EINVAL;
8714
8715         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8716
8717         if (glob[0] == '!')
8718                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8719
8720         if (!param)
8721                 goto out_reg;
8722
8723         number = strsep(&param, ":");
8724
8725         if (!strlen(number))
8726                 goto out_reg;
8727
8728         /*
8729          * We use the callback data field (which is a pointer)
8730          * as our counter.
8731          */
8732         ret = kstrtoul(number, 0, (unsigned long *)&count);
8733         if (ret)
8734                 return ret;
8735
8736  out_reg:
8737         ret = tracing_alloc_snapshot_instance(tr);
8738         if (ret < 0)
8739                 goto out;
8740
8741         ret = register_ftrace_function_probe(glob, tr, ops, count);
8742
8743  out:
8744         return ret < 0 ? ret : 0;
8745 }
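
/*
 * The callback above implements the "snapshot" command of set_ftrace_filter.
 * Typical usage from the shell (with <func> standing in for a traceable
 * function name):
 *
 *	echo '<func>:snapshot' > set_ftrace_filter	# snapshot on every hit
 *	echo '<func>:snapshot:5' > set_ftrace_filter	# only the first 5 hits
 *	echo '!<func>:snapshot' > set_ftrace_filter	# remove the probe
 */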
8746
8747 static struct ftrace_func_command ftrace_snapshot_cmd = {
8748         .name                   = "snapshot",
8749         .func                   = ftrace_trace_snapshot_callback,
8750 };
8751
8752 static __init int register_snapshot_cmd(void)
8753 {
8754         return register_ftrace_command(&ftrace_snapshot_cmd);
8755 }
8756 #else
8757 static inline __init int register_snapshot_cmd(void) { return 0; }
8758 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8759
8760 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8761 {
8762         if (WARN_ON(!tr->dir))
8763                 return ERR_PTR(-ENODEV);
8764
8765         /* Top directory uses NULL as the parent */
8766         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8767                 return NULL;
8768
8769         /* All sub buffers have a descriptor */
8770         return tr->dir;
8771 }
8772
8773 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8774 {
8775         struct dentry *d_tracer;
8776
8777         if (tr->percpu_dir)
8778                 return tr->percpu_dir;
8779
8780         d_tracer = tracing_get_dentry(tr);
8781         if (IS_ERR(d_tracer))
8782                 return NULL;
8783
8784         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8785
8786         MEM_FAIL(!tr->percpu_dir,
8787                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8788
8789         return tr->percpu_dir;
8790 }
8791
8792 static struct dentry *
8793 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8794                       void *data, long cpu, const struct file_operations *fops)
8795 {
8796         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8797
8798         if (ret) /* See tracing_get_cpu() */
8799                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8800         return ret;
8801 }
8802
8803 static void
8804 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8805 {
8806         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8807         struct dentry *d_cpu;
8808         char cpu_dir[30]; /* 30 characters should be more than enough */
8809
8810         if (!d_percpu)
8811                 return;
8812
8813         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8814         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8815         if (!d_cpu) {
8816                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8817                 return;
8818         }
8819
8820         /* per cpu trace_pipe */
8821         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8822                                 tr, cpu, &tracing_pipe_fops);
8823
8824         /* per cpu trace */
8825         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8826                                 tr, cpu, &tracing_fops);
8827
8828         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8829                                 tr, cpu, &tracing_buffers_fops);
8830
8831         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8832                                 tr, cpu, &tracing_stats_fops);
8833
8834         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8835                                 tr, cpu, &tracing_entries_fops);
8836
8837 #ifdef CONFIG_TRACER_SNAPSHOT
8838         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8839                                 tr, cpu, &snapshot_fops);
8840
8841         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8842                                 tr, cpu, &snapshot_raw_fops);
8843 #endif
8844 }
8845
8846 #ifdef CONFIG_FTRACE_SELFTEST
8847 /* Let selftest have access to static functions in this file */
8848 #include "trace_selftest.c"
8849 #endif
8850
8851 static ssize_t
8852 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8853                         loff_t *ppos)
8854 {
8855         struct trace_option_dentry *topt = filp->private_data;
8856         char *buf;
8857
8858         if (topt->flags->val & topt->opt->bit)
8859                 buf = "1\n";
8860         else
8861                 buf = "0\n";
8862
8863         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8864 }
8865
8866 static ssize_t
8867 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8868                          loff_t *ppos)
8869 {
8870         struct trace_option_dentry *topt = filp->private_data;
8871         unsigned long val;
8872         int ret;
8873
8874         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8875         if (ret)
8876                 return ret;
8877
8878         if (val != 0 && val != 1)
8879                 return -EINVAL;
8880
8881         if (!!(topt->flags->val & topt->opt->bit) != val) {
8882                 mutex_lock(&trace_types_lock);
8883                 ret = __set_tracer_option(topt->tr, topt->flags,
8884                                           topt->opt, !val);
8885                 mutex_unlock(&trace_types_lock);
8886                 if (ret)
8887                         return ret;
8888         }
8889
8890         *ppos += cnt;
8891
8892         return cnt;
8893 }
8894
8895
8896 static const struct file_operations trace_options_fops = {
8897         .open = tracing_open_generic,
8898         .read = trace_options_read,
8899         .write = trace_options_write,
8900         .llseek = generic_file_llseek,
8901 };
8902
8903 /*
8904  * In order to pass in both the trace_array descriptor as well as the index
8905  * to the flag that the trace option file represents, the trace_array
8906  * has a character array of trace_flags_index[], which holds the index
8907  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8908  * The address of the element corresponding to the flag is passed to the
8909  * flag option file read/write callbacks.
8910  *
8911  * In order to extract both the index and the trace_array descriptor,
8912  * get_tr_index() uses the following algorithm.
8913  *
8914  *   idx = *ptr;
8915  *
8916  * The pointer is the address of one entry in that index array, and the
8917  * value stored there is that entry's own index (remember index[1] == 1).
8918  *
8919  * Then, to get the trace_array descriptor, subtracting that index from
8920  * the pointer gives the start of the index array itself:
8921  *
8922  *   ptr - idx == &index[0]
8923  *
8924  * Then a simple container_of() from that pointer gets us to the
8925  * trace_array descriptor.
8926  */
8927 static void get_tr_index(void *data, struct trace_array **ptr,
8928                          unsigned int *pindex)
8929 {
8930         *pindex = *(unsigned char *)data;
8931
8932         *ptr = container_of(data - *pindex, struct trace_array,
8933                             trace_flags_index);
8934 }
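
/*
 * Worked example: if data points at &tr->trace_flags_index[3], then
 * *pindex == 3 (because index[3] == 3), data - 3 == &tr->trace_flags_index[0],
 * and container_of() on that address recovers tr itself.
 */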
8935
8936 static ssize_t
8937 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8938                         loff_t *ppos)
8939 {
8940         void *tr_index = filp->private_data;
8941         struct trace_array *tr;
8942         unsigned int index;
8943         char *buf;
8944
8945         get_tr_index(tr_index, &tr, &index);
8946
8947         if (tr->trace_flags & (1 << index))
8948                 buf = "1\n";
8949         else
8950                 buf = "0\n";
8951
8952         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8953 }
8954
8955 static ssize_t
8956 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8957                          loff_t *ppos)
8958 {
8959         void *tr_index = filp->private_data;
8960         struct trace_array *tr;
8961         unsigned int index;
8962         unsigned long val;
8963         int ret;
8964
8965         get_tr_index(tr_index, &tr, &index);
8966
8967         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8968         if (ret)
8969                 return ret;
8970
8971         if (val != 0 && val != 1)
8972                 return -EINVAL;
8973
8974         mutex_lock(&event_mutex);
8975         mutex_lock(&trace_types_lock);
8976         ret = set_tracer_flag(tr, 1 << index, val);
8977         mutex_unlock(&trace_types_lock);
8978         mutex_unlock(&event_mutex);
8979
8980         if (ret < 0)
8981                 return ret;
8982
8983         *ppos += cnt;
8984
8985         return cnt;
8986 }
8987
8988 static const struct file_operations trace_options_core_fops = {
8989         .open = tracing_open_generic,
8990         .read = trace_options_core_read,
8991         .write = trace_options_core_write,
8992         .llseek = generic_file_llseek,
8993 };
8994
8995 struct dentry *trace_create_file(const char *name,
8996                                  umode_t mode,
8997                                  struct dentry *parent,
8998                                  void *data,
8999                                  const struct file_operations *fops)
9000 {
9001         struct dentry *ret;
9002
9003         ret = tracefs_create_file(name, mode, parent, data, fops);
9004         if (!ret)
9005                 pr_warn("Could not create tracefs '%s' entry\n", name);
9006
9007         return ret;
9008 }
9009
9010
9011 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9012 {
9013         struct dentry *d_tracer;
9014
9015         if (tr->options)
9016                 return tr->options;
9017
9018         d_tracer = tracing_get_dentry(tr);
9019         if (IS_ERR(d_tracer))
9020                 return NULL;
9021
9022         tr->options = tracefs_create_dir("options", d_tracer);
9023         if (!tr->options) {
9024                 pr_warn("Could not create tracefs directory 'options'\n");
9025                 return NULL;
9026         }
9027
9028         return tr->options;
9029 }
9030
9031 static void
9032 create_trace_option_file(struct trace_array *tr,
9033                          struct trace_option_dentry *topt,
9034                          struct tracer_flags *flags,
9035                          struct tracer_opt *opt)
9036 {
9037         struct dentry *t_options;
9038
9039         t_options = trace_options_init_dentry(tr);
9040         if (!t_options)
9041                 return;
9042
9043         topt->flags = flags;
9044         topt->opt = opt;
9045         topt->tr = tr;
9046
9047         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9048                                         t_options, topt, &trace_options_fops);
9049
9050 }
9051
9052 static void
9053 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9054 {
9055         struct trace_option_dentry *topts;
9056         struct trace_options *tr_topts;
9057         struct tracer_flags *flags;
9058         struct tracer_opt *opts;
9059         int cnt;
9060         int i;
9061
9062         if (!tracer)
9063                 return;
9064
9065         flags = tracer->flags;
9066
9067         if (!flags || !flags->opts)
9068                 return;
9069
9070         /*
9071          * If this is an instance, only create flags for tracers
9072          * the instance may have.
9073          */
9074         if (!trace_ok_for_array(tracer, tr))
9075                 return;
9076
9077         for (i = 0; i < tr->nr_topts; i++) {
9078                 /* Make sure there are no duplicate flags. */
9079                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9080                         return;
9081         }
9082
9083         opts = flags->opts;
9084
9085         for (cnt = 0; opts[cnt].name; cnt++)
9086                 ;
9087
9088         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9089         if (!topts)
9090                 return;
9091
9092         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9093                             GFP_KERNEL);
9094         if (!tr_topts) {
9095                 kfree(topts);
9096                 return;
9097         }
9098
9099         tr->topts = tr_topts;
9100         tr->topts[tr->nr_topts].tracer = tracer;
9101         tr->topts[tr->nr_topts].topts = topts;
9102         tr->nr_topts++;
9103
9104         for (cnt = 0; opts[cnt].name; cnt++) {
9105                 create_trace_option_file(tr, &topts[cnt], flags,
9106                                          &opts[cnt]);
9107                 MEM_FAIL(topts[cnt].entry == NULL,
9108                           "Failed to create trace option: %s",
9109                           opts[cnt].name);
9110         }
9111 }
9112
9113 static struct dentry *
9114 create_trace_option_core_file(struct trace_array *tr,
9115                               const char *option, long index)
9116 {
9117         struct dentry *t_options;
9118
9119         t_options = trace_options_init_dentry(tr);
9120         if (!t_options)
9121                 return NULL;
9122
9123         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9124                                  (void *)&tr->trace_flags_index[index],
9125                                  &trace_options_core_fops);
9126 }
9127
9128 static void create_trace_options_dir(struct trace_array *tr)
9129 {
9130         struct dentry *t_options;
9131         bool top_level = tr == &global_trace;
9132         int i;
9133
9134         t_options = trace_options_init_dentry(tr);
9135         if (!t_options)
9136                 return;
9137
9138         for (i = 0; trace_options[i]; i++) {
9139                 if (top_level ||
9140                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9141                         create_trace_option_core_file(tr, trace_options[i], i);
9142         }
9143 }
9144
9145 static ssize_t
9146 rb_simple_read(struct file *filp, char __user *ubuf,
9147                size_t cnt, loff_t *ppos)
9148 {
9149         struct trace_array *tr = filp->private_data;
9150         char buf[64];
9151         int r;
9152
9153         r = tracer_tracing_is_on(tr);
9154         r = sprintf(buf, "%d\n", r);
9155
9156         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9157 }
9158
9159 static ssize_t
9160 rb_simple_write(struct file *filp, const char __user *ubuf,
9161                 size_t cnt, loff_t *ppos)
9162 {
9163         struct trace_array *tr = filp->private_data;
9164         struct trace_buffer *buffer = tr->array_buffer.buffer;
9165         unsigned long val;
9166         int ret;
9167
9168         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9169         if (ret)
9170                 return ret;
9171
9172         if (buffer) {
9173                 mutex_lock(&trace_types_lock);
9174                 if (!!val == tracer_tracing_is_on(tr)) {
9175                         val = 0; /* do nothing */
9176                 } else if (val) {
9177                         tracer_tracing_on(tr);
9178                         if (tr->current_trace->start)
9179                                 tr->current_trace->start(tr);
9180                 } else {
9181                         tracer_tracing_off(tr);
9182                         if (tr->current_trace->stop)
9183                                 tr->current_trace->stop(tr);
9184                         /* Wake up any waiters */
9185                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9186                 }
9187                 mutex_unlock(&trace_types_lock);
9188         }
9189
9190         (*ppos)++;
9191
9192         return cnt;
9193 }
9194
9195 static const struct file_operations rb_simple_fops = {
9196         .open           = tracing_open_generic_tr,
9197         .read           = rb_simple_read,
9198         .write          = rb_simple_write,
9199         .release        = tracing_release_generic_tr,
9200         .llseek         = default_llseek,
9201 };
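
/*
 * rb_simple_fops backs the per-instance "tracing_on" file (hooked up in
 * init_tracer_tracefs() below).  Writing 0 stops recording into the ring
 * buffer and wakes any blocked readers; writing 1 turns recording back on:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 *	echo 1 > /sys/kernel/tracing/tracing_on
 */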
9202
9203 static ssize_t
9204 buffer_percent_read(struct file *filp, char __user *ubuf,
9205                     size_t cnt, loff_t *ppos)
9206 {
9207         struct trace_array *tr = filp->private_data;
9208         char buf[64];
9209         int r;
9210
9211         r = tr->buffer_percent;
9212         r = sprintf(buf, "%d\n", r);
9213
9214         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9215 }
9216
9217 static ssize_t
9218 buffer_percent_write(struct file *filp, const char __user *ubuf,
9219                      size_t cnt, loff_t *ppos)
9220 {
9221         struct trace_array *tr = filp->private_data;
9222         unsigned long val;
9223         int ret;
9224
9225         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9226         if (ret)
9227                 return ret;
9228
9229         if (val > 100)
9230                 return -EINVAL;
9231
9232         tr->buffer_percent = val;
9233
9234         (*ppos)++;
9235
9236         return cnt;
9237 }
9238
9239 static const struct file_operations buffer_percent_fops = {
9240         .open           = tracing_open_generic_tr,
9241         .read           = buffer_percent_read,
9242         .write          = buffer_percent_write,
9243         .release        = tracing_release_generic_tr,
9244         .llseek         = default_llseek,
9245 };
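
/*
 * buffer_percent_fops backs the "buffer_percent" file.  The stored value
 * (0-100) is what tracing_buffers_splice_read() above passes to
 * wait_on_pipe(), so splice readers of trace_pipe_raw are only woken once
 * the ring buffer has reached that fill level (0 wakes on any new data).
 * For example:
 *
 *	echo 50 > /sys/kernel/tracing/buffer_percent
 */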
9246
9247 static struct dentry *trace_instance_dir;
9248
9249 static void
9250 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9251
9252 static int
9253 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9254 {
9255         enum ring_buffer_flags rb_flags;
9256
9257         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9258
9259         buf->tr = tr;
9260
9261         buf->buffer = ring_buffer_alloc(size, rb_flags);
9262         if (!buf->buffer)
9263                 return -ENOMEM;
9264
9265         buf->data = alloc_percpu(struct trace_array_cpu);
9266         if (!buf->data) {
9267                 ring_buffer_free(buf->buffer);
9268                 buf->buffer = NULL;
9269                 return -ENOMEM;
9270         }
9271
9272         /* Allocate the first page for all buffers */
9273         set_buffer_entries(&tr->array_buffer,
9274                            ring_buffer_size(tr->array_buffer.buffer, 0));
9275
9276         return 0;
9277 }
9278
9279 static void free_trace_buffer(struct array_buffer *buf)
9280 {
9281         if (buf->buffer) {
9282                 ring_buffer_free(buf->buffer);
9283                 buf->buffer = NULL;
9284                 free_percpu(buf->data);
9285                 buf->data = NULL;
9286         }
9287 }
9288
9289 static int allocate_trace_buffers(struct trace_array *tr, int size)
9290 {
9291         int ret;
9292
9293         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9294         if (ret)
9295                 return ret;
9296
9297 #ifdef CONFIG_TRACER_MAX_TRACE
9298         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9299                                     allocate_snapshot ? size : 1);
9300         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9301                 free_trace_buffer(&tr->array_buffer);
9302                 return -ENOMEM;
9303         }
9304         tr->allocated_snapshot = allocate_snapshot;
9305
9306         allocate_snapshot = false;
9307 #endif
9308
9309         return 0;
9310 }
9311
9312 static void free_trace_buffers(struct trace_array *tr)
9313 {
9314         if (!tr)
9315                 return;
9316
9317         free_trace_buffer(&tr->array_buffer);
9318
9319 #ifdef CONFIG_TRACER_MAX_TRACE
9320         free_trace_buffer(&tr->max_buffer);
9321 #endif
9322 }
9323
9324 static void init_trace_flags_index(struct trace_array *tr)
9325 {
9326         int i;
9327
9328         /* Used by the trace options files */
9329         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9330                 tr->trace_flags_index[i] = i;
9331 }
9332
9333 static void __update_tracer_options(struct trace_array *tr)
9334 {
9335         struct tracer *t;
9336
9337         for (t = trace_types; t; t = t->next)
9338                 add_tracer_options(tr, t);
9339 }
9340
9341 static void update_tracer_options(struct trace_array *tr)
9342 {
9343         mutex_lock(&trace_types_lock);
9344         tracer_options_updated = true;
9345         __update_tracer_options(tr);
9346         mutex_unlock(&trace_types_lock);
9347 }
9348
9349 /* Must have trace_types_lock held */
9350 struct trace_array *trace_array_find(const char *instance)
9351 {
9352         struct trace_array *tr, *found = NULL;
9353
9354         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9355                 if (tr->name && strcmp(tr->name, instance) == 0) {
9356                         found = tr;
9357                         break;
9358                 }
9359         }
9360
9361         return found;
9362 }
9363
9364 struct trace_array *trace_array_find_get(const char *instance)
9365 {
9366         struct trace_array *tr;
9367
9368         mutex_lock(&trace_types_lock);
9369         tr = trace_array_find(instance);
9370         if (tr)
9371                 tr->ref++;
9372         mutex_unlock(&trace_types_lock);
9373
9374         return tr;
9375 }
9376
9377 static int trace_array_create_dir(struct trace_array *tr)
9378 {
9379         int ret;
9380
9381         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9382         if (!tr->dir)
9383                 return -EINVAL;
9384
9385         ret = event_trace_add_tracer(tr->dir, tr);
9386         if (ret) {
9387                 tracefs_remove(tr->dir);
9388                 return ret;
9389         }
9390
9391         init_tracer_tracefs(tr, tr->dir);
9392         __update_tracer_options(tr);
9393
9394         return ret;
9395 }
9396
9397 static struct trace_array *trace_array_create(const char *name)
9398 {
9399         struct trace_array *tr;
9400         int ret;
9401
9402         ret = -ENOMEM;
9403         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9404         if (!tr)
9405                 return ERR_PTR(ret);
9406
9407         tr->name = kstrdup(name, GFP_KERNEL);
9408         if (!tr->name)
9409                 goto out_free_tr;
9410
9411         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9412                 goto out_free_tr;
9413
9414         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9415
9416         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9417
9418         raw_spin_lock_init(&tr->start_lock);
9419
9420         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9421
9422         tr->current_trace = &nop_trace;
9423
9424         INIT_LIST_HEAD(&tr->systems);
9425         INIT_LIST_HEAD(&tr->events);
9426         INIT_LIST_HEAD(&tr->hist_vars);
9427         INIT_LIST_HEAD(&tr->err_log);
9428
9429         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9430                 goto out_free_tr;
9431
9432         if (ftrace_allocate_ftrace_ops(tr) < 0)
9433                 goto out_free_tr;
9434
9435         ftrace_init_trace_array(tr);
9436
9437         init_trace_flags_index(tr);
9438
9439         if (trace_instance_dir) {
9440                 ret = trace_array_create_dir(tr);
9441                 if (ret)
9442                         goto out_free_tr;
9443         } else
9444                 __trace_early_add_events(tr);
9445
9446         list_add(&tr->list, &ftrace_trace_arrays);
9447
9448         tr->ref++;
9449
9450         return tr;
9451
9452  out_free_tr:
9453         ftrace_free_ftrace_ops(tr);
9454         free_trace_buffers(tr);
9455         free_cpumask_var(tr->tracing_cpumask);
9456         kfree(tr->name);
9457         kfree(tr);
9458
9459         return ERR_PTR(ret);
9460 }
9461
9462 static int instance_mkdir(const char *name)
9463 {
9464         struct trace_array *tr;
9465         int ret;
9466
9467         mutex_lock(&event_mutex);
9468         mutex_lock(&trace_types_lock);
9469
9470         ret = -EEXIST;
9471         if (trace_array_find(name))
9472                 goto out_unlock;
9473
9474         tr = trace_array_create(name);
9475
9476         ret = PTR_ERR_OR_ZERO(tr);
9477
9478 out_unlock:
9479         mutex_unlock(&trace_types_lock);
9480         mutex_unlock(&event_mutex);
9481         return ret;
9482 }
9483
9484 /**
9485  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9486  * @name: The name of the trace array to be looked up/created.
9487  *
9488  * Returns a pointer to the trace array with the given name, or
9489  * NULL if it cannot be created.
9490  *
9491  * NOTE: This function increments the reference counter associated with the
9492  * trace array returned. This makes sure it cannot be freed while in use.
9493  * Use trace_array_put() once the trace array is no longer needed.
9494  * If the trace_array is to be freed, trace_array_destroy() needs to
9495  * be called after the trace_array_put(), or simply let user space delete
9496  * it from the tracefs instances directory. But until the
9497  * trace_array_put() is called, user space cannot delete it.
9498  *
9499  */
9500 struct trace_array *trace_array_get_by_name(const char *name)
9501 {
9502         struct trace_array *tr;
9503
9504         mutex_lock(&event_mutex);
9505         mutex_lock(&trace_types_lock);
9506
9507         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9508                 if (tr->name && strcmp(tr->name, name) == 0)
9509                         goto out_unlock;
9510         }
9511
9512         tr = trace_array_create(name);
9513
9514         if (IS_ERR(tr))
9515                 tr = NULL;
9516 out_unlock:
9517         if (tr)
9518                 tr->ref++;
9519
9520         mutex_unlock(&trace_types_lock);
9521         mutex_unlock(&event_mutex);
9522         return tr;
9523 }
9524 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
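/*
 * Editorial note: a minimal usage sketch for the exported instance API,
 * using only functions defined in this file (trace_array_get_by_name(),
 * trace_array_put() and trace_array_destroy()). The instance name
 * "sample" and the surrounding error handling are purely illustrative.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("sample");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	// ... use the instance ...
 *
 *	trace_array_put(tr);		// drop the reference taken above
 *	trace_array_destroy(tr);	// or let user space rmdir the instance
 */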
9525
9526 static int __remove_instance(struct trace_array *tr)
9527 {
9528         int i;
9529
9530         /* Reference counter for a newly created trace array = 1. */
9531         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9532                 return -EBUSY;
9533
9534         list_del(&tr->list);
9535
9536         /* Disable all the flags that were enabled coming in */
9537         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9538                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9539                         set_tracer_flag(tr, 1 << i, 0);
9540         }
9541
9542         tracing_set_nop(tr);
9543         clear_ftrace_function_probes(tr);
9544         event_trace_del_tracer(tr);
9545         ftrace_clear_pids(tr);
9546         ftrace_destroy_function_files(tr);
9547         tracefs_remove(tr->dir);
9548         free_percpu(tr->last_func_repeats);
9549         free_trace_buffers(tr);
9550         clear_tracing_err_log(tr);
9551
9552         for (i = 0; i < tr->nr_topts; i++) {
9553                 kfree(tr->topts[i].topts);
9554         }
9555         kfree(tr->topts);
9556
9557         free_cpumask_var(tr->tracing_cpumask);
9558         kfree(tr->name);
9559         kfree(tr);
9560
9561         return 0;
9562 }
9563
9564 int trace_array_destroy(struct trace_array *this_tr)
9565 {
9566         struct trace_array *tr;
9567         int ret;
9568
9569         if (!this_tr)
9570                 return -EINVAL;
9571
9572         mutex_lock(&event_mutex);
9573         mutex_lock(&trace_types_lock);
9574
9575         ret = -ENODEV;
9576
9577         /* Make sure the trace array exists before destroying it. */
9578         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9579                 if (tr == this_tr) {
9580                         ret = __remove_instance(tr);
9581                         break;
9582                 }
9583         }
9584
9585         mutex_unlock(&trace_types_lock);
9586         mutex_unlock(&event_mutex);
9587
9588         return ret;
9589 }
9590 EXPORT_SYMBOL_GPL(trace_array_destroy);
9591
9592 static int instance_rmdir(const char *name)
9593 {
9594         struct trace_array *tr;
9595         int ret;
9596
9597         mutex_lock(&event_mutex);
9598         mutex_lock(&trace_types_lock);
9599
9600         ret = -ENODEV;
9601         tr = trace_array_find(name);
9602         if (tr)
9603                 ret = __remove_instance(tr);
9604
9605         mutex_unlock(&trace_types_lock);
9606         mutex_unlock(&event_mutex);
9607
9608         return ret;
9609 }
9610
9611 static __init void create_trace_instances(struct dentry *d_tracer)
9612 {
9613         struct trace_array *tr;
9614
9615         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9616                                                          instance_mkdir,
9617                                                          instance_rmdir);
9618         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9619                 return;
9620
9621         mutex_lock(&event_mutex);
9622         mutex_lock(&trace_types_lock);
9623
9624         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9625                 if (!tr->name)
9626                         continue;
9627                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9628                              "Failed to create instance directory\n"))
9629                         break;
9630         }
9631
9632         mutex_unlock(&trace_types_lock);
9633         mutex_unlock(&event_mutex);
9634 }
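/*
 * Editorial note: instance_mkdir()/instance_rmdir() above are the
 * callbacks wired to the "instances" directory, so the same create and
 * remove paths can be exercised from user space (the paths below assume
 * the usual tracefs mount point and are illustrative):
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 */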
9635
9636 static void
9637 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9638 {
9639         struct trace_event_file *file;
9640         int cpu;
9641
9642         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9643                         tr, &show_traces_fops);
9644
9645         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9646                         tr, &set_tracer_fops);
9647
9648         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9649                           tr, &tracing_cpumask_fops);
9650
9651         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9652                           tr, &tracing_iter_fops);
9653
9654         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9655                           tr, &tracing_fops);
9656
9657         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9658                           tr, &tracing_pipe_fops);
9659
9660         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9661                           tr, &tracing_entries_fops);
9662
9663         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9664                           tr, &tracing_total_entries_fops);
9665
9666         trace_create_file("free_buffer", 0200, d_tracer,
9667                           tr, &tracing_free_buffer_fops);
9668
9669         trace_create_file("trace_marker", 0220, d_tracer,
9670                           tr, &tracing_mark_fops);
9671
9672         file = __find_event_file(tr, "ftrace", "print");
9673         if (file && file->dir)
9674                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9675                                   file, &event_trigger_fops);
9676         tr->trace_marker_file = file;
9677
9678         trace_create_file("trace_marker_raw", 0220, d_tracer,
9679                           tr, &tracing_mark_raw_fops);
9680
9681         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9682                           &trace_clock_fops);
9683
9684         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9685                           tr, &rb_simple_fops);
9686
9687         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9688                           &trace_time_stamp_mode_fops);
9689
9690         tr->buffer_percent = 50;
9691
9692         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9693                         tr, &buffer_percent_fops);
9694
9695         create_trace_options_dir(tr);
9696
9697 #ifdef CONFIG_TRACER_MAX_TRACE
9698         trace_create_maxlat_file(tr, d_tracer);
9699 #endif
9700
9701         if (ftrace_create_function_files(tr, d_tracer))
9702                 MEM_FAIL(1, "Could not allocate function filter files");
9703
9704 #ifdef CONFIG_TRACER_SNAPSHOT
9705         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9706                           tr, &snapshot_fops);
9707 #endif
9708
9709         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9710                           tr, &tracing_err_log_fops);
9711
9712         for_each_tracing_cpu(cpu)
9713                 tracing_init_tracefs_percpu(tr, cpu);
9714
9715         ftrace_init_tracefs(tr, d_tracer);
9716 }
9717
9718 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9719 {
9720         struct vfsmount *mnt;
9721         struct file_system_type *type;
9722
9723         /*
9724          * To maintain backward compatibility for tools that mount
9725          * debugfs to get to the tracing facility, tracefs is automatically
9726          * mounted to the debugfs/tracing directory.
9727          */
9728         type = get_fs_type("tracefs");
9729         if (!type)
9730                 return NULL;
9731         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9732         put_filesystem(type);
9733         if (IS_ERR(mnt))
9734                 return NULL;
9735         mntget(mnt);
9736
9737         return mnt;
9738 }
9739
9740 /**
9741  * tracing_init_dentry - initialize top level trace array
9742  *
9743  * This is called when creating files or directories in the tracing
9744  * directory. It is also called via fs_initcall() during boot up and
9745  * returns 0 on success or a negative error code on failure.
9746  */
9747 int tracing_init_dentry(void)
9748 {
9749         struct trace_array *tr = &global_trace;
9750
9751         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9752                 pr_warn("Tracing disabled due to lockdown\n");
9753                 return -EPERM;
9754         }
9755
9756         /* The top level trace array uses NULL as parent */
9757         if (tr->dir)
9758                 return 0;
9759
9760         if (WARN_ON(!tracefs_initialized()))
9761                 return -ENODEV;
9762
9763         /*
9764          * As there may still be users that expect the tracing
9765          * files to exist in debugfs/tracing, we must automount
9766          * the tracefs file system there, so older tools still
9767          * work with the newer kernel.
9768          */
9769         tr->dir = debugfs_create_automount("tracing", NULL,
9770                                            trace_automount, NULL);
9771
9772         return 0;
9773 }
9774
9775 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9776 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9777
9778 static struct workqueue_struct *eval_map_wq __initdata;
9779 static struct work_struct eval_map_work __initdata;
9780 static struct work_struct tracerfs_init_work __initdata;
9781
9782 static void __init eval_map_work_func(struct work_struct *work)
9783 {
9784         int len;
9785
9786         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9787         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9788 }
9789
9790 static int __init trace_eval_init(void)
9791 {
9792         INIT_WORK(&eval_map_work, eval_map_work_func);
9793
9794         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9795         if (!eval_map_wq) {
9796                 pr_err("Unable to allocate eval_map_wq\n");
9797                 /* Do the work synchronously instead */
9798                 eval_map_work_func(&eval_map_work);
9799                 return -ENOMEM;
9800         }
9801
9802         queue_work(eval_map_wq, &eval_map_work);
9803         return 0;
9804 }
9805
9806 subsys_initcall(trace_eval_init);
9807
9808 static int __init trace_eval_sync(void)
9809 {
9810         /* Make sure the eval map updates are finished */
9811         if (eval_map_wq)
9812                 destroy_workqueue(eval_map_wq);
9813         return 0;
9814 }
9815
9816 late_initcall_sync(trace_eval_sync);
9817
9818
9819 #ifdef CONFIG_MODULES
9820 static void trace_module_add_evals(struct module *mod)
9821 {
9822         if (!mod->num_trace_evals)
9823                 return;
9824
9825         /*
9826          * Modules with bad taint do not have events created; do
9827          * not bother with their enums either.
9828          */
9829         if (trace_module_has_bad_taint(mod))
9830                 return;
9831
9832         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9833 }
9834
9835 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9836 static void trace_module_remove_evals(struct module *mod)
9837 {
9838         union trace_eval_map_item *map;
9839         union trace_eval_map_item **last = &trace_eval_maps;
9840
9841         if (!mod->num_trace_evals)
9842                 return;
9843
9844         mutex_lock(&trace_eval_mutex);
9845
9846         map = trace_eval_maps;
9847
9848         while (map) {
9849                 if (map->head.mod == mod)
9850                         break;
9851                 map = trace_eval_jmp_to_tail(map);
9852                 last = &map->tail.next;
9853                 map = map->tail.next;
9854         }
9855         if (!map)
9856                 goto out;
9857
9858         *last = trace_eval_jmp_to_tail(map)->tail.next;
9859         kfree(map);
9860  out:
9861         mutex_unlock(&trace_eval_mutex);
9862 }
9863 #else
9864 static inline void trace_module_remove_evals(struct module *mod) { }
9865 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9866
9867 static int trace_module_notify(struct notifier_block *self,
9868                                unsigned long val, void *data)
9869 {
9870         struct module *mod = data;
9871
9872         switch (val) {
9873         case MODULE_STATE_COMING:
9874                 trace_module_add_evals(mod);
9875                 break;
9876         case MODULE_STATE_GOING:
9877                 trace_module_remove_evals(mod);
9878                 break;
9879         }
9880
9881         return NOTIFY_OK;
9882 }
9883
9884 static struct notifier_block trace_module_nb = {
9885         .notifier_call = trace_module_notify,
9886         .priority = 0,
9887 };
9888 #endif /* CONFIG_MODULES */
9889
9890 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9891 {
9892
9893         event_trace_init();
9894
9895         init_tracer_tracefs(&global_trace, NULL);
9896         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9897
9898         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9899                         &global_trace, &tracing_thresh_fops);
9900
9901         trace_create_file("README", TRACE_MODE_READ, NULL,
9902                         NULL, &tracing_readme_fops);
9903
9904         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9905                         NULL, &tracing_saved_cmdlines_fops);
9906
9907         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9908                           NULL, &tracing_saved_cmdlines_size_fops);
9909
9910         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9911                         NULL, &tracing_saved_tgids_fops);
9912
9913         trace_create_eval_file(NULL);
9914
9915 #ifdef CONFIG_MODULES
9916         register_module_notifier(&trace_module_nb);
9917 #endif
9918
9919 #ifdef CONFIG_DYNAMIC_FTRACE
9920         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9921                         NULL, &tracing_dyn_info_fops);
9922 #endif
9923
9924         create_trace_instances(NULL);
9925
9926         update_tracer_options(&global_trace);
9927 }
9928
9929 static __init int tracer_init_tracefs(void)
9930 {
9931         int ret;
9932
9933         trace_access_lock_init();
9934
9935         ret = tracing_init_dentry();
9936         if (ret)
9937                 return 0;
9938
9939         if (eval_map_wq) {
9940                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9941                 queue_work(eval_map_wq, &tracerfs_init_work);
9942         } else {
9943                 tracer_init_tracefs_work_func(NULL);
9944         }
9945
9946         rv_init_interface();
9947
9948         return 0;
9949 }
9950
9951 fs_initcall(tracer_init_tracefs);
9952
9953 static int trace_die_panic_handler(struct notifier_block *self,
9954                                 unsigned long ev, void *unused);
9955
9956 static struct notifier_block trace_panic_notifier = {
9957         .notifier_call = trace_die_panic_handler,
9958         .priority = INT_MAX - 1,
9959 };
9960
9961 static struct notifier_block trace_die_notifier = {
9962         .notifier_call = trace_die_panic_handler,
9963         .priority = INT_MAX - 1,
9964 };
9965
9966 /*
9967  * The idea is to execute the following die/panic callback early, in order
9968  * to avoid showing irrelevant information in the trace (like other panic
9969  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9970  * warnings get disabled (to prevent potential log flooding).
9971  */
9972 static int trace_die_panic_handler(struct notifier_block *self,
9973                                 unsigned long ev, void *unused)
9974 {
9975         if (!ftrace_dump_on_oops)
9976                 return NOTIFY_DONE;
9977
9978         /* The die notifier requires DIE_OOPS to trigger */
9979         if (self == &trace_die_notifier && ev != DIE_OOPS)
9980                 return NOTIFY_DONE;
9981
9982         ftrace_dump(ftrace_dump_on_oops);
9983
9984         return NOTIFY_DONE;
9985 }
9986
9987 /*
9988  * printk is set to a max of 1024; we really don't need it that big.
9989  * Nothing should be printing 1000 characters anyway.
9990  */
9991 #define TRACE_MAX_PRINT         1000
9992
9993 /*
9994  * Define here KERN_TRACE so that we have one place to modify
9995  * it if we decide to change what log level the ftrace dump
9996  * should be at.
9997  */
9998 #define KERN_TRACE              KERN_EMERG
9999
10000 void
10001 trace_printk_seq(struct trace_seq *s)
10002 {
10003         /* Probably should print a warning here. */
10004         if (s->seq.len >= TRACE_MAX_PRINT)
10005                 s->seq.len = TRACE_MAX_PRINT;
10006
10007         /*
10008          * More paranoid code. Although the buffer size is set to
10009          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10010          * an extra layer of protection.
10011          */
10012         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10013                 s->seq.len = s->seq.size - 1;
10014
10015         /* Should already be NUL-terminated, but we are paranoid. */
10016         s->buffer[s->seq.len] = 0;
10017
10018         printk(KERN_TRACE "%s", s->buffer);
10019
10020         trace_seq_init(s);
10021 }
10022
10023 void trace_init_global_iter(struct trace_iterator *iter)
10024 {
10025         iter->tr = &global_trace;
10026         iter->trace = iter->tr->current_trace;
10027         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10028         iter->array_buffer = &global_trace.array_buffer;
10029
10030         if (iter->trace && iter->trace->open)
10031                 iter->trace->open(iter);
10032
10033         /* Annotate start of buffers if we had overruns */
10034         if (ring_buffer_overruns(iter->array_buffer->buffer))
10035                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10036
10037         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10038         if (trace_clocks[iter->tr->clock_id].in_ns)
10039                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10040
10041         /* Cannot use kmalloc for iter.temp and iter.fmt */
10042         iter->temp = static_temp_buf;
10043         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10044         iter->fmt = static_fmt_buf;
10045         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10046 }
10047
10048 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10049 {
10050         /* use static because iter can be a bit big for the stack */
10051         static struct trace_iterator iter;
10052         static atomic_t dump_running;
10053         struct trace_array *tr = &global_trace;
10054         unsigned int old_userobj;
10055         unsigned long flags;
10056         int cnt = 0, cpu;
10057
10058         /* Only allow one dump user at a time. */
10059         if (atomic_inc_return(&dump_running) != 1) {
10060                 atomic_dec(&dump_running);
10061                 return;
10062         }
10063
10064         /*
10065          * Always turn off tracing when we dump.
10066          * We don't need to show trace output of what happens
10067          * between multiple crashes.
10068          *
10069          * If the user does a sysrq-z, then they can re-enable
10070          * tracing with echo 1 > tracing_on.
10071          */
10072         tracing_off();
10073
10074         local_irq_save(flags);
10075
10076         /* Simulate the iterator */
10077         trace_init_global_iter(&iter);
10078
10079         for_each_tracing_cpu(cpu) {
10080                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10081         }
10082
10083         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10084
10085         /* don't look at user memory in panic mode */
10086         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10087
10088         switch (oops_dump_mode) {
10089         case DUMP_ALL:
10090                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10091                 break;
10092         case DUMP_ORIG:
10093                 iter.cpu_file = raw_smp_processor_id();
10094                 break;
10095         case DUMP_NONE:
10096                 goto out_enable;
10097         default:
10098                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10099                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10100         }
10101
10102         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10103
10104         /* Did function tracer already get disabled? */
10105         if (ftrace_is_dead()) {
10106                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10107                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10108         }
10109
10110         /*
10111          * We need to stop all tracing on all CPUs to read
10112          * the next buffer. This is a bit expensive, but is
10113          * not done often. We read everything we can,
10114          * and then release the locks again.
10115          */
10116
10117         while (!trace_empty(&iter)) {
10118
10119                 if (!cnt)
10120                         printk(KERN_TRACE "---------------------------------\n");
10121
10122                 cnt++;
10123
10124                 trace_iterator_reset(&iter);
10125                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10126
10127                 if (trace_find_next_entry_inc(&iter) != NULL) {
10128                         int ret;
10129
10130                         ret = print_trace_line(&iter);
10131                         if (ret != TRACE_TYPE_NO_CONSUME)
10132                                 trace_consume(&iter);
10133                 }
10134                 touch_nmi_watchdog();
10135
10136                 trace_printk_seq(&iter.seq);
10137         }
10138
10139         if (!cnt)
10140                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10141         else
10142                 printk(KERN_TRACE "---------------------------------\n");
10143
10144  out_enable:
10145         tr->trace_flags |= old_userobj;
10146
10147         for_each_tracing_cpu(cpu) {
10148                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10149         }
10150         atomic_dec(&dump_running);
10151         local_irq_restore(flags);
10152 }
10153 EXPORT_SYMBOL_GPL(ftrace_dump);
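/*
 * Editorial note: ftrace_dump() is exported, so module code can force a
 * dump of the ring buffer to the console from its own failure path. A
 * hedged sketch (the condition is hypothetical):
 *
 *	if (fatal_condition_detected)
 *		ftrace_dump(DUMP_ALL);	// or DUMP_ORIG for the current CPU only
 */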
10154
10155 #define WRITE_BUFSIZE  4096
10156
10157 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10158                                 size_t count, loff_t *ppos,
10159                                 int (*createfn)(const char *))
10160 {
10161         char *kbuf, *buf, *tmp;
10162         int ret = 0;
10163         size_t done = 0;
10164         size_t size;
10165
10166         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10167         if (!kbuf)
10168                 return -ENOMEM;
10169
10170         while (done < count) {
10171                 size = count - done;
10172
10173                 if (size >= WRITE_BUFSIZE)
10174                         size = WRITE_BUFSIZE - 1;
10175
10176                 if (copy_from_user(kbuf, buffer + done, size)) {
10177                         ret = -EFAULT;
10178                         goto out;
10179                 }
10180                 kbuf[size] = '\0';
10181                 buf = kbuf;
10182                 do {
10183                         tmp = strchr(buf, '\n');
10184                         if (tmp) {
10185                                 *tmp = '\0';
10186                                 size = tmp - buf + 1;
10187                         } else {
10188                                 size = strlen(buf);
10189                                 if (done + size < count) {
10190                                         if (buf != kbuf)
10191                                                 break;
10192                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10193                                         pr_warn("Line length is too long: Should be less than %d\n",
10194                                                 WRITE_BUFSIZE - 2);
10195                                         ret = -EINVAL;
10196                                         goto out;
10197                                 }
10198                         }
10199                         done += size;
10200
10201                         /* Remove comments */
10202                         tmp = strchr(buf, '#');
10203
10204                         if (tmp)
10205                                 *tmp = '\0';
10206
10207                         ret = createfn(buf);
10208                         if (ret)
10209                                 goto out;
10210                         buf += size;
10211
10212                 } while (done < count);
10213         }
10214         ret = done;
10215
10216 out:
10217         kfree(kbuf);
10218
10219         return ret;
10220 }
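/*
 * Editorial note: a minimal sketch of a tracefs write handler that
 * delegates line-oriented command parsing to trace_parse_run_command().
 * The handler and callback names are illustrative and not part of this
 * file; the callback is invoked once per parsed line, with '#' comments
 * and trailing newlines already stripped.
 *
 *	static int sample_create_cmd(const char *raw_command)
 *	{
 *		pr_info("parsed command: %s\n", raw_command);
 *		return 0;
 *	}
 *
 *	static ssize_t sample_write(struct file *file, const char __user *buf,
 *				    size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buf, count, ppos,
 *					       sample_create_cmd);
 *	}
 */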
10221
10222 #ifdef CONFIG_TRACER_MAX_TRACE
10223 __init static bool tr_needs_alloc_snapshot(const char *name)
10224 {
10225         char *test;
10226         int len = strlen(name);
10227         bool ret;
10228
10229         if (!boot_snapshot_index)
10230                 return false;
10231
10232         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10233             boot_snapshot_info[len] == '\t')
10234                 return true;
10235
10236         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10237         if (!test)
10238                 return false;
10239
10240         sprintf(test, "\t%s\t", name);
10241         ret = strstr(boot_snapshot_info, test) == NULL;
10242         kfree(test);
10243         return ret;
10244 }
10245
10246 __init static void do_allocate_snapshot(const char *name)
10247 {
10248         if (!tr_needs_alloc_snapshot(name))
10249                 return;
10250
10251         /*
10252          * When allocate_snapshot is set, the next call to
10253          * allocate_trace_buffers() (called by trace_array_get_by_name())
10254          * will allocate the snapshot buffer. That will also clear
10255          * this flag.
10256          */
10257         allocate_snapshot = true;
10258 }
10259 #else
10260 static inline void do_allocate_snapshot(const char *name) { }
10261 #endif
10262
10263 __init static void enable_instances(void)
10264 {
10265         struct trace_array *tr;
10266         char *curr_str;
10267         char *str;
10268         char *tok;
10269
10270         /* A tab is always appended */
10271         boot_instance_info[boot_instance_index - 1] = '\0';
10272         str = boot_instance_info;
10273
10274         while ((curr_str = strsep(&str, "\t"))) {
10275
10276                 tok = strsep(&curr_str, ",");
10277
10278                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10279                         do_allocate_snapshot(tok);
10280
10281                 tr = trace_array_get_by_name(tok);
10282                 if (!tr) {
10283                         pr_warn("Failed to create instance buffer %s\n", tok);
10284                         continue;
10285                 }
10286                 /* Allow user space to delete it */
10287                 trace_array_put(tr);
10288
10289                 while ((tok = strsep(&curr_str, ","))) {
10290                         early_enable_events(tr, tok, true);
10291                 }
10292         }
10293 }
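/*
 * Editorial note: boot_instance_info holds tab-separated entries of the
 * form "<name>[,<event>...]", so a kernel command line such as the one
 * below (illustrative, assuming the "trace_instance=" boot parameter is
 * what fills this buffer) creates the "foo" instance and enables the
 * listed events in it early in boot:
 *
 *	trace_instance=foo,sched:sched_switch,sched:sched_wakeup
 */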
10294
10295 __init static int tracer_alloc_buffers(void)
10296 {
10297         int ring_buf_size;
10298         int ret = -ENOMEM;
10299
10300
10301         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10302                 pr_warn("Tracing disabled due to lockdown\n");
10303                 return -EPERM;
10304         }
10305
10306         /*
10307          * Make sure we don't accidentally add more trace options
10308          * than we have bits for.
10309          */
10310         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10311
10312         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10313                 goto out;
10314
10315         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10316                 goto out_free_buffer_mask;
10317
10318         /* Only allocate trace_printk buffers if a trace_printk exists */
10319         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10320                 /* Must be called before global_trace.buffer is allocated */
10321                 trace_printk_init_buffers();
10322
10323         /* To save memory, keep the ring buffer size at its minimum */
10324         if (ring_buffer_expanded)
10325                 ring_buf_size = trace_buf_size;
10326         else
10327                 ring_buf_size = 1;
10328
10329         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10330         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10331
10332         raw_spin_lock_init(&global_trace.start_lock);
10333
10334         /*
10335          * The prepare callback allocates some memory for the ring buffer. We
10336          * don't free the buffer if the CPU goes down. If we were to free
10337          * the buffer, then the user would lose any trace that was in the
10338          * buffer. The memory will be removed once the "instance" is removed.
10339          */
10340         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10341                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10342                                       NULL);
10343         if (ret < 0)
10344                 goto out_free_cpumask;
10345         /* Used for event triggers */
10346         ret = -ENOMEM;
10347         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10348         if (!temp_buffer)
10349                 goto out_rm_hp_state;
10350
10351         if (trace_create_savedcmd() < 0)
10352                 goto out_free_temp_buffer;
10353
10354         /* TODO: make the number of buffers hot pluggable with CPUS */
10355         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10356                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10357                 goto out_free_savedcmd;
10358         }
10359
10360         if (global_trace.buffer_disabled)
10361                 tracing_off();
10362
10363         if (trace_boot_clock) {
10364                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10365                 if (ret < 0)
10366                         pr_warn("Trace clock %s not defined, going back to default\n",
10367                                 trace_boot_clock);
10368         }
10369
10370         /*
10371          * register_tracer() might reference current_trace, so it
10372          * needs to be set before we register anything. This is
10373          * just a bootstrap of current_trace anyway.
10374          */
10375         global_trace.current_trace = &nop_trace;
10376
10377         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10378
10379         ftrace_init_global_array_ops(&global_trace);
10380
10381         init_trace_flags_index(&global_trace);
10382
10383         register_tracer(&nop_trace);
10384
10385         /* Function tracing may start here (via kernel command line) */
10386         init_function_trace();
10387
10388         /* All seems OK, enable tracing */
10389         tracing_disabled = 0;
10390
10391         atomic_notifier_chain_register(&panic_notifier_list,
10392                                        &trace_panic_notifier);
10393
10394         register_die_notifier(&trace_die_notifier);
10395
10396         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10397
10398         INIT_LIST_HEAD(&global_trace.systems);
10399         INIT_LIST_HEAD(&global_trace.events);
10400         INIT_LIST_HEAD(&global_trace.hist_vars);
10401         INIT_LIST_HEAD(&global_trace.err_log);
10402         list_add(&global_trace.list, &ftrace_trace_arrays);
10403
10404         apply_trace_boot_options();
10405
10406         register_snapshot_cmd();
10407
10408         test_can_verify();
10409
10410         return 0;
10411
10412 out_free_savedcmd:
10413         free_saved_cmdlines_buffer(savedcmd);
10414 out_free_temp_buffer:
10415         ring_buffer_free(temp_buffer);
10416 out_rm_hp_state:
10417         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10418 out_free_cpumask:
10419         free_cpumask_var(global_trace.tracing_cpumask);
10420 out_free_buffer_mask:
10421         free_cpumask_var(tracing_buffer_mask);
10422 out:
10423         return ret;
10424 }
10425
10426 void __init ftrace_boot_snapshot(void)
10427 {
10428 #ifdef CONFIG_TRACER_MAX_TRACE
10429         struct trace_array *tr;
10430
10431         if (!snapshot_at_boot)
10432                 return;
10433
10434         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10435                 if (!tr->allocated_snapshot)
10436                         continue;
10437
10438                 tracing_snapshot_instance(tr);
10439                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10440         }
10441 #endif
10442 }
10443
10444 void __init early_trace_init(void)
10445 {
10446         if (tracepoint_printk) {
10447                 tracepoint_print_iter =
10448                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10449                 if (MEM_FAIL(!tracepoint_print_iter,
10450                              "Failed to allocate trace iterator\n"))
10451                         tracepoint_printk = 0;
10452                 else
10453                         static_key_enable(&tracepoint_printk_key.key);
10454         }
10455         tracer_alloc_buffers();
10456
10457         init_events();
10458 }
10459
10460 void __init trace_init(void)
10461 {
10462         trace_event_init();
10463
10464         if (boot_instance_index)
10465                 enable_instances();
10466 }
10467
10468 __init static void clear_boot_tracer(void)
10469 {
10470         /*
10471          * The default bootup tracer buffer is in an init section.
10472          * This function is called at late_initcall time. If we did not
10473          * find the boot tracer, then clear it out, to prevent
10474          * later registration from accessing the buffer that is
10475          * about to be freed.
10476          */
10477         if (!default_bootup_tracer)
10478                 return;
10479
10480         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10481                default_bootup_tracer);
10482         default_bootup_tracer = NULL;
10483 }
10484
10485 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10486 __init static void tracing_set_default_clock(void)
10487 {
10488         /* sched_clock_stable() is determined in late_initcall */
10489         if (!trace_boot_clock && !sched_clock_stable()) {
10490                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10491                         pr_warn("Can not set tracing clock due to lockdown\n");
10492                         return;
10493                 }
10494
10495                 printk(KERN_WARNING
10496                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10497                        "If you want to keep using the local clock, then add:\n"
10498                        "  \"trace_clock=local\"\n"
10499                        "on the kernel command line\n");
10500                 tracing_set_clock(&global_trace, "global");
10501         }
10502 }
10503 #else
10504 static inline void tracing_set_default_clock(void) { }
10505 #endif
10506
10507 __init static int late_trace_init(void)
10508 {
10509         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10510                 static_key_disable(&tracepoint_printk_key.key);
10511                 tracepoint_printk = 0;
10512         }
10513
10514         tracing_set_default_clock();
10515         clear_boot_tracer();
10516         return 0;
10517 }
10518
10519 late_initcall_sync(late_trace_init);