kernel/trace/trace.c (Linux-libre 4.4.228-gnu)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static struct tracer_flags dummy_tracer_flags = {
78         .val = 0,
79         .opts = dummy_tracer_opt
80 };
81
82 static int
83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85         return 0;
86 }
87
88 /*
89  * To prevent the comm cache from being overwritten when no
90  * tracing is active, only save the comm when a trace event
91  * occurred.
92  */
93 static DEFINE_PER_CPU(bool, trace_cmdline_save);
94
95 /*
96  * Kill all tracing for good (never come back).
97  * It is initialized to 1 but will turn to zero if the initialization
98  * of the tracer is successful. But that is the only place that sets
99  * this back to zero.
100  */
101 static int tracing_disabled = 1;
102
103 cpumask_var_t __read_mostly     tracing_buffer_mask;
104
105 /*
106  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
107  *
108  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
109  * is set, then ftrace_dump is called. This will output the contents
110  * of the ftrace buffers to the console.  This is very useful for
111  * capturing traces that lead to crashes and outputing it to a
112  * serial console.
113  *
114  * It is default off, but you can enable it with either specifying
115  * "ftrace_dump_on_oops" in the kernel command line, or setting
116  * /proc/sys/kernel/ftrace_dump_on_oops
117  * Set 1 if you want to dump buffers of all CPUs
118  * Set 2 if you want to dump the buffer of the CPU that triggered oops
119  */
120
121 enum ftrace_dump_mode ftrace_dump_on_oops;
122
123 /* When set, tracing will stop when a WARN*() is hit */
124 int __disable_trace_on_warning;
125
126 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
127 /* Map of enums to their values, for "enum_map" file */
128 struct trace_enum_map_head {
129         struct module                   *mod;
130         unsigned long                   length;
131 };
132
133 union trace_enum_map_item;
134
135 struct trace_enum_map_tail {
136         /*
137          * "end" is first and points to NULL as it must be different
138          * from "mod" or "enum_string"
139          */
140         union trace_enum_map_item       *next;
141         const char                      *end;   /* points to NULL */
142 };
143
144 static DEFINE_MUTEX(trace_enum_mutex);
145
146 /*
147  * The trace_enum_maps are saved in an array with two extra elements,
148  * one at the beginning, and one at the end. The beginning item contains
149  * the count of the saved maps (head.length), and the module they
150  * belong to if not built in (head.mod). The ending item contains a
151  * pointer to the next array of saved enum_map items.
152  */
153 union trace_enum_map_item {
154         struct trace_enum_map           map;
155         struct trace_enum_map_head      head;
156         struct trace_enum_map_tail      tail;
157 };
158
159 static union trace_enum_map_item *trace_enum_maps;
160 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
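/*
 * Illustrative sketch of walking the layout described above; the
 * function and variable names here are hypothetical and the block is
 * not compiled.  item[0] is the head, item[1..length] are the saved
 * maps, and item[length + 1] is the tail linking to the next array
 * (or NULL).
 */
#if 0	/* example only */
static void example_walk_enum_maps(union trace_enum_map_item *start)
{
	while (start) {
		unsigned long i, len = start->head.length;

		for (i = 0; i < len; i++) {
			struct trace_enum_map *map = &start[i + 1].map;
			/* map->enum_string and map->enum_value hold one saved pair */
		}

		/* the tail item points at the head of the next array */
		start = start[len + 1].tail.next;
	}
}
#endif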
161
162 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
163
164 #define MAX_TRACER_SIZE         100
165 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
166 static char *default_bootup_tracer;
167
168 static bool allocate_snapshot;
169
170 static int __init set_cmdline_ftrace(char *str)
171 {
172         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
173         default_bootup_tracer = bootup_tracer_buf;
174         /* We are using ftrace early, expand it */
175         ring_buffer_expanded = true;
176         return 1;
177 }
178 __setup("ftrace=", set_cmdline_ftrace);
179
180 static int __init set_ftrace_dump_on_oops(char *str)
181 {
182         if (*str++ != '=' || !*str) {
183                 ftrace_dump_on_oops = DUMP_ALL;
184                 return 1;
185         }
186
187         if (!strcmp("orig_cpu", str)) {
188                 ftrace_dump_on_oops = DUMP_ORIG;
189                 return 1;
190         }
191
192         return 0;
193 }
194 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
195
196 static int __init stop_trace_on_warning(char *str)
197 {
198         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
199                 __disable_trace_on_warning = 1;
200         return 1;
201 }
202 __setup("traceoff_on_warning", stop_trace_on_warning);
203
204 static int __init boot_alloc_snapshot(char *str)
205 {
206         allocate_snapshot = true;
207         /* We also need the main ring buffer expanded */
208         ring_buffer_expanded = true;
209         return 1;
210 }
211 __setup("alloc_snapshot", boot_alloc_snapshot);
212
213
214 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
215
216 static int __init set_trace_boot_options(char *str)
217 {
218         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
219         return 0;
220 }
221 __setup("trace_options=", set_trace_boot_options);
222
223 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
224 static char *trace_boot_clock __initdata;
225
226 static int __init set_trace_boot_clock(char *str)
227 {
228         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
229         trace_boot_clock = trace_boot_clock_buf;
230         return 0;
231 }
232 __setup("trace_clock=", set_trace_boot_clock);
233
234 static int __init set_tracepoint_printk(char *str)
235 {
236         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
237                 tracepoint_printk = 1;
238         return 1;
239 }
240 __setup("tp_printk", set_tracepoint_printk);
241
242 unsigned long long ns2usecs(cycle_t nsec)
243 {
244         nsec += 500;
245         do_div(nsec, 1000);
246         return nsec;
247 }
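/*
 * e.g. ns2usecs(1500) == 2 while ns2usecs(1499) == 1: the +500 rounds
 * to the nearest microsecond before the divide.
 */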
248
249 /* trace_flags holds trace_options default values */
250 #define TRACE_DEFAULT_FLAGS                                             \
251         (FUNCTION_DEFAULT_FLAGS |                                       \
252          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
253          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
254          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
255          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
256
257 /* trace_options that are only supported by global_trace */
258 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
259                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
260
261
262 /*
263  * The global_trace is the descriptor that holds the tracing
264  * buffers for the live tracing. For each CPU, it contains
265  * a linked list of pages that will store trace entries. The
266  * page descriptor of the pages in the memory is used to hold
267  * the linked list by linking the lru item in the page descriptor
268  * to each of the pages in the buffer per CPU.
269  *
270  * For each active CPU there is a data field that holds the
271  * pages for the buffer for that CPU. Each CPU has the same number
272  * of pages allocated for its buffer.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 void trace_array_put(struct trace_array *this_tr)
305 {
306         mutex_lock(&trace_types_lock);
307         __trace_array_put(this_tr);
308         mutex_unlock(&trace_types_lock);
309 }
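/*
 * Illustrative sketch (hypothetical caller, not compiled): pin a
 * trace_array with trace_array_get() before using it and drop the
 * reference with trace_array_put() when done.
 */
#if 0	/* example only */
static int example_use_trace_array(struct trace_array *tr)
{
	int ret;

	ret = trace_array_get(tr);	/* -ENODEV if tr is not on the list */
	if (ret < 0)
		return ret;

	/* ... tr cannot go away while the reference is held ... */

	trace_array_put(tr);
	return 0;
}
#endif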
310
311 int filter_check_discard(struct trace_event_file *file, void *rec,
312                          struct ring_buffer *buffer,
313                          struct ring_buffer_event *event)
314 {
315         if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
316             !filter_match_preds(file->filter, rec)) {
317                 ring_buffer_discard_commit(buffer, event);
318                 return 1;
319         }
320
321         return 0;
322 }
323 EXPORT_SYMBOL_GPL(filter_check_discard);
324
325 int call_filter_check_discard(struct trace_event_call *call, void *rec,
326                               struct ring_buffer *buffer,
327                               struct ring_buffer_event *event)
328 {
329         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
330             !filter_match_preds(call->filter, rec)) {
331                 ring_buffer_discard_commit(buffer, event);
332                 return 1;
333         }
334
335         return 0;
336 }
337 EXPORT_SYMBOL_GPL(call_filter_check_discard);
338
339 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
340 {
341         u64 ts;
342
343         /* Early boot up does not have a buffer yet */
344         if (!buf->buffer)
345                 return trace_clock_local();
346
347         ts = ring_buffer_time_stamp(buf->buffer, cpu);
348         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
349
350         return ts;
351 }
352
353 cycle_t ftrace_now(int cpu)
354 {
355         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
356 }
357
358 /**
359  * tracing_is_enabled - Show if global_trace has been disabled
360  *
361  * Shows if the global trace has been enabled or not. It uses the
362  * mirror flag "buffer_disabled" so that it can be used in fast paths
363  * such as the irqsoff tracer. But it may be inaccurate due to races. If you
364  * need to know the accurate state, use tracing_is_on() which is a little
365  * slower, but accurate.
366  */
367 int tracing_is_enabled(void)
368 {
369         /*
370          * For quick access (irqsoff uses this in fast path), just
371          * return the mirror variable of the state of the ring buffer.
372          * It's a little racy, but we don't really care.
373          */
374         smp_rmb();
375         return !global_trace.buffer_disabled;
376 }
377
378 /*
379  * trace_buf_size is the size in bytes that is allocated
380  * for a buffer. Note, the number of bytes is always rounded
381  * to page size.
382  *
383  * This number is purposely set to a low value of 16384 entries.
384  * If a dump on oops happens, it is much appreciated not to have
385  * to wait for all that output. In any case, this can be configured
386  * at both boot time and run time.
387  */
388 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
389
390 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
391
392 /* trace_types holds a link list of available tracers. */
393 static struct tracer            *trace_types __read_mostly;
394
395 /*
396  * trace_types_lock is used to protect the trace_types list.
397  */
398 DEFINE_MUTEX(trace_types_lock);
399
400 /*
401  * Serialize access to the ring buffer.
402  *
403  * The ring buffer serializes readers, but that is only low level protection.
404  * The validity of the events (returned by ring_buffer_peek() etc.)
405  * is not protected by the ring buffer.
406  *
407  * The content of events may become garbage if we allow other processes
408  * to consume these events concurrently:
409  *   A) the page of the consumed events may become a normal page
410  *      (not a reader page) in the ring buffer, and this page will be
411  *      rewritten by the event producer.
412  *   B) the page of the consumed events may become a page for splice_read,
413  *      and this page will be returned to the system.
414  *
415  * These primitives allow multiple processes to access different CPU
416  * ring buffers concurrently.
417  *
418  * These primitives don't distinguish read-only and read-consume access.
419  * Multiple read-only accesses are also serialized.
420  */
421
422 #ifdef CONFIG_SMP
423 static DECLARE_RWSEM(all_cpu_access_lock);
424 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
425
426 static inline void trace_access_lock(int cpu)
427 {
428         if (cpu == RING_BUFFER_ALL_CPUS) {
429                 /* gain it for accessing the whole ring buffer. */
430                 down_write(&all_cpu_access_lock);
431         } else {
432                 /* gain it for accessing a cpu ring buffer. */
433
434                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
435                 down_read(&all_cpu_access_lock);
436
437                 /* Secondly block other access to this @cpu ring buffer. */
438                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
439         }
440 }
441
442 static inline void trace_access_unlock(int cpu)
443 {
444         if (cpu == RING_BUFFER_ALL_CPUS) {
445                 up_write(&all_cpu_access_lock);
446         } else {
447                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
448                 up_read(&all_cpu_access_lock);
449         }
450 }
451
452 static inline void trace_access_lock_init(void)
453 {
454         int cpu;
455
456         for_each_possible_cpu(cpu)
457                 mutex_init(&per_cpu(cpu_access_lock, cpu));
458 }
459
460 #else
461
462 static DEFINE_MUTEX(access_lock);
463
464 static inline void trace_access_lock(int cpu)
465 {
466         (void)cpu;
467         mutex_lock(&access_lock);
468 }
469
470 static inline void trace_access_unlock(int cpu)
471 {
472         (void)cpu;
473         mutex_unlock(&access_lock);
474 }
475
476 static inline void trace_access_lock_init(void)
477 {
478 }
479
480 #endif
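/*
 * Illustrative sketch (hypothetical reader, not compiled): consuming
 * one CPU's buffer is serialized against other accesses to that same
 * CPU and against whole-buffer (RING_BUFFER_ALL_CPUS) accesses.
 */
#if 0	/* example only */
static void example_consume_one_cpu(struct trace_buffer *buf, int cpu)
{
	trace_access_lock(cpu);
	/* read or consume events of @cpu here, e.g. via ring_buffer_consume() */
	trace_access_unlock(cpu);
}
#endif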
481
482 #ifdef CONFIG_STACKTRACE
483 static void __ftrace_trace_stack(struct ring_buffer *buffer,
484                                  unsigned long flags,
485                                  int skip, int pc, struct pt_regs *regs);
486 static inline void ftrace_trace_stack(struct trace_array *tr,
487                                       struct ring_buffer *buffer,
488                                       unsigned long flags,
489                                       int skip, int pc, struct pt_regs *regs);
490
491 #else
492 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
493                                         unsigned long flags,
494                                         int skip, int pc, struct pt_regs *regs)
495 {
496 }
497 static inline void ftrace_trace_stack(struct trace_array *tr,
498                                       struct ring_buffer *buffer,
499                                       unsigned long flags,
500                                       int skip, int pc, struct pt_regs *regs)
501 {
502 }
503
504 #endif
505
506 static void tracer_tracing_on(struct trace_array *tr)
507 {
508         if (tr->trace_buffer.buffer)
509                 ring_buffer_record_on(tr->trace_buffer.buffer);
510         /*
511          * This flag is looked at when buffers haven't been allocated
512          * yet, or by some tracers (like irqsoff), that just want to
513          * know if the ring buffer has been disabled, but it can handle
514          * races where it gets disabled but we still do a record.
515          * As the check is in the fast path of the tracers, it is more
516          * important to be fast than accurate.
517          */
518         tr->buffer_disabled = 0;
519         /* Make the flag seen by readers */
520         smp_wmb();
521 }
522
523 /**
524  * tracing_on - enable tracing buffers
525  *
526  * This function enables tracing buffers that may have been
527  * disabled with tracing_off.
528  */
529 void tracing_on(void)
530 {
531         tracer_tracing_on(&global_trace);
532 }
533 EXPORT_SYMBOL_GPL(tracing_on);
534
535 /**
536  * __trace_puts - write a constant string into the trace buffer.
537  * @ip:    The address of the caller
538  * @str:   The constant string to write
539  * @size:  The size of the string.
540  */
541 int __trace_puts(unsigned long ip, const char *str, int size)
542 {
543         struct ring_buffer_event *event;
544         struct ring_buffer *buffer;
545         struct print_entry *entry;
546         unsigned long irq_flags;
547         int alloc;
548         int pc;
549
550         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
551                 return 0;
552
553         pc = preempt_count();
554
555         if (unlikely(tracing_selftest_running || tracing_disabled))
556                 return 0;
557
558         alloc = sizeof(*entry) + size + 2; /* possible \n added */
559
560         local_save_flags(irq_flags);
561         buffer = global_trace.trace_buffer.buffer;
562         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
563                                           irq_flags, pc);
564         if (!event)
565                 return 0;
566
567         entry = ring_buffer_event_data(event);
568         entry->ip = ip;
569
570         memcpy(&entry->buf, str, size);
571
572         /* Add a newline if necessary */
573         if (entry->buf[size - 1] != '\n') {
574                 entry->buf[size] = '\n';
575                 entry->buf[size + 1] = '\0';
576         } else
577                 entry->buf[size] = '\0';
578
579         __buffer_unlock_commit(buffer, event);
580         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
581
582         return size;
583 }
584 EXPORT_SYMBOL_GPL(__trace_puts);
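/*
 * Illustrative use (assuming the trace_puts() wrapper from
 * linux/kernel.h, which routes constant strings to this function or to
 * __trace_bputs()):
 *
 *	trace_puts("hit the slow path\n");
 *
 * A direct call looks like __trace_puts(_THIS_IP_, "msg\n", 4).
 */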
585
586 /**
587  * __trace_bputs - write the pointer to a constant string into trace buffer
588  * @ip:    The address of the caller
589  * @str:   The constant string to write to the buffer to
590  */
591 int __trace_bputs(unsigned long ip, const char *str)
592 {
593         struct ring_buffer_event *event;
594         struct ring_buffer *buffer;
595         struct bputs_entry *entry;
596         unsigned long irq_flags;
597         int size = sizeof(struct bputs_entry);
598         int pc;
599
600         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
601                 return 0;
602
603         pc = preempt_count();
604
605         if (unlikely(tracing_selftest_running || tracing_disabled))
606                 return 0;
607
608         local_save_flags(irq_flags);
609         buffer = global_trace.trace_buffer.buffer;
610         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
611                                           irq_flags, pc);
612         if (!event)
613                 return 0;
614
615         entry = ring_buffer_event_data(event);
616         entry->ip                       = ip;
617         entry->str                      = str;
618
619         __buffer_unlock_commit(buffer, event);
620         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
621
622         return 1;
623 }
624 EXPORT_SYMBOL_GPL(__trace_bputs);
625
626 #ifdef CONFIG_TRACER_SNAPSHOT
627 /**
628  * tracing_snapshot - take a snapshot of the current buffer.
629  *
630  * This causes a swap between the snapshot buffer and the current live
631  * tracing buffer. You can use this to take snapshots of the live
632  * trace when some condition is triggered, but continue to trace.
633  *
634  * Note, make sure to allocate the snapshot either with
635  * tracing_snapshot_alloc(), or manually with:
636  * echo 1 > /sys/kernel/debug/tracing/snapshot
637  *
638  * If the snapshot buffer is not allocated, it will stop tracing.
639  * Basically making a permanent snapshot.
640  */
641 void tracing_snapshot(void)
642 {
643         struct trace_array *tr = &global_trace;
644         struct tracer *tracer = tr->current_trace;
645         unsigned long flags;
646
647         if (in_nmi()) {
648                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
649                 internal_trace_puts("*** snapshot is being ignored        ***\n");
650                 return;
651         }
652
653         if (!tr->allocated_snapshot) {
654                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
655                 internal_trace_puts("*** stopping trace here!   ***\n");
656                 tracing_off();
657                 return;
658         }
659
660         /* Note, snapshot can not be used when the tracer uses it */
661         if (tracer->use_max_tr) {
662                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
663                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
664                 return;
665         }
666
667         local_irq_save(flags);
668         update_max_tr(tr, current, smp_processor_id());
669         local_irq_restore(flags);
670 }
671 EXPORT_SYMBOL_GPL(tracing_snapshot);
672
673 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
674                                         struct trace_buffer *size_buf, int cpu_id);
675 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
676
677 static int alloc_snapshot(struct trace_array *tr)
678 {
679         int ret;
680
681         if (!tr->allocated_snapshot) {
682
683                 /* allocate spare buffer */
684                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
685                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
686                 if (ret < 0)
687                         return ret;
688
689                 tr->allocated_snapshot = true;
690         }
691
692         return 0;
693 }
694
695 static void free_snapshot(struct trace_array *tr)
696 {
697         /*
698          * We don't free the ring buffer; instead, we resize it because
699          * the max_tr ring buffer has some state (e.g. ring->clock) and
700          * we want to preserve it.
701          */
702         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
703         set_buffer_entries(&tr->max_buffer, 1);
704         tracing_reset_online_cpus(&tr->max_buffer);
705         tr->allocated_snapshot = false;
706 }
707
708 /**
709  * tracing_alloc_snapshot - allocate snapshot buffer.
710  *
711  * This only allocates the snapshot buffer if it isn't already
712  * allocated - it doesn't also take a snapshot.
713  *
714  * This is meant to be used in cases where the snapshot buffer needs
715  * to be set up for events that can't sleep but need to be able to
716  * trigger a snapshot.
717  */
718 int tracing_alloc_snapshot(void)
719 {
720         struct trace_array *tr = &global_trace;
721         int ret;
722
723         ret = alloc_snapshot(tr);
724         WARN_ON(ret < 0);
725
726         return ret;
727 }
728 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
729
730 /**
731  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
732  *
733  * This is similar to tracing_snapshot(), but it will allocate the
734  * snapshot buffer if it isn't already allocated. Use this only
735  * where it is safe to sleep, as the allocation may sleep.
736  *
737  * This causes a swap between the snapshot buffer and the current live
738  * tracing buffer. You can use this to take snapshots of the live
739  * trace when some condition is triggered, but continue to trace.
740  */
741 void tracing_snapshot_alloc(void)
742 {
743         int ret;
744
745         ret = tracing_alloc_snapshot();
746         if (ret < 0)
747                 return;
748
749         tracing_snapshot();
750 }
751 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
752 #else
753 void tracing_snapshot(void)
754 {
755         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
756 }
757 EXPORT_SYMBOL_GPL(tracing_snapshot);
758 int tracing_alloc_snapshot(void)
759 {
760         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
761         return -ENODEV;
762 }
763 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
764 void tracing_snapshot_alloc(void)
765 {
766         /* Give warning */
767         tracing_snapshot();
768 }
769 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
770 #endif /* CONFIG_TRACER_SNAPSHOT */
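/*
 * Illustrative sketch (hypothetical caller, not compiled): allocate
 * the snapshot buffer from sleepable context first; tracing_snapshot()
 * itself may then be called from atomic context (but not NMI, as
 * noted above).
 */
#if 0	/* example only */
static void example_snapshot_setup_and_use(void)
{
	/* may sleep: do this during setup */
	if (tracing_alloc_snapshot() < 0)
		return;

	/* later, when the condition of interest hits: */
	tracing_snapshot();
}
#endif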
771
772 static void tracer_tracing_off(struct trace_array *tr)
773 {
774         if (tr->trace_buffer.buffer)
775                 ring_buffer_record_off(tr->trace_buffer.buffer);
776         /*
777          * This flag is looked at when buffers haven't been allocated
778          * yet, or by some tracers (like irqsoff), that just want to
779          * know if the ring buffer has been disabled, but it can handle
780          * races where it gets disabled but we still do a record.
781          * As the check is in the fast path of the tracers, it is more
782          * important to be fast than accurate.
783          */
784         tr->buffer_disabled = 1;
785         /* Make the flag seen by readers */
786         smp_wmb();
787 }
788
789 /**
790  * tracing_off - turn off tracing buffers
791  *
792  * This function stops the tracing buffers from recording data.
793  * It does not disable any overhead the tracers themselves may
794  * be causing. This function simply causes all recording to
795  * the ring buffers to fail.
796  */
797 void tracing_off(void)
798 {
799         tracer_tracing_off(&global_trace);
800 }
801 EXPORT_SYMBOL_GPL(tracing_off);
802
803 void disable_trace_on_warning(void)
804 {
805         if (__disable_trace_on_warning)
806                 tracing_off();
807 }
808
809 /**
810  * tracer_tracing_is_on - show real state of ring buffer enabled
811  * @tr : the trace array to know if ring buffer is enabled
812  *
813  * Shows real state of the ring buffer if it is enabled or not.
814  */
815 static int tracer_tracing_is_on(struct trace_array *tr)
816 {
817         if (tr->trace_buffer.buffer)
818                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
819         return !tr->buffer_disabled;
820 }
821
822 /**
823  * tracing_is_on - show state of ring buffers enabled
824  */
825 int tracing_is_on(void)
826 {
827         return tracer_tracing_is_on(&global_trace);
828 }
829 EXPORT_SYMBOL_GPL(tracing_is_on);
830
831 static int __init set_buf_size(char *str)
832 {
833         unsigned long buf_size;
834
835         if (!str)
836                 return 0;
837         buf_size = memparse(str, &str);
838         /* nr_entries can not be zero */
839         if (buf_size == 0)
840                 return 0;
841         trace_buf_size = buf_size;
842         return 1;
843 }
844 __setup("trace_buf_size=", set_buf_size);
845
846 static int __init set_tracing_thresh(char *str)
847 {
848         unsigned long threshold;
849         int ret;
850
851         if (!str)
852                 return 0;
853         ret = kstrtoul(str, 0, &threshold);
854         if (ret < 0)
855                 return 0;
856         tracing_thresh = threshold * 1000;
857         return 1;
858 }
859 __setup("tracing_thresh=", set_tracing_thresh);
860
861 unsigned long nsecs_to_usecs(unsigned long nsecs)
862 {
863         return nsecs / 1000;
864 }
865
866 /*
867  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
868  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
869  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
870  * of strings in the order that the enums were defined.
871  */
872 #undef C
873 #define C(a, b) b
874
875 /* These must match the bit positions in trace_iterator_flags */
876 static const char *trace_options[] = {
877         TRACE_FLAGS
878         NULL
879 };
880
881 static struct {
882         u64 (*func)(void);
883         const char *name;
884         int in_ns;              /* is this clock in nanoseconds? */
885 } trace_clocks[] = {
886         { trace_clock_local,            "local",        1 },
887         { trace_clock_global,           "global",       1 },
888         { trace_clock_counter,          "counter",      0 },
889         { trace_clock_jiffies,          "uptime",       0 },
890         { trace_clock,                  "perf",         1 },
891         { ktime_get_mono_fast_ns,       "mono",         1 },
892         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
893         ARCH_TRACE_CLOCKS
894 };
895
896 /*
897  * trace_parser_get_init - gets the buffer for trace parser
898  */
899 int trace_parser_get_init(struct trace_parser *parser, int size)
900 {
901         memset(parser, 0, sizeof(*parser));
902
903         parser->buffer = kmalloc(size, GFP_KERNEL);
904         if (!parser->buffer)
905                 return 1;
906
907         parser->size = size;
908         return 0;
909 }
910
911 /*
912  * trace_parser_put - frees the buffer for trace parser
913  */
914 void trace_parser_put(struct trace_parser *parser)
915 {
916         kfree(parser->buffer);
917 }
918
919 /*
920  * trace_get_user - reads the user input string separated by space
921  * (matched by isspace(ch))
922  *
923  * For each string found, the 'struct trace_parser' is updated,
924  * and the function returns.
925  *
926  * Returns number of bytes read.
927  *
928  * See kernel/trace/trace.h for 'struct trace_parser' details.
929  */
930 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
931         size_t cnt, loff_t *ppos)
932 {
933         char ch;
934         size_t read = 0;
935         ssize_t ret;
936
937         if (!*ppos)
938                 trace_parser_clear(parser);
939
940         ret = get_user(ch, ubuf++);
941         if (ret)
942                 goto out;
943
944         read++;
945         cnt--;
946
947         /*
948          * The parser is not finished with the last write,
949          * continue reading the user input without skipping spaces.
950          */
951         if (!parser->cont) {
952                 /* skip white space */
953                 while (cnt && isspace(ch)) {
954                         ret = get_user(ch, ubuf++);
955                         if (ret)
956                                 goto out;
957                         read++;
958                         cnt--;
959                 }
960
961                 /* only spaces were written */
962                 if (isspace(ch)) {
963                         *ppos += read;
964                         ret = read;
965                         goto out;
966                 }
967
968                 parser->idx = 0;
969         }
970
971         /* read the non-space input */
972         while (cnt && !isspace(ch)) {
973                 if (parser->idx < parser->size - 1)
974                         parser->buffer[parser->idx++] = ch;
975                 else {
976                         ret = -EINVAL;
977                         goto out;
978                 }
979                 ret = get_user(ch, ubuf++);
980                 if (ret)
981                         goto out;
982                 read++;
983                 cnt--;
984         }
985
986         /* We either got finished input or we have to wait for another call. */
987         if (isspace(ch)) {
988                 parser->buffer[parser->idx] = 0;
989                 parser->cont = false;
990         } else if (parser->idx < parser->size - 1) {
991                 parser->cont = true;
992                 parser->buffer[parser->idx++] = ch;
993         } else {
994                 ret = -EINVAL;
995                 goto out;
996         }
997
998         *ppos += read;
999         ret = read;
1000
1001 out:
1002         return ret;
1003 }
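/*
 * Illustrative sketch (hypothetical write handler, not compiled): the
 * usual pattern for the parser above is one trace_parser_get_init() /
 * trace_get_user() / trace_parser_put() round per write, acting on the
 * token only once the parser reports it loaded and not continued
 * (helpers from trace.h).
 */
#if 0	/* example only */
static ssize_t example_token_write(const char __user *ubuf, size_t cnt,
				   loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 64))	/* 64: arbitrary token limit */
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read > 0 && trace_parser_loaded(&parser) &&
	    !trace_parser_cont(&parser)) {
		/* parser.buffer now holds one NUL-terminated token */
	}

	trace_parser_put(&parser);
	return read;
}
#endif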
1004
1005 /* TODO add a seq_buf_to_buffer() */
1006 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1007 {
1008         int len;
1009
1010         if (trace_seq_used(s) <= s->seq.readpos)
1011                 return -EBUSY;
1012
1013         len = trace_seq_used(s) - s->seq.readpos;
1014         if (cnt > len)
1015                 cnt = len;
1016         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1017
1018         s->seq.readpos += cnt;
1019         return cnt;
1020 }
1021
1022 unsigned long __read_mostly     tracing_thresh;
1023
1024 #ifdef CONFIG_TRACER_MAX_TRACE
1025 /*
1026  * Copy the new maximum trace into the separate maximum-trace
1027  * structure. (this way the maximum trace is permanently saved,
1028  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1029  */
1030 static void
1031 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1032 {
1033         struct trace_buffer *trace_buf = &tr->trace_buffer;
1034         struct trace_buffer *max_buf = &tr->max_buffer;
1035         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1036         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1037
1038         max_buf->cpu = cpu;
1039         max_buf->time_start = data->preempt_timestamp;
1040
1041         max_data->saved_latency = tr->max_latency;
1042         max_data->critical_start = data->critical_start;
1043         max_data->critical_end = data->critical_end;
1044
1045         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1046         max_data->pid = tsk->pid;
1047         /*
1048          * If tsk == current, then use current_uid(), as that does not use
1049          * RCU. The irq tracer can be called out of RCU scope.
1050          */
1051         if (tsk == current)
1052                 max_data->uid = current_uid();
1053         else
1054                 max_data->uid = task_uid(tsk);
1055
1056         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1057         max_data->policy = tsk->policy;
1058         max_data->rt_priority = tsk->rt_priority;
1059
1060         /* record this task's comm */
1061         tracing_record_cmdline(tsk);
1062 }
1063
1064 /**
1065  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1066  * @tr: tracer
1067  * @tsk: the task with the latency
1068  * @cpu: The cpu that initiated the trace.
1069  *
1070  * Flip the buffers between the @tr and the max_tr and record information
1071  * about which task was the cause of this latency.
1072  */
1073 void
1074 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1075 {
1076         struct ring_buffer *buf;
1077
1078         if (tr->stop_count)
1079                 return;
1080
1081         WARN_ON_ONCE(!irqs_disabled());
1082
1083         if (!tr->allocated_snapshot) {
1084                 /* Only the nop tracer should hit this when disabling */
1085                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1086                 return;
1087         }
1088
1089         arch_spin_lock(&tr->max_lock);
1090
1091         /* Inherit the recordable setting from trace_buffer */
1092         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1093                 ring_buffer_record_on(tr->max_buffer.buffer);
1094         else
1095                 ring_buffer_record_off(tr->max_buffer.buffer);
1096
1097         buf = tr->trace_buffer.buffer;
1098         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1099         tr->max_buffer.buffer = buf;
1100
1101         __update_max_tr(tr, tsk, cpu);
1102         arch_spin_unlock(&tr->max_lock);
1103 }
1104
1105 /**
1106  * update_max_tr_single - only copy one trace over, and reset the rest
1107  * @tr: tracer
1108  * @tsk: task with the latency
1109  * @cpu: the cpu of the buffer to copy.
1110  *
1111  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1112  */
1113 void
1114 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1115 {
1116         int ret;
1117
1118         if (tr->stop_count)
1119                 return;
1120
1121         WARN_ON_ONCE(!irqs_disabled());
1122         if (!tr->allocated_snapshot) {
1123                 /* Only the nop tracer should hit this when disabling */
1124                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1125                 return;
1126         }
1127
1128         arch_spin_lock(&tr->max_lock);
1129
1130         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1131
1132         if (ret == -EBUSY) {
1133                 /*
1134                  * We failed to swap the buffer due to a commit taking
1135                  * place on this CPU. We fail to record, but we reset
1136                  * the max trace buffer (no one writes directly to it)
1137                  * and flag that it failed.
1138                  */
1139                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1140                         "Failed to swap buffers due to commit in progress\n");
1141         }
1142
1143         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1144
1145         __update_max_tr(tr, tsk, cpu);
1146         arch_spin_unlock(&tr->max_lock);
1147 }
1148 #endif /* CONFIG_TRACER_MAX_TRACE */
1149
1150 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1151 {
1152         /* Iterators are static, they should be filled or empty */
1153         if (trace_buffer_iter(iter, iter->cpu_file))
1154                 return 0;
1155
1156         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1157                                 full);
1158 }
1159
1160 #ifdef CONFIG_FTRACE_STARTUP_TEST
1161 static int run_tracer_selftest(struct tracer *type)
1162 {
1163         struct trace_array *tr = &global_trace;
1164         struct tracer *saved_tracer = tr->current_trace;
1165         int ret;
1166
1167         if (!type->selftest || tracing_selftest_disabled)
1168                 return 0;
1169
1170         /*
1171          * Run a selftest on this tracer.
1172          * Here we reset the trace buffer, and set the current
1173          * tracer to be this tracer. The tracer can then run some
1174          * internal tracing to verify that everything is in order.
1175          * If we fail, we do not register this tracer.
1176          */
1177         tracing_reset_online_cpus(&tr->trace_buffer);
1178
1179         tr->current_trace = type;
1180
1181 #ifdef CONFIG_TRACER_MAX_TRACE
1182         if (type->use_max_tr) {
1183                 /* If we expanded the buffers, make sure the max is expanded too */
1184                 if (ring_buffer_expanded)
1185                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1186                                            RING_BUFFER_ALL_CPUS);
1187                 tr->allocated_snapshot = true;
1188         }
1189 #endif
1190
1191         /* the test is responsible for initializing and enabling */
1192         pr_info("Testing tracer %s: ", type->name);
1193         ret = type->selftest(type, tr);
1194         /* the test is responsible for resetting too */
1195         tr->current_trace = saved_tracer;
1196         if (ret) {
1197                 printk(KERN_CONT "FAILED!\n");
1198                 /* Add the warning after printing 'FAILED' */
1199                 WARN_ON(1);
1200                 return -1;
1201         }
1202         /* Only reset on passing, to avoid touching corrupted buffers */
1203         tracing_reset_online_cpus(&tr->trace_buffer);
1204
1205 #ifdef CONFIG_TRACER_MAX_TRACE
1206         if (type->use_max_tr) {
1207                 tr->allocated_snapshot = false;
1208
1209                 /* Shrink the max buffer again */
1210                 if (ring_buffer_expanded)
1211                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1212                                            RING_BUFFER_ALL_CPUS);
1213         }
1214 #endif
1215
1216         printk(KERN_CONT "PASSED\n");
1217         return 0;
1218 }
1219 #else
1220 static inline int run_tracer_selftest(struct tracer *type)
1221 {
1222         return 0;
1223 }
1224 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1225
1226 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1227
1228 static void __init apply_trace_boot_options(void);
1229
1230 /**
1231  * register_tracer - register a tracer with the ftrace system.
1232  * @type: the plugin for the tracer
1233  *
1234  * Register a new plugin tracer.
1235  */
1236 int __init register_tracer(struct tracer *type)
1237 {
1238         struct tracer *t;
1239         int ret = 0;
1240
1241         if (!type->name) {
1242                 pr_info("Tracer must have a name\n");
1243                 return -1;
1244         }
1245
1246         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1247                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1248                 return -1;
1249         }
1250
1251         mutex_lock(&trace_types_lock);
1252
1253         tracing_selftest_running = true;
1254
1255         for (t = trace_types; t; t = t->next) {
1256                 if (strcmp(type->name, t->name) == 0) {
1257                         /* already found */
1258                         pr_info("Tracer %s already registered\n",
1259                                 type->name);
1260                         ret = -1;
1261                         goto out;
1262                 }
1263         }
1264
1265         if (!type->set_flag)
1266                 type->set_flag = &dummy_set_flag;
1267         if (!type->flags)
1268                 type->flags = &dummy_tracer_flags;
1269         else
1270                 if (!type->flags->opts)
1271                         type->flags->opts = dummy_tracer_opt;
1272
1273         ret = run_tracer_selftest(type);
1274         if (ret < 0)
1275                 goto out;
1276
1277         type->next = trace_types;
1278         trace_types = type;
1279         add_tracer_options(&global_trace, type);
1280
1281  out:
1282         tracing_selftest_running = false;
1283         mutex_unlock(&trace_types_lock);
1284
1285         if (ret || !default_bootup_tracer)
1286                 goto out_unlock;
1287
1288         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1289                 goto out_unlock;
1290
1291         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1292         /* Do we want this tracer to start on bootup? */
1293         tracing_set_tracer(&global_trace, type->name);
1294         default_bootup_tracer = NULL;
1295
1296         apply_trace_boot_options();
1297
1298         /* disable other selftests, since this will break it. */
1299         tracing_selftest_disabled = true;
1300 #ifdef CONFIG_FTRACE_STARTUP_TEST
1301         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1302                type->name);
1303 #endif
1304
1305  out_unlock:
1306         return ret;
1307 }
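/*
 * Illustrative sketch (hypothetical tracer, not compiled): the minimum
 * a plugin needs is a unique name shorter than MAX_TRACER_SIZE; the
 * init/reset callbacks shown are typical, but their bodies here are
 * placeholders.
 */
#if 0	/* example only */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
#endif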
1308
1309 void tracing_reset(struct trace_buffer *buf, int cpu)
1310 {
1311         struct ring_buffer *buffer = buf->buffer;
1312
1313         if (!buffer)
1314                 return;
1315
1316         ring_buffer_record_disable(buffer);
1317
1318         /* Make sure all commits have finished */
1319         synchronize_sched();
1320         ring_buffer_reset_cpu(buffer, cpu);
1321
1322         ring_buffer_record_enable(buffer);
1323 }
1324
1325 void tracing_reset_online_cpus(struct trace_buffer *buf)
1326 {
1327         struct ring_buffer *buffer = buf->buffer;
1328         int cpu;
1329
1330         if (!buffer)
1331                 return;
1332
1333         ring_buffer_record_disable(buffer);
1334
1335         /* Make sure all commits have finished */
1336         synchronize_sched();
1337
1338         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1339
1340         for_each_online_cpu(cpu)
1341                 ring_buffer_reset_cpu(buffer, cpu);
1342
1343         ring_buffer_record_enable(buffer);
1344 }
1345
1346 /* Must have trace_types_lock held */
1347 void tracing_reset_all_online_cpus(void)
1348 {
1349         struct trace_array *tr;
1350
1351         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1352                 tracing_reset_online_cpus(&tr->trace_buffer);
1353 #ifdef CONFIG_TRACER_MAX_TRACE
1354                 tracing_reset_online_cpus(&tr->max_buffer);
1355 #endif
1356         }
1357 }
1358
1359 #define SAVED_CMDLINES_DEFAULT 128
1360 #define NO_CMDLINE_MAP UINT_MAX
1361 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1362 struct saved_cmdlines_buffer {
1363         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1364         unsigned *map_cmdline_to_pid;
1365         unsigned cmdline_num;
1366         int cmdline_idx;
1367         char *saved_cmdlines;
1368 };
1369 static struct saved_cmdlines_buffer *savedcmd;
1370
1371 /* temporarily disable recording */
1372 static atomic_t trace_record_cmdline_disabled __read_mostly;
1373
1374 static inline char *get_saved_cmdlines(int idx)
1375 {
1376         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1377 }
1378
1379 static inline void set_cmdline(int idx, const char *cmdline)
1380 {
1381         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1382 }
1383
1384 static int allocate_cmdlines_buffer(unsigned int val,
1385                                     struct saved_cmdlines_buffer *s)
1386 {
1387         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1388                                         GFP_KERNEL);
1389         if (!s->map_cmdline_to_pid)
1390                 return -ENOMEM;
1391
1392         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1393         if (!s->saved_cmdlines) {
1394                 kfree(s->map_cmdline_to_pid);
1395                 return -ENOMEM;
1396         }
1397
1398         s->cmdline_idx = 0;
1399         s->cmdline_num = val;
1400         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1401                sizeof(s->map_pid_to_cmdline));
1402         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1403                val * sizeof(*s->map_cmdline_to_pid));
1404
1405         return 0;
1406 }
1407
1408 static int trace_create_savedcmd(void)
1409 {
1410         int ret;
1411
1412         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1413         if (!savedcmd)
1414                 return -ENOMEM;
1415
1416         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1417         if (ret < 0) {
1418                 kfree(savedcmd);
1419                 savedcmd = NULL;
1420                 return -ENOMEM;
1421         }
1422
1423         return 0;
1424 }
1425
1426 int is_tracing_stopped(void)
1427 {
1428         return global_trace.stop_count;
1429 }
1430
1431 /**
1432  * tracing_start - quick start of the tracer
1433  *
1434  * If tracing is enabled but was stopped by tracing_stop,
1435  * this will start the tracer back up.
1436  */
1437 void tracing_start(void)
1438 {
1439         struct ring_buffer *buffer;
1440         unsigned long flags;
1441
1442         if (tracing_disabled)
1443                 return;
1444
1445         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1446         if (--global_trace.stop_count) {
1447                 if (global_trace.stop_count < 0) {
1448                         /* Someone screwed up their debugging */
1449                         WARN_ON_ONCE(1);
1450                         global_trace.stop_count = 0;
1451                 }
1452                 goto out;
1453         }
1454
1455         /* Prevent the buffers from switching */
1456         arch_spin_lock(&global_trace.max_lock);
1457
1458         buffer = global_trace.trace_buffer.buffer;
1459         if (buffer)
1460                 ring_buffer_record_enable(buffer);
1461
1462 #ifdef CONFIG_TRACER_MAX_TRACE
1463         buffer = global_trace.max_buffer.buffer;
1464         if (buffer)
1465                 ring_buffer_record_enable(buffer);
1466 #endif
1467
1468         arch_spin_unlock(&global_trace.max_lock);
1469
1470  out:
1471         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1472 }
1473
1474 static void tracing_start_tr(struct trace_array *tr)
1475 {
1476         struct ring_buffer *buffer;
1477         unsigned long flags;
1478
1479         if (tracing_disabled)
1480                 return;
1481
1482         /* If global, we need to also start the max tracer */
1483         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1484                 return tracing_start();
1485
1486         raw_spin_lock_irqsave(&tr->start_lock, flags);
1487
1488         if (--tr->stop_count) {
1489                 if (tr->stop_count < 0) {
1490                         /* Someone screwed up their debugging */
1491                         WARN_ON_ONCE(1);
1492                         tr->stop_count = 0;
1493                 }
1494                 goto out;
1495         }
1496
1497         buffer = tr->trace_buffer.buffer;
1498         if (buffer)
1499                 ring_buffer_record_enable(buffer);
1500
1501  out:
1502         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1503 }
1504
1505 /**
1506  * tracing_stop - quick stop of the tracer
1507  *
1508  * Light weight way to stop tracing. Use in conjunction with
1509  * tracing_start.
1510  */
1511 void tracing_stop(void)
1512 {
1513         struct ring_buffer *buffer;
1514         unsigned long flags;
1515
1516         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1517         if (global_trace.stop_count++)
1518                 goto out;
1519
1520         /* Prevent the buffers from switching */
1521         arch_spin_lock(&global_trace.max_lock);
1522
1523         buffer = global_trace.trace_buffer.buffer;
1524         if (buffer)
1525                 ring_buffer_record_disable(buffer);
1526
1527 #ifdef CONFIG_TRACER_MAX_TRACE
1528         buffer = global_trace.max_buffer.buffer;
1529         if (buffer)
1530                 ring_buffer_record_disable(buffer);
1531 #endif
1532
1533         arch_spin_unlock(&global_trace.max_lock);
1534
1535  out:
1536         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1537 }
1538
1539 static void tracing_stop_tr(struct trace_array *tr)
1540 {
1541         struct ring_buffer *buffer;
1542         unsigned long flags;
1543
1544         /* If global, we need to also stop the max tracer */
1545         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1546                 return tracing_stop();
1547
1548         raw_spin_lock_irqsave(&tr->start_lock, flags);
1549         if (tr->stop_count++)
1550                 goto out;
1551
1552         buffer = tr->trace_buffer.buffer;
1553         if (buffer)
1554                 ring_buffer_record_disable(buffer);
1555
1556  out:
1557         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1558 }
1559
1560 void trace_stop_cmdline_recording(void);
1561
1562 static int trace_save_cmdline(struct task_struct *tsk)
1563 {
1564         unsigned pid, idx;
1565
1566         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1567                 return 0;
1568
1569         /*
1570          * It's not the end of the world if we don't get
1571          * the lock, but we also don't want to spin
1572          * nor do we want to disable interrupts,
1573          * so if we miss here, then better luck next time.
1574          */
1575         if (!arch_spin_trylock(&trace_cmdline_lock))
1576                 return 0;
1577
1578         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1579         if (idx == NO_CMDLINE_MAP) {
1580                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1581
1582                 /*
1583                  * Check whether the cmdline buffer at idx has a pid
1584                  * mapped. We are going to overwrite that entry so we
1585                  * need to clear the map_pid_to_cmdline. Otherwise we
1586                  * would read the new comm for the old pid.
1587                  */
1588                 pid = savedcmd->map_cmdline_to_pid[idx];
1589                 if (pid != NO_CMDLINE_MAP)
1590                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1591
1592                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1593                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1594
1595                 savedcmd->cmdline_idx = idx;
1596         }
1597
1598         set_cmdline(idx, tsk->comm);
1599
1600         arch_spin_unlock(&trace_cmdline_lock);
1601
1602         return 1;
1603 }
1604
1605 static void __trace_find_cmdline(int pid, char comm[])
1606 {
1607         unsigned map;
1608
1609         if (!pid) {
1610                 strcpy(comm, "<idle>");
1611                 return;
1612         }
1613
1614         if (WARN_ON_ONCE(pid < 0)) {
1615                 strcpy(comm, "<XXX>");
1616                 return;
1617         }
1618
1619         if (pid > PID_MAX_DEFAULT) {
1620                 strcpy(comm, "<...>");
1621                 return;
1622         }
1623
1624         map = savedcmd->map_pid_to_cmdline[pid];
1625         if (map != NO_CMDLINE_MAP)
1626                 strcpy(comm, get_saved_cmdlines(map));
1627         else
1628                 strcpy(comm, "<...>");
1629 }
1630
1631 void trace_find_cmdline(int pid, char comm[])
1632 {
1633         preempt_disable();
1634         arch_spin_lock(&trace_cmdline_lock);
1635
1636         __trace_find_cmdline(pid, comm);
1637
1638         arch_spin_unlock(&trace_cmdline_lock);
1639         preempt_enable();
1640 }
1641
1642 void tracing_record_cmdline(struct task_struct *tsk)
1643 {
1644         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1645                 return;
1646
1647         if (!__this_cpu_read(trace_cmdline_save))
1648                 return;
1649
1650         if (trace_save_cmdline(tsk))
1651                 __this_cpu_write(trace_cmdline_save, false);
1652 }
1653
1654 void
1655 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1656                              int pc)
1657 {
1658         struct task_struct *tsk = current;
1659
1660         entry->preempt_count            = pc & 0xff;
1661         entry->pid                      = (tsk) ? tsk->pid : 0;
1662         entry->flags =
1663 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1664                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1665 #else
1666                 TRACE_FLAG_IRQS_NOSUPPORT |
1667 #endif
1668                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1669                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1670                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1671                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1672 }
1673 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1674
1675 struct ring_buffer_event *
1676 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1677                           int type,
1678                           unsigned long len,
1679                           unsigned long flags, int pc)
1680 {
1681         struct ring_buffer_event *event;
1682
1683         event = ring_buffer_lock_reserve(buffer, len);
1684         if (event != NULL) {
1685                 struct trace_entry *ent = ring_buffer_event_data(event);
1686
1687                 tracing_generic_entry_update(ent, flags, pc);
1688                 ent->type = type;
1689         }
1690
1691         return event;
1692 }
1693
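/*
 * Commit @event to @buffer and mark this CPU so that the next
 * tracing_record_cmdline() call saves the current comm.
 */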
1694 void
1695 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1696 {
1697         __this_cpu_write(trace_cmdline_save, true);
1698         ring_buffer_unlock_commit(buffer, event);
1699 }
1700
1701 void trace_buffer_unlock_commit(struct trace_array *tr,
1702                                 struct ring_buffer *buffer,
1703                                 struct ring_buffer_event *event,
1704                                 unsigned long flags, int pc)
1705 {
1706         __buffer_unlock_commit(buffer, event);
1707
1708         ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
1709         ftrace_trace_userstack(buffer, flags, pc);
1710 }
1711 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1712
1713 static struct ring_buffer *temp_buffer;
1714
1715 struct ring_buffer_event *
1716 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1717                           struct trace_event_file *trace_file,
1718                           int type, unsigned long len,
1719                           unsigned long flags, int pc)
1720 {
1721         struct ring_buffer_event *entry;
1722
1723         *current_rb = trace_file->tr->trace_buffer.buffer;
1724         entry = trace_buffer_lock_reserve(*current_rb,
1725                                          type, len, flags, pc);
1726         /*
1727          * If tracing is off, but we have triggers enabled
1728          * we still need to look at the event data. Use the temp_buffer
1729                  * to store the trace event for the trigger to use. It's recursion
1730          * safe and will not be recorded anywhere.
1731          */
1732         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1733                 *current_rb = temp_buffer;
1734                 entry = trace_buffer_lock_reserve(*current_rb,
1735                                                   type, len, flags, pc);
1736         }
1737         return entry;
1738 }
1739 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1740
1741 struct ring_buffer_event *
1742 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1743                                   int type, unsigned long len,
1744                                   unsigned long flags, int pc)
1745 {
1746         *current_rb = global_trace.trace_buffer.buffer;
1747         return trace_buffer_lock_reserve(*current_rb,
1748                                          type, len, flags, pc);
1749 }
1750 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1751
1752 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1753                                      struct ring_buffer *buffer,
1754                                      struct ring_buffer_event *event,
1755                                      unsigned long flags, int pc,
1756                                      struct pt_regs *regs)
1757 {
1758         __buffer_unlock_commit(buffer, event);
1759
1760         /*
1761          * If regs is not set, then skip the following callers:
1762          *   trace_buffer_unlock_commit_regs
1763          *   event_trigger_unlock_commit
1764          *   trace_event_buffer_commit
1765          *   trace_event_raw_event_sched_switch
1766          * Note, we can still get here via blktrace, wakeup tracer
1767          * and mmiotrace, but that's ok if they lose a function or
1768          * two. They are not that meaningful.
1769          */
1770         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
1771         ftrace_trace_userstack(buffer, flags, pc);
1772 }
1773 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1774
1775 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1776                                          struct ring_buffer_event *event)
1777 {
1778         ring_buffer_discard_commit(buffer, event);
1779 }
1780 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1781
1782 void
1783 trace_function(struct trace_array *tr,
1784                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1785                int pc)
1786 {
1787         struct trace_event_call *call = &event_function;
1788         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1789         struct ring_buffer_event *event;
1790         struct ftrace_entry *entry;
1791
1792         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1793                                           flags, pc);
1794         if (!event)
1795                 return;
1796         entry   = ring_buffer_event_data(event);
1797         entry->ip                       = ip;
1798         entry->parent_ip                = parent_ip;
1799
1800         if (!call_filter_check_discard(call, entry, buffer, event))
1801                 __buffer_unlock_commit(buffer, event);
1802 }
1803
1804 #ifdef CONFIG_STACKTRACE
1805
1806 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1807 struct ftrace_stack {
1808         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1809 };
1810
1811 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1812 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1813
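/*
 * Save a kernel stack trace into @buffer. The per-cpu ftrace_stack is
 * used as scratch space when it is not already reserved by an
 * interrupted caller; otherwise the trace is saved directly into the
 * ring buffer event, limited to FTRACE_STACK_ENTRIES entries.
 */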
1814 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1815                                  unsigned long flags,
1816                                  int skip, int pc, struct pt_regs *regs)
1817 {
1818         struct trace_event_call *call = &event_kernel_stack;
1819         struct ring_buffer_event *event;
1820         struct stack_entry *entry;
1821         struct stack_trace trace;
1822         int use_stack;
1823         int size = FTRACE_STACK_ENTRIES;
1824
1825         trace.nr_entries        = 0;
1826         trace.skip              = skip;
1827
1828         /*
1829          * Add two, for this function and the call to save_stack_trace().
1830          * If regs is set, then these functions will not be in the way.
1831          */
1832         if (!regs)
1833                 trace.skip += 2;
1834
1835         /*
1836          * Since events can happen in NMIs there's no safe way to
1837          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1838          * or NMI comes in, it will just have to use the default
1839          * FTRACE_STACK_ENTRIES and save the trace directly into the event.
1840          */
1841         preempt_disable_notrace();
1842
1843         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1844         /*
1845          * We don't need any atomic variables, just a barrier.
1846          * If an interrupt comes in, we don't care, because it would
1847          * have exited and put the counter back to what we want.
1848          * We just need a barrier to keep gcc from moving things
1849          * around.
1850          */
1851         barrier();
1852         if (use_stack == 1) {
1853                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1854                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1855
1856                 if (regs)
1857                         save_stack_trace_regs(regs, &trace);
1858                 else
1859                         save_stack_trace(&trace);
1860
1861                 if (trace.nr_entries > size)
1862                         size = trace.nr_entries;
1863         } else
1864                 /* From now on, use_stack is a boolean */
1865                 use_stack = 0;
1866
1867         size *= sizeof(unsigned long);
1868
1869         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1870                                           sizeof(*entry) + size, flags, pc);
1871         if (!event)
1872                 goto out;
1873         entry = ring_buffer_event_data(event);
1874
1875         memset(&entry->caller, 0, size);
1876
1877         if (use_stack)
1878                 memcpy(&entry->caller, trace.entries,
1879                        trace.nr_entries * sizeof(unsigned long));
1880         else {
1881                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1882                 trace.entries           = entry->caller;
1883                 if (regs)
1884                         save_stack_trace_regs(regs, &trace);
1885                 else
1886                         save_stack_trace(&trace);
1887         }
1888
1889         entry->size = trace.nr_entries;
1890
1891         if (!call_filter_check_discard(call, entry, buffer, event))
1892                 __buffer_unlock_commit(buffer, event);
1893
1894  out:
1895         /* Again, don't let gcc optimize things here */
1896         barrier();
1897         __this_cpu_dec(ftrace_stack_reserve);
1898         preempt_enable_notrace();
1899
1900 }
1901
1902 static inline void ftrace_trace_stack(struct trace_array *tr,
1903                                       struct ring_buffer *buffer,
1904                                       unsigned long flags,
1905                                       int skip, int pc, struct pt_regs *regs)
1906 {
1907         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1908                 return;
1909
1910         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1911 }
1912
1913 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1914                    int pc)
1915 {
1916         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1917 }
1918
1919 /**
1920  * trace_dump_stack - record a stack back trace in the trace buffer
1921  * @skip: Number of functions to skip (helper handlers)
1922  */
1923 void trace_dump_stack(int skip)
1924 {
1925         unsigned long flags;
1926
1927         if (tracing_disabled || tracing_selftest_running)
1928                 return;
1929
1930         local_save_flags(flags);
1931
1932         /*
1933          * Skip 3 more, seems to get us at the caller of
1934          * this function.
1935          */
1936         skip += 3;
1937         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1938                              flags, skip, preempt_count(), NULL);
1939 }
1940
1941 static DEFINE_PER_CPU(int, user_stack_count);
1942
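/*
 * Record the current task's user-space stack into @buffer. Does
 * nothing unless the userstacktrace option is set, bails out in NMI
 * context (page faults cannot be handled there), and uses the per-cpu
 * user_stack_count to avoid recursing into itself.
 */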
1943 void
1944 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1945 {
1946         struct trace_event_call *call = &event_user_stack;
1947         struct ring_buffer_event *event;
1948         struct userstack_entry *entry;
1949         struct stack_trace trace;
1950
1951         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
1952                 return;
1953
1954         /*
1955          * NMIs can not handle page faults, even with fixups.
1956          * Saving the user stack can (and often does) fault.
1957          */
1958         if (unlikely(in_nmi()))
1959                 return;
1960
1961         /*
1962          * prevent recursion, since the user stack tracing may
1963          * trigger other kernel events.
1964          */
1965         preempt_disable();
1966         if (__this_cpu_read(user_stack_count))
1967                 goto out;
1968
1969         __this_cpu_inc(user_stack_count);
1970
1971         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1972                                           sizeof(*entry), flags, pc);
1973         if (!event)
1974                 goto out_drop_count;
1975         entry   = ring_buffer_event_data(event);
1976
1977         entry->tgid             = current->tgid;
1978         memset(&entry->caller, 0, sizeof(entry->caller));
1979
1980         trace.nr_entries        = 0;
1981         trace.max_entries       = FTRACE_STACK_ENTRIES;
1982         trace.skip              = 0;
1983         trace.entries           = entry->caller;
1984
1985         save_stack_trace_user(&trace);
1986         if (!call_filter_check_discard(call, entry, buffer, event))
1987                 __buffer_unlock_commit(buffer, event);
1988
1989  out_drop_count:
1990         __this_cpu_dec(user_stack_count);
1991  out:
1992         preempt_enable();
1993 }
1994
1995 #ifdef UNUSED
1996 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1997 {
1998         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
1999 }
2000 #endif /* UNUSED */
2001
2002 #endif /* CONFIG_STACKTRACE */
2003
2004 /* created for use with alloc_percpu */
2005 struct trace_buffer_struct {
2006         char buffer[TRACE_BUF_SIZE];
2007 };
2008
2009 static struct trace_buffer_struct *trace_percpu_buffer;
2010 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
2011 static struct trace_buffer_struct *trace_percpu_irq_buffer;
2012 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
2013
2014 /*
2015  * The buffer used is dependent on the context. There is a per cpu
2016  * buffer for normal context, softirq context, hard irq context and
2017  * for NMI context. This allows for lockless recording.
2018  *
2019  * Note, if the buffers failed to be allocated, then this returns NULL
2020  */
2021 static char *get_trace_buf(void)
2022 {
2023         struct trace_buffer_struct *percpu_buffer;
2024
2025         /*
2026          * If we have allocated per cpu buffers, then we do not
2027          * need to do any locking.
2028          */
2029         if (in_nmi())
2030                 percpu_buffer = trace_percpu_nmi_buffer;
2031         else if (in_irq())
2032                 percpu_buffer = trace_percpu_irq_buffer;
2033         else if (in_softirq())
2034                 percpu_buffer = trace_percpu_sirq_buffer;
2035         else
2036                 percpu_buffer = trace_percpu_buffer;
2037
2038         if (!percpu_buffer)
2039                 return NULL;
2040
2041         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2042 }
2043
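/*
 * Allocate the four per-cpu trace_printk() buffers (normal, softirq,
 * irq and NMI context). On failure, anything already allocated is
 * freed and -ENOMEM is returned after a warning.
 */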
2044 static int alloc_percpu_trace_buffer(void)
2045 {
2046         struct trace_buffer_struct *buffers;
2047         struct trace_buffer_struct *sirq_buffers;
2048         struct trace_buffer_struct *irq_buffers;
2049         struct trace_buffer_struct *nmi_buffers;
2050
2051         buffers = alloc_percpu(struct trace_buffer_struct);
2052         if (!buffers)
2053                 goto err_warn;
2054
2055         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2056         if (!sirq_buffers)
2057                 goto err_sirq;
2058
2059         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2060         if (!irq_buffers)
2061                 goto err_irq;
2062
2063         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2064         if (!nmi_buffers)
2065                 goto err_nmi;
2066
2067         trace_percpu_buffer = buffers;
2068         trace_percpu_sirq_buffer = sirq_buffers;
2069         trace_percpu_irq_buffer = irq_buffers;
2070         trace_percpu_nmi_buffer = nmi_buffers;
2071
2072         return 0;
2073
2074  err_nmi:
2075         free_percpu(irq_buffers);
2076  err_irq:
2077         free_percpu(sirq_buffers);
2078  err_sirq:
2079         free_percpu(buffers);
2080  err_warn:
2081         WARN(1, "Could not allocate percpu trace_printk buffer");
2082         return -ENOMEM;
2083 }
2084
2085 static int buffers_allocated;
2086
2087 void trace_printk_init_buffers(void)
2088 {
2089         if (buffers_allocated)
2090                 return;
2091
2092         if (alloc_percpu_trace_buffer())
2093                 return;
2094
2095         /* trace_printk() is for debug use only. Don't use it in production. */
2096
2097         pr_warning("\n");
2098         pr_warning("**********************************************************\n");
2099         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2100         pr_warning("**                                                      **\n");
2101         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2102         pr_warning("**                                                      **\n");
2103         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2104         pr_warning("** unsafe for production use.                           **\n");
2105         pr_warning("**                                                      **\n");
2106         pr_warning("** If you see this message and you are not debugging    **\n");
2107         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2108         pr_warning("**                                                      **\n");
2109         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2110         pr_warning("**********************************************************\n");
2111
2112         /* Expand the buffers to set size */
2113         tracing_update_buffers();
2114
2115         buffers_allocated = 1;
2116
2117         /*
2118          * trace_printk_init_buffers() can be called by modules.
2119          * If that happens, then we need to start cmdline recording
2120          * directly here. If the global_trace.buffer is already
2121          * allocated here, then this was called by module code.
2122          */
2123         if (global_trace.trace_buffer.buffer)
2124                 tracing_start_cmdline_record();
2125 }
2126
2127 void trace_printk_start_comm(void)
2128 {
2129         /* Start tracing comms if trace printk is set */
2130         if (!buffers_allocated)
2131                 return;
2132         tracing_start_cmdline_record();
2133 }
2134
2135 static void trace_printk_start_stop_comm(int enabled)
2136 {
2137         if (!buffers_allocated)
2138                 return;
2139
2140         if (enabled)
2141                 tracing_start_cmdline_record();
2142         else
2143                 tracing_stop_cmdline_record();
2144 }
2145
2146 /**
2147  * trace_vbprintk - write binary msg to tracing buffer
2148  * @ip: The address of the caller
 * @fmt: The string format to write to the buffer
 * @args: Arguments for @fmt
2149  */
2150 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2151 {
2152         struct trace_event_call *call = &event_bprint;
2153         struct ring_buffer_event *event;
2154         struct ring_buffer *buffer;
2155         struct trace_array *tr = &global_trace;
2156         struct bprint_entry *entry;
2157         unsigned long flags;
2158         char *tbuffer;
2159         int len = 0, size, pc;
2160
2161         if (unlikely(tracing_selftest_running || tracing_disabled))
2162                 return 0;
2163
2164         /* Don't pollute graph traces with trace_vprintk internals */
2165         pause_graph_tracing();
2166
2167         pc = preempt_count();
2168         preempt_disable_notrace();
2169
2170         tbuffer = get_trace_buf();
2171         if (!tbuffer) {
2172                 len = 0;
2173                 goto out;
2174         }
2175
2176         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2177
2178         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2179                 goto out;
2180
2181         local_save_flags(flags);
2182         size = sizeof(*entry) + sizeof(u32) * len;
2183         buffer = tr->trace_buffer.buffer;
2184         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2185                                           flags, pc);
2186         if (!event)
2187                 goto out;
2188         entry = ring_buffer_event_data(event);
2189         entry->ip                       = ip;
2190         entry->fmt                      = fmt;
2191
2192         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2193         if (!call_filter_check_discard(call, entry, buffer, event)) {
2194                 __buffer_unlock_commit(buffer, event);
2195                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2196         }
2197
2198 out:
2199         preempt_enable_notrace();
2200         unpause_graph_tracing();
2201
2202         return len;
2203 }
2204 EXPORT_SYMBOL_GPL(trace_vbprintk);
2205
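/*
 * Like trace_vbprintk(), but format the string with vscnprintf() and
 * record it as a plain TRACE_PRINT entry on @buffer.
 */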
2206 __printf(3, 0)
2207 static int
2208 __trace_array_vprintk(struct ring_buffer *buffer,
2209                       unsigned long ip, const char *fmt, va_list args)
2210 {
2211         struct trace_event_call *call = &event_print;
2212         struct ring_buffer_event *event;
2213         int len = 0, size, pc;
2214         struct print_entry *entry;
2215         unsigned long flags;
2216         char *tbuffer;
2217
2218         if (tracing_disabled || tracing_selftest_running)
2219                 return 0;
2220
2221         /* Don't pollute graph traces with trace_vprintk internals */
2222         pause_graph_tracing();
2223
2224         pc = preempt_count();
2225         preempt_disable_notrace();
2226
2227
2228         tbuffer = get_trace_buf();
2229         if (!tbuffer) {
2230                 len = 0;
2231                 goto out;
2232         }
2233
2234         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2235
2236         local_save_flags(flags);
2237         size = sizeof(*entry) + len + 1;
2238         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2239                                           flags, pc);
2240         if (!event)
2241                 goto out;
2242         entry = ring_buffer_event_data(event);
2243         entry->ip = ip;
2244
2245         memcpy(&entry->buf, tbuffer, len + 1);
2246         if (!call_filter_check_discard(call, entry, buffer, event)) {
2247                 __buffer_unlock_commit(buffer, event);
2248                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2249         }
2250  out:
2251         preempt_enable_notrace();
2252         unpause_graph_tracing();
2253
2254         return len;
2255 }
2256
2257 __printf(3, 0)
2258 int trace_array_vprintk(struct trace_array *tr,
2259                         unsigned long ip, const char *fmt, va_list args)
2260 {
2261         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2262 }
2263
2264 __printf(3, 0)
2265 int trace_array_printk(struct trace_array *tr,
2266                        unsigned long ip, const char *fmt, ...)
2267 {
2268         int ret;
2269         va_list ap;
2270
2271         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2272                 return 0;
2273
2274         va_start(ap, fmt);
2275         ret = trace_array_vprintk(tr, ip, fmt, ap);
2276         va_end(ap);
2277         return ret;
2278 }
2279
2280 __printf(3, 4)
2281 int trace_array_printk_buf(struct ring_buffer *buffer,
2282                            unsigned long ip, const char *fmt, ...)
2283 {
2284         int ret;
2285         va_list ap;
2286
2287         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2288                 return 0;
2289
2290         va_start(ap, fmt);
2291         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2292         va_end(ap);
2293         return ret;
2294 }
2295
2296 __printf(2, 0)
2297 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2298 {
2299         return trace_array_vprintk(&global_trace, ip, fmt, args);
2300 }
2301 EXPORT_SYMBOL_GPL(trace_vprintk);
2302
2303 static void trace_iterator_increment(struct trace_iterator *iter)
2304 {
2305         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2306
2307         iter->idx++;
2308         if (buf_iter)
2309                 ring_buffer_read(buf_iter, NULL);
2310 }
2311
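/*
 * Return the next entry for @cpu without consuming it, going through
 * the buffer iterator when one exists and peeking at the live buffer
 * otherwise. Also updates iter->ent_size for the returned entry.
 */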
2312 static struct trace_entry *
2313 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2314                 unsigned long *lost_events)
2315 {
2316         struct ring_buffer_event *event;
2317         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2318
2319         if (buf_iter)
2320                 event = ring_buffer_iter_peek(buf_iter, ts);
2321         else
2322                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2323                                          lost_events);
2324
2325         if (event) {
2326                 iter->ent_size = ring_buffer_event_length(event);
2327                 return ring_buffer_event_data(event);
2328         }
2329         iter->ent_size = 0;
2330         return NULL;
2331 }
2332
2333 static struct trace_entry *
2334 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2335                   unsigned long *missing_events, u64 *ent_ts)
2336 {
2337         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2338         struct trace_entry *ent, *next = NULL;
2339         unsigned long lost_events = 0, next_lost = 0;
2340         int cpu_file = iter->cpu_file;
2341         u64 next_ts = 0, ts;
2342         int next_cpu = -1;
2343         int next_size = 0;
2344         int cpu;
2345
2346         /*
2347          * If we are in a per_cpu trace file, don't bother iterating over
2348          * all CPUs; just peek at that CPU directly.
2349          */
2350         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2351                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2352                         return NULL;
2353                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2354                 if (ent_cpu)
2355                         *ent_cpu = cpu_file;
2356
2357                 return ent;
2358         }
2359
2360         for_each_tracing_cpu(cpu) {
2361
2362                 if (ring_buffer_empty_cpu(buffer, cpu))
2363                         continue;
2364
2365                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2366
2367                 /*
2368                  * Pick the entry with the smallest timestamp:
2369                  */
2370                 if (ent && (!next || ts < next_ts)) {
2371                         next = ent;
2372                         next_cpu = cpu;
2373                         next_ts = ts;
2374                         next_lost = lost_events;
2375                         next_size = iter->ent_size;
2376                 }
2377         }
2378
2379         iter->ent_size = next_size;
2380
2381         if (ent_cpu)
2382                 *ent_cpu = next_cpu;
2383
2384         if (ent_ts)
2385                 *ent_ts = next_ts;
2386
2387         if (missing_events)
2388                 *missing_events = next_lost;
2389
2390         return next;
2391 }
2392
2393 /* Find the next real entry, without updating the iterator itself */
2394 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2395                                           int *ent_cpu, u64 *ent_ts)
2396 {
2397         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2398 }
2399
2400 /* Find the next real entry, and increment the iterator to the next entry */
2401 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2402 {
2403         iter->ent = __find_next_entry(iter, &iter->cpu,
2404                                       &iter->lost_events, &iter->ts);
2405
2406         if (iter->ent)
2407                 trace_iterator_increment(iter);
2408
2409         return iter->ent ? iter : NULL;
2410 }
2411
2412 static void trace_consume(struct trace_iterator *iter)
2413 {
2414         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2415                             &iter->lost_events);
2416 }
2417
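/*
 * seq_file ->next operation: advance the trace iterator to the entry
 * at position *pos. The iterator itself is returned as the cookie;
 * NULL means there are no more entries.
 */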
2418 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2419 {
2420         struct trace_iterator *iter = m->private;
2421         int i = (int)*pos;
2422         void *ent;
2423
2424         WARN_ON_ONCE(iter->leftover);
2425
2426         (*pos)++;
2427
2428         /* can't go backwards */
2429         if (iter->idx > i)
2430                 return NULL;
2431
2432         if (iter->idx < 0)
2433                 ent = trace_find_next_entry_inc(iter);
2434         else
2435                 ent = iter;
2436
2437         while (ent && iter->idx < i)
2438                 ent = trace_find_next_entry_inc(iter);
2439
2440         iter->pos = *pos;
2441
2442         return ent;
2443 }
2444
2445 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2446 {
2447         struct ring_buffer_event *event;
2448         struct ring_buffer_iter *buf_iter;
2449         unsigned long entries = 0;
2450         u64 ts;
2451
2452         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2453
2454         buf_iter = trace_buffer_iter(iter, cpu);
2455         if (!buf_iter)
2456                 return;
2457
2458         ring_buffer_iter_reset(buf_iter);
2459
2460         /*
2461          * With the max latency tracers we can have the case that a
2462          * reset never took place on a cpu. This is evident from the
2463          * timestamps being before the start of the buffer.
2464          */
2465         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2466                 if (ts >= iter->trace_buffer->time_start)
2467                         break;
2468                 entries++;
2469                 ring_buffer_read(buf_iter, NULL);
2470         }
2471
2472         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2473 }
2474
2475 /*
2476  * The current tracer is copied to avoid taking a global lock
2477  * all around.
2478  */
2479 static void *s_start(struct seq_file *m, loff_t *pos)
2480 {
2481         struct trace_iterator *iter = m->private;
2482         struct trace_array *tr = iter->tr;
2483         int cpu_file = iter->cpu_file;
2484         void *p = NULL;
2485         loff_t l = 0;
2486         int cpu;
2487
2488         /*
2489          * copy the tracer to avoid using a global lock all around.
2490          * iter->trace is a copy of current_trace, the pointer to the
2491          * name may be used instead of a strcmp(), as iter->trace->name
2492          * will point to the same string as current_trace->name.
2493          */
2494         mutex_lock(&trace_types_lock);
2495         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2496                 *iter->trace = *tr->current_trace;
2497         mutex_unlock(&trace_types_lock);
2498
2499 #ifdef CONFIG_TRACER_MAX_TRACE
2500         if (iter->snapshot && iter->trace->use_max_tr)
2501                 return ERR_PTR(-EBUSY);
2502 #endif
2503
2504         if (!iter->snapshot)
2505                 atomic_inc(&trace_record_cmdline_disabled);
2506
2507         if (*pos != iter->pos) {
2508                 iter->ent = NULL;
2509                 iter->cpu = 0;
2510                 iter->idx = -1;
2511
2512                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2513                         for_each_tracing_cpu(cpu)
2514                                 tracing_iter_reset(iter, cpu);
2515                 } else
2516                         tracing_iter_reset(iter, cpu_file);
2517
2518                 iter->leftover = 0;
2519                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2520                         ;
2521
2522         } else {
2523                 /*
2524                  * If we overflowed the seq_file before, then we want
2525                  * to just reuse the trace_seq buffer again.
2526                  */
2527                 if (iter->leftover)
2528                         p = iter;
2529                 else {
2530                         l = *pos - 1;
2531                         p = s_next(m, p, &l);
2532                 }
2533         }
2534
2535         trace_event_read_lock();
2536         trace_access_lock(cpu_file);
2537         return p;
2538 }
2539
2540 static void s_stop(struct seq_file *m, void *p)
2541 {
2542         struct trace_iterator *iter = m->private;
2543
2544 #ifdef CONFIG_TRACER_MAX_TRACE
2545         if (iter->snapshot && iter->trace->use_max_tr)
2546                 return;
2547 #endif
2548
2549         if (!iter->snapshot)
2550                 atomic_dec(&trace_record_cmdline_disabled);
2551
2552         trace_access_unlock(iter->cpu_file);
2553         trace_event_read_unlock();
2554 }
2555
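/*
 * Sum the per-cpu counts of @buf: *entries is the number of events
 * still in the buffer (minus any skipped by tracing_iter_reset()),
 * while *total also includes the per-cpu overruns when no entries
 * were skipped.
 */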
2556 static void
2557 get_total_entries(struct trace_buffer *buf,
2558                   unsigned long *total, unsigned long *entries)
2559 {
2560         unsigned long count;
2561         int cpu;
2562
2563         *total = 0;
2564         *entries = 0;
2565
2566         for_each_tracing_cpu(cpu) {
2567                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2568                 /*
2569                  * If this buffer has skipped entries, then we hold all
2570                  * entries for the trace and we need to ignore the
2571                  * ones before the time stamp.
2572                  */
2573                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2574                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2575                         /* total is the same as the entries */
2576                         *total += count;
2577                 } else
2578                         *total += count +
2579                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2580                 *entries += count;
2581         }
2582 }
2583
2584 static void print_lat_help_header(struct seq_file *m)
2585 {
2586         seq_puts(m, "#                  _------=> CPU#            \n"
2587                     "#                 / _-----=> irqs-off        \n"
2588                     "#                | / _----=> need-resched    \n"
2589                     "#                || / _---=> hardirq/softirq \n"
2590                     "#                ||| / _--=> preempt-depth   \n"
2591                     "#                |||| /     delay            \n"
2592                     "#  cmd     pid   ||||| time  |   caller      \n"
2593                     "#     \\   /      |||||  \\    |   /         \n");
2594 }
2595
2596 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2597 {
2598         unsigned long total;
2599         unsigned long entries;
2600
2601         get_total_entries(buf, &total, &entries);
2602         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2603                    entries, total, num_online_cpus());
2604         seq_puts(m, "#\n");
2605 }
2606
2607 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2608 {
2609         print_event_info(buf, m);
2610         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2611                     "#              | |       |          |         |\n");
2612 }
2613
2614 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2615 {
2616         print_event_info(buf, m);
2617         seq_puts(m, "#                              _-----=> irqs-off\n"
2618                     "#                             / _----=> need-resched\n"
2619                     "#                            | / _---=> hardirq/softirq\n"
2620                     "#                            || / _--=> preempt-depth\n"
2621                     "#                            ||| /     delay\n"
2622                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2623                     "#              | |       |   ||||       |         |\n");
2624 }
2625
2626 void
2627 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2628 {
2629         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2630         struct trace_buffer *buf = iter->trace_buffer;
2631         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2632         struct tracer *type = iter->trace;
2633         unsigned long entries;
2634         unsigned long total;
2635         const char *name = type->name;
2636
2638
2639         get_total_entries(buf, &total, &entries);
2640
2641         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2642                    name, UTS_RELEASE);
2643         seq_puts(m, "# -----------------------------------"
2644                  "---------------------------------\n");
2645         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2646                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2647                    nsecs_to_usecs(data->saved_latency),
2648                    entries,
2649                    total,
2650                    buf->cpu,
2651 #if defined(CONFIG_PREEMPT_NONE)
2652                    "server",
2653 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2654                    "desktop",
2655 #elif defined(CONFIG_PREEMPT)
2656                    "preempt",
2657 #else
2658                    "unknown",
2659 #endif
2660                    /* These are reserved for later use */
2661                    0, 0, 0, 0);
2662 #ifdef CONFIG_SMP
2663         seq_printf(m, " #P:%d)\n", num_online_cpus());
2664 #else
2665         seq_puts(m, ")\n");
2666 #endif
2667         seq_puts(m, "#    -----------------\n");
2668         seq_printf(m, "#    | task: %.16s-%d "
2669                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2670                    data->comm, data->pid,
2671                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2672                    data->policy, data->rt_priority);
2673         seq_puts(m, "#    -----------------\n");
2674
2675         if (data->critical_start) {
2676                 seq_puts(m, "#  => started at: ");
2677                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2678                 trace_print_seq(m, &iter->seq);
2679                 seq_puts(m, "\n#  => ended at:   ");
2680                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2681                 trace_print_seq(m, &iter->seq);
2682                 seq_puts(m, "\n#\n");
2683         }
2684
2685         seq_puts(m, "#\n");
2686 }
2687
2688 static void test_cpu_buff_start(struct trace_iterator *iter)
2689 {
2690         struct trace_seq *s = &iter->seq;
2691         struct trace_array *tr = iter->tr;
2692
2693         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2694                 return;
2695
2696         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2697                 return;
2698
2699         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2700                 return;
2701
2702         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2703                 return;
2704
2705         if (iter->started)
2706                 cpumask_set_cpu(iter->cpu, iter->started);
2707
2708         /* Don't print started cpu buffer for the first entry of the trace */
2709         if (iter->idx > 1)
2710                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2711                                 iter->cpu);
2712 }
2713
2714 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2715 {
2716         struct trace_array *tr = iter->tr;
2717         struct trace_seq *s = &iter->seq;
2718         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2719         struct trace_entry *entry;
2720         struct trace_event *event;
2721
2722         entry = iter->ent;
2723
2724         test_cpu_buff_start(iter);
2725
2726         event = ftrace_find_event(entry->type);
2727
2728         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2729                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2730                         trace_print_lat_context(iter);
2731                 else
2732                         trace_print_context(iter);
2733         }
2734
2735         if (trace_seq_has_overflowed(s))
2736                 return TRACE_TYPE_PARTIAL_LINE;
2737
2738         if (event)
2739                 return event->funcs->trace(iter, sym_flags, event);
2740
2741         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2742
2743         return trace_handle_return(s);
2744 }
2745
2746 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2747 {
2748         struct trace_array *tr = iter->tr;
2749         struct trace_seq *s = &iter->seq;
2750         struct trace_entry *entry;
2751         struct trace_event *event;
2752
2753         entry = iter->ent;
2754
2755         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2756                 trace_seq_printf(s, "%d %d %llu ",
2757                                  entry->pid, iter->cpu, iter->ts);
2758
2759         if (trace_seq_has_overflowed(s))
2760                 return TRACE_TYPE_PARTIAL_LINE;
2761
2762         event = ftrace_find_event(entry->type);
2763         if (event)
2764                 return event->funcs->raw(iter, 0, event);
2765
2766         trace_seq_printf(s, "%d ?\n", entry->type);
2767
2768         return trace_handle_return(s);
2769 }
2770
2771 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2772 {
2773         struct trace_array *tr = iter->tr;
2774         struct trace_seq *s = &iter->seq;
2775         unsigned char newline = '\n';
2776         struct trace_entry *entry;
2777         struct trace_event *event;
2778
2779         entry = iter->ent;
2780
2781         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2782                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2783                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2784                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2785                 if (trace_seq_has_overflowed(s))
2786                         return TRACE_TYPE_PARTIAL_LINE;
2787         }
2788
2789         event = ftrace_find_event(entry->type);
2790         if (event) {
2791                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2792                 if (ret != TRACE_TYPE_HANDLED)
2793                         return ret;
2794         }
2795
2796         SEQ_PUT_FIELD(s, newline);
2797
2798         return trace_handle_return(s);
2799 }
2800
2801 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2802 {
2803         struct trace_array *tr = iter->tr;
2804         struct trace_seq *s = &iter->seq;
2805         struct trace_entry *entry;
2806         struct trace_event *event;
2807
2808         entry = iter->ent;
2809
2810         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2811                 SEQ_PUT_FIELD(s, entry->pid);
2812                 SEQ_PUT_FIELD(s, iter->cpu);
2813                 SEQ_PUT_FIELD(s, iter->ts);
2814                 if (trace_seq_has_overflowed(s))
2815                         return TRACE_TYPE_PARTIAL_LINE;
2816         }
2817
2818         event = ftrace_find_event(entry->type);
2819         return event ? event->funcs->binary(iter, 0, event) :
2820                 TRACE_TYPE_HANDLED;
2821 }
2822
2823 int trace_empty(struct trace_iterator *iter)
2824 {
2825         struct ring_buffer_iter *buf_iter;
2826         int cpu;
2827
2828         /* If we are looking at one CPU buffer, only check that one */
2829         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2830                 cpu = iter->cpu_file;
2831                 buf_iter = trace_buffer_iter(iter, cpu);
2832                 if (buf_iter) {
2833                         if (!ring_buffer_iter_empty(buf_iter))
2834                                 return 0;
2835                 } else {
2836                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2837                                 return 0;
2838                 }
2839                 return 1;
2840         }
2841
2842         for_each_tracing_cpu(cpu) {
2843                 buf_iter = trace_buffer_iter(iter, cpu);
2844                 if (buf_iter) {
2845                         if (!ring_buffer_iter_empty(buf_iter))
2846                                 return 0;
2847                 } else {
2848                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2849                                 return 0;
2850                 }
2851         }
2852
2853         return 1;
2854 }
2855
2856 /*  Called with trace_event_read_lock() held. */
2857 enum print_line_t print_trace_line(struct trace_iterator *iter)
2858 {
2859         struct trace_array *tr = iter->tr;
2860         unsigned long trace_flags = tr->trace_flags;
2861         enum print_line_t ret;
2862
2863         if (iter->lost_events) {
2864                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2865                                  iter->cpu, iter->lost_events);
2866                 if (trace_seq_has_overflowed(&iter->seq))
2867                         return TRACE_TYPE_PARTIAL_LINE;
2868         }
2869
2870         if (iter->trace && iter->trace->print_line) {
2871                 ret = iter->trace->print_line(iter);
2872                 if (ret != TRACE_TYPE_UNHANDLED)
2873                         return ret;
2874         }
2875
2876         if (iter->ent->type == TRACE_BPUTS &&
2877                         trace_flags & TRACE_ITER_PRINTK &&
2878                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2879                 return trace_print_bputs_msg_only(iter);
2880
2881         if (iter->ent->type == TRACE_BPRINT &&
2882                         trace_flags & TRACE_ITER_PRINTK &&
2883                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2884                 return trace_print_bprintk_msg_only(iter);
2885
2886         if (iter->ent->type == TRACE_PRINT &&
2887                         trace_flags & TRACE_ITER_PRINTK &&
2888                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2889                 return trace_print_printk_msg_only(iter);
2890
2891         if (trace_flags & TRACE_ITER_BIN)
2892                 return print_bin_fmt(iter);
2893
2894         if (trace_flags & TRACE_ITER_HEX)
2895                 return print_hex_fmt(iter);
2896
2897         if (trace_flags & TRACE_ITER_RAW)
2898                 return print_raw_fmt(iter);
2899
2900         return print_trace_fmt(iter);
2901 }
2902
2903 void trace_latency_header(struct seq_file *m)
2904 {
2905         struct trace_iterator *iter = m->private;
2906         struct trace_array *tr = iter->tr;
2907
2908         /* print nothing if the buffers are empty */
2909         if (trace_empty(iter))
2910                 return;
2911
2912         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2913                 print_trace_header(m, iter);
2914
2915         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2916                 print_lat_help_header(m);
2917 }
2918
2919 void trace_default_header(struct seq_file *m)
2920 {
2921         struct trace_iterator *iter = m->private;
2922         struct trace_array *tr = iter->tr;
2923         unsigned long trace_flags = tr->trace_flags;
2924
2925         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2926                 return;
2927
2928         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2929                 /* print nothing if the buffers are empty */
2930                 if (trace_empty(iter))
2931                         return;
2932                 print_trace_header(m, iter);
2933                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2934                         print_lat_help_header(m);
2935         } else {
2936                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2937                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2938                                 print_func_help_header_irq(iter->trace_buffer, m);
2939                         else
2940                                 print_func_help_header(iter->trace_buffer, m);
2941                 }
2942         }
2943 }
2944
2945 static void test_ftrace_alive(struct seq_file *m)
2946 {
2947         if (!ftrace_is_dead())
2948                 return;
2949         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2950                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2951 }
2952
2953 #ifdef CONFIG_TRACER_MAX_TRACE
2954 static void show_snapshot_main_help(struct seq_file *m)
2955 {
2956         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2957                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2958                     "#                      Takes a snapshot of the main buffer.\n"
2959                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2960                     "#                      (Doesn't have to be '2', works with any number that\n"
2961                     "#                       is not a '0' or '1')\n");
2962 }
2963
2964 static void show_snapshot_percpu_help(struct seq_file *m)
2965 {
2966         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2967 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2968         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2969                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2970 #else
2971         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2972                     "#                     Must use main snapshot file to allocate.\n");
2973 #endif
2974         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2975                     "#                      (Doesn't have to be '2', works with any number that\n"
2976                     "#                       is not a '0' or '1')\n");
2977 }
2978
2979 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2980 {
2981         if (iter->tr->allocated_snapshot)
2982                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2983         else
2984                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2985
2986         seq_puts(m, "# Snapshot commands:\n");
2987         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2988                 show_snapshot_main_help(m);
2989         else
2990                 show_snapshot_percpu_help(m);
2991 }
2992 #else
2993 /* Should never be called */
2994 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2995 #endif
2996
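/*
 * seq_file ->show operation: print the header block when there is no
 * current entry, re-emit a line that previously overflowed the
 * seq_file buffer (iter->leftover), or format the current entry via
 * print_trace_line().
 */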
2997 static int s_show(struct seq_file *m, void *v)
2998 {
2999         struct trace_iterator *iter = v;
3000         int ret;
3001
3002         if (iter->ent == NULL) {
3003                 if (iter->tr) {
3004                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3005                         seq_puts(m, "#\n");
3006                         test_ftrace_alive(m);
3007                 }
3008                 if (iter->snapshot && trace_empty(iter))
3009                         print_snapshot_help(m, iter);
3010                 else if (iter->trace && iter->trace->print_header)
3011                         iter->trace->print_header(m);
3012                 else
3013                         trace_default_header(m);
3014
3015         } else if (iter->leftover) {
3016                 /*
3017                  * If we filled the seq_file buffer earlier, we
3018                  * want to just show it now.
3019                  */
3020                 ret = trace_print_seq(m, &iter->seq);
3021
3022                 /* ret should this time be zero, but you never know */
3023                 iter->leftover = ret;
3024
3025         } else {
3026                 print_trace_line(iter);
3027                 ret = trace_print_seq(m, &iter->seq);
3028                 /*
3029                  * If we overflow the seq_file buffer, then it will
3030                  * ask us for this data again at start up.
3031                  * Use that instead.
3032                  *  ret is 0 if seq_file write succeeded.
3033                  *        -1 otherwise.
3034                  */
3035                 iter->leftover = ret;
3036         }
3037
3038         return 0;
3039 }
3040
3041 /*
3042  * Should be used after trace_array_get(), trace_types_lock
3043  * ensures that i_cdev was already initialized.
3044  */
3045 static inline int tracing_get_cpu(struct inode *inode)
3046 {
3047         if (inode->i_cdev) /* See trace_create_cpu_file() */
3048                 return (long)inode->i_cdev - 1;
3049         return RING_BUFFER_ALL_CPUS;
3050 }
3051
3052 static const struct seq_operations tracer_seq_ops = {
3053         .start          = s_start,
3054         .next           = s_next,
3055         .stop           = s_stop,
3056         .show           = s_show,
3057 };
3058
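/*
 * Set up a trace_iterator for reading the trace: take a private copy
 * of the current tracer, select the max/snapshot buffer when needed,
 * stop tracing unless the snapshot file is being opened, and prepare
 * a ring buffer iterator for each CPU being read.
 */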
3059 static struct trace_iterator *
3060 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3061 {
3062         struct trace_array *tr = inode->i_private;
3063         struct trace_iterator *iter;
3064         int cpu;
3065
3066         if (tracing_disabled)
3067                 return ERR_PTR(-ENODEV);
3068
3069         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3070         if (!iter)
3071                 return ERR_PTR(-ENOMEM);
3072
3073         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3074                                     GFP_KERNEL);
3075         if (!iter->buffer_iter)
3076                 goto release;
3077
3078         /*
3079          * We make a copy of the current tracer to avoid concurrent
3080          * changes on it while we are reading.
3081          */
3082         mutex_lock(&trace_types_lock);
3083         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3084         if (!iter->trace)
3085                 goto fail;
3086
3087         *iter->trace = *tr->current_trace;
3088
3089         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3090                 goto fail;
3091
3092         iter->tr = tr;
3093
3094 #ifdef CONFIG_TRACER_MAX_TRACE
3095         /* Currently only the top directory has a snapshot */
3096         if (tr->current_trace->print_max || snapshot)
3097                 iter->trace_buffer = &tr->max_buffer;
3098         else
3099 #endif
3100                 iter->trace_buffer = &tr->trace_buffer;
3101         iter->snapshot = snapshot;
3102         iter->pos = -1;
3103         iter->cpu_file = tracing_get_cpu(inode);
3104         mutex_init(&iter->mutex);
3105
3106         /* Notify the tracer early; before we stop tracing. */
3107         if (iter->trace && iter->trace->open)
3108                 iter->trace->open(iter);
3109
3110         /* Annotate start of buffers if we had overruns */
3111         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3112                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3113
3114         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3115         if (trace_clocks[tr->clock_id].in_ns)
3116                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3117
3118         /* stop the trace while dumping if we are not opening "snapshot" */
3119         if (!iter->snapshot)
3120                 tracing_stop_tr(tr);
3121
3122         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3123                 for_each_tracing_cpu(cpu) {
3124                         iter->buffer_iter[cpu] =
3125                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3126                                                          cpu, GFP_KERNEL);
3127                 }
3128                 ring_buffer_read_prepare_sync();
3129                 for_each_tracing_cpu(cpu) {
3130                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3131                         tracing_iter_reset(iter, cpu);
3132                 }
3133         } else {
3134                 cpu = iter->cpu_file;
3135                 iter->buffer_iter[cpu] =
3136                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3137                                                  cpu, GFP_KERNEL);
3138                 ring_buffer_read_prepare_sync();
3139                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3140                 tracing_iter_reset(iter, cpu);
3141         }
3142
3143         mutex_unlock(&trace_types_lock);
3144
3145         return iter;
3146
3147  fail:
3148         mutex_unlock(&trace_types_lock);
3149         kfree(iter->trace);
3150         kfree(iter->buffer_iter);
3151 release:
3152         seq_release_private(inode, file);
3153         return ERR_PTR(-ENOMEM);
3154 }
3155
3156 int tracing_open_generic(struct inode *inode, struct file *filp)
3157 {
3158         if (tracing_disabled)
3159                 return -ENODEV;
3160
3161         filp->private_data = inode->i_private;
3162         return 0;
3163 }
3164
3165 bool tracing_is_disabled(void)
3166 {
3167         return tracing_disabled ? true : false;
3168 }
3169
3170 /*
3171  * Open and update trace_array ref count.
3172  * Must have the current trace_array passed to it.
3173  */
3174 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3175 {
3176         struct trace_array *tr = inode->i_private;
3177
3178         if (tracing_disabled)
3179                 return -ENODEV;
3180
3181         if (trace_array_get(tr) < 0)
3182                 return -ENODEV;
3183
3184         filp->private_data = inode->i_private;
3185
3186         return 0;
3187 }
3188
3189 static int tracing_release(struct inode *inode, struct file *file)
3190 {
3191         struct trace_array *tr = inode->i_private;
3192         struct seq_file *m = file->private_data;
3193         struct trace_iterator *iter;
3194         int cpu;
3195
3196         if (!(file->f_mode & FMODE_READ)) {
3197                 trace_array_put(tr);
3198                 return 0;
3199         }
3200
3201         /* Writes do not use seq_file */
3202         iter = m->private;
3203         mutex_lock(&trace_types_lock);
3204
3205         for_each_tracing_cpu(cpu) {
3206                 if (iter->buffer_iter[cpu])
3207                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3208         }
3209
3210         if (iter->trace && iter->trace->close)
3211                 iter->trace->close(iter);
3212
3213         if (!iter->snapshot)
3214                 /* reenable tracing if it was previously enabled */
3215                 tracing_start_tr(tr);
3216
3217         __trace_array_put(tr);
3218
3219         mutex_unlock(&trace_types_lock);
3220
3221         mutex_destroy(&iter->mutex);
3222         free_cpumask_var(iter->started);
3223         kfree(iter->trace);
3224         kfree(iter->buffer_iter);
3225         seq_release_private(inode, file);
3226
3227         return 0;
3228 }
3229
3230 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3231 {
3232         struct trace_array *tr = inode->i_private;
3233
3234         trace_array_put(tr);
3235         return 0;
3236 }
3237
3238 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3239 {
3240         struct trace_array *tr = inode->i_private;
3241
3242         trace_array_put(tr);
3243
3244         return single_release(inode, file);
3245 }
3246
3247 static int tracing_open(struct inode *inode, struct file *file)
3248 {
3249         struct trace_array *tr = inode->i_private;
3250         struct trace_iterator *iter;
3251         int ret = 0;
3252
3253         if (trace_array_get(tr) < 0)
3254                 return -ENODEV;
3255
3256         /* If this file was open for write, then erase contents */
3257         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3258                 int cpu = tracing_get_cpu(inode);
3259                 struct trace_buffer *trace_buf = &tr->trace_buffer;
3260
3261 #ifdef CONFIG_TRACER_MAX_TRACE
3262                 if (tr->current_trace->print_max)
3263                         trace_buf = &tr->max_buffer;
3264 #endif
3265
3266                 if (cpu == RING_BUFFER_ALL_CPUS)
3267                         tracing_reset_online_cpus(trace_buf);
3268                 else
3269                         tracing_reset(trace_buf, cpu);
3270         }
3271
3272         if (file->f_mode & FMODE_READ) {
3273                 iter = __tracing_open(inode, file, false);
3274                 if (IS_ERR(iter))
3275                         ret = PTR_ERR(iter);
3276                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3277                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3278         }
3279
3280         if (ret < 0)
3281                 trace_array_put(tr);
3282
3283         return ret;
3284 }
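
/*
 * Usage sketch (per the tracing mini-HOWTO further down in this file,
 * assuming tracefs is mounted at the usual /sys/kernel/tracing or
 * /sys/kernel/debug/tracing): opening "trace" with O_TRUNC is what
 * triggers the reset above.
 *
 *   echo > trace   # truncating open: clears the ring buffer
 *   cat trace      # read-only open: static view of the buffer
 */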
3285
3286 /*
3287  * Some tracers are not suitable for instance buffers.
3288  * A tracer is always available for the global array (toplevel)
3289  * or if it explicitly states that it is.
3290  */
3291 static bool
3292 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3293 {
3294         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3295 }
3296
3297 /* Find the next tracer that this trace array may use */
3298 static struct tracer *
3299 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3300 {
3301         while (t && !trace_ok_for_array(t, tr))
3302                 t = t->next;
3303
3304         return t;
3305 }
3306
3307 static void *
3308 t_next(struct seq_file *m, void *v, loff_t *pos)
3309 {
3310         struct trace_array *tr = m->private;
3311         struct tracer *t = v;
3312
3313         (*pos)++;
3314
3315         if (t)
3316                 t = get_tracer_for_array(tr, t->next);
3317
3318         return t;
3319 }
3320
3321 static void *t_start(struct seq_file *m, loff_t *pos)
3322 {
3323         struct trace_array *tr = m->private;
3324         struct tracer *t;
3325         loff_t l = 0;
3326
3327         mutex_lock(&trace_types_lock);
3328
3329         t = get_tracer_for_array(tr, trace_types);
3330         for (; t && l < *pos; t = t_next(m, t, &l))
3331                 ;
3332
3333         return t;
3334 }
3335
3336 static void t_stop(struct seq_file *m, void *p)
3337 {
3338         mutex_unlock(&trace_types_lock);
3339 }
3340
3341 static int t_show(struct seq_file *m, void *v)
3342 {
3343         struct tracer *t = v;
3344
3345         if (!t)
3346                 return 0;
3347
3348         seq_puts(m, t->name);
3349         if (t->next)
3350                 seq_putc(m, ' ');
3351         else
3352                 seq_putc(m, '\n');
3353
3354         return 0;
3355 }
3356
3357 static const struct seq_operations show_traces_seq_ops = {
3358         .start          = t_start,
3359         .next           = t_next,
3360         .stop           = t_stop,
3361         .show           = t_show,
3362 };
3363
3364 static int show_traces_open(struct inode *inode, struct file *file)
3365 {
3366         struct trace_array *tr = inode->i_private;
3367         struct seq_file *m;
3368         int ret;
3369
3370         if (tracing_disabled)
3371                 return -ENODEV;
3372
3373         if (trace_array_get(tr) < 0)
3374                 return -ENODEV;
3375
3376         ret = seq_open(file, &show_traces_seq_ops);
3377         if (ret) {
3378                 trace_array_put(tr);
3379                 return ret;
3380         }
3381
3382         m = file->private_data;
3383         m->private = tr;
3384
3385         return 0;
3386 }
3387
3388 static int show_traces_release(struct inode *inode, struct file *file)
3389 {
3390         struct trace_array *tr = inode->i_private;
3391
3392         trace_array_put(tr);
3393         return seq_release(inode, file);
3394 }
3395
3396 static ssize_t
3397 tracing_write_stub(struct file *filp, const char __user *ubuf,
3398                    size_t count, loff_t *ppos)
3399 {
3400         return count;
3401 }
3402
3403 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3404 {
3405         int ret;
3406
3407         if (file->f_mode & FMODE_READ)
3408                 ret = seq_lseek(file, offset, whence);
3409         else
3410                 file->f_pos = ret = 0;
3411
3412         return ret;
3413 }
3414
3415 static const struct file_operations tracing_fops = {
3416         .open           = tracing_open,
3417         .read           = seq_read,
3418         .write          = tracing_write_stub,
3419         .llseek         = tracing_lseek,
3420         .release        = tracing_release,
3421 };
3422
3423 static const struct file_operations show_traces_fops = {
3424         .open           = show_traces_open,
3425         .read           = seq_read,
3426         .llseek         = seq_lseek,
3427         .release        = show_traces_release,
3428 };
3429
3430 static ssize_t
3431 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3432                      size_t count, loff_t *ppos)
3433 {
3434         struct trace_array *tr = file_inode(filp)->i_private;
3435         char *mask_str;
3436         int len;
3437
3438         len = snprintf(NULL, 0, "%*pb\n",
3439                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
3440         mask_str = kmalloc(len, GFP_KERNEL);
3441         if (!mask_str)
3442                 return -ENOMEM;
3443
3444         len = snprintf(mask_str, len, "%*pb\n",
3445                        cpumask_pr_args(tr->tracing_cpumask));
3446         if (len >= count) {
3447                 count = -EINVAL;
3448                 goto out_err;
3449         }
3450         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3451
3452 out_err:
3453         kfree(mask_str);
3454
3455         return count;
3456 }
3457
3458 static ssize_t
3459 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3460                       size_t count, loff_t *ppos)
3461 {
3462         struct trace_array *tr = file_inode(filp)->i_private;
3463         cpumask_var_t tracing_cpumask_new;
3464         int err, cpu;
3465
3466         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3467                 return -ENOMEM;
3468
3469         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3470         if (err)
3471                 goto err_unlock;
3472
3473         local_irq_disable();
3474         arch_spin_lock(&tr->max_lock);
3475         for_each_tracing_cpu(cpu) {
3476                 /*
3477                  * Increase/decrease the disabled counter if we are
3478                  * about to flip a bit in the cpumask:
3479                  */
3480                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3481                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3482                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3483                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3484                 }
3485                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3486                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3487                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3488                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3489                 }
3490         }
3491         arch_spin_unlock(&tr->max_lock);
3492         local_irq_enable();
3493
3494         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3495         free_cpumask_var(tracing_cpumask_new);
3496
3497         return count;
3498
3499 err_unlock:
3500         free_cpumask_var(tracing_cpumask_new);
3501
3502         return err;
3503 }
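
/*
 * Usage sketch: tracing_cpumask_read() prints the mask with %*pb and
 * tracing_cpumask_write() parses the same hex-mask format via
 * cpumask_parse_user(), so from a shell (paths as above):
 *
 *   cat tracing_cpumask        # e.g. "f" on a 4-CPU machine
 *   echo 3 > tracing_cpumask   # trace only CPUs 0 and 1
 */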
3504
3505 static const struct file_operations tracing_cpumask_fops = {
3506         .open           = tracing_open_generic_tr,
3507         .read           = tracing_cpumask_read,
3508         .write          = tracing_cpumask_write,
3509         .release        = tracing_release_generic_tr,
3510         .llseek         = generic_file_llseek,
3511 };
3512
3513 static int tracing_trace_options_show(struct seq_file *m, void *v)
3514 {
3515         struct tracer_opt *trace_opts;
3516         struct trace_array *tr = m->private;
3517         u32 tracer_flags;
3518         int i;
3519
3520         mutex_lock(&trace_types_lock);
3521         tracer_flags = tr->current_trace->flags->val;
3522         trace_opts = tr->current_trace->flags->opts;
3523
3524         for (i = 0; trace_options[i]; i++) {
3525                 if (tr->trace_flags & (1 << i))
3526                         seq_printf(m, "%s\n", trace_options[i]);
3527                 else
3528                         seq_printf(m, "no%s\n", trace_options[i]);
3529         }
3530
3531         for (i = 0; trace_opts[i].name; i++) {
3532                 if (tracer_flags & trace_opts[i].bit)
3533                         seq_printf(m, "%s\n", trace_opts[i].name);
3534                 else
3535                         seq_printf(m, "no%s\n", trace_opts[i].name);
3536         }
3537         mutex_unlock(&trace_types_lock);
3538
3539         return 0;
3540 }
3541
3542 static int __set_tracer_option(struct trace_array *tr,
3543                                struct tracer_flags *tracer_flags,
3544                                struct tracer_opt *opts, int neg)
3545 {
3546         struct tracer *trace = tr->current_trace;
3547         int ret;
3548
3549         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3550         if (ret)
3551                 return ret;
3552
3553         if (neg)
3554                 tracer_flags->val &= ~opts->bit;
3555         else
3556                 tracer_flags->val |= opts->bit;
3557         return 0;
3558 }
3559
3560 /* Try to assign a tracer specific option */
3561 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3562 {
3563         struct tracer *trace = tr->current_trace;
3564         struct tracer_flags *tracer_flags = trace->flags;
3565         struct tracer_opt *opts = NULL;
3566         int i;
3567
3568         for (i = 0; tracer_flags->opts[i].name; i++) {
3569                 opts = &tracer_flags->opts[i];
3570
3571                 if (strcmp(cmp, opts->name) == 0)
3572                         return __set_tracer_option(tr, trace->flags, opts, neg);
3573         }
3574
3575         return -EINVAL;
3576 }
3577
3578 /* Some tracers require overwrite to stay enabled */
3579 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3580 {
3581         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3582                 return -1;
3583
3584         return 0;
3585 }
3586
3587 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3588 {
3589         /* do nothing if flag is already set */
3590         if (!!(tr->trace_flags & mask) == !!enabled)
3591                 return 0;
3592
3593         /* Give the tracer a chance to approve the change */
3594         if (tr->current_trace->flag_changed)
3595                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3596                         return -EINVAL;
3597
3598         if (enabled)
3599                 tr->trace_flags |= mask;
3600         else
3601                 tr->trace_flags &= ~mask;
3602
3603         if (mask == TRACE_ITER_RECORD_CMD)
3604                 trace_event_enable_cmd_record(enabled);
3605
3606         if (mask == TRACE_ITER_OVERWRITE) {
3607                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3608 #ifdef CONFIG_TRACER_MAX_TRACE
3609                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3610 #endif
3611         }
3612
3613         if (mask == TRACE_ITER_PRINTK) {
3614                 trace_printk_start_stop_comm(enabled);
3615                 trace_printk_control(enabled);
3616         }
3617
3618         return 0;
3619 }
3620
3621 static int trace_set_options(struct trace_array *tr, char *option)
3622 {
3623         char *cmp;
3624         int neg = 0;
3625         int ret = -ENODEV;
3626         int i;
3627         size_t orig_len = strlen(option);
3628
3629         cmp = strstrip(option);
3630
3631         if (strncmp(cmp, "no", 2) == 0) {
3632                 neg = 1;
3633                 cmp += 2;
3634         }
3635
3636         mutex_lock(&trace_types_lock);
3637
3638         for (i = 0; trace_options[i]; i++) {
3639                 if (strcmp(cmp, trace_options[i]) == 0) {
3640                         ret = set_tracer_flag(tr, 1 << i, !neg);
3641                         break;
3642                 }
3643         }
3644
3645         /* If no option could be set, test the specific tracer options */
3646         if (!trace_options[i])
3647                 ret = set_tracer_option(tr, cmp, neg);
3648
3649         mutex_unlock(&trace_types_lock);
3650
3651         /*
3652          * If the first trailing whitespace is replaced with '\0' by strstrip,
3653          * turn it back into a space.
3654          */
3655         if (orig_len > strlen(option))
3656                 option[strlen(option)] = ' ';
3657
3658         return ret;
3659 }
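
/*
 * Usage sketch: trace_set_options() parses each token written to the
 * trace_options file (and the boot options below). Clearing an option
 * uses the "no" prefix handled by the strncmp() above; the option name
 * here is only illustrative:
 *
 *   echo sym-offset > trace_options
 *   echo nosym-offset > trace_options
 */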
3660
3661 static void __init apply_trace_boot_options(void)
3662 {
3663         char *buf = trace_boot_options_buf;
3664         char *option;
3665
3666         while (true) {
3667                 option = strsep(&buf, ",");
3668
3669                 if (!option)
3670                         break;
3671
3672                 if (*option)
3673                         trace_set_options(&global_trace, option);
3674
3675                 /* Put back the comma to allow this to be called again */
3676                 if (buf)
3677                         *(buf - 1) = ',';
3678         }
3679 }
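
/*
 * Boot-time sketch (assumption: trace_boot_options_buf is filled from a
 * trace_options= kernel command-line parameter handled elsewhere in this
 * file). Each comma-separated token goes through trace_set_options():
 *
 *   trace_options=sym-offset,noprint-parent
 *
 * The option names above are illustrative only.
 */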
3680
3681 static ssize_t
3682 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3683                         size_t cnt, loff_t *ppos)
3684 {
3685         struct seq_file *m = filp->private_data;
3686         struct trace_array *tr = m->private;
3687         char buf[64];
3688         int ret;
3689
3690         if (cnt >= sizeof(buf))
3691                 return -EINVAL;
3692
3693         if (copy_from_user(&buf, ubuf, cnt))
3694                 return -EFAULT;
3695
3696         buf[cnt] = 0;
3697
3698         ret = trace_set_options(tr, buf);
3699         if (ret < 0)
3700                 return ret;
3701
3702         *ppos += cnt;
3703
3704         return cnt;
3705 }
3706
3707 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3708 {
3709         struct trace_array *tr = inode->i_private;
3710         int ret;
3711
3712         if (tracing_disabled)
3713                 return -ENODEV;
3714
3715         if (trace_array_get(tr) < 0)
3716                 return -ENODEV;
3717
3718         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3719         if (ret < 0)
3720                 trace_array_put(tr);
3721
3722         return ret;
3723 }
3724
3725 static const struct file_operations tracing_iter_fops = {
3726         .open           = tracing_trace_options_open,
3727         .read           = seq_read,
3728         .llseek         = seq_lseek,
3729         .release        = tracing_single_release_tr,
3730         .write          = tracing_trace_options_write,
3731 };
3732
3733 static const char readme_msg[] =
3734         "tracing mini-HOWTO:\n\n"
3735         "# echo 0 > tracing_on : quick way to disable tracing\n"
3736         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3737         " Important files:\n"
3738         "  trace\t\t\t- The static contents of the buffer\n"
3739         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3740         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3741         "  current_tracer\t- function and latency tracers\n"
3742         "  available_tracers\t- list of configured tracers for current_tracer\n"
3743         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3744         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3745         "  trace_clock\t\t- change the clock used to order events\n"
3746         "       local:   Per cpu clock but may not be synced across CPUs\n"
3747         "      global:   Synced across CPUs but slows tracing down.\n"
3748         "     counter:   Not a clock, but just an increment\n"
3749         "      uptime:   Jiffy counter from time of boot\n"
3750         "        perf:   Same clock that perf events use\n"
3751 #ifdef CONFIG_X86_64
3752         "     x86-tsc:   TSC cycle counter\n"
3753 #endif
3754         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3755         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3756         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3757         "\t\t\t  Remove sub-buffer with rmdir\n"
3758         "  trace_options\t\t- Set format or modify how tracing happens\n"
3759         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3760         "\t\t\t  option name\n"
3761         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3762 #ifdef CONFIG_DYNAMIC_FTRACE
3763         "\n  available_filter_functions - list of functions that can be filtered on\n"
3764         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3765         "\t\t\t  functions\n"
3766         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3767         "\t     modules: Can select a group via module\n"
3768         "\t      Format: :mod:<module-name>\n"
3769         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3770         "\t    triggers: a command to perform when function is hit\n"
3771         "\t      Format: <function>:<trigger>[:count]\n"
3772         "\t     trigger: traceon, traceoff\n"
3773         "\t\t      enable_event:<system>:<event>\n"
3774         "\t\t      disable_event:<system>:<event>\n"
3775 #ifdef CONFIG_STACKTRACE
3776         "\t\t      stacktrace\n"
3777 #endif
3778 #ifdef CONFIG_TRACER_SNAPSHOT
3779         "\t\t      snapshot\n"
3780 #endif
3781         "\t\t      dump\n"
3782         "\t\t      cpudump\n"
3783         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3784         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3785         "\t     The first one will disable tracing every time do_fault is hit\n"
3786         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3787         "\t       The first time do_trap is hit and it disables tracing, the\n"
3788         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3789         "\t       the counter will not decrement. It only decrements when the\n"
3790         "\t       trigger did work\n"
3791         "\t     To remove trigger without count:\n"
3792         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3793         "\t     To remove trigger with a count:\n"
3794         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3795         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3796         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3797         "\t    modules: Can select a group via module command :mod:\n"
3798         "\t    Does not accept triggers\n"
3799 #endif /* CONFIG_DYNAMIC_FTRACE */
3800 #ifdef CONFIG_FUNCTION_TRACER
3801         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3802         "\t\t    (function)\n"
3803 #endif
3804 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3805         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3806         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3807         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3808 #endif
3809 #ifdef CONFIG_TRACER_SNAPSHOT
3810         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3811         "\t\t\t  snapshot buffer. Read the contents for more\n"
3812         "\t\t\t  information\n"
3813 #endif
3814 #ifdef CONFIG_STACK_TRACER
3815         "  stack_trace\t\t- Shows the max stack trace when active\n"
3816         "  stack_max_size\t- Shows current max stack size that was traced\n"
3817         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3818         "\t\t\t  new trace)\n"
3819 #ifdef CONFIG_DYNAMIC_FTRACE
3820         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3821         "\t\t\t  traces\n"
3822 #endif
3823 #endif /* CONFIG_STACK_TRACER */
3824         "  events/\t\t- Directory containing all trace event subsystems:\n"
3825         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3826         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3827         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3828         "\t\t\t  events\n"
3829         "      filter\t\t- If set, only events passing filter are traced\n"
3830         "  events/<system>/<event>/\t- Directory containing control files for\n"
3831         "\t\t\t  <event>:\n"
3832         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3833         "      filter\t\t- If set, only events passing filter are traced\n"
3834         "      trigger\t\t- If set, a command to perform when event is hit\n"
3835         "\t    Format: <trigger>[:count][if <filter>]\n"
3836         "\t   trigger: traceon, traceoff\n"
3837         "\t            enable_event:<system>:<event>\n"
3838         "\t            disable_event:<system>:<event>\n"
3839 #ifdef CONFIG_STACKTRACE
3840         "\t\t    stacktrace\n"
3841 #endif
3842 #ifdef CONFIG_TRACER_SNAPSHOT
3843         "\t\t    snapshot\n"
3844 #endif
3845         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3846         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3847         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3848         "\t                  events/block/block_unplug/trigger\n"
3849         "\t   The first disables tracing every time block_unplug is hit.\n"
3850         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3851         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3852         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3853         "\t   Like function triggers, the counter is only decremented if it\n"
3854         "\t    enabled or disabled tracing.\n"
3855         "\t   To remove a trigger without a count:\n"
3856         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3857         "\t   To remove a trigger with a count:\n"
3858         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3859         "\t   Filters can be ignored when removing a trigger.\n"
3860 ;
3861
3862 static ssize_t
3863 tracing_readme_read(struct file *filp, char __user *ubuf,
3864                        size_t cnt, loff_t *ppos)
3865 {
3866         return simple_read_from_buffer(ubuf, cnt, ppos,
3867                                         readme_msg, strlen(readme_msg));
3868 }
3869
3870 static const struct file_operations tracing_readme_fops = {
3871         .open           = tracing_open_generic,
3872         .read           = tracing_readme_read,
3873         .llseek         = generic_file_llseek,
3874 };
3875
3876 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3877 {
3878         unsigned int *ptr = v;
3879
3880         if (*pos || m->count)
3881                 ptr++;
3882
3883         (*pos)++;
3884
3885         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3886              ptr++) {
3887                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3888                         continue;
3889
3890                 return ptr;
3891         }
3892
3893         return NULL;
3894 }
3895
3896 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3897 {
3898         void *v;
3899         loff_t l = 0;
3900
3901         preempt_disable();
3902         arch_spin_lock(&trace_cmdline_lock);
3903
3904         v = &savedcmd->map_cmdline_to_pid[0];
3905         while (l <= *pos) {
3906                 v = saved_cmdlines_next(m, v, &l);
3907                 if (!v)
3908                         return NULL;
3909         }
3910
3911         return v;
3912 }
3913
3914 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3915 {
3916         arch_spin_unlock(&trace_cmdline_lock);
3917         preempt_enable();
3918 }
3919
3920 static int saved_cmdlines_show(struct seq_file *m, void *v)
3921 {
3922         char buf[TASK_COMM_LEN];
3923         unsigned int *pid = v;
3924
3925         __trace_find_cmdline(*pid, buf);
3926         seq_printf(m, "%d %s\n", *pid, buf);
3927         return 0;
3928 }
3929
3930 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3931         .start          = saved_cmdlines_start,
3932         .next           = saved_cmdlines_next,
3933         .stop           = saved_cmdlines_stop,
3934         .show           = saved_cmdlines_show,
3935 };
3936
3937 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3938 {
3939         if (tracing_disabled)
3940                 return -ENODEV;
3941
3942         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3943 }
3944
3945 static const struct file_operations tracing_saved_cmdlines_fops = {
3946         .open           = tracing_saved_cmdlines_open,
3947         .read           = seq_read,
3948         .llseek         = seq_lseek,
3949         .release        = seq_release,
3950 };
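
/*
 * Usage sketch: the seq_file above backs the saved_cmdlines file; each
 * line is "<pid> <comm>" as printed by saved_cmdlines_show(). Example
 * output (values are made up):
 *
 *   cat saved_cmdlines
 *   1 systemd
 *   727 kworker/0:2
 */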
3951
3952 static ssize_t
3953 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3954                                  size_t cnt, loff_t *ppos)
3955 {
3956         char buf[64];
3957         int r;
3958
3959         arch_spin_lock(&trace_cmdline_lock);
3960         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3961         arch_spin_unlock(&trace_cmdline_lock);
3962
3963         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3964 }
3965
3966 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3967 {
3968         kfree(s->saved_cmdlines);
3969         kfree(s->map_cmdline_to_pid);
3970         kfree(s);
3971 }
3972
3973 static int tracing_resize_saved_cmdlines(unsigned int val)
3974 {
3975         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3976
3977         s = kmalloc(sizeof(*s), GFP_KERNEL);
3978         if (!s)
3979                 return -ENOMEM;
3980
3981         if (allocate_cmdlines_buffer(val, s) < 0) {
3982                 kfree(s);
3983                 return -ENOMEM;
3984         }
3985
3986         arch_spin_lock(&trace_cmdline_lock);
3987         savedcmd_temp = savedcmd;
3988         savedcmd = s;
3989         arch_spin_unlock(&trace_cmdline_lock);
3990         free_saved_cmdlines_buffer(savedcmd_temp);
3991
3992         return 0;
3993 }
3994
3995 static ssize_t
3996 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3997                                   size_t cnt, loff_t *ppos)
3998 {
3999         unsigned long val;
4000         int ret;
4001
4002         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4003         if (ret)
4004                 return ret;
4005
4006         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
4007         if (!val || val > PID_MAX_DEFAULT)
4008                 return -EINVAL;
4009
4010         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4011         if (ret < 0)
4012                 return ret;
4013
4014         *ppos += cnt;
4015
4016         return cnt;
4017 }
4018
4019 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4020         .open           = tracing_open_generic,
4021         .read           = tracing_saved_cmdlines_size_read,
4022         .write          = tracing_saved_cmdlines_size_write,
4023 };
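
/*
 * Usage sketch: saved_cmdlines_size shows and resizes the comm-pid cache
 * through tracing_resize_saved_cmdlines(); a write must be at least 1
 * and at most PID_MAX_DEFAULT:
 *
 *   cat saved_cmdlines_size        # current number of entries
 *   echo 1024 > saved_cmdlines_size
 */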
4024
4025 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4026 static union trace_enum_map_item *
4027 update_enum_map(union trace_enum_map_item *ptr)
4028 {
4029         if (!ptr->map.enum_string) {
4030                 if (ptr->tail.next) {
4031                         ptr = ptr->tail.next;
4032                         /* Set ptr to the next real item (skip head) */
4033                         ptr++;
4034                 } else
4035                         return NULL;
4036         }
4037         return ptr;
4038 }
4039
4040 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4041 {
4042         union trace_enum_map_item *ptr = v;
4043
4044         /*
4045          * Paranoid! If ptr points to end, we don't want to increment past it.
4046          * This really should never happen.
4047          */
4048         ptr = update_enum_map(ptr);
4049         if (WARN_ON_ONCE(!ptr))
4050                 return NULL;
4051
4052         ptr++;
4053
4054         (*pos)++;
4055
4056         ptr = update_enum_map(ptr);
4057
4058         return ptr;
4059 }
4060
4061 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4062 {
4063         union trace_enum_map_item *v;
4064         loff_t l = 0;
4065
4066         mutex_lock(&trace_enum_mutex);
4067
4068         v = trace_enum_maps;
4069         if (v)
4070                 v++;
4071
4072         while (v && l < *pos) {
4073                 v = enum_map_next(m, v, &l);
4074         }
4075
4076         return v;
4077 }
4078
4079 static void enum_map_stop(struct seq_file *m, void *v)
4080 {
4081         mutex_unlock(&trace_enum_mutex);
4082 }
4083
4084 static int enum_map_show(struct seq_file *m, void *v)
4085 {
4086         union trace_enum_map_item *ptr = v;
4087
4088         seq_printf(m, "%s %ld (%s)\n",
4089                    ptr->map.enum_string, ptr->map.enum_value,
4090                    ptr->map.system);
4091
4092         return 0;
4093 }
4094
4095 static const struct seq_operations tracing_enum_map_seq_ops = {
4096         .start          = enum_map_start,
4097         .next           = enum_map_next,
4098         .stop           = enum_map_stop,
4099         .show           = enum_map_show,
4100 };
4101
4102 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4103 {
4104         if (tracing_disabled)
4105                 return -ENODEV;
4106
4107         return seq_open(filp, &tracing_enum_map_seq_ops);
4108 }
4109
4110 static const struct file_operations tracing_enum_map_fops = {
4111         .open           = tracing_enum_map_open,
4112         .read           = seq_read,
4113         .llseek         = seq_lseek,
4114         .release        = seq_release,
4115 };
4116
4117 static inline union trace_enum_map_item *
4118 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4119 {
4120         /* Return tail of array given the head */
4121         return ptr + ptr->head.length + 1;
4122 }
4123
4124 static void
4125 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4126                            int len)
4127 {
4128         struct trace_enum_map **stop;
4129         struct trace_enum_map **map;
4130         union trace_enum_map_item *map_array;
4131         union trace_enum_map_item *ptr;
4132
4133         stop = start + len;
4134
4135         /*
4136          * The trace_enum_maps contains the map plus a head and tail item,
4137          * where the head holds the module and length of array, and the
4138          * tail holds a pointer to the next list.
4139          */
4140         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4141         if (!map_array) {
4142                 pr_warning("Unable to allocate trace enum mapping\n");
4143                 return;
4144         }
4145
4146         mutex_lock(&trace_enum_mutex);
4147
4148         if (!trace_enum_maps)
4149                 trace_enum_maps = map_array;
4150         else {
4151                 ptr = trace_enum_maps;
4152                 for (;;) {
4153                         ptr = trace_enum_jmp_to_tail(ptr);
4154                         if (!ptr->tail.next)
4155                                 break;
4156                         ptr = ptr->tail.next;
4157
4158                 }
4159                 ptr->tail.next = map_array;
4160         }
4161         map_array->head.mod = mod;
4162         map_array->head.length = len;
4163         map_array++;
4164
4165         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4166                 map_array->map = **map;
4167                 map_array++;
4168         }
4169         memset(map_array, 0, sizeof(*map_array));
4170
4171         mutex_unlock(&trace_enum_mutex);
4172 }
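
/*
 * Layout sketch of the array built above: a head item, the @len enum
 * maps copied from @start, then one zeroed item whose tail.next can
 * later point at the next module's array:
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * trace_enum_jmp_to_tail() skips head.length + 1 items to land on the
 * tail entry.
 */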
4173
4174 static void trace_create_enum_file(struct dentry *d_tracer)
4175 {
4176         trace_create_file("enum_map", 0444, d_tracer,
4177                           NULL, &tracing_enum_map_fops);
4178 }
4179
4180 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4181 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4182 static inline void trace_insert_enum_map_file(struct module *mod,
4183                               struct trace_enum_map **start, int len) { }
4184 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4185
4186 static void trace_insert_enum_map(struct module *mod,
4187                                   struct trace_enum_map **start, int len)
4188 {
4189         struct trace_enum_map **map;
4190
4191         if (len <= 0)
4192                 return;
4193
4194         map = start;
4195
4196         trace_event_enum_update(map, len);
4197
4198         trace_insert_enum_map_file(mod, start, len);
4199 }
4200
4201 static ssize_t
4202 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4203                        size_t cnt, loff_t *ppos)
4204 {
4205         struct trace_array *tr = filp->private_data;
4206         char buf[MAX_TRACER_SIZE+2];
4207         int r;
4208
4209         mutex_lock(&trace_types_lock);
4210         r = sprintf(buf, "%s\n", tr->current_trace->name);
4211         mutex_unlock(&trace_types_lock);
4212
4213         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4214 }
4215
4216 int tracer_init(struct tracer *t, struct trace_array *tr)
4217 {
4218         tracing_reset_online_cpus(&tr->trace_buffer);
4219         return t->init(tr);
4220 }
4221
4222 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4223 {
4224         int cpu;
4225
4226         for_each_tracing_cpu(cpu)
4227                 per_cpu_ptr(buf->data, cpu)->entries = val;
4228 }
4229
4230 #ifdef CONFIG_TRACER_MAX_TRACE
4231 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4232 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4233                                         struct trace_buffer *size_buf, int cpu_id)
4234 {
4235         int cpu, ret = 0;
4236
4237         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4238                 for_each_tracing_cpu(cpu) {
4239                         ret = ring_buffer_resize(trace_buf->buffer,
4240                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4241                         if (ret < 0)
4242                                 break;
4243                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4244                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4245                 }
4246         } else {
4247                 ret = ring_buffer_resize(trace_buf->buffer,
4248                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4249                 if (ret == 0)
4250                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4251                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4252         }
4253
4254         return ret;
4255 }
4256 #endif /* CONFIG_TRACER_MAX_TRACE */
4257
4258 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4259                                         unsigned long size, int cpu)
4260 {
4261         int ret;
4262
4263         /*
4264          * If kernel or user changes the size of the ring buffer
4265          * we use the size that was given, and we can forget about
4266          * expanding it later.
4267          */
4268         ring_buffer_expanded = true;
4269
4270         /* May be called before buffers are initialized */
4271         if (!tr->trace_buffer.buffer)
4272                 return 0;
4273
4274         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4275         if (ret < 0)
4276                 return ret;
4277
4278 #ifdef CONFIG_TRACER_MAX_TRACE
4279         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4280             !tr->current_trace->use_max_tr)
4281                 goto out;
4282
4283         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4284         if (ret < 0) {
4285                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4286                                                      &tr->trace_buffer, cpu);
4287                 if (r < 0) {
4288                         /*
4289                          * AARGH! We are left with different
4290                          * size max buffer!!!!
4291                          * The max buffer is our "snapshot" buffer.
4292                          * When a tracer needs a snapshot (one of the
4293                          * latency tracers), it swaps the max buffer
4294                          * with the saved snapshot. We succeeded in
4295                          * updating the size of the main buffer, but failed to
4296                          * update the size of the max buffer. But when we tried
4297                          * to reset the main buffer to the original size, we
4298                          * failed there too. This is very unlikely to
4299                          * happen, but if it does, warn and kill all
4300                          * tracing.
4301                          */
4302                         WARN_ON(1);
4303                         tracing_disabled = 1;
4304                 }
4305                 return ret;
4306         }
4307
4308         if (cpu == RING_BUFFER_ALL_CPUS)
4309                 set_buffer_entries(&tr->max_buffer, size);
4310         else
4311                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4312
4313  out:
4314 #endif /* CONFIG_TRACER_MAX_TRACE */
4315
4316         if (cpu == RING_BUFFER_ALL_CPUS)
4317                 set_buffer_entries(&tr->trace_buffer, size);
4318         else
4319                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4320
4321         return ret;
4322 }
4323
4324 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4325                                           unsigned long size, int cpu_id)
4326 {
4327         int ret = size;
4328
4329         mutex_lock(&trace_types_lock);
4330
4331         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4332                 /* make sure this cpu is enabled in the mask */
4333                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4334                         ret = -EINVAL;
4335                         goto out;
4336                 }
4337         }
4338
4339         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4340         if (ret < 0)
4341                 ret = -ENOMEM;
4342
4343 out:
4344         mutex_unlock(&trace_types_lock);
4345
4346         return ret;
4347 }
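
/*
 * Usage sketch (assumption: the buffer_size_kb file from the mini-HOWTO
 * above ends up in this resize path; its write handler lives elsewhere
 * in this file). Sizes are given in kilobytes of buffer per CPU:
 *
 *   echo 4096 > buffer_size_kb   # roughly 4 MB of ring buffer per CPU
 */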
4348
4349
4350 /**
4351  * tracing_update_buffers - used by tracing facility to expand ring buffers
4352  *
4353  * To save memory when tracing is never used on a system that has it
4354  * configured in, the ring buffers start out at a minimum size. Once a
4355  * user starts to use the tracing facility, they need to grow to their
4356  * default size.
4357  *
4358  * This function is to be called when a tracer is about to be used.
4359  */
4360 int tracing_update_buffers(void)
4361 {
4362         int ret = 0;
4363
4364         mutex_lock(&trace_types_lock);
4365         if (!ring_buffer_expanded)
4366                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4367                                                 RING_BUFFER_ALL_CPUS);
4368         mutex_unlock(&trace_types_lock);
4369
4370         return ret;
4371 }
4372
4373 struct trace_option_dentry;
4374
4375 static void
4376 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4377
4378 /*
4379  * Used to clear out the tracer before deletion of an instance.
4380  * Must have trace_types_lock held.
4381  */
4382 static void tracing_set_nop(struct trace_array *tr)
4383 {
4384         if (tr->current_trace == &nop_trace)
4385                 return;
4386
4387         tr->current_trace->enabled--;
4388
4389         if (tr->current_trace->reset)
4390                 tr->current_trace->reset(tr);
4391
4392         tr->current_trace = &nop_trace;
4393 }
4394
4395 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4396 {
4397         /* Only enable if the directory has been created already. */
4398         if (!tr->dir)
4399                 return;
4400
4401         create_trace_option_files(tr, t);
4402 }
4403
4404 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4405 {
4406         struct tracer *t;
4407 #ifdef CONFIG_TRACER_MAX_TRACE
4408         bool had_max_tr;
4409 #endif
4410         int ret = 0;
4411
4412         mutex_lock(&trace_types_lock);
4413
4414         if (!ring_buffer_expanded) {
4415                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4416                                                 RING_BUFFER_ALL_CPUS);
4417                 if (ret < 0)
4418                         goto out;
4419                 ret = 0;
4420         }
4421
4422         for (t = trace_types; t; t = t->next) {
4423                 if (strcmp(t->name, buf) == 0)
4424                         break;
4425         }
4426         if (!t) {
4427                 ret = -EINVAL;
4428                 goto out;
4429         }
4430         if (t == tr->current_trace)
4431                 goto out;
4432
4433         /* Some tracers are only allowed for the top level buffer */
4434         if (!trace_ok_for_array(t, tr)) {
4435                 ret = -EINVAL;
4436                 goto out;
4437         }
4438
4439         /* If trace pipe files are being read, we can't change the tracer */
4440         if (tr->current_trace->ref) {
4441                 ret = -EBUSY;
4442                 goto out;
4443         }
4444
4445         trace_branch_disable();
4446
4447         tr->current_trace->enabled--;
4448
4449         if (tr->current_trace->reset)
4450                 tr->current_trace->reset(tr);
4451
4452         /* Current trace needs to be nop_trace before synchronize_sched */
4453         tr->current_trace = &nop_trace;
4454
4455 #ifdef CONFIG_TRACER_MAX_TRACE
4456         had_max_tr = tr->allocated_snapshot;
4457
4458         if (had_max_tr && !t->use_max_tr) {
4459                 /*
4460                  * We need to make sure that the update_max_tr sees that
4461                  * current_trace changed to nop_trace to keep it from
4462                  * swapping the buffers after we resize it.
4463                  * update_max_tr() is called with interrupts disabled,
4464                  * so a synchronize_sched() is sufficient.
4465                  */
4466                 synchronize_sched();
4467                 free_snapshot(tr);
4468         }
4469 #endif
4470
4471 #ifdef CONFIG_TRACER_MAX_TRACE
4472         if (t->use_max_tr && !had_max_tr) {
4473                 ret = alloc_snapshot(tr);
4474                 if (ret < 0)
4475                         goto out;
4476         }
4477 #endif
4478
4479         if (t->init) {
4480                 ret = tracer_init(t, tr);
4481                 if (ret)
4482                         goto out;
4483         }
4484
4485         tr->current_trace = t;
4486         tr->current_trace->enabled++;
4487         trace_branch_enable(tr);
4488  out:
4489         mutex_unlock(&trace_types_lock);
4490
4491         return ret;
4492 }
4493
4494 static ssize_t
4495 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4496                         size_t cnt, loff_t *ppos)
4497 {
4498         struct trace_array *tr = filp->private_data;
4499         char buf[MAX_TRACER_SIZE+1];
4500         int i;
4501         size_t ret;
4502         int err;
4503
4504         ret = cnt;
4505
4506         if (cnt > MAX_TRACER_SIZE)
4507                 cnt = MAX_TRACER_SIZE;
4508
4509         if (copy_from_user(&buf, ubuf, cnt))
4510                 return -EFAULT;
4511
4512         buf[cnt] = 0;
4513
4514         /* strip ending whitespace. */
4515         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4516                 buf[i] = 0;
4517
4518         err = tracing_set_tracer(tr, buf);
4519         if (err)
4520                 return err;
4521
4522         *ppos += ret;
4523
4524         return ret;
4525 }
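
/*
 * Usage sketch: tracing_set_tracer() is what tracing_set_trace_write()
 * runs for writes to the current_tracer file (see the mini-HOWTO above);
 * names are matched against the registered trace_types list, the same
 * list the show_traces seq_file walks:
 *
 *   cat available_tracers          # e.g. "function nop"
 *   echo function > current_tracer
 *   echo nop > current_tracer      # back to the no-op tracer
 */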
4526
4527 static ssize_t
4528 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4529                    size_t cnt, loff_t *ppos)
4530 {
4531         char buf[64];
4532         int r;
4533
4534         r = snprintf(buf, sizeof(buf), "%ld\n",
4535                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4536         if (r > sizeof(buf))
4537                 r = sizeof(buf);
4538         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4539 }
4540
4541 static ssize_t
4542 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4543                     size_t cnt, loff_t *ppos)
4544 {
4545         unsigned long val;
4546         int ret;
4547
4548         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4549         if (ret)
4550                 return ret;
4551
4552         *ptr = val * 1000;
4553
4554         return cnt;
4555 }
4556
4557 static ssize_t
4558 tracing_thresh_read(struct file *filp, char __user *ubuf,
4559                     size_t cnt, loff_t *ppos)
4560 {
4561         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4562 }
4563
4564 static ssize_t
4565 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4566                      size_t cnt, loff_t *ppos)
4567 {
4568         struct trace_array *tr = filp->private_data;
4569         int ret;
4570
4571         mutex_lock(&trace_types_lock);
4572         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4573         if (ret < 0)
4574                 goto out;
4575
4576         if (tr->current_trace->update_thresh) {
4577                 ret = tr->current_trace->update_thresh(tr);
4578                 if (ret < 0)
4579                         goto out;
4580         }
4581
4582         ret = cnt;
4583 out:
4584         mutex_unlock(&trace_types_lock);
4585
4586         return ret;
4587 }
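
/*
 * Usage sketch: tracing_thresh is exposed in microseconds while
 * tracing_nsecs_write() stores nanoseconds (val * 1000). It is meant as
 * a latency threshold for the latency tracers:
 *
 *   echo 100 > tracing_thresh   # threshold of 100 usecs
 *   echo 0 > tracing_thresh     # no threshold
 */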
4588
4589 #ifdef CONFIG_TRACER_MAX_TRACE
4590
4591 static ssize_t
4592 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4593                      size_t cnt, loff_t *ppos)
4594 {
4595         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4596 }
4597
4598 static ssize_t
4599 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4600                       size_t cnt, loff_t *ppos)
4601 {
4602         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4603 }
4604
4605 #endif
4606
4607 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4608 {
4609         struct trace_array *tr = inode->i_private;
4610         struct trace_iterator *iter;
4611         int ret = 0;
4612
4613         if (tracing_disabled)
4614                 return -ENODEV;
4615
4616         if (trace_array_get(tr) < 0)
4617                 return -ENODEV;
4618
4619         mutex_lock(&trace_types_lock);
4620
4621         /* create a buffer to store the information to pass to userspace */
4622         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4623         if (!iter) {
4624                 ret = -ENOMEM;
4625                 __trace_array_put(tr);
4626                 goto out;
4627         }
4628
4629         trace_seq_init(&iter->seq);
4630         iter->trace = tr->current_trace;
4631
4632         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4633                 ret = -ENOMEM;
4634                 goto fail;
4635         }
4636
4637         /* trace pipe does not show start of buffer */
4638         cpumask_setall(iter->started);
4639
4640         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4641                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4642
4643         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4644         if (trace_clocks[tr->clock_id].in_ns)
4645                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4646
4647         iter->tr = tr;
4648         iter->trace_buffer = &tr->trace_buffer;
4649         iter->cpu_file = tracing_get_cpu(inode);
4650         mutex_init(&iter->mutex);
4651         filp->private_data = iter;
4652
4653         if (iter->trace->pipe_open)
4654                 iter->trace->pipe_open(iter);
4655
4656         nonseekable_open(inode, filp);
4657
4658         tr->current_trace->ref++;
4659 out:
4660         mutex_unlock(&trace_types_lock);
4661         return ret;
4662
4663 fail:
4664         kfree(iter);
4665         __trace_array_put(tr);
4666         mutex_unlock(&trace_types_lock);
4667         return ret;
4668 }
4669
4670 static int tracing_release_pipe(struct inode *inode, struct file *file)
4671 {
4672         struct trace_iterator *iter = file->private_data;
4673         struct trace_array *tr = inode->i_private;
4674
4675         mutex_lock(&trace_types_lock);
4676
4677         tr->current_trace->ref--;
4678
4679         if (iter->trace->pipe_close)
4680                 iter->trace->pipe_close(iter);
4681
4682         mutex_unlock(&trace_types_lock);
4683
4684         free_cpumask_var(iter->started);
4685         mutex_destroy(&iter->mutex);
4686         kfree(iter);
4687
4688         trace_array_put(tr);
4689
4690         return 0;
4691 }
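
/*
 * Usage sketch: the pipe code above backs trace_pipe, the consuming
 * reader from the mini-HOWTO. Reads remove entries from the buffer and
 * block until data arrives unless the file is opened O_NONBLOCK (see
 * tracing_wait_pipe() below):
 *
 *   cat trace_pipe > /tmp/trace.log &
 */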
4692
4693 static unsigned int
4694 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4695 {
4696         struct trace_array *tr = iter->tr;
4697
4698         /* Iterators are static, they should be filled or empty */
4699         /* Iterators are static; they should be filled or empty */
4700                 return POLLIN | POLLRDNORM;
4701
4702         if (tr->trace_flags & TRACE_ITER_BLOCK)
4703                 /*
4704                  * Always select as readable when in blocking mode
4705                  */
4706                 return POLLIN | POLLRDNORM;
4707         else
4708                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4709                                              filp, poll_table);
4710 }
4711
4712 static unsigned int
4713 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4714 {
4715         struct trace_iterator *iter = filp->private_data;
4716
4717         return trace_poll(iter, filp, poll_table);
4718 }
4719
4720 /* Must be called with iter->mutex held. */
4721 static int tracing_wait_pipe(struct file *filp)
4722 {
4723         struct trace_iterator *iter = filp->private_data;
4724         int ret;
4725
4726         while (trace_empty(iter)) {
4727
4728                 if ((filp->f_flags & O_NONBLOCK)) {
4729                         return -EAGAIN;
4730                 }
4731
4732                 /*
4733                  * We block until we read something and tracing is disabled.
4734                  * We still block if tracing is disabled, but we have never
4735                  * read anything. This allows a user to cat this file, and
4736                  * then enable tracing. But after we have read something,
4737                  * we give an EOF when tracing is again disabled.
4738                  *
4739                  * iter->pos will be 0 if we haven't read anything.
4740                  */
4741                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
4742                         break;
4743
4744                 mutex_unlock(&iter->mutex);
4745
4746                 ret = wait_on_pipe(iter, false);
4747
4748                 mutex_lock(&iter->mutex);
4749
4750                 if (ret)
4751                         return ret;
4752         }
4753
4754         return 1;
4755 }
4756
4757 /*
4758  * Consumer reader.
4759  */
4760 static ssize_t
4761 tracing_read_pipe(struct file *filp, char __user *ubuf,
4762                   size_t cnt, loff_t *ppos)
4763 {
4764         struct trace_iterator *iter = filp->private_data;
4765         ssize_t sret;
4766
4767         /*
4768          * Avoid more than one consumer on a single file descriptor.
4769          * This is just a matter of trace coherency; the ring buffer itself
4770          * is protected.
4771          */
4772         mutex_lock(&iter->mutex);
4773
4774         /* return any leftover data */
4775         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4776         if (sret != -EBUSY)
4777                 goto out;
4778
4779         trace_seq_init(&iter->seq);
4780
4781         if (iter->trace->read) {
4782                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4783                 if (sret)
4784                         goto out;
4785         }
4786
4787 waitagain:
4788         sret = tracing_wait_pipe(filp);
4789         if (sret <= 0)
4790                 goto out;
4791
4792         /* stop when tracing is finished */
4793         if (trace_empty(iter)) {
4794                 sret = 0;
4795                 goto out;
4796         }
4797
4798         if (cnt >= PAGE_SIZE)
4799                 cnt = PAGE_SIZE - 1;
4800
4801         /* reset all but tr, trace, and overruns */
4802         memset(&iter->seq, 0,
4803                sizeof(struct trace_iterator) -
4804                offsetof(struct trace_iterator, seq));
4805         cpumask_clear(iter->started);
4806         trace_seq_init(&iter->seq);
4807         iter->pos = -1;
4808
4809         trace_event_read_lock();
4810         trace_access_lock(iter->cpu_file);
4811         while (trace_find_next_entry_inc(iter) != NULL) {
4812                 enum print_line_t ret;
4813                 int save_len = iter->seq.seq.len;
4814
4815                 ret = print_trace_line(iter);
4816                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4817                         /* don't print partial lines */
4818                         iter->seq.seq.len = save_len;
4819                         break;
4820                 }
4821                 if (ret != TRACE_TYPE_NO_CONSUME)
4822                         trace_consume(iter);
4823
4824                 if (trace_seq_used(&iter->seq) >= cnt)
4825                         break;
4826
4827                 /*
4828                  * Setting the full flag means we reached the trace_seq buffer
4829                  * size and should have left via the partial-line check above;
4830                  * one of the trace_seq_* functions is not being used properly.
4831                  */
4832                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4833                           iter->ent->type);
4834         }
4835         trace_access_unlock(iter->cpu_file);
4836         trace_event_read_unlock();
4837
4838         /* Now copy what we have to the user */
4839         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4840         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4841                 trace_seq_init(&iter->seq);
4842
4843         /*
4844          * If there was nothing to send to the user, despite having consumed
4845          * trace entries, go back and wait for more entries.
4846          */
4847         if (sret == -EBUSY)
4848                 goto waitagain;
4849
4850 out:
4851         mutex_unlock(&iter->mutex);
4852
4853         return sret;
4854 }
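/*
 * Note that trace_pipe is a consuming reader: entries returned here are
 * removed from the ring buffer (trace_consume()), and each read() returns
 * at most PAGE_SIZE - 1 bytes of formatted text. A per-CPU variant is
 * created below as per_cpu/cpuN/trace_pipe, e.g.:
 *
 *   cat per_cpu/cpu0/trace_pipe
 */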
4855
4856 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4857                                      unsigned int idx)
4858 {
4859         __free_page(spd->pages[idx]);
4860 }
4861
4862 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4863         .can_merge              = 0,
4864         .confirm                = generic_pipe_buf_confirm,
4865         .release                = generic_pipe_buf_release,
4866         .steal                  = generic_pipe_buf_steal,
4867         .get                    = generic_pipe_buf_get,
4868 };
4869
4870 static size_t
4871 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4872 {
4873         size_t count;
4874         int save_len;
4875         int ret;
4876
4877         /* Seq buffer is page-sized, exactly what we need. */
4878         for (;;) {
4879                 save_len = iter->seq.seq.len;
4880                 ret = print_trace_line(iter);
4881
4882                 if (trace_seq_has_overflowed(&iter->seq)) {
4883                         iter->seq.seq.len = save_len;
4884                         break;
4885                 }
4886
4887                 /*
4888                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
4889                  * should only be returned if the iter->seq overflowed. But
4890                  * check it anyway to be safe.
4891                  */
4892                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4893                         iter->seq.seq.len = save_len;
4894                         break;
4895                 }
4896
4897                 count = trace_seq_used(&iter->seq) - save_len;
4898                 if (rem < count) {
4899                         rem = 0;
4900                         iter->seq.seq.len = save_len;
4901                         break;
4902                 }
4903
4904                 if (ret != TRACE_TYPE_NO_CONSUME)
4905                         trace_consume(iter);
4906                 rem -= count;
4907                 if (!trace_find_next_entry_inc(iter))   {
4908                         rem = 0;
4909                         iter->ent = NULL;
4910                         break;
4911                 }
4912         }
4913
4914         return rem;
4915 }
4916
4917 static ssize_t tracing_splice_read_pipe(struct file *filp,
4918                                         loff_t *ppos,
4919                                         struct pipe_inode_info *pipe,
4920                                         size_t len,
4921                                         unsigned int flags)
4922 {
4923         struct page *pages_def[PIPE_DEF_BUFFERS];
4924         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4925         struct trace_iterator *iter = filp->private_data;
4926         struct splice_pipe_desc spd = {
4927                 .pages          = pages_def,
4928                 .partial        = partial_def,
4929                 .nr_pages       = 0, /* This gets updated below. */
4930                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4931                 .flags          = flags,
4932                 .ops            = &tracing_pipe_buf_ops,
4933                 .spd_release    = tracing_spd_release_pipe,
4934         };
4935         ssize_t ret;
4936         size_t rem;
4937         unsigned int i;
4938
4939         if (splice_grow_spd(pipe, &spd))
4940                 return -ENOMEM;
4941
4942         mutex_lock(&iter->mutex);
4943
4944         if (iter->trace->splice_read) {
4945                 ret = iter->trace->splice_read(iter, filp,
4946                                                ppos, pipe, len, flags);
4947                 if (ret)
4948                         goto out_err;
4949         }
4950
4951         ret = tracing_wait_pipe(filp);
4952         if (ret <= 0)
4953                 goto out_err;
4954
4955         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4956                 ret = -EFAULT;
4957                 goto out_err;
4958         }
4959
4960         trace_event_read_lock();
4961         trace_access_lock(iter->cpu_file);
4962
4963         /* Fill as many pages as possible. */
4964         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4965                 spd.pages[i] = alloc_page(GFP_KERNEL);
4966                 if (!spd.pages[i])
4967                         break;
4968
4969                 rem = tracing_fill_pipe_page(rem, iter);
4970
4971                 /* Copy the data into the page, so we can start over. */
4972                 ret = trace_seq_to_buffer(&iter->seq,
4973                                           page_address(spd.pages[i]),
4974                                           trace_seq_used(&iter->seq));
4975                 if (ret < 0) {
4976                         __free_page(spd.pages[i]);
4977                         break;
4978                 }
4979                 spd.partial[i].offset = 0;
4980                 spd.partial[i].len = trace_seq_used(&iter->seq);
4981
4982                 trace_seq_init(&iter->seq);
4983         }
4984
4985         trace_access_unlock(iter->cpu_file);
4986         trace_event_read_unlock();
4987         mutex_unlock(&iter->mutex);
4988
4989         spd.nr_pages = i;
4990
4991         if (i)
4992                 ret = splice_to_pipe(pipe, &spd);
4993         else
4994                 ret = 0;
4995 out:
4996         splice_shrink_spd(&spd);
4997         return ret;
4998
4999 out_err:
5000         mutex_unlock(&iter->mutex);
5001         goto out;
5002 }
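/*
 * The splice path above moves whole pages of formatted trace text into a
 * pipe without an intermediate user-space copy. A minimal user-space
 * sketch (illustrative only; error handling omitted, "out_fd" is any
 * writable descriptor):
 *
 *   int tfd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *   int p[2];
 *   pipe(p);
 *   splice(tfd, NULL, p[1], NULL, 64 * 1024, 0);    // fill pipe pages
 *   splice(p[0], NULL, out_fd, NULL, 64 * 1024, 0); // drain them
 */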
5003
5004 static ssize_t
5005 tracing_entries_read(struct file *filp, char __user *ubuf,
5006                      size_t cnt, loff_t *ppos)
5007 {
5008         struct inode *inode = file_inode(filp);
5009         struct trace_array *tr = inode->i_private;
5010         int cpu = tracing_get_cpu(inode);
5011         char buf[64];
5012         int r = 0;
5013         ssize_t ret;
5014
5015         mutex_lock(&trace_types_lock);
5016
5017         if (cpu == RING_BUFFER_ALL_CPUS) {
5018                 int cpu, buf_size_same;
5019                 unsigned long size;
5020
5021                 size = 0;
5022                 buf_size_same = 1;
5023                 /* check if all cpu sizes are same */
5024                 for_each_tracing_cpu(cpu) {
5025                         /* fill in the size from first enabled cpu */
5026                         if (size == 0)
5027                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5028                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5029                                 buf_size_same = 0;
5030                                 break;
5031                         }
5032                 }
5033
5034                 if (buf_size_same) {
5035                         if (!ring_buffer_expanded)
5036                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5037                                             size >> 10,
5038                                             trace_buf_size >> 10);
5039                         else
5040                                 r = sprintf(buf, "%lu\n", size >> 10);
5041                 } else
5042                         r = sprintf(buf, "X\n");
5043         } else
5044                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5045
5046         mutex_unlock(&trace_types_lock);
5047
5048         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5049         return ret;
5050 }
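/*
 * Reading buffer_size_kb reports the per-CPU buffer size in KB. Before
 * the ring buffer has been expanded from its boot-time minimum, the
 * output has the form "<current> (expanded: <target>)"; the top-level
 * file prints "X" when the per-CPU buffers are no longer all the same
 * size (for example after resizing a single CPU's buffer).
 */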
5051
5052 static ssize_t
5053 tracing_entries_write(struct file *filp, const char __user *ubuf,
5054                       size_t cnt, loff_t *ppos)
5055 {
5056         struct inode *inode = file_inode(filp);
5057         struct trace_array *tr = inode->i_private;
5058         unsigned long val;
5059         int ret;
5060
5061         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5062         if (ret)
5063                 return ret;
5064
5065         /* must have at least 1 entry */
5066         if (!val)
5067                 return -EINVAL;
5068
5069         /* value is in KB */
5070         val <<= 10;
5071         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5072         if (ret < 0)
5073                 return ret;
5074
5075         *ppos += cnt;
5076
5077         return cnt;
5078 }
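/*
 * Usage sketch (illustrative; the sizes are examples only):
 *
 *   echo 4096 > buffer_size_kb              # 4 MB per CPU
 *   echo 1024 > per_cpu/cpu1/buffer_size_kb # resize only CPU 1
 *
 * The value is interpreted in KB (val <<= 10 above) and must be non-zero.
 */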
5079
5080 static ssize_t
5081 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5082                                 size_t cnt, loff_t *ppos)
5083 {
5084         struct trace_array *tr = filp->private_data;
5085         char buf[64];
5086         int r, cpu;
5087         unsigned long size = 0, expanded_size = 0;
5088
5089         mutex_lock(&trace_types_lock);
5090         for_each_tracing_cpu(cpu) {
5091                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5092                 if (!ring_buffer_expanded)
5093                         expanded_size += trace_buf_size >> 10;
5094         }
5095         if (ring_buffer_expanded)
5096                 r = sprintf(buf, "%lu\n", size);
5097         else
5098                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5099         mutex_unlock(&trace_types_lock);
5100
5101         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5102 }
5103
5104 static ssize_t
5105 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5106                           size_t cnt, loff_t *ppos)
5107 {
5108         /*
5109          * There is no need to read what the user has written; this function
5110          * exists just to make sure that "echo" does not report an error.
5111          */
5112
5113         *ppos += cnt;
5114
5115         return cnt;
5116 }
5117
5118 static int
5119 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5120 {
5121         struct trace_array *tr = inode->i_private;
5122
5123         /* Disable tracing? */
5124         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5125                 tracer_tracing_off(tr);
5126         /* resize the ring buffer to 0 */
5127         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5128
5129         trace_array_put(tr);
5130
5131         return 0;
5132 }
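/*
 * The free_buffer file accepts any write, but the real work happens on the
 * final close: the ring buffer is resized to zero, and if the
 * TRACE_ITER_STOP_ON_FREE option is set (commonly exposed as the
 * "disable_on_free" trace option) tracing is turned off first. A hedged
 * usage sketch:
 *
 *   echo 1 > options/disable_on_free
 *   echo > free_buffer     # on close: stop tracing, free the buffer
 */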
5133
5134 static ssize_t
5135 tracing_mark_write(struct file *filp, const char __user *ubuf,
5136                                         size_t cnt, loff_t *fpos)
5137 {
5138         unsigned long addr = (unsigned long)ubuf;
5139         struct trace_array *tr = filp->private_data;
5140         struct ring_buffer_event *event;
5141         struct ring_buffer *buffer;
5142         struct print_entry *entry;
5143         unsigned long irq_flags;
5144         struct page *pages[2];
5145         void *map_page[2];
5146         int nr_pages = 1;
5147         ssize_t written;
5148         int offset;
5149         int size;
5150         int len;
5151         int ret;
5152         int i;
5153
5154         if (tracing_disabled)
5155                 return -EINVAL;
5156
5157         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5158                 return -EINVAL;
5159
5160         if (cnt > TRACE_BUF_SIZE)
5161                 cnt = TRACE_BUF_SIZE;
5162
5163         /*
5164          * Userspace is injecting traces into the kernel trace buffer.
5165          * We want to be as non-intrusive as possible.
5166          * To do so, we do not want to allocate any special buffers
5167          * or take any locks, but instead write the userspace data
5168          * straight into the ring buffer.
5169          *
5170          * First we need to pin the userspace buffer into memory. Most
5171          * likely it is resident already, because userspace just referenced
5172          * it, but there is no guarantee. By using get_user_pages_fast()
5173          * and kmap_atomic/kunmap_atomic() we can get access to the
5174          * pages directly. We then write the data directly into the
5175          * ring buffer.
5176          */
5177         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5178
5179         /* check if we cross pages */
5180         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5181                 nr_pages = 2;
5182
5183         offset = addr & (PAGE_SIZE - 1);
5184         addr &= PAGE_MASK;
5185
5186         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5187         if (ret < nr_pages) {
5188                 while (--ret >= 0)
5189                         put_page(pages[ret]);
5190                 written = -EFAULT;
5191                 goto out;
5192         }
5193
5194         for (i = 0; i < nr_pages; i++)
5195                 map_page[i] = kmap_atomic(pages[i]);
5196
5197         local_save_flags(irq_flags);
5198         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5199         buffer = tr->trace_buffer.buffer;
5200         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5201                                           irq_flags, preempt_count());
5202         if (!event) {
5203                 /* Ring buffer disabled, return as if not open for write */
5204                 written = -EBADF;
5205                 goto out_unlock;
5206         }
5207
5208         entry = ring_buffer_event_data(event);
5209         entry->ip = _THIS_IP_;
5210
5211         if (nr_pages == 2) {
5212                 len = PAGE_SIZE - offset;
5213                 memcpy(&entry->buf, map_page[0] + offset, len);
5214                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5215         } else
5216                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5217
5218         if (entry->buf[cnt - 1] != '\n') {
5219                 entry->buf[cnt] = '\n';
5220                 entry->buf[cnt + 1] = '\0';
5221         } else
5222                 entry->buf[cnt] = '\0';
5223
5224         __buffer_unlock_commit(buffer, event);
5225
5226         written = cnt;
5227
5228         *fpos += written;
5229
5230  out_unlock:
5231         for (i = nr_pages - 1; i >= 0; i--) {
5232                 kunmap_atomic(map_page[i]);
5233                 put_page(pages[i]);
5234         }
5235  out:
5236         return written;
5237 }
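/*
 * Usage sketch for the trace_marker file (illustrative):
 *
 *   echo "hit checkpoint A" > trace_marker
 *
 * The string shows up in the trace as a TRACE_PRINT event, interleaved
 * with kernel events. Writes are capped at TRACE_BUF_SIZE (less than one
 * page); the user pages are pinned with get_user_pages_fast() and copied
 * straight into the ring buffer, as described in the comment above.
 */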
5238
5239 static int tracing_clock_show(struct seq_file *m, void *v)
5240 {
5241         struct trace_array *tr = m->private;
5242         int i;
5243
5244         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5245                 seq_printf(m,
5246                         "%s%s%s%s", i ? " " : "",
5247                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5248                         i == tr->clock_id ? "]" : "");
5249         seq_putc(m, '\n');
5250
5251         return 0;
5252 }
5253
5254 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5255 {
5256         int i;
5257
5258         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5259                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5260                         break;
5261         }
5262         if (i == ARRAY_SIZE(trace_clocks))
5263                 return -EINVAL;
5264
5265         mutex_lock(&trace_types_lock);
5266
5267         tr->clock_id = i;
5268
5269         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5270
5271         /*
5272          * New clock may not be consistent with the previous clock.
5273          * Reset the buffer so that it doesn't have incomparable timestamps.
5274          */
5275         tracing_reset_online_cpus(&tr->trace_buffer);
5276
5277 #ifdef CONFIG_TRACER_MAX_TRACE
5278         if (tr->max_buffer.buffer)
5279                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5280         tracing_reset_online_cpus(&tr->max_buffer);
5281 #endif
5282
5283         mutex_unlock(&trace_types_lock);
5284
5285         return 0;
5286 }
5287
5288 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5289                                    size_t cnt, loff_t *fpos)
5290 {
5291         struct seq_file *m = filp->private_data;
5292         struct trace_array *tr = m->private;
5293         char buf[64];
5294         const char *clockstr;
5295         int ret;
5296
5297         if (cnt >= sizeof(buf))
5298                 return -EINVAL;
5299
5300         if (copy_from_user(&buf, ubuf, cnt))
5301                 return -EFAULT;
5302
5303         buf[cnt] = 0;
5304
5305         clockstr = strstrip(buf);
5306
5307         ret = tracing_set_clock(tr, clockstr);
5308         if (ret)
5309                 return ret;
5310
5311         *fpos += cnt;
5312
5313         return cnt;
5314 }
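/*
 * Usage sketch for trace_clock (illustrative; the available clock names
 * come from the trace_clocks[] table, e.g. "local", "global", "counter"):
 *
 *   cat trace_clock            # e.g. "[local] global counter ..."
 *   echo global > trace_clock  # switch clocks; the buffer is reset,
 *                              # since old and new timestamps would not
 *                              # be comparable
 */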
5315
5316 static int tracing_clock_open(struct inode *inode, struct file *file)
5317 {
5318         struct trace_array *tr = inode->i_private;
5319         int ret;
5320
5321         if (tracing_disabled)
5322                 return -ENODEV;
5323
5324         if (trace_array_get(tr))
5325                 return -ENODEV;
5326
5327         ret = single_open(file, tracing_clock_show, inode->i_private);
5328         if (ret < 0)
5329                 trace_array_put(tr);
5330
5331         return ret;
5332 }
5333
5334 struct ftrace_buffer_info {
5335         struct trace_iterator   iter;
5336         void                    *spare;
5337         unsigned int            read;
5338 };
5339
5340 #ifdef CONFIG_TRACER_SNAPSHOT
5341 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5342 {
5343         struct trace_array *tr = inode->i_private;
5344         struct trace_iterator *iter;
5345         struct seq_file *m;
5346         int ret = 0;
5347
5348         if (trace_array_get(tr) < 0)
5349                 return -ENODEV;
5350
5351         if (file->f_mode & FMODE_READ) {
5352                 iter = __tracing_open(inode, file, true);
5353                 if (IS_ERR(iter))
5354                         ret = PTR_ERR(iter);
5355         } else {
5356                 /* Writes still need the seq_file to hold the private data */
5357                 ret = -ENOMEM;
5358                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5359                 if (!m)
5360                         goto out;
5361                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5362                 if (!iter) {
5363                         kfree(m);
5364                         goto out;
5365                 }
5366                 ret = 0;
5367
5368                 iter->tr = tr;
5369                 iter->trace_buffer = &tr->max_buffer;
5370                 iter->cpu_file = tracing_get_cpu(inode);
5371                 m->private = iter;
5372                 file->private_data = m;
5373         }
5374 out:
5375         if (ret < 0)
5376                 trace_array_put(tr);
5377
5378         return ret;
5379 }
5380
5381 static ssize_t
5382 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5383                        loff_t *ppos)
5384 {
5385         struct seq_file *m = filp->private_data;
5386         struct trace_iterator *iter = m->private;
5387         struct trace_array *tr = iter->tr;
5388         unsigned long val;
5389         int ret;
5390
5391         ret = tracing_update_buffers();
5392         if (ret < 0)
5393                 return ret;
5394
5395         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5396         if (ret)
5397                 return ret;
5398
5399         mutex_lock(&trace_types_lock);
5400
5401         if (tr->current_trace->use_max_tr) {
5402                 ret = -EBUSY;
5403                 goto out;
5404         }
5405
5406         switch (val) {
5407         case 0:
5408                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5409                         ret = -EINVAL;
5410                         break;
5411                 }
5412                 if (tr->allocated_snapshot)
5413                         free_snapshot(tr);
5414                 break;
5415         case 1:
5416 /* Only allow per-cpu swap if the ring buffer supports it */
5417 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5418                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5419                         ret = -EINVAL;
5420                         break;
5421                 }
5422 #endif
5423                 if (!tr->allocated_snapshot)
5424                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
5425                                 &tr->trace_buffer, iter->cpu_file);
5426                 else
5427                         ret = alloc_snapshot(tr);
5428
5429                 if (ret < 0)
5430                         break;
5431
5432                 local_irq_disable();
5433                 /* Now, we're going to swap */
5434                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5435                         update_max_tr(tr, current, smp_processor_id());
5436                 else
5437                         update_max_tr_single(tr, current, iter->cpu_file);
5438                 local_irq_enable();
5439                 break;
5440         default:
5441                 if (tr->allocated_snapshot) {
5442                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5443                                 tracing_reset_online_cpus(&tr->max_buffer);
5444                         else
5445                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5446                 }
5447                 break;
5448         }
5449
5450         if (ret >= 0) {
5451                 *ppos += cnt;
5452                 ret = cnt;
5453         }
5454 out:
5455         mutex_unlock(&trace_types_lock);
5456         return ret;
5457 }
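/*
 * Usage sketch for the snapshot file (illustrative):
 *
 *   echo 1 > snapshot   # allocate (if needed) and take a snapshot by
 *                       # swapping the live buffer with the max buffer
 *   cat snapshot        # read the snapshotted trace
 *   echo 2 > snapshot   # any other value: clear the snapshot contents
 *   echo 0 > snapshot   # free the snapshot buffer (all-CPUs file only)
 */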
5458
5459 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5460 {
5461         struct seq_file *m = file->private_data;
5462         int ret;
5463
5464         ret = tracing_release(inode, file);
5465
5466         if (file->f_mode & FMODE_READ)
5467                 return ret;
5468
5469         /* If write only, the seq_file is just a stub */
5470         if (m)
5471                 kfree(m->private);
5472         kfree(m);
5473
5474         return 0;
5475 }
5476
5477 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5478 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5479                                     size_t count, loff_t *ppos);
5480 static int tracing_buffers_release(struct inode *inode, struct file *file);
5481 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5482                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5483
5484 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5485 {
5486         struct ftrace_buffer_info *info;
5487         int ret;
5488
5489         ret = tracing_buffers_open(inode, filp);
5490         if (ret < 0)
5491                 return ret;
5492
5493         info = filp->private_data;
5494
5495         if (info->iter.trace->use_max_tr) {
5496                 tracing_buffers_release(inode, filp);
5497                 return -EBUSY;
5498         }
5499
5500         info->iter.snapshot = true;
5501         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5502
5503         return ret;
5504 }
5505
5506 #endif /* CONFIG_TRACER_SNAPSHOT */
5507
5508
5509 static const struct file_operations tracing_thresh_fops = {
5510         .open           = tracing_open_generic,
5511         .read           = tracing_thresh_read,
5512         .write          = tracing_thresh_write,
5513         .llseek         = generic_file_llseek,
5514 };
5515
5516 #ifdef CONFIG_TRACER_MAX_TRACE
5517 static const struct file_operations tracing_max_lat_fops = {
5518         .open           = tracing_open_generic,
5519         .read           = tracing_max_lat_read,
5520         .write          = tracing_max_lat_write,
5521         .llseek         = generic_file_llseek,
5522 };
5523 #endif
5524
5525 static const struct file_operations set_tracer_fops = {
5526         .open           = tracing_open_generic,
5527         .read           = tracing_set_trace_read,
5528         .write          = tracing_set_trace_write,
5529         .llseek         = generic_file_llseek,
5530 };
5531
5532 static const struct file_operations tracing_pipe_fops = {
5533         .open           = tracing_open_pipe,
5534         .poll           = tracing_poll_pipe,
5535         .read           = tracing_read_pipe,
5536         .splice_read    = tracing_splice_read_pipe,
5537         .release        = tracing_release_pipe,
5538         .llseek         = no_llseek,
5539 };
5540
5541 static const struct file_operations tracing_entries_fops = {
5542         .open           = tracing_open_generic_tr,
5543         .read           = tracing_entries_read,
5544         .write          = tracing_entries_write,
5545         .llseek         = generic_file_llseek,
5546         .release        = tracing_release_generic_tr,
5547 };
5548
5549 static const struct file_operations tracing_total_entries_fops = {
5550         .open           = tracing_open_generic_tr,
5551         .read           = tracing_total_entries_read,
5552         .llseek         = generic_file_llseek,
5553         .release        = tracing_release_generic_tr,
5554 };
5555
5556 static const struct file_operations tracing_free_buffer_fops = {
5557         .open           = tracing_open_generic_tr,
5558         .write          = tracing_free_buffer_write,
5559         .release        = tracing_free_buffer_release,
5560 };
5561
5562 static const struct file_operations tracing_mark_fops = {
5563         .open           = tracing_open_generic_tr,
5564         .write          = tracing_mark_write,
5565         .llseek         = generic_file_llseek,
5566         .release        = tracing_release_generic_tr,
5567 };
5568
5569 static const struct file_operations trace_clock_fops = {
5570         .open           = tracing_clock_open,
5571         .read           = seq_read,
5572         .llseek         = seq_lseek,
5573         .release        = tracing_single_release_tr,
5574         .write          = tracing_clock_write,
5575 };
5576
5577 #ifdef CONFIG_TRACER_SNAPSHOT
5578 static const struct file_operations snapshot_fops = {
5579         .open           = tracing_snapshot_open,
5580         .read           = seq_read,
5581         .write          = tracing_snapshot_write,
5582         .llseek         = tracing_lseek,
5583         .release        = tracing_snapshot_release,
5584 };
5585
5586 static const struct file_operations snapshot_raw_fops = {
5587         .open           = snapshot_raw_open,
5588         .read           = tracing_buffers_read,
5589         .release        = tracing_buffers_release,
5590         .splice_read    = tracing_buffers_splice_read,
5591         .llseek         = no_llseek,
5592 };
5593
5594 #endif /* CONFIG_TRACER_SNAPSHOT */
5595
5596 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5597 {
5598         struct trace_array *tr = inode->i_private;
5599         struct ftrace_buffer_info *info;
5600         int ret;
5601
5602         if (tracing_disabled)
5603                 return -ENODEV;
5604
5605         if (trace_array_get(tr) < 0)
5606                 return -ENODEV;
5607
5608         info = kzalloc(sizeof(*info), GFP_KERNEL);
5609         if (!info) {
5610                 trace_array_put(tr);
5611                 return -ENOMEM;
5612         }
5613
5614         mutex_lock(&trace_types_lock);
5615
5616         info->iter.tr           = tr;
5617         info->iter.cpu_file     = tracing_get_cpu(inode);
5618         info->iter.trace        = tr->current_trace;
5619         info->iter.trace_buffer = &tr->trace_buffer;
5620         info->spare             = NULL;
5621         /* Force reading ring buffer for first read */
5622         info->read              = (unsigned int)-1;
5623
5624         filp->private_data = info;
5625
5626         tr->current_trace->ref++;
5627
5628         mutex_unlock(&trace_types_lock);
5629
5630         ret = nonseekable_open(inode, filp);
5631         if (ret < 0)
5632                 trace_array_put(tr);
5633
5634         return ret;
5635 }
5636
5637 static unsigned int
5638 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5639 {
5640         struct ftrace_buffer_info *info = filp->private_data;
5641         struct trace_iterator *iter = &info->iter;
5642
5643         return trace_poll(iter, filp, poll_table);
5644 }
5645
5646 static ssize_t
5647 tracing_buffers_read(struct file *filp, char __user *ubuf,
5648                      size_t count, loff_t *ppos)
5649 {
5650         struct ftrace_buffer_info *info = filp->private_data;
5651         struct trace_iterator *iter = &info->iter;
5652         ssize_t ret;
5653         ssize_t size;
5654
5655         if (!count)
5656                 return 0;
5657
5658 #ifdef CONFIG_TRACER_MAX_TRACE
5659         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5660                 return -EBUSY;
5661 #endif
5662
5663         if (!info->spare)
5664                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5665                                                           iter->cpu_file);
5666         if (!info->spare)
5667                 return -ENOMEM;
5668
5669         /* Do we have previous read data to read? */
5670         if (info->read < PAGE_SIZE)
5671                 goto read;
5672
5673  again:
5674         trace_access_lock(iter->cpu_file);
5675         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5676                                     &info->spare,
5677                                     count,
5678                                     iter->cpu_file, 0);
5679         trace_access_unlock(iter->cpu_file);
5680
5681         if (ret < 0) {
5682                 if (trace_empty(iter)) {
5683                         if ((filp->f_flags & O_NONBLOCK))
5684                                 return -EAGAIN;
5685
5686                         ret = wait_on_pipe(iter, false);
5687                         if (ret)
5688                                 return ret;
5689
5690                         goto again;
5691                 }
5692                 return 0;
5693         }
5694
5695         info->read = 0;
5696  read:
5697         size = PAGE_SIZE - info->read;
5698         if (size > count)
5699                 size = count;
5700
5701         ret = copy_to_user(ubuf, info->spare + info->read, size);
5702         if (ret == size)
5703                 return -EFAULT;
5704
5705         size -= ret;
5706
5707         *ppos += size;
5708         info->read += size;
5709
5710         return size;
5711 }
5712
5713 static int tracing_buffers_release(struct inode *inode, struct file *file)
5714 {
5715         struct ftrace_buffer_info *info = file->private_data;
5716         struct trace_iterator *iter = &info->iter;
5717
5718         mutex_lock(&trace_types_lock);
5719
5720         iter->tr->current_trace->ref--;
5721
5722         __trace_array_put(iter->tr);
5723
5724         if (info->spare)
5725                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5726         kfree(info);
5727
5728         mutex_unlock(&trace_types_lock);
5729
5730         return 0;
5731 }
5732
5733 struct buffer_ref {
5734         struct ring_buffer      *buffer;
5735         void                    *page;
5736         int                     ref;
5737 };
5738
5739 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5740                                     struct pipe_buffer *buf)
5741 {
5742         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5743
5744         if (--ref->ref)
5745                 return;
5746
5747         ring_buffer_free_read_page(ref->buffer, ref->page);
5748         kfree(ref);
5749         buf->private = 0;
5750 }
5751
5752 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5753                                 struct pipe_buffer *buf)
5754 {
5755         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5756
5757         if (ref->ref > INT_MAX/2)
5758                 return false;
5759
5760         ref->ref++;
5761         return true;
5762 }
5763
5764 /* Pipe buffer operations for a buffer. */
5765 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5766         .can_merge              = 0,
5767         .confirm                = generic_pipe_buf_confirm,
5768         .release                = buffer_pipe_buf_release,
5769         .steal                  = generic_pipe_buf_steal,
5770         .get                    = buffer_pipe_buf_get,
5771 };
5772
5773 /*
5774  * Callback from splice_to_pipe(), if we need to release some pages
5775  * at the end of the spd in case we errored out while filling the pipe.
5776  */
5777 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5778 {
5779         struct buffer_ref *ref =
5780                 (struct buffer_ref *)spd->partial[i].private;
5781
5782         if (--ref->ref)
5783                 return;
5784
5785         ring_buffer_free_read_page(ref->buffer, ref->page);
5786         kfree(ref);
5787         spd->partial[i].private = 0;
5788 }
5789
5790 static ssize_t
5791 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5792                             struct pipe_inode_info *pipe, size_t len,
5793                             unsigned int flags)
5794 {
5795         struct ftrace_buffer_info *info = file->private_data;
5796         struct trace_iterator *iter = &info->iter;
5797         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5798         struct page *pages_def[PIPE_DEF_BUFFERS];
5799         struct splice_pipe_desc spd = {
5800                 .pages          = pages_def,
5801                 .partial        = partial_def,
5802                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5803                 .flags          = flags,
5804                 .ops            = &buffer_pipe_buf_ops,
5805                 .spd_release    = buffer_spd_release,
5806         };
5807         struct buffer_ref *ref;
5808         int entries, i;
5809         ssize_t ret = 0;
5810
5811 #ifdef CONFIG_TRACER_MAX_TRACE
5812         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5813                 return -EBUSY;
5814 #endif
5815
5816         if (*ppos & (PAGE_SIZE - 1))
5817                 return -EINVAL;
5818
5819         if (len & (PAGE_SIZE - 1)) {
5820                 if (len < PAGE_SIZE)
5821                         return -EINVAL;
5822                 len &= PAGE_MASK;
5823         }
5824
5825         if (splice_grow_spd(pipe, &spd))
5826                 return -ENOMEM;
5827
5828  again:
5829         trace_access_lock(iter->cpu_file);
5830         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5831
5832         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5833                 struct page *page;
5834                 int r;
5835
5836                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5837                 if (!ref) {
5838                         ret = -ENOMEM;
5839                         break;
5840                 }
5841
5842                 ref->ref = 1;
5843                 ref->buffer = iter->trace_buffer->buffer;
5844                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5845                 if (!ref->page) {
5846                         ret = -ENOMEM;
5847                         kfree(ref);
5848                         break;
5849                 }
5850
5851                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5852                                           len, iter->cpu_file, 1);
5853                 if (r < 0) {
5854                         ring_buffer_free_read_page(ref->buffer, ref->page);
5855                         kfree(ref);
5856                         break;
5857                 }
5858
5859                 page = virt_to_page(ref->page);
5860
5861                 spd.pages[i] = page;
5862                 spd.partial[i].len = PAGE_SIZE;
5863                 spd.partial[i].offset = 0;
5864                 spd.partial[i].private = (unsigned long)ref;
5865                 spd.nr_pages++;
5866                 *ppos += PAGE_SIZE;
5867
5868                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5869         }
5870
5871         trace_access_unlock(iter->cpu_file);
5872         spd.nr_pages = i;
5873
5874         /* did we read anything? */
5875         if (!spd.nr_pages) {
5876                 if (ret)
5877                         goto out;
5878
5879                 ret = -EAGAIN;
5880                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5881                         goto out;
5882
5883                 ret = wait_on_pipe(iter, true);
5884                 if (ret)
5885                         goto out;
5886
5887                 goto again;
5888         }
5889
5890         ret = splice_to_pipe(pipe, &spd);
5891 out:
5892         splice_shrink_spd(&spd);
5893
5894         return ret;
5895 }
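/*
 * Unlike trace_pipe, trace_pipe_raw hands out raw ring-buffer pages in the
 * binary sub-buffer format, and the splice path above passes those pages
 * to a pipe without copying (each page is wrapped in a buffer_ref). Both
 * the offset and the length must be page aligned; tools such as trace-cmd
 * typically record per-CPU data this way from per_cpu/cpuN/trace_pipe_raw.
 */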
5896
5897 static const struct file_operations tracing_buffers_fops = {
5898         .open           = tracing_buffers_open,
5899         .read           = tracing_buffers_read,
5900         .poll           = tracing_buffers_poll,
5901         .release        = tracing_buffers_release,
5902         .splice_read    = tracing_buffers_splice_read,
5903         .llseek         = no_llseek,
5904 };
5905
5906 static ssize_t
5907 tracing_stats_read(struct file *filp, char __user *ubuf,
5908                    size_t count, loff_t *ppos)
5909 {
5910         struct inode *inode = file_inode(filp);
5911         struct trace_array *tr = inode->i_private;
5912         struct trace_buffer *trace_buf = &tr->trace_buffer;
5913         int cpu = tracing_get_cpu(inode);
5914         struct trace_seq *s;
5915         unsigned long cnt;
5916         unsigned long long t;
5917         unsigned long usec_rem;
5918
5919         s = kmalloc(sizeof(*s), GFP_KERNEL);
5920         if (!s)
5921                 return -ENOMEM;
5922
5923         trace_seq_init(s);
5924
5925         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5926         trace_seq_printf(s, "entries: %ld\n", cnt);
5927
5928         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5929         trace_seq_printf(s, "overrun: %ld\n", cnt);
5930
5931         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5932         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5933
5934         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5935         trace_seq_printf(s, "bytes: %ld\n", cnt);
5936
5937         if (trace_clocks[tr->clock_id].in_ns) {
5938                 /* local or global for trace_clock */
5939                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5940                 usec_rem = do_div(t, USEC_PER_SEC);
5941                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5942                                                                 t, usec_rem);
5943
5944                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5945                 usec_rem = do_div(t, USEC_PER_SEC);
5946                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5947         } else {
5948                 /* counter or tsc mode for trace_clock */
5949                 trace_seq_printf(s, "oldest event ts: %llu\n",
5950                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5951
5952                 trace_seq_printf(s, "now ts: %llu\n",
5953                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5954         }
5955
5956         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5957         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5958
5959         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5960         trace_seq_printf(s, "read events: %ld\n", cnt);
5961
5962         count = simple_read_from_buffer(ubuf, count, ppos,
5963                                         s->buffer, trace_seq_used(s));
5964
5965         kfree(s);
5966
5967         return count;
5968 }
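/*
 * Example of the per_cpu/cpuN/stats output produced above (the values are
 * illustrative only):
 *
 *   entries: 129
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 6724
 *   oldest event ts: 3815.380221
 *   now ts: 3822.067089
 *   dropped events: 0
 *   read events: 42
 */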
5969
5970 static const struct file_operations tracing_stats_fops = {
5971         .open           = tracing_open_generic_tr,
5972         .read           = tracing_stats_read,
5973         .llseek         = generic_file_llseek,
5974         .release        = tracing_release_generic_tr,
5975 };
5976
5977 #ifdef CONFIG_DYNAMIC_FTRACE
5978
5979 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5980 {
5981         return 0;
5982 }
5983
5984 static ssize_t
5985 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5986                   size_t cnt, loff_t *ppos)
5987 {
5988         static char ftrace_dyn_info_buffer[1024];
5989         static DEFINE_MUTEX(dyn_info_mutex);
5990         unsigned long *p = filp->private_data;
5991         char *buf = ftrace_dyn_info_buffer;
5992         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5993         int r;
5994
5995         mutex_lock(&dyn_info_mutex);
5996         r = sprintf(buf, "%ld ", *p);
5997
5998         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5999         buf[r++] = '\n';
6000
6001         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6002
6003         mutex_unlock(&dyn_info_mutex);
6004
6005         return r;
6006 }
6007
6008 static const struct file_operations tracing_dyn_info_fops = {
6009         .open           = tracing_open_generic,
6010         .read           = tracing_read_dyn_info,
6011         .llseek         = generic_file_llseek,
6012 };
6013 #endif /* CONFIG_DYNAMIC_FTRACE */
6014
6015 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6016 static void
6017 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6018 {
6019         tracing_snapshot();
6020 }
6021
6022 static void
6023 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6024 {
6025         unsigned long *count = (unsigned long *)data;
6026
6027         if (!*count)
6028                 return;
6029
6030         if (*count != -1)
6031                 (*count)--;
6032
6033         tracing_snapshot();
6034 }
6035
6036 static int
6037 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6038                       struct ftrace_probe_ops *ops, void *data)
6039 {
6040         long count = (long)data;
6041
6042         seq_printf(m, "%ps:", (void *)ip);
6043
6044         seq_puts(m, "snapshot");
6045
6046         if (count == -1)
6047                 seq_puts(m, ":unlimited\n");
6048         else
6049                 seq_printf(m, ":count=%ld\n", count);
6050
6051         return 0;
6052 }
6053
6054 static struct ftrace_probe_ops snapshot_probe_ops = {
6055         .func                   = ftrace_snapshot,
6056         .print                  = ftrace_snapshot_print,
6057 };
6058
6059 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6060         .func                   = ftrace_count_snapshot,
6061         .print                  = ftrace_snapshot_print,
6062 };
6063
6064 static int
6065 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6066                                char *glob, char *cmd, char *param, int enable)
6067 {
6068         struct ftrace_probe_ops *ops;
6069         void *count = (void *)-1;
6070         char *number;
6071         int ret;
6072
6073         /* hash funcs only work with set_ftrace_filter */
6074         if (!enable)
6075                 return -EINVAL;
6076
6077         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6078
6079         if (glob[0] == '!') {
6080                 unregister_ftrace_function_probe_func(glob+1, ops);
6081                 return 0;
6082         }
6083
6084         if (!param)
6085                 goto out_reg;
6086
6087         number = strsep(&param, ":");
6088
6089         if (!strlen(number))
6090                 goto out_reg;
6091
6092         /*
6093          * We use the callback data field (which is a pointer)
6094          * as our counter.
6095          */
6096         ret = kstrtoul(number, 0, (unsigned long *)&count);
6097         if (ret)
6098                 return ret;
6099
6100  out_reg:
6101         ret = alloc_snapshot(&global_trace);
6102         if (ret < 0)
6103                 goto out;
6104
6105         ret = register_ftrace_function_probe(glob, ops, count);
6106
6107  out:
6108         return ret < 0 ? ret : 0;
6109 }
6110
6111 static struct ftrace_func_command ftrace_snapshot_cmd = {
6112         .name                   = "snapshot",
6113         .func                   = ftrace_trace_snapshot_callback,
6114 };
6115
6116 static __init int register_snapshot_cmd(void)
6117 {
6118         return register_ftrace_command(&ftrace_snapshot_cmd);
6119 }
6120 #else
6121 static inline __init int register_snapshot_cmd(void) { return 0; }
6122 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
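/*
 * With CONFIG_DYNAMIC_FTRACE and CONFIG_TRACER_SNAPSHOT, the callback above
 * implements the "snapshot" command of set_ftrace_filter. A usage sketch
 * (illustrative; "schedule" is just an example function name):
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter     # snapshot on every hit
 *   echo 'schedule:snapshot:3' > set_ftrace_filter   # only the first 3 hits
 *   echo '!schedule:snapshot' >> set_ftrace_filter   # remove the probe
 */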
6123
6124 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6125 {
6126         if (WARN_ON(!tr->dir))
6127                 return ERR_PTR(-ENODEV);
6128
6129         /* Top directory uses NULL as the parent */
6130         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6131                 return NULL;
6132
6133         /* All sub buffers have a descriptor */
6134         return tr->dir;
6135 }
6136
6137 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6138 {
6139         struct dentry *d_tracer;
6140
6141         if (tr->percpu_dir)
6142                 return tr->percpu_dir;
6143
6144         d_tracer = tracing_get_dentry(tr);
6145         if (IS_ERR(d_tracer))
6146                 return NULL;
6147
6148         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6149
6150         WARN_ONCE(!tr->percpu_dir,
6151                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6152
6153         return tr->percpu_dir;
6154 }
6155
6156 static struct dentry *
6157 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6158                       void *data, long cpu, const struct file_operations *fops)
6159 {
6160         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6161
6162         if (ret) /* See tracing_get_cpu() */
6163                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6164         return ret;
6165 }
6166
6167 static void
6168 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6169 {
6170         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6171         struct dentry *d_cpu;
6172         char cpu_dir[30]; /* 30 characters should be more than enough */
6173
6174         if (!d_percpu)
6175                 return;
6176
6177         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6178         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6179         if (!d_cpu) {
6180                 pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
6181                 return;
6182         }
6183
6184         /* per cpu trace_pipe */
6185         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6186                                 tr, cpu, &tracing_pipe_fops);
6187
6188         /* per cpu trace */
6189         trace_create_cpu_file("trace", 0644, d_cpu,
6190                                 tr, cpu, &tracing_fops);
6191
6192         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6193                                 tr, cpu, &tracing_buffers_fops);
6194
6195         trace_create_cpu_file("stats", 0444, d_cpu,
6196                                 tr, cpu, &tracing_stats_fops);
6197
6198         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6199                                 tr, cpu, &tracing_entries_fops);
6200
6201 #ifdef CONFIG_TRACER_SNAPSHOT
6202         trace_create_cpu_file("snapshot", 0644, d_cpu,
6203                                 tr, cpu, &snapshot_fops);
6204
6205         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6206                                 tr, cpu, &snapshot_raw_fops);
6207 #endif
6208 }
6209
6210 #ifdef CONFIG_FTRACE_SELFTEST
6211 /* Let selftest have access to static functions in this file */
6212 #include "trace_selftest.c"
6213 #endif
6214
6215 static ssize_t
6216 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6217                         loff_t *ppos)
6218 {
6219         struct trace_option_dentry *topt = filp->private_data;
6220         char *buf;
6221
6222         if (topt->flags->val & topt->opt->bit)
6223                 buf = "1\n";
6224         else
6225                 buf = "0\n";
6226
6227         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6228 }
6229
6230 static ssize_t
6231 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6232                          loff_t *ppos)
6233 {
6234         struct trace_option_dentry *topt = filp->private_data;
6235         unsigned long val;
6236         int ret;
6237
6238         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6239         if (ret)
6240                 return ret;
6241
6242         if (val != 0 && val != 1)
6243                 return -EINVAL;
6244
6245         if (!!(topt->flags->val & topt->opt->bit) != val) {
6246                 mutex_lock(&trace_types_lock);
6247                 ret = __set_tracer_option(topt->tr, topt->flags,
6248                                           topt->opt, !val);
6249                 mutex_unlock(&trace_types_lock);
6250                 if (ret)
6251                         return ret;
6252         }
6253
6254         *ppos += cnt;
6255
6256         return cnt;
6257 }
6258
6259
6260 static const struct file_operations trace_options_fops = {
6261         .open = tracing_open_generic,
6262         .read = trace_options_read,
6263         .write = trace_options_write,
6264         .llseek = generic_file_llseek,
6265 };
6266
6267 /*
6268  * In order to pass in both the trace_array descriptor as well as the index
6269  * to the flag that the trace option file represents, the trace_array
6270  * has a character array of trace_flags_index[], which holds the index
6271  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6272  * The address of this character array is passed to the flag option file
6273  * read/write callbacks.
6274  *
6275  * In order to extract both the index and the trace_array descriptor,
6276  * get_tr_index() uses the following algorithm.
6277  *
6278  *   idx = *ptr;
6279  *
6280  * This works because the pointer points into the index array and the
6281  * byte stored there is its own offset (remember index[1] == 1).
6282  *
6283  * Then, to get the trace_array descriptor, we subtract that index from
6284  * the pointer, which gives us the start of the index array itself:
6285  *
6286  *   ptr - idx == &index[0]
6287  *
6288  * Then a simple container_of() from that pointer gets us to the
6289  * trace_array descriptor.
6290  */
6291 static void get_tr_index(void *data, struct trace_array **ptr,
6292                          unsigned int *pindex)
6293 {
6294         *pindex = *(unsigned char *)data;
6295
6296         *ptr = container_of(data - *pindex, struct trace_array,
6297                             trace_flags_index);
6298 }
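/*
 * Worked example of the scheme described above: if data points at
 * tr->trace_flags_index[5], then *data == 5 and
 * data - 5 == &tr->trace_flags_index[0], so container_of() on that address
 * recovers tr. The flag itself is then bit (1 << 5) of tr->trace_flags.
 */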
6299
6300 static ssize_t
6301 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6302                         loff_t *ppos)
6303 {
6304         void *tr_index = filp->private_data;
6305         struct trace_array *tr;
6306         unsigned int index;
6307         char *buf;
6308
6309         get_tr_index(tr_index, &tr, &index);
6310
6311         if (tr->trace_flags & (1 << index))
6312                 buf = "1\n";
6313         else
6314                 buf = "0\n";
6315
6316         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6317 }
6318
6319 static ssize_t
6320 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6321                          loff_t *ppos)
6322 {
6323         void *tr_index = filp->private_data;
6324         struct trace_array *tr;
6325         unsigned int index;
6326         unsigned long val;
6327         int ret;
6328
6329         get_tr_index(tr_index, &tr, &index);
6330
6331         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6332         if (ret)
6333                 return ret;
6334
6335         if (val != 0 && val != 1)
6336                 return -EINVAL;
6337
6338         mutex_lock(&trace_types_lock);
6339         ret = set_tracer_flag(tr, 1 << index, val);
6340         mutex_unlock(&trace_types_lock);
6341
6342         if (ret < 0)
6343                 return ret;
6344
6345         *ppos += cnt;
6346
6347         return cnt;
6348 }
6349
6350 static const struct file_operations trace_options_core_fops = {
6351         .open = tracing_open_generic,
6352         .read = trace_options_core_read,
6353         .write = trace_options_core_write,
6354         .llseek = generic_file_llseek,
6355 };
6356
6357 struct dentry *trace_create_file(const char *name,
6358                                  umode_t mode,
6359                                  struct dentry *parent,
6360                                  void *data,
6361                                  const struct file_operations *fops)
6362 {
6363         struct dentry *ret;
6364
6365         ret = tracefs_create_file(name, mode, parent, data, fops);
6366         if (!ret)
6367                 pr_warning("Could not create tracefs '%s' entry\n", name);
6368
6369         return ret;
6370 }
6371
6372
6373 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6374 {
6375         struct dentry *d_tracer;
6376
6377         if (tr->options)
6378                 return tr->options;
6379
6380         d_tracer = tracing_get_dentry(tr);
6381         if (IS_ERR(d_tracer))
6382                 return NULL;
6383
6384         tr->options = tracefs_create_dir("options", d_tracer);
6385         if (!tr->options) {
6386                 pr_warning("Could not create tracefs directory 'options'\n");
6387                 return NULL;
6388         }
6389
6390         return tr->options;
6391 }
6392
6393 static void
6394 create_trace_option_file(struct trace_array *tr,
6395                          struct trace_option_dentry *topt,
6396                          struct tracer_flags *flags,
6397                          struct tracer_opt *opt)
6398 {
6399         struct dentry *t_options;
6400
6401         t_options = trace_options_init_dentry(tr);
6402         if (!t_options)
6403                 return;
6404
6405         topt->flags = flags;
6406         topt->opt = opt;
6407         topt->tr = tr;
6408
6409         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6410                                     &trace_options_fops);
6411
6412 }
6413
6414 static void
6415 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6416 {
6417         struct trace_option_dentry *topts;
6418         struct trace_options *tr_topts;
6419         struct tracer_flags *flags;
6420         struct tracer_opt *opts;
6421         int cnt;
6422         int i;
6423
6424         if (!tracer)
6425                 return;
6426
6427         flags = tracer->flags;
6428
6429         if (!flags || !flags->opts)
6430                 return;
6431
6432         /*
6433          * If this is an instance, only create flags for tracers
6434          * the instance may have.
6435          */
6436         if (!trace_ok_for_array(tracer, tr))
6437                 return;
6438
6439         for (i = 0; i < tr->nr_topts; i++) {
6440                 /*
6441                  * Check if these flags have already been added.
6442                  * Some tracers share flags.
6443                  */
6444                 if (tr->topts[i].tracer->flags == tracer->flags)
6445                         return;
6446         }
6447
6448         opts = flags->opts;
6449
6450         for (cnt = 0; opts[cnt].name; cnt++)
6451                 ;
6452
6453         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6454         if (!topts)
6455                 return;
6456
6457         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6458                             GFP_KERNEL);
6459         if (!tr_topts) {
6460                 kfree(topts);
6461                 return;
6462         }
6463
6464         tr->topts = tr_topts;
6465         tr->topts[tr->nr_topts].tracer = tracer;
6466         tr->topts[tr->nr_topts].topts = topts;
6467         tr->nr_topts++;
6468
6469         for (cnt = 0; opts[cnt].name; cnt++) {
6470                 create_trace_option_file(tr, &topts[cnt], flags,
6471                                          &opts[cnt]);
6472                 WARN_ONCE(topts[cnt].entry == NULL,
6473                           "Failed to create trace option: %s",
6474                           opts[cnt].name);
6475         }
6476 }
6477
6478 static struct dentry *
6479 create_trace_option_core_file(struct trace_array *tr,
6480                               const char *option, long index)
6481 {
6482         struct dentry *t_options;
6483
6484         t_options = trace_options_init_dentry(tr);
6485         if (!t_options)
6486                 return NULL;
6487
6488         return trace_create_file(option, 0644, t_options,
6489                                  (void *)&tr->trace_flags_index[index],
6490                                  &trace_options_core_fops);
6491 }
6492
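/*
 * Create the options/ files for the core trace flags.  Flags that
 * only make sense for the top level trace array (those listed in
 * TOP_LEVEL_TRACE_FLAGS) are skipped for instances.
 */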
6493 static void create_trace_options_dir(struct trace_array *tr)
6494 {
6495         struct dentry *t_options;
6496         bool top_level = tr == &global_trace;
6497         int i;
6498
6499         t_options = trace_options_init_dentry(tr);
6500         if (!t_options)
6501                 return;
6502
6503         for (i = 0; trace_options[i]; i++) {
6504                 if (top_level ||
6505                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6506                         create_trace_option_core_file(tr, trace_options[i], i);
6507         }
6508 }
6509
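/*
 * Read/write handlers for the per-instance "tracing_on" file.
 * Reading returns "1" if the ring buffer is recording, "0" otherwise;
 * writing those values turns recording on or off and invokes the
 * current tracer's start/stop callbacks, e.g.:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 */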
6510 static ssize_t
6511 rb_simple_read(struct file *filp, char __user *ubuf,
6512                size_t cnt, loff_t *ppos)
6513 {
6514         struct trace_array *tr = filp->private_data;
6515         char buf[64];
6516         int r;
6517
6518         r = tracer_tracing_is_on(tr);
6519         r = sprintf(buf, "%d\n", r);
6520
6521         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6522 }
6523
6524 static ssize_t
6525 rb_simple_write(struct file *filp, const char __user *ubuf,
6526                 size_t cnt, loff_t *ppos)
6527 {
6528         struct trace_array *tr = filp->private_data;
6529         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6530         unsigned long val;
6531         int ret;
6532
6533         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6534         if (ret)
6535                 return ret;
6536
6537         if (buffer) {
6538                 mutex_lock(&trace_types_lock);
6539                 if (!!val == tracer_tracing_is_on(tr)) {
6540                         val = 0; /* do nothing */
6541                 } else if (val) {
6542                         tracer_tracing_on(tr);
6543                         if (tr->current_trace->start)
6544                                 tr->current_trace->start(tr);
6545                 } else {
6546                         tracer_tracing_off(tr);
6547                         if (tr->current_trace->stop)
6548                                 tr->current_trace->stop(tr);
6549                 }
6550                 mutex_unlock(&trace_types_lock);
6551         }
6552
6553         (*ppos)++;
6554
6555         return cnt;
6556 }
6557
6558 static const struct file_operations rb_simple_fops = {
6559         .open           = tracing_open_generic_tr,
6560         .read           = rb_simple_read,
6561         .write          = rb_simple_write,
6562         .release        = tracing_release_generic_tr,
6563         .llseek         = default_llseek,
6564 };
6565
6566 struct dentry *trace_instance_dir;
6567
6568 static void
6569 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6570
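/*
 * Allocate the ring buffer and per-CPU data for a single trace
 * buffer.  Returns 0 on success or -ENOMEM, freeing anything
 * allocated here on failure.
 */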
6571 static int
6572 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6573 {
6574         enum ring_buffer_flags rb_flags;
6575
6576         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6577
6578         buf->tr = tr;
6579
6580         buf->buffer = ring_buffer_alloc(size, rb_flags);
6581         if (!buf->buffer)
6582                 return -ENOMEM;
6583
6584         buf->data = alloc_percpu(struct trace_array_cpu);
6585         if (!buf->data) {
6586                 ring_buffer_free(buf->buffer);
6587                 buf->buffer = NULL;
6588                 return -ENOMEM;
6589         }
6590
6591         /* Allocate the first page for all buffers */
6592         set_buffer_entries(&tr->trace_buffer,
6593                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6594
6595         return 0;
6596 }
6597
6598 static int allocate_trace_buffers(struct trace_array *tr, int size)
6599 {
6600         int ret;
6601
6602         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6603         if (ret)
6604                 return ret;
6605
6606 #ifdef CONFIG_TRACER_MAX_TRACE
6607         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6608                                     allocate_snapshot ? size : 1);
6609         if (WARN_ON(ret)) {
6610                 ring_buffer_free(tr->trace_buffer.buffer);
6611                 tr->trace_buffer.buffer = NULL;
6612                 free_percpu(tr->trace_buffer.data);
6613                 tr->trace_buffer.data = NULL;
6614                 return -ENOMEM;
6615         }
6616         tr->allocated_snapshot = allocate_snapshot;
6617
6618         /*
6619          * Only the top level trace array gets its snapshot allocated
6620          * from the kernel command line.
6621          */
6622         allocate_snapshot = false;
6623 #endif
6624         return 0;
6625 }
6626
6627 static void free_trace_buffer(struct trace_buffer *buf)
6628 {
6629         if (buf->buffer) {
6630                 ring_buffer_free(buf->buffer);
6631                 buf->buffer = NULL;
6632                 free_percpu(buf->data);
6633                 buf->data = NULL;
6634         }
6635 }
6636
6637 static void free_trace_buffers(struct trace_array *tr)
6638 {
6639         if (!tr)
6640                 return;
6641
6642         free_trace_buffer(&tr->trace_buffer);
6643
6644 #ifdef CONFIG_TRACER_MAX_TRACE
6645         free_trace_buffer(&tr->max_buffer);
6646 #endif
6647 }
6648
6649 static void init_trace_flags_index(struct trace_array *tr)
6650 {
6651         int i;
6652
6653         /* Used by the trace options files */
6654         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6655                 tr->trace_flags_index[i] = i;
6656 }
6657
6658 static void __update_tracer_options(struct trace_array *tr)
6659 {
6660         struct tracer *t;
6661
6662         for (t = trace_types; t; t = t->next)
6663                 add_tracer_options(tr, t);
6664 }
6665
6666 static void update_tracer_options(struct trace_array *tr)
6667 {
6668         mutex_lock(&trace_types_lock);
6669         __update_tracer_options(tr);
6670         mutex_unlock(&trace_types_lock);
6671 }
6672
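/*
 * Callback invoked by tracefs when a directory is created under
 * instances/, e.g.:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *
 * Allocates a new trace_array with its own buffers, tracefs files
 * and event directories, and adds it to ftrace_trace_arrays.
 * Returns -EEXIST if an instance with that name already exists.
 */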
6673 static int instance_mkdir(const char *name)
6674 {
6675         struct trace_array *tr;
6676         int ret;
6677
6678         mutex_lock(&trace_types_lock);
6679
6680         ret = -EEXIST;
6681         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6682                 if (tr->name && strcmp(tr->name, name) == 0)
6683                         goto out_unlock;
6684         }
6685
6686         ret = -ENOMEM;
6687         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6688         if (!tr)
6689                 goto out_unlock;
6690
6691         tr->name = kstrdup(name, GFP_KERNEL);
6692         if (!tr->name)
6693                 goto out_free_tr;
6694
6695         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6696                 goto out_free_tr;
6697
6698         tr->trace_flags = global_trace.trace_flags;
6699
6700         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6701
6702         raw_spin_lock_init(&tr->start_lock);
6703
6704         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6705
6706         tr->current_trace = &nop_trace;
6707
6708         INIT_LIST_HEAD(&tr->systems);
6709         INIT_LIST_HEAD(&tr->events);
6710
6711         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6712                 goto out_free_tr;
6713
6714         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6715         if (!tr->dir)
6716                 goto out_free_tr;
6717
6718         ret = event_trace_add_tracer(tr->dir, tr);
6719         if (ret) {
6720                 tracefs_remove_recursive(tr->dir);
6721                 goto out_free_tr;
6722         }
6723
6724         init_tracer_tracefs(tr, tr->dir);
6725         init_trace_flags_index(tr);
6726         __update_tracer_options(tr);
6727
6728         list_add(&tr->list, &ftrace_trace_arrays);
6729
6730         mutex_unlock(&trace_types_lock);
6731
6732         return 0;
6733
6734  out_free_tr:
6735         free_trace_buffers(tr);
6736         free_cpumask_var(tr->tracing_cpumask);
6737         kfree(tr->name);
6738         kfree(tr);
6739
6740  out_unlock:
6741         mutex_unlock(&trace_types_lock);
6742
6743         return ret;
6744
6745 }
6746
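/*
 * Callback invoked by tracefs when an instance directory is removed.
 * Returns -EBUSY while the instance or its current tracer is still
 * referenced; otherwise tears down the events, tracefs files, option
 * files and buffers, and frees the trace_array.
 */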
6747 static int instance_rmdir(const char *name)
6748 {
6749         struct trace_array *tr;
6750         int found = 0;
6751         int ret;
6752         int i;
6753
6754         mutex_lock(&trace_types_lock);
6755
6756         ret = -ENODEV;
6757         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6758                 if (tr->name && strcmp(tr->name, name) == 0) {
6759                         found = 1;
6760                         break;
6761                 }
6762         }
6763         if (!found)
6764                 goto out_unlock;
6765
6766         ret = -EBUSY;
6767         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6768                 goto out_unlock;
6769
6770         list_del(&tr->list);
6771
6772         tracing_set_nop(tr);
6773         event_trace_del_tracer(tr);
6774         ftrace_destroy_function_files(tr);
6775         tracefs_remove_recursive(tr->dir);
6776         free_trace_buffers(tr);
6777
6778         for (i = 0; i < tr->nr_topts; i++) {
6779                 kfree(tr->topts[i].topts);
6780         }
6781         kfree(tr->topts);
6782
6783         free_cpumask_var(tr->tracing_cpumask);
6784         kfree(tr->name);
6785         kfree(tr);
6786
6787         ret = 0;
6788
6789  out_unlock:
6790         mutex_unlock(&trace_types_lock);
6791
6792         return ret;
6793 }
6794
6795 static __init void create_trace_instances(struct dentry *d_tracer)
6796 {
6797         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6798                                                          instance_mkdir,
6799                                                          instance_rmdir);
6800         if (WARN_ON(!trace_instance_dir))
6801                 return;
6802 }
6803
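/*
 * Create the standard set of control files (current_tracer, trace,
 * trace_pipe, tracing_on, the per-cpu directories, etc.) for a trace
 * array.  Used for both the top level tracing directory and for
 * instances.
 */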
6804 static void
6805 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6806 {
6807         int cpu;
6808
6809         trace_create_file("available_tracers", 0444, d_tracer,
6810                         tr, &show_traces_fops);
6811
6812         trace_create_file("current_tracer", 0644, d_tracer,
6813                         tr, &set_tracer_fops);
6814
6815         trace_create_file("tracing_cpumask", 0644, d_tracer,
6816                           tr, &tracing_cpumask_fops);
6817
6818         trace_create_file("trace_options", 0644, d_tracer,
6819                           tr, &tracing_iter_fops);
6820
6821         trace_create_file("trace", 0644, d_tracer,
6822                           tr, &tracing_fops);
6823
6824         trace_create_file("trace_pipe", 0444, d_tracer,
6825                           tr, &tracing_pipe_fops);
6826
6827         trace_create_file("buffer_size_kb", 0644, d_tracer,
6828                           tr, &tracing_entries_fops);
6829
6830         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6831                           tr, &tracing_total_entries_fops);
6832
6833         trace_create_file("free_buffer", 0200, d_tracer,
6834                           tr, &tracing_free_buffer_fops);
6835
6836         trace_create_file("trace_marker", 0220, d_tracer,
6837                           tr, &tracing_mark_fops);
6838
6839         trace_create_file("trace_clock", 0644, d_tracer, tr,
6840                           &trace_clock_fops);
6841
6842         trace_create_file("tracing_on", 0644, d_tracer,
6843                           tr, &rb_simple_fops);
6844
6845         create_trace_options_dir(tr);
6846
6847 #ifdef CONFIG_TRACER_MAX_TRACE
6848         trace_create_file("tracing_max_latency", 0644, d_tracer,
6849                         &tr->max_latency, &tracing_max_lat_fops);
6850 #endif
6851
6852         if (ftrace_create_function_files(tr, d_tracer))
6853                 WARN(1, "Could not allocate function filter files");
6854
6855 #ifdef CONFIG_TRACER_SNAPSHOT
6856         trace_create_file("snapshot", 0644, d_tracer,
6857                           tr, &snapshot_fops);
6858 #endif
6859
6860         for_each_tracing_cpu(cpu)
6861                 tracing_init_tracefs_percpu(tr, cpu);
6862
6863 }
6864
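/*
 * Automount callback handed to debugfs_create_automount() below;
 * it mounts tracefs when the debugfs "tracing" directory is first
 * accessed.
 */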
6865 static struct vfsmount *trace_automount(void *ignore)
6866 {
6867         struct vfsmount *mnt;
6868         struct file_system_type *type;
6869
6870         /*
6871          * To maintain backward compatibility for tools that mount
6872          * debugfs to get to the tracing facility, tracefs is automatically
6873          * mounted to the debugfs/tracing directory.
6874          */
6875         type = get_fs_type("tracefs");
6876         if (!type)
6877                 return NULL;
6878         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6879         put_filesystem(type);
6880         if (IS_ERR(mnt))
6881                 return NULL;
6882         mntget(mnt);
6883
6884         return mnt;
6885 }
6886
6887 /**
6888  * tracing_init_dentry - initialize top level trace array
6889  *
6890  * This is called when creating files or directories in the tracing
6891  * directory. It is called by the boot-up code (typically from an fs_initcall())
6892  * and returns the parent dentry for the top level tracing files (NULL means the tracefs root).
6893  */
6894 struct dentry *tracing_init_dentry(void)
6895 {
6896         struct trace_array *tr = &global_trace;
6897
6898         /* The top level trace array uses NULL as parent */
6899         if (tr->dir)
6900                 return NULL;
6901
6902         if (WARN_ON(!tracefs_initialized()) ||
6903                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
6904                  WARN_ON(!debugfs_initialized())))
6905                 return ERR_PTR(-ENODEV);
6906
6907         /*
6908          * As there may still be users that expect the tracing
6909          * files to exist in debugfs/tracing, we must automount
6910          * the tracefs file system there, so older tools still
6911          * work with the newer kernel.
6912          */
6913         tr->dir = debugfs_create_automount("tracing", NULL,
6914                                            trace_automount, NULL);
6915         if (!tr->dir) {
6916                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
6917                 return ERR_PTR(-ENOMEM);
6918         }
6919
6920         return NULL;
6921 }
6922
6923 extern struct trace_enum_map *__start_ftrace_enum_maps[];
6924 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6925
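/*
 * Register the enum maps that the built-in kernel placed in the
 * __start/__stop_ftrace_enum_maps linker section, so that the
 * tracing code can resolve enum symbols used by trace events.
 */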
6926 static void __init trace_enum_init(void)
6927 {
6928         int len;
6929
6930         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6931         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6932 }
6933
6934 #ifdef CONFIG_MODULES
6935 static void trace_module_add_enums(struct module *mod)
6936 {
6937         if (!mod->num_trace_enums)
6938                 return;
6939
6940         /*
6941          * Modules with bad taint do not have events created, so do
6942          * not bother with enums either.
6943          */
6944         if (trace_module_has_bad_taint(mod))
6945                 return;
6946
6947         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6948 }
6949
6950 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
6951 static void trace_module_remove_enums(struct module *mod)
6952 {
6953         union trace_enum_map_item *map;
6954         union trace_enum_map_item **last = &trace_enum_maps;
6955
6956         if (!mod->num_trace_enums)
6957                 return;
6958
6959         mutex_lock(&trace_enum_mutex);
6960
6961         map = trace_enum_maps;
6962
6963         while (map) {
6964                 if (map->head.mod == mod)
6965                         break;
6966                 map = trace_enum_jmp_to_tail(map);
6967                 last = &map->tail.next;
6968                 map = map->tail.next;
6969         }
6970         if (!map)
6971                 goto out;
6972
6973         *last = trace_enum_jmp_to_tail(map)->tail.next;
6974         kfree(map);
6975  out:
6976         mutex_unlock(&trace_enum_mutex);
6977 }
6978 #else
6979 static inline void trace_module_remove_enums(struct module *mod) { }
6980 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
6981
6982 static int trace_module_notify(struct notifier_block *self,
6983                                unsigned long val, void *data)
6984 {
6985         struct module *mod = data;
6986
6987         switch (val) {
6988         case MODULE_STATE_COMING:
6989                 trace_module_add_enums(mod);
6990                 break;
6991         case MODULE_STATE_GOING:
6992                 trace_module_remove_enums(mod);
6993                 break;
6994         }
6995
6996         return 0;
6997 }
6998
6999 static struct notifier_block trace_module_nb = {
7000         .notifier_call = trace_module_notify,
7001         .priority = 0,
7002 };
7003 #endif /* CONFIG_MODULES */
7004
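/*
 * Runs at fs_initcall time (see the bottom of this file): creates
 * the top level tracing directory contents, the instances directory
 * and the option files for the global trace array.
 */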
7005 static __init int tracer_init_tracefs(void)
7006 {
7007         struct dentry *d_tracer;
7008
7009         trace_access_lock_init();
7010
7011         d_tracer = tracing_init_dentry();
7012         if (IS_ERR(d_tracer))
7013                 return 0;
7014
7015         init_tracer_tracefs(&global_trace, d_tracer);
7016
7017         trace_create_file("tracing_thresh", 0644, d_tracer,
7018                         &global_trace, &tracing_thresh_fops);
7019
7020         trace_create_file("README", 0444, d_tracer,
7021                         NULL, &tracing_readme_fops);
7022
7023         trace_create_file("saved_cmdlines", 0444, d_tracer,
7024                         NULL, &tracing_saved_cmdlines_fops);
7025
7026         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7027                           NULL, &tracing_saved_cmdlines_size_fops);
7028
7029         trace_enum_init();
7030
7031         trace_create_enum_file(d_tracer);
7032
7033 #ifdef CONFIG_MODULES
7034         register_module_notifier(&trace_module_nb);
7035 #endif
7036
7037 #ifdef CONFIG_DYNAMIC_FTRACE
7038         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7039                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7040 #endif
7041
7042         create_trace_instances(d_tracer);
7043
7044         update_tracer_options(&global_trace);
7045
7046         return 0;
7047 }
7048
7049 static int trace_panic_handler(struct notifier_block *this,
7050                                unsigned long event, void *unused)
7051 {
7052         if (ftrace_dump_on_oops)
7053                 ftrace_dump(ftrace_dump_on_oops);
7054         return NOTIFY_OK;
7055 }
7056
7057 static struct notifier_block trace_panic_notifier = {
7058         .notifier_call  = trace_panic_handler,
7059         .next           = NULL,
7060         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7061 };
7062
7063 static int trace_die_handler(struct notifier_block *self,
7064                              unsigned long val,
7065                              void *data)
7066 {
7067         switch (val) {
7068         case DIE_OOPS:
7069                 if (ftrace_dump_on_oops)
7070                         ftrace_dump(ftrace_dump_on_oops);
7071                 break;
7072         default:
7073                 break;
7074         }
7075         return NOTIFY_OK;
7076 }
7077
7078 static struct notifier_block trace_die_notifier = {
7079         .notifier_call = trace_die_handler,
7080         .priority = 200
7081 };
7082
7083 /*
7084  * printk is limited to a max of 1024 characters; we really don't need it that big.
7085  * Nothing should be printing 1000 characters anyway.
7086  */
7087 #define TRACE_MAX_PRINT         1000
7088
7089 /*
7090  * Define here KERN_TRACE so that we have one place to modify
7091  * it if we decide to change what log level the ftrace dump
7092  * should be at.
7093  */
7094 #define KERN_TRACE              KERN_EMERG
7095
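/*
 * Print the contents of a trace_seq to the console at KERN_TRACE
 * level and reinitialize it.  Used by ftrace_dump() and other
 * buffer-dumping paths.
 */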
7096 void
7097 trace_printk_seq(struct trace_seq *s)
7098 {
7099         /* Probably should print a warning here. */
7100         if (s->seq.len >= TRACE_MAX_PRINT)
7101                 s->seq.len = TRACE_MAX_PRINT;
7102
7103         /*
7104          * More paranoid code. Although the buffer size is set to
7105          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7106          * an extra layer of protection.
7107          */
7108         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7109                 s->seq.len = s->seq.size - 1;
7110
7111         /* should already be zero terminated, but we are paranoid. */
7112         s->buffer[s->seq.len] = 0;
7113
7114         printk(KERN_TRACE "%s", s->buffer);
7115
7116         trace_seq_init(s);
7117 }
7118
7119 void trace_init_global_iter(struct trace_iterator *iter)
7120 {
7121         iter->tr = &global_trace;
7122         iter->trace = iter->tr->current_trace;
7123         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7124         iter->trace_buffer = &global_trace.trace_buffer;
7125
7126         if (iter->trace && iter->trace->open)
7127                 iter->trace->open(iter);
7128
7129         /* Annotate start of buffers if we had overruns */
7130         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7131                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7132
7133         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7134         if (trace_clocks[iter->tr->clock_id].in_ns)
7135                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7136 }
7137
7138 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7139 {
7140         /* use static because iter can be a bit big for the stack */
7141         static struct trace_iterator iter;
7142         static atomic_t dump_running;
7143         struct trace_array *tr = &global_trace;
7144         unsigned int old_userobj;
7145         unsigned long flags;
7146         int cnt = 0, cpu;
7147
7148         /* Only allow one dump user at a time. */
7149         if (atomic_inc_return(&dump_running) != 1) {
7150                 atomic_dec(&dump_running);
7151                 return;
7152         }
7153
7154         /*
7155          * Always turn off tracing when we dump.
7156          * We don't need to show trace output of what happens
7157          * between multiple crashes.
7158          *
7159          * If the user does a sysrq-z, then they can re-enable
7160          * tracing with echo 1 > tracing_on.
7161          */
7162         tracing_off();
7163
7164         local_irq_save(flags);
7165
7166         /* Simulate the iterator */
7167         trace_init_global_iter(&iter);
7168
7169         for_each_tracing_cpu(cpu) {
7170                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7171         }
7172
7173         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7174
7175         /* don't look at user memory in panic mode */
7176         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7177
7178         switch (oops_dump_mode) {
7179         case DUMP_ALL:
7180                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7181                 break;
7182         case DUMP_ORIG:
7183                 iter.cpu_file = raw_smp_processor_id();
7184                 break;
7185         case DUMP_NONE:
7186                 goto out_enable;
7187         default:
7188                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7189                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7190         }
7191
7192         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7193
7194         /* Did function tracer already get disabled? */
7195         if (ftrace_is_dead()) {
7196                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7197                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7198         }
7199
7200         /*
7201          * We need to stop all tracing on all CPUs to read
7202          * the next buffer. This is a bit expensive, but is
7203          * not done often. We print all that we can read,
7204          * and then release the locks again.
7205          */
7206
7207         while (!trace_empty(&iter)) {
7208
7209                 if (!cnt)
7210                         printk(KERN_TRACE "---------------------------------\n");
7211
7212                 cnt++;
7213
7214                 trace_iterator_reset(&iter);
7215                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7216
7217                 if (trace_find_next_entry_inc(&iter) != NULL) {
7218                         int ret;
7219
7220                         ret = print_trace_line(&iter);
7221                         if (ret != TRACE_TYPE_NO_CONSUME)
7222                                 trace_consume(&iter);
7223                 }
7224                 touch_nmi_watchdog();
7225
7226                 trace_printk_seq(&iter.seq);
7227         }
7228
7229         if (!cnt)
7230                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7231         else
7232                 printk(KERN_TRACE "---------------------------------\n");
7233
7234  out_enable:
7235         tr->trace_flags |= old_userobj;
7236
7237         for_each_tracing_cpu(cpu) {
7238                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7239         }
7240         atomic_dec(&dump_running);
7241         local_irq_restore(flags);
7242 }
7243 EXPORT_SYMBOL_GPL(ftrace_dump);
7244
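/*
 * Boot-time setup of the global trace array: allocate the cpumasks,
 * ring buffers and saved-cmdlines buffer, register the nop tracer
 * and hook up the panic/die notifiers.  Called from trace_init()
 * before tracefs is available.
 */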
7245 __init static int tracer_alloc_buffers(void)
7246 {
7247         int ring_buf_size;
7248         int ret = -ENOMEM;
7249
7250         /*
7251          * Make sure we don't accidentally add more trace options
7252          * than we have bits for.
7253          */
7254         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7255
7256         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7257                 goto out;
7258
7259         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7260                 goto out_free_buffer_mask;
7261
7262         /* Only allocate trace_printk buffers if a trace_printk exists */
7263         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7264                 /* Must be called before global_trace.buffer is allocated */
7265                 trace_printk_init_buffers();
7266
7267         /* To save memory, keep the ring buffer size to its minimum */
7268         if (ring_buffer_expanded)
7269                 ring_buf_size = trace_buf_size;
7270         else
7271                 ring_buf_size = 1;
7272
7273         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7274         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7275
7276         raw_spin_lock_init(&global_trace.start_lock);
7277
7278         /* Used for event triggers */
7279         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7280         if (!temp_buffer)
7281                 goto out_free_cpumask;
7282
7283         if (trace_create_savedcmd() < 0)
7284                 goto out_free_temp_buffer;
7285
7286         /* TODO: make the number of buffers hot pluggable with CPUS */
7287         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7288                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7289                 WARN_ON(1);
7290                 goto out_free_savedcmd;
7291         }
7292
7293         if (global_trace.buffer_disabled)
7294                 tracing_off();
7295
7296         if (trace_boot_clock) {
7297                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7298                 if (ret < 0)
7299                         pr_warning("Trace clock %s not defined, going back to default\n",
7300                                    trace_boot_clock);
7301         }
7302
7303         /*
7304          * register_tracer() might reference current_trace, so it
7305          * needs to be set before we register anything. This is
7306          * just a bootstrap of current_trace anyway.
7307          */
7308         global_trace.current_trace = &nop_trace;
7309
7310         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7311
7312         ftrace_init_global_array_ops(&global_trace);
7313
7314         init_trace_flags_index(&global_trace);
7315
7316         register_tracer(&nop_trace);
7317
7318         /* All seems OK, enable tracing */
7319         tracing_disabled = 0;
7320
7321         atomic_notifier_chain_register(&panic_notifier_list,
7322                                        &trace_panic_notifier);
7323
7324         register_die_notifier(&trace_die_notifier);
7325
7326         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7327
7328         INIT_LIST_HEAD(&global_trace.systems);
7329         INIT_LIST_HEAD(&global_trace.events);
7330         list_add(&global_trace.list, &ftrace_trace_arrays);
7331
7332         apply_trace_boot_options();
7333
7334         register_snapshot_cmd();
7335
7336         return 0;
7337
7338 out_free_savedcmd:
7339         free_saved_cmdlines_buffer(savedcmd);
7340 out_free_temp_buffer:
7341         ring_buffer_free(temp_buffer);
7342 out_free_cpumask:
7343         free_cpumask_var(global_trace.tracing_cpumask);
7344 out_free_buffer_mask:
7345         free_cpumask_var(tracing_buffer_mask);
7346 out:
7347         return ret;
7348 }
7349
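/*
 * Main early-boot entry point of the tracing subsystem, called from
 * start_kernel().  Sets up the ring buffers and trace event
 * infrastructure; the tracefs files are created later by
 * tracer_init_tracefs().
 */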
7350 void __init trace_init(void)
7351 {
7352         if (tracepoint_printk) {
7353                 tracepoint_print_iter =
7354                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7355                 if (WARN_ON(!tracepoint_print_iter))
7356                         tracepoint_printk = 0;
7357         }
7358         tracer_alloc_buffers();
7359         trace_event_init();
7360 }
7361
7362 __init static int clear_boot_tracer(void)
7363 {
7364         /*
7365          * The default bootup tracer string lives in an init section
7366          * and is freed after boot. This function runs as a late
7367          * initcall: if the boot tracer was never registered, clear
7368          * the pointer so that a later registration does not access
7369          * the buffer that is about to be freed.
7370          */
7371         if (!default_bootup_tracer)
7372                 return 0;
7373
7374         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7375                default_bootup_tracer);
7376         default_bootup_tracer = NULL;
7377
7378         return 0;
7379 }
7380
7381 fs_initcall(tracer_init_tracefs);
7382 late_initcall(clear_boot_tracer);