Linux-libre 5.3.12-gnu
[librecmc/linux-libre.git] / tools / perf / util / stat-shadow.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include <stdio.h>
3 #include "evsel.h"
4 #include "stat.h"
5 #include "color.h"
6 #include "pmu.h"
7 #include "rblist.h"
8 #include "evlist.h"
9 #include "expr.h"
10 #include "metricgroup.h"
11 #include <linux/zalloc.h>
12
13 /*
14  * AGGR_GLOBAL: Use CPU 0
15  * AGGR_SOCKET: Use first CPU of socket
16  * AGGR_DIE: Use first CPU of die
17  * AGGR_CORE: Use first CPU of core
18  * AGGR_NONE: Use matching CPU
19  * AGGR_THREAD: Not supported?
20  */
/* Set once in perf_stat__init_shadow_stats(): does the "cpu" PMU expose
 * a stalled-cycles-frontend event?  Controls stalls-per-insn printing. */
static bool have_frontend_stalled;

/* Global shadow-stat store (keyed rblist of saved_value nodes). */
struct runtime_stat rt_stat;
/* Wall-clock run time; read by generic_metric() for "duration_time". */
struct stats walltime_nsecs_stats;
25
/*
 * One node of a runtime_stat rblist.  Two key layouts share the tree:
 * generic-metric nodes are keyed by evsel/cpu (type/ctx/stat are 0/NULL),
 * per-thread shadow-stat nodes by cpu/type/ctx/stat (evsel is NULL).
 * See saved_value_cmp() for the ordering.
 */
struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;	/* NULL for shadow-stat nodes */
	enum stat_type type;		/* STAT_NONE for generic-metric nodes */
	int ctx;			/* CTX_BIT_* mask from evsel_context() */
	int cpu;
	struct runtime_stat *stat;	/* owning stat; tie-breaker key */
	struct stats stats;		/* accumulated counter values */
};
35
36 static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
37 {
38         struct saved_value *a = container_of(rb_node,
39                                              struct saved_value,
40                                              rb_node);
41         const struct saved_value *b = entry;
42
43         if (a->cpu != b->cpu)
44                 return a->cpu - b->cpu;
45
46         /*
47          * Previously the rbtree was used to link generic metrics.
48          * The keys were evsel/cpu. Now the rbtree is extended to support
49          * per-thread shadow stats. For shadow stats case, the keys
50          * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
51          * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
52          */
53         if (a->type != b->type)
54                 return a->type - b->type;
55
56         if (a->ctx != b->ctx)
57                 return a->ctx - b->ctx;
58
59         if (a->evsel == NULL && b->evsel == NULL) {
60                 if (a->stat == b->stat)
61                         return 0;
62
63                 if ((char *)a->stat < (char *)b->stat)
64                         return -1;
65
66                 return 1;
67         }
68
69         if (a->evsel == b->evsel)
70                 return 0;
71         if ((char *)a->evsel < (char *)b->evsel)
72                 return -1;
73         return +1;
74 }
75
76 static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
77                                      const void *entry)
78 {
79         struct saved_value *nd = malloc(sizeof(struct saved_value));
80
81         if (!nd)
82                 return NULL;
83         memcpy(nd, entry, sizeof(struct saved_value));
84         return &nd->rb_node;
85 }
86
87 static void saved_value_delete(struct rblist *rblist __maybe_unused,
88                                struct rb_node *rb_node)
89 {
90         struct saved_value *v;
91
92         BUG_ON(!rb_node);
93         v = container_of(rb_node, struct saved_value, rb_node);
94         free(v);
95 }
96
97 static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
98                                               int cpu,
99                                               bool create,
100                                               enum stat_type type,
101                                               int ctx,
102                                               struct runtime_stat *st)
103 {
104         struct rblist *rblist;
105         struct rb_node *nd;
106         struct saved_value dm = {
107                 .cpu = cpu,
108                 .evsel = evsel,
109                 .type = type,
110                 .ctx = ctx,
111                 .stat = st,
112         };
113
114         rblist = &st->value_list;
115
116         nd = rblist__find(rblist, &dm);
117         if (nd)
118                 return container_of(nd, struct saved_value, rb_node);
119         if (create) {
120                 rblist__add_node(rblist, &dm);
121                 nd = rblist__find(rblist, &dm);
122                 if (nd)
123                         return container_of(nd, struct saved_value, rb_node);
124         }
125         return NULL;
126 }
127
128 void runtime_stat__init(struct runtime_stat *st)
129 {
130         struct rblist *rblist = &st->value_list;
131
132         rblist__init(rblist);
133         rblist->node_cmp = saved_value_cmp;
134         rblist->node_new = saved_value_new;
135         rblist->node_delete = saved_value_delete;
136 }
137
/* Free every saved_value node held by @st's value list. */
void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}
142
/*
 * Initialize global shadow-stat state: probe once whether the "cpu" PMU
 * exposes stalled-cycles-frontend, and set up the global rt_stat rblist.
 */
void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	runtime_stat__init(&rt_stat);
}
148
149 static int evsel_context(struct perf_evsel *evsel)
150 {
151         int ctx = 0;
152
153         if (evsel->attr.exclude_kernel)
154                 ctx |= CTX_BIT_KERNEL;
155         if (evsel->attr.exclude_user)
156                 ctx |= CTX_BIT_USER;
157         if (evsel->attr.exclude_hv)
158                 ctx |= CTX_BIT_HV;
159         if (evsel->attr.exclude_host)
160                 ctx |= CTX_BIT_HOST;
161         if (evsel->attr.exclude_idle)
162                 ctx |= CTX_BIT_IDLE;
163
164         return ctx;
165 }
166
167 static void reset_stat(struct runtime_stat *st)
168 {
169         struct rblist *rblist;
170         struct rb_node *pos, *next;
171
172         rblist = &st->value_list;
173         next = rb_first_cached(&rblist->entries);
174         while (next) {
175                 pos = next;
176                 next = rb_next(pos);
177                 memset(&container_of(pos, struct saved_value, rb_node)->stats,
178                        0,
179                        sizeof(struct stats));
180         }
181 }
182
/* Reset global shadow stats: zero all rt_stat nodes and the walltime. */
void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}
188
/* Zero all accumulated values in one runtime_stat (nodes are not freed). */
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}
193
194 static void update_runtime_stat(struct runtime_stat *st,
195                                 enum stat_type type,
196                                 int ctx, int cpu, u64 count)
197 {
198         struct saved_value *v = saved_value_lookup(NULL, cpu, true,
199                                                    type, ctx, st);
200
201         if (v)
202                 update_stats(&v->stats, count);
203 }
204
205 /*
206  * Update various tracking values we maintain to print
207  * more semantic information such as miss/hit ratios,
208  * instruction rates, etc:
209  */
210 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
211                                     int cpu, struct runtime_stat *st)
212 {
213         int ctx = evsel_context(counter);
214         u64 count_ns = count;
215
216         count *= counter->scale;
217
218         if (perf_evsel__is_clock(counter))
219                 update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
220         else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
221                 update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
222         else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
223                 update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
224         else if (perf_stat_evsel__is(counter, TRANSACTION_START))
225                 update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
226         else if (perf_stat_evsel__is(counter, ELISION_START))
227                 update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
228         else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
229                 update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
230                                     ctx, cpu, count);
231         else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
232                 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
233                                     ctx, cpu, count);
234         else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
235                 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
236                                     ctx, cpu, count);
237         else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
238                 update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
239                                     ctx, cpu, count);
240         else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
241                 update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
242                                     ctx, cpu, count);
243         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
244                 update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
245                                     ctx, cpu, count);
246         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
247                 update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
248                                     ctx, cpu, count);
249         else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
250                 update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
251         else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
252                 update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
253         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
254                 update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
255         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
256                 update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
257         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
258                 update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
259         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
260                 update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
261         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
262                 update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
263         else if (perf_stat_evsel__is(counter, SMI_NUM))
264                 update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
265         else if (perf_stat_evsel__is(counter, APERF))
266                 update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
267
268         if (counter->collect_stat) {
269                 struct saved_value *v = saved_value_lookup(counter, cpu, true,
270                                                            STAT_NONE, 0, st);
271                 update_stats(&v->stats, count);
272         }
273 }
274
/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,	/* frontend stall coloring thresholds */
	GRC_STALLED_CYCLES_BE,	/* backend stall coloring thresholds */
	GRC_CACHE_MISSES,	/* cache/branch/TLB miss coloring thresholds */
	GRC_MAX_NR
};
282
283 static const char *get_ratio_color(enum grc_type type, double ratio)
284 {
285         static const double grc_table[GRC_MAX_NR][3] = {
286                 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
287                 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
288                 [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
289         };
290         const char *color = PERF_COLOR_NORMAL;
291
292         if (ratio > grc_table[type][0])
293                 color = PERF_COLOR_RED;
294         else if (ratio > grc_table[type][1])
295                 color = PERF_COLOR_MAGENTA;
296         else if (ratio > grc_table[type][2])
297                 color = PERF_COLOR_YELLOW;
298
299         return color;
300 }
301
302 static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
303                                                 const char *name)
304 {
305         struct perf_evsel *c2;
306
307         evlist__for_each_entry (evsel_list, c2) {
308                 if (!strcasecmp(c2->name, name) && !c2->collect_stat)
309                         return c2;
310         }
311         return NULL;
312 }
313
/* Mark MetricExpr target events and link events using them to them. */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
	struct perf_evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			/* Extract the other event names referenced by the expression. */
			if (expr__find_other(counter->metric_expr, counter->name,
						&metric_names, &num_metric_names) < 0)
				continue;

			/* NULL-terminated array of resolved target events. */
			metric_events = calloc(sizeof(struct perf_evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}
		/*
		 * NOTE(review): if counter->metric_events was already set,
		 * metric_names/num_metric_names are used below without being
		 * computed for this counter.  Presumably this function runs
		 * only once per evlist — confirm with callers.
		 */

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i]) &&
						!oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		/* Terminate the event list (i == num_metric_names here). */
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			/* Any missing event disables the whole metric. */
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}
391
392 static double runtime_stat_avg(struct runtime_stat *st,
393                                enum stat_type type, int ctx, int cpu)
394 {
395         struct saved_value *v;
396
397         v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
398         if (!v)
399                 return 0.0;
400
401         return avg_stats(&v->stats);
402 }
403
404 static double runtime_stat_n(struct runtime_stat *st,
405                              enum stat_type type, int ctx, int cpu)
406 {
407         struct saved_value *v;
408
409         v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
410         if (!v)
411                 return 0.0;
412
413         return v->stats.n;
414 }
415
416 static void print_stalled_cycles_frontend(struct perf_stat_config *config,
417                                           int cpu,
418                                           struct perf_evsel *evsel, double avg,
419                                           struct perf_stat_output_ctx *out,
420                                           struct runtime_stat *st)
421 {
422         double total, ratio = 0.0;
423         const char *color;
424         int ctx = evsel_context(evsel);
425
426         total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
427
428         if (total)
429                 ratio = avg / total * 100.0;
430
431         color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
432
433         if (ratio)
434                 out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
435                                   ratio);
436         else
437                 out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
438 }
439
440 static void print_stalled_cycles_backend(struct perf_stat_config *config,
441                                          int cpu,
442                                          struct perf_evsel *evsel, double avg,
443                                          struct perf_stat_output_ctx *out,
444                                          struct runtime_stat *st)
445 {
446         double total, ratio = 0.0;
447         const char *color;
448         int ctx = evsel_context(evsel);
449
450         total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
451
452         if (total)
453                 ratio = avg / total * 100.0;
454
455         color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
456
457         out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
458 }
459
460 static void print_branch_misses(struct perf_stat_config *config,
461                                 int cpu,
462                                 struct perf_evsel *evsel,
463                                 double avg,
464                                 struct perf_stat_output_ctx *out,
465                                 struct runtime_stat *st)
466 {
467         double total, ratio = 0.0;
468         const char *color;
469         int ctx = evsel_context(evsel);
470
471         total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
472
473         if (total)
474                 ratio = avg / total * 100.0;
475
476         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
477
478         out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
479 }
480
481 static void print_l1_dcache_misses(struct perf_stat_config *config,
482                                    int cpu,
483                                    struct perf_evsel *evsel,
484                                    double avg,
485                                    struct perf_stat_output_ctx *out,
486                                    struct runtime_stat *st)
487
488 {
489         double total, ratio = 0.0;
490         const char *color;
491         int ctx = evsel_context(evsel);
492
493         total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
494
495         if (total)
496                 ratio = avg / total * 100.0;
497
498         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
499
500         out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
501 }
502
503 static void print_l1_icache_misses(struct perf_stat_config *config,
504                                    int cpu,
505                                    struct perf_evsel *evsel,
506                                    double avg,
507                                    struct perf_stat_output_ctx *out,
508                                    struct runtime_stat *st)
509
510 {
511         double total, ratio = 0.0;
512         const char *color;
513         int ctx = evsel_context(evsel);
514
515         total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
516
517         if (total)
518                 ratio = avg / total * 100.0;
519
520         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
521         out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
522 }
523
524 static void print_dtlb_cache_misses(struct perf_stat_config *config,
525                                     int cpu,
526                                     struct perf_evsel *evsel,
527                                     double avg,
528                                     struct perf_stat_output_ctx *out,
529                                     struct runtime_stat *st)
530 {
531         double total, ratio = 0.0;
532         const char *color;
533         int ctx = evsel_context(evsel);
534
535         total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
536
537         if (total)
538                 ratio = avg / total * 100.0;
539
540         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
541         out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
542 }
543
544 static void print_itlb_cache_misses(struct perf_stat_config *config,
545                                     int cpu,
546                                     struct perf_evsel *evsel,
547                                     double avg,
548                                     struct perf_stat_output_ctx *out,
549                                     struct runtime_stat *st)
550 {
551         double total, ratio = 0.0;
552         const char *color;
553         int ctx = evsel_context(evsel);
554
555         total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
556
557         if (total)
558                 ratio = avg / total * 100.0;
559
560         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
561         out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
562 }
563
564 static void print_ll_cache_misses(struct perf_stat_config *config,
565                                   int cpu,
566                                   struct perf_evsel *evsel,
567                                   double avg,
568                                   struct perf_stat_output_ctx *out,
569                                   struct runtime_stat *st)
570 {
571         double total, ratio = 0.0;
572         const char *color;
573         int ctx = evsel_context(evsel);
574
575         total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
576
577         if (total)
578                 ratio = avg / total * 100.0;
579
580         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
581         out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
582 }
583
584 /*
585  * High level "TopDown" CPU core pipe line bottleneck break down.
586  *
587  * Basic concept following
588  * Yasin, A Top Down Method for Performance analysis and Counter architecture
589  * ISPASS14
590  *
591  * The CPU pipeline is divided into 4 areas that can be bottlenecks:
592  *
593  * Frontend -> Backend -> Retiring
594  * BadSpeculation in addition means out of order execution that is thrown away
595  * (for example branch mispredictions)
596  * Frontend is instruction decoding.
597  * Backend is execution, like computation and accessing data in memory
598  * Retiring is good execution that is not directly bottlenecked
599  *
600  * The formulas are computed in slots.
601  * A slot is an entry in the pipeline each for the pipeline width
602  * (for example a 4-wide pipeline has 4 slots for each cycle)
603  *
604  * Formulas:
605  * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
606  *                      TotalSlots
607  * Retiring = SlotsRetired / TotalSlots
608  * FrontendBound = FetchBubbles / TotalSlots
609  * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
610  *
611  * The kernel provides the mapping to the low level CPU events and any scaling
612  * needed for the CPU pipeline width, for example:
613  *
614  * TotalSlots = Cycles * 4
615  *
616  * The scaling factor is communicated in the sysfs unit.
617  *
618  * In some cases the CPU may not be able to measure all the formulas due to
619  * missing events. In this case multiple formulas are combined, as possible.
620  *
621  * Full TopDown supports more levels to sub-divide each area: for example
622  * BackendBound into computing bound and memory bound. For now we only
623  * support Level 1 TopDown.
624  */
625
/*
 * Clamp tiny negative rounding errors (down to -0.02) to zero so the
 * TopDown fractions never show up as small negative values; anything
 * else passes through unchanged.
 */
static double sanitize_val(double x)
{
	return (x < 0 && x >= -0.02) ? 0.0 : x;
}
632
/* TotalSlots term of the TopDown formulas (see the comment block above). */
static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}
637
638 static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
639 {
640         double bad_spec = 0;
641         double total_slots;
642         double total;
643
644         total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
645                 runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
646                 runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
647
648         total_slots = td_total_slots(ctx, cpu, st);
649         if (total_slots)
650                 bad_spec = total / total_slots;
651         return sanitize_val(bad_spec);
652 }
653
654 static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
655 {
656         double retiring = 0;
657         double total_slots = td_total_slots(ctx, cpu, st);
658         double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
659                                             ctx, cpu);
660
661         if (total_slots)
662                 retiring = ret_slots / total_slots;
663         return retiring;
664 }
665
666 static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
667 {
668         double fe_bound = 0;
669         double total_slots = td_total_slots(ctx, cpu, st);
670         double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
671                                             ctx, cpu);
672
673         if (total_slots)
674                 fe_bound = fetch_bub / total_slots;
675         return fe_bound;
676 }
677
/*
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound.
 * When all three terms are zero (no TopDown data) report 0 rather
 * than a meaningless 1.0.
 */
static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double other = td_fe_bound(ctx, cpu, st) +
		       td_bad_spec(ctx, cpu, st) +
		       td_retiring(ctx, cpu, st);

	return other == 0 ? 0 : sanitize_val(1.0 - other);
}
687
688 static void print_smi_cost(struct perf_stat_config *config,
689                            int cpu, struct perf_evsel *evsel,
690                            struct perf_stat_output_ctx *out,
691                            struct runtime_stat *st)
692 {
693         double smi_num, aperf, cycles, cost = 0.0;
694         int ctx = evsel_context(evsel);
695         const char *color = NULL;
696
697         smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
698         aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
699         cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
700
701         if ((cycles == 0) || (aperf == 0))
702                 return;
703
704         if (smi_num)
705                 cost = (aperf - cycles) / aperf * 100.00;
706
707         if (cost > 10)
708                 color = PERF_COLOR_RED;
709         out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
710         out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
711 }
712
/*
 * Evaluate and print a MetricExpr for event @name with value @avg.
 * Every event referenced by the expression is bound in the parse context:
 * the special "duration_time" event reads walltime_nsecs_stats scaled to
 * seconds, all others read their saved_value stats from @st.  If any
 * referenced event has no stats yet, an empty metric is printed instead.
 */
static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct perf_evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio;
	int i;
	void *ctxp = out->ctx;
	char *n, *pn;

	expr__ctx_init(&pctx);
	/* id 0 is the triggering event itself; caller owns @name. */
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		double scale;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;	/* nanoseconds -> seconds */
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;	/* event not yet measured */
			stats = &v->stats;
			scale = 1.0;
		}

		n = strdup(metric_events[i]->name);
		if (!n)
			return;
			/*
			 * NOTE(review): returning here skips the zfree()
			 * cleanup loop below, leaking the ids strdup'ed in
			 * earlier iterations — confirm/fix upstream.
			 */
		/*
		 * This display code with --no-merge adds [cpu] postfixes.
		 * These are not supported by the parser. Remove everything
		 * after the space.
		 */
		pn = strchr(n, ' ');
		if (pn)
			*pn = 0;
		expr__add_id(&pctx, n, avg_stats(stats)*scale);
	}
	/* All referenced events resolved (loop did not break early)? */
	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0)
			print_metric(config, ctxp, NULL, "%8.1f",
				metric_name ?
				metric_name :
				out->force_header ?  name : "",
				ratio);
		else
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
	} else
		print_metric(config, ctxp, NULL, NULL, "", 0);

	/* Free the strdup'ed ids; index 0 (@name) belongs to the caller. */
	for (i = 1; i < pctx.num_ids; i++)
		zfree(&pctx.ids[i].name);
}
781
782 void perf_stat__print_shadow_stats(struct perf_stat_config *config,
783                                    struct perf_evsel *evsel,
784                                    double avg, int cpu,
785                                    struct perf_stat_output_ctx *out,
786                                    struct rblist *metric_events,
787                                    struct runtime_stat *st)
788 {
789         void *ctxp = out->ctx;
790         print_metric_t print_metric = out->print_metric;
791         double total, ratio = 0.0, total2;
792         const char *color = NULL;
793         int ctx = evsel_context(evsel);
794         struct metric_event *me;
795         int num = 1;
796
797         if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
798                 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
799
800                 if (total) {
801                         ratio = avg / total;
802                         print_metric(config, ctxp, NULL, "%7.2f ",
803                                         "insn per cycle", ratio);
804                 } else {
805                         print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
806                 }
807
808                 total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
809                                          ctx, cpu);
810
811                 total = max(total, runtime_stat_avg(st,
812                                                     STAT_STALLED_CYCLES_BACK,
813                                                     ctx, cpu));
814
815                 if (total && avg) {
816                         out->new_line(config, ctxp);
817                         ratio = total / avg;
818                         print_metric(config, ctxp, NULL, "%7.2f ",
819                                         "stalled cycles per insn",
820                                         ratio);
821                 } else if (have_frontend_stalled) {
822                         out->new_line(config, ctxp);
823                         print_metric(config, ctxp, NULL, "%7.2f ",
824                                      "stalled cycles per insn", 0);
825                 }
826         } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
827                 if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
828                         print_branch_misses(config, cpu, evsel, avg, out, st);
829                 else
830                         print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
831         } else if (
832                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
833                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
834                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
835                                          ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
836
837                 if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
838                         print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
839                 else
840                         print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
841         } else if (
842                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
843                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
844                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
845                                          ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
846
847                 if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
848                         print_l1_icache_misses(config, cpu, evsel, avg, out, st);
849                 else
850                         print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
851         } else if (
852                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
853                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
854                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
855                                          ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
856
857                 if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
858                         print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
859                 else
860                         print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
861         } else if (
862                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
863                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
864                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
865                                          ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
866
867                 if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
868                         print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
869                 else
870                         print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
871         } else if (
872                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
873                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
874                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
875                                          ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
876
877                 if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
878                         print_ll_cache_misses(config, cpu, evsel, avg, out, st);
879                 else
880                         print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
881         } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
882                 total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
883
884                 if (total)
885                         ratio = avg * 100 / total;
886
887                 if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
888                         print_metric(config, ctxp, NULL, "%8.3f %%",
889                                      "of all cache refs", ratio);
890                 else
891                         print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
892         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
893                 print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
894         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
895                 print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
896         } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
897                 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
898
899                 if (total) {
900                         ratio = avg / total;
901                         print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
902                 } else {
903                         print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
904                 }
905         } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
906                 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
907
908                 if (total)
909                         print_metric(config, ctxp, NULL,
910                                         "%7.2f%%", "transactional cycles",
911                                         100.0 * (avg / total));
912                 else
913                         print_metric(config, ctxp, NULL, NULL, "transactional cycles",
914                                      0);
915         } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
916                 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
917                 total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
918
919                 if (total2 < avg)
920                         total2 = avg;
921                 if (total)
922                         print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
923                                 100.0 * ((total2-avg) / total));
924                 else
925                         print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
926         } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
927                 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
928                                          ctx, cpu);
929
930                 if (avg)
931                         ratio = total / avg;
932
933                 if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
934                         print_metric(config, ctxp, NULL, "%8.0f",
935                                      "cycles / transaction", ratio);
936                 else
937                         print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
938                                       0);
939         } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
940                 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
941                                          ctx, cpu);
942
943                 if (avg)
944                         ratio = total / avg;
945
946                 print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
947         } else if (perf_evsel__is_clock(evsel)) {
948                 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
949                         print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
950                                      avg / (ratio * evsel->scale));
951                 else
952                         print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
953         } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
954                 double fe_bound = td_fe_bound(ctx, cpu, st);
955
956                 if (fe_bound > 0.2)
957                         color = PERF_COLOR_RED;
958                 print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
959                                 fe_bound * 100.);
960         } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
961                 double retiring = td_retiring(ctx, cpu, st);
962
963                 if (retiring > 0.7)
964                         color = PERF_COLOR_GREEN;
965                 print_metric(config, ctxp, color, "%8.1f%%", "retiring",
966                                 retiring * 100.);
967         } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
968                 double bad_spec = td_bad_spec(ctx, cpu, st);
969
970                 if (bad_spec > 0.1)
971                         color = PERF_COLOR_RED;
972                 print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
973                                 bad_spec * 100.);
974         } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
975                 double be_bound = td_be_bound(ctx, cpu, st);
976                 const char *name = "backend bound";
977                 static int have_recovery_bubbles = -1;
978
979                 /* In case the CPU does not support topdown-recovery-bubbles */
980                 if (have_recovery_bubbles < 0)
981                         have_recovery_bubbles = pmu_have_event("cpu",
982                                         "topdown-recovery-bubbles");
983                 if (!have_recovery_bubbles)
984                         name = "backend bound/bad spec";
985
986                 if (be_bound > 0.2)
987                         color = PERF_COLOR_RED;
988                 if (td_total_slots(ctx, cpu, st) > 0)
989                         print_metric(config, ctxp, color, "%8.1f%%", name,
990                                         be_bound * 100.);
991                 else
992                         print_metric(config, ctxp, NULL, NULL, name, 0);
993         } else if (evsel->metric_expr) {
994                 generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
995                                 evsel->metric_name, avg, cpu, out, st);
996         } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
997                 char unit = 'M';
998                 char unit_buf[10];
999
1000                 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
1001
1002                 if (total)
1003                         ratio = 1000.0 * avg / total;
1004                 if (ratio < 0.001) {
1005                         ratio *= 1000;
1006                         unit = 'K';
1007                 }
1008                 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
1009                 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
1010         } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
1011                 print_smi_cost(config, cpu, evsel, out, st);
1012         } else {
1013                 num = 0;
1014         }
1015
1016         if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
1017                 struct metric_expr *mexp;
1018
1019                 list_for_each_entry (mexp, &me->head, nd) {
1020                         if (num++ > 0)
1021                                 out->new_line(config, ctxp);
1022                         generic_metric(config, mexp->metric_expr, mexp->metric_events,
1023                                         evsel->name, mexp->metric_name,
1024                                         avg, cpu, out, st);
1025                 }
1026         }
1027         if (num == 0)
1028                 print_metric(config, ctxp, NULL, NULL, NULL, 0);
1029 }