Linux-libre 3.18.37-gnu
[librecmc/linux-libre.git] / tools / perf / bench / mem-memcpy.c
1 /*
2  * mem-memcpy.c
3  *
4  * memcpy: Simple memory copy in various ways
5  *
6  * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
7  */
8
9 #include "../perf.h"
10 #include "../util/util.h"
11 #include "../util/parse-options.h"
12 #include "../util/header.h"
13 #include "../util/cloexec.h"
14 #include "bench.h"
15 #include "mem-memcpy-arch.h"
16
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <sys/time.h>
21 #include <errno.h>
22
/* Factor between successive size units used when printing throughput. */
#define K 1024

/* Values of the command-line options; see options[] for the matching flags. */
/* -l/--length: amount of memory to copy, as a string with an optional unit */
static const char       *length_str     = "1MB";
/* -r/--routine: name of the routines[] entry to benchmark */
static const char       *routine        = "default";
/* -i/--iterations: how many times the copy is repeated while measuring */
static int              iterations      = 1;
/* -c/--cycle: measure with the cycles event instead of gettimeofday() */
static bool             use_cycle;
/* File descriptor of the cycles event, set by init_cycle() */
static int              cycle_fd;
/* -o/--only-prefault and -n/--no-prefault: restrict which result is shown */
static bool             only_prefault;
static bool             no_prefault;
32
/*
 * Command-line options of "perf bench mem memcpy". Each entry stores its
 * value in one of the static option variables of this file.
 */
static const struct option options[] = {
        OPT_STRING('l', "length", &length_str, "1MB",
                    "Specify length of memory to copy. "
                    "Available units: B, KB, MB, GB and TB (upper and lower)"),
        OPT_STRING('r', "routine", &routine, "default",
                    "Specify routine to copy"),
        OPT_INTEGER('i', "iterations", &iterations,
                    "repeat memcpy() invocation this number of times"),
        OPT_BOOLEAN('c', "cycle", &use_cycle,
                    "Use cycles event instead of gettimeofday() for measuring"),
        OPT_BOOLEAN('o', "only-prefault", &only_prefault,
                    "Show only the result with page faults before memcpy()"),
        OPT_BOOLEAN('n', "no-prefault", &no_prefault,
                    "Show only the result without page faults before memcpy()"),
        OPT_END()
};
49
/* Signature shared by every copy routine that can be benchmarked. */
typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);

/* One selectable copy implementation. */
struct routine {
        const char *name;       /* value accepted by -r/--routine */
        const char *desc;       /* text shown when listing the routines */
        memcpy_t fn;            /* the implementation itself */
};

/*
 * Table of the available routines. The entry whose name is NULL marks the
 * end of the table.
 */
struct routine routines[] = {
        {
                .name = "default",
                .desc = "Default memcpy() provided by glibc",
                .fn   = memcpy,
        },
#ifdef HAVE_ARCH_X86_64_SUPPORT

#define MEMCPY_FN(fn_, name_, desc_) { .name = name_, .desc = desc_, .fn = fn_ },
#include "mem-memcpy-x86-64-asm-def.h"
#undef MEMCPY_FN

#endif

        {
                .name = NULL,
                .desc = NULL,
                .fn   = NULL,
        }
};
74
/* Usage lines handed to parse_options(); the list is NULL-terminated. */
static const char * const bench_mem_memcpy_usage[] = {
        "perf bench mem memcpy <options>",
        NULL
};
79
/* Event opened by init_cycle(): the hardware CPU cycles counter. */
static struct perf_event_attr cycle_attr = {
        .type           = PERF_TYPE_HARDWARE,
        .config         = PERF_COUNT_HW_CPU_CYCLES
};
84
85 static void init_cycle(void)
86 {
87         cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1,
88                                        perf_event_open_cloexec_flag());
89
90         if (cycle_fd < 0 && errno == ENOSYS)
91                 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
92         else
93                 BUG_ON(cycle_fd < 0);
94 }
95
96 static u64 get_cycle(void)
97 {
98         int ret;
99         u64 clk;
100
101         ret = read(cycle_fd, &clk, sizeof(u64));
102         BUG_ON(ret != sizeof(u64));
103
104         return clk;
105 }
106
/* Convert a struct timeval to seconds, keeping the microseconds as fraction. */
static double timeval2double(struct timeval *ts)
{
        double seconds = (double)ts->tv_sec;
        double fraction = (double)ts->tv_usec / (double)1000000;

        return seconds + fraction;
}
112
/*
 * Allocate the two zeroed buffers of a benchmark run.
 *
 * @dst:    receives the destination buffer
 * @src:    receives the source buffer
 * @length: size of each buffer in bytes
 *
 * Dies when either allocation fails. Both buffers belong to the caller,
 * which releases them with free().
 */
static void alloc_mem(void **dst, void **src, size_t length)
{
        void *buf;

        buf = zalloc(length);
        *dst = buf;
        if (buf == NULL)
                die("memory allocation failed - maybe length is too large?\n");

        buf = zalloc(length);
        *src = buf;
        if (buf == NULL)
                die("memory allocation failed - maybe length is too large?\n");

        /*
         * Write to the source explicitly so that its zero pages are always
         * replaced, even if MMAP_THRESH is crossed.
         */
        memset(buf, 0, length);
}
125
126 static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
127 {
128         u64 cycle_start = 0ULL, cycle_end = 0ULL;
129         void *src = NULL, *dst = NULL;
130         int i;
131
132         alloc_mem(&src, &dst, len);
133
134         if (prefault)
135                 fn(dst, src, len);
136
137         cycle_start = get_cycle();
138         for (i = 0; i < iterations; ++i)
139                 fn(dst, src, len);
140         cycle_end = get_cycle();
141
142         free(src);
143         free(dst);
144         return cycle_end - cycle_start;
145 }
146
147 static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
148 {
149         struct timeval tv_start, tv_end, tv_diff;
150         void *src = NULL, *dst = NULL;
151         int i;
152
153         alloc_mem(&src, &dst, len);
154
155         if (prefault)
156                 fn(dst, src, len);
157
158         BUG_ON(gettimeofday(&tv_start, NULL));
159         for (i = 0; i < iterations; ++i)
160                 fn(dst, src, len);
161         BUG_ON(gettimeofday(&tv_end, NULL));
162
163         timersub(&tv_end, &tv_start, &tv_diff);
164
165         free(src);
166         free(dst);
167         return (double)((double)len / timeval2double(&tv_diff));
168 }
169
/* Result slot selected by the prefault options: 0 with no_prefault set, 1 otherwise. */
#define pf (!no_prefault)
171
/*
 * print_bps - print a throughput scaled to B, KB, MB or GB per second
 * @x: throughput in bytes per second
 *
 * Nothing is printed after the unit; the caller ends the line. @x is
 * evaluated exactly once.
 */
#define print_bps(x) do {                                               \
                double bps_ = (x);                                      \
                                                                        \
                if (bps_ < K)                                           \
                        printf(" %14lf B/Sec", bps_);                   \
                else if (bps_ < K * K)                                  \
                        printf(" %14lf KB/Sec", bps_ / K);              \
                else if (bps_ < K * K * K)                              \
                        printf(" %14lf MB/Sec", bps_ / K / K);          \
                else                                                    \
                        printf(" %14lf GB/Sec", bps_ / K / K / K);      \
        } while (0)
182
183 int bench_mem_memcpy(int argc, const char **argv,
184                      const char *prefix __maybe_unused)
185 {
186         int i;
187         size_t len;
188         double result_bps[2];
189         u64 result_cycle[2];
190
191         argc = parse_options(argc, argv, options,
192                              bench_mem_memcpy_usage, 0);
193
194         if (no_prefault && only_prefault) {
195                 fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
196                 return 1;
197         }
198
199         if (use_cycle)
200                 init_cycle();
201
202         len = (size_t)perf_atoll((char *)length_str);
203
204         result_cycle[0] = result_cycle[1] = 0ULL;
205         result_bps[0] = result_bps[1] = 0.0;
206
207         if ((s64)len <= 0) {
208                 fprintf(stderr, "Invalid length:%s\n", length_str);
209                 return 1;
210         }
211
212         /* same to without specifying either of prefault and no-prefault */
213         if (only_prefault && no_prefault)
214                 only_prefault = no_prefault = false;
215
216         for (i = 0; routines[i].name; i++) {
217                 if (!strcmp(routines[i].name, routine))
218                         break;
219         }
220         if (!routines[i].name) {
221                 printf("Unknown routine:%s\n", routine);
222                 printf("Available routines...\n");
223                 for (i = 0; routines[i].name; i++) {
224                         printf("\t%s ... %s\n",
225                                routines[i].name, routines[i].desc);
226                 }
227                 return 1;
228         }
229
230         if (bench_format == BENCH_FORMAT_DEFAULT)
231                 printf("# Copying %s Bytes ...\n\n", length_str);
232
233         if (!only_prefault && !no_prefault) {
234                 /* show both of results */
235                 if (use_cycle) {
236                         result_cycle[0] =
237                                 do_memcpy_cycle(routines[i].fn, len, false);
238                         result_cycle[1] =
239                                 do_memcpy_cycle(routines[i].fn, len, true);
240                 } else {
241                         result_bps[0] =
242                                 do_memcpy_gettimeofday(routines[i].fn,
243                                                 len, false);
244                         result_bps[1] =
245                                 do_memcpy_gettimeofday(routines[i].fn,
246                                                 len, true);
247                 }
248         } else {
249                 if (use_cycle) {
250                         result_cycle[pf] =
251                                 do_memcpy_cycle(routines[i].fn,
252                                                 len, only_prefault);
253                 } else {
254                         result_bps[pf] =
255                                 do_memcpy_gettimeofday(routines[i].fn,
256                                                 len, only_prefault);
257                 }
258         }
259
260         switch (bench_format) {
261         case BENCH_FORMAT_DEFAULT:
262                 if (!only_prefault && !no_prefault) {
263                         if (use_cycle) {
264                                 printf(" %14lf Cycle/Byte\n",
265                                         (double)result_cycle[0]
266                                         / (double)len);
267                                 printf(" %14lf Cycle/Byte (with prefault)\n",
268                                         (double)result_cycle[1]
269                                         / (double)len);
270                         } else {
271                                 print_bps(result_bps[0]);
272                                 printf("\n");
273                                 print_bps(result_bps[1]);
274                                 printf(" (with prefault)\n");
275                         }
276                 } else {
277                         if (use_cycle) {
278                                 printf(" %14lf Cycle/Byte",
279                                         (double)result_cycle[pf]
280                                         / (double)len);
281                         } else
282                                 print_bps(result_bps[pf]);
283
284                         printf("%s\n", only_prefault ? " (with prefault)" : "");
285                 }
286                 break;
287         case BENCH_FORMAT_SIMPLE:
288                 if (!only_prefault && !no_prefault) {
289                         if (use_cycle) {
290                                 printf("%lf %lf\n",
291                                         (double)result_cycle[0] / (double)len,
292                                         (double)result_cycle[1] / (double)len);
293                         } else {
294                                 printf("%lf %lf\n",
295                                         result_bps[0], result_bps[1]);
296                         }
297                 } else {
298                         if (use_cycle) {
299                                 printf("%lf\n", (double)result_cycle[pf]
300                                         / (double)len);
301                         } else
302                                 printf("%lf\n", result_bps[pf]);
303                 }
304                 break;
305         default:
306                 /* reaching this means there's some disaster: */
307                 die("unknown format: %d\n", bench_format);
308                 break;
309         }
310
311         return 0;
312 }