Linux-libre 4.9.174-gnu
[librecmc/linux-libre.git] / tools / testing / selftests / seccomp / seccomp_bpf.c
1 /*
2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by the GPLv2 license.
4  *
5  * Test code for seccomp bpf.
6  */
7
/* Feature-test macros only take effect when defined before any system header. */
#define _GNU_SOURCE
#include <sys/types.h>

/*
 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 * we need to use the kernel's siginfo.h file and trick glibc
 * into accepting it.
 */
#if !__GLIBC_PREREQ(2, 26)
# include <asm/siginfo.h>
# define __have_siginfo_t 1
# define __have_sigval_t 1
# define __have_sigevent_t 1
#endif

#include <errno.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/times.h>

#include <unistd.h>
#include <sys/syscall.h>

#include "test_harness.h"
49
/*
 * Fallback values for prctl() options and seccomp modes. Each one is only
 * defined here when the included headers did not already provide it.
 */

/* 0x59616d61 is the ASCII string "Yama". */
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

/* The set/get pair is defined together, keyed on the "set" option. */
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
#define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
#define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
#define SECCOMP_EXT_ACT_TSYNC 1
#endif

/* Mode values passed as the second argument of prctl(PR_SET_SECCOMP, ...). */
#ifndef SECCOMP_MODE_STRICT
#define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
#define SECCOMP_MODE_FILTER 2
#endif
78
/*
 * Fallback for headers without seccomp filter support: a missing
 * SECCOMP_RET_KILL is taken to mean that the other return actions, the
 * masks, and struct seccomp_data are missing too, so all are defined here.
 */
#ifndef SECCOMP_RET_KILL
#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */

/* Masks for the return value sections. */
#define SECCOMP_RET_ACTION      0x7fff0000U
#define SECCOMP_RET_DATA        0x0000ffffU

/*
 * Data the BPF programs in this file read via offsetof(): "nr" is compared
 * against __NR_* syscall numbers and "args" is indexed by syscall_arg().
 */
struct seccomp_data {
        int nr;
        __u32 arch;
        __u64 instruction_pointer;
        __u64 args[6];
};
#endif
97
/*
 * syscall_arg(_n): byte offset, inside struct seccomp_data, used by the
 * 32-bit BPF loads in this file to read argument _n. Each argument is a
 * 64-bit slot; on big-endian the offset is advanced by sizeof(__u32) so the
 * load hits the second 32-bit word of the slot (the low half on big-endian).
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
#error "wut? Unknown __BYTE_ORDER?!"
#endif
105
/*
 * Sentinel exit values. NOTE(review): their users are outside this part of
 * the file; presumably returned by sibling threads in later tests -- confirm.
 */
#define SIBLING_EXIT_UNKILLED   0xbadbeef
#define SIBLING_EXIT_FAILURE    0xbadface
#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
109
110 TEST(mode_strict_support)
111 {
112         long ret;
113
114         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
115         ASSERT_EQ(0, ret) {
116                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
117         }
118         syscall(__NR_exit, 1);
119 }
120
121 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
122 {
123         long ret;
124
125         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
126         ASSERT_EQ(0, ret) {
127                 TH_LOG("Kernel does not support CONFIG_SECCOMP");
128         }
129         syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
130                 NULL, NULL, NULL);
131         EXPECT_FALSE(true) {
132                 TH_LOG("Unreachable!");
133         }
134 }
135
136 /* Note! This doesn't test no new privs behavior */
137 TEST(no_new_privs_support)
138 {
139         long ret;
140
141         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
142         EXPECT_EQ(0, ret) {
143                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
144         }
145 }
146
147 /* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
148 TEST(mode_filter_support)
149 {
150         long ret;
151
152         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
153         ASSERT_EQ(0, ret) {
154                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
155         }
156         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
157         EXPECT_EQ(-1, ret);
158         EXPECT_EQ(EFAULT, errno) {
159                 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
160         }
161 }
162
163 TEST(mode_filter_without_nnp)
164 {
165         struct sock_filter filter[] = {
166                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
167         };
168         struct sock_fprog prog = {
169                 .len = (unsigned short)ARRAY_SIZE(filter),
170                 .filter = filter,
171         };
172         long ret;
173
174         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
175         ASSERT_LE(0, ret) {
176                 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
177         }
178         errno = 0;
179         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
180         /* Succeeds with CAP_SYS_ADMIN, fails without */
181         /* TODO(wad) check caps not euid */
182         if (geteuid()) {
183                 EXPECT_EQ(-1, ret);
184                 EXPECT_EQ(EACCES, errno);
185         } else {
186                 EXPECT_EQ(0, ret);
187         }
188 }
189
/*
 * Upper bound on the number of filter installs attempted by
 * filter_chain_limits. NOTE(review): presumably mirrors a kernel-side limit
 * of the same name -- confirm against the kernel sources.
 */
#define MAX_INSNS_PER_PATH 32768
191
192 TEST(filter_size_limits)
193 {
194         int i;
195         int count = BPF_MAXINSNS + 1;
196         struct sock_filter allow[] = {
197                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
198         };
199         struct sock_filter *filter;
200         struct sock_fprog prog = { };
201         long ret;
202
203         filter = calloc(count, sizeof(*filter));
204         ASSERT_NE(NULL, filter);
205
206         for (i = 0; i < count; i++)
207                 filter[i] = allow[0];
208
209         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
210         ASSERT_EQ(0, ret);
211
212         prog.filter = filter;
213         prog.len = count;
214
215         /* Too many filter instructions in a single filter. */
216         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
217         ASSERT_NE(0, ret) {
218                 TH_LOG("Installing %d insn filter was allowed", prog.len);
219         }
220
221         /* One less is okay, though. */
222         prog.len -= 1;
223         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
224         ASSERT_EQ(0, ret) {
225                 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
226         }
227 }
228
229 TEST(filter_chain_limits)
230 {
231         int i;
232         int count = BPF_MAXINSNS;
233         struct sock_filter allow[] = {
234                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
235         };
236         struct sock_filter *filter;
237         struct sock_fprog prog = { };
238         long ret;
239
240         filter = calloc(count, sizeof(*filter));
241         ASSERT_NE(NULL, filter);
242
243         for (i = 0; i < count; i++)
244                 filter[i] = allow[0];
245
246         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
247         ASSERT_EQ(0, ret);
248
249         prog.filter = filter;
250         prog.len = 1;
251
252         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
253         ASSERT_EQ(0, ret);
254
255         prog.len = count;
256
257         /* Too many total filter instructions. */
258         for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
259                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
260                 if (ret != 0)
261                         break;
262         }
263         ASSERT_NE(0, ret) {
264                 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
265                        i, count, i * (count + 4));
266         }
267 }
268
269 TEST(mode_filter_cannot_move_to_strict)
270 {
271         struct sock_filter filter[] = {
272                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
273         };
274         struct sock_fprog prog = {
275                 .len = (unsigned short)ARRAY_SIZE(filter),
276                 .filter = filter,
277         };
278         long ret;
279
280         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
281         ASSERT_EQ(0, ret);
282
283         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
284         ASSERT_EQ(0, ret);
285
286         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
287         EXPECT_EQ(-1, ret);
288         EXPECT_EQ(EINVAL, errno);
289 }
290
291
292 TEST(mode_filter_get_seccomp)
293 {
294         struct sock_filter filter[] = {
295                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
296         };
297         struct sock_fprog prog = {
298                 .len = (unsigned short)ARRAY_SIZE(filter),
299                 .filter = filter,
300         };
301         long ret;
302
303         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
304         ASSERT_EQ(0, ret);
305
306         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
307         EXPECT_EQ(0, ret);
308
309         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
310         ASSERT_EQ(0, ret);
311
312         ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
313         EXPECT_EQ(2, ret);
314 }
315
316
317 TEST(ALLOW_all)
318 {
319         struct sock_filter filter[] = {
320                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
321         };
322         struct sock_fprog prog = {
323                 .len = (unsigned short)ARRAY_SIZE(filter),
324                 .filter = filter,
325         };
326         long ret;
327
328         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
329         ASSERT_EQ(0, ret);
330
331         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
332         ASSERT_EQ(0, ret);
333 }
334
335 TEST(empty_prog)
336 {
337         struct sock_filter filter[] = {
338         };
339         struct sock_fprog prog = {
340                 .len = (unsigned short)ARRAY_SIZE(filter),
341                 .filter = filter,
342         };
343         long ret;
344
345         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
346         ASSERT_EQ(0, ret);
347
348         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
349         EXPECT_EQ(-1, ret);
350         EXPECT_EQ(EINVAL, errno);
351 }
352
353 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
354 {
355         struct sock_filter filter[] = {
356                 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
357         };
358         struct sock_fprog prog = {
359                 .len = (unsigned short)ARRAY_SIZE(filter),
360                 .filter = filter,
361         };
362         long ret;
363
364         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
365         ASSERT_EQ(0, ret);
366
367         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
368         ASSERT_EQ(0, ret);
369         EXPECT_EQ(0, syscall(__NR_getpid)) {
370                 TH_LOG("getpid() shouldn't ever return");
371         }
372 }
373
374 /* return code >= 0x80000000 is unused. */
375 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
376 {
377         struct sock_filter filter[] = {
378                 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
379         };
380         struct sock_fprog prog = {
381                 .len = (unsigned short)ARRAY_SIZE(filter),
382                 .filter = filter,
383         };
384         long ret;
385
386         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
387         ASSERT_EQ(0, ret);
388
389         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
390         ASSERT_EQ(0, ret);
391         EXPECT_EQ(0, syscall(__NR_getpid)) {
392                 TH_LOG("getpid() shouldn't ever return");
393         }
394 }
395
396 TEST_SIGNAL(KILL_all, SIGSYS)
397 {
398         struct sock_filter filter[] = {
399                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
400         };
401         struct sock_fprog prog = {
402                 .len = (unsigned short)ARRAY_SIZE(filter),
403                 .filter = filter,
404         };
405         long ret;
406
407         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
408         ASSERT_EQ(0, ret);
409
410         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
411         ASSERT_EQ(0, ret);
412 }
413
414 TEST_SIGNAL(KILL_one, SIGSYS)
415 {
416         struct sock_filter filter[] = {
417                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
418                         offsetof(struct seccomp_data, nr)),
419                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
420                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
421                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
422         };
423         struct sock_fprog prog = {
424                 .len = (unsigned short)ARRAY_SIZE(filter),
425                 .filter = filter,
426         };
427         long ret;
428         pid_t parent = getppid();
429
430         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
431         ASSERT_EQ(0, ret);
432
433         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
434         ASSERT_EQ(0, ret);
435
436         EXPECT_EQ(parent, syscall(__NR_getppid));
437         /* getpid() should never return. */
438         EXPECT_EQ(0, syscall(__NR_getpid));
439 }
440
441 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
442 {
443         void *fatal_address;
444         struct sock_filter filter[] = {
445                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
446                         offsetof(struct seccomp_data, nr)),
447                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
448                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
449                 /* Only both with lower 32-bit for now. */
450                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
451                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
452                         (unsigned long)&fatal_address, 0, 1),
453                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
454                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
455         };
456         struct sock_fprog prog = {
457                 .len = (unsigned short)ARRAY_SIZE(filter),
458                 .filter = filter,
459         };
460         long ret;
461         pid_t parent = getppid();
462         struct tms timebuf;
463         clock_t clock = times(&timebuf);
464
465         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
466         ASSERT_EQ(0, ret);
467
468         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
469         ASSERT_EQ(0, ret);
470
471         EXPECT_EQ(parent, syscall(__NR_getppid));
472         EXPECT_LE(clock, syscall(__NR_times, &timebuf));
473         /* times() should never return. */
474         EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
475 }
476
477 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
478 {
479 #ifndef __NR_mmap2
480         int sysno = __NR_mmap;
481 #else
482         int sysno = __NR_mmap2;
483 #endif
484         struct sock_filter filter[] = {
485                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
486                         offsetof(struct seccomp_data, nr)),
487                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
488                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
489                 /* Only both with lower 32-bit for now. */
490                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
491                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
492                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
493                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
494         };
495         struct sock_fprog prog = {
496                 .len = (unsigned short)ARRAY_SIZE(filter),
497                 .filter = filter,
498         };
499         long ret;
500         pid_t parent = getppid();
501         int fd;
502         void *map1, *map2;
503         int page_size = sysconf(_SC_PAGESIZE);
504
505         ASSERT_LT(0, page_size);
506
507         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
508         ASSERT_EQ(0, ret);
509
510         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
511         ASSERT_EQ(0, ret);
512
513         fd = open("/dev/zero", O_RDONLY);
514         ASSERT_NE(-1, fd);
515
516         EXPECT_EQ(parent, syscall(__NR_getppid));
517         map1 = (void *)syscall(sysno,
518                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
519         EXPECT_NE(MAP_FAILED, map1);
520         /* mmap2() should never return. */
521         map2 = (void *)syscall(sysno,
522                  NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
523         EXPECT_EQ(MAP_FAILED, map2);
524
525         /* The test failed, so clean up the resources. */
526         munmap(map1, page_size);
527         munmap(map2, page_size);
528         close(fd);
529 }
530
531 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
532 TEST(arg_out_of_range)
533 {
534         struct sock_filter filter[] = {
535                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
536                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
537         };
538         struct sock_fprog prog = {
539                 .len = (unsigned short)ARRAY_SIZE(filter),
540                 .filter = filter,
541         };
542         long ret;
543
544         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
545         ASSERT_EQ(0, ret);
546
547         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
548         EXPECT_EQ(-1, ret);
549         EXPECT_EQ(EINVAL, errno);
550 }
551
552 TEST(ERRNO_valid)
553 {
554         struct sock_filter filter[] = {
555                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
556                         offsetof(struct seccomp_data, nr)),
557                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
558                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
559                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
560         };
561         struct sock_fprog prog = {
562                 .len = (unsigned short)ARRAY_SIZE(filter),
563                 .filter = filter,
564         };
565         long ret;
566         pid_t parent = getppid();
567
568         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
569         ASSERT_EQ(0, ret);
570
571         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
572         ASSERT_EQ(0, ret);
573
574         EXPECT_EQ(parent, syscall(__NR_getppid));
575         EXPECT_EQ(-1, read(0, NULL, 0));
576         EXPECT_EQ(E2BIG, errno);
577 }
578
579 TEST(ERRNO_zero)
580 {
581         struct sock_filter filter[] = {
582                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
583                         offsetof(struct seccomp_data, nr)),
584                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
585                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
586                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
587         };
588         struct sock_fprog prog = {
589                 .len = (unsigned short)ARRAY_SIZE(filter),
590                 .filter = filter,
591         };
592         long ret;
593         pid_t parent = getppid();
594
595         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
596         ASSERT_EQ(0, ret);
597
598         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
599         ASSERT_EQ(0, ret);
600
601         EXPECT_EQ(parent, syscall(__NR_getppid));
602         /* "errno" of 0 is ok. */
603         EXPECT_EQ(0, read(0, NULL, 0));
604 }
605
606 TEST(ERRNO_capped)
607 {
608         struct sock_filter filter[] = {
609                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
610                         offsetof(struct seccomp_data, nr)),
611                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
612                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
613                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
614         };
615         struct sock_fprog prog = {
616                 .len = (unsigned short)ARRAY_SIZE(filter),
617                 .filter = filter,
618         };
619         long ret;
620         pid_t parent = getppid();
621
622         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
623         ASSERT_EQ(0, ret);
624
625         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
626         ASSERT_EQ(0, ret);
627
628         EXPECT_EQ(parent, syscall(__NR_getppid));
629         EXPECT_EQ(-1, read(0, NULL, 0));
630         EXPECT_EQ(4095, errno);
631 }
632
/* State shared by the TRAP tests: the filter program each of them installs. */
FIXTURE_DATA(TRAP) {
        struct sock_fprog prog;
};
636
637 FIXTURE_SETUP(TRAP)
638 {
639         struct sock_filter filter[] = {
640                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
641                         offsetof(struct seccomp_data, nr)),
642                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
643                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
644                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
645         };
646
647         memset(&self->prog, 0, sizeof(self->prog));
648         self->prog.filter = malloc(sizeof(filter));
649         ASSERT_NE(NULL, self->prog.filter);
650         memcpy(self->prog.filter, filter, sizeof(filter));
651         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
652 }
653
654 FIXTURE_TEARDOWN(TRAP)
655 {
656         if (self->prog.filter)
657                 free(self->prog.filter);
658 }
659
660 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
661 {
662         long ret;
663
664         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
665         ASSERT_EQ(0, ret);
666
667         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
668         ASSERT_EQ(0, ret);
669         syscall(__NR_getpid);
670 }
671
672 /* Ensure that SIGSYS overrides SIG_IGN */
673 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
674 {
675         long ret;
676
677         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
678         ASSERT_EQ(0, ret);
679
680         signal(SIGSYS, SIG_IGN);
681
682         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
683         ASSERT_EQ(0, ret);
684         syscall(__NR_getpid);
685 }
686
/* Last signal number and siginfo seen by TRAP_action(). */
static siginfo_t TRAP_info;
static volatile int TRAP_nr;

/*
 * Three-argument signal handler: records the signal number and a copy of the
 * siginfo so the test body can inspect them. The context argument is unused.
 */
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
        (void)void_context;

        TRAP_nr = nr;
        memcpy(&TRAP_info, info, sizeof(*info));
}
694
/*
 * Install TRAP_action() as the SIGSYS handler, trigger the fixture's filter
 * with getpid(), and check what the handler recorded: the signal number,
 * the syscall number, and non-zero arch and call address.
 */
TEST_F(TRAP, handler)
{
        int ret, test;
        struct sigaction act;
        sigset_t mask;

        memset(&act, 0, sizeof(act));
        sigemptyset(&mask);
        sigaddset(&mask, SIGSYS);

        act.sa_sigaction = &TRAP_action;
        act.sa_flags = SA_SIGINFO;
        ret = sigaction(SIGSYS, &act, NULL);
        ASSERT_EQ(0, ret) {
                TH_LOG("sigaction failed");
        }
        /* Make sure SIGSYS is not blocked, so the handler can run. */
        ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
        ASSERT_EQ(0, ret) {
                TH_LOG("sigprocmask failed");
        }

        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        ASSERT_EQ(0, ret);
        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
        ASSERT_EQ(0, ret);
        /* Reset the handler's record before triggering it. */
        TRAP_nr = 0;
        memset(&TRAP_info, 0, sizeof(TRAP_info));
        /* Expect the registers to be rolled back. (nr = error) may vary
         * based on arch. */
        ret = syscall(__NR_getpid);
        /* Silence gcc warning about volatile. */
        test = TRAP_nr;
        EXPECT_EQ(SIGSYS, test);
        /*
         * Local view of the SIGSYS-specific siginfo fields. It is overlaid
         * on si_call_addr when the headers define si_syscall, and on si_pid
         * otherwise. NOTE(review): presumably si_pid sits at the same
         * offset on headers without the SIGSYS fields -- confirm.
         */
        struct local_sigsys {
                void *_call_addr;       /* calling user insn */
                int _syscall;           /* triggering system call number */
                unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
        } *sigsys = (struct local_sigsys *)
#ifdef si_syscall
                &(TRAP_info.si_call_addr);
#else
                &TRAP_info.si_pid;
#endif
        EXPECT_EQ(__NR_getpid, sigsys->_syscall);
        /* Make sure arch is non-zero. */
        EXPECT_NE(0, sigsys->_arch);
        EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
}
743
/*
 * State shared by the precedence tests: one filter program per return
 * action, so the tests can stack them in different orders.
 */
FIXTURE_DATA(precedence) {
        struct sock_fprog allow;
        struct sock_fprog trace;
        struct sock_fprog error;
        struct sock_fprog trap;
        struct sock_fprog kill;
};
751
/*
 * Build the five programs of the precedence fixture. "allow" allows every
 * syscall. Each of the others allows everything except getpid(), for which
 * it returns its own action (TRACE, ERRNO, TRAP or KILL).
 */
FIXTURE_SETUP(precedence)
{
        struct sock_filter allow_insns[] = {
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
        };
        /* In the four filters below, getpid jumps over the ALLOW return. */
        struct sock_filter trace_insns[] = {
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
                        offsetof(struct seccomp_data, nr)),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
        };
        struct sock_filter error_insns[] = {
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
                        offsetof(struct seccomp_data, nr)),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
        };
        struct sock_filter trap_insns[] = {
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
                        offsetof(struct seccomp_data, nr)),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
        };
        struct sock_filter kill_insns[] = {
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
                        offsetof(struct seccomp_data, nr)),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
        };

        memset(self, 0, sizeof(*self));
        /*
         * FILTER_ALLOC(x): copy the local x_insns array to the heap and
         * point self->x at the copy. It expands to several statements, so
         * it must only be used as a full statement.
         */
#define FILTER_ALLOC(_x) \
        self->_x.filter = malloc(sizeof(_x##_insns)); \
        ASSERT_NE(NULL, self->_x.filter); \
        memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
        self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
        FILTER_ALLOC(allow);
        FILTER_ALLOC(trace);
        FILTER_ALLOC(error);
        FILTER_ALLOC(trap);
        FILTER_ALLOC(kill);
}
798
799 FIXTURE_TEARDOWN(precedence)
800 {
801 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
802         FILTER_FREE(allow);
803         FILTER_FREE(trace);
804         FILTER_FREE(error);
805         FILTER_FREE(trap);
806         FILTER_FREE(kill);
807 }
808
809 TEST_F(precedence, allow_ok)
810 {
811         pid_t parent, res = 0;
812         long ret;
813
814         parent = getppid();
815         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
816         ASSERT_EQ(0, ret);
817
818         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
819         ASSERT_EQ(0, ret);
820         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
821         ASSERT_EQ(0, ret);
822         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
823         ASSERT_EQ(0, ret);
824         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
825         ASSERT_EQ(0, ret);
826         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
827         ASSERT_EQ(0, ret);
828         /* Should work just fine. */
829         res = syscall(__NR_getppid);
830         EXPECT_EQ(parent, res);
831 }
832
833 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
834 {
835         pid_t parent, res = 0;
836         long ret;
837
838         parent = getppid();
839         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
840         ASSERT_EQ(0, ret);
841
842         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
843         ASSERT_EQ(0, ret);
844         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
845         ASSERT_EQ(0, ret);
846         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
847         ASSERT_EQ(0, ret);
848         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
849         ASSERT_EQ(0, ret);
850         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
851         ASSERT_EQ(0, ret);
852         /* Should work just fine. */
853         res = syscall(__NR_getppid);
854         EXPECT_EQ(parent, res);
855         /* getpid() should never return. */
856         res = syscall(__NR_getpid);
857         EXPECT_EQ(0, res);
858 }
859
860 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
861 {
862         pid_t parent;
863         long ret;
864
865         parent = getppid();
866         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
867         ASSERT_EQ(0, ret);
868
869         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
870         ASSERT_EQ(0, ret);
871         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
872         ASSERT_EQ(0, ret);
873         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
874         ASSERT_EQ(0, ret);
875         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
876         ASSERT_EQ(0, ret);
877         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
878         ASSERT_EQ(0, ret);
879         /* Should work just fine. */
880         EXPECT_EQ(parent, syscall(__NR_getppid));
881         /* getpid() should never return. */
882         EXPECT_EQ(0, syscall(__NR_getpid));
883 }
884
885 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
886 {
887         pid_t parent;
888         long ret;
889
890         parent = getppid();
891         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
892         ASSERT_EQ(0, ret);
893
894         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
895         ASSERT_EQ(0, ret);
896         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
897         ASSERT_EQ(0, ret);
898         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
899         ASSERT_EQ(0, ret);
900         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
901         ASSERT_EQ(0, ret);
902         /* Should work just fine. */
903         EXPECT_EQ(parent, syscall(__NR_getppid));
904         /* getpid() should never return. */
905         EXPECT_EQ(0, syscall(__NR_getpid));
906 }
907
908 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
909 {
910         pid_t parent;
911         long ret;
912
913         parent = getppid();
914         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
915         ASSERT_EQ(0, ret);
916
917         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
918         ASSERT_EQ(0, ret);
919         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
920         ASSERT_EQ(0, ret);
921         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
922         ASSERT_EQ(0, ret);
923         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
924         ASSERT_EQ(0, ret);
925         /* Should work just fine. */
926         EXPECT_EQ(parent, syscall(__NR_getppid));
927         /* getpid() should never return. */
928         EXPECT_EQ(0, syscall(__NR_getpid));
929 }
930
931 TEST_F(precedence, errno_is_third)
932 {
933         pid_t parent;
934         long ret;
935
936         parent = getppid();
937         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
938         ASSERT_EQ(0, ret);
939
940         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
941         ASSERT_EQ(0, ret);
942         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
943         ASSERT_EQ(0, ret);
944         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
945         ASSERT_EQ(0, ret);
946         /* Should work just fine. */
947         EXPECT_EQ(parent, syscall(__NR_getppid));
948         EXPECT_EQ(0, syscall(__NR_getpid));
949 }
950
951 TEST_F(precedence, errno_is_third_in_any_order)
952 {
953         pid_t parent;
954         long ret;
955
956         parent = getppid();
957         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
958         ASSERT_EQ(0, ret);
959
960         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
961         ASSERT_EQ(0, ret);
962         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
963         ASSERT_EQ(0, ret);
964         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
965         ASSERT_EQ(0, ret);
966         /* Should work just fine. */
967         EXPECT_EQ(parent, syscall(__NR_getppid));
968         EXPECT_EQ(0, syscall(__NR_getpid));
969 }
970
971 TEST_F(precedence, trace_is_fourth)
972 {
973         pid_t parent;
974         long ret;
975
976         parent = getppid();
977         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
978         ASSERT_EQ(0, ret);
979
980         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
981         ASSERT_EQ(0, ret);
982         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
983         ASSERT_EQ(0, ret);
984         /* Should work just fine. */
985         EXPECT_EQ(parent, syscall(__NR_getppid));
986         /* No ptracer */
987         EXPECT_EQ(-1, syscall(__NR_getpid));
988 }
989
990 TEST_F(precedence, trace_is_fourth_in_any_order)
991 {
992         pid_t parent;
993         long ret;
994
995         parent = getppid();
996         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
997         ASSERT_EQ(0, ret);
998
999         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1000         ASSERT_EQ(0, ret);
1001         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1002         ASSERT_EQ(0, ret);
1003         /* Should work just fine. */
1004         EXPECT_EQ(parent, syscall(__NR_getppid));
1005         /* No ptracer */
1006         EXPECT_EQ(-1, syscall(__NR_getpid));
1007 }
1008
/* Fallback for headers that do not provide PTRACE_O_TRACESECCOMP. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP   0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits of a wait status above bit 15 equal
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that compound
 * expressions (e.g. "a | b") are shifted as a whole rather than having the
 * shift bind to their last operand only.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer loop. It is written from the tracer_stop() signal
 * handler and polled by ordinary code, so it is declared volatile
 * sig_atomic_t: that is the object type C guarantees can be safely stored to
 * from an asynchronous signal handler, and volatile keeps the polling loop
 * from caching the value.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: clear the run flag. The signal number is not used. */
void tracer_stop(int sig)
{
        (void)sig;
        tracer_running = false;
}
1028
/*
 * Signature of the per-stop callback handed to start_tracer(). It receives
 * the harness metadata, the tracee's pid, the wait status of the stop and
 * the opaque args pointer that was given to start_tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata,
                           pid_t tracee, int status, void *args);
1031
/*
 * Tracer process body: attach to @tracee with ptrace, signal readiness over
 * @fd, then hand every tracee stop to @tracer_func until SIGUSR1 clears
 * tracer_running or the tracee terminates.
 *
 * @_metadata:      harness state used by the ASSERT/EXPECT macros
 * @fd:             descriptor that gets one byte written to it and is then
 *                  closed, once the tracee has been resumed
 * @tracee:         pid to attach to
 * @tracer_func:    callback run for each handled stop
 * @args:           opaque pointer forwarded to @tracer_func
 * @ptrace_syscall: true selects PTRACE_O_TRACESYSGOOD and PTRACE_SYSCALL,
 *                  false selects PTRACE_O_TRACESECCOMP and PTRACE_CONT
 *
 * Returns only when the tracee has exited or been killed; if the loop ends
 * because tracer_running was cleared, the process leaves through the exit
 * syscall, reporting EXIT_SUCCESS or EXIT_FAILURE from _metadata->passed.
 */
void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
            tracer_func_t tracer_func, void *args, bool ptrace_syscall)
{
        int ret = -1;
        struct sigaction action = {
                .sa_handler = tracer_stop,
        };

        /* Allow external shutdown. */
        tracer_running = true;
        ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));

        /*
         * Keep retrying the attach while it fails, unless the failure is
         * EINVAL.
         * NOTE(review): presumably this spins until the tracee has granted
         * access via PR_SET_PTRACER -- confirm.
         */
        errno = 0;
        while (ret == -1 && errno != EINVAL)
                ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
        ASSERT_EQ(0, ret) {
                kill(tracee, SIGKILL);
        }
        /* Wait for attach stop */
        wait(NULL);

        ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
                                                      PTRACE_O_TRACESYSGOOD :
                                                      PTRACE_O_TRACESECCOMP);
        ASSERT_EQ(0, ret) {
                TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
                kill(tracee, SIGKILL);
        }
        /* Resume the tracee in the mode selected by ptrace_syscall. */
        ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
                     tracee, NULL, 0);
        ASSERT_EQ(0, ret);

        /* Unblock the tracee */
        ASSERT_EQ(1, write(fd, "A", 1));
        ASSERT_EQ(0, close(fd));

        /* Run until we're shut down. Must assert to stop execution. */
        while (tracer_running) {
                int status;

                /* Anything other than a report for the tracee: poll again. */
                if (wait(&status) != tracee)
                        continue;
                if (WIFSIGNALED(status) || WIFEXITED(status))
                        /* Child is dead. Time to go. */
                        return;

                /*
                 * Check if this is a seccomp event: seccomp events are
                 * expected exactly when ptrace_syscall is false.
                 */
                ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));

                tracer_func(_metadata, tracee, status, args);

                /* Let the tracee run on to its next stop. */
                ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
                             tracee, NULL, 0);
                ASSERT_EQ(0, ret);
        }
        /* Directly report the status of our test harness results. */
        syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
1090
1091 /* Common tracer setup/teardown functions. */
/* Signal handler that deliberately does nothing with the signal it gets. */
void cont_handler(int num)
{
        (void)num;
}
1094 pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1095                           tracer_func_t func, void *args, bool ptrace_syscall)
1096 {
1097         char sync;
1098         int pipefd[2];
1099         pid_t tracer_pid;
1100         pid_t tracee = getpid();
1101
1102         /* Setup a pipe for clean synchronization. */
1103         ASSERT_EQ(0, pipe(pipefd));
1104
1105         /* Fork a child which we'll promote to tracer */
1106         tracer_pid = fork();
1107         ASSERT_LE(0, tracer_pid);
1108         signal(SIGALRM, cont_handler);
1109         if (tracer_pid == 0) {
1110                 close(pipefd[0]);
1111                 start_tracer(_metadata, pipefd[1], tracee, func, args,
1112                              ptrace_syscall);
1113                 syscall(__NR_exit, 0);
1114         }
1115         close(pipefd[1]);
1116         prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1117         read(pipefd[0], &sync, 1);
1118         close(pipefd[0]);
1119
1120         return tracer_pid;
1121 }
1122 void teardown_trace_fixture(struct __test_metadata *_metadata,
1123                             pid_t tracer)
1124 {
1125         if (tracer) {
1126                 int status;
1127                 /*
1128                  * Extract the exit code from the other process and
1129                  * adopt it for ourselves in case its asserts failed.
1130                  */
1131                 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1132                 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1133                 if (WEXITSTATUS(status))
1134                         _metadata->passed = 0;
1135         }
1136 }
1137
/*
 * "poke" tracer arguments and function.
 *
 * Argument block handed to tracer_poke() through its void *args parameter.
 */
struct tracer_args_poke_t {
        /* Address in the tracee that tracer_poke() writes to. */
        unsigned long poke_addr;
};
1142
1143 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1144                  void *args)
1145 {
1146         int ret;
1147         unsigned long msg;
1148         struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1149
1150         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1151         EXPECT_EQ(0, ret);
1152         /* If this fails, don't try to recover. */
1153         ASSERT_EQ(0x1001, msg) {
1154                 kill(tracee, SIGKILL);
1155         }
1156         /*
1157          * Poke in the message.
1158          * Registers are not touched to try to keep this relatively arch
1159          * agnostic.
1160          */
1161         ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1162         EXPECT_EQ(0, ret);
1163 }
1164
/* Per-test state of the TRACE_poke fixture. */
FIXTURE_DATA(TRACE_poke) {
        /* Filter program; its filter array is heap-allocated in setup. */
        struct sock_fprog prog;
        /* Pid of the tracer child returned by setup_trace_fixture(). */
        pid_t tracer;
        /* Word whose address is given to the tracer as the poke target. */
        long poked;
        /* Argument block passed to tracer_poke(). */
        struct tracer_args_poke_t tracer_args;
};
1171
1172 FIXTURE_SETUP(TRACE_poke)
1173 {
1174         struct sock_filter filter[] = {
1175                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1176                         offsetof(struct seccomp_data, nr)),
1177                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1178                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1179                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1180         };
1181
1182         self->poked = 0;
1183         memset(&self->prog, 0, sizeof(self->prog));
1184         self->prog.filter = malloc(sizeof(filter));
1185         ASSERT_NE(NULL, self->prog.filter);
1186         memcpy(self->prog.filter, filter, sizeof(filter));
1187         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1188
1189         /* Set up tracer args. */
1190         self->tracer_args.poke_addr = (unsigned long)&self->poked;
1191
1192         /* Launch tracer. */
1193         self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1194                                            &self->tracer_args, false);
1195 }
1196
1197 FIXTURE_TEARDOWN(TRACE_poke)
1198 {
1199         teardown_trace_fixture(_metadata, self->tracer);
1200         if (self->prog.filter)
1201                 free(self->prog.filter);
1202 }
1203
1204 TEST_F(TRACE_poke, read_has_side_effects)
1205 {
1206         ssize_t ret;
1207
1208         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1209         ASSERT_EQ(0, ret);
1210
1211         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1212         ASSERT_EQ(0, ret);
1213
1214         EXPECT_EQ(0, self->poked);
1215         ret = read(-1, NULL, 0);
1216         EXPECT_EQ(-1, ret);
1217         EXPECT_EQ(0x1001, self->poked);
1218 }
1219
1220 TEST_F(TRACE_poke, getpid_runs_normally)
1221 {
1222         long ret;
1223
1224         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1225         ASSERT_EQ(0, ret);
1226
1227         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1228         ASSERT_EQ(0, ret);
1229
1230         EXPECT_EQ(0, self->poked);
1231         EXPECT_NE(0, syscall(__NR_getpid));
1232         EXPECT_EQ(0, self->poked);
1233 }
1234
/*
 * Per-architecture description of the register set used by get_syscall()
 * and change_syscall():
 *   ARCH_REGS    type of the register structure
 *   SYSCALL_NUM  member holding the syscall number
 *   SYSCALL_RET  member holding the syscall return value
 *   SYSCALL_NUM_RET_SHARE_REG
 *                defined when SYSCALL_NUM and SYSCALL_RET are the same
 *                register, so a skipped syscall cannot also be given a
 *                substitute return value
 */
#if defined(__x86_64__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_rax
# define SYSCALL_RET    rax
#elif defined(__i386__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_eax
# define SYSCALL_RET    eax
#elif defined(__arm__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    ARM_r7
# define SYSCALL_RET    ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS      struct user_pt_regs
# define SYSCALL_NUM    regs[8]
# define SYSCALL_RET    regs[0]
#elif defined(__hppa__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    gr[20]
# define SYSCALL_RET    gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    gpr[0]
# define SYSCALL_RET    gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
/*
 * gprs[2] carries both the syscall number and the return value; without
 * this flag, storing a return value after setting the number to -1 would
 * overwrite the number and run a different syscall instead of skipping.
 */
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET    regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif

/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif
1279
1280 /* Architecture-specific syscall fetching routine. */
1281 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1282 {
1283         ARCH_REGS regs;
1284 #ifdef HAVE_GETREGS
1285         EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1286                 TH_LOG("PTRACE_GETREGS failed");
1287                 return -1;
1288         }
1289 #else
1290         struct iovec iov;
1291
1292         iov.iov_base = &regs;
1293         iov.iov_len = sizeof(regs);
1294         EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1295                 TH_LOG("PTRACE_GETREGSET failed");
1296                 return -1;
1297         }
1298 #endif
1299
1300 #if defined(__mips__)
1301         if (regs.SYSCALL_NUM == __NR_O32_Linux)
1302                 return regs.SYSCALL_SYSCALL_NUM;
1303 #endif
1304         return regs.SYSCALL_NUM;
1305 }
1306
/*
 * Architecture-specific syscall changing routine.
 *
 * Replaces the syscall number the stopped tracee is about to execute with
 * @syscall. When @syscall is -1 the return-value register is additionally
 * set to 1, unless SYSCALL_NUM_RET_SHARE_REG is defined, in which case only
 * a log message is emitted.
 *
 * @_metadata: harness state used by the ASSERT/EXPECT macros
 * @tracee:    pid of the stopped tracee
 * @syscall:   new syscall number, or -1
 */
void change_syscall(struct __test_metadata *_metadata,
                    pid_t tracee, int syscall)
{
        int ret;
        ARCH_REGS regs;
        /* Fetch the current register set so it can be edited and put back. */
#ifdef HAVE_GETREGS
        ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
#else
        struct iovec iov;
        iov.iov_base = &regs;
        iov.iov_len = sizeof(regs);
        ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
        EXPECT_EQ(0, ret) {}

        /* Architectures where the number is a plain register member. */
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
    defined(__s390__) || defined(__hppa__)
        {
                regs.SYSCALL_NUM = syscall;
        }
#elif defined(__mips__)
        {
                /* Mirrors the register selection done in get_syscall(). */
                if (regs.SYSCALL_NUM == __NR_O32_Linux)
                        regs.SYSCALL_SYSCALL_NUM = syscall;
                else
                        regs.SYSCALL_NUM = syscall;
        }

#elif defined(__arm__)
# ifndef PTRACE_SET_SYSCALL
#  define PTRACE_SET_SYSCALL   23
# endif
        {
                /* arm uses a dedicated ptrace request for the number. */
                ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
                EXPECT_EQ(0, ret);
        }

#elif defined(__aarch64__)
# ifndef NT_ARM_SYSTEM_CALL
#  define NT_ARM_SYSTEM_CALL 0x404
# endif
        {
                /* aarch64 sets the number through its own regset. */
                iov.iov_base = &syscall;
                iov.iov_len = sizeof(syscall);
                ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
                             &iov);
                EXPECT_EQ(0, ret);
        }

#else
        ASSERT_EQ(1, 0) {
                TH_LOG("How is the syscall changed on this architecture?");
        }
#endif

        /* If syscall is skipped, change return value. */
        if (syscall == -1)
#ifdef SYSCALL_NUM_RET_SHARE_REG
                TH_LOG("Can't modify syscall return on this architecture");
#else
                regs.SYSCALL_RET = 1;
#endif

        /* Write the (possibly edited) register set back to the tracee. */
#ifdef HAVE_GETREGS
        ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
#else
        iov.iov_base = &regs;
        iov.iov_len = sizeof(regs);
        ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
        EXPECT_EQ(0, ret);
}
1380
1381 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1382                     int status, void *args)
1383 {
1384         int ret;
1385         unsigned long msg;
1386
1387         /* Make sure we got the right message. */
1388         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1389         EXPECT_EQ(0, ret);
1390
1391         /* Validate and take action on expected syscalls. */
1392         switch (msg) {
1393         case 0x1002:
1394                 /* change getpid to getppid. */
1395                 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1396                 change_syscall(_metadata, tracee, __NR_getppid);
1397                 break;
1398         case 0x1003:
1399                 /* skip gettid. */
1400                 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1401                 change_syscall(_metadata, tracee, -1);
1402                 break;
1403         case 0x1004:
1404                 /* do nothing (allow getppid) */
1405                 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1406                 break;
1407         default:
1408                 EXPECT_EQ(0, msg) {
1409                         TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1410                         kill(tracee, SIGKILL);
1411                 }
1412         }
1413
1414 }
1415
1416 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1417                    int status, void *args)
1418 {
1419         int ret, nr;
1420         unsigned long msg;
1421         static bool entry;
1422
1423         /* Make sure we got an empty message. */
1424         ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1425         EXPECT_EQ(0, ret);
1426         EXPECT_EQ(0, msg);
1427
1428         /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
1429         entry = !entry;
1430         if (!entry)
1431                 return;
1432
1433         nr = get_syscall(_metadata, tracee);
1434
1435         if (nr == __NR_getpid)
1436                 change_syscall(_metadata, tracee, __NR_getppid);
1437 }
1438
/* Per-test state of the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
        /* Filter program; its filter array is heap-allocated in setup. */
        struct sock_fprog prog;
        /*
         * tracer: pid returned by setup_trace_fixture(); mytid, mypid and
         * parent: results of gettid, getpid and getppid recorded in setup.
         */
        pid_t tracer, mytid, mypid, parent;
};
1443
1444 FIXTURE_SETUP(TRACE_syscall)
1445 {
1446         struct sock_filter filter[] = {
1447                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1448                         offsetof(struct seccomp_data, nr)),
1449                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1450                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1451                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1452                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1453                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1454                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1455                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1456         };
1457
1458         memset(&self->prog, 0, sizeof(self->prog));
1459         self->prog.filter = malloc(sizeof(filter));
1460         ASSERT_NE(NULL, self->prog.filter);
1461         memcpy(self->prog.filter, filter, sizeof(filter));
1462         self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1463
1464         /* Prepare some testable syscall results. */
1465         self->mytid = syscall(__NR_gettid);
1466         ASSERT_GT(self->mytid, 0);
1467         ASSERT_NE(self->mytid, 1) {
1468                 TH_LOG("Running this test as init is not supported. :)");
1469         }
1470
1471         self->mypid = getpid();
1472         ASSERT_GT(self->mypid, 0);
1473         ASSERT_EQ(self->mytid, self->mypid);
1474
1475         self->parent = getppid();
1476         ASSERT_GT(self->parent, 0);
1477         ASSERT_NE(self->parent, self->mypid);
1478
1479         /* Launch tracer. */
1480         self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1481                                            false);
1482 }
1483
1484 FIXTURE_TEARDOWN(TRACE_syscall)
1485 {
1486         teardown_trace_fixture(_metadata, self->tracer);
1487         if (self->prog.filter)
1488                 free(self->prog.filter);
1489 }
1490
1491 TEST_F(TRACE_syscall, syscall_allowed)
1492 {
1493         long ret;
1494
1495         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1496         ASSERT_EQ(0, ret);
1497
1498         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1499         ASSERT_EQ(0, ret);
1500
1501         /* getppid works as expected (no changes). */
1502         EXPECT_EQ(self->parent, syscall(__NR_getppid));
1503         EXPECT_NE(self->mypid, syscall(__NR_getppid));
1504 }
1505
1506 TEST_F(TRACE_syscall, syscall_redirected)
1507 {
1508         long ret;
1509
1510         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1511         ASSERT_EQ(0, ret);
1512
1513         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1514         ASSERT_EQ(0, ret);
1515
1516         /* getpid has been redirected to getppid as expected. */
1517         EXPECT_EQ(self->parent, syscall(__NR_getpid));
1518         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1519 }
1520
1521 TEST_F(TRACE_syscall, syscall_dropped)
1522 {
1523         long ret;
1524
1525         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1526         ASSERT_EQ(0, ret);
1527
1528         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1529         ASSERT_EQ(0, ret);
1530
1531 #ifdef SYSCALL_NUM_RET_SHARE_REG
1532         /* gettid has been skipped */
1533         EXPECT_EQ(-1, syscall(__NR_gettid));
1534 #else
1535         /* gettid has been skipped and an altered return value stored. */
1536         EXPECT_EQ(1, syscall(__NR_gettid));
1537 #endif
1538         EXPECT_NE(self->mytid, syscall(__NR_gettid));
1539 }
1540
1541 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1542 {
1543         struct sock_filter filter[] = {
1544                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1545                         offsetof(struct seccomp_data, nr)),
1546                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1547                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1548                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1549         };
1550         struct sock_fprog prog = {
1551                 .len = (unsigned short)ARRAY_SIZE(filter),
1552                 .filter = filter,
1553         };
1554         long ret;
1555
1556         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1557         ASSERT_EQ(0, ret);
1558
1559         /* Install fixture filter. */
1560         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1561         ASSERT_EQ(0, ret);
1562
1563         /* Install "errno on getppid" filter. */
1564         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1565         ASSERT_EQ(0, ret);
1566
1567         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1568         EXPECT_EQ(-1, syscall(__NR_getpid));
1569         EXPECT_EQ(EPERM, errno);
1570 }
1571
1572 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1573 {
1574         struct sock_filter filter[] = {
1575                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1576                         offsetof(struct seccomp_data, nr)),
1577                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1578                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1579                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1580         };
1581         struct sock_fprog prog = {
1582                 .len = (unsigned short)ARRAY_SIZE(filter),
1583                 .filter = filter,
1584         };
1585         long ret;
1586
1587         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1588         ASSERT_EQ(0, ret);
1589
1590         /* Install fixture filter. */
1591         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1592         ASSERT_EQ(0, ret);
1593
1594         /* Install "death on getppid" filter. */
1595         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1596         ASSERT_EQ(0, ret);
1597
1598         /* Tracer will redirect getpid to getppid, and we should die. */
1599         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1600 }
1601
1602 TEST_F(TRACE_syscall, skip_after_ptrace)
1603 {
1604         struct sock_filter filter[] = {
1605                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1606                         offsetof(struct seccomp_data, nr)),
1607                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1608                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1609                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1610         };
1611         struct sock_fprog prog = {
1612                 .len = (unsigned short)ARRAY_SIZE(filter),
1613                 .filter = filter,
1614         };
1615         long ret;
1616
1617         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1618         teardown_trace_fixture(_metadata, self->tracer);
1619         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1620                                            true);
1621
1622         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1623         ASSERT_EQ(0, ret);
1624
1625         /* Install "errno on getppid" filter. */
1626         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1627         ASSERT_EQ(0, ret);
1628
1629         /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1630         EXPECT_EQ(-1, syscall(__NR_getpid));
1631         EXPECT_EQ(EPERM, errno);
1632 }
1633
1634 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
1635 {
1636         struct sock_filter filter[] = {
1637                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1638                         offsetof(struct seccomp_data, nr)),
1639                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1640                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1641                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1642         };
1643         struct sock_fprog prog = {
1644                 .len = (unsigned short)ARRAY_SIZE(filter),
1645                 .filter = filter,
1646         };
1647         long ret;
1648
1649         /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1650         teardown_trace_fixture(_metadata, self->tracer);
1651         self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1652                                            true);
1653
1654         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1655         ASSERT_EQ(0, ret);
1656
1657         /* Install "death on getppid" filter. */
1658         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1659         ASSERT_EQ(0, ret);
1660
1661         /* Tracer will redirect getpid to getppid, and we should die. */
1662         EXPECT_NE(self->mypid, syscall(__NR_getpid));
1663 }
1664
/*
 * Fallback syscall number for seccomp(2), used only when the system headers
 * do not define __NR_seccomp. Architectures not listed get a warning and the
 * placeholder value 0xffff.
 */
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

/* Fallbacks for the seccomp() operation codes passed as the "op" argument. */
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

/* Fallbacks for the filter flag bits passed as the "flags" argument. */
#ifndef SECCOMP_FILTER_FLAG_TSYNC
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif

#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif
1701
#ifndef seccomp
/*
 * Thin wrapper around the raw seccomp syscall.
 *
 * errno is cleared before the call so callers can inspect it afterwards
 * without seeing a stale value. Returns the result of syscall(), narrowed
 * to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
        long rc;

        errno = 0;
        rc = syscall(__NR_seccomp, op, flags, args);
        return rc;
}
#endif
1709
1710 TEST(seccomp_syscall)
1711 {
1712         struct sock_filter filter[] = {
1713                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1714         };
1715         struct sock_fprog prog = {
1716                 .len = (unsigned short)ARRAY_SIZE(filter),
1717                 .filter = filter,
1718         };
1719         long ret;
1720
1721         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1722         ASSERT_EQ(0, ret) {
1723                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1724         }
1725
1726         /* Reject insane operation. */
1727         ret = seccomp(-1, 0, &prog);
1728         ASSERT_NE(ENOSYS, errno) {
1729                 TH_LOG("Kernel does not support seccomp syscall!");
1730         }
1731         EXPECT_EQ(EINVAL, errno) {
1732                 TH_LOG("Did not reject crazy op value!");
1733         }
1734
1735         /* Reject strict with flags or pointer. */
1736         ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1737         EXPECT_EQ(EINVAL, errno) {
1738                 TH_LOG("Did not reject mode strict with flags!");
1739         }
1740         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1741         EXPECT_EQ(EINVAL, errno) {
1742                 TH_LOG("Did not reject mode strict with uargs!");
1743         }
1744
1745         /* Reject insane args for filter. */
1746         ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1747         EXPECT_EQ(EINVAL, errno) {
1748                 TH_LOG("Did not reject crazy filter flags!");
1749         }
1750         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1751         EXPECT_EQ(EFAULT, errno) {
1752                 TH_LOG("Did not reject NULL filter!");
1753         }
1754
1755         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1756         EXPECT_EQ(0, errno) {
1757                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1758                         strerror(errno));
1759         }
1760 }
1761
1762 TEST(seccomp_syscall_mode_lock)
1763 {
1764         struct sock_filter filter[] = {
1765                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1766         };
1767         struct sock_fprog prog = {
1768                 .len = (unsigned short)ARRAY_SIZE(filter),
1769                 .filter = filter,
1770         };
1771         long ret;
1772
1773         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1774         ASSERT_EQ(0, ret) {
1775                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1776         }
1777
1778         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1779         ASSERT_NE(ENOSYS, errno) {
1780                 TH_LOG("Kernel does not support seccomp syscall!");
1781         }
1782         EXPECT_EQ(0, ret) {
1783                 TH_LOG("Could not install filter!");
1784         }
1785
1786         /* Make sure neither entry point will switch to strict. */
1787         ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1788         EXPECT_EQ(EINVAL, errno) {
1789                 TH_LOG("Switched to mode strict!");
1790         }
1791
1792         ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1793         EXPECT_EQ(EINVAL, errno) {
1794                 TH_LOG("Switched to mode strict!");
1795         }
1796 }
1797
1798 /*
1799  * Test detection of known and unknown filter flags. Userspace needs to be able
1800  * to check if a filter flag is supported by the current kernel and a good way
1801  * of doing that is by attempting to enter filter mode, with the flag bit in
1802  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
1803  * that the flag is valid and EINVAL indicates that the flag is invalid.
1804  */
1805 TEST(detect_seccomp_filter_flags)
1806 {
1807         unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
1808                                  SECCOMP_FILTER_FLAG_SPEC_ALLOW };
1809         unsigned int flag, all_flags;
1810         int i;
1811         long ret;
1812
1813         /* Test detection of known-good filter flags */
1814         for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
1815                 int bits = 0;
1816
1817                 flag = flags[i];
1818                 /* Make sure the flag is a single bit! */
1819                 while (flag) {
1820                         if (flag & 0x1)
1821                                 bits ++;
1822                         flag >>= 1;
1823                 }
1824                 ASSERT_EQ(1, bits);
1825                 flag = flags[i];
1826
1827                 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
1828                 ASSERT_NE(ENOSYS, errno) {
1829                         TH_LOG("Kernel does not support seccomp syscall!");
1830                 }
1831                 EXPECT_EQ(-1, ret);
1832                 EXPECT_EQ(EFAULT, errno) {
1833                         TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
1834                                flag);
1835                 }
1836
1837                 all_flags |= flag;
1838         }
1839
1840         /* Test detection of all known-good filter flags */
1841         ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
1842         EXPECT_EQ(-1, ret);
1843         EXPECT_EQ(EFAULT, errno) {
1844                 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
1845                        all_flags);
1846         }
1847
1848         /* Test detection of an unknown filter flag */
1849         flag = -1;
1850         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
1851         EXPECT_EQ(-1, ret);
1852         EXPECT_EQ(EINVAL, errno) {
1853                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
1854                        flag);
1855         }
1856
1857         /*
1858          * Test detection of an unknown filter flag that may simply need to be
1859          * added to this test
1860          */
1861         flag = flags[ARRAY_SIZE(flags) - 1] << 1;
1862         ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
1863         EXPECT_EQ(-1, ret);
1864         EXPECT_EQ(EINVAL, errno) {
1865                 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
1866                        flag);
1867         }
1868 }
1869
1870 TEST(TSYNC_first)
1871 {
1872         struct sock_filter filter[] = {
1873                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1874         };
1875         struct sock_fprog prog = {
1876                 .len = (unsigned short)ARRAY_SIZE(filter),
1877                 .filter = filter,
1878         };
1879         long ret;
1880
1881         ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1882         ASSERT_EQ(0, ret) {
1883                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1884         }
1885
1886         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
1887                       &prog);
1888         ASSERT_NE(ENOSYS, errno) {
1889                 TH_LOG("Kernel does not support seccomp syscall!");
1890         }
1891         EXPECT_EQ(0, ret) {
1892                 TH_LOG("Could not install initial filter with TSYNC!");
1893         }
1894 }
1895
/* Number of sibling threads the TSYNC fixture manages. */
#define TSYNC_SIBLINGS 2
/*
 * Per-thread control block handed to tsync_sibling() as its thread argument.
 * The fixture setup fills in the shared pointers; individual tests tweak
 * diverge/num_waits before starting the thread.
 */
struct tsync_sibling {
	/* pthread handle, written by pthread_create(); 0 means "never started". */
	pthread_t tid;
	/* Kernel thread id, recorded by the sibling itself via gettid. */
	pid_t system_tid;
	/* Posted once by the sibling after its optional diverge step. */
	sem_t *started;
	/* Condition variable the sibling blocks on until the test broadcasts. */
	pthread_cond_t *cond;
	/* Mutex paired with cond. */
	pthread_mutex_t *mutex;
	/* Non-zero: the sibling installs prog on itself before reporting in. */
	int diverge;
	/* Number of condition wakeups the sibling consumes before proceeding. */
	int num_waits;
	/* Filter program installed when diverge is set. */
	struct sock_fprog *prog;
	/* Test harness metadata of the owning test, stored by fixture setup. */
	struct __test_metadata *metadata;
};
1908
1909 FIXTURE_DATA(TSYNC) {
1910         struct sock_fprog root_prog, apply_prog;
1911         struct tsync_sibling sibling[TSYNC_SIBLINGS];
1912         sem_t started;
1913         pthread_cond_t cond;
1914         pthread_mutex_t mutex;
1915         int sibling_count;
1916 };
1917
1918 FIXTURE_SETUP(TSYNC)
1919 {
1920         struct sock_filter root_filter[] = {
1921                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1922         };
1923         struct sock_filter apply_filter[] = {
1924                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1925                         offsetof(struct seccomp_data, nr)),
1926                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1927                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1928                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1929         };
1930
1931         memset(&self->root_prog, 0, sizeof(self->root_prog));
1932         memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1933         memset(&self->sibling, 0, sizeof(self->sibling));
1934         self->root_prog.filter = malloc(sizeof(root_filter));
1935         ASSERT_NE(NULL, self->root_prog.filter);
1936         memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1937         self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1938
1939         self->apply_prog.filter = malloc(sizeof(apply_filter));
1940         ASSERT_NE(NULL, self->apply_prog.filter);
1941         memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1942         self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1943
1944         self->sibling_count = 0;
1945         pthread_mutex_init(&self->mutex, NULL);
1946         pthread_cond_init(&self->cond, NULL);
1947         sem_init(&self->started, 0, 0);
1948         self->sibling[0].tid = 0;
1949         self->sibling[0].cond = &self->cond;
1950         self->sibling[0].started = &self->started;
1951         self->sibling[0].mutex = &self->mutex;
1952         self->sibling[0].diverge = 0;
1953         self->sibling[0].num_waits = 1;
1954         self->sibling[0].prog = &self->root_prog;
1955         self->sibling[0].metadata = _metadata;
1956         self->sibling[1].tid = 0;
1957         self->sibling[1].cond = &self->cond;
1958         self->sibling[1].started = &self->started;
1959         self->sibling[1].mutex = &self->mutex;
1960         self->sibling[1].diverge = 0;
1961         self->sibling[1].prog = &self->root_prog;
1962         self->sibling[1].num_waits = 1;
1963         self->sibling[1].metadata = _metadata;
1964 }
1965
1966 FIXTURE_TEARDOWN(TSYNC)
1967 {
1968         int sib = 0;
1969
1970         if (self->root_prog.filter)
1971                 free(self->root_prog.filter);
1972         if (self->apply_prog.filter)
1973                 free(self->apply_prog.filter);
1974
1975         for ( ; sib < self->sibling_count; ++sib) {
1976                 struct tsync_sibling *s = &self->sibling[sib];
1977                 void *status;
1978
1979                 if (!s->tid)
1980                         continue;
1981                 if (pthread_kill(s->tid, 0)) {
1982                         pthread_cancel(s->tid);
1983                         pthread_join(s->tid, &status);
1984                 }
1985         }
1986         pthread_mutex_destroy(&self->mutex);
1987         pthread_cond_destroy(&self->cond);
1988         sem_destroy(&self->started);
1989 }
1990
1991 void *tsync_sibling(void *data)
1992 {
1993         long ret = 0;
1994         struct tsync_sibling *me = data;
1995
1996         me->system_tid = syscall(__NR_gettid);
1997
1998         pthread_mutex_lock(me->mutex);
1999         if (me->diverge) {
2000                 /* Just re-apply the root prog to fork the tree */
2001                 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2002                                 me->prog, 0, 0);
2003         }
2004         sem_post(me->started);
2005         /* Return outside of started so parent notices failures. */
2006         if (ret) {
2007                 pthread_mutex_unlock(me->mutex);
2008                 return (void *)SIBLING_EXIT_FAILURE;
2009         }
2010         do {
2011                 pthread_cond_wait(me->cond, me->mutex);
2012                 me->num_waits = me->num_waits - 1;
2013         } while (me->num_waits);
2014         pthread_mutex_unlock(me->mutex);
2015
2016         ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2017         if (!ret)
2018                 return (void *)SIBLING_EXIT_NEWPRIVS;
2019         read(0, NULL, 0);
2020         return (void *)SIBLING_EXIT_UNKILLED;
2021 }
2022
2023 void tsync_start_sibling(struct tsync_sibling *sibling)
2024 {
2025         pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2026 }
2027
2028 TEST_F(TSYNC, siblings_fail_prctl)
2029 {
2030         long ret;
2031         void *status;
2032         struct sock_filter filter[] = {
2033                 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2034                         offsetof(struct seccomp_data, nr)),
2035                 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2036                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2037                 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2038         };
2039         struct sock_fprog prog = {
2040                 .len = (unsigned short)ARRAY_SIZE(filter),
2041                 .filter = filter,
2042         };
2043
2044         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2045                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2046         }
2047
2048         /* Check prctl failure detection by requesting sib 0 diverge. */
2049         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2050         ASSERT_NE(ENOSYS, errno) {
2051                 TH_LOG("Kernel does not support seccomp syscall!");
2052         }
2053         ASSERT_EQ(0, ret) {
2054                 TH_LOG("setting filter failed");
2055         }
2056
2057         self->sibling[0].diverge = 1;
2058         tsync_start_sibling(&self->sibling[0]);
2059         tsync_start_sibling(&self->sibling[1]);
2060
2061         while (self->sibling_count < TSYNC_SIBLINGS) {
2062                 sem_wait(&self->started);
2063                 self->sibling_count++;
2064         }
2065
2066         /* Signal the threads to clean up*/
2067         pthread_mutex_lock(&self->mutex);
2068         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2069                 TH_LOG("cond broadcast non-zero");
2070         }
2071         pthread_mutex_unlock(&self->mutex);
2072
2073         /* Ensure diverging sibling failed to call prctl. */
2074         pthread_join(self->sibling[0].tid, &status);
2075         EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2076         pthread_join(self->sibling[1].tid, &status);
2077         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2078 }
2079
2080 TEST_F(TSYNC, two_siblings_with_ancestor)
2081 {
2082         long ret;
2083         void *status;
2084
2085         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2086                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2087         }
2088
2089         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2090         ASSERT_NE(ENOSYS, errno) {
2091                 TH_LOG("Kernel does not support seccomp syscall!");
2092         }
2093         ASSERT_EQ(0, ret) {
2094                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2095         }
2096         tsync_start_sibling(&self->sibling[0]);
2097         tsync_start_sibling(&self->sibling[1]);
2098
2099         while (self->sibling_count < TSYNC_SIBLINGS) {
2100                 sem_wait(&self->started);
2101                 self->sibling_count++;
2102         }
2103
2104         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2105                       &self->apply_prog);
2106         ASSERT_EQ(0, ret) {
2107                 TH_LOG("Could install filter on all threads!");
2108         }
2109         /* Tell the siblings to test the policy */
2110         pthread_mutex_lock(&self->mutex);
2111         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2112                 TH_LOG("cond broadcast non-zero");
2113         }
2114         pthread_mutex_unlock(&self->mutex);
2115         /* Ensure they are both killed and don't exit cleanly. */
2116         pthread_join(self->sibling[0].tid, &status);
2117         EXPECT_EQ(0x0, (long)status);
2118         pthread_join(self->sibling[1].tid, &status);
2119         EXPECT_EQ(0x0, (long)status);
2120 }
2121
2122 TEST_F(TSYNC, two_sibling_want_nnp)
2123 {
2124         void *status;
2125
2126         /* start siblings before any prctl() operations */
2127         tsync_start_sibling(&self->sibling[0]);
2128         tsync_start_sibling(&self->sibling[1]);
2129         while (self->sibling_count < TSYNC_SIBLINGS) {
2130                 sem_wait(&self->started);
2131                 self->sibling_count++;
2132         }
2133
2134         /* Tell the siblings to test no policy */
2135         pthread_mutex_lock(&self->mutex);
2136         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2137                 TH_LOG("cond broadcast non-zero");
2138         }
2139         pthread_mutex_unlock(&self->mutex);
2140
2141         /* Ensure they are both upset about lacking nnp. */
2142         pthread_join(self->sibling[0].tid, &status);
2143         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2144         pthread_join(self->sibling[1].tid, &status);
2145         EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2146 }
2147
2148 TEST_F(TSYNC, two_siblings_with_no_filter)
2149 {
2150         long ret;
2151         void *status;
2152
2153         /* start siblings before any prctl() operations */
2154         tsync_start_sibling(&self->sibling[0]);
2155         tsync_start_sibling(&self->sibling[1]);
2156         while (self->sibling_count < TSYNC_SIBLINGS) {
2157                 sem_wait(&self->started);
2158                 self->sibling_count++;
2159         }
2160
2161         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2162                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2163         }
2164
2165         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2166                       &self->apply_prog);
2167         ASSERT_NE(ENOSYS, errno) {
2168                 TH_LOG("Kernel does not support seccomp syscall!");
2169         }
2170         ASSERT_EQ(0, ret) {
2171                 TH_LOG("Could install filter on all threads!");
2172         }
2173
2174         /* Tell the siblings to test the policy */
2175         pthread_mutex_lock(&self->mutex);
2176         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2177                 TH_LOG("cond broadcast non-zero");
2178         }
2179         pthread_mutex_unlock(&self->mutex);
2180
2181         /* Ensure they are both killed and don't exit cleanly. */
2182         pthread_join(self->sibling[0].tid, &status);
2183         EXPECT_EQ(0x0, (long)status);
2184         pthread_join(self->sibling[1].tid, &status);
2185         EXPECT_EQ(0x0, (long)status);
2186 }
2187
2188 TEST_F(TSYNC, two_siblings_with_one_divergence)
2189 {
2190         long ret;
2191         void *status;
2192
2193         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2194                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2195         }
2196
2197         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2198         ASSERT_NE(ENOSYS, errno) {
2199                 TH_LOG("Kernel does not support seccomp syscall!");
2200         }
2201         ASSERT_EQ(0, ret) {
2202                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2203         }
2204         self->sibling[0].diverge = 1;
2205         tsync_start_sibling(&self->sibling[0]);
2206         tsync_start_sibling(&self->sibling[1]);
2207
2208         while (self->sibling_count < TSYNC_SIBLINGS) {
2209                 sem_wait(&self->started);
2210                 self->sibling_count++;
2211         }
2212
2213         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2214                       &self->apply_prog);
2215         ASSERT_EQ(self->sibling[0].system_tid, ret) {
2216                 TH_LOG("Did not fail on diverged sibling.");
2217         }
2218
2219         /* Wake the threads */
2220         pthread_mutex_lock(&self->mutex);
2221         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2222                 TH_LOG("cond broadcast non-zero");
2223         }
2224         pthread_mutex_unlock(&self->mutex);
2225
2226         /* Ensure they are both unkilled. */
2227         pthread_join(self->sibling[0].tid, &status);
2228         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2229         pthread_join(self->sibling[1].tid, &status);
2230         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2231 }
2232
2233 TEST_F(TSYNC, two_siblings_not_under_filter)
2234 {
2235         long ret, sib;
2236         void *status;
2237
2238         ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2239                 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2240         }
2241
2242         /*
2243          * Sibling 0 will have its own seccomp policy
2244          * and Sibling 1 will not be under seccomp at
2245          * all. Sibling 1 will enter seccomp and 0
2246          * will cause failure.
2247          */
2248         self->sibling[0].diverge = 1;
2249         tsync_start_sibling(&self->sibling[0]);
2250         tsync_start_sibling(&self->sibling[1]);
2251
2252         while (self->sibling_count < TSYNC_SIBLINGS) {
2253                 sem_wait(&self->started);
2254                 self->sibling_count++;
2255         }
2256
2257         ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2258         ASSERT_NE(ENOSYS, errno) {
2259                 TH_LOG("Kernel does not support seccomp syscall!");
2260         }
2261         ASSERT_EQ(0, ret) {
2262                 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2263         }
2264
2265         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2266                       &self->apply_prog);
2267         ASSERT_EQ(ret, self->sibling[0].system_tid) {
2268                 TH_LOG("Did not fail on diverged sibling.");
2269         }
2270         sib = 1;
2271         if (ret == self->sibling[0].system_tid)
2272                 sib = 0;
2273
2274         pthread_mutex_lock(&self->mutex);
2275
2276         /* Increment the other siblings num_waits so we can clean up
2277          * the one we just saw.
2278          */
2279         self->sibling[!sib].num_waits += 1;
2280
2281         /* Signal the thread to clean up*/
2282         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2283                 TH_LOG("cond broadcast non-zero");
2284         }
2285         pthread_mutex_unlock(&self->mutex);
2286         pthread_join(self->sibling[sib].tid, &status);
2287         EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2288         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2289         while (!kill(self->sibling[sib].system_tid, 0))
2290                 sleep(0.1);
2291         /* Switch to the remaining sibling */
2292         sib = !sib;
2293
2294         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2295                       &self->apply_prog);
2296         ASSERT_EQ(0, ret) {
2297                 TH_LOG("Expected the remaining sibling to sync");
2298         };
2299
2300         pthread_mutex_lock(&self->mutex);
2301
2302         /* If remaining sibling didn't have a chance to wake up during
2303          * the first broadcast, manually reduce the num_waits now.
2304          */
2305         if (self->sibling[sib].num_waits > 1)
2306                 self->sibling[sib].num_waits = 1;
2307         ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2308                 TH_LOG("cond broadcast non-zero");
2309         }
2310         pthread_mutex_unlock(&self->mutex);
2311         pthread_join(self->sibling[sib].tid, &status);
2312         EXPECT_EQ(0, (long)status);
2313         /* Poll for actual task death. pthread_join doesn't guarantee it. */
2314         while (!kill(self->sibling[sib].system_tid, 0))
2315                 sleep(0.1);
2316
2317         ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2318                       &self->apply_prog);
2319         ASSERT_EQ(0, ret);  /* just us chickens */
2320 }
2321
/*
 * Make sure restarted syscalls are seen directly as "restart_syscall".
 *
 * A forked child asks to be traced, installs a filter that reports
 * nanosleep and restart_syscall to the tracer (with distinct event
 * messages), and then sleeps. The parent, acting as tracer, interrupts the
 * sleep with SIGSTOP/SIGCONT and checks which syscall number the filter
 * reports when the sleep is resumed. A pipe is used to pace the child.
 */
TEST(syscall_restart)
{
	long ret;
	unsigned long msg;
	pid_t child_pid;
	int pipefd[2];
	int status;
	siginfo_t info = { };
	/*
	 * Jump offsets are relative to the following instruction: the first
	 * group of syscalls lands on RET_ALLOW, nanosleep and restart_syscall
	 * land on their own RET_TRACE, and anything that is not matched at all
	 * falls through to RET_KILL.
	 */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			 offsetof(struct seccomp_data, nr)),

#ifdef __NR_sigreturn
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
#endif
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),

		/* Allow __NR_write for easy logging. */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		/* The nanosleep jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
		/* The restart_syscall jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
#if defined(__arm__)
	struct utsname utsbuf;
#endif

	ASSERT_EQ(0, pipe(pipefd));

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		/* Child uses EXPECT not ASSERT to deliver status correctly. */
		char buf = ' ';
		struct timespec timeout = { };

		/* Attach parent as tracer and stop. */
		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
		EXPECT_EQ(0, raise(SIGSTOP));

		/* The child only reads from the pipe. */
		EXPECT_EQ(0, close(pipefd[1]));

		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
		}

		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
		EXPECT_EQ(0, ret) {
			TH_LOG("Failed to install filter!");
		}

		/* Block until the parent sends the first sync byte ('.'). */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed to read() sync from parent");
		}
		EXPECT_EQ('.', buf) {
			TH_LOG("Failed to get sync data from read()");
		}

		/* Start nanosleep to be interrupted. */
		timeout.tv_sec = 1;
		errno = 0;
		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
		}

		/* Read final sync from parent. */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed final read() from parent");
		}
		EXPECT_EQ('!', buf) {
			TH_LOG("Failed to get final data from read()");
		}

		/* Directly report the status of our test harness results. */
		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
						     : EXIT_FAILURE);
	}
	/* The parent only writes to the pipe. */
	EXPECT_EQ(0, close(pipefd[0]));

	/* Attach to child, setup options, and release. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
			    PTRACE_O_TRACESECCOMP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], ".", 1));

	/* Wait for nanosleep() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
	/* 0x100 is the data value of the filter's nanosleep RET_TRACE. */
	ASSERT_EQ(0x100, msg);
	EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));

	/* Might as well check siginfo for sanity while we're here. */
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	ASSERT_EQ(SIGTRAP, info.si_signo);
	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
	EXPECT_EQ(0, info.si_errno);
	EXPECT_EQ(getuid(), info.si_uid);
	/* Verify signal delivery came from child (seccomp-triggered). */
	EXPECT_EQ(child_pid, info.si_pid);

	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
	/* Verify signal delivery came from parent now. */
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	EXPECT_EQ(getpid(), info.si_pid);

	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
	ASSERT_EQ(0, kill(child_pid, SIGCONT));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));

	/* Wait for restart_syscall() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));

	/* 0x200 is the data value of the filter's restart_syscall RET_TRACE. */
	ASSERT_EQ(0x200, msg);
	ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
	/*
	 * FIXME:
	 * - native ARM registers do NOT expose true syscall.
	 * - compat ARM registers on ARM64 DO expose true syscall.
	 */
	ASSERT_EQ(0, uname(&utsbuf));
	if (strncmp(utsbuf.machine, "arm", 3) == 0) {
		EXPECT_EQ(__NR_nanosleep, ret);
	} else
#endif
	{
		EXPECT_EQ(__NR_restart_syscall, ret);
	}

	/* Write again to end test. */
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], "!", 1));
	EXPECT_EQ(0, close(pipefd[1]));

	/* A child killed by a signal or exiting non-zero fails the test. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	if (WIFSIGNALED(status) || WEXITSTATUS(status))
		_metadata->passed = 0;
}
2490
2491 /*
2492  * TODO:
2493  * - add microbenchmarks
2494  * - expand NNP testing
2495  * - better arch-specific TRACE and TRAP handlers.
2496  * - endianness checking when appropriate
2497  * - 64-bit arg prodding
2498  * - arch value testing (x86 modes especially)
2499  * - ...
2500  */
2501
2502 TEST_HARNESS_MAIN