trace: switch to OCI seccomp JSON output
[project/procd.git] / trace / trace.c
1 /*
2 * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License version 2.1
6 * as published by the Free Software Foundation
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14 #define _GNU_SOURCE
15 #include <fcntl.h>
16 #include <stddef.h>
17 #include <sys/ptrace.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 #include <sys/user.h>
21 #include <sys/wait.h>
22 #include <unistd.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <errno.h>
26 #include <string.h>
27 #include <syslog.h>
28 #include <limits.h>
29
30 #ifndef PTRACE_EVENT_STOP
31 /* PTRACE_EVENT_STOP is defined in linux/ptrace.h, but this header
32 * collides with musl's sys/ptrace.h */
33 #define PTRACE_EVENT_STOP 128
34 #endif
35
36 #ifndef PTRACE_EVENT_SECCOMP
37 /* undefined with uClibc-ng */
38 #define PTRACE_EVENT_SECCOMP 7
39 #endif
40
41 #include <libubox/ulog.h>
42 #include <libubox/uloop.h>
43 #include <libubox/blobmsg.h>
44 #include <libubox/blobmsg_json.h>
45
46 #include "../syscall-names.h"
47
/* Shorthand for the compiler builtin that yields a member's byte offset. */
#define _offsetof(a, b) __builtin_offsetof(a,b)
/* Element count of a real array (must not be used on a pointer). */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

/*
 * Per-architecture addresses handed to PTRACE_PEEKUSER / PTRACE_POKEUSER:
 *   reg_syscall_nr - where the syscall number of the tracee is found
 *   reg_retval_nr  - where the syscall return value lives (only defined
 *                    for the architectures that need it below)
 */
#if defined(__amd64__)
#  define reg_syscall_nr _offsetof(struct user, regs.orig_rax)
#elif defined(__i386__)
#  define reg_syscall_nr _offsetof(struct user, regs.orig_eax)
#elif defined(__mips)
#  ifndef EF_REG2
#    define EF_REG2 8
#  endif
#  define reg_syscall_nr (EF_REG2 / 4)
#elif defined(__arm__)
#  include <asm/ptrace.h> /* for PTRACE_SET_SYSCALL */
#  define reg_syscall_nr _offsetof(struct user, regs.uregs[7])
#  if defined(__ARM_EABI__)
#    define reg_retval_nr _offsetof(struct user, regs.uregs[0])
#  endif
#elif defined(__PPC__)
#  define reg_syscall_nr _offsetof(struct user, regs.gpr[0])
#  define reg_retval_nr _offsetof(struct user, regs.gpr[3])
#else
#  error tracing is not supported on this architecture
#endif
72
/*
 * Operating mode of the program, chosen at startup in main():
 *   UTRACE        - count every syscall the tracee enters (the default)
 *   SECCOMP_TRACE - only record seccomp violations; selected when the
 *                   program name contains "seccomp-trace"
 */
enum mode {
	UTRACE,
	SECCOMP_TRACE,
};

enum mode mode = UTRACE;
77
78 struct tracee {
79 struct uloop_process proc;
80 int in_syscall;
81 };
82
83 static struct tracee tracer;
84 static int syscall_count[SYSCALL_COUNT];
85 static int violation_count;
86 static struct blob_buf b;
87 static int debug;
88 char *json = NULL;
89 int ptrace_restart;
90
91 static void set_syscall(const char *name, int val)
92 {
93 int i;
94
95 for (i = 0; i < SYSCALL_COUNT; i++) {
96 int sc = syscall_index_to_number(i);
97 if (syscall_name(sc) && !strcmp(syscall_name(sc), name)) {
98 syscall_count[i] = val;
99 return;
100 }
101 }
102 }
103
/* A syscall number together with the number of times it was observed. */
struct syscall {
	int syscall;
	int count;
};

/*
 * qsort() comparator that orders struct syscall entries by descending count.
 *
 * Returns a negative value when `a` has the larger count (so it sorts
 * first), a positive value when `b` has the larger count, and 0 on a tie.
 * A three-way comparison is used instead of subtracting the counts, so the
 * result is well defined for every pair of int values.
 */
static int cmp_count(const void *a, const void *b)
{
	const struct syscall *lhs = a;
	const struct syscall *rhs = b;

	return (rhs->count > lhs->count) - (rhs->count < lhs->count);
}
113
114 static void print_syscalls(int policy, const char *json)
115 {
116 void *c, *d, *e;
117 int i;
118
119 if (mode == UTRACE) {
120 set_syscall("rt_sigaction", 1);
121 set_syscall("sigreturn", 1);
122 set_syscall("rt_sigreturn", 1);
123 set_syscall("exit_group", 1);
124 set_syscall("exit", 1);
125 }
126
127 struct syscall sorted[SYSCALL_COUNT];
128
129 for (i = 0; i < SYSCALL_COUNT; i++) {
130 sorted[i].syscall = syscall_index_to_number(i);
131 sorted[i].count = syscall_count[i];
132 }
133
134 qsort(sorted, SYSCALL_COUNT, sizeof(sorted[0]), cmp_count);
135
136 blob_buf_init(&b, 0);
137 blobmsg_add_string(&b, "defaultAction", "SCMP_ACT_KILL_PROCESS");
138 c = blobmsg_open_array(&b, "syscalls");
139 d = blobmsg_open_table(&b, "");
140 e = blobmsg_open_array(&b, "names");
141
142 for (i = 0; i < SYSCALL_COUNT; i++) {
143 int sc = sorted[i].syscall;
144 if (!sorted[i].count)
145 break;
146 if (syscall_name(sc)) {
147 if (debug)
148 printf("syscall %d (%s) was called %d times\n",
149 sc, syscall_name(sc), sorted[i].count);
150 blobmsg_add_string(&b, NULL, syscall_name(sc));
151 } else {
152 ULOG_ERR("no name found for syscall(%d)\n", sc);
153 }
154 }
155 blobmsg_close_array(&b, e);
156 blobmsg_add_string(&b, "action", "SCMP_ACT_ALLOW");
157 blobmsg_close_table(&b, d);
158 blobmsg_close_array(&b, c);
159 if (json) {
160 FILE *fp = fopen(json, "w");
161 if (fp) {
162 fprintf(fp, "%s\n", blobmsg_format_json_indent(b.head, true, 0));
163 fclose(fp);
164 ULOG_INFO("saving syscall trace to %s\n", json);
165 } else {
166 ULOG_ERR("failed to open %s\n", json);
167 }
168 } else {
169 printf("%s\n",
170 blobmsg_format_json_indent(b.head, true, 0));
171 }
172
173 }
174
175 static void report_seccomp_vialation(pid_t pid, unsigned syscall)
176 {
177 char buf[200];
178 snprintf(buf, sizeof(buf), "/proc/%d/cmdline", pid);
179 int f = open(buf, O_RDONLY);
180 int r = read(f, buf, sizeof(buf) - 1);
181 if (r >= 0)
182 buf[r] = 0;
183 else
184 strcpy(buf, "unknown?");
185 close(f);
186
187 if (violation_count < INT_MAX)
188 violation_count++;
189 int i = syscall_index(syscall);
190 if (i >= 0) {
191 syscall_count[i]++;
192 ULOG_ERR("%s[%u] tried to call non-whitelisted syscall: %s (see %s)\n",
193 buf, pid, syscall_name(syscall), json);
194 } else {
195 ULOG_ERR("%s[%u] tried to call non-whitelisted syscall: %d (see %s)\n",
196 buf, pid, syscall, json);
197 }
198 }
199
200 static void tracer_cb(struct uloop_process *c, int ret)
201 {
202 struct tracee *tracee = container_of(c, struct tracee, proc);
203 int inject_signal = 0;
204
205 /* We explicitely check for events in upper 16 bits, because
206 * musl (as opposed to glibc) does not report
207 * PTRACE_EVENT_STOP as WIFSTOPPED */
208 if (WIFSTOPPED(ret) || (ret >> 16)) {
209 if (WSTOPSIG(ret) & 0x80) {
210 if (!tracee->in_syscall) {
211 int syscall = ptrace(PTRACE_PEEKUSER, c->pid, reg_syscall_nr);
212 int i = syscall_index(syscall);
213 if (i >= 0) {
214 syscall_count[i]++;
215 if (debug)
216 fprintf(stderr, "%s()\n", syscall_name(syscall));
217 } else if (debug) {
218 fprintf(stderr, "syscal(%d)\n", syscall);
219 }
220 }
221 tracee->in_syscall = !tracee->in_syscall;
222 } else if ((ret >> 8) == (SIGTRAP | (PTRACE_EVENT_FORK << 8)) ||
223 (ret >> 8) == (SIGTRAP | (PTRACE_EVENT_VFORK << 8)) ||
224 (ret >> 8) == (SIGTRAP | (PTRACE_EVENT_CLONE << 8))) {
225 struct tracee *child = calloc(1, sizeof(struct tracee));
226
227 unsigned long msg;
228 ptrace(PTRACE_GETEVENTMSG, c->pid, 0, &msg);
229 child->proc.pid = msg;
230 child->proc.cb = tracer_cb;
231 ptrace(ptrace_restart, child->proc.pid, 0, 0);
232 uloop_process_add(&child->proc);
233 if (debug)
234 fprintf(stderr, "Tracing new child %d\n", child->proc.pid);
235 } else if ((ret >> 16) == PTRACE_EVENT_STOP) {
236 /* Nothing special to do here */
237 } else if ((ret >> 8) == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
238 int syscall = ptrace(PTRACE_PEEKUSER, c->pid, reg_syscall_nr);
239 #if defined(__arm__)
240 ptrace(PTRACE_SET_SYSCALL, c->pid, 0, -1);
241 ptrace(PTRACE_POKEUSER, c->pid, reg_retval_nr, -ENOSYS);
242 #else
243 ptrace(PTRACE_POKEUSER, c->pid, reg_syscall_nr, -1);
244 #endif
245 report_seccomp_vialation(c->pid, syscall);
246 } else {
247 inject_signal = WSTOPSIG(ret);
248 if (debug)
249 fprintf(stderr, "Injecting signal %d into pid %d\n",
250 inject_signal, tracee->proc.pid);
251 }
252 } else if (WIFEXITED(ret) || (WIFSIGNALED(ret) && WTERMSIG(ret))) {
253 if (tracee == &tracer) {
254 uloop_end(); /* Main process exit */
255 } else {
256 if (debug)
257 fprintf(stderr, "Child %d exited\n", tracee->proc.pid);
258 free(tracee);
259 }
260 return;
261 }
262
263 ptrace(ptrace_restart, c->pid, 0, inject_signal);
264 uloop_process_add(c);
265 }
266
267 static void sigterm_handler(int signum)
268 {
269 /* When we receive SIGTERM, we forward it to the tracee. After
270 * the tracee exits, trace_cb() will be called and make us
271 * exit too. */
272 kill(tracer.proc.pid, SIGTERM);
273 }
274
275
276 int main(int argc, char **argv, char **envp)
277 {
278 int status, ch, policy = EPERM;
279 pid_t child;
280
281 /* When invoked via seccomp-trace symlink, work as seccomp
282 * violation logger rather than as syscall tracer */
283 if (strstr(argv[0], "seccomp-trace"))
284 mode = SECCOMP_TRACE;
285
286 while ((ch = getopt(argc, argv, "f:p:")) != -1) {
287 switch (ch) {
288 case 'f':
289 json = optarg;
290 break;
291 case 'p':
292 policy = atoi(optarg);
293 break;
294 }
295 }
296
297 if (!json)
298 json = getenv("SECCOMP_FILE");
299
300 argc -= optind;
301 argv += optind;
302
303 if (!argc)
304 return -1;
305
306 if (getenv("TRACE_DEBUG"))
307 debug = 1;
308 unsetenv("TRACE_DEBUG");
309
310 child = fork();
311
312 if (child == 0) {
313 char **_argv = calloc(argc + 1, sizeof(char *));
314 char **_envp;
315 char *preload = NULL;
316 const char *old_preload = getenv("LD_PRELOAD");
317 int newenv = 0;
318 int envc = 0;
319 int ret;
320
321 memcpy(_argv, argv, argc * sizeof(char *));
322
323 while (envp[envc++])
324 ;
325
326 _envp = calloc(envc + 2, sizeof(char *));
327 switch (mode) {
328 case UTRACE:
329 preload = "/lib/libpreload-trace.so";
330 newenv = 1;
331 break;
332 case SECCOMP_TRACE:
333 preload = "/lib/libpreload-seccomp.so";
334 newenv = 2;
335 if (asprintf(&_envp[1], "SECCOMP_FILE=%s", json ? json : "") < 0)
336 ULOG_ERR("failed to allocate SECCOMP_FILE env: %m\n");
337
338 kill(getpid(), SIGSTOP);
339 break;
340 }
341 if (asprintf(&_envp[0], "LD_PRELOAD=%s%s%s", preload,
342 old_preload ? ":" : "",
343 old_preload ? old_preload : "") < 0)
344 ULOG_ERR("failed to allocate LD_PRELOAD env: %m\n");
345
346 memcpy(&_envp[newenv], envp, envc * sizeof(char *));
347
348 ret = execve(_argv[0], _argv, _envp);
349 ULOG_ERR("failed to exec %s: %m\n", _argv[0]);
350
351 free(_argv);
352 free(_envp);
353 return ret;
354 }
355
356 if (child < 0)
357 return -1;
358
359 waitpid(child, &status, WUNTRACED);
360 if (!WIFSTOPPED(status)) {
361 ULOG_ERR("failed to start %s\n", *argv);
362 return -1;
363 }
364
365 /* Initialize uloop to catch all ptrace stops from now on. */
366 uloop_init();
367
368 int ptrace_options = PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE;
369 switch (mode) {
370 case UTRACE:
371 ptrace_options |= PTRACE_O_TRACESYSGOOD;
372 ptrace_restart = PTRACE_SYSCALL;
373 break;
374 case SECCOMP_TRACE:
375 ptrace_options |= PTRACE_O_TRACESECCOMP;
376 ptrace_restart = PTRACE_CONT;
377 break;
378 }
379 if (ptrace(PTRACE_SEIZE, child, 0, ptrace_options) == -1) {
380 ULOG_ERR("PTRACE_SEIZE: %m\n");
381 return -1;
382 }
383 if (ptrace(ptrace_restart, child, 0, SIGCONT) == -1) {
384 ULOG_ERR("ptrace_restart: %m\n");
385 return -1;
386 }
387
388 tracer.proc.pid = child;
389 tracer.proc.cb = tracer_cb;
390 uloop_process_add(&tracer.proc);
391 signal(SIGTERM, sigterm_handler); /* Override uloop's SIGTERM handler */
392 uloop_run();
393 uloop_done();
394
395
396 switch (mode) {
397 case UTRACE:
398 if (!json)
399 if (asprintf(&json, "/tmp/%s.%u.json", basename(*argv), child) < 0)
400 ULOG_ERR("failed to allocate output path: %m\n");
401 break;
402 case SECCOMP_TRACE:
403 if (!violation_count)
404 return 0;
405 if (asprintf(&json, "/tmp/%s.%u.violations.json", basename(*argv), child) < 0)
406 ULOG_ERR("failed to allocate violations output path: %m\n");
407 break;
408 }
409 print_syscalls(policy, json);
410 return 0;
411 }