add initial version of ujail and utrace
[project/procd.git] / jail / jail.c
1 /*
2 * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License version 2.1
6 * as published by the Free Software Foundation
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 */
13
14 #define _GNU_SOURCE
15 #include <sys/syscall.h>
16 #include <sys/mman.h>
17 #include <sys/utsname.h>
18 #include <sys/types.h>
19 #include <sys/syscall.h>
20 #include <sys/types.h>
21 #include <sys/mount.h>
22 #include <sys/prctl.h>
23 #include <sys/wait.h>
24
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <values.h>
28 #include <errno.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #include <syslog.h>
34 #include <libgen.h>
35 #include <glob.h>
36 #include <elf.h>
37 #include <sched.h>
38
39 #include "elf.h"
40
41 #include <libubox/utils.h>
42 #include <libubox/list.h>
43 #include <libubox/uloop.h>
44
/* Size in bytes of child_stack, the stack handed to clone() in spawn_namespace(). */
#define STACK_SIZE (1024 * 1024)
/* getopt() option string; used by both main() and spawn_child(). */
#define OPT_ARGS "P:S:n:r:w:psuld"
47
/*
 * One additional file or directory that is bind-mounted into the jail.
 * Entries are created by add_extra() and consumed by build_jail() and
 * stop_jail().
 */
struct extra {
	struct list_head list;	/* link in the file-scope "extras" list */

	const char *path;	/* dirname() of the host path */
	const char *name;	/* basename() of the host path */
	int readonly;		/* non-zero: bind mount is remounted read-only */
};
55
/* Extras collected by add_extra(); appended at the tail, so in option order. */
static LIST_HEAD(extras);

/*
 * Prototype declared by hand; presumably none of the included libc headers
 * provides one -- TODO confirm.
 */
extern int pivot_root(const char *new_root, const char *put_old);

/* Set to 1 when the -d option is seen (in main() and again in spawn_child()). */
int debug = 0;

/* Stack for the clone()d namespace process; clone() is given its end address. */
static char child_stack[STACK_SIZE];
63
/*
 * Create directory "dir" including all missing parent directories.
 *
 * @dir:  path to create; temporarily modified in place while walking up the
 *        path, but always restored before returning (also on failure)
 * @mask: mode passed to every mkdir() call
 *
 * A string that contains no '/' is treated as "nothing to do". EEXIST from
 * mkdir() is not treated as an error.
 *
 * Returns 0 on success and -1 if a mkdir() failed (the failure is logged).
 */
static int mkdir_p(char *dir, mode_t mask)
{
	char *l = strrchr(dir, '/');
	int ret;

	if (!l)
		return 0;

	/* cut off the last component and create everything above it first */
	*l = '\0';
	ret = mkdir_p(dir, mask);
	/* put the separator back unconditionally so the caller's path stays intact */
	*l = '/';

	if (ret)
		return -1;

	ret = mkdir(dir, mask);
	if (ret && errno == EEXIST)
		return 0;

	if (ret)
		ERROR("mkdir failed on %s: %s\n", dir, strerror(errno));

	return ret;
}
88
/*
 * Bind-mount the host object "<path>/<name>" into the jail staged at "root".
 *
 * @root:     directory the jail is being staged in
 * @path:     host directory containing the object
 * @name:     name of the object inside @path
 * @readonly: non-zero to remount the bind mount read-only
 * @error:    value returned when the host object does not exist (callers pass
 *            -1 for mandatory objects and 0 for optional ones)
 *
 * Any @path containing the substring "local" is mapped to "/lib" inside the
 * jail. Directories are mounted onto a freshly created directory, everything
 * else onto an empty file created for that purpose.
 *
 * Returns 0 on success, @error if the source is missing and -1 on any other
 * failure, including paths that do not fit into the internal buffers.
 */
static int mount_bind(const char *root, const char *path, const char *name, int readonly, int error)
{
	const char *p = path;
	struct stat s;
	char old[256];
	char new[256];
	int fd;
	int n;

	if (strstr(p, "local"))
		p = "/lib";

	n = snprintf(old, sizeof(old), "%s/%s", path, name);
	if (n < 0 || (size_t) n >= sizeof(old)) {
		ERROR("source path %s/%s is too long\n", path, name);
		return -1;
	}

	/* check the longest target path up front; the parent path is a prefix of it */
	n = snprintf(new, sizeof(new), "%s%s/%s", root, p, name);
	if (n < 0 || (size_t) n >= sizeof(new)) {
		ERROR("target path %s%s/%s is too long\n", root, p, name);
		return -1;
	}

	/* make sure the parent directory exists inside the jail */
	snprintf(new, sizeof(new), "%s%s", root, p);
	mkdir_p(new, 0755);

	snprintf(new, sizeof(new), "%s%s/%s", root, p, name);

	if (stat(old, &s)) {
		ERROR("%s does not exist\n", old);
		return error;
	}

	/* a bind mount needs an existing target of the matching kind */
	if (S_ISDIR(s.st_mode)) {
		mkdir_p(new, 0755);
	} else {
		fd = creat(new, 0644);
		if (fd == -1) {
			ERROR("failed to create %s: %s\n", new, strerror(errno));
			return -1;
		}
		close(fd);
	}

	if (mount(old, new, NULL, MS_BIND, NULL)) {
		ERROR("failed to mount -B %s %s: %s\n", old, new, strerror(errno));
		return -1;
	}

	/* read-only is applied by a second mount() call that remounts the bind mount */
	if (readonly && mount(old, new, NULL, MS_BIND | MS_REMOUNT | MS_RDONLY, NULL)) {
		ERROR("failed to remount ro %s: %s\n", new, strerror(errno));
		return -1;
	}

	DEBUG("mount -B %s %s\n", old, new);

	return 0;
}
137
138 static int build_jail(const char *path)
139 {
140 struct library *l;
141 struct extra *m;
142 int ret = 0;
143
144 mkdir(path, 0755);
145
146 if (mount("tmpfs", path, "tmpfs", MS_NOATIME, "mode=0744")) {
147 ERROR("tmpfs mount failed %s\n", strerror(errno));
148 return -1;
149 }
150
151 avl_for_each_element(&libraries, l, avl)
152 if (mount_bind(path, l->path, l->name, 1, -1))
153 return -1;
154
155 list_for_each_entry(m, &extras, list)
156 if (mount_bind(path, m->path, m->name, m->readonly, 0))
157 return -1;
158
159 return ret;
160 }
161
/*
 * Unmount "<root><path>". The concatenated path is heap-allocated; an
 * allocation failure is logged and nothing is unmounted. The result of
 * umount() itself is not checked.
 */
static void _umount(const char *root, const char *path)
{
	char *target = NULL;

	if (asprintf(&target, "%s%s", root, path) < 0) {
		ERROR("failed to alloc umount buffer: %s\n", strerror(errno));
		return;
	}

	DEBUG("umount %s\n", target);
	umount(target);
	free(target);
}
174
175 static int stop_jail(const char *root)
176 {
177 struct library *l;
178 struct extra *m;
179
180 avl_for_each_element(&libraries, l, avl) {
181 char path[256];
182 char *p = l->path;
183
184 if (strstr(p, "local"))
185 p = "/lib";
186
187 snprintf(path, sizeof(path), "%s%s/%s", root, p, l->name);
188 DEBUG("umount %s\n", path);
189 umount(path);
190 }
191
192 list_for_each_entry(m, &extras, list) {
193 char path[256];
194
195 snprintf(path, sizeof(path), "%s%s/%s", root, m->path, m->name);
196 DEBUG("umount %s\n", path);
197 umount(path);
198 }
199
200 _umount(root, "/proc");
201 _umount(root, "/sys");
202
203 DEBUG("umount %s\n", root);
204 umount(root);
205 rmdir(root);
206
207 return 0;
208 }
209
#define MAX_ENVP 8
/*
 * Build the environment for the jailed program.
 *
 * @seccomp: path of the seccomp filter file, or NULL for no seccomp; when set,
 *           SECCOMP_FILE and LD_PRELOAD (pointing at libpreload-seccomp.so)
 *           are added
 * @debug:   non-zero to add LD_DEBUG=all
 *
 * Returns a NULL-terminated array held in static storage (overwritten by the
 * next call), or NULL if seccomp was requested but the preload library could
 * not be located or one of the variables does not fit its buffer.
 */
static char** build_envp(const char *seccomp, int debug)
{
	static char *envp[MAX_ENVP];
	static char preload_var[256];
	static char seccomp_var[256];
	static char debug_var[] = "LD_DEBUG=all";
	char *preload_lib = find_lib("libpreload-seccomp.so");
	int count = 0;
	int n;

	if (seccomp && !preload_lib) {
		ERROR("failed to add preload-lib to env\n");
		return NULL;
	}
	if (seccomp) {
		/* a truncated path would silently point at the wrong file, so fail instead */
		n = snprintf(seccomp_var, sizeof(seccomp_var), "SECCOMP_FILE=%s", seccomp);
		if (n < 0 || (size_t) n >= sizeof(seccomp_var)) {
			ERROR("seccomp filter path %s is too long\n", seccomp);
			return NULL;
		}
		envp[count++] = seccomp_var;

		n = snprintf(preload_var, sizeof(preload_var), "LD_PRELOAD=%s", preload_lib);
		if (n < 0 || (size_t) n >= sizeof(preload_var)) {
			ERROR("preload library path %s is too long\n", preload_lib);
			return NULL;
		}
		envp[count++] = preload_var;
	}
	if (debug)
		envp[count++] = debug_var;

	/* terminate explicitly; the static array may hold entries from an earlier call */
	envp[count] = NULL;

	return envp;
}
235
/*
 * Fork and execute the jailed program.
 *
 * @path:    not used by this function
 * @argv:    NULL-terminated argument vector; argv[0] is the program to execute
 * @seccomp: seccomp filter file handed to build_envp(), or NULL
 *
 * Returns the child's pid in the parent, or -1 if fork() failed. The child
 * never returns: it exits with -1 if the environment cannot be built or
 * execve() fails.
 */
static int spawn(const char *path, char **argv, const char *seccomp)
{
	pid_t pid = fork();

	if (pid < 0) {
		ERROR("failed to spawn %s: %s\n", *argv, strerror(errno));
		return -1;
	} else if (!pid) {
		char **envp = build_envp(seccomp, 0);

		/*
		 * build_envp() only fails when seccomp was requested; executing
		 * anyway would run the program without its seccomp preload.
		 */
		if (!envp) {
			ERROR("failed to build environment for %s\n", *argv);
			exit(-1);
		}

		INFO("spawning %s\n", *argv);
		execve(*argv, argv, envp);
		ERROR("failed to spawn child %s: %s\n", *argv, strerror(errno));
		exit(-1);
	}

	return pid;
}
254
/*
 * Print the command line synopsis to stderr.
 *
 * Returns -1 so that callers can write "return usage();".
 */
static int usage(void)
{
	/*
	 * There is no -D option in OPT_ARGS; "--" is what stops getopt() from
	 * consuming options that are meant for the jailed binary.
	 */
	fprintf(stderr, "jail <options> -- <binary> <params ...>\n");
	fprintf(stderr, " -d\t\tshow debug log\n");
	fprintf(stderr, " -P <path>\tpath where the jail will be staged\n");
	fprintf(stderr, " -S <file>\tseccomp filter\n");
	fprintf(stderr, " -n <name>\tthe name of the jail\n");
	fprintf(stderr, " -r <file>\treadonly files that should be staged\n");
	fprintf(stderr, " -w <file>\twriteable files that should be staged\n");
	fprintf(stderr, " -p\t\tjail has /proc\t\n");
	fprintf(stderr, " -s\t\tjail has /sys\t\n");
	fprintf(stderr, " -l\t\tjail has /dev/log\t\n");
	fprintf(stderr, " -u\t\tjail has a ubus socket\t\n");

	return -1;
}
270
271 static int child_running = 1;
272
273 static void child_process_handler(struct uloop_process *c, int ret)
274 {
275 INFO("child (%d) exited: %d\n", c->pid, ret);
276 uloop_end();
277 child_running = 0;
278 }
279
/*
 * uloop bookkeeping for the jailed program; .pid is filled in by
 * spawn_child() before the entry is registered with uloop_process_add().
 */
struct uloop_process child_process = {
	.cb = child_process_handler,
};
283
284 static int spawn_child(void *arg)
285 {
286 char *path = get_current_dir_name();
287 int procfs = 0, sysfs = 0;
288 char *seccomp = NULL;
289 char **argv = arg;
290 int argc = 0, ch;
291 char *mpoint;
292
293 while (argv[argc])
294 argc++;
295
296 optind = 0;
297 while ((ch = getopt(argc, argv, OPT_ARGS)) != -1) {
298 switch (ch) {
299 case 'd':
300 debug = 1;
301 break;
302 case 'S':
303 seccomp = optarg;
304 break;
305 case 'p':
306 procfs = 1;
307 break;
308 case 's':
309 sysfs = 1;
310 break;
311 case 'n':
312 sethostname(optarg, strlen(optarg));
313 break;
314 }
315 }
316
317 asprintf(&mpoint, "%s/old", path);
318 mkdir_p(mpoint, 0755);
319 if (pivot_root(path, mpoint) == -1) {
320 ERROR("pivot_root failed:%s\n", strerror(errno));
321 return -1;
322 }
323 free(mpoint);
324 umount2("/old", MNT_DETACH);
325 rmdir("/old");
326 if (procfs) {
327 mkdir("/proc", 0755);
328 mount("proc", "/proc", "proc", MS_NOATIME, 0);
329 }
330 if (sysfs) {
331 mkdir("/sys", 0755);
332 mount("sysfs", "/sys", "sysfs", MS_NOATIME, 0);
333 }
334 mount(NULL, "/", NULL, MS_RDONLY | MS_REMOUNT, 0);
335
336 uloop_init();
337
338 child_process.pid = spawn(path, &argv[optind], seccomp);
339 uloop_process_add(&child_process);
340 uloop_run();
341 uloop_done();
342 if (child_running) {
343 kill(child_process.pid, SIGTERM);
344 waitpid(child_process.pid, NULL, 0);
345 }
346
347 return 0;
348 }
349
350 static int namespace_running = 1;
351
352 static void namespace_process_handler(struct uloop_process *c, int ret)
353 {
354 INFO("namespace (%d) exited: %d\n", c->pid, ret);
355 uloop_end();
356 namespace_running = 0;
357 }
358
/*
 * uloop bookkeeping for the namespace process; .pid is filled in by
 * spawn_namespace() with the result of clone().
 */
struct uloop_process namespace_process = {
	.cb = namespace_process_handler,
};
362
363 static void spawn_namespace(const char *path, int argc, char **argv)
364 {
365 char *dir = get_current_dir_name();
366
367 uloop_init();
368 chdir(path);
369 namespace_process.pid = clone(spawn_child,
370 child_stack + STACK_SIZE,
371 CLONE_NEWUTS | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD, argv);
372
373 if (namespace_process.pid != -1) {
374 chdir(dir);
375 free(dir);
376 uloop_process_add(&namespace_process);
377 uloop_run();
378 uloop_done();
379 if (namespace_running) {
380 kill(namespace_process.pid, SIGTERM);
381 waitpid(namespace_process.pid, NULL, 0);
382 }
383 } else {
384 ERROR("failed to spawn namespace: %s\n", strerror(errno));
385 }
386 }
387
388 static void add_extra(char *name, int readonly)
389 {
390 struct extra *f;
391
392 if (*name != '/') {
393 ERROR("%s is not an absolute path\n", name);
394 return;
395 }
396
397 f = calloc(1, sizeof(struct extra));
398
399 f->name = basename(name);
400 f->path = dirname(strdup(name));
401 f->readonly = readonly;
402
403 list_add_tail(&f->list, &extras);
404 }
405
406 int main(int argc, char **argv)
407 {
408 uid_t uid = getuid();
409 const char *name = NULL;
410 char *path = NULL;
411 struct stat s;
412 int ch, ret;
413 char log[] = "/dev/log";
414 char ubus[] = "/var/run/ubus.sock";
415
416 if (uid) {
417 ERROR("not root, aborting: %s\n", strerror(errno));
418 return -1;
419 }
420
421 umask(022);
422
423 while ((ch = getopt(argc, argv, OPT_ARGS)) != -1) {
424 switch (ch) {
425 case 'd':
426 debug = 1;
427 break;
428 case 'P':
429 path = optarg;
430 break;
431 case 'n':
432 name = optarg;
433 break;
434 case 'S':
435 case 'r':
436 add_extra(optarg, 1);
437 break;
438 case 'w':
439 add_extra(optarg, 0);
440 break;
441 case 'u':
442 add_extra(ubus, 0);
443 break;
444 case 'l':
445 add_extra(log, 0);
446 break;
447 }
448 }
449
450 if (argc - optind < 1)
451 return usage();
452
453 if (!path && asprintf(&path, "/tmp/%s", basename(argv[optind])) == -1) {
454 ERROR("failed to set root path\n: %s", strerror(errno));
455 return -1;
456 }
457
458 if (!stat(path, &s)) {
459 ERROR("%s already exists: %s\n", path, strerror(errno));
460 return -1;
461 }
462
463 if (name)
464 prctl(PR_SET_NAME, name, NULL, NULL, NULL);
465
466 avl_init(&libraries, avl_strcmp, false, NULL);
467 alloc_library_path("/lib64");
468 alloc_library_path("/lib");
469 alloc_library_path("/usr/lib");
470 load_ldso_conf("/etc/ld.so.conf");
471
472 if (elf_load_deps(argv[optind])) {
473 ERROR("failed to load dependencies\n");
474 return -1;
475 }
476
477 if (elf_load_deps("libpreload-seccomp.so")) {
478 ERROR("failed to load libpreload-seccomp.so\n");
479 return -1;
480 }
481
482 ret = build_jail(path);
483
484 if (!ret)
485 spawn_namespace(path, argc, argv);
486 else
487 ERROR("failed to build jail\n");
488
489 stop_jail(path);
490
491 return ret;
492 }