--- /dev/null
+# Runs the shared OpenWrt native CI checks on every push and pull request and,
+# when the job fails, uploads the scan-build and cram test output for inspection.
+name: OpenWrt CI testing
+
+on: [ push, pull_request ]
+# NOTE(review): these variables are presumably read by the
+# gh-actions-openwrt-ci-native action below - confirm against its documentation.
+env:
+  CI_ENABLE_UNIT_TESTING: 0
+  CI_TARGET_BUILD_DEPENDS: libubox
+  CI_CMAKE_EXTRA_BUILD_ARGS: -DJAIL_SUPPORT=1
+
+jobs:
+  native_testing:
+    name: Various native checks
+    # NOTE(review): GitHub has retired the hosted ubuntu-20.04 image; confirm
+    # the CI action works on a newer image before switching the runner.
+    runs-on: ubuntu-20.04
+
+    steps:
+      # v2 of checkout/upload-artifact is deprecated; v4 is the maintained line.
+      - uses: actions/checkout@v4
+
+      - uses: ynezz/gh-actions-openwrt-ci-native@v0.0.1
+
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        if: failure()
+        with:
+          name: native-build-artifacts
+          if-no-files-found: ignore
+          path: |
+            build/scan
+            tests/cram/**/*.t.err
askfirst
udevtrigger
init
+upgraded/upgraded
.*
Makefile
CMakeCache.txt
)
-SET(SOURCES procd.c signal.c state.c inittab.c rcS.c ubus.c system.c sysupgrade.c
- service/service.c service/instance.c service/validate.c service/trigger.c service/watch.c
- utils/utils.c)
+SET(SOURCES procd.c signal.c state.c hotplug-dispatch.c inittab.c rcS.c ubus.c
+ system.c sysupgrade.c service/service.c service/instance.c
+ service/validate.c service/trigger.c service/watch.c utils/utils.c)
IF(NOT DISABLE_INIT)
SET(SOURCES ${SOURCES} watchdog.c plug/coldplug.c plug/hotplug.c)
ENDIF()
-SET(LIBS ubox ubus json-c blobmsg_json json_script)
+# Resolve each dependency to a library path; the result of every FIND_LIBRARY
+# call is stored in the variable named by its first argument.
+FIND_LIBRARY(ubox NAMES ubox)
+FIND_LIBRARY(ubus NAMES ubus)
+FIND_LIBRARY(uci NAMES uci)
+FIND_LIBRARY(blobmsg_json NAMES blobmsg_json)
+FIND_LIBRARY(json_script NAMES json_script)
+# json-c is tried first, plain "json" is the fallback name
+FIND_LIBRARY(json NAMES json-c json)
+FIND_LIBRARY(udebug NAMES udebug)
+
+# Libraries linked into the procd and init executables (uci is not part of
+# this list; it is only linked into ujail).
+# NOTE(review): a library that was not found expands to <var>-NOTFOUND here -
+# confirm that failing the configure step is the intended outcome.
+SET(LIBS ${ubox} ${ubus} ${json} ${blobmsg_json} ${json_script} ${udebug})
IF(DEBUG)
ADD_DEFINITIONS(-DUDEV_DEBUG -g3)
ADD_DEFINITIONS(-DEARLY_PATH="${EARLY_PATH}")
ENDIF()
-IF(ZRAM_TMPFS)
- ADD_DEFINITIONS(-DZRAM_TMPFS)
- SET(SOURCES_ZRAM initd/zram.c)
+IF(SELINUX)
+ include(FindPkgConfig)
+ pkg_search_module(SELINUX REQUIRED libselinux)
+ add_compile_definitions(WITH_SELINUX)
ENDIF()
add_subdirectory(upgraded)
ADD_EXECUTABLE(procd ${SOURCES})
TARGET_LINK_LIBRARIES(procd ${LIBS})
+SET_TARGET_PROPERTIES(procd PROPERTIES COMPILE_DEFINITIONS "HAS_UDEBUG")
INSTALL(TARGETS procd
RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR}
)
FIND_PATH(ubox_include_dir libubox/uloop.h)
-INCLUDE_DIRECTORIES(${ubox_include_dir})
+FIND_PATH(udebug_include_dir NAMES udebug.h)
+INCLUDE_DIRECTORIES(${ubox_include_dir} ${udebug_include_dir})
IF(DISABLE_INIT)
ADD_DEFINITIONS(-DDISABLE_INIT)
ELSE()
ADD_EXECUTABLE(init initd/init.c initd/early.c initd/preinit.c initd/mkdev.c sysupgrade.c watchdog.c
- utils/utils.c ${SOURCES_ZRAM})
-TARGET_LINK_LIBRARIES(init ${LIBS})
+ utils/utils.c)
+TARGET_INCLUDE_DIRECTORIES(init PUBLIC ${SELINUX_INCLUDE_DIRS})
+TARGET_LINK_LIBRARIES(init ${LIBS} ${SELINUX_LIBRARIES})
INSTALL(TARGETS init
RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR}
)
IF(SECCOMP_SUPPORT)
ADD_DEFINITIONS(-DSECCOMP_SUPPORT)
-ADD_LIBRARY(preload-seccomp SHARED jail/preload.c jail/seccomp.c)
-TARGET_LINK_LIBRARIES(preload-seccomp dl ubox blobmsg_json)
+ADD_LIBRARY(preload-seccomp SHARED jail/preload.c jail/seccomp.c jail/seccomp-oci.c)
+TARGET_LINK_LIBRARIES(preload-seccomp dl ${ubox} ${blobmsg_json})
INSTALL(TARGETS preload-seccomp
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
ADD_DEPENDENCIES(preload-seccomp syscall-names-h)
-endif()
+SET(SOURCES_OCI_SECCOMP jail/seccomp-oci.c)
+ENDIF()
IF(JAIL_SUPPORT)
-ADD_EXECUTABLE(ujail jail/jail.c jail/elf.c jail/fs.c jail/capabilities.c)
-TARGET_LINK_LIBRARIES(ujail ubox blobmsg_json)
+ADD_EXECUTABLE(ujail jail/jail.c jail/cgroups.c jail/cgroups-bpf.c jail/elf.c jail/fs.c jail/capabilities.c jail/netifd.c ${SOURCES_OCI_SECCOMP})
+TARGET_LINK_LIBRARIES(ujail ${ubox} ${ubus} ${uci} ${blobmsg_json})
INSTALL(TARGETS ujail
RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR}
)
ADD_DEPENDENCIES(ujail capabilities-names-h)
+IF(SECCOMP_SUPPORT)
+ ADD_DEPENDENCIES(ujail syscall-names-h)
+ENDIF()
+
+ADD_EXECUTABLE(uxc uxc.c)
+TARGET_LINK_LIBRARIES(uxc ${ubox} ${ubus} ${blobmsg_json})
+INSTALL(TARGETS uxc
+ RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR}
+)
endif()
IF(UTRACE_SUPPORT)
ADD_EXECUTABLE(utrace trace/trace.c)
-TARGET_LINK_LIBRARIES(utrace ubox ${json} blobmsg_json)
+TARGET_LINK_LIBRARIES(utrace ${ubox} ${json} ${blobmsg_json})
INSTALL(TARGETS utrace
RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR}
)
#ifndef __CONTAINER_H
#define __CONTAINER_H
+
#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+/*
+ * Report whether the process appears to run inside a container: true when
+ * the "container" environment variable is set, or when /.dockerenv or
+ * /pantavisor can be stat()ed.
+ */
static inline bool is_container() {
- return !!getenv("container");
+ struct stat s;
+ int r = stat("/.dockerenv", &s);
+ int pv_r = stat("/pantavisor", &s);
+ return !!getenv("container") || r == 0 || pv_r == 0;
}
#endif
--- /dev/null
+/*
+ * Copyright (C) 2021 Daniel Golle <daniel@makrotopia.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/inotify.h>
+#include <sys/types.h>
+
+#include <dirent.h>
+#include <errno.h>
+#include <glob.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <libubox/avl.h>
+#include <libubox/avl-cmp.h>
+#include <libubox/list.h>
+#include <libubox/uloop.h>
+#include <libubus.h>
+
+#include "procd.h"
+
+#define HOTPLUG_BASEDIR "/etc/hotplug.d"
+#define HOTPLUG_OBJECT_PREFIX "hotplug."
+
+#define INOTIFY_SZ (sizeof(struct inotify_event) + PATH_MAX + 1)
+
+struct ubus_context *ctx;
+static char *inotify_buffer;
+static struct uloop_fd fd_inotify_read;
+
+static LIST_HEAD(subsystems);
+
+extern char **environ;
+
+/* One hotplug subsystem, i.e. one directory below HOTPLUG_BASEDIR. */
+struct hotplug_subsys {
+ /* membership in the global `subsystems` list */
+ struct list_head list;
+ /* the "hotplug.<name>" ubus object; its name string is heap-allocated */
+ struct ubus_object ubus;
+};
+
+/* One environment entry while the script environment is being assembled. */
+struct envlist {
+ /* tree node; its key points at `env` and is compared by avl_envcmp() */
+ struct avl_node avl;
+ /* heap-allocated copy of the environment string */
+ char *env;
+};
+
+/* State of one hotplug run: the scripts of a subsystem executed one by one. */
+struct hotplug_process {
+ /* ubus object the call was received on */
+ struct ubus_object *ubus;
+ /* NULL-terminated environment handed to execve(); owned by this struct */
+ char **envp;
+ /* fires hotplug_exec() to start the next script */
+ struct uloop_timeout timeout;
+ /* tracks the currently running script; hotplug_done() is its callback */
+ struct uloop_process process;
+ /* scripts matched for the subsystem */
+ glob_t globbuf;
+ /* index into globbuf.gl_pathv of the next script to run */
+ unsigned int cnt;
+ /* value reported to hotplug_done() for the most recent script */
+ int ret;
+};
+
+/*
+ * Release a NULL-terminated environment vector: every string first, then
+ * the vector itself. envp must not be NULL.
+ */
+static void env_free(char **envp)
+{
+	size_t i;
+
+	for (i = 0; envp[i] != NULL; i++)
+		free(envp[i]);
+
+	free(envp);
+}
+
+/* Dispose of a hotplug run together with everything it owns. */
+static void hotplug_free(struct hotplug_process *pc)
+{
+	/* the glob results and the environment do not depend on each other */
+	globfree(&pc->globbuf);
+	env_free(pc->envp);
+
+	free(pc);
+}
+
+/*
+ * uloop process callback: record the value reported for the finished script
+ * and arm the run's timeout so the next script is started 50 ms later.
+ */
+static void hotplug_done(struct uloop_process *c, int ret)
+{
+	struct hotplug_process *run;
+
+	run = container_of(c, struct hotplug_process, process);
+	run->ret = ret;
+	uloop_timeout_set(&run->timeout, 50);
+}
+
+/*
+ * Timeout callback driving one hotplug run.
+ *
+ * Starts the next matched script in a child /bin/sh (after sourcing
+ * /lib/functions.sh) with the prepared environment, or frees the run once
+ * every script has been started. The run is also freed when the command
+ * string cannot be allocated or fork() fails, because nothing would re-arm
+ * the timeout afterwards.
+ */
+static void hotplug_exec(struct uloop_timeout *t)
+{
+	struct hotplug_process *pc = container_of(t, struct hotplug_process, timeout);
+	char *script;
+	char *exec_argv[4];
+
+	/* we have reached the last entry in the globbuf */
+	if (pc->cnt == pc->globbuf.gl_pathc) {
+		hotplug_free(pc);
+		return;
+	}
+
+	if (asprintf(&script, ". /lib/functions.sh\n. %s\n", pc->globbuf.gl_pathv[pc->cnt++]) == -1) {
+		/*
+		 * No child was started and no timeout is pending, so this is
+		 * the last reference to the run: release it instead of leaking.
+		 */
+		hotplug_free(pc);
+		return;
+	}
+
+	/* prepare for execve() */
+	exec_argv[0] = "/bin/sh";
+	exec_argv[1] = "-c";
+	exec_argv[2] = script;
+	exec_argv[3] = NULL;
+
+	/* set callback in uloop_process */
+	pc->process.cb = hotplug_done;
+	pc->process.pid = fork();
+	if (pc->process.pid == 0) {
+		/*
+		 * child: execve() only returns on failure. Leave with _exit()
+		 * so the parent's atexit handlers and stdio buffers are not
+		 * run/flushed a second time from the forked copy.
+		 */
+		execve(exec_argv[0], exec_argv, pc->envp);
+		_exit(127);
+	} else if (pc->process.pid < 0) {
+		/* fork error */
+		free(script);
+		hotplug_free(pc);
+		return;
+	}
+
+	/* parent: hotplug_done() continues the run when the child exits */
+	free(script);
+	uloop_process_add(&pc->process);
+}
+
+/*
+ * AVL comparator for "NAME=value" strings: only the variable name is
+ * significant, so two entries for the same variable compare equal.
+ * Returns -1 when k1 contains no '='. The ptr argument is unused.
+ */
+static int avl_envcmp(const void *k1, const void *k2, void *ptr)
+{
+	const char *lhs = k1;
+	const char *eq = strchr(lhs, '=');
+
+	if (eq == NULL)
+		return -1;
+
+	/* include the '=' itself so that "A=" never matches "AB=" */
+	return strncmp(lhs, k2, (eq - lhs) + 1);
+}
+
+/*
+ * Validate a NUL-terminated environment variable name.
+ *
+ * A valid name is non-empty, consists of ASCII letters, digits and '_',
+ * and does not start with a digit.
+ *
+ * Returns 0 for a valid name, EINVAL otherwise.
+ */
+static int validate_envvarname(const char *envvarname)
+{
+	const char *tmp = envvarname;
+
+	/* an empty name (as in "=value") is not a usable variable name */
+	if (tmp[0] == '\0')
+		return EINVAL;
+
+	/* check for illegal characters in env variable name */
+	while (tmp[0] != '\0') {
+		if (!((tmp[0] >= 'a' && tmp[0] <= 'z') ||
+		      (tmp[0] >= 'A' && tmp[0] <= 'Z') ||
+		      (tmp[0] == '_') ||
+		      /* allow numbers unless they are at the first character */
+		      ((tmp != envvarname) && tmp[0] >= '0' && tmp[0] <= '9')))
+			return EINVAL;
+		++tmp;
+	}
+
+	return 0;
+}
+
+/* Indices into the attribute table parsed from a hotplug "call" request. */
+enum {
+ HOTPLUG_ENV,
+ __HOTPLUG_MAX
+};
+
+/* Request layout: a single array attribute named "env". */
+static const struct blobmsg_policy hotplug_policy[__HOTPLUG_MAX] = {
+ [HOTPLUG_ENV] = { .name = "env", .type = BLOBMSG_TYPE_ARRAY },
+};
+
+/*
+ * ubus "call" handler shared by every hotplug.<subsystem> object.
+ *
+ * Assembles the script environment from the process environment plus the
+ * "NAME=value" strings of the request's "env" array (a variable that already
+ * exists is never overridden; LD_* and PATH entries and entries with an
+ * invalid name are ignored), then queues all files matching
+ * HOTPLUG_BASEDIR/<subsystem>/* for execution through hotplug_exec().
+ *
+ * Returns UBUS_STATUS_OK once the run is queued or when glob() yields
+ * nothing to run, UBUS_STATUS_INVALID_ARGUMENT for a missing or malformed
+ * "env" array, UBUS_STATUS_NOT_SUPPORTED when "ASYNC=0" is requested and
+ * UBUS_STATUS_UNKNOWN_ERROR when an allocation fails.
+ */
+static int hotplug_call(struct ubus_context *ctx, struct ubus_object *obj,
+			struct ubus_request_data *req, const char *method,
+			struct blob_attr *msg)
+{
+	const char *subsys = &obj->name[strlen(HOTPLUG_OBJECT_PREFIX)];
+	struct blob_attr *tb[__HOTPLUG_MAX], *cur;
+	AVL_TREE(env, avl_envcmp, false, NULL);
+	struct envlist *envle, *p;
+	int rem;
+	char **envp, *globstr, *tmp, **tmpenv;
+	size_t envz = 0;
+	struct hotplug_process *pc;
+	bool async = true;
+	bool name_ok;
+	int err = UBUS_STATUS_UNKNOWN_ERROR;
+
+	blobmsg_parse(hotplug_policy, __HOTPLUG_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[HOTPLUG_ENV])
+		return UBUS_STATUS_INVALID_ARGUMENT;
+
+	/*
+	 * The elements are handled with str*() functions below, so make sure
+	 * each one really is a properly terminated string first.
+	 */
+	if (blobmsg_check_array(tb[HOTPLUG_ENV], BLOBMSG_TYPE_STRING) < 0)
+		return UBUS_STATUS_INVALID_ARGUMENT;
+
+	tmpenv = environ;
+
+	/* first adding existing environment to avl_tree */
+	while (*tmpenv) {
+		envle = calloc(1, sizeof(struct envlist));
+		if (!envle)
+			goto err_envle;
+
+		envle->env = strdup(*tmpenv);
+		if (!envle->env) {
+			free(envle);
+			goto err_envle;
+		}
+		envle->avl.key = envle->env;
+		if (avl_insert(&env, &envle->avl) == -1) {
+			free(envle->env);
+			free(envle);
+			goto err_envle;
+		}
+
+		++tmpenv;
+	}
+
+	/* then adding additional variables from ubus call */
+	blobmsg_for_each_attr(cur, tb[HOTPLUG_ENV], rem) {
+		char *enve = blobmsg_get_string(cur);
+		if (!enve)
+			continue;
+
+		/* never let a caller influence the dynamic linker */
+		if (!strncmp(enve, "LD_", 3))
+			continue;
+
+		/* ... nor the search path; match the assignment, not the bare word */
+		if (!strncmp(enve, "PATH=", 5))
+			continue;
+
+		if (strlen(enve) < 3)
+			continue;
+
+		if (!(tmp = strchr(enve, '=')))
+			continue;
+
+		/* "=value" carries no variable name at all */
+		if (tmp == enve)
+			continue;
+
+		/*
+		 * Terminate the name temporarily for validation and always put
+		 * the '=' back: the request buffer must not stay modified.
+		 */
+		*tmp = '\0';
+		name_ok = !validate_envvarname(enve);
+		*tmp = '=';
+		if (!name_ok)
+			continue;
+
+		if (!strcmp(enve, "ASYNC=0"))
+			async = false;
+
+		envle = calloc(1, sizeof(struct envlist));
+		if (!envle)
+			goto err_envle;
+
+		envle->env = strdup(enve);
+		if (!envle->env) {
+			free(envle);
+			goto err_envle;
+		}
+		envle->avl.key = envle->env;
+		if (avl_insert(&env, &envle->avl)) {
+			/* do not override existing env values, just skip */
+			free((void*)envle->env);
+			free(envle);
+		}
+	}
+
+	/* synchronous calls are unsupported for now */
+	if (!async) {
+		err = UBUS_STATUS_NOT_SUPPORTED;
+		goto err_envle;
+	}
+
+	/* allocating new environment */
+	avl_for_each_element(&env, envle, avl)
+		++envz;
+
+	envp = calloc(envz + 1, sizeof(char *));
+	if (!envp)
+		goto err_envle;
+
+	/* populating new environment; the strings move from the tree into envp */
+	envz = 0;
+	avl_for_each_element_safe(&env, envle, avl, p) {
+		envp[envz++] = envle->env;
+		avl_delete(&env, &envle->avl);
+		free(envle);
+	}
+
+	pc = calloc(1, sizeof(struct hotplug_process));
+	if (!pc) {
+		env_free(envp);
+		return UBUS_STATUS_UNKNOWN_ERROR;
+	}
+	pc->timeout.cb = hotplug_exec;
+	pc->envp = envp;
+	pc->cnt = 0;
+	pc->ubus = obj;
+
+	/* glob'ing for hotplug scripts */
+	if (asprintf(&globstr, "%s/%s/*", HOTPLUG_BASEDIR, subsys) == -1) {
+		hotplug_free(pc);
+		return UBUS_STATUS_UNKNOWN_ERROR;
+	}
+
+	/* nothing to run is not an error for the caller */
+	if (glob(globstr, GLOB_DOOFFS, NULL, &pc->globbuf)) {
+		free(globstr);
+		hotplug_free(pc);
+		return UBUS_STATUS_OK;
+	}
+
+	free(globstr);
+
+	/* asynchronous call to hotplug_exec() */
+	uloop_timeout_set(&pc->timeout, 50);
+
+	return UBUS_STATUS_OK;
+
+err_envle:
+	/* drop whatever was collected in the tree so far */
+	avl_for_each_element_safe(&env, envle, avl, p) {
+		if (envle->env)
+			free(envle->env);
+
+		avl_delete(&env, &envle->avl);
+		free(envle);
+	}
+
+	return err;
+}
+
+/* Every hotplug object exposes exactly one method, "call". */
+static const struct ubus_method hotplug_methods[] = {
+ UBUS_METHOD("call", hotplug_call, hotplug_policy),
+};
+
+/* Object type shared by all "hotplug.<subsystem>" objects. */
+static struct ubus_object_type hotplug_object_type =
+ UBUS_OBJECT_TYPE("hotplug", hotplug_methods);
+
+/*
+ * Publish a "hotplug.<name>" ubus object for a subsystem directory.
+ *
+ * nlen:    upper bound on the number of characters taken from newname
+ * newname: directory name; reading stops at the first NUL within nlen
+ *
+ * Running out of memory terminates the process with ENOMEM. An object that
+ * ubus refuses to register is discarded and not tracked.
+ */
+static void add_subsystem(int nlen, char *newname)
+{
+	struct hotplug_subsys *nh = calloc(1, sizeof(struct hotplug_subsys));
+	char *name;
+
+	/* same policy as for the name below: allocation failure is fatal */
+	if (!nh)
+		exit(ENOMEM);
+
+	if (asprintf(&name, "%s%.*s", HOTPLUG_OBJECT_PREFIX, nlen, newname) == -1)
+		exit(ENOMEM);
+
+	/* prepare and add ubus object */
+	nh->ubus.name = name;
+	nh->ubus.type = &hotplug_object_type;
+	nh->ubus.methods = hotplug_object_type.methods;
+	nh->ubus.n_methods = hotplug_object_type.n_methods;
+
+	/* only keep track of objects that were actually registered */
+	if (ubus_add_object(ctx, &nh->ubus)) {
+		free(name);
+		free(nh);
+		return;
+	}
+
+	list_add(&nh->list, &subsystems);
+}
+
+/*
+ * Unregister and free subsystem objects.
+ *
+ * With nlen != 0 only the object whose name (after the "hotplug." prefix)
+ * equals the first nlen characters of `name` is removed; with nlen == 0
+ * every object is removed and `name` is not looked at.
+ */
+static void remove_subsystem(int nlen, char *name)
+{
+	const size_t prefix_len = strlen(HOTPLUG_OBJECT_PREFIX);
+	struct hotplug_subsys *next, *cur;
+
+	list_for_each_entry_safe(cur, next, &subsystems, list) {
+		if (nlen) {
+			/* cheap length comparison first, then the characters */
+			if (strlen(cur->ubus.name) != strnlen(name, nlen) + prefix_len)
+				continue;
+			if (strncmp(name, &cur->ubus.name[prefix_len], nlen) != 0)
+				continue;
+		}
+
+		list_del(&cur->list);
+		ubus_remove_object(ctx, &cur->ubus);
+		free((void*)cur->ubus.name);
+		free(cur);
+	}
+}
+
+/*
+ * Register one subsystem for every visible directory directly below
+ * HOTPLUG_BASEDIR. Entries whose d_type is anything other than DT_DIR and
+ * entries starting with '.' are ignored.
+ *
+ * Returns 0 on success, ENOENT when the base directory cannot be opened.
+ */
+static int init_subsystems(void)
+{
+	struct dirent *entry;
+	DIR *basedir = opendir(HOTPLUG_BASEDIR);
+
+	if (!basedir)
+		return ENOENT;
+
+	for (entry = readdir(basedir); entry; entry = readdir(basedir)) {
+		if (entry->d_type == DT_DIR && entry->d_name[0] != '.')
+			add_subsystem(strlen(entry->d_name), entry->d_name);
+	}
+
+	closedir(basedir);
+	return 0;
+}
+
+/*
+ * uloop callback for the inotify descriptor: reads one batch of events into
+ * inotify_buffer and adds or removes the subsystem object for every visible
+ * directory that was created in, moved into, deleted from or moved out of
+ * the watched directory. The `events` argument is not used.
+ */
+static void inotify_read_handler(struct uloop_fd *u, unsigned int events)
+{
+ int rc;
+ char *p;
+ struct inotify_event *in;
+
+ /* read inotify events */
+ /* retry only when interrupted by a signal */
+ while ((rc = read(u->fd, inotify_buffer, INOTIFY_SZ)) == -1 && errno == EINTR);
+
+ /* any other error, or nothing read: nothing to process */
+ if (rc <= 0)
+ return;
+
+ /* process events from buffer */
+ /*
+ * Walk the variable-sized records: continue while a full event header
+ * still fits into the bytes read; the step expression uses `in`, which
+ * the body assigns first thing, to skip the header plus in->len name bytes.
+ */
+ for (p = inotify_buffer;
+ rc - (p - inotify_buffer) >= (int)sizeof(struct inotify_event);
+ p += sizeof(struct inotify_event) + in->len) {
+ in = (struct inotify_event*)p;
+
+ /* skip everything but directories */
+ if (!(in->mask & IN_ISDIR))
+ continue;
+
+ /* events without a name cannot be mapped to a subsystem */
+ if (in->len < 1)
+ continue;
+
+ /* skip hidden files */
+ if (in->name[0] == '.')
+ continue;
+
+ /* add/remove subsystem objects */
+ /* NOTE(review): in->len presumably includes NUL padding after the name -
+ * both callees only use it as an upper bound; confirm against inotify(7) */
+ if (in->mask & (IN_CREATE | IN_MOVED_TO))
+ add_subsystem(in->len, in->name);
+ else if (in->mask & (IN_DELETE | IN_MOVED_FROM))
+ remove_subsystem(in->len, in->name);
+ }
+}
+
+/*
+ * (Re)initialise hotplug dispatching on a ubus connection.
+ *
+ * Drops all previously registered subsystem objects, registers one object
+ * per directory below HOTPLUG_BASEDIR on newctx and makes sure an inotify
+ * watch on that directory is installed.
+ *
+ * The function starts by removing existing objects, so it is evidently meant
+ * to be callable more than once; the inotify descriptor, its buffer and the
+ * uloop registration are therefore created only on the first successful
+ * call instead of being leaked and re-added on every call.
+ * NOTE(review): presumably invoked on every ubus (re)connect - confirm
+ * against the caller.
+ */
+void ubus_init_hotplug(struct ubus_context *newctx)
+{
+	ctx = newctx;
+	remove_subsystem(0, NULL);
+	if (init_subsystems()) {
+		printf("failed to initialize hotplug subsystems from %s\n", HOTPLUG_BASEDIR);
+		return;
+	}
+
+	/* a non-NULL buffer means the watch is already up and registered */
+	if (inotify_buffer)
+		return;
+
+	fd_inotify_read.fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
+	fd_inotify_read.cb = inotify_read_handler;
+	if (fd_inotify_read.fd == -1) {
+		printf("failed to initialize inotify handler for %s\n", HOTPLUG_BASEDIR);
+		return;
+	}
+
+	inotify_buffer = calloc(1, INOTIFY_SZ);
+	if (!inotify_buffer)
+		goto err_close;
+
+	if (inotify_add_watch(fd_inotify_read.fd, HOTPLUG_BASEDIR,
+		IN_CREATE | IN_MOVED_TO | IN_DELETE | IN_MOVED_FROM | IN_ONLYDIR) == -1)
+		goto err_free;
+
+	uloop_fd_add(&fd_inotify_read, ULOOP_READ);
+	return;
+
+err_free:
+	/* reset the sentinel so that a later call can try again */
+	free(inotify_buffer);
+	inotify_buffer = NULL;
+err_close:
+	close(fd_inotify_read.fd);
+	fd_inotify_read.fd = -1;
+}
unsigned int oldumask = umask(0);
if (!is_container()) {
- mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0);
- mount("sysfs", "/sys", "sysfs", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0);
- mount("cgroup", "/sys/fs/cgroup", "cgroup", MS_NODEV | MS_NOEXEC | MS_NOSUID, 0);
- mount("tmpfs", "/dev", "tmpfs", MS_NOATIME | MS_NOSUID, "mode=0755,size=512K");
+ mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, NULL);
+ mount("sysfs", "/sys", "sysfs", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, NULL);
+ mount("efivars", "/sys/firmware/efi/efivars", "efivarfs", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, NULL);
+ mount("cgroup2", "/sys/fs/cgroup", "cgroup2", MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, "nsdelegate");
+ mount("tmpfs", "/dev", "tmpfs", MS_NOATIME | MS_NOEXEC | MS_NOSUID, "mode=0755,size=512K");
ignore(symlink("/tmp/shm", "/dev/shm"));
mkdir("/dev/pts", 0755);
- mount("devpts", "/dev/pts", "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, "mode=600");
+ mount("devpts", "/dev/pts", "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, NULL);
early_dev();
}
early_console("/dev/console");
- if (mount_zram_on_tmp()) {
- mount("tmpfs", "/tmp", "tmpfs", MS_NOSUID | MS_NODEV | MS_NOATIME, "mode=01777");
- mkdir("/tmp/shm", 01777);
- } else {
- mkdir("/tmp/shm", 01777);
- mount("tmpfs", "/tmp/shm", "tmpfs", MS_NOSUID | MS_NODEV | MS_NOATIME,
- "mode=01777");
- }
+
+ mount("tmpfs", "/tmp", "tmpfs", MS_NOSUID | MS_NODEV | MS_NOATIME, "mode=01777");
+ mkdir("/tmp/shm", 01777);
+
mkdir("/tmp/run", 0755);
mkdir("/tmp/lock", 0755);
mkdir("/tmp/state", 0755);
#include <unistd.h>
#include <stdio.h>
+#if defined(WITH_SELINUX)
+#include <selinux/selinux.h>
+#include <selinux/restorecon.h>
+#include <selinux/avc.h>
+#endif
+
#include "../utils/utils.h"
#include "init.h"
#include "../watchdog.h"
}
}
+#if defined(WITH_SELINUX)
+/*
+ * Staged SELinux bring-up. Each stage that succeeds marks its completion in
+ * the environment and re-executes argv[0], so this function runs again in
+ * the new process image:
+ *   - SELINUX_INIT unset: load the policy, then set SELINUX_INIT=1.
+ *   - SELINUX_INIT set, INITRAMFS set, SELINUX_RESTORECON unset: relabel
+ *     "/" recursively (excluding /dev/console, /proc and /sys), then set
+ *     SELINUX_RESTORECON=1.
+ *   - otherwise: remove both markers and return 0 so start-up continues.
+ *
+ * Returns 0 to continue, 1 when a stage failed (or execv() returned) while
+ * `enforce` is positive.
+ */
+static int
+selinux(char **argv)
+{
+ int ret;
+ /* NOTE(review): queried before any policy is loaded - confirm what
+ * selinux_status_getenforce() reports at this point */
+ int enforce = selinux_status_getenforce();
+
+ /* is SELinux already initialized? */
+ if (getenv("SELINUX_INIT")) {
+ /* have initramfs permissions already been restored? */
+ if (!getenv("INITRAMFS") || getenv("SELINUX_RESTORECON")) {
+ unsetenv("SELINUX_INIT");
+ unsetenv("SELINUX_RESTORECON");
+ return 0;
+ }
+ /* Second call (initramfs only): restore filesystem labels */
+ const char *exclude_list[] = { "/dev/console", "/proc", "/sys", 0 };
+ selinux_restorecon_set_exclude_list(exclude_list);
+ ret = selinux_restorecon("/", SELINUX_RESTORECON_RECURSE | SELINUX_RESTORECON_MASS_RELABEL);
+ putenv("SELINUX_RESTORECON=1");
+ } else {
+ /* First call: load policy */
+ ret = selinux_init_load_policy(&enforce);
+ putenv("SELINUX_INIT=1");
+ }
+
+ /* on success restart ourselves; execv() only returns if it failed */
+ if (ret == 0)
+ execv(argv[0], argv);
+
+ /* NOTE(review): this path is also reached when relabeling failed, not
+ * only when the policy could not be loaded - the message covers both */
+ if (enforce > 0) {
+ fprintf(stderr, "Cannot load SELinux policy, but system in enforcing mode. Halting.\n");
+ return 1;
+ }
+
+ return 0;
+}
+#else
+/* Built without SELinux support: nothing to do, always continue. */
+static int
+selinux(char **argv)
+{
+ return 0;
+}
+#endif
+
int
main(int argc, char **argv)
{
sigaction(SIGUSR2, &sa_shutdown, NULL);
sigaction(SIGPWR, &sa_shutdown, NULL);
+ if (selinux(argv))
+ exit(-1);
early();
cmdline();
watchdog_init(1);
execvp(kmod[0], kmod);
ERROR("Failed to start kmodloader: %m\n");
- exit(-1);
+ exit(EXIT_FAILURE);
}
if (pid <= 0) {
ERROR("Failed to start kmodloader instance: %m\n");
} else {
+ const struct timespec req = {0, 10 * 1000 * 1000};
int i;
for (i = 0; i < 1200; i++) {
if (waitpid(pid, NULL, WNOHANG) > 0)
break;
- usleep(10 * 1000);
+ nanosleep(&req, NULL);
watchdog_ping();
}
}
void early(void);
int mkdev(const char *progname, int progmode);
-#ifdef ZRAM_TMPFS
-int mount_zram_on_tmp(void);
-#else
-static inline int mount_zram_on_tmp(void) {
- return -ENOSYS;
-}
-#endif
#endif
continue;
strcpy(path, dp->d_name);
- len = readlink(buf2, buf, sizeof(buf));
+ len = readlink(buf2, buf, sizeof(buf) - 1);
if (len <= 0)
continue;
#include <libubus.h>
#include <stdio.h>
-
+#include <stdlib.h>
#include <unistd.h>
#include "init.h"
fclose(sysupgrade);
- sysupgrade_exec_upgraded(prefix, path, command);
+ sysupgrade_exec_upgraded(prefix, path, NULL, command, NULL);
while (true)
sleep(1);
if (plugd_proc.pid > 0)
kill(plugd_proc.pid, SIGKILL);
- unsetenv("INITRAMFS");
unsetenv("PREINIT");
unlink("/tmp/.preinit");
if (!plugd_proc.pid) {
execvp(plug[0], plug);
ERROR("Failed to start plugd: %m\n");
- exit(-1);
+ exit(EXIT_FAILURE);
}
if (plugd_proc.pid <= 0) {
ERROR("Failed to start new plugd instance: %m\n");
if (!preinit_proc.pid) {
execvp(init[0], init);
ERROR("Failed to start preinit: %m\n");
- exit(-1);
+ exit(EXIT_FAILURE);
}
if (preinit_proc.pid <= 0) {
ERROR("Failed to start new preinit instance: %m\n");
+++ /dev/null
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <fcntl.h>
-
-#include <sys/utsname.h>
-#include <sys/mount.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/stat.h>
-
-#include "../log.h"
-#include "../container.h"
-
-#include "init.h"
-
-#define KB(x) (x * 1024)
-
-#define ZRAM_MOD_PATH "/lib/modules/%s/zram.ko"
-#define EXT4_MOD_PATH "/lib/modules/%s/ext4.ko"
-
-static long
-proc_meminfo(void)
-{
- FILE *fp;
- char line[256];
- char *key;
- long val = KB(16);
-
- fp = fopen("/proc/meminfo", "r");
- if (fp == NULL) {
- ERROR("Can't open /proc/meminfo: %m\n");
- return errno;
- }
-
- while (fgets(line, sizeof(line), fp)) {
- key = strtok(line, ":");
- if (strcasecmp(key, "MemTotal"))
- continue;
- val = atol(strtok(NULL, " kB\n"));
- break;
- }
- fclose(fp);
-
- if (val > KB(32))
- val = KB(32);
-
- return val;
-}
-
-static int
-early_insmod(char *module)
-{
- pid_t pid = fork();
- char *modprobe[] = { "/sbin/modprobe", NULL, NULL };
-
- if (!pid) {
- char *path;
- struct utsname ver;
-
- uname(&ver);
- path = alloca(strlen(module) + strlen(ver.release) + 1);
- sprintf(path, module, ver.release);
- modprobe[1] = path;
- execvp(modprobe[0], modprobe);
- ERROR("Can't exec %s: %m\n", modprobe[0]);
- exit(-1);
- }
-
- if (pid <= 0) {
- ERROR("Can't exec %s: %m\n", modprobe[0]);
- return -1;
- } else {
- waitpid(pid, NULL, 0);
- }
-
- return 0;
-}
-
-
-int
-mount_zram_on_tmp(void)
-{
- char *mkfs[] = { "/usr/sbin/mkfs.ext4", "-b", "4096", "-F", "-L", "TEMP", "-m", "0", "/dev/zram0", NULL };
- FILE *fp;
- long zramsize;
- pid_t pid;
- int ret;
-
- if (early_insmod(ZRAM_MOD_PATH) || early_insmod(EXT4_MOD_PATH)) {
- ERROR("failed to insmod zram support\n");
- return -1;
- }
-
- mkdev("*", 0600);
-
- zramsize = proc_meminfo() / 2;
- fp = fopen("/sys/block/zram0/disksize", "r+");
- if (fp == NULL) {
- ERROR("Can't open /sys/block/zram0/disksize: %m\n");
- return errno;
- }
- fprintf(fp, "%ld", KB(zramsize));
- fclose(fp);
-
- pid = fork();
- if (!pid) {
- execvp(mkfs[0], mkfs);
- ERROR("Can't exec %s: %m\n", mkfs[0]);
- exit(-1);
- } else if (pid <= 0) {
- ERROR("Can't exec %s: %m\n", mkfs[0]);
- return -1;
- } else {
- waitpid(pid, NULL, 0);
- }
-
- if (!is_container()) {
- ret = mount("/dev/zram0", "/tmp", "ext4", MS_NOSUID | MS_NODEV | MS_NOATIME, "errors=continue,noquota");
- if (ret < 0) {
- ERROR("Can't mount /dev/zram0 on /tmp: %m\n");
- return errno;
- }
- }
-
- LOG("Using up to %ld kB of RAM as ZRAM storage on /mnt\n", zramsize);
-
- ret = chmod("/tmp", 01777);
- if (ret < 0) {
- ERROR("Can't set /tmp mode to 1777: %m\n");
- return errno;
- }
-
- return 0;
-}
{
struct init_action *a = container_of(proc, struct init_action, proc);
- DEBUG(4, "pid:%d\n", proc->pid);
- uloop_timeout_set(&a->tout, a->respawn);
+ DEBUG(4, "pid:%d, exitcode:%d\n", proc->pid, ret);
+ proc->pid = 0;
+
+ if (a->respawn < 0)
+ return;
+
+ if (!dev_exist(a->id)) {
+ DEBUG(4, "Skipping respawn: device '%s' does not exist anymore\n", a->id);
+ return;
+ }
+
+ uloop_timeout_set(&a->tout, a->respawn);
}
static void respawn(struct uloop_timeout *tout)
{
struct init_action *a = container_of(tout, struct init_action, tout);
- fork_worker(a);
+ if (!a->proc.pid)
+ fork_worker(a);
}
static void rcdone(struct runqueue *q)
}
a->tout.cb = respawn;
- for (i = MAX_ARGS - 1; i >= 1; i--)
- a->argv[i] = a->argv[i - 1];
- a->argv[0] = ask;
+ /* shift arguments only if not yet done */
+ if (a->argv[0] != ask) {
+ for (i = MAX_ARGS - 1; i >= 1; i--)
+ a->argv[i] = a->argv[i - 1];
+ a->argv[0] = ask;
+ }
a->respawn = 500;
a->proc.cb = child_exit;
- fork_worker(a);
+ if (!a->proc.pid)
+ fork_worker(a);
}
static void askconsole(struct init_action *a)
char line[256], *tty, *split;
int i;
+ /* First, try console= on the kernel command line,
+ * then fallback to /sys/class/tty/console/active,
+ * which should work when linux,stdout-path (or equivalent)
+ * is in the device tree
+ */
tty = get_cmdline_val("console", line, sizeof(line));
+ if (tty == NULL ||
+ get_cmdline_val_offset("console", line, sizeof(line), 1)) {
+ if (dev_exist("console"))
+ tty = "console";
+ else
+ tty = get_active_console(line, sizeof(line));
+ }
if (tty != NULL) {
split = strchr(tty, ',');
if (split != NULL)
}
a->tout.cb = respawn;
- for (i = MAX_ARGS - 1; i >= 1; i--)
- a->argv[i] = a->argv[i - 1];
- a->argv[0] = ask;
+ /* shift arguments only if not yet done */
+ if (a->argv[0] != ask) {
+ for (i = MAX_ARGS - 1; i >= 1; i--)
+ a->argv[i] = a->argv[i - 1];
+ a->argv[0] = ask;
+ }
a->respawn = 500;
a->proc.cb = child_exit;
- fork_worker(a);
+ if (!a->proc.pid)
+ fork_worker(a);
}
static void rcrespawn(struct init_action *a)
a->respawn = 500;
a->proc.cb = child_exit;
- fork_worker(a);
+ if (!a->proc.pid)
+ fork_worker(a);
}
static struct init_handler handlers[] = {
list_for_each_entry(a, &actions, list)
if (!strcmp(a->handler->name, handler)) {
- if (a->handler->multi) {
- a->handler->cb(a);
- continue;
- }
a->handler->cb(a);
- break;
+ if (!a->handler->multi)
+ break;
}
}
+/*
+ * Stop all inittab actions: mark each one as not to be respawned and send
+ * SIGKILL to every worker that currently has a pid recorded.
+ */
+void procd_inittab_kill(void)
+{
+	struct init_action *action;
+
+	list_for_each_entry(action, &actions, list) {
+		/* a negative value makes child_exit() return without re-arming */
+		action->respawn = -1;
+
+		if (action->proc.pid == 0)
+			continue;
+
+		kill(action->proc.pid, SIGKILL);
+	}
+}
+
void procd_inittab(void)
{
#define LINE_LEN 128
/*
* Copyright (C) 2015 Etienne CHAMPETIER <champetier.etienne@gmail.com>
+ * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 2.1
#define _GNU_SOURCE 1
#include <syslog.h>
#include <sys/prctl.h>
-
#include <libubox/blobmsg.h>
#include <libubox/blobmsg_json.h>
#include "../capabilities-names.h"
#include "capabilities.h"
+#define JAIL_CAP_ERROR (1LLU << (CAP_LAST_CAP+1))
+#define JAIL_CAP_ALL (0xffffffffffffffffLLU)
+
+/*
+ * Look up a capability by name (compared case-insensitively) in
+ * capabilities_names[]. Returns its index, or -1 when no entry matches.
+ */
static int find_capabilities(const char *name)
{
int i;
for (i = 0; i <= CAP_LAST_CAP; i++)
- if (capabilities_names[i] && !strcmp(capabilities_names[i], name))
+ if (capabilities_names[i] && !strcasecmp(capabilities_names[i], name))
return i;
return -1;
}
-int drop_capabilities(const char *file)
+enum {
+ OCI_CAPABILITIES_BOUNDING,
+ OCI_CAPABILITIES_EFFECTIVE,
+ OCI_CAPABILITIES_INHERITABLE,
+ OCI_CAPABILITIES_PERMITTED,
+ OCI_CAPABILITIES_AMBIENT,
+ __OCI_CAPABILITIES_MAX
+};
+
+static const struct blobmsg_policy oci_capabilities_policy[] = {
+ [OCI_CAPABILITIES_BOUNDING] = { "bounding", BLOBMSG_TYPE_ARRAY },
+ [OCI_CAPABILITIES_EFFECTIVE] = { "effective", BLOBMSG_TYPE_ARRAY },
+ [OCI_CAPABILITIES_INHERITABLE] = { "inheritable", BLOBMSG_TYPE_ARRAY },
+ [OCI_CAPABILITIES_PERMITTED] = { "permitted", BLOBMSG_TYPE_ARRAY },
+ [OCI_CAPABILITIES_AMBIENT] = { "ambient", BLOBMSG_TYPE_ARRAY },
+};
+
+static uint64_t parseOCIcap(struct blob_attr *msg)
{
- enum {
- CAP_KEEP,
- CAP_DROP,
- __CAP_MAX
- };
- static const struct blobmsg_policy policy[__CAP_MAX] = {
- [CAP_KEEP] = { .name = "cap.keep", .type = BLOBMSG_TYPE_ARRAY },
- [CAP_DROP] = { .name = "cap.drop", .type = BLOBMSG_TYPE_ARRAY },
- };
- struct blob_buf b = { 0 };
- struct blob_attr *tb[__CAP_MAX];
struct blob_attr *cur;
- int rem, cap;
- char *name;
- uint64_t capdrop = 0LLU;
+ int rem;
+ uint64_t caps = 0;
+ int capnum;
- DEBUG("dropping capabilities\n");
+ /* each capset is optional, set all-1 mask if absent */
+ if (!msg)
+ return JAIL_CAP_ALL;
- blob_buf_init(&b, 0);
- if (!blobmsg_add_json_from_file(&b, file)) {
- ERROR("failed to load %s\n", file);
- return -1;
+ blobmsg_for_each_attr(cur, msg, rem) {
+ capnum = find_capabilities(blobmsg_get_string(cur));
+ if (capnum < 0)
+ return JAIL_CAP_ERROR;
+
+ caps |= (1LLU << capnum);
+ }
+
+ return caps;
+}
+
+/*
+ * Fill `capset` from an OCI "capabilities" object.
+ *
+ * The sets are parsed in the order bounding, effective, inheritable,
+ * permitted, ambient; each one is stored as soon as it has been parsed.
+ * Parsing stops at the first set containing an unknown capability name, in
+ * which case EINVAL is returned and `apply` is left untouched. On success
+ * `apply` is set to 1 and 0 is returned.
+ */
+int parseOCIcapabilities(struct jail_capset *capset, struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_CAPABILITIES_MAX];
+	/* destination of each parsed set, indexed like the policy table */
+	uint64_t *const dest[__OCI_CAPABILITIES_MAX] = {
+		[OCI_CAPABILITIES_BOUNDING] = &capset->bounding,
+		[OCI_CAPABILITIES_EFFECTIVE] = &capset->effective,
+		[OCI_CAPABILITIES_INHERITABLE] = &capset->inheritable,
+		[OCI_CAPABILITIES_PERMITTED] = &capset->permitted,
+		[OCI_CAPABILITIES_AMBIENT] = &capset->ambient,
+	};
+	int i;
+
+	blobmsg_parse(oci_capabilities_policy, __OCI_CAPABILITIES_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	for (i = 0; i < __OCI_CAPABILITIES_MAX; i++) {
+		uint64_t mask = parseOCIcap(tb[i]);
+
+		if (mask == JAIL_CAP_ERROR)
+			return EINVAL;
+
+		*dest[i] = mask;
+	}
+
+	capset->apply = 1;
+
+	return 0;
+}
+
+
+int applyOCIcapabilities(struct jail_capset ocicapset, uint64_t retain)
+{
+ struct __user_cap_header_struct uh = {};
+ struct __user_cap_data_struct ud[2];
+ int cap;
+ int is_set;
+
+ if (!ocicapset.apply)
+ return 0;
+
+ /* drop from bounding set */
+ if (ocicapset.bounding != JAIL_CAP_ALL) {
+ for (cap = 0; cap <= CAP_LAST_CAP; cap++) {
+ if (!prctl(PR_CAPBSET_READ, cap, 0, 0, 0)) {
+ /* can't raise */
+ if (ocicapset.bounding & (1LLU << cap))
+ ERROR("capability %s (%d) is not in bounding set\n", capabilities_names[cap], cap);
+
+ continue;
+ }
+ if ( ((ocicapset.bounding | retain) & (1LLU << cap)) == 0) {
+ DEBUG("dropping capability %s (%d) from bounding set\n", capabilities_names[cap], cap);
+ if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
+ ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %m\n", cap);
+ return errno;
+ }
+ } else {
+ DEBUG("keeping capability %s (%d)\n", capabilities_names[cap], cap);
+ }
+ }
}
- blobmsg_parse(policy, __CAP_MAX, tb, blob_data(b.head), blob_len(b.head));
- if (!tb[CAP_KEEP] && !tb[CAP_DROP]) {
- ERROR("failed to parse %s\n", file);
+ /* set effective, permitted and inheritable */
+ uh.version = _LINUX_CAPABILITY_VERSION_3;
+ uh.pid = getpid();
+
+ if (capget(&uh, ud)) {
+ ERROR("capget() failed\n");
return -1;
}
- blobmsg_for_each_attr(cur, tb[CAP_KEEP], rem) {
- name = blobmsg_get_string(cur);
- if (!name) {
- ERROR("invalid capability name in cap.keep\n");
- return -1;
- }
- cap = find_capabilities(name);
- if (cap == -1) {
- ERROR("unknown capability %s in cap.keep\n", name);
- return -1;
- }
- capdrop |= (1LLU << cap);
+ DEBUG("old capabilities: Pe=%016llx Pp=%016llx Pi=%016llx\n",
+ 0LLU | ud[0].effective | (0LLU | ud[1].effective) << 32,
+ 0LLU | ud[0].permitted | (0LLU | ud[1].permitted) << 32,
+ 0LLU | ud[0].inheritable | (0LLU | ud[1].inheritable) << 32);
+
+ if (ocicapset.effective != JAIL_CAP_ALL) {
+ ud[0].effective = (ocicapset.effective | retain) & 0xFFFFFFFFU;
+ ud[1].effective = ((ocicapset.effective | retain) >> 32) & 0xFFFFFFFFU;
}
- if (capdrop == 0LLU) {
- DEBUG("cap.keep empty -> only dropping capabilities from cap.drop (blacklist)\n");
- capdrop = 0xffffffffffffffffLLU;
- } else {
- DEBUG("cap.keep has at least one capability -> dropping every capabilities not in cap.keep (whitelist)\n");
+ if (ocicapset.permitted != JAIL_CAP_ALL) {
+ ud[0].permitted = (ocicapset.permitted | retain) & 0xFFFFFFFFU;
+ ud[1].permitted = ((ocicapset.permitted | retain) >> 32) & 0xFFFFFFFFU;
}
- blobmsg_for_each_attr(cur, tb[CAP_DROP], rem) {
- name = blobmsg_get_string(cur);
- if (!name) {
- ERROR("invalid capability name in cap.drop\n");
- return -1;
- }
- cap = find_capabilities(name);
- if (cap == -1) {
- ERROR("unknown capability %s in cap.drop\n", name);
- return -1;
- }
- capdrop &= ~(1LLU << cap);
+ if (ocicapset.inheritable != JAIL_CAP_ALL) {
+ ud[0].inheritable = (ocicapset.inheritable | retain) & 0xFFFFFFFFU;
+ ud[1].inheritable = ((ocicapset.inheritable | retain) >> 32) & 0xFFFFFFFFU;
+ }
+
+ DEBUG("new capabilities: Pe=%016llx Pp=%016llx Pi=%016llx\n",
+ 0LLU | ud[0].effective | (0LLU | ud[1].effective) << 32,
+ 0LLU | ud[0].permitted | (0LLU | ud[1].permitted) << 32,
+ 0LLU | ud[0].inheritable | (0LLU | ud[1].inheritable) << 32);
+
+ if (capset(&uh, ud)) {
+ ERROR("capset() failed\n");
+ return -1;
}
- for (cap = 0; cap <= CAP_LAST_CAP; cap++) {
- if ( (capdrop & (1LLU << cap)) == 0) {
- DEBUG("dropping capability %s (%d)\n", capabilities_names[cap], cap);
- if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
- ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %m\n", cap);
- return errno;
+ /* edit ambient set */
+ if (ocicapset.ambient != JAIL_CAP_ALL) {
+ for (cap = 0; cap <= CAP_LAST_CAP; cap++) {
+ is_set = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, cap, 0, 0);
+ if ( (ocicapset.ambient & (1LLU << cap)) == 0) {
+ if (is_set) {
+ DEBUG("dropping capability %s (%d) from ambient set\n", capabilities_names[cap], cap);
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, cap, 0, 0)) {
+ ERROR("prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, %d, 0, 0) failed: %m\n", cap);
+ return errno;
+ }
+ }
+ } else {
+ if (!is_set) {
+ DEBUG("raising capability %s (%d) to ambient set\n", capabilities_names[cap], cap);
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0)) {\
+ ERROR("prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, %d, 0, 0) failed: %m\n", cap);
+ return errno;
+ }
+ }
}
- } else {
- DEBUG("keeping capability %s (%d)\n", capabilities_names[cap], cap);
}
}
return 0;
}
+
+/*
+ * Load a JSON file and parse it with parseOCIcapabilities().
+ *
+ * Returns 1 when the file cannot be loaded, otherwise whatever
+ * parseOCIcapabilities() returns. The temporary blob buffer is released in
+ * either case.
+ */
+int parseOCIcapabilities_from_file(struct jail_capset *capset, const char *file)
+{
+	struct blob_buf buf = { 0 };
+	int rc;
+
+	blob_buf_init(&buf, 0);
+
+	if (blobmsg_add_json_from_file(&buf, file)) {
+		rc = parseOCIcapabilities(capset, buf.head);
+	} else {
+		ERROR("failed to load %s\n", file);
+		rc = 1;
+	}
+
+	blob_buf_free(&buf);
+	return rc;
+}
#ifndef _JAIL_CAPABILITIES_H_
#define _JAIL_CAPABILITIES_H_
-int drop_capabilities(const char *file);
+#include <libubox/blobmsg.h>
+#include <linux/capability.h>
+
+/*
+ * Capability sets requested for a jailed process, one 64-bit mask per set.
+ * NOTE(review): the code applying these tests single capabilities with
+ * (1LLU << cap), so bit N presumably stands for capability number N --
+ * confirm against parseOCIcapabilities().
+ */
+struct jail_capset {
+ uint64_t bounding;
+ uint64_t effective;
+ uint64_t inheritable;
+ uint64_t permitted;
+ uint64_t ambient;
+ /* NOTE(review): looks like a "sets were parsed, apply them" flag -- confirm with the parser */
+ uint8_t apply;
+};
+
+int parseOCIcapabilities(struct jail_capset *capset, struct blob_attr *msg);
+int parseOCIcapabilities_from_file(struct jail_capset *capset, const char *file);
+int applyOCIcapabilities(struct jail_capset capset, uint64_t retain);
+
+/* capget/capset syscall wrappers are provided by libc */
+extern int capget(cap_user_header_t header, cap_user_data_t data);
+extern int capset(cap_user_header_t header, const cap_user_data_t data);
#endif
--- /dev/null
+/*
+ * Copyright (C) 2021 Daniel Golle <daniel@makrotopia.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * somehow emulate devices.allow/devices.deny using eBPF
+ *
+ * OCI run-time spec defines the syntax for allowing/denying access
+ * to devices according to the definition of cgroup-v1 in the Kernel
+ * as described in Documentation/admin-guide/cgroup-v1.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#ifdef __GLIBC__
+#include <sys/cdefs.h>
+#else
+#include <sys/reg.h>
+#endif
+#include <sys/syscall.h>
+
+#include <libubox/blobmsg.h>
+#include <libubox/blobmsg_json.h>
+#include <libubox/list.h>
+
+#include "cgroups.h"
+#include "cgroups-bpf.h"
+#include "log.h"
+
+static struct bpf_insn *program = NULL;
+static int bpf_total_insn = 0;
+static const char *license = "GPL";
+
+/*
+ * Thin wrapper invoking the bpf system call directly through syscall().
+ * The long result of syscall() is narrowed to int for the callers.
+ */
+static int
+syscall_bpf (int cmd, union bpf_attr *attr, unsigned int size)
+{
+	long rc = syscall (__NR_bpf, cmd, attr, size);
+
+	return (int) rc;
+}
+
+/*
+ * from crun/src/libcrun/ebpf.c
+ *
+ * Each macro below expands to a compound literal of type struct bpf_insn,
+ * i.e. exactly one eBPF instruction with all five fields spelled out.
+ */
+/* 32-bit ALU operation OP on DST with immediate operand IMM */
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn){ .code = BPF_ALU | BPF_OP (OP) | BPF_K, .dst_reg = DST, .src_reg = 0, .off = 0, .imm = IMM })
+
+/* load SIZE bytes from memory at SRC + OFF into DST */
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn){ \
+ .code = BPF_LDX | BPF_SIZE (SIZE) | BPF_MEM, .dst_reg = DST, .src_reg = SRC, .off = OFF, .imm = 0 })
+
+/* 64-bit register move SRC -> DST */
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn){ .code = BPF_ALU64 | BPF_MOV | BPF_X, .dst_reg = DST, .src_reg = SRC, .off = 0, .imm = 0 })
+
+/* unconditional jump by OFF instructions */
+#define BPF_JMP_A(OFF) \
+ ((struct bpf_insn){ .code = BPF_JMP | BPF_JA, .dst_reg = 0, .src_reg = 0, .off = OFF, .imm = 0 })
+
+/* conditional jump by OFF, comparing DST against immediate IMM using OP */
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn){ .code = BPF_JMP | BPF_OP (OP) | BPF_K, .dst_reg = DST, .src_reg = 0, .off = OFF, .imm = IMM })
+
+/* conditional jump by OFF, comparing DST against register SRC using OP */
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn){ .code = BPF_JMP | BPF_OP (OP) | BPF_X, .dst_reg = DST, .src_reg = SRC, .off = OFF, .imm = 0 })
+
+/* 64-bit move of immediate IMM into DST */
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn){ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = DST, .src_reg = 0, .off = 0, .imm = IMM })
+
+/* 32-bit register move SRC -> DST */
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn){ .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = DST, .src_reg = SRC, .off = 0, .imm = 0 })
+
+/* leave the program; the code generator below puts the verdict into R0 first */
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn){ .code = BPF_JMP | BPF_EXIT, .dst_reg = 0, .src_reg = 0, .off = 0, .imm = 0 })
+
+/*
+ * taken from systemd.
+ *
+ * Fixed prologue copied to the start of every generated program. It unpacks
+ * the context pointed to by R1 into R2..R5 so the per-rule checks can
+ * compare registers only.
+ * NOTE(review): offsets 0/4/8 presumably follow the kernel's
+ * struct bpf_cgroup_dev_ctx (access_type, major, minor) -- confirm against
+ * linux/bpf.h.
+ */
+static const struct bpf_insn pre_insn[] = {
+ /* type -> R2. */
+ /* low 16 bits of the word at offset 0 */
+ BPF_LDX_MEM (BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ BPF_ALU32_IMM (BPF_AND, BPF_REG_2, 0xFFFF),
+ /* access -> R3. */
+ /* high 16 bits of the same word */
+ BPF_LDX_MEM (BPF_W, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ALU32_IMM (BPF_RSH, BPF_REG_3, 16),
+ /* major -> R4. */
+ BPF_LDX_MEM (BPF_W, BPF_REG_4, BPF_REG_1, 4),
+ /* minor -> R5. */
+ BPF_LDX_MEM (BPF_W, BPF_REG_5, BPF_REG_1, 8),
+};
+
+/* indices into the parsed attribute table of one device rule */
+enum {
+ OCI_LINUX_CGROUPS_DEVICES_ALLOW,
+ OCI_LINUX_CGROUPS_DEVICES_TYPE,
+ OCI_LINUX_CGROUPS_DEVICES_MAJOR,
+ OCI_LINUX_CGROUPS_DEVICES_MINOR,
+ OCI_LINUX_CGROUPS_DEVICES_ACCESS,
+ __OCI_LINUX_CGROUPS_DEVICES_MAX,
+};
+
+/*
+ * blobmsg policy for one device rule object: JSON key and expected type for
+ * each index above. "allow" is the only key the parser insists on.
+ */
+static const struct blobmsg_policy oci_linux_cgroups_devices_policy[] = {
+ [OCI_LINUX_CGROUPS_DEVICES_ALLOW] = { "allow", BLOBMSG_TYPE_BOOL },
+ [OCI_LINUX_CGROUPS_DEVICES_TYPE] = { "type", BLOBMSG_TYPE_STRING },
+ [OCI_LINUX_CGROUPS_DEVICES_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_DEVICES_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_DEVICES_ACCESS] = { "access", BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * cgroup-v1 devices got a (default) behaviour and a list of exceptions.
+ * define datatypes similar to the legacy kernel code.
+ */
+/* "any device type" and "any access" masks; rules equal to these need no check */
+#define DEVCG_DEV_ALL (BPF_DEVCG_DEV_BLOCK | BPF_DEVCG_DEV_CHAR)
+#define DEVCG_ACC_ALL (BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD)
+
+/* verdict used when no exception matches */
+enum devcg_behavior {
+ DEVCG_DEFAULT_NONE,
+ DEVCG_DEFAULT_ALLOW,
+ DEVCG_DEFAULT_DENY,
+};
+
+/* one allow/deny rule; a major or minor of ~0 acts as a wildcard */
+struct dev_exception_item {
+ uint32_t major, minor;
+ short type; /* BPF_DEVCG_DEV_* bits */
+ short access; /* BPF_DEVCG_ACC_* bits */
+ struct list_head list;
+ bool allow; /* true: matching access is permitted, false: denied */
+};
+
+/*
+ * add a bunch of default rules
+ */
+/*
+ * Push a heap-allocated copy of every built-in rule onto the head of
+ * 'exceptions'. Because each copy goes to the head, the table ends up in the
+ * list in reverse order.
+ *
+ * Returns 0 on success or ENOMEM if an allocation fails; copies added before
+ * the failure stay on the list for the caller to release.
+ */
+static int add_default_exceptions(struct list_head *exceptions)
+{
+	struct dev_exception_item *item;
+	size_t idx;
+	/* from crun/src/libcrun/cgroup.c */
+	const struct dev_exception_item defrules[] = {
+		/* always allow mknod */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = ~0, .minor = ~0, .access = BPF_DEVCG_ACC_MKNOD },
+		{ .allow = true, .type = BPF_DEVCG_DEV_BLOCK, .major = ~0, .minor = ~0, .access = BPF_DEVCG_ACC_MKNOD },
+		/* /dev/null */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 3, .access = DEVCG_ACC_ALL },
+		/* /dev/random */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 8, .access = DEVCG_ACC_ALL },
+		/* /dev/full */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 7, .access = DEVCG_ACC_ALL },
+		/* /dev/tty */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 5, .minor = 0, .access = DEVCG_ACC_ALL },
+		/* /dev/zero */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 5, .access = DEVCG_ACC_ALL },
+		/* /dev/urandom */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 9, .access = DEVCG_ACC_ALL },
+		/* /dev/console */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 5, .minor = 1, .access = DEVCG_ACC_ALL },
+		/* /dev/pts/[0-255] */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 136, .minor = ~0, .access = DEVCG_ACC_ALL },
+		/* /dev/ptmx */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 5, .minor = 2, .access = DEVCG_ACC_ALL },
+		/* /dev/net/tun */
+		{ .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 10, .minor = 200, .access = DEVCG_ACC_ALL },
+	};
+	const size_t count = sizeof(defrules) / sizeof(defrules[0]);
+
+	for (idx = 0; idx < count; ++idx) {
+		item = malloc(sizeof(*item));
+		if (!item)
+			return ENOMEM;
+
+		/* head insertion: the last table entry becomes the first list entry */
+		memcpy(item, &defrules[idx], sizeof(*item));
+		list_add(&item->list, exceptions);
+	}
+
+	return 0;
+}
+
+/*
+ * free all exceptions in the list
+ */
+/*
+ * Unlink and free every dev_exception_item on 'freelist'; the list head
+ * itself is left in place and ends up empty.
+ */
+static void flush_exceptions(struct list_head *freelist)
+{
+	struct dev_exception_item *item, *next;
+
+	/* the _safe iterator simply does nothing on an empty list */
+	list_for_each_entry_safe(item, next, freelist, list) {
+		list_del(&item->list);
+		free(item);
+	}
+}
+
+/*
+ * parse OCI cgroups devices and translate into cgroups-v2 eBPF program
+ */
+/*
+ * Translate the OCI "devices" rule array in 'msg' into a cgroup-device eBPF
+ * program kept in the file-level 'program' / 'bpf_total_insn' variables.
+ *
+ * Every array entry must carry "allow". An entry whose type is "a" (or is
+ * missing) flushes all rules collected so far and only selects the default
+ * verdict; any other entry becomes an exception. The built-in default rules
+ * are pushed last and therefore are evaluated first.
+ * NOTE(review): a wildcard-type entry is treated as "replace the default"
+ * even when it names a specific major/minor/access -- confirm this matches
+ * the intended OCI semantics.
+ *
+ * Returns 0 on success, EINVAL for a malformed rule, ENOMEM when out of
+ * memory. On failure no new program is installed.
+ */
+int parseOCIlinuxcgroups_devices(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_LINUX_CGROUPS_DEVICES_MAX];
+	struct blob_attr *cur;
+	int rem, ret = 0;
+	int bpf_type, bpf_access;
+	unsigned char acidx;
+	bool allow = false,
+	     has_access = false,
+	     has_type = false,
+	     has_major = false,
+	     has_minor = false;
+	int total_ins = 0,
+	    cur_ins = 0,
+	    dump_ins,
+	    pre_insn_len = sizeof(pre_insn) / sizeof(struct bpf_insn),
+	    next_ins;
+	char *access, *devtype;
+	uint32_t devmajor, devminor;
+	struct dev_exception_item *dl;
+	struct list_head exceptions;
+	enum devcg_behavior behavior = DEVCG_DEFAULT_ALLOW;
+	INIT_LIST_HEAD(&exceptions);
+
+	/* parse according to OCI spec */
+	blobmsg_for_each_attr(cur, msg, rem) {
+		blobmsg_parse(oci_linux_cgroups_devices_policy, __OCI_LINUX_CGROUPS_DEVICES_MAX,
+			      tb, blobmsg_data(cur), blobmsg_len(cur));
+
+		if (!tb[OCI_LINUX_CGROUPS_DEVICES_ALLOW]) {
+			ret = EINVAL;
+			goto out;
+		}
+
+		allow = blobmsg_get_bool(tb[OCI_LINUX_CGROUPS_DEVICES_ALLOW]);
+
+		bpf_access = 0;
+		if (tb[OCI_LINUX_CGROUPS_DEVICES_ACCESS]) {
+			access = blobmsg_get_string(tb[OCI_LINUX_CGROUPS_DEVICES_ACCESS]);
+			if ((strlen(access) > 3) || (strlen(access) == 0)) {
+				ret = EINVAL;
+				goto out;
+			}
+
+			for (acidx = 0; acidx < strlen(access); ++acidx) {
+				switch (access[acidx]) {
+				case 'r':
+					bpf_access |= BPF_DEVCG_ACC_READ;
+					break;
+				case 'w':
+					bpf_access |= BPF_DEVCG_ACC_WRITE;
+					break;
+				case 'm':
+					bpf_access |= BPF_DEVCG_ACC_MKNOD;
+					break;
+				default:
+					ret = EINVAL;
+					goto out;
+				}
+			}
+		}
+
+		/* no "access" key means all access types */
+		if (!bpf_access)
+			bpf_access = DEVCG_ACC_ALL;
+
+		bpf_type = 0;
+		if (tb[OCI_LINUX_CGROUPS_DEVICES_TYPE]) {
+			devtype = blobmsg_get_string(tb[OCI_LINUX_CGROUPS_DEVICES_TYPE]);
+
+			/* only the first character is significant; "" hits default */
+			switch (devtype[0]) {
+			case 'c':
+				bpf_type = BPF_DEVCG_DEV_CHAR;
+				break;
+			case 'b':
+				bpf_type = BPF_DEVCG_DEV_BLOCK;
+				break;
+			case 'a':
+				bpf_type = DEVCG_DEV_ALL;
+				break;
+			default:
+				ret = EINVAL;
+				goto out;
+			}
+		}
+
+		/* no "type" key means all device types */
+		if (!bpf_type)
+			bpf_type = DEVCG_DEV_ALL;
+
+		if (tb[OCI_LINUX_CGROUPS_DEVICES_MAJOR])
+			devmajor = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_DEVICES_MAJOR]);
+		else
+			devmajor = ~0;
+
+		if (tb[OCI_LINUX_CGROUPS_DEVICES_MINOR])
+			devminor = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_DEVICES_MINOR]);
+		else
+			devminor = ~0;
+
+		if (bpf_type == DEVCG_DEV_ALL) {
+			/* wildcard => change default policy and flush all existing rules */
+			flush_exceptions(&exceptions);
+			behavior = allow?DEVCG_DEFAULT_ALLOW:DEVCG_DEFAULT_DENY;
+		} else {
+			/* allocate and populate record for exception */
+			dl = malloc(sizeof(struct dev_exception_item));
+			if (!dl) {
+				/* same code as every other allocation failure in this file */
+				ret = ENOMEM;
+				goto out;
+			}
+			dl->allow = allow;
+			dl->type = bpf_type;
+			dl->access = bpf_access;
+			dl->major = devmajor;
+			dl->minor = devminor;
+
+			/* push to exceptions list, last goes first */
+			list_add(&dl->list, &exceptions);
+		}
+	}
+
+	/* add default rules */
+	ret = add_default_exceptions(&exceptions);
+	if (ret)
+		goto out;
+
+	/*
+	 * calculate number of instructions to allocate: one compare per
+	 * type/major/minor check, three for an access check, plus the
+	 * two-instruction "set verdict, exit" tail of every rule
+	 */
+	list_for_each_entry(dl, &exceptions, list) {
+		has_access = dl->access != DEVCG_ACC_ALL;
+		has_type = dl->type != DEVCG_DEV_ALL;
+		has_major = dl->major != ~0;
+		has_minor = dl->minor != ~0;
+
+		total_ins += (has_type ? 1 : 0) + (has_access ? 3 : 0) + (has_major ? 1 : 0) + (has_minor ? 1 : 0) + 2;
+	}
+
+	/* account for loader instructions */
+	total_ins += pre_insn_len;
+
+	/* final accept/deny block */
+	total_ins += 2;
+
+	/* release a program left over from an earlier call instead of leaking it */
+	free(program);
+	bpf_total_insn = 0;
+
+	/* allocate memory for eBPF program */
+	program = calloc(total_ins, sizeof(struct bpf_insn));
+	if (!program) {
+		ret = ENOMEM;
+		goto out;
+	}
+
+	/* copy program loader instructions */
+	memcpy(program, &pre_insn, sizeof(pre_insn));
+	cur_ins = pre_insn_len;
+
+	/* generate eBPF program */
+	list_for_each_entry(dl, &exceptions, list) {
+		has_access = dl->access != DEVCG_ACC_ALL;
+		has_type = dl->type != DEVCG_DEV_ALL;
+		has_major = dl->major != ~0;
+		has_minor = dl->minor != ~0;
+
+		/*
+		 * next_ins is always the jump distance from the instruction
+		 * about to be emitted to the first instruction of the next
+		 * rule, i.e. "skip the rest of this rule"
+		 */
+		next_ins = (has_type ? 1 : 0) + (has_access ? 3 : 0) + (has_major ? 1 : 0) + (has_minor ? 1 : 0) + 1;
+
+		if (has_type) {
+			program[cur_ins++] = BPF_JMP_IMM(BPF_JNE, BPF_REG_2, dl->type, next_ins);
+			--next_ins;
+		}
+
+		if (has_access) {
+			/* skip the rule unless (requested & allowed) == requested */
+			program[cur_ins++] = BPF_MOV32_REG(BPF_REG_1, BPF_REG_3);
+			program[cur_ins++] = BPF_ALU32_IMM(BPF_AND, BPF_REG_1, dl->access);
+			program[cur_ins++] = BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, next_ins - 2);
+			next_ins -= 3;
+		}
+
+		if (has_major) {
+			program[cur_ins++] = BPF_JMP_IMM(BPF_JNE, BPF_REG_4, dl->major, next_ins);
+			--next_ins;
+		}
+
+		if (has_minor) {
+			program[cur_ins++] = BPF_JMP_IMM(BPF_JNE, BPF_REG_5, dl->minor, next_ins);
+			--next_ins;
+		}
+
+		program[cur_ins++] = BPF_MOV64_IMM(BPF_REG_0, dl->allow ? 1 : 0);
+		program[cur_ins++] = BPF_EXIT_INSN();
+	}
+
+	/* default behavior */
+	program[cur_ins++] = BPF_MOV64_IMM(BPF_REG_0, (behavior == DEVCG_DEFAULT_ALLOW)?1:0);
+	program[cur_ins++] = BPF_EXIT_INSN();
+
+	/*
+	 * the sizing pass and the emit pass must agree; checked before the
+	 * debug dump so the dump cannot mask a mismatch
+	 */
+	assert(cur_ins == total_ins);
+
+	if (debug) {
+		fprintf(stderr, "cgroup devices:\na > devices.%s\n",
+			(behavior == DEVCG_DEFAULT_ALLOW)?"allow":"deny");
+
+		list_for_each_entry(dl, &exceptions, list)
+			fprintf(stderr, "%c %d:%d %s%s%s > devices.%s\n",
+				(dl->type == DEVCG_DEV_ALL)?'a':
+				(dl->type == BPF_DEVCG_DEV_CHAR)?'c':'b',
+				(dl->major == ~0)?-1:(int)dl->major,
+				(dl->minor == ~0)?-1:(int)dl->minor,
+				(dl->access & BPF_DEVCG_ACC_READ)?"r":"",
+				(dl->access & BPF_DEVCG_ACC_WRITE)?"w":"",
+				(dl->access & BPF_DEVCG_ACC_MKNOD)?"m":"",
+				(dl->allow)?"allow":"deny");
+
+		fprintf(stderr, "generated cgroup-devices eBPF program:\n");
+		fprintf(stderr, " [idx]\tcode\t dest\t src\t off\t imm\n");
+		for (dump_ins = 0; dump_ins < total_ins; dump_ins++)
+			fprintf(stderr, " [%03d]\t%02hhx\t%3hhu\t%3hhu\t%04hx\t%d\n", dump_ins,
+				program[dump_ins].code,
+				program[dump_ins].dst_reg,
+				program[dump_ins].src_reg,
+				program[dump_ins].off,
+				program[dump_ins].imm);
+	}
+
+	bpf_total_insn = total_ins;
+	ret = 0;
+
+out:
+	flush_exceptions(&exceptions);
+	return ret;
+}
+
+/*
+ * attach eBPF program to cgroup
+ */
+/*
+ * Load the generated device program into the kernel and attach it to the
+ * cgroup directory open at 'cgroup_dirfd'.
+ *
+ * Returns 0 when no program was generated, EIO when loading fails, otherwise
+ * the raw result of the BPF_PROG_ATTACH call (0 or -1 with errno set).
+ */
+int attach_cgroups_ebpf(int cgroup_dirfd) {
+	union bpf_attr load_attr, attach_attr;
+	int prog_fd, ret, saved_errno;
+
+	if (!program)
+		return 0;
+
+	/* bpf(2) wants every unused byte of the attribute union to be zero */
+	memset(&load_attr, 0, sizeof(load_attr));
+	load_attr.prog_type = BPF_PROG_TYPE_CGROUP_DEVICE;
+	/* pointers travel as 64-bit integers whatever the native word size */
+	load_attr.license = (uint64_t)(uintptr_t)license;
+	load_attr.insns = (uint64_t)(uintptr_t)program;
+	load_attr.insn_cnt = bpf_total_insn;
+
+	prog_fd = syscall_bpf(BPF_PROG_LOAD, &load_attr, sizeof(load_attr));
+	if (prog_fd < 0)
+		return EIO;
+
+	memset(&attach_attr, 0, sizeof(attach_attr));
+	attach_attr.attach_type = BPF_CGROUP_DEVICE;
+	attach_attr.target_fd = cgroup_dirfd;
+	attach_attr.attach_bpf_fd = prog_fd;
+
+	ret = syscall_bpf(BPF_PROG_ATTACH, &attach_attr, sizeof (attach_attr));
+
+	/*
+	 * the descriptor is not needed once the attach call has returned;
+	 * close it without disturbing the errno of a failed attach
+	 */
+	saved_errno = errno;
+	close(prog_fd);
+	errno = saved_errno;
+
+	return ret;
+}
--- /dev/null
+/*
+ * Copyright (C) 2021 Daniel Golle <daniel@makrotopia.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _JAIL_CGROUPS_BPF_H
+#define _JAIL_CGROUPS_BPF_H
+
+int parseOCIlinuxcgroups_devices(struct blob_attr *msg);
+int attach_cgroups_ebpf(int cgroup_dirfd);
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * reads unified cgroup config as proposed in
+ * https://github.com/opencontainers/runtime-spec/pull/1040
+ * attempt conversion from cgroup1 -> cgroup2
+ * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2
+ *
+ * ToDo:
+ * - convert cgroup1 net_prio and net_cls to eBPF program
+ * - rdma (anyone?) intelrdt (anyone?)
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <inttypes.h>
+
+#include <libubox/avl.h>
+#include <libubox/avl-cmp.h>
+#include <libubox/blobmsg.h>
+#include <libubox/list.h>
+#include <libubox/utils.h>
+
+#include "log.h"
+#include "cgroups.h"
+#include "cgroups-bpf.h"
+
+#define CGROUP_ROOT "/sys/fs/cgroup/"
+#define CGROUP_IO_WEIGHT_MAX 10000
+
+/*
+ * One pending cgroup2 assignment: the AVL key is a heap copy of the cgroup
+ * file name (e.g. "cpu.max"), 'val' a heap copy of the text to write to it.
+ */
+struct cgval {
+ struct avl_node avl;
+ char *val;
+};
+
+/* all pending assignments, ordered by file name, no duplicate keys */
+struct avl_tree cgvals;
+/* heap copy of the path handed to cgroups_init() */
+static char *cgroup_path;
+/* true once cgroups_init() has run; gates cgroups_free() */
+static bool initialized;
+
+/* mark the module as not initialized so cgroups_free() becomes a no-op */
+void cgroups_prepare(void) {
+ initialized = false;
+}
+
+/*
+ * Set up the empty table of pending assignments and remember a private copy
+ * of the cgroup path 'p'. Exits with ENOMEM if the copy cannot be allocated,
+ * like the other allocation failures in this file.
+ */
+void cgroups_init(const char *p) {
+	avl_init(&cgvals, avl_strcmp, false, NULL);
+
+	cgroup_path = strdup(p);
+	if (!cgroup_path)
+		exit(ENOMEM);
+
+	initialized = true;
+}
+
+/*
+ * Record that cgroup file 'key' shall be written with 'val'. A previous
+ * value for the same key is replaced (and reported at debug level).
+ * Both strings are copied; running out of memory terminates the process
+ * with ENOMEM instead of storing a NULL key or value.
+ */
+static void cgroups_set(const char *key, const char *val)
+{
+	struct cgval *valp;
+	char *newval;
+
+	/* copy first: a failed copy must not leave a half-built entry behind */
+	newval = strdup(val);
+	if (!newval)
+		exit(ENOMEM);
+
+	valp = avl_find_element(&cgvals, key, valp, avl);
+	if (!valp) {
+		valp = malloc(sizeof(struct cgval));
+		if (!valp)
+			exit(ENOMEM);
+
+		valp->avl.key = strdup(key);
+		if (!valp->avl.key)
+			exit(ENOMEM);
+
+		avl_insert(&cgvals, &valp->avl);
+	} else {
+		DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key, valp->val);
+		free(valp->val);
+	}
+
+	valp->val = newval;
+}
+
+/*
+ * Release every pending assignment (key, value and node) and the stored
+ * cgroup path. Does nothing unless cgroups_init() has run.
+ */
+void cgroups_free(void)
+{
+	struct cgval *entry, *next;
+
+	if (!initialized)
+		return;
+
+	avl_remove_all_elements(&cgvals, entry, avl, next) {
+		free((void *)(entry->avl.key));
+		free(entry->val);
+		free(entry);
+	}
+
+	free(cgroup_path);
+}
+
+/*
+ * Create the cgroup at cgroup_path, enable the controllers needed by the
+ * pending assignments in every ancestor's cgroup.subtree_control, write the
+ * assignments, attach the device eBPF program and finally move 'pid' into
+ * the cgroup. Individual failures are logged and the remaining steps still
+ * run.
+ *
+ * NOTE(review): the ancestor walk starts at offset strlen(CGROUP_ROOT) - 2,
+ * so cgroup_path is assumed to begin with CGROUP_ROOT -- confirm at the
+ * cgroups_init() call site.
+ */
+void cgroups_apply(pid_t pid)
+{
+	struct cgval *valp;
+	char *cdir, *ent;
+	int fd, cgdirfd;
+	size_t maxlen = strlen("cgroup.subtree_control");
+	size_t sclen;
+
+	bool cpuset = false,
+	     cpu = false,
+	     hugetlb = false,
+	     io = false,
+	     memory = false,
+	     pids = false,
+	     rdma = false;
+
+	/* longest possible content, all seven controllers, is 46 characters */
+	char subtree_control[64] = { 0 };
+
+	DEBUG("using cgroup path %s\n", cgroup_path);
+	mkdir_p(cgroup_path, 0700);
+
+	/* find which controllers need to be enabled */
+	avl_for_each_element(&cgvals, valp, avl) {
+		ent = (char *)valp->avl.key;
+		if (strlen(ent) > maxlen)
+			maxlen = strlen(ent);
+
+		if (!strncmp("cpuset.", ent, 7))
+			cpuset = true;
+		else if (!strncmp("cpu.", ent, 4))
+			cpu = true;
+		else if (!strncmp("hugetlb.", ent, 8))
+			hugetlb = true;
+		else if (!strncmp("io.", ent, 3))
+			io = true;
+		else if (!strncmp("memory.", ent, 7))
+			memory = true;
+		else if (!strncmp("pids.", ent, 5))
+			pids = true;
+		else if (!strncmp("rdma.", ent, 5))
+			rdma = true;
+	}
+
+	/* room for "<cgroup_path>/<longest file name>" plus terminator */
+	maxlen += strlen(cgroup_path) + 2;
+
+	if (cpuset)
+		strcat(subtree_control, "+cpuset ");
+
+	if (cpu)
+		strcat(subtree_control, "+cpu ");
+
+	if (hugetlb)
+		strcat(subtree_control, "+hugetlb ");
+
+	if (io)
+		strcat(subtree_control, "+io ");
+
+	if (memory)
+		strcat(subtree_control, "+memory ");
+
+	if (pids)
+		strcat(subtree_control, "+pids ");
+
+	if (rdma)
+		strcat(subtree_control, "+rdma ");
+
+	/*
+	 * remove trailing space; with no controller requested the string is
+	 * empty and there is nothing to strip (stripping would write in front
+	 * of the buffer)
+	 */
+	sclen = strlen(subtree_control);
+	if (sclen)
+		subtree_control[sclen - 1] = '\0';
+
+	ent = malloc(maxlen);
+	if (!ent)
+		exit(ENOMEM);
+
+	DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control);
+	cdir = &cgroup_path[strlen(CGROUP_ROOT) - 2];
+	while ((cdir = strchr(cdir + 1, '/'))) {
+		/*
+		 * cut the path at this separator just long enough to build the
+		 * ancestor's file name, then restore it at once so that no
+		 * error path below can leave cgroup_path truncated
+		 */
+		*cdir = '\0';
+		snprintf(ent, maxlen, "%s/cgroup.subtree_control", cgroup_path);
+		*cdir = '/';
+
+		DEBUG(" * %s\n", ent);
+		if ((fd = open(ent, O_WRONLY)) < 0) {
+			ERROR("can't open %s: %m\n", ent);
+			continue;
+		}
+
+		if (write(fd, subtree_control, strlen(subtree_control)) == -1)
+			ERROR("can't write to %s: %m\n", ent);
+
+		close(fd);
+	}
+
+	avl_for_each_element(&cgvals, valp, avl) {
+		DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp->avl.key, valp->val);
+		snprintf(ent, maxlen, "%s/%s", cgroup_path, (char *)valp->avl.key);
+		fd = open(ent, O_WRONLY);
+		if (fd < 0) {
+			ERROR("can't open %s: %m\n", ent);
+			continue;
+		}
+		if (dprintf(fd, "%s", valp->val) < 0) {
+			ERROR("can't write to %s: %m\n", ent);
+		};
+		close(fd);
+	}
+
+	cgdirfd = open(cgroup_path, O_DIRECTORY);
+	if (cgdirfd < 0) {
+		ERROR("can't open %s: %m\n", cgroup_path);
+	} else {
+		attach_cgroups_ebpf(cgdirfd);
+		close(cgdirfd);
+	}
+
+	snprintf(ent, maxlen, "%s/%s", cgroup_path, "cgroup.procs");
+	fd = open(ent, O_WRONLY);
+	if (fd < 0) {
+		/* report the file that failed, not just the directory */
+		ERROR("can't open %s: %m\n", ent);
+	} else {
+		if (dprintf(fd, "%d", pid) < 0)
+			ERROR("can't write to %s: %m\n", ent);
+		close(fd);
+	}
+
+	free(ent);
+}
+
+/* indices for one entry of the "weightDevice" array */
+enum {
+ OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR,
+ OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR,
+ OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT,
+ OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT,
+ __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX,
+};
+
+/* JSON key and expected type for each "weightDevice" entry field */
+static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy[] = {
+ [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
+};
+
+/* indices for one entry of any of the four "throttle*Device" arrays */
+enum {
+ OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR,
+ OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR,
+ OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE,
+ __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX,
+};
+
+/* JSON key and expected type for each throttle entry field; all three are required by the parser */
+static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy[] = {
+ [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE] = { "rate", BLOBMSG_CAST_INT64 },
+};
+
+/* indices for the top-level "blockIO" object */
+enum {
+ OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT,
+ OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT,
+ OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE,
+ OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE,
+ OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE,
+ OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE,
+ OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE,
+ __OCI_LINUX_CGROUPS_BLOCKIO_MAX,
+};
+
+/* JSON key and expected type for each top-level "blockIO" field */
+static const struct blobmsg_policy oci_linux_cgroups_blockio_policy[] = {
+ [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE] = { "weightDevice", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY },
+};
+
+/* device number pair, used as the AVL key of struct iomax_line */
+struct posix_dev {
+ uint64_t major;
+ uint64_t minor;
+};
+
+/*
+ * Accumulated throttle limits of one device, later rendered as one line of
+ * "io.max". A limit left at (uint64_t)-1 is treated as unset and omitted.
+ */
+struct iomax_line {
+ struct avl_node avl;
+ struct posix_dev dev;
+ uint64_t rbps;
+ uint64_t wbps;
+ uint64_t riops;
+ uint64_t wiops;
+};
+
+/*
+ * AVL comparator ordering struct posix_dev keys by major number first and
+ * minor number second. Returns -1, 0 or 1; 'ptr' is not used.
+ */
+static int avl_devcmp(const void *k1, const void *k2, void *ptr)
+{
+	struct posix_dev *lhs = (struct posix_dev *)k1;
+	struct posix_dev *rhs = (struct posix_dev *)k2;
+
+	if (lhs->major != rhs->major)
+		return (lhs->major < rhs->major) ? -1 : 1;
+
+	if (lhs->minor != rhs->minor)
+		return (lhs->minor < rhs->minor) ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * Return the iomax_line for device major:minor from 'iomax', creating and
+ * inserting a fresh one with all four limits unset (-1) when none exists.
+ * Terminates the process with ENOMEM if the allocation fails.
+ */
+static struct iomax_line *get_iomax_line(struct avl_tree *iomax, uint64_t major, uint64_t minor)
+{
+	struct iomax_line *line;
+	struct posix_dev key;
+
+	key.major = major;
+	key.minor = minor;
+
+	line = avl_find_element(iomax, &key, line, avl);
+	if (line)
+		return line;
+
+	line = malloc(sizeof(*line));
+	if (!line)
+		exit(ENOMEM);
+
+	line->dev.major = key.major;
+	line->dev.minor = key.minor;
+	/* the tree key points into the node itself, so it lives as long as the node */
+	line->avl.key = &line->dev;
+	line->rbps = line->wbps = line->riops = line->wiops = -1;
+	avl_insert(iomax, &line->avl);
+
+	return line;
+}
+
+/* selects which limit of an iomax_line a throttle array fills in */
+enum blockio_throttle_field {
+	BLOCKIO_THROTTLE_RBPS,
+	BLOCKIO_THROTTLE_WBPS,
+	BLOCKIO_THROTTLE_RIOPS,
+	BLOCKIO_THROTTLE_WIOPS,
+};
+
+/* free a NULL-terminated vector of heap strings and the vector itself; NULL is accepted */
+static void blockio_free_strv(char **strv)
+{
+	char **cur;
+
+	if (!strv)
+		return;
+
+	for (cur = strv; *cur; ++cur)
+		free(*cur);
+
+	free(strv);
+}
+
+/* join a NULL-terminated string vector into one heap string, ending every entry with '\n' */
+static char *blockio_join_lines(char **strv)
+{
+	size_t len = 1; /* 1 accounts for \0 at end of string */
+	char **cur, *out;
+
+	for (cur = strv; *cur; ++cur)
+		len += strlen(*cur) + 1; /* +1 accounts for \n at end of line */
+
+	out = calloc(len, sizeof(char));
+	if (!out)
+		exit(ENOMEM);
+
+	for (cur = strv; *cur; ++cur) {
+		strcat(out, *cur);
+		strcat(out, "\n");
+	}
+
+	return out;
+}
+
+/* remove and free every iomax_line of the tree */
+static void blockio_flush_iomax(struct avl_tree *iomax)
+{
+	struct iomax_line *l, *tmp;
+
+	avl_for_each_element_safe(iomax, l, avl, tmp) {
+		avl_delete(iomax, &l->avl);
+		free(l);
+	}
+}
+
+/*
+ * Walk one throttle array ('list' may be NULL) and store each entry's rate
+ * in the limit selected by 'field' of the matching device line.
+ * Returns ENODATA when an entry lacks major, minor or rate, else 0.
+ */
+static int blockio_parse_throttle(struct avl_tree *iomax, struct blob_attr *list,
+				  enum blockio_throttle_field field)
+{
+	struct blob_attr *tbtd[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX], *cur;
+	struct iomax_line *l;
+	uint64_t rate;
+	int rem;
+
+	blobmsg_for_each_attr(cur, list, rem) {
+		blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
+
+		if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
+		    !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
+		    !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
+			return ENODATA;
+
+		l = get_iomax_line(iomax,
+				   blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
+				   blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
+
+		rate = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
+
+		switch (field) {
+		case BLOCKIO_THROTTLE_RBPS:
+			l->rbps = rate;
+			break;
+		case BLOCKIO_THROTTLE_WBPS:
+			l->wbps = rate;
+			break;
+		case BLOCKIO_THROTTLE_RIOPS:
+			l->riops = rate;
+			break;
+		case BLOCKIO_THROTTLE_WIOPS:
+			l->wiops = rate;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Convert the legacy OCI "blockIO" object into cgroup2 assignments:
+ * weights become "io.bfq.weight" (one "default N" line plus one
+ * "major:minor N" line per weightDevice entry), the four throttle arrays are
+ * merged per device into "io.max".
+ *
+ * Returns 0 on success, ERANGE for a weight above CGROUP_IO_WEIGHT_MAX,
+ * ENODATA for an incomplete array entry, ENOTSUP for a per-device
+ * leafWeight, ENOMEM when a line cannot be allocated. All temporary
+ * allocations are released on every path.
+ */
+static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_LINUX_CGROUPS_BLOCKIO_MAX],
+			 *tbwd[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX],
+			 *cur;
+	int rem, ret = 0;
+	int weight = -1, leafweight = -1;
+	size_t numweightstrs = 0, numiomaxstrs = 0;
+	char **weightstrs = NULL, **iomaxstrs = NULL;
+	char *joined, *line;
+	struct avl_tree iomax;
+	struct iomax_line *curiomax;
+
+	/* set up first so the common exit path can always flush it */
+	avl_init(&iomax, avl_devcmp, false, NULL);
+
+	blobmsg_parse(oci_linux_cgroups_blockio_policy, __OCI_LINUX_CGROUPS_BLOCKIO_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]) {
+		weight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
+		++numweightstrs;
+	}
+
+	if (weight > CGROUP_IO_WEIGHT_MAX) {
+		ret = ERANGE;
+		goto out;
+	}
+
+	/* read leafWeight from its own attribute, not from "weight" */
+	if (tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT])
+		leafweight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT]);
+
+	if (leafweight > CGROUP_IO_WEIGHT_MAX) {
+		ret = ERANGE;
+		goto out;
+	}
+
+	blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem)
+		++numweightstrs;
+
+	/* +1 keeps the vector NULL-terminated */
+	weightstrs = calloc(numweightstrs + 1, sizeof(char *));
+	if (!weightstrs)
+		exit(ENOMEM);
+
+	numweightstrs = 0;
+
+	if (weight > -1) {
+		if (asprintf(&line, "default %d", weight) < 0) {
+			ret = ENOMEM;
+			goto out;
+		}
+		weightstrs[numweightstrs++] = line;
+	}
+
+	blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) {
+		uint64_t major, minor;
+		int devweight = weight, devleafweight = leafweight;
+
+		blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, tbwd, blobmsg_data(cur), blobmsg_len(cur));
+		if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] ||
+		    !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]) {
+			ret = ENODATA;
+			goto out;
+		}
+
+		if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] &&
+		    !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]) {
+			ret = ENODATA;
+			goto out;
+		}
+
+		if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT])
+			devweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]);
+
+		if (devweight > CGROUP_IO_WEIGHT_MAX) {
+			ret = ERANGE;
+			goto out;
+		}
+
+		if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
+			devleafweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]);
+
+		if (devleafweight > CGROUP_IO_WEIGHT_MAX) {
+			ret = ERANGE;
+			goto out;
+		}
+
+		/* a valid per-device leafWeight is recognised but cannot be expressed */
+		if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]) {
+			ret = ENOTSUP;
+			goto out;
+		}
+
+		major = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR]);
+		minor = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]);
+
+		if (asprintf(&line, "%" PRIu64 ":%" PRIu64 " %u", major, minor, devweight) < 0) {
+			ret = ENOMEM;
+			goto out;
+		}
+		weightstrs[numweightstrs++] = line;
+	}
+
+	if (numweightstrs) {
+		joined = blockio_join_lines(weightstrs);
+		cgroups_set("io.bfq.weight", joined);
+		free(joined);
+	}
+
+	/* merge the four throttle arrays into one record per device */
+	ret = blockio_parse_throttle(&iomax, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE], BLOCKIO_THROTTLE_RBPS);
+	if (ret)
+		goto out;
+
+	ret = blockio_parse_throttle(&iomax, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE], BLOCKIO_THROTTLE_WBPS);
+	if (ret)
+		goto out;
+
+	ret = blockio_parse_throttle(&iomax, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE], BLOCKIO_THROTTLE_RIOPS);
+	if (ret)
+		goto out;
+
+	ret = blockio_parse_throttle(&iomax, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE], BLOCKIO_THROTTLE_WIOPS);
+	if (ret)
+		goto out;
+
+	avl_for_each_element(&iomax, curiomax, avl)
+		++numiomaxstrs;
+
+	/* no throttle entries at all: nothing to write to io.max */
+	if (!numiomaxstrs)
+		goto out;
+
+	iomaxstrs = calloc(numiomaxstrs + 1, sizeof(char *));
+	if (!iomaxstrs)
+		exit(ENOMEM);
+
+	numiomaxstrs = 0;
+
+	avl_for_each_element(&iomax, curiomax, avl) {
+		/* worst case is two 20-digit device numbers plus four 20-digit limits: under 150 bytes */
+		char iomaxlstr[160];
+		char lstr[32];
+
+		snprintf(iomaxlstr, sizeof(iomaxlstr), "%" PRIu64 ":%" PRIu64 " ", curiomax->dev.major, curiomax->dev.minor);
+
+		if (curiomax->rbps != (uint64_t)-1) {
+			snprintf(lstr, sizeof(lstr), "rbps=%" PRIu64 " ", curiomax->rbps);
+			strcat(iomaxlstr, lstr);
+		}
+		if (curiomax->wbps != (uint64_t)-1) {
+			snprintf(lstr, sizeof(lstr), "wbps=%" PRIu64 " ", curiomax->wbps);
+			strcat(iomaxlstr, lstr);
+		}
+		if (curiomax->riops != (uint64_t)-1) {
+			snprintf(lstr, sizeof(lstr), "riops=%" PRIu64 " ", curiomax->riops);
+			strcat(iomaxlstr, lstr);
+		}
+		if (curiomax->wiops != (uint64_t)-1) {
+			snprintf(lstr, sizeof(lstr), "wiops=%" PRIu64 " ", curiomax->wiops);
+			strcat(iomaxlstr, lstr);
+		}
+
+		line = strdup(iomaxlstr);
+		if (!line) {
+			ret = ENOMEM;
+			goto out;
+		}
+		iomaxstrs[numiomaxstrs++] = line;
+	}
+
+	joined = blockio_join_lines(iomaxstrs);
+	cgroups_set("io.max", joined);
+	free(joined);
+
+out:
+	blockio_flush_iomax(&iomax);
+	blockio_free_strv(iomaxstrs);
+	blockio_free_strv(weightstrs);
+
+	return ret;
+}
+
+
+/* indices for the legacy OCI "cpu" object */
+enum {
+ OCI_LINUX_CGROUPS_CPU_SHARES,
+ OCI_LINUX_CGROUPS_CPU_PERIOD,
+ OCI_LINUX_CGROUPS_CPU_QUOTA,
+ OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME,
+ OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD,
+ OCI_LINUX_CGROUPS_CPU_CPUS,
+ OCI_LINUX_CGROUPS_CPU_MEMS,
+ __OCI_LINUX_CGROUPS_CPU_MAX,
+};
+
+/*
+ * JSON key and expected type for each "cpu" field. The two realtime keys
+ * are parsed only so that their presence can be rejected.
+ */
+static const struct blobmsg_policy oci_linux_cgroups_cpu_policy[] = {
+ [OCI_LINUX_CGROUPS_CPU_SHARES] = { "shares", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_CPU_PERIOD] = { "period", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_CPU_QUOTA] = { "quota", BLOBMSG_CAST_INT64 }, /* signed int64! */
+ [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] = { "realtimePeriod", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME] = { "realtimeRuntime", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_CGROUPS_CPU_CPUS] = { "cpus", BLOBMSG_TYPE_STRING },
+ [OCI_LINUX_CGROUPS_CPU_MEMS] = { "mems", BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * Translate the legacy OCI 'cpu' section into cgroup2 settings which are
+ * queued via cgroups_set().
+ *
+ * Returns 0 on success, ENOTSUP when one of the realtime keys is present and
+ * ERANGE when 'shares' is outside of 2..262144.
+ */
+static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr *msg)
+{
+	struct blob_attr *attrs[__OCI_LINUX_CGROUPS_CPU_MAX];
+	char val[32] = { 0 };
+	uint64_t cpu_shares, cpu_weight;
+	uint64_t cpu_period = 0;
+	int64_t cpu_quota = -2; /* -2: key not present */
+
+	blobmsg_parse(oci_linux_cgroups_cpu_policy, __OCI_LINUX_CGROUPS_CPU_MAX, attrs, blobmsg_data(msg), blobmsg_len(msg));
+
+	/* no equivalent in cgroup2 */
+	if (attrs[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD])
+		return ENOTSUP;
+
+	if (attrs[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME])
+		return ENOTSUP;
+
+	if (attrs[OCI_LINUX_CGROUPS_CPU_SHARES]) {
+		cpu_shares = blobmsg_cast_u64(attrs[OCI_LINUX_CGROUPS_CPU_SHARES]);
+		if (cpu_shares < 2 || cpu_shares > 262144)
+			return ERANGE;
+
+		/* scale the 2..262144 shares range onto the 1..10000 weight range */
+		cpu_weight = ((uint64_t)1) + ((cpu_shares - 2) * 9999) / 262142;
+		snprintf(val, sizeof(val), "%" PRIu64, cpu_weight);
+		cgroups_set("cpu.weight", val);
+		val[0] = '\0';
+	}
+
+	if (attrs[OCI_LINUX_CGROUPS_CPU_PERIOD])
+		cpu_period = blobmsg_cast_u64(attrs[OCI_LINUX_CGROUPS_CPU_PERIOD]);
+
+	if (attrs[OCI_LINUX_CGROUPS_CPU_QUOTA])
+		cpu_quota = blobmsg_cast_s64(attrs[OCI_LINUX_CGROUPS_CPU_QUOTA]);
+
+	/* assemble "<quota|max> [period]"; val stays empty when nothing applies */
+	if (cpu_period && cpu_quota >= 0)
+		snprintf(val, sizeof(val), "%" PRId64 " %" PRIu64 , cpu_quota, cpu_period);
+	else if (cpu_period)
+		snprintf(val, sizeof(val), "max %" PRIu64, cpu_period); /* assume default */
+	else if (cpu_quota >= 0)
+		snprintf(val, sizeof(val), "%" PRId64, cpu_quota);
+	else if (cpu_quota == -1)
+		strcpy(val, "max");
+
+	if (val[0] != '\0')
+		cgroups_set("cpu.max", val);
+
+	if (attrs[OCI_LINUX_CGROUPS_CPU_CPUS])
+		cgroups_set("cpuset.cpus", blobmsg_get_string(attrs[OCI_LINUX_CGROUPS_CPU_CPUS]));
+
+	if (attrs[OCI_LINUX_CGROUPS_CPU_MEMS])
+		cgroups_set("cpuset.mems", blobmsg_get_string(attrs[OCI_LINUX_CGROUPS_CPU_MEMS]));
+
+	return 0;
+}
+
+
+/* Indices into the attribute table filled when parsing the OCI 'memory' section */
+enum {
+	OCI_LINUX_CGROUPS_MEMORY_LIMIT,
+	OCI_LINUX_CGROUPS_MEMORY_RESERVATION,
+	OCI_LINUX_CGROUPS_MEMORY_SWAP,
+	OCI_LINUX_CGROUPS_MEMORY_KERNEL,
+	OCI_LINUX_CGROUPS_MEMORY_KERNELTCP,
+	OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS,
+	OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER,
+	OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY,
+	__OCI_LINUX_CGROUPS_MEMORY_MAX,
+};
+
+/* blobmsg policy mapping the JSON keys of the OCI 'memory' section to the enum above */
+static const struct blobmsg_policy oci_linux_cgroups_memory_policy[] = {
+	[OCI_LINUX_CGROUPS_MEMORY_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, /* signed int64! */
+	[OCI_LINUX_CGROUPS_MEMORY_RESERVATION] = { "reservation", BLOBMSG_CAST_INT64 }, /* signed int64! */
+	[OCI_LINUX_CGROUPS_MEMORY_SWAP] = { "swap", BLOBMSG_CAST_INT64 }, /* signed int64! */
+	[OCI_LINUX_CGROUPS_MEMORY_KERNEL] = { "kernel", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */
+	[OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] = { "kernelTCP", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */
+	[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] = { "swappiness", BLOBMSG_CAST_INT64 },
+	[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL },
+	/* standard C99 designated initializer (was missing the '=') */
+	[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY] = { "useHierarchy", BLOBMSG_TYPE_BOOL },
+};
+
+/*
+ * Translate the legacy OCI 'memory' section into cgroup2 settings which are
+ * queued via cgroups_set().
+ *
+ * Returns 0 on success, ENOTSUP when a key without cgroup2 mapping is present
+ * and ERANGE when the combined memory+swap limit is smaller than the memory
+ * limit.
+ */
+static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
+	char tmp[32] = { 0 };
+	int64_t limit = -1, swap, reservation;
+
+	blobmsg_parse(oci_linux_cgroups_memory_policy, __OCI_LINUX_CGROUPS_MEMORY_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	/*
+	 * not all properties of the OCI memory section can be mapped to cgroup2
+	 * kernel memory accounting is always enabled and included in the set
+	 * memory limit, hence these options can be ignored
+	 * disableOOMKiller could be emulated using oom_score_adj + seccomp eBPF
+	 * preventing self-upgrade (but allow downgrade)
+	 *
+	 * see also https://github.com/opencontainers/runtime-spec/issues/1005
+	 */
+	if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] ||
+	    tb[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] ||
+	    tb[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY])
+		return ENOTSUP;
+
+	if (tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]) {
+		limit = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]);
+		if (limit == -1)
+			strcpy(tmp, "max");
+		else
+			snprintf(tmp, sizeof(tmp), "%" PRId64, limit);
+
+		cgroups_set("memory.max", tmp);
+	}
+
+	if (tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]) {
+		reservation = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]);
+
+		if (reservation == -1)
+			strcpy(tmp, "max");
+		else
+			snprintf(tmp, sizeof(tmp), "%" PRId64, reservation);
+
+		cgroups_set("memory.low", tmp);
+	}
+
+	/*
+	 * OCI 'swap' accounts for memory+swap while cgroup2 memory.swap.max
+	 * limits swap only, hence the memory limit has to be subtracted
+	 */
+	if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]) {
+		swap = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]);
+
+		if (swap == -1)
+			strcpy(tmp, "max");
+		else if (limit == -1)
+			/* no finite memory limit known, pass the value through as-is */
+			snprintf(tmp, sizeof(tmp), "%" PRId64, swap);
+		else if (swap < limit)
+			return ERANGE; /* memory+swap can't be less than memory alone */
+		else
+			snprintf(tmp, sizeof(tmp), "%" PRId64, swap - limit);
+
+		cgroups_set("memory.swap.max", tmp);
+	}
+
+	return 0;
+}
+
+
+/* Indices into the attribute table filled when parsing the OCI 'pids' section */
+enum {
+ OCI_LINUX_CGROUPS_PIDS_LIMIT,
+ __OCI_LINUX_CGROUPS_PIDS_MAX,
+};
+
+/* blobmsg policy for the OCI 'pids' section: a single integer key 'limit' */
+static const struct blobmsg_policy oci_linux_cgroups_pids_policy[] = {
+ [OCI_LINUX_CGROUPS_PIDS_LIMIT] = { "limit", BLOBMSG_CAST_INT64 },
+};
+
+/*
+ * Translate the OCI 'pids' section into the cgroup2 'pids.max' setting which
+ * is queued via cgroups_set().
+ *
+ * Returns 0 on success, EINVAL when 'limit' is missing and ERANGE when it is
+ * a negative value other than -1.
+ */
+static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr *msg)
+{
+	/* sized by the pids enum (used to borrow the memory section's size) */
+	struct blob_attr *tb[__OCI_LINUX_CGROUPS_PIDS_MAX];
+	char tmp[32] = { 0 };
+	int64_t limit;
+
+	blobmsg_parse(oci_linux_cgroups_pids_policy, __OCI_LINUX_CGROUPS_PIDS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[OCI_LINUX_CGROUPS_PIDS_LIMIT])
+		return EINVAL;
+
+	/* read as signed so that -1 is recognized, same as in the memory section */
+	limit = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_PIDS_LIMIT]);
+	if (limit < -1)
+		return ERANGE;
+
+	if (limit == -1)
+		strcpy(tmp, "max");
+	else
+		snprintf(tmp, sizeof(tmp), "%" PRId64, limit);
+
+	cgroups_set("pids.max", tmp);
+
+	return 0;
+}
+
+/*
+ * Queue every key/value pair of the OCI 'unified' table via cgroups_set().
+ *
+ * All values have to be strings. Keys containing a '/' as well as a few
+ * cgroup.* control files are refused. Returns 0 on success and EINVAL as
+ * soon as an offending entry is met (entries before it stay queued).
+ */
+static int parseOCIlinuxcgroups_unified(struct blob_attr *msg)
+{
+	static const char *const refused_keys[] = {
+		"cgroup.subtree_control",
+		"cgroup.procs",
+		"cgroup.threads",
+		"cgroup.freeze",
+	};
+	struct blob_attr *attr;
+	const char *key;
+	size_t i;
+	int remaining;
+
+	blobmsg_for_each_attr(attr, msg, remaining) {
+		if (blobmsg_type(attr) != BLOBMSG_TYPE_STRING)
+			return EINVAL;
+
+		key = blobmsg_name(attr);
+
+		/* restrict keys */
+		if (strchr(key, '/'))
+			return EINVAL;
+
+		for (i = 0; i < sizeof(refused_keys) / sizeof(refused_keys[0]); ++i) {
+			if (strcmp(key, refused_keys[i]) == 0)
+				return EINVAL;
+		}
+
+		cgroups_set(key, blobmsg_get_string(attr));
+	}
+
+	return 0;
+}
+
+/* Indices into the attribute table filled when parsing the OCI cgroups 'resources' object */
+enum {
+ OCI_LINUX_CGROUPS_BLOCKIO,
+ OCI_LINUX_CGROUPS_CPU,
+ OCI_LINUX_CGROUPS_DEVICES,
+ OCI_LINUX_CGROUPS_HUGEPAGELIMITS,
+ OCI_LINUX_CGROUPS_INTELRDT,
+ OCI_LINUX_CGROUPS_MEMORY,
+ OCI_LINUX_CGROUPS_NETWORK,
+ OCI_LINUX_CGROUPS_PIDS,
+ OCI_LINUX_CGROUPS_RDMA,
+ OCI_LINUX_CGROUPS_UNIFIED,
+ __OCI_LINUX_CGROUPS_MAX,
+};
+
+/* blobmsg policy mapping the top-level section names to the enum above */
+static const struct blobmsg_policy oci_linux_cgroups_policy[] = {
+ [OCI_LINUX_CGROUPS_BLOCKIO] = { "blockIO", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_CGROUPS_CPU] = { "cpu", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_CGROUPS_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_CGROUPS_HUGEPAGELIMITS] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_CGROUPS_INTELRDT] = { "intelRdt", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_CGROUPS_MEMORY] = { "memory", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_CGROUPS_NETWORK] = { "network", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_CGROUPS_PIDS] = { "pids", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_CGROUPS_RDMA] = { "rdma", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_CGROUPS_UNIFIED] = { "unified", BLOBMSG_TYPE_TABLE },
+};
+
+/*
+ * Entry point for parsing the OCI cgroups configuration.
+ *
+ * Each supported section is handed to its dedicated parser in a fixed order
+ * (blockIO, cpu, devices, memory, pids, unified); the first non-zero result
+ * is returned right away. ENOTSUP is returned when hugepageLimits, intelRdt,
+ * network or rdma is present. Returns 0 when everything was accepted.
+ */
+int parseOCIlinuxcgroups(struct blob_attr *msg)
+{
+	struct blob_attr *sect[__OCI_LINUX_CGROUPS_MAX];
+	int err;
+
+	blobmsg_parse(oci_linux_cgroups_policy, __OCI_LINUX_CGROUPS_MAX, sect, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (sect[OCI_LINUX_CGROUPS_HUGEPAGELIMITS])
+		return ENOTSUP;
+	if (sect[OCI_LINUX_CGROUPS_INTELRDT])
+		return ENOTSUP;
+	if (sect[OCI_LINUX_CGROUPS_NETWORK])
+		return ENOTSUP;
+	if (sect[OCI_LINUX_CGROUPS_RDMA])
+		return ENOTSUP;
+
+	err = sect[OCI_LINUX_CGROUPS_BLOCKIO] ?
+		parseOCIlinuxcgroups_legacy_blockio(sect[OCI_LINUX_CGROUPS_BLOCKIO]) : 0;
+	if (err)
+		return err;
+
+	err = sect[OCI_LINUX_CGROUPS_CPU] ?
+		parseOCIlinuxcgroups_legacy_cpu(sect[OCI_LINUX_CGROUPS_CPU]) : 0;
+	if (err)
+		return err;
+
+	err = sect[OCI_LINUX_CGROUPS_DEVICES] ?
+		parseOCIlinuxcgroups_devices(sect[OCI_LINUX_CGROUPS_DEVICES]) : 0;
+	if (err)
+		return err;
+
+	err = sect[OCI_LINUX_CGROUPS_MEMORY] ?
+		parseOCIlinuxcgroups_legacy_memory(sect[OCI_LINUX_CGROUPS_MEMORY]) : 0;
+	if (err)
+		return err;
+
+	err = sect[OCI_LINUX_CGROUPS_PIDS] ?
+		parseOCIlinuxcgroups_legacy_pids(sect[OCI_LINUX_CGROUPS_PIDS]) : 0;
+	if (err)
+		return err;
+
+	err = sect[OCI_LINUX_CGROUPS_UNIFIED] ?
+		parseOCIlinuxcgroups_unified(sect[OCI_LINUX_CGROUPS_UNIFIED]) : 0;
+	if (err)
+		return err;
+
+	return 0;
+}
--- /dev/null
+/*
+ * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _JAIL_CGROUPS_H
+#define _JAIL_CGROUPS_H
+
+/* keep the header self-contained: pid_t and struct blob_attr are used below */
+#include <sys/types.h>
+
+struct blob_attr;
+
+/*
+ * cgroup handling of the jail.
+ * NOTE(review): only parseOCIlinuxcgroups() is defined in the part of the
+ * sources reviewed here; the remaining descriptions are derived from the
+ * names and should be confirmed against cgroups.c.
+ */
+
+/* presumably sets up cgroup state for the path/name given in 'p' */
+void cgroups_init(const char *p);
+
+/*
+ * parse the OCI cgroups configuration in 'msg'; returns 0 on success or an
+ * errno-style value (e.g. ENOTSUP for sections which can't be mapped)
+ */
+int parseOCIlinuxcgroups(struct blob_attr *msg);
+
+/* presumably applies the collected settings for process 'pid' */
+void cgroups_apply(pid_t pid);
+
+/* presumably releases everything collected by the functions above */
+void cgroups_free(void);
+
+void cgroups_prepare(void);
+
+#endif
return l->path;
}
-static int elf64_find_section(const char *map, unsigned int type, unsigned int *offset, unsigned int *size, unsigned int *vaddr)
+static int elf64_find_section(const char *map, unsigned int type, unsigned long *offset, unsigned long *size, unsigned long *vaddr)
{
Elf64_Ehdr *e;
Elf64_Phdr *ph;
return -1;
}
-static int elf32_find_section(const char *map, unsigned int type, unsigned int *offset, unsigned int *size, unsigned int *vaddr)
+static int elf32_find_section(const char *map, unsigned int type, unsigned long *offset, unsigned long *size, unsigned long *vaddr)
{
Elf32_Ehdr *e;
Elf32_Phdr *ph;
return -1;
}
-static int elf_find_section(const char *map, unsigned int type, unsigned int *offset, unsigned int *size, unsigned int *vaddr)
+static int elf_find_section(const char *map, unsigned int type, unsigned long *offset, unsigned long *size, unsigned long *vaddr)
{
int clazz = map[EI_CLASS];
return -1;
}
-static int elf32_scan_dynamic(const char *map, int dyn_offset, int dyn_size, int load_offset)
+static int elf32_scan_dynamic(const char *map, unsigned long dyn_offset, unsigned long dyn_size, long load_offset)
{
Elf32_Dyn *dynamic = (Elf32_Dyn *) (map + dyn_offset);
const char *strtab = NULL;
return 0;
}
-static int elf64_scan_dynamic(const char *map, int dyn_offset, int dyn_size, int load_offset)
+static int elf64_scan_dynamic(const char *map, unsigned long dyn_offset, unsigned long dyn_size, long load_offset)
{
Elf64_Dyn *dynamic = (Elf64_Dyn *) (map + dyn_offset);
const char *strtab = NULL;
int elf_load_deps(const char *path, const char *map)
{
- unsigned int dyn_offset, dyn_size;
- unsigned int load_offset, load_vaddr;
- unsigned int interp_offset;
+ unsigned long dyn_offset, dyn_size;
+ unsigned long load_offset, load_vaddr;
+ unsigned long interp_offset;
- if (elf_find_section(map, PT_LOAD, &load_offset, NULL, &load_vaddr)) {
- ERROR("failed to load the .load section from %s\n", path);
- return -1;
+ if (elf_find_section(map, PT_INTERP, &interp_offset, NULL, NULL) == 0) {
+ add_path_and_deps(map+interp_offset, 1, -1, 0);
}
- if (elf_find_section(map, PT_DYNAMIC, &dyn_offset, &dyn_size, NULL)) {
- ERROR("failed to load the .dynamic section from %s\n", path);
- return -1;
+ if (elf_find_section(map, PT_LOAD, &load_offset, NULL, &load_vaddr)) {
+ DEBUG("failed to load the .load section from %s\n", path);
+ return 0;
}
- if (elf_find_section(map, PT_INTERP, &interp_offset, NULL, NULL) == 0) {
- add_path_and_deps(map+interp_offset, 1, -1, 0);
+ if (elf_find_section(map, PT_DYNAMIC, &dyn_offset, &dyn_size, NULL)) {
+ DEBUG("failed to load the .dynamic section from %s\n", path);
+ return 0;
}
int clazz = map[EI_CLASS];
alloc_library_path("/usr/lib");
load_ldso_conf("/etc/ld.so.conf");
}
+
+/*
+ * Release everything collected for the library search: all entries of the
+ * 'library_paths' list and all elements of the 'libraries' tree.
+ */
+void free_library_search(void)
+{
+	struct library_path *p, *ptmp;
+	struct library *l, *tmp;
+
+	list_for_each_entry_safe(p, ptmp, &library_paths, list) {
+		/* unlink first so that the list head never references freed memory */
+		list_del(&p->list);
+		free(p);
+	}
+
+	avl_remove_all_elements(&libraries, l, avl, tmp)
+		free(l);
+}
const char* find_lib(const char *file);
void init_library_search(void);
int lib_open(char **fullpath, const char *file);
+void free_library_search(void);
#endif
/*
* Copyright (C) 2015 John Crispin <blogic@openwrt.org>
* Copyright (C) 2015 Etienne Champetier <champetier.etienne@gmail.com>
+ * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 2.1
#include <fcntl.h>
#include <linux/limits.h>
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
+#include <libgen.h>
#include <libubox/avl.h>
#include <libubox/avl-cmp.h>
+#include <libubox/blobmsg.h>
+#include <libubox/list.h>
+#include <libubox/utils.h>
#include "elf.h"
#include "fs.h"
#include "jail.h"
#include "log.h"
+#define UJAIL_NOAFILE "/tmp/.ujailnoafile"
+
+/* One mount queued by _add_mount() and carried out later by do_mount() */
struct mount {
- struct avl_node avl;
- const char *path;
- int readonly;
- int error;
+ struct avl_node avl; /* node in the 'mounts' tree, keyed by 'target' */
+ const char *source; /* copied source path, NULL, or (void *)-1 to request masking of 'target' */
+ const char *target; /* copied mount target, appended to the jail root by do_mount() */
+ const char *filesystemtype; /* copied filesystem type or NULL */
+ unsigned long mountflags; /* MS_* flags handed to mount() */
+ unsigned long propflags; /* propagation flags, applied by a separate mount() call */
+ const char *optstr; /* copied data string handed to mount() or NULL */
+ int error; /* value do_mount() returns on failure; 0 also silences the error messages */
+ bool inner; /* when set, do_mount() prefixes 'source' with the jail root */
};
struct avl_tree mounts;
-int add_mount(const char *path, int readonly, int error)
+/*
+ * Carry out a single queued mount below 'root'.
+ *
+ * orig_source may be NULL, a path, or (void *)-1 which requests masking of
+ * the target. With 'inner' set the source path is prefixed with 'root'.
+ * The mount point is root + target (or root + source if target is NULL).
+ *
+ * Returns 0 on success (or when there is nothing to mask), otherwise
+ * 'error', ENOMEM, or the errno of the failed mount point creation.
+ * Error messages are only printed when 'error' is non-zero.
+ */
+static int do_mount(const char *root, const char *orig_source, const char *target, const char *filesystemtype,
+ unsigned long orig_mountflags, unsigned long propflags, const char *optstr, int error, bool inner)
{
- assert(path != NULL);
+ struct stat s;
+ char new[PATH_MAX];
+ char *source = (char *)orig_source;
+ int fd, ret = 0;
+ bool is_bind = (orig_mountflags & MS_BIND);
+ bool is_mask = (source == (void *)(-1));
+ unsigned long mountflags = orig_mountflags;
+
+ assert(!(inner && is_mask));
+ assert(!(inner && !orig_source));
+
+ /* 's' is only filled here; it is consulted again below to tell directories from files */
+ if (source && is_bind && stat(source, &s)) {
+ ERROR("stat(%s) failed: %m\n", source);
+ return error;
+ }
+
+ /* from here on 'source' is heap-allocated when 'inner' is set, see free_source_out */
+ if (inner)
+ if (asprintf(&source, "%s%s", root, orig_source) < 0)
+ return ENOMEM;
+
+ snprintf(new, sizeof(new), "%s%s", root, target?target:source);
+
+ if (is_mask) {
+ if (stat(new, &s))
+ return 0; /* doesn't exists, nothing to mask */
+
+ if (S_ISDIR(s.st_mode)) {/* use empty 0-sized tmpfs for directories */
+ if (mount("none", new, "tmpfs", MS_RDONLY | MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_NOATIME, "size=0,mode=000"))
+ return error;
+ } else {
+ /* mount-bind 0-sized file having mode 000 */
+ if (mount(UJAIL_NOAFILE, new, "bind", MS_BIND, NULL))
+ return error;
+
+ if (mount(UJAIL_NOAFILE, new, "bind", MS_REMOUNT | MS_BIND | MS_RDONLY | MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_NOATIME, NULL))
+ return error;
+ }
+
+ DEBUG("masked path %s\n", new);
+ return 0;
+ }
+
+
+ /* create the mount point: a directory, or an empty file when bind-mounting a non-directory */
+ if (!is_bind || (source && S_ISDIR(s.st_mode))) {
+ mkdir_p(new, 0755);
+ } else if (is_bind && source) {
+ mkdir_p(dirname(new), 0755);
+ /* dirname() may have modified 'new', hence build the path again */
+ snprintf(new, sizeof(new), "%s%s", root, target?target:source);
+ fd = open(new, O_CREAT|O_WRONLY|O_TRUNC|O_EXCL, 0644);
+ if (fd >= 0)
+ close(fd);
+
+ if (error && fd < 0 && errno != EEXIST) {
+ ERROR("failed to create mount target %s: %m\n", new);
+
+ /* NOTE(review): errno is read after ERROR(), which might have changed it - confirm */
+ ret = errno;
+ goto free_source_out;
+ }
+ }
+
+ /* bind mounts take two steps: plain bind first, then a remount carrying the remaining flags */
+ if (is_bind) {
+ if (mount(source?:new, new, filesystemtype?:"bind", MS_BIND | (mountflags & MS_REC), optstr)) {
+ if (error)
+ ERROR("failed to mount -B %s %s: %m\n", source, new);
+
+ ret = error;
+ goto free_source_out;
+ }
+ mountflags |= MS_REMOUNT;
+ }
+
+ /* a filesystem type of "cgroup" is replaced by "cgroup2" */
+ const char *hack_fstype = ((!filesystemtype || strcmp(filesystemtype, "cgroup"))?filesystemtype:"cgroup2");
+ if (mount(source?:(is_bind?new:NULL), new, hack_fstype?:"none", mountflags, optstr)) {
+ if (error)
+ ERROR("failed to mount %s %s: %m\n", source, new);
- if (avl_find(&mounts, path))
+ ret = error;
+ goto free_source_out;
+ }
+
+ /* NOTE(review): 'source' can be NULL here and is passed to a %s conversion - confirm that is fine */
+ DEBUG("mount %s%s %s (%s)\n", (mountflags & MS_BIND)?"-B ":"", source, new,
+ (mountflags & MS_RDONLY)?"ro":"rw");
+
+ /* propagation flags are applied by a separate mount() call */
+ if (propflags && mount("none", new, "none", propflags, NULL)) {
+ if (error)
+ ERROR("failed to mount --make-... %s \n", new);
+
+ ret = error;
+ }
+
+free_source_out:
+ if (inner)
+ free(source);
+
+ return ret;
+}
+
+/*
+ * Queue a mount in the 'mounts' tree; it is carried out later by do_mount().
+ *
+ * All strings are copied. A source of (void *)-1 is stored as-is and marks
+ * the target as to-be-masked. Returns 0 when queued, 1 when an entry for
+ * 'target' already exists and ENOMEM when the entry can't be allocated.
+ * NOTE(review): the results of the strdup() calls are not checked.
+ */
+static int _add_mount(const char *source, const char *target, const char *filesystemtype,
+ unsigned long mountflags, unsigned long propflags, const char *optstr,
+ int error, bool inner)
+{
+ assert(target != NULL);
+
+ if (avl_find(&mounts, target))
return 1;
struct mount *m;
m = calloc(1, sizeof(struct mount));
- assert(m != NULL);
- m->avl.key = m->path = strdup(path);
- m->readonly = readonly;
+ if (!m)
+ return ENOMEM;
+
+ /* the copied target doubles as key of the tree node */
+ m->avl.key = m->target = strdup(target);
+ if (source) {
+ if (source != (void*)(-1))
+ m->source = strdup(source);
+ else
+ m->source = (void*)(-1);
+ }
+ if (filesystemtype)
+ m->filesystemtype = strdup(filesystemtype);
+
+ if (optstr)
+ m->optstr = strdup(optstr);
+
+ m->mountflags = mountflags;
+ m->propflags = propflags;
m->error = error;
+ m->inner = inner;
avl_insert(&mounts, &m->avl);
- DEBUG("adding mount %s ro(%d) err(%d)\n", m->path, m->readonly, m->error != 0);
+ DEBUG("adding mount %s %s bind(%d) ro(%d) err(%d)\n", (m->source == (void*)(-1))?"mask":m->source, m->target,
+ !!(m->mountflags & MS_BIND), !!(m->mountflags & MS_RDONLY), m->error != 0);
+
+ return 0;
+}
+
+/* Queue a mount with 'inner' unset; see _add_mount() for arguments and return values */
+int add_mount(const char *source, const char *target, const char *filesystemtype,
+ unsigned long mountflags, unsigned long propflags, const char *optstr, int error)
+{
+ return _add_mount(source, target, filesystemtype, mountflags, propflags, optstr, error, false);
+}
+
+/*
+ * Queue a mount with 'inner' set, i.e. do_mount() will prefix the source path
+ * with the jail root; see _add_mount() for arguments and return values
+ */
+int add_mount_inner(const char *source, const char *target, const char *filesystemtype,
+ unsigned long mountflags, unsigned long propflags, const char *optstr, int error)
+{
+ return _add_mount(source, target, filesystemtype, mountflags, propflags, optstr, error, true);
+}
+
+/*
+ * Queue a bind mount of 'path' onto 'path2' (below the jail root); a non-zero
+ * 'readonly' adds MS_RDONLY. Returns whatever add_mount() returns.
+ */
+static int _add_mount_bind(const char *path, const char *path2, int readonly, int error)
+{
+	const unsigned long flags = readonly ? (MS_BIND | MS_RDONLY) : MS_BIND;
+
+	return add_mount(path, path2, NULL, flags, 0, NULL, error);
+}
+
+/* Queue a bind mount using 'path' as both source and target */
+int add_mount_bind(const char *path, int readonly, int error)
+{
+ return _add_mount_bind(path, path, readonly, error);
+}
+
+/* Indices into the attribute table filled when parsing one OCI mount object */
+enum {
+ OCI_MOUNT_SOURCE,
+ OCI_MOUNT_DESTINATION,
+ OCI_MOUNT_TYPE,
+ OCI_MOUNT_OPTIONS,
+ __OCI_MOUNT_MAX,
+};
+
+/* blobmsg policy mapping the JSON keys of an OCI mount object to the enum above */
+static const struct blobmsg_policy oci_mount_policy[] = {
+ [OCI_MOUNT_SOURCE] = { "source", BLOBMSG_TYPE_STRING },
+ [OCI_MOUNT_DESTINATION] = { "destination", BLOBMSG_TYPE_STRING },
+ [OCI_MOUNT_TYPE] = { "type", BLOBMSG_TYPE_STRING },
+ [OCI_MOUNT_OPTIONS] = { "options", BLOBMSG_TYPE_ARRAY },
+};
+
+/* List node carrying a single mount option string */
+struct mount_opt {
+ struct list_head list;
+ char *optstr;
+};
+
+/* fallback for C libraries whose headers don't define MS_LAZYTIME */
+#ifndef MS_LAZYTIME
+#define MS_LAZYTIME (1 << 25)
+#endif
+
+/*
+ * Translate an OCI mount 'options' array into arguments for mount().
+ *
+ * msg:               array of option strings
+ * mount_flags:       receives the accumulated MS_* mount flags
+ * propagation_flags: receives the accumulated propagation flags
+ * mount_data:        receives a newly allocated, comma-separated string of
+ *                    all filesystem-specific options (to be freed by the
+ *                    caller); left untouched if there are none
+ * error:             set to 0 when the 'nofail' option is present
+ *
+ * Options are processed in order, so later ones override earlier ones.
+ * Returns 0 on success, EINVAL when an array element isn't a string and
+ * ENOMEM when out of memory; the output arguments are not written on error.
+ */
+static int parseOCImountopts(struct blob_attr *msg, unsigned long *mount_flags, unsigned long *propagation_flags, char **mount_data, int *error)
+{
+	struct blob_attr *cur;
+	int rem;
+	unsigned long mf = 0;
+	unsigned long pf = 0;
+	char *tmp;
+	char *data = NULL; /* comma-joined filesystem-specific options */
+	char *grown;
+	size_t len = 0; /* current string length of 'data' */
+	size_t optlen;
+
+	blobmsg_for_each_attr(cur, msg, rem) {
+		/* anything but a string can't be treated as NUL-terminated option */
+		if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING) {
+			free(data);
+			return EINVAL;
+		}
+
+		tmp = blobmsg_get_string(cur);
+		if (!strcmp("ro", tmp))
+			mf |= MS_RDONLY;
+		else if (!strcmp("rw", tmp))
+			mf &= ~MS_RDONLY;
+		else if (!strcmp("bind", tmp))
+			mf |= MS_BIND; /* must not discard flags of earlier options */
+		else if (!strcmp("rbind", tmp))
+			mf |= MS_BIND | MS_REC;
+		else if (!strcmp("sync", tmp))
+			mf |= MS_SYNCHRONOUS;
+		else if (!strcmp("async", tmp))
+			mf &= ~MS_SYNCHRONOUS;
+		else if (!strcmp("atime", tmp))
+			mf &= ~MS_NOATIME;
+		else if (!strcmp("noatime", tmp))
+			mf |= MS_NOATIME;
+		else if (!strcmp("defaults", tmp))
+			mf = 0; /* rw, suid, dev, exec, auto, nouser, and async */
+		else if (!strcmp("dev", tmp))
+			mf &= ~MS_NODEV;
+		else if (!strcmp("nodev", tmp))
+			mf |= MS_NODEV;
+		else if (!strcmp("iversion", tmp))
+			mf |= MS_I_VERSION;
+		else if (!strcmp("noiversion", tmp))
+			mf &= ~MS_I_VERSION;
+		else if (!strcmp("diratime", tmp))
+			mf &= ~MS_NODIRATIME;
+		else if (!strcmp("nodiratime", tmp))
+			mf |= MS_NODIRATIME;
+		else if (!strcmp("dirsync", tmp))
+			mf |= MS_DIRSYNC;
+		else if (!strcmp("exec", tmp))
+			mf &= ~MS_NOEXEC;
+		else if (!strcmp("noexec", tmp))
+			mf |= MS_NOEXEC;
+		else if (!strcmp("mand", tmp))
+			mf |= MS_MANDLOCK;
+		else if (!strcmp("nomand", tmp))
+			mf &= ~MS_MANDLOCK;
+		else if (!strcmp("relatime", tmp))
+			mf |= MS_RELATIME;
+		else if (!strcmp("norelatime", tmp))
+			mf &= ~MS_RELATIME;
+		else if (!strcmp("strictatime", tmp))
+			mf |= MS_STRICTATIME;
+		else if (!strcmp("nostrictatime", tmp))
+			mf &= ~MS_STRICTATIME;
+		else if (!strcmp("lazytime", tmp))
+			mf |= MS_LAZYTIME;
+		else if (!strcmp("nolazytime", tmp))
+			mf &= ~MS_LAZYTIME;
+		else if (!strcmp("suid", tmp))
+			mf &= ~MS_NOSUID;
+		else if (!strcmp("nosuid", tmp))
+			mf |= MS_NOSUID;
+		else if (!strcmp("remount", tmp))
+			mf |= MS_REMOUNT;
+		/* propagation flags */
+		else if (!strcmp("private", tmp))
+			pf |= MS_PRIVATE;
+		else if (!strcmp("rprivate", tmp))
+			pf |= MS_PRIVATE | MS_REC;
+		else if (!strcmp("slave", tmp))
+			pf |= MS_SLAVE;
+		else if (!strcmp("rslave", tmp))
+			pf |= MS_SLAVE | MS_REC;
+		else if (!strcmp("shared", tmp))
+			pf |= MS_SHARED;
+		else if (!strcmp("rshared", tmp))
+			pf |= MS_SHARED | MS_REC;
+		else if (!strcmp("unbindable", tmp))
+			pf |= MS_UNBINDABLE;
+		else if (!strcmp("runbindable", tmp))
+			pf |= MS_UNBINDABLE | MS_REC;
+		/* special case: 'nofail' */
+		else if (!strcmp("nofail", tmp))
+			*error = 0;
+		else if (!strcmp("auto", tmp) ||
+			 !strcmp("noauto", tmp) ||
+			 !strcmp("user", tmp) ||
+			 !strcmp("group", tmp) ||
+			 !strcmp("_netdev", tmp))
+			DEBUG("ignoring built-in mount option %s\n", tmp);
+		else if (*tmp) {
+			/*
+			 * filesystem-specific free-form option: append it to the
+			 * data string right away (empty strings are skipped)
+			 */
+			optlen = strlen(tmp);
+			grown = realloc(data, len + optlen + 2); /* ',' + option + '\0' */
+			if (!grown) {
+				free(data);
+				return ENOMEM;
+			}
+			data = grown;
+
+			if (len)
+				data[len++] = ',';
+
+			memcpy(data + len, tmp, optlen + 1);
+			len += optlen;
+		}
+	}
+
+	*mount_flags = mf;
+	*propagation_flags = pf;
+	if (data)
+		*mount_data = data;
+
+	DEBUG("mount flags(%08lx) propagation(%08lx) fsopts(\"%s\")\n", mf, pf, data?:"");
+
	return 0;
}
+/*
+ * Parse a single OCI mount object and queue it using add_mount().
+ *
+ * 'destination' is mandatory (EINVAL otherwise); 'source', 'type' and
+ * 'options' are optional. Returns the result of parseOCImountopts() if that
+ * fails, otherwise the result of add_mount().
+ */
+int parseOCImount(struct blob_attr *msg)
+{
+	struct blob_attr *attrs[__OCI_MOUNT_MAX];
+	const char *src = NULL;
+	const char *fstype = NULL;
+	char *fsdata = NULL;
+	unsigned long flags = 0;
+	unsigned long propagation = 0;
+	int failval = -1; /* reset to 0 by the 'nofail' option */
+	int rc;
+
+	blobmsg_parse(oci_mount_policy, __OCI_MOUNT_MAX, attrs, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!attrs[OCI_MOUNT_DESTINATION])
+		return EINVAL;
+
+	if (attrs[OCI_MOUNT_OPTIONS]) {
+		rc = parseOCImountopts(attrs[OCI_MOUNT_OPTIONS], &flags, &propagation, &fsdata, &failval);
+		if (rc)
+			return rc;
+	}
+
+	if (attrs[OCI_MOUNT_SOURCE])
+		src = blobmsg_get_string(attrs[OCI_MOUNT_SOURCE]);
+
+	if (attrs[OCI_MOUNT_TYPE])
+		fstype = blobmsg_get_string(attrs[OCI_MOUNT_TYPE]);
+
+	rc = add_mount(src, blobmsg_get_string(attrs[OCI_MOUNT_DESTINATION]), fstype,
+		       flags, propagation, fsdata, failval);
+
+	/* add_mount() keeps its own copy of the option string */
+	free(fsdata);
+
+	return rc;
+}
+
+/*
+ * Create the empty file UJAIL_NOAFILE with mode 000 which do_mount()
+ * bind-mounts over files to be masked; failure to create it is ignored.
+ */
+static void build_noafile(void) {
+	const int noafd = creat(UJAIL_NOAFILE, 0000);
+
+	if (noafd >= 0)
+		close(noafd);
+}
+
+/*
+ * Carry out all queued mounts below 'jailroot'. Every library collected in
+ * 'libraries' is queued as read-only bind mount first. Returns 0 on success
+ * and -1 as soon as one do_mount() call reports a failure.
+ */
int mount_all(const char *jailroot) {
struct library *l;
struct mount *m;
+ /* the file used for masking has to exist before any mask mount is attempted */
+ build_noafile();
+
avl_for_each_element(&libraries, l, avl)
- add_mount(l->path, 1, -1);
+ add_mount_bind(l->path, 1, -1);
avl_for_each_element(&mounts, m, avl)
- if (mount_bind(jailroot, m->path, m->readonly, m->error))
+ if (do_mount(jailroot, m->source, m->target, m->filesystemtype, m->mountflags,
+ m->propflags, m->optstr, m->error, m->inner))
return -1;
return 0;
}
+/*
+ * Empty the 'mounts' tree, releasing every entry together with the strings
+ * it owns.
+ */
+void mount_free(void) {
+	struct mount *entry, *next;
+
+	avl_remove_all_elements(&mounts, entry, avl, next) {
+		/* (void *)-1 is the mask marker, not an allocation */
+		const bool is_mask = (entry->source == (void*)(-1));
+
+		if (!is_mask)
+			free((void*)entry->source);
+
+		free((void*)entry->target);
+		free((void*)entry->filesystemtype);
+		free((void*)entry->optstr);
+		free(entry);
+	}
+}
+
/* Initialise the 'mounts' tree, using avl_strcmp to compare its keys */
void mount_list_init(void) {
avl_init(&mounts, avl_strcmp, false, NULL);
}
return add_path_and_deps(buf, 1, -1, 0);
}
-int add_path_and_deps(const char *path, int readonly, int error, int lib)
+int add_2paths_and_deps(const char *path, const char *path2, int readonly, int error, int lib)
{
assert(path != NULL);
+ assert(path2 != NULL);
if (lib == 0 && path[0] != '/') {
ERROR("%s is not an absolute path\n", path);
char *map = NULL;
int fd, ret = -1;
if (path[0] == '/') {
- if (avl_find(&mounts, path))
+ if (avl_find(&mounts, path2))
return 0;
fd = open(path, O_RDONLY|O_CLOEXEC);
- if (fd == -1)
+ if (fd < 0)
return error;
- add_mount(path, readonly, error);
+ _add_mount_bind(path, path2, readonly, error);
} else {
if (avl_find(&libraries, path))
return 0;
char *fullpath;
fd = lib_open(&fullpath, path);
- if (fd == -1)
+ if (fd < 0)
return error;
if (fullpath) {
alloc_library(fullpath, path);
#ifndef _JAIL_FS_H_
#define _JAIL_FS_H_
-int add_mount(const char *path, int readonly, int error);
-int add_path_and_deps(const char *path, int readonly, int error, int lib);
+#include <sys/mount.h>
+#include <libubox/blobmsg.h>
+
+int add_mount(const char *source, const char *target, const char *filesystemtype,
+ unsigned long mountflags, unsigned long propflags, const char *optstr, int error);
+int add_mount_inner(const char *source, const char *target, const char *filesystemtype,
+ unsigned long mountflags, unsigned long propflags, const char *optstr, int error);
+int add_mount_bind(const char *path, int readonly, int error);
+int parseOCImount(struct blob_attr *msg);
+int add_2paths_and_deps(const char *path, const char *path2, int readonly, int error, int lib);
+
+/* Convenience wrapper around add_2paths_and_deps() passing 'path' for both path arguments */
+static inline int add_path_and_deps(const char *path, int readonly, int error, int lib)
+{
+ return add_2paths_and_deps(path, path, readonly, error, lib);
+}
+
int mount_all(const char *jailroot);
void mount_list_init(void);
+void mount_free(void);
#endif
/*
* Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 2.1
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/wait.h>
-
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+
+/* musl only defines 15 limit types; make sure all 16 are supported */
+#ifndef RLIMIT_RTTIME
+#define RLIMIT_RTTIME 15
+#undef RLIMIT_NLIMITS
+#define RLIMIT_NLIMITS 16
+#undef RLIM_NLIMITS
+#define RLIM_NLIMITS 16
+#endif
+
+#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
+#include <pwd.h>
+#include <grp.h>
#include <string.h>
-#include <sys/stat.h>
#include <fcntl.h>
-#include <libgen.h>
#include <sched.h>
+#include <linux/filter.h>
#include <linux/limits.h>
+#include <linux/nsfs.h>
+#include <linux/securebits.h>
#include <signal.h>
+#include <inttypes.h>
#include "capabilities.h"
#include "elf.h"
#include "fs.h"
#include "jail.h"
#include "log.h"
-
+#include "seccomp-oci.h"
+#include "cgroups.h"
+#include "netifd.h"
+
+#include <libubox/blobmsg.h>
+#include <libubox/blobmsg_json.h>
+#include <libubox/list.h>
+#include <libubox/vlist.h>
#include <libubox/uloop.h>
+#include <libubox/utils.h>
+#include <libubus.h>
+
+#ifndef CLONE_NEWCGROUP
+#define CLONE_NEWCGROUP 0x02000000
+#endif
#define STACK_SIZE (1024 * 1024)
-#define OPT_ARGS "S:C:n:h:r:w:d:psuloc"
+#define OPT_ARGS "cC:d:e:EfFG:h:ij:J:ln:NoO:pP:r:R:sS:uU:w:t:T:y"
+
+#define OCI_VERSION_STRING "1.0.2"
+
+/*
+ * One hook of the 'hooks' lists in opts; file, argv and envp are released by
+ * free_hooklist().
+ * NOTE(review): going by the name these are the arguments of an execvpe()
+ * call and 'timeout' bounds the hook's run time - confirm against the code
+ * running the hooks.
+ */
+struct hook_execvpe {
+ char *file;
+ char **argv;
+ char **envp;
+ int timeout;
+};
+
+/* One sysctl setting; both strings are released by free_sysctl() */
+struct sysctl_val {
+ char *entry;
+ char *value;
+};
+
+/*
+ * One device node of opts.devices; 'path' is released by free_devices().
+ * NOTE(review): presumably the arguments for mknod() plus the ownership to
+ * apply afterwards - confirm against the code creating the devices.
+ */
+struct mknod_args {
+ char *path;
+ mode_t mode;
+ dev_t dev;
+ uid_t uid;
+ gid_t gid;
+};
+/* Global configuration of the jail */
static struct {
char *name;
char *hostname;
char **jail_argv;
+ char *cwd;
char *seccomp;
+ struct sock_fprog *ociseccomp;
char *capabilities;
+ struct jail_capset capset;
+ char *user;
+ char *group;
+ char *extroot;
+ char *overlaydir;
+ char *tmpoverlaysize;
+ char **envp;
+ char *uidmap;
+ char *gidmap;
+ char *pidfile;
+ struct sysctl_val **sysctl; /* released by free_sysctl() */
int no_new_privs;
int namespace;
+ /* has_namespaces() treats any member other than -1 as namespace in use */
+ struct {
+ int pid;
+ int net;
+ int ns;
+ int ipc;
+ int uts;
+ int user;
+ int cgroup;
+#ifdef CLONE_NEWTIME
+ int time;
+#endif
+ } setns;
int procfs;
int ronly;
int sysfs;
+ int console;
+ int pw_uid;
+ int pw_gid;
+ int gr_gid;
+ int root_map_uid;
+ gid_t *additional_gids;
+ size_t num_additional_gids;
+ mode_t umask;
+ bool set_umask;
+ int require_jail;
+ /* hook lists, each released using free_hooklist() */
+ struct {
+ struct hook_execvpe **createRuntime;
+ struct hook_execvpe **createContainer;
+ struct hook_execvpe **startContainer;
+ struct hook_execvpe **poststart;
+ struct hook_execvpe **poststop;
+ } hooks;
+ struct rlimit *rlimits[RLIM_NLIMITS]; /* one slot per limit type, released by free_rlimits() */
+ int oom_score_adj;
+ bool set_oom_score_adj;
+ struct mknod_args **devices; /* NULL-terminated, released by free_devices() */
+ char *ocibundle;
+ bool immediately;
+ struct blob_attr *annotations;
+ int term_timeout;
} opts;
+static struct blob_buf ocibuf;
+
extern int pivot_root(const char *new_root, const char *put_old);
int debug = 0;
static char child_stack[STACK_SIZE];
-static int mkdir_p(char *dir, mode_t mask)
+static struct ubus_context *parent_ctx;
+
+int console_fd;
+
+
+/*
+ * Tell whether the jail uses namespaces at all: true when any namespace fd
+ * in opts.setns is set (!= -1) or when opts.namespace is non-zero.
+ */
+static inline bool has_namespaces(void)
{
- char *l = strrchr(dir, '/');
- int ret;
+	/* an fd handed over for setns() is enough */
+	if (opts.setns.pid != -1 || opts.setns.net != -1 ||
+	    opts.setns.ns != -1 || opts.setns.ipc != -1 ||
+	    opts.setns.uts != -1 || opts.setns.user != -1 ||
+	    opts.setns.cgroup != -1)
+		return true;
+
+#ifdef CLONE_NEWTIME
+	if (opts.setns.time != -1)
+		return true;
+#endif
+
+	/* otherwise only a non-empty namespace mask counts */
+	return opts.namespace != 0;
+}
- if (!l)
- return 0;
+/*
+ * Release a NULL-terminated vector of heap strings together with the
+ * vector itself. A NULL vector is accepted and ignored.
+ */
+static void free_oci_envp(char **p)
+{
+	size_t i;
- *l = '\0';
+
+	if (!p)
+		return;
+
+	for (i = 0; p[i]; ++i)
+		free(p[i]);
- if (mkdir_p(dir, mask))
- return -1;
+
+	free(p);
+}
- *l = '/';
+/*
+ * Release a NULL-terminated list of hooks: every hook's argv, envp and
+ * file, the hook structure itself and finally the list. A NULL list is
+ * accepted and ignored.
+ *
+ * The list is an array of pointers to individually allocated hooks (see
+ * parseOCIhook()), so it has to be walked slot by slot; stepping a
+ * struct pointer through memory never reaches the terminator.
+ */
+static void free_hooklist(struct hook_execvpe **hooklist)
+{
+	struct hook_execvpe **cur;
- ret = mkdir(dir, mask);
- if (ret && errno == EEXIST)
- return 0;
+
+	if (!hooklist)
+		return;
+
+	for (cur = hooklist; *cur; ++cur) {
+		free_oci_envp((*cur)->argv);
+		free_oci_envp((*cur)->envp);
+		free((*cur)->file);
+		free(*cur);
+	}
+	free(hooklist);
+}
+
+/*
+ * Release opts.sysctl: each entry's strings, the entry and the list.
+ *
+ * opts.sysctl is a NULL-terminated array of pointers (apply_sysctl()
+ * walks it the same way), so iterate over the slots rather than
+ * incrementing a struct pointer, which never becomes NULL.
+ */
+static void free_sysctl(void)
+{
+	struct sysctl_val **cur;
+
+	if (!opts.sysctl)
+		return;
+
+	for (cur = opts.sysctl; *cur; ++cur) {
+		free((*cur)->entry);
+		free((*cur)->value);
+		free(*cur);
+	}
+	free(opts.sysctl);
+}
+
+/* Release the NULL-terminated opts.devices list including every node path. */
+static void free_devices(void)
+{
+	size_t i;
+
+	if (!opts.devices)
+		return;
+
+	for (i = 0; opts.devices[i]; ++i) {
+		free(opts.devices[i]->path);
+		free(opts.devices[i]);
+	}
+
+	free(opts.devices);
+}
+
+/* Release every resource limit stored in opts.rlimits (NULL slots are fine). */
+static void free_rlimits(void)
+{
+	struct rlimit **slot = opts.rlimits;
+	struct rlimit **const end = opts.rlimits + RLIM_NLIMITS;
+
+	while (slot < end)
+		free(*slot++);
+}
+
+/*
+ * Release the memory referenced by opts.
+ *
+ * parent: true when called in the parent process. Only then the argument
+ * vector, the environment and the OCI seccomp filter are released as well;
+ * the child keeps them because it still needs them for exec.
+ */
+static void free_opts(bool parent)
+{
+	struct sock_fprog *prog;
+
+	free_library_search();
+	mount_free();
+	cgroups_free();
+
+	if (parent) {
+		prog = opts.ociseccomp;
+		if (prog) {
+			free(prog->filter);
+			free(prog);
+		}
+
+		free_oci_envp(opts.jail_argv);
+		free_oci_envp(opts.envp);
+	}
- if (ret)
- ERROR("mkdir(%s, %d) failed: %m\n", dir, mask);
+
+	free_rlimits();
+	free_sysctl();
+	free_devices();
+
+	free(opts.hostname);
+	free(opts.cwd);
+	free(opts.uidmap);
+	free(opts.gidmap);
+	free(opts.annotations);
+	free(opts.extroot);
+	free(opts.overlaydir);
+
+	free_hooklist(opts.hooks.createRuntime);
+	free_hooklist(opts.hooks.createContainer);
+	free_hooklist(opts.hooks.startContainer);
+	free_hooklist(opts.hooks.poststart);
+	free_hooklist(opts.hooks.poststop);
+}
+
+/*
+ * Mount an overlayfs on top of jail_root, using jail_root as lower layer
+ * and <overlaydir>/upper and <overlaydir>/work as upper and work
+ * directories (both are created when missing).
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int mount_overlay(char *jail_root, char *overlaydir)
+{
+	const char optsfmt[] = "lowerdir=%s,upperdir=%s,workdir=%s";
+	char *upper, *work, *mountopts, *etcdir, *resolvconf;
+	int ret = -1, fd;
+
+	if (asprintf(&upper, "%s%s", overlaydir, "/upper") < 0)
+		goto out;
+
+	if (asprintf(&work, "%s%s", overlaydir, "/work") < 0)
+		goto free_upper;
+
+	if (asprintf(&mountopts, optsfmt, jail_root, upper, work) < 0)
+		goto free_work;
+
+	if (mkdir_p(upper, 0755))
+		goto free_mountopts;
+
+	if (mkdir_p(work, 0755))
+		goto free_mountopts;
+
+	/*
+	 * Pre-create etc/resolv.conf in the upper layer so that the file is
+	 * owned by the jail's userns root. Overlayfs refuses to modify a file
+	 * that only exists in the lower layer and is owned by somebody else,
+	 * even when writes to the upper layer are allowed (behaviour described
+	 * in the overlayfs-userns patch).
+	 */
+	if (asprintf(&etcdir, "%s/etc", upper) < 0)
+		goto free_mountopts;
+
+	if (mkdir_p(etcdir, 0755))
+		goto free_etcdir;
+
+	if (asprintf(&resolvconf, "%s/resolv.conf", etcdir) < 0)
+		goto free_etcdir;
+
+	fd = creat(resolvconf, 0644);
+	if (fd >= 0)
+		close(fd);
+	else if (errno != EEXIST)
+		ERROR("creat(%s) failed: %m\n", resolvconf);
+
+	DEBUG("mount -t overlay %s %s (%s)\n", jail_root, jail_root, mountopts);
+
+	if (!mount(jail_root, jail_root, "overlay", MS_NOATIME, mountopts))
+		ret = 0;
+
+	free(resolvconf);
+free_etcdir:
+	free(etcdir);
+free_mountopts:
+	free(mountopts);
+free_work:
+	free(work);
+free_upper:
+	free(upper);
+out:
return ret;
}
-int mount_bind(const char *root, const char *path, int readonly, int error)
+/*
+ * Hand the given console fd to the "container" ubus object by invoking
+ * its "console_set" method with the jail name. The fd is closed locally
+ * only when the request went through; on failure it stays open and a
+ * message is logged.
+ */
+static void pass_console(int console_fd)
+{
+	static struct blob_buf req;
+	struct ubus_context *ctx;
+	uint32_t id;
+	int failed;
+
+	ctx = ubus_connect(NULL);
+	if (!ctx)
+		return;
+
+	blob_buf_init(&req, 0);
+	blobmsg_add_string(&req, "name", opts.name);
+
+	/* the invoke is only attempted when the lookup succeeded */
+	failed = ubus_lookup_id(ctx, "container", &id);
+	if (!failed)
+		failed = ubus_invoke_fd(ctx, id, "console_set", req.head, NULL, NULL, 3000, console_fd);
+
+	if (failed)
+		INFO("ubus request failed\n");
+	else
+		close(console_fd);
+
+	blob_buf_free(&req);
+	ubus_free(ctx);
+}
+
+/*
+ * Allocate a pseudo terminal for the jail: the master side is handed to
+ * procd via pass_console(), the slave side is bind-mounted on
+ * <jail_root>/dev/console and becomes stdin/stdout/stderr of this process.
+ *
+ * Returns 0 on success, -1 when no pty master could be opened and 1 when a
+ * later step failed.
+ *
+ * NOTE(review): pass_console() closes the master fd when the ubus request
+ * succeeds, so the close() at no_console may hit an already closed
+ * descriptor if a later step fails -- confirm whether that matters.
+ */
+static int create_dev_console(const char *jail_root)
+{
+	char *console_fname;
+	char dev_console_path[PATH_MAX];
+	int slave_console_fd, dev_console_dummy;
+
+	/* Open UNIX/98 virtual console */
+	console_fd = posix_openpt(O_RDWR | O_NOCTTY);
+	if (console_fd < 0)
+		return -1;
+
+	/* check the name before it is used as a %s argument */
+	console_fname = ptsname(console_fd);
+	if (!console_fname)
+		goto no_console;
+
+	DEBUG("got console fd %d and PTS client name %s\n", console_fd, console_fname);
+
+	/* the slave cannot be opened unless both calls succeed */
+	if (grantpt(console_fd) || unlockpt(console_fd))
+		goto no_console;
+
+	/* pass PTY master to procd */
+	pass_console(console_fd);
+
+	/* mount-bind PTY slave to /dev/console in jail */
+	snprintf(dev_console_path, sizeof(dev_console_path), "%s/dev/console", jail_root);
+	dev_console_dummy = creat(dev_console_path, 0620);
+	if (dev_console_dummy < 0)
+		goto no_console;
+
+	close(dev_console_dummy);
+
+	if (mount(console_fname, dev_console_path, "bind", MS_BIND, NULL))
+		goto no_console;
+
+	/* use PTY slave for stdio */
+	slave_console_fd = open(console_fname, O_RDWR); /* | O_NOCTTY */
+	if (slave_console_fd < 0)
+		goto no_console;
+
+	dup2(slave_console_fd, 0);
+	dup2(slave_console_fd, 1);
+	dup2(slave_console_fd, 2);
+	close(slave_console_fd);
+
+	INFO("using guest console %s\n", console_fname);
+
+	return 0;
+
+no_console:
+	close(console_fd);
+	return 1;
+}
+
+static int hook_running = 0;
+static int hook_return_code = 0;
+static struct hook_execvpe **current_hook = NULL;
+typedef void (*hook_return_handler)(void);
+static hook_return_handler hook_return_cb = NULL;
+
+static void hook_process_timeout_cb(struct uloop_timeout *t);
+static struct uloop_timeout hook_process_timeout = {
+ .cb = hook_process_timeout_cb,
+};
+
+static void run_hooklist(void);
+/*
+ * uloop process callback for a finished hook: cancel the kill timer,
+ * record exit status or terminating signal in hook_return_code, then
+ * advance to the next hook of the current list.
+ */
+static void hook_process_handler(struct uloop_process *c, int ret)
+{
+	uloop_timeout_cancel(&hook_process_timeout);
+
+	if (!WIFEXITED(ret)) {
+		hook_return_code = WTERMSIG(ret);
+		ERROR("hook (%d) exited with signal: %d\n", c->pid, hook_return_code);
+	} else {
+		hook_return_code = WEXITSTATUS(ret);
+		/* a non-zero exit status is reported as error, success only as debug */
+		if (hook_return_code != 0)
+			ERROR("hook (%d) exited with exit: %d\n", c->pid, hook_return_code);
+		else
+			DEBUG("hook (%d) exited with exit: %d\n", c->pid, hook_return_code);
+	}
+
+	hook_running = 0;
+	++current_hook;
+	run_hooklist();
+}
+
+static struct uloop_process hook_process = {
+ .cb = hook_process_handler,
+};
+
+static void hook_process_timeout_cb(struct uloop_timeout *t)
+{
+ DEBUG("hook process failed to stop, sending SIGKILL\n");
+ kill(hook_process.pid, SIGKILL);
+}
+
+/*
+ * Run the hook current_hook points at and wait for it in a nested uloop;
+ * hook_process_handler() advances to the next hook when it finishes.
+ * When the end of the list is reached, hook_return_cb() is called.
+ *
+ * A hook that cannot be stat()ed, is not executable or cannot be forked
+ * is reported through hook_process_handler() and then skipped: every
+ * failure path returns right away instead of falling through and
+ * inspecting an uninitialised struct stat or registering a process that
+ * was never started.
+ */
+static void run_hooklist(void)
{
+	struct hook_execvpe *hook = *current_hook;
struct stat s;
- char new[PATH_MAX];
- int fd;
- if (stat(path, &s)) {
- ERROR("stat(%s) failed: %m\n", path);
- return error;
+	if (!hook) {
+		hook_return_cb();
+		return;
+	}
+
+	DEBUG("executing hook %s\n", hook->file);
+
+	if (stat(hook->file, &s)) {
+		hook_process_handler(&hook_process, ENOENT);
+		return;
+	}
+
+	if (!((unsigned long)s.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
+		hook_process_handler(&hook_process, EPERM);
+		return;
+	}
+
+	hook_running = 1;
+	hook_process.pid = fork();
+	if (hook_process.pid == 0) {
+		/* child */
+		execve(hook->file, hook->argv, hook->envp);
+		ERROR("execve error %m\n");
+		_exit(errno);
+	} else if (hook_process.pid < 0) {
+		/* fork error */
+		ERROR("hook fork error\n");
+		hook_running = 0;
+		hook_process_handler(&hook_process, errno);
+		return;
}
- snprintf(new, sizeof(new), "%s%s", root, path);
- if (S_ISDIR(s.st_mode)) {
- mkdir_p(new, 0755);
- } else {
- mkdir_p(dirname(new), 0755);
- snprintf(new, sizeof(new), "%s%s", root, path);
- fd = creat(new, 0644);
- if (fd == -1) {
- ERROR("creat(%s) failed: %m\n", new);
- return -1;
+	/* parent */
+	uloop_process_add(&hook_process);
+
+	if (hook->timeout > 0)
+		uloop_timeout_set(&hook_process_timeout, 1000 * hook->timeout);
+
+	uloop_run();
+	if (hook_running) {
+		/* the loop ended while the hook is still alive: terminate it */
+		DEBUG("uloop interrupted, killing jail process\n");
+		kill(hook_process.pid, SIGTERM);
+		uloop_timeout_set(&hook_process_timeout, 1000);
+		uloop_run();
+	}
+}
+
+/*
+ * Run all hooks of hooklist one after another and call return_cb once the
+ * list is exhausted.
+ *
+ * hooklist:  NULL-terminated list of hooks, or NULL when there are none
+ * return_cb: continuation invoked after the last hook
+ *
+ * Without a list the continuation is called directly and the function
+ * returns; it must not go on to run_hooklist(), which dereferences
+ * current_hook.
+ */
+static void run_hooks(struct hook_execvpe **hooklist, hook_return_handler return_cb)
+{
+	if (!hooklist) {
+		return_cb();
+		return;
+	}
+
+	current_hook = hooklist;
+	hook_return_cb = return_cb;
+
+	run_hooklist();
+}
+
+/*
+ * Write every entry of opts.sysctl to <jail_root>/proc/sys/<entry>. procfs
+ * is mounted on <jail_root>/proc only for the duration of the call.
+ *
+ * Returns 0 on success (or when there is nothing to do) and an errno-style
+ * value otherwise. All exits past the initial allocation share one cleanup
+ * path, so a failing entry no longer leaks the path buffer or leaves proc
+ * mounted, and the error code is captured before free()/close() can
+ * overwrite errno.
+ */
+static int apply_sysctl(const char *jail_root)
+{
+	struct sysctl_val **cur;
+	char *procdir, *fname;
+	int f, ret = 0;
+
+	if (!opts.sysctl)
+		return 0;
+
+	if (asprintf(&procdir, "%s/proc", jail_root) < 0)
+		return ENOMEM;
+
+	mkdir(procdir, 0700);
+	if (mount("proc", procdir, "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0)) {
+		ret = EPERM;
+		goto out_rmdir;
+	}
+
+	for (cur = opts.sysctl; *cur; ++cur) {
+		if (asprintf(&fname, "%s/sys/%s", procdir, (*cur)->entry) < 0) {
+			ret = ENOMEM;
+			break;
+		}
+
+		DEBUG("sysctl: writing '%s' to %s\n", (*cur)->value, fname);
+
+		f = open(fname, O_WRONLY);
+		if (f < 0) {
+			ret = errno ? errno : EIO;
+			ERROR("sysctl: can't open %s\n", fname);
+			free(fname);
+			break;
}
- close(fd);
+		if (write(f, (*cur)->value, strlen((*cur)->value)) < 0) {
+			ret = errno ? errno : EIO;
+			ERROR("sysctl: write to %s\n", fname);
+			free(fname);
+			close(f);
+			break;
+		}
+
+		free(fname);
+		close(f);
}
+	umount(procdir);
+out_rmdir:
+	rmdir(procdir);
+	free(procdir);
- if (mount(path, new, NULL, MS_BIND, NULL)) {
- ERROR("failed to mount -B %s %s: %m\n", path, new);
- return -1;
+	return ret;
+}
+
+/* glibc defines makedev calling a function. make sure it's a pure macro */
+#if defined(__GLIBC__)
+#undef makedev
+/* from musl's sys/sysmacros.h */
+#define makedev(x,y) ( \
+ (((x)&0xfffff000ULL) << 32) | \
+ (((x)&0x00000fffULL) << 8) | \
+ (((y)&0xffffff00ULL) << 12) | \
+ (((y)&0x000000ffULL)) )
+#endif
+
+static struct mknod_args default_devices[] = {
+ { .path = "/dev/null", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 3) },
+ { .path = "/dev/zero", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 5) },
+ { .path = "/dev/full", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 7) },
+ { .path = "/dev/random", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 8) },
+ { .path = "/dev/urandom", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH), .dev = makedev(1, 9) },
+ { .path = "/dev/tty", .mode = (S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP), .dev = makedev(5, 0), .gid = 5 },
+ { 0 },
+};
+
+/*
+ * Populate /dev: first the nodes listed in opts.devices (if any), then the
+ * built-in default_devices, and finally a fixed set of symbolic links.
+ *
+ * Returns 0 on success or an errno-style value. A default node that cannot
+ * be created is skipped, and a failing symlink only produces a warning.
+ *
+ * Configured nodes must live below "/dev/". The prefix is compared
+ * including the trailing slash so that sibling paths merely starting with
+ * "/dev" (for example "/devfoo") are rejected as well.
+ * NOTE(review): ".." components after the prefix are not filtered here.
+ */
+static int create_devices(void)
+{
+	/* { link target, link path } */
+	static const char *const dev_links[][2] = {
+		{ "/dev/pts/ptmx", "/dev/ptmx" },
+		{ "/proc/self/fd", "/dev/fd" },
+		{ "/proc/self/fd/0", "/dev/stdin" },
+		{ "/proc/self/fd/1", "/dev/stdout" },
+		{ "/proc/self/fd/2", "/dev/stderr" },
+	};
+	struct mknod_args **cur, *curdef;
+	char *path, *tmp;
+	size_t i;
+
+	if (!opts.devices)
+		goto only_default_devices;
+
+	cur = opts.devices;
+
+	while (*cur) {
+		path = (*cur)->path;
+		/* don't allow devices outside of /dev */
+		if (strncmp(path, "/dev/", 5))
+			return EPERM;
+
+		/* make sure parent folder exists */
+		tmp = strrchr(path, '/');
+		if (!tmp)
+			return EINVAL;
+
+		/* temporarily cut the path at the last slash to get the parent */
+		*tmp = '\0';
+		if (strcmp(path, "/dev")) {
+			DEBUG("creating directory %s\n", path);
+
+			mkdir_p(path, 0755);
+		}
+		*tmp = '/';
+
+		DEBUG("creating %s (mode=%08o)\n", path, (*cur)->mode);
+
+		/* create device */
+		if (mknod(path, (*cur)->mode, (*cur)->dev))
+			return errno;
+
+		/* change owner, if needed */
+		if (((*cur)->uid || (*cur)->gid) &&
+		    chown(path, (*cur)->uid, (*cur)->gid))
+			return errno;
+
+		++cur;
}
- if (readonly && mount(NULL, new, NULL, MS_BIND | MS_REMOUNT | MS_RDONLY, NULL)) {
- ERROR("failed to remount ro %s: %m\n", new);
- return -1;
+only_default_devices:
+	curdef = default_devices;
+	while(curdef->path) {
+		DEBUG("creating %s (mode=%08o)\n", curdef->path, curdef->mode);
+		if (mknod(curdef->path, curdef->mode, curdef->dev)) {
+			++curdef;
+			continue; /* may already exist, eg. due to a bind-mount */
+		}
+		if ((curdef->uid || curdef->gid) &&
+		    chown(curdef->path, curdef->uid, curdef->gid))
+			return errno;
+
+		++curdef;
}
- DEBUG("mount -B %s %s (%s)\n", path, new, readonly?"ro":"rw");
+	/* Dev symbolic links as defined in OCI spec */
+	for (i = 0; i < sizeof(dev_links) / sizeof(dev_links[0]); ++i) {
+		if (symlink(dev_links[i][0], dev_links[i][1]) < 0)
+			WARNING("symlink() failed to create link to %s", dev_links[i][0]);
+	}
+
return 0;
}
+static char jail_root[] = "/tmp/ujail-XXXXXX";
+static char tmpovdir[] = "/tmp/ujail-overlay-XXXXXX";
+static mode_t old_umask;
+static void enter_jail_fs(void);
static int build_jail_fs(void)
{
- char jail_root[] = "/tmp/ujail-XXXXXX";
+ char *overlaydir = NULL;
+ int ret;
+
+ old_umask = umask(0);
+
if (mkdtemp(jail_root) == NULL) {
ERROR("mkdtemp(%s) failed: %m\n", jail_root);
return -1;
}
+ if (apply_sysctl(jail_root)) {
+ ERROR("failed to apply sysctl values\n");
+ return -1;
+ }
+
/* oldroot can't be MS_SHARED else pivot_root() fails */
- if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) {
+ if (mount("none", "/", "none", MS_REC|MS_PRIVATE, NULL)) {
ERROR("private mount failed %m\n");
return -1;
}
- if (mount("tmpfs", jail_root, "tmpfs", MS_NOATIME, "mode=0755")) {
- ERROR("tmpfs mount failed %m\n");
- return -1;
+ if (opts.extroot) {
+ if (mount(opts.extroot, jail_root, "bind", MS_BIND, NULL)) {
+ ERROR("extroot mount failed %m\n");
+ return -1;
+ }
+ } else {
+ if (mount("tmpfs", jail_root, "tmpfs", MS_NOATIME, "mode=0755")) {
+ ERROR("tmpfs mount failed %m\n");
+ return -1;
+ }
+ }
+
+ if (opts.tmpoverlaysize) {
+ char mountoptsstr[] = "mode=0755,size=XXXXXXXX";
+
+ snprintf(mountoptsstr, sizeof(mountoptsstr),
+ "mode=0755,size=%s", opts.tmpoverlaysize);
+ if (mkdtemp(tmpovdir) == NULL) {
+ ERROR("mkdtemp(%s) failed: %m\n", jail_root);
+ return -1;
+ }
+ if (mount("tmpfs", tmpovdir, "tmpfs", MS_NOATIME,
+ mountoptsstr)) {
+ ERROR("failed to mount tmpfs for overlay (size=%s)\n", opts.tmpoverlaysize);
+ return -1;
+ }
+ overlaydir = tmpovdir;
+ }
+
+ if (opts.overlaydir)
+ overlaydir = opts.overlaydir;
+
+ if (overlaydir) {
+ ret = mount_overlay(jail_root, overlaydir);
+ if (ret)
+ return ret;
}
if (chdir(jail_root)) {
return -1;
}
+ if (opts.console)
+ create_dev_console(jail_root);
+
+ /* make sure /etc/resolv.conf exists if in new network namespace */
+ if (opts.namespace & CLONE_NEWNET) {
+ char jailetc[PATH_MAX], jaillink[PATH_MAX];
+
+ snprintf(jailetc, PATH_MAX, "%s/etc", jail_root);
+ mkdir_p(jailetc, 0755);
+ snprintf(jaillink, PATH_MAX, "%s/etc/resolv.conf", jail_root);
+ if (overlaydir)
+ unlink(jaillink);
+
+ ret = symlink("../dev/resolv.conf.d/resolv.conf.auto", jaillink);
+ if (ret < 0)
+ WARNING("symlink() failed to create link to ../dev/resolv.conf.d/resolv.conf.auto");
+ }
+
+ run_hooks(opts.hooks.createContainer, enter_jail_fs);
+
+ return 0;
+}
+
+static bool exit_from_child;
+/*
+ * Release everything referenced by opts and terminate with exit code ret.
+ * cgroup state (only for OCI bundles) and the parent's ubus connection
+ * are released only when not exiting from the child.
+ */
+static void free_and_exit(int ret)
+{
+	const bool in_parent = !exit_from_child;
+
+	if (in_parent) {
+		if (opts.ocibundle)
+			cgroups_free();
+
+		if (parent_ctx)
+			ubus_free(parent_ctx);
+	}
+
+	free_opts(in_parent);
+
+	exit(ret);
+}
+
+static void post_jail_fs(void);
+static void enter_jail_fs(void) /* pivot into jail_root, detach the old root, create device nodes, then continue with post_jail_fs() */
+{
char dirbuf[sizeof(jail_root) + 4]; /* jail_root plus the four characters of "/old" */
+
snprintf(dirbuf, sizeof(dirbuf), "%s/old", jail_root);
mkdir(dirbuf, 0755);
if (pivot_root(jail_root, dirbuf) == -1) { /* old root ends up on <jail_root>/old */
ERROR("pivot_root(%s, %s) failed: %m\n", jail_root, dirbuf);
- return -1;
+ free_and_exit(-1);
}
if (chdir("/")) {
ERROR("chdir(/) (after pivot_root) failed: %m\n");
+ free_and_exit(-1);
+ }
+
+ snprintf(dirbuf, sizeof(dirbuf), "/old%s", jail_root); /* the temporary jail directory as seen through the old root */
+ umount2(dirbuf, MNT_DETACH);
+ rmdir(dirbuf);
+ if (opts.tmpoverlaysize) { /* same clean-up for the tmpfs overlay directory */
+ char tmpdirbuf[sizeof(tmpovdir) + 4];
+ snprintf(tmpdirbuf, sizeof(tmpdirbuf), "/old%s", tmpovdir);
+ umount2(tmpdirbuf, MNT_DETACH);
+ rmdir(tmpdirbuf);
+ }
+
+ umount2("/old", MNT_DETACH); /* drop the old root itself */
+ rmdir("/old");
+
+ if (create_devices()) {
+ ERROR("create_devices() failed\n");
+ free_and_exit(-1);
+ }
+ if (opts.ronly)
+ mount(NULL, "/", "bind", MS_REMOUNT | MS_BIND | MS_RDONLY, 0); /* NOTE(review): result is not checked, a failed read-only remount goes unnoticed */
+
+ umask(old_umask); /* restore the umask saved in build_jail_fs() */
+ post_jail_fs();
+}
+
+/*
+ * Write mapstr to /proc/<child_pid>/uid_map, or to gid_map when gidmap is
+ * true.
+ *
+ * Returns 0 on success and -1 when the file cannot be opened or written.
+ * dprintf() returns the number of bytes written, so only a negative result
+ * is an error; treating any non-zero result as failure would report every
+ * successful write as an error.
+ */
+static int write_uid_gid_map(pid_t child_pid, bool gidmap, char *mapstr)
+{
+	int map_file;
+	char map_path[64];
+
+	if (snprintf(map_path, sizeof(map_path), "/proc/%d/%s",
+		     child_pid, gidmap?"gid_map":"uid_map") < 0)
+		return -1;
+
+	if ((map_file = open(map_path, O_WRONLY)) < 0)
+		return -1;
+
+	if (dprintf(map_file, "%s", mapstr) < 0) {
+		close(map_file);
+		return -1;
+	}
+
+	close(map_file);
+	return 0;
+}
+
+/*
+ * Write a one-line mapping "0 <id> 1" to /proc/<child_pid>/uid_map, or to
+ * gid_map when gidmap is true. Returns 0 on success, -1 on failure.
+ */
+static int write_single_uid_gid_map(pid_t child_pid, bool gidmap, int id)
+{
+	const char *which = gidmap ? "gid_map" : "uid_map";
+	char map_path[64];
+	int fd, written;
+
+	if (snprintf(map_path, sizeof(map_path), "/proc/%d/%s", child_pid, which) < 0)
+		return -1;
+
+	fd = open(map_path, O_WRONLY);
+	if (fd < 0)
+		return -1;
+
+	written = dprintf(fd, "%d %d %d\n", 0, id, 1);
+	close(fd);
+
+	return (written < 0) ? -1 : 0;
+}
+
+/*
+ * Write "allow" or "deny" (depending on allow) to
+ * /proc/<child_pid>/setgroups. Returns 0 on success, -1 on failure.
+ */
+static int write_setgroups(pid_t child_pid, bool allow)
+{
+	const char *verdict = allow ? "allow" : "deny";
+	char setgroups_path[64];
+	int fd, written;
+
+	if (snprintf(setgroups_path, sizeof(setgroups_path),
+		     "/proc/%d/setgroups", child_pid) < 0)
+		return -1;
+
+	fd = open(setgroups_path, O_WRONLY);
+	if (fd < 0) {
return -1;
}
- snprintf(dirbuf, sizeof(dirbuf), "/old%s", jail_root);
- rmdir(dirbuf);
- umount2("/old", MNT_DETACH);
- rmdir("/old");
+	written = dprintf(fd, "%s", verdict);
+	close(fd);
+
+	return (written == -1) ? -1 : 0;
+}
+
+/*
+ * Resolve opts.user and opts.group to numeric ids.
+ *
+ * user, user_gid: uid and primary gid of opts.user, or -1 when no user is set
+ * gr_gid:         gid of opts.group, or -1 when no group is set
+ *
+ * A name that cannot be resolved terminates the process.
+ */
+static void get_jail_user(int *user, int *user_gid, int *gr_gid)
+{
+	struct passwd *pw;
+	struct group *gr;
+
+	if (!opts.user) {
+		*user = -1;
+		*user_gid = -1;
+	} else {
+		pw = getpwnam(opts.user);
+		if (!pw) {
+			ERROR("failed to get uid/gid for user %s: %d (%s)\n",
+			      opts.user, errno, strerror(errno));
+			free_and_exit(EXIT_FAILURE);
+		}
+		*user = pw->pw_uid;
+		*user_gid = pw->pw_gid;
+	}
+
+	if (!opts.group) {
+		*gr_gid = -1;
+		return;
+	}
+
+	gr = getgrnam(opts.group);
+	if (!gr) {
+		ERROR("failed to get gid for group %s: %m\n", opts.group);
+		free_and_exit(EXIT_FAILURE);
+	}
+	*gr_gid = gr->gr_gid;
+}
+
+/*
+ * Switch to the given identity: supplementary groups of opts.user first,
+ * then the group id, then the user id. An id of -1 leaves the respective
+ * setting untouched; any failure terminates the process.
+ */
+static void set_jail_user(int pw_uid, int user_gid, int gr_gid)
+{
+	if (opts.user && user_gid != -1) {
+		if (initgroups(opts.user, user_gid)) {
+			ERROR("failed to initgroups() for user %s: %m\n", opts.user);
+			free_and_exit(EXIT_FAILURE);
+		}
+	}
+
+	if (gr_gid != -1) {
+		if (setregid(gr_gid, gr_gid)) {
+			ERROR("failed to set group id %d: %m\n", gr_gid);
+			free_and_exit(EXIT_FAILURE);
+		}
+	}
+
+	if (pw_uid != -1) {
+		if (setreuid(pw_uid, pw_uid)) {
+			ERROR("failed to set user id %d: %m\n", pw_uid);
+			free_and_exit(EXIT_FAILURE);
+		}
+	}
+}
+
+/*
+ * Apply every resource limit configured in opts.rlimits with setrlimit().
+ * Returns 0 on success or errno of the first failing call.
+ */
+static int apply_rlimits(void)
+{
+	struct rlimit *lim;
+	int resource;
+
+	for (resource = 0; resource < RLIM_NLIMITS; ++resource) {
+		lim = opts.rlimits[resource];
+		if (!lim)
+			continue;
+
+		DEBUG("applying limits to resource %u\n", resource);
+
+		if (setrlimit(resource, lim))
+			return errno;
+	}
+
+	return 0;
+}
+
+#define MAX_ENVP 64
+/*
+ * Assemble the environment of the jailed process.
+ *
+ * seccomp: seccomp filter file or NULL; when set, SECCOMP_FILE,
+ *          SECCOMP_DEBUG and LD_PRELOAD entries are added
+ * ocienvp: NULL-terminated list of additional entries, or NULL
+ *
+ * Returns a pointer to a static, NULL-terminated vector, or NULL when a
+ * seccomp file is given but the preload library cannot be found. The
+ * entries of ocienvp are referenced, not copied.
+ *
+ * The last slot of the vector is reserved for the terminating NULL that
+ * execve() requires, so at most MAX_ENVP - 1 entries are stored and
+ * surplus entries are dropped with an error message.
+ */
+static char** build_envp(const char *seccomp, char **ocienvp)
+{
+	static char *envp[MAX_ENVP];
+	static char preload_var[PATH_MAX];
+	static char seccomp_var[PATH_MAX];
+	static char seccomp_debug_var[20];
+	static char debug_var[] = "LD_DEBUG=all";
+	static char container_var[] = "container=ujail";
+	const char *preload_lib = find_lib("libpreload-seccomp.so");
+	char **addenv;
+
+	int count = 0;
+
+	if (seccomp && !preload_lib) {
+		ERROR("failed to add preload-lib to env\n");
+		return NULL;
+	}
+	if (seccomp) {
+		snprintf(seccomp_var, sizeof(seccomp_var), "SECCOMP_FILE=%s", seccomp);
+		envp[count++] = seccomp_var;
+		snprintf(seccomp_debug_var, sizeof(seccomp_debug_var), "SECCOMP_DEBUG=%2d", debug);
+		envp[count++] = seccomp_debug_var;
+		snprintf(preload_var, sizeof(preload_var), "LD_PRELOAD=%s", preload_lib);
+		envp[count++] = preload_var;
+	}
+
+	envp[count++] = container_var;
+
+	if (debug > 1)
+		envp[count++] = debug_var;
+
+	for (addenv = ocienvp; addenv && *addenv; ++addenv) {
+		/* keep one slot free for the terminator */
+		if (count >= MAX_ENVP - 1) {
+			ERROR("environment limited to %d extra records, truncating\n", MAX_ENVP);
+			break;
+		}
+		envp[count++] = *addenv;
+	}
+
+	/* the vector is static: terminate explicitly on every call */
+	envp[count] = NULL;
+
+	return envp;
+}
+
+static void usage(void)
+{
+ fprintf(stderr, "ujail <options> -- <binary> <params ...>\n");
+ fprintf(stderr, " -d <num>\tshow debug log (increase num to increase verbosity)\n");
+ fprintf(stderr, " -S <file>\tseccomp filter config\n");
+ fprintf(stderr, " -C <file>\tcapabilities drop config\n");
+ fprintf(stderr, " -c\t\tset PR_SET_NO_NEW_PRIVS\n");
+ fprintf(stderr, " -n <name>\tthe name of the jail\n");
+ fprintf(stderr, " -e <var>\timport environment variable\n");
+ fprintf(stderr, "namespace jail options:\n");
+ fprintf(stderr, " -h <hostname>\tchange the hostname of the jail\n");
+ fprintf(stderr, " -N\t\tjail has network namespace\n");
+ fprintf(stderr, " -f\t\tjail has user namespace\n");
+ fprintf(stderr, " -F\t\tjail has cgroups namespace\n");
+ fprintf(stderr, " -r <file>\treadonly files that should be staged\n");
+ fprintf(stderr, " -w <file>\twriteable files that should be staged\n");
+ fprintf(stderr, " -p\t\tjail has /proc\n");
+ fprintf(stderr, " -s\t\tjail has /sys\n");
+ fprintf(stderr, " -l\t\tjail has /dev/log\n");
+ fprintf(stderr, " -u\t\tjail has a ubus socket\n");
+ fprintf(stderr, " -U <name>\tuser to run jailed process\n");
+ fprintf(stderr, " -G <name>\tgroup to run jailed process\n");
+ fprintf(stderr, " -o\t\tremont jail root (/) read only\n");
+ fprintf(stderr, " -R <dir>\texternal jail rootfs (system container)\n");
+ fprintf(stderr, " -O <dir>\tdirectory for r/w overlayfs\n");
+ fprintf(stderr, " -T <size>\tuse tmpfs r/w overlayfs with <size>\n");
+ fprintf(stderr, " -E\t\tfail if jail cannot be setup\n");
+ fprintf(stderr, " -y\t\tprovide jail console\n");
+ fprintf(stderr, " -J <dir>\tcreate container from OCI bundle\n");
+ fprintf(stderr, " -i\t\tstart container immediately\n");
+ fprintf(stderr, " -P <pidfile>\tcreate <pidfile>\n");
+ fprintf(stderr, "\nWarning: by default root inside the jail is the same\n\
+and he has the same powers as root outside the jail,\n\
+thus he can escape the jail and/or break stuff.\n\
+Please use seccomp/capabilities (-S/-C) to restrict his powers\n\n\
+If you use none of the namespace jail options,\n\
+ujail will not use namespace/build a jail,\n\
+and will only drop capabilities/apply seccomp filter.\n\n");
+}
+
+/*
+ * Map a CLONE_NEW* namespace type to the matching fd slot in opts.setns.
+ * Returns NULL for an unknown type.
+ */
+static int* get_namespace_fd(const unsigned int nstype)
+{
+	if (nstype == CLONE_NEWPID)
+		return &opts.setns.pid;
+	if (nstype == CLONE_NEWNET)
+		return &opts.setns.net;
+	if (nstype == CLONE_NEWNS)
+		return &opts.setns.ns;
+	if (nstype == CLONE_NEWIPC)
+		return &opts.setns.ipc;
+	if (nstype == CLONE_NEWUTS)
+		return &opts.setns.uts;
+	if (nstype == CLONE_NEWUSER)
+		return &opts.setns.user;
+	if (nstype == CLONE_NEWCGROUP)
+		return &opts.setns.cgroup;
+#ifdef CLONE_NEWTIME
+	if (nstype == CLONE_NEWTIME)
+		return &opts.setns.time;
+#endif
+
+	return NULL;
+}
+
+/*
+ * Join the namespace of the given type through the fd stored in
+ * opts.setns, if any, and close that fd afterwards.
+ *
+ * Returns 0 on success or when no fd is set for this type, otherwise the
+ * errno of the failed setns() call. The error code is saved before
+ * close() runs because close() may overwrite errno.
+ */
+static int setns_open(unsigned long nstype)
+{
+	int *fd = get_namespace_fd(nstype);
+	int err = 0;
+
+	assert(fd != NULL);
+
+	if (*fd < 0)
+		return 0;
+
+	if (setns(*fd, nstype) == -1)
+		err = errno;
+
+	close(*fd);
+	return err;
+}
+
+static int jail_running = 0;
+static int jail_return_code = 0;
+
+static void jail_process_timeout_cb(struct uloop_timeout *t);
+static struct uloop_timeout jail_process_timeout = {
+ .cb = jail_process_timeout_cb,
+};
+static void poststop(void);
+/*
+ * uloop process callback for the jailed process: cancel the kill timer,
+ * record exit status or terminating signal in jail_return_code and run
+ * poststop().
+ */
+static void jail_process_handler(struct uloop_process *c, int ret)
+{
+	const bool exited = WIFEXITED(ret);
+
+	uloop_timeout_cancel(&jail_process_timeout);
+
+	if (!exited) {
+		jail_return_code = WTERMSIG(ret);
+		INFO("jail (%d) exited with signal: %d\n", c->pid, jail_return_code);
+	} else {
+		jail_return_code = WEXITSTATUS(ret);
+		INFO("jail (%d) exited with exit: %d\n", c->pid, jail_return_code);
+	}
+
+	jail_running = 0;
+	poststop();
+}
+
+static struct uloop_process jail_process = {
+ .cb = jail_process_handler,
+};
+
+static void jail_process_timeout_cb(struct uloop_timeout *t)
+{
+ DEBUG("jail process failed to stop, sending SIGKILL\n");
+ kill(jail_process.pid, SIGKILL);
+}
+
+/*
+ * Signal handler: forward signo to the running hook and/or the jailed
+ * process. For SIGTERM a timer is armed that sends SIGKILL after
+ * opts.term_timeout seconds if the process is still around.
+ */
+static void jail_handle_signal(int signo)
+{
+	const bool is_term = (signo == SIGTERM);
+
+	if (hook_running) {
+		DEBUG("forwarding signal %d to the hook process\n", signo);
+		kill(hook_process.pid, signo);
+		/* escalate to SIGKILL later in case SIGTERM is not enough */
+		if (is_term)
+			uloop_timeout_set(&hook_process_timeout, opts.term_timeout * 1000);
+	}
+
+	if (jail_running) {
+		DEBUG("forwarding signal %d to the jailed process\n", signo);
+		kill(jail_process.pid, signo);
+		/* escalate to SIGKILL later in case SIGTERM is not enough */
+		if (is_term)
+			uloop_timeout_set(&jail_process_timeout, opts.term_timeout * 1000);
+	}
+}
+
+/*
+ * Install jail_handle_signal() for every signal contained in a filled
+ * signal set, except SIGCHLD, SIGPIPE, SIGSEGV, SIGSTOP and SIGKILL.
+ *
+ * Signal numbers start at 1; 0 is not a valid signal, so the loop no
+ * longer passes it to sigismember()/sigaction().
+ */
+static void signals_init(void)
+{
+	int i;
+	sigset_t sigmask;
+
+	sigfillset(&sigmask);
+	for (i = 1; i < _NSIG; i++) {
+		struct sigaction s = { 0 };
+
+		if (!sigismember(&sigmask, i))
+			continue;
+		if ((i == SIGCHLD) || (i == SIGPIPE) || (i == SIGSEGV) || (i == SIGSTOP) || (i == SIGKILL))
+			continue;
+
+		s.sa_handler = jail_handle_signal;
+		sigaction(i, &s, NULL);
+	}
+}
+
+static void pre_exec_jail(struct uloop_timeout *t);
+static struct uloop_timeout pre_exec_timeout = {
+ .cb = pre_exec_jail,
+};
+
+int pipes[4];
+/*
+ * Entry point of the child process (arg is unused).
+ *
+ * Joins the namespaces for which an fd was handed over, synchronises with
+ * the parent over the pipes ('i' is sent, 'O' is expected back), becomes
+ * root inside a user namespace, sets the hostname and then enters uloop,
+ * where pre_exec_jail() carries on. Does not return on success.
+ *
+ * A failure to join a requested namespace is fatal: carrying on would run
+ * the jailed program in a namespace the caller did not ask for.
+ */
+static int exec_jail(void *arg)
+{
+	static const unsigned long early_ns[] = {
+		CLONE_NEWUSER,
+		CLONE_NEWNET,
+		CLONE_NEWNS,
+		CLONE_NEWIPC,
+		CLONE_NEWUTS,
+	};
+	char buf[1];
+	size_t i;
+	int err;
+
+	exit_from_child = true;
+	prctl(PR_SET_SECUREBITS, 0);
+
+	uloop_init();
+	signals_init();
+
+	close(pipes[0]);
+	close(pipes[3]);
+
+	for (i = 0; i < sizeof(early_ns) / sizeof(early_ns[0]); ++i) {
+		err = setns_open(early_ns[i]);
+		if (err) {
+			errno = err;
+			ERROR("failed to join namespace 0x%lx: %m\n", early_ns[i]);
+			return EXIT_FAILURE;
+		}
+	}
+
+	buf[0] = 'i';
+	if (write(pipes[1], buf, 1) < 1) {
+		ERROR("can't write to parent\n");
+		return EXIT_FAILURE;
+	}
+	close(pipes[1]);
+	if (read(pipes[2], buf, 1) < 1) {
+		ERROR("can't read from parent\n");
+		return EXIT_FAILURE;
+	}
+	if (buf[0] != 'O') {
+		ERROR("parent had an error, child exiting\n");
+		return EXIT_FAILURE;
+	}
+
+	/* the result of unshare() is not checked here */
+	if (opts.namespace & CLONE_NEWCGROUP)
+		unshare(CLONE_NEWCGROUP);
+
+	err = setns_open(CLONE_NEWCGROUP);
+	if (err) {
+		errno = err;
+		ERROR("failed to join cgroup namespace: %m\n");
+		free_and_exit(EXIT_FAILURE);
+	}
+
+	if ((opts.namespace & CLONE_NEWUSER) || (opts.setns.user != -1)) {
+		if (setregid(0, 0) < 0) {
+			ERROR("setgid\n");
+			free_and_exit(EXIT_FAILURE);
+		}
+		if (setreuid(0, 0) < 0) {
+			ERROR("setuid\n");
+			free_and_exit(EXIT_FAILURE);
+		}
+		if (setgroups(0, NULL) < 0) {
+			ERROR("setgroups\n");
+			free_and_exit(EXIT_FAILURE);
+		}
+	}
+
+	if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0
+			&& sethostname(opts.hostname, strlen(opts.hostname))) {
+		ERROR("sethostname(%s) failed: %m\n", opts.hostname);
+		free_and_exit(EXIT_FAILURE);
+	}
+
+	uloop_timeout_add(&pre_exec_timeout);
+	uloop_run();
+
+	/* only reached when the event loop ends without exec'ing the jail */
+	free_and_exit(-1);
+	return -1;
+}
+
+static void pre_exec_jail(struct uloop_timeout *t) /* uloop timeout callback (pre_exec_timeout): next setup stage of the child; t is unused */
+{
+ if ((opts.namespace & CLONE_NEWNS) && build_jail_fs()) { /* a mount namespace needs its own root filesystem first */
+ ERROR("failed to build jail fs\n");
+ free_and_exit(EXIT_FAILURE);
+ } else { /* NOTE(review): also taken when build_jail_fs() returns 0, although it already ran the createContainer hooks -- confirm it never returns on success */
+ run_hooks(opts.hooks.createContainer, post_jail_fs);
+ }
+}
+
+static void post_start_hook(void);
+/*
+ * Wait for the parent's go-ahead (a single '!' byte on pipes[2]), close
+ * the pipe and run the startContainer hooks, continuing with
+ * post_start_hook(). Terminates the process when the parent reports an
+ * error or the pipe cannot be read.
+ */
+static void post_jail_fs(void)
+{
+	char token;
+
+	if (read(pipes[2], &token, 1) < 1) {
+		ERROR("can't read from parent\n");
+		free_and_exit(EXIT_FAILURE);
+	}
+
+	if (token != '!') {
+		ERROR("parent had an error, child exiting\n");
+		free_and_exit(EXIT_FAILURE);
+	}
+
+	close(pipes[2]);
+
+	run_hooks(opts.hooks.startContainer, post_start_hook);
+}
+
+static void post_start_hook(void)
+{
+ int pw_uid, pw_gid, gr_gid;
+
+ /*
+ * make sure setuid/setgid won't drop capabilities in case capabilities
+ * have been specified explicitely.
+ */
+ if (opts.capset.apply) {
+ if (prctl(PR_SET_SECUREBITS, SECBIT_NO_SETUID_FIXUP)) {
+ ERROR("prctl(PR_SET_SECUREBITS) failed: %m\n");
+ free_and_exit(EXIT_FAILURE);
+ }
+ }
+
+ /* drop capabilities, retain those still needed to further setup jail */
+ if (applyOCIcapabilities(opts.capset, (1LLU << CAP_SETGID) | (1LLU << CAP_SETUID) | (1LLU << CAP_SETPCAP)))
+ free_and_exit(EXIT_FAILURE);
+
+ /* use either cmdline-supplied user/group or uid/gid from OCI spec */
+ get_jail_user(&pw_uid, &pw_gid, &gr_gid);
+ set_jail_user(opts.pw_uid?:pw_uid, opts.pw_gid?:pw_gid, opts.gr_gid?:gr_gid);
+
+ if (opts.additional_gids &&
+ (setgroups(opts.num_additional_gids, opts.additional_gids) < 0)) {
+ ERROR("setgroups failed: %m\n");
+ free_and_exit(EXIT_FAILURE);
+ }
+
+ if (opts.set_umask)
+ umask(opts.umask);
+
+ /* restore securebits back to normal (and lock them if not in userns) */
+ if (opts.capset.apply) {
+ if (prctl(PR_SET_SECUREBITS, (opts.namespace & CLONE_NEWUSER)?0:
+ SECBIT_KEEP_CAPS_LOCKED|SECBIT_NO_SETUID_FIXUP_LOCKED|SECBIT_NOROOT_LOCKED)) {
+ ERROR("prctl(PR_SET_SECUREBITS) failed: %m\n");
+ free_and_exit(EXIT_FAILURE);
+ }
+ }
+
+ /* drop remaining capabilities to end up with specified sets */
+ if (applyOCIcapabilities(opts.capset, 0))
+ free_and_exit(EXIT_FAILURE);
+
+ if (opts.no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
+ free_and_exit(EXIT_FAILURE);
+ }
+
+ char **envp = build_envp(opts.seccomp, opts.envp);
+ if (!envp)
+ free_and_exit(EXIT_FAILURE);
+
+ if (opts.cwd && chdir(opts.cwd))
+ free_and_exit(EXIT_FAILURE);
+
+ if (opts.ociseccomp && applyOCIlinuxseccomp(opts.ociseccomp))
+ free_and_exit(EXIT_FAILURE);
+
+ uloop_end();
+ free_opts(false);
+ INFO("exec-ing %s\n", *opts.jail_argv);
+ if (opts.envp) /* respect PATH if potentially set in ENV */
+ execvpe(*opts.jail_argv, opts.jail_argv, envp);
+ else
+ execve(*opts.jail_argv, opts.jail_argv, envp);
+
+ /* we get there only if execve fails */
+ ERROR("failed to execve %s: %m\n", *opts.jail_argv);
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * Open the namespace file /proc/<target_ns>/ns/<nstype> read-only.
+ *
+ * Returns the open fd, or -1 with errno set (ENAMETOOLONG when the path
+ * does not fit into PATH_MAX).
+ *
+ * pid_t is a signed type, so the pid is printed with %d rather than %u.
+ */
+int ns_open_pid(const char *nstype, const pid_t target_ns)
+{
+	char pid_pid_path[PATH_MAX];
+	int len;
+
+	len = snprintf(pid_pid_path, sizeof(pid_pid_path), "/proc/%d/ns/%s",
+		       (int)target_ns, nstype);
+	if (len < 0 || (size_t)len >= sizeof(pid_pid_path)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	return open(pid_pid_path, O_RDONLY);
+}
+
+/*
+ * Copy the string elements of the blobmsg array msg into a newly allocated,
+ * NULL-terminated string vector.
+ *
+ * envp: receives the vector; set to NULL when the array is empty or when
+ *       an allocation fails
+ *
+ * Returns 0 on success and ENOMEM when memory runs out. A failed strdup()
+ * is reported instead of leaving a NULL in the middle of the vector, which
+ * would silently cut the list short.
+ */
+static int parseOCIenvarray(struct blob_attr *msg, char ***envp)
+{
+	struct blob_attr *cur;
+	char **vec;
+	int sz = 0, rem;
+
+	*envp = NULL;
+
+	blobmsg_for_each_attr(cur, msg, rem)
+		++sz;
+
+	if (sz <= 0)
+		return 0;
+
+	/* one extra slot, zeroed by calloc(), serves as terminator */
+	vec = calloc(1 + sz, sizeof(char*));
+	if (!vec)
+		return ENOMEM;
+
+	sz = 0;
+	blobmsg_for_each_attr(cur, msg, rem) {
+		vec[sz] = strdup(blobmsg_get_string(cur));
+		if (!vec[sz]) {
+			free_oci_envp(vec);
+			return ENOMEM;
+		}
+		++sz;
+	}
+
+	*envp = vec;
+
+	return 0;
+}
+
+enum {
+ OCI_ROOT_PATH,
+ OCI_ROOT_READONLY,
+ __OCI_ROOT_MAX,
+};
+
+static const struct blobmsg_policy oci_root_policy[] = {
+ [OCI_ROOT_PATH] = { "path", BLOBMSG_TYPE_STRING },
+ [OCI_ROOT_READONLY] = { "readonly", BLOBMSG_TYPE_BOOL },
+};
+
+/*
+ * Parse the "root" object of an OCI config: resolve "path" (relative
+ * paths are taken relative to the directory of jsonfile) into
+ * opts.extroot and store "readonly", when present, in opts.ronly.
+ *
+ * Returns 0 on success, ENODATA without "path", ENOTDIR when jsonfile
+ * contains no '/', or errno of realpath().
+ */
+static int parseOCIroot(const char *jsonfile, struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_ROOT_MAX];
+	char extroot[PATH_MAX] = { 0 };
+	char *root_path, *slash;
+	size_t room;
+
+	blobmsg_parse(oci_root_policy, __OCI_ROOT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[OCI_ROOT_PATH])
+		return ENODATA;
+
+	root_path = blobmsg_get_string(tb[OCI_ROOT_PATH]);
+
+	if (root_path[0] != '/') {
+		/* relative path: start out with the bundle directory */
+		strncpy(extroot, jsonfile, PATH_MAX - 1);
+
+		slash = strrchr(extroot, '/');
+		if (!slash)
+			return ENOTDIR;
+
+		/* keep the trailing slash, drop the file name */
+		slash[1] = '\0';
+	}
+
+	room = PATH_MAX - (strlen(extroot) + 1);
+	strncat(extroot, root_path, room);
+
+	/* follow symbolic link(s) */
+	opts.extroot = realpath(extroot, NULL);
+	if (!opts.extroot)
+		return errno;
+
+	if (tb[OCI_ROOT_READONLY])
+		opts.ronly = blobmsg_get_bool(tb[OCI_ROOT_READONLY]);
+
+	return 0;
+}
+
+
+/* Indices into the attribute table parsed from a single OCI hook object. */
+enum {
+ OCI_HOOK_PATH,
+ OCI_HOOK_ARGS,
+ OCI_HOOK_ENV,
+ OCI_HOOK_TIMEOUT,
+ __OCI_HOOK_MAX,
+};
+
+/* Key names and expected blobmsg types of a single OCI hook object. */
+static const struct blobmsg_policy oci_hook_policy[] = {
+ [OCI_HOOK_PATH] = { "path", BLOBMSG_TYPE_STRING },
+ [OCI_HOOK_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
+ [OCI_HOOK_ENV] = { "env", BLOBMSG_TYPE_ARRAY },
+ [OCI_HOOK_TIMEOUT] = { "timeout", BLOBMSG_TYPE_INT32 },
+};
+
+
+/*
+ * Parse one array of OCI hook objects into a NULL-terminated list.
+ *
+ * Every array member must carry a "path"; "args", "env" and "timeout"
+ * are optional. Without "args", argv consists of just the path.
+ *
+ * hooklist: receives the newly allocated list; left untouched when the
+ *           array is empty, NULL when parsing fails
+ * msg:      blobmsg array of hook objects
+ *
+ * Returns 0 on success, EINVAL when a hook lacks "path", ENOMEM when an
+ * allocation fails, or the error returned by parseOCIenvarray().
+ */
+static int parseOCIhook(struct hook_execvpe ***hooklist, struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_HOOK_MAX];
+	struct blob_attr *cur;
+	struct hook_execvpe *hook;
+	int rem, ret = 0;
+	int idx = 0;
+
+	blobmsg_for_each_attr(cur, msg, rem)
+		++idx;
+
+	if (!idx)
+		return 0;
+
+	/* zeroed, so the list is NULL-terminated at every stage */
+	*hooklist = calloc(idx + 1, sizeof(struct hook_execvpe *));
+	idx = 0;
+
+	if (!(*hooklist))
+		return ENOMEM;
+
+	blobmsg_for_each_attr(cur, msg, rem) {
+		blobmsg_parse(oci_hook_policy, __OCI_HOOK_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+
+		if (!tb[OCI_HOOK_PATH]) {
+			ret = EINVAL;
+			goto errout;
+		}
+
+		hook = calloc(1, sizeof(struct hook_execvpe));
+		if (!hook) {
+			ret = ENOMEM;
+			goto errout;
+		}
+
+		/* link it right away so errout releases it with the list */
+		(*hooklist)[idx] = hook;
+
+		if (tb[OCI_HOOK_ARGS]) {
+			ret = parseOCIenvarray(tb[OCI_HOOK_ARGS], &hook->argv);
+			if (ret)
+				goto errout;
+		} else {
+			/* no args given: argv is just the hook path */
+			hook->argv = calloc(2, sizeof(char *));
+			if (!hook->argv) {
+				ret = ENOMEM;
+				goto errout;
+			}
+
+			hook->argv[0] = strdup(blobmsg_get_string(tb[OCI_HOOK_PATH]));
+			if (!hook->argv[0]) {
+				ret = ENOMEM;
+				goto errout;
+			}
+			hook->argv[1] = NULL;
+		}
+
+		if (tb[OCI_HOOK_ENV]) {
+			ret = parseOCIenvarray(tb[OCI_HOOK_ENV], &hook->envp);
+			if (ret)
+				goto errout;
+		}
+
+		if (tb[OCI_HOOK_TIMEOUT])
+			hook->timeout = blobmsg_get_u32(tb[OCI_HOOK_TIMEOUT]);
+
+		hook->file = strdup(blobmsg_get_string(tb[OCI_HOOK_PATH]));
+		if (!hook->file) {
+			ret = ENOMEM;
+			goto errout;
+		}
+
+		++idx;
+	}
+
+	(*hooklist)[idx] = NULL;
+
+	DEBUG("added %d hooks\n", idx);
+
+	return 0;
+
+errout:
+	free_hooklist(*hooklist);
+	*hooklist = NULL;
+
+	return ret;
+}
+
+
+/* Indices into the attribute table parsed from the OCI "hooks" object. */
+enum {
+ OCI_HOOKS_PRESTART,
+ OCI_HOOKS_CREATERUNTIME,
+ OCI_HOOKS_CREATECONTAINER,
+ OCI_HOOKS_STARTCONTAINER,
+ OCI_HOOKS_POSTSTART,
+ OCI_HOOKS_POSTSTOP,
+ __OCI_HOOKS_MAX,
+};
+
+/* Key names of the OCI "hooks" object; every entry is an array of hooks. */
+static const struct blobmsg_policy oci_hooks_policy[] = {
+ [OCI_HOOKS_PRESTART] = { "prestart", BLOBMSG_TYPE_ARRAY },
+ [OCI_HOOKS_CREATERUNTIME] = { "createRuntime", BLOBMSG_TYPE_ARRAY },
+ [OCI_HOOKS_CREATECONTAINER] = { "createContainer", BLOBMSG_TYPE_ARRAY },
+ [OCI_HOOKS_STARTCONTAINER] = { "startContainer", BLOBMSG_TYPE_ARRAY },
+ [OCI_HOOKS_POSTSTART] = { "poststart", BLOBMSG_TYPE_ARRAY },
+ [OCI_HOOKS_POSTSTOP] = { "poststop", BLOBMSG_TYPE_ARRAY },
+};
+
+/*
+ * Parse the OCI "hooks" object into the hook lists in opts.hooks.
+ *
+ * "prestart" is only reported as deprecated and otherwise ignored.
+ * When one list fails to parse, the lists handled before it are
+ * released and their pointers in opts.hooks are reset to NULL, so no
+ * stale pointer is left behind in the global options.
+ *
+ * Returns 0 on success or the error returned by parseOCIhook().
+ */
+static int parseOCIhooks(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_HOOKS_MAX];
+	int ret;
+
+	blobmsg_parse(oci_hooks_policy, __OCI_HOOKS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (tb[OCI_HOOKS_PRESTART])
+		INFO("warning: ignoring deprecated prestart hook\n");
+
+	if (tb[OCI_HOOKS_CREATERUNTIME]) {
+		ret = parseOCIhook(&opts.hooks.createRuntime, tb[OCI_HOOKS_CREATERUNTIME]);
+		if (ret)
+			return ret;
+	}
+
+	if (tb[OCI_HOOKS_CREATECONTAINER]) {
+		ret = parseOCIhook(&opts.hooks.createContainer, tb[OCI_HOOKS_CREATECONTAINER]);
+		if (ret)
+			goto out_createruntime;
+	}
+
+	if (tb[OCI_HOOKS_STARTCONTAINER]) {
+		ret = parseOCIhook(&opts.hooks.startContainer, tb[OCI_HOOKS_STARTCONTAINER]);
+		if (ret)
+			goto out_createcontainer;
+	}
+
+	if (tb[OCI_HOOKS_POSTSTART]) {
+		ret = parseOCIhook(&opts.hooks.poststart, tb[OCI_HOOKS_POSTSTART]);
+		if (ret)
+			goto out_startcontainer;
+	}
+
+	if (tb[OCI_HOOKS_POSTSTOP]) {
+		ret = parseOCIhook(&opts.hooks.poststop, tb[OCI_HOOKS_POSTSTOP]);
+		if (ret)
+			goto out_poststart;
+	}
+
+	return 0;
+
+	/* unwind in reverse order; clear each pointer once it is freed */
+out_poststart:
+	free_hooklist(opts.hooks.poststart);
+	opts.hooks.poststart = NULL;
+out_startcontainer:
+	free_hooklist(opts.hooks.startContainer);
+	opts.hooks.startContainer = NULL;
+out_createcontainer:
+	free_hooklist(opts.hooks.createContainer);
+	opts.hooks.createContainer = NULL;
+out_createruntime:
+	free_hooklist(opts.hooks.createRuntime);
+	opts.hooks.createRuntime = NULL;
+
+	return ret;
+}
+
+
+/* Indices into the attribute table parsed from the OCI process "user" object. */
+enum {
+ OCI_PROCESS_USER_UID,
+ OCI_PROCESS_USER_GID,
+ OCI_PROCESS_USER_UMASK,
+ OCI_PROCESS_USER_ADDITIONALGIDS,
+ __OCI_PROCESS_USER_MAX,
+};
+
+/* Key names and expected blobmsg types of the OCI process "user" object. */
+static const struct blobmsg_policy oci_process_user_policy[] = {
+ [OCI_PROCESS_USER_UID] = { "uid", BLOBMSG_TYPE_INT32 },
+ [OCI_PROCESS_USER_GID] = { "gid", BLOBMSG_TYPE_INT32 },
+ [OCI_PROCESS_USER_UMASK] = { "umask", BLOBMSG_TYPE_INT32 },
+ [OCI_PROCESS_USER_ADDITIONALGIDS] = { "additionalGids", BLOBMSG_TYPE_ARRAY },
+};
+
+/*
+ * Parse the OCI process "user" object into opts: uid, gid, the list of
+ * additional GIDs and the umask.
+ *
+ * When "gid" is given it is always placed first in opts.additional_gids
+ * and later occurrences of the same GID in "additionalGids" are skipped.
+ *
+ * Returns 0 on success or ENOMEM when the GID list cannot be allocated.
+ */
+static int parseOCIprocessuser(struct blob_attr *msg) {
+	struct blob_attr *tb[__OCI_PROCESS_USER_MAX];
+	struct blob_attr *cur;
+	int rem;
+	int has_gid = 0;
+
+	blobmsg_parse(oci_process_user_policy, __OCI_PROCESS_USER_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (tb[OCI_PROCESS_USER_UID])
+		opts.pw_uid = blobmsg_get_u32(tb[OCI_PROCESS_USER_UID]);
+
+	if (tb[OCI_PROCESS_USER_GID]) {
+		opts.pw_gid = blobmsg_get_u32(tb[OCI_PROCESS_USER_GID]);
+		opts.gr_gid = blobmsg_get_u32(tb[OCI_PROCESS_USER_GID]);
+		has_gid = 1;
+	}
+
+	if (tb[OCI_PROCESS_USER_ADDITIONALGIDS]) {
+		size_t gidcnt = 0;
+
+		/* upper bound for the allocation: count every listed GID */
+		blobmsg_for_each_attr(cur, tb[OCI_PROCESS_USER_ADDITIONALGIDS], rem)
+			++gidcnt;
+
+		if (gidcnt) {
+			opts.additional_gids = calloc(gidcnt + has_gid, sizeof(gid_t));
+			if (!opts.additional_gids)
+				return ENOMEM;
+
+			gidcnt = 0;
+
+			/* always add primary GID to set of GIDs if set */
+			if (has_gid)
+				opts.additional_gids[gidcnt++] = opts.gr_gid;
+
+			blobmsg_for_each_attr(cur, tb[OCI_PROCESS_USER_ADDITIONALGIDS], rem) {
+				/* the primary GID is already in the list */
+				if (has_gid && (blobmsg_get_u32(cur) == opts.gr_gid))
+					continue;
+				opts.additional_gids[gidcnt++] = blobmsg_get_u32(cur);
+			}
+			opts.num_additional_gids = gidcnt;
+		}
+		DEBUG("read %zu additional groups\n", gidcnt);
+	}
+
+	if (tb[OCI_PROCESS_USER_UMASK]) {
+		opts.umask = blobmsg_get_u32(tb[OCI_PROCESS_USER_UMASK]);
+		opts.set_umask = true;
+	}
+
+	return 0;
+}
+
+/* Indices into the attribute table parsed from one OCI "rlimits" entry. */
+enum {
+ OCI_PROCESS_RLIMIT_TYPE,
+ OCI_PROCESS_RLIMIT_SOFT,
+ OCI_PROCESS_RLIMIT_HARD,
+ __OCI_PROCESS_RLIMIT_MAX,
+};
+
+/* Key names and expected blobmsg types of one OCI "rlimits" entry. */
+static const struct blobmsg_policy oci_process_rlimit_policy[] = {
+ [OCI_PROCESS_RLIMIT_TYPE] = { "type", BLOBMSG_TYPE_STRING },
+ [OCI_PROCESS_RLIMIT_SOFT] = { "soft", BLOBMSG_CAST_INT64 },
+ [OCI_PROCESS_RLIMIT_HARD] = { "hard", BLOBMSG_CAST_INT64 },
+};
+
+/*
+ * Resource limit names without their "RLIMIT_" prefix, indexed by the
+ * RLIMIT_* constant; indices not listed here stay NULL.
+ * from manpage GETRLIMIT(2)
+ */
+static const char* const rlimit_names[RLIM_NLIMITS] = {
+ [RLIMIT_AS] = "AS",
+ [RLIMIT_CORE] = "CORE",
+ [RLIMIT_CPU] = "CPU",
+ [RLIMIT_DATA] = "DATA",
+ [RLIMIT_FSIZE] = "FSIZE",
+ [RLIMIT_LOCKS] = "LOCKS",
+ [RLIMIT_MEMLOCK] = "MEMLOCK",
+ [RLIMIT_MSGQUEUE] = "MSGQUEUE",
+ [RLIMIT_NICE] = "NICE",
+ [RLIMIT_NOFILE] = "NOFILE",
+ [RLIMIT_NPROC] = "NPROC",
+ [RLIMIT_RSS] = "RSS",
+ [RLIMIT_RTPRIO] = "RTPRIO",
+ [RLIMIT_RTTIME] = "RTTIME",
+ [RLIMIT_SIGPENDING] = "SIGPENDING",
+ [RLIMIT_STACK] = "STACK",
+};
+
+/*
+ * Map a resource limit name of the form "RLIMIT_<NAME>" to its index in
+ * rlimit_names.
+ *
+ * Returns the index, or -1 when the prefix is missing or the name is
+ * not in the table.
+ */
+static int resolve_rlimit(char *type) {
+	const char *suffix;
+	unsigned int idx;
+
+	/* without the prefix no table entry can match */
+	if (strncmp("RLIMIT_", type, 7) != 0)
+		return -1;
+
+	suffix = type + 7;
+
+	for (idx = 0; idx < RLIM_NLIMITS; ++idx) {
+		if (!rlimit_names[idx])
+			continue;
+
+		if (strcmp(rlimit_names[idx], suffix) == 0)
+			return idx;
+	}
+
+	return -1;
+}
+
+
+/*
+ * Parse one entry of the OCI "rlimits" array and store it in
+ * opts.rlimits at the index of the resolved limit type.
+ *
+ * Returns 0 on success, ENODATA when "type", "soft" or "hard" is
+ * missing, EINVAL for an unknown type, ENOTUNIQ when that limit was
+ * already set, or ENOMEM when the entry cannot be allocated.
+ */
+static int parseOCIrlimit(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_PROCESS_RLIMIT_MAX];
+	int limtype = -1;
+	struct rlimit *curlim;
+
+	blobmsg_parse(oci_process_rlimit_policy, __OCI_PROCESS_RLIMIT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[OCI_PROCESS_RLIMIT_TYPE] ||
+	    !tb[OCI_PROCESS_RLIMIT_SOFT] ||
+	    !tb[OCI_PROCESS_RLIMIT_HARD])
+		return ENODATA;
+
+	limtype = resolve_rlimit(blobmsg_get_string(tb[OCI_PROCESS_RLIMIT_TYPE]));
+
+	if (limtype < 0)
+		return EINVAL;
+
+	if (opts.rlimits[limtype])
+		return ENOTUNIQ;
+
+	curlim = malloc(sizeof(struct rlimit));
+	if (!curlim)
+		return ENOMEM;
+
+	curlim->rlim_cur = blobmsg_cast_u64(tb[OCI_PROCESS_RLIMIT_SOFT]);
+	curlim->rlim_max = blobmsg_cast_u64(tb[OCI_PROCESS_RLIMIT_HARD]);
+
+	opts.rlimits[limtype] = curlim;
+
+	return 0;
+}
+
+/* Indices into the attribute table parsed from the OCI "process" object. */
+enum {
+ OCI_PROCESS_ARGS,
+ OCI_PROCESS_CAPABILITIES,
+ OCI_PROCESS_CWD,
+ OCI_PROCESS_ENV,
+ OCI_PROCESS_OOMSCOREADJ,
+ OCI_PROCESS_NONEWPRIVILEGES,
+ OCI_PROCESS_RLIMITS,
+ OCI_PROCESS_TERMINAL,
+ OCI_PROCESS_USER,
+ __OCI_PROCESS_MAX,
+};
+
+/* Key names and expected blobmsg types of the OCI "process" object. */
+static const struct blobmsg_policy oci_process_policy[] = {
+ [OCI_PROCESS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
+ [OCI_PROCESS_CAPABILITIES] = { "capabilities", BLOBMSG_TYPE_TABLE },
+ [OCI_PROCESS_CWD] = { "cwd", BLOBMSG_TYPE_STRING },
+ [OCI_PROCESS_ENV] = { "env", BLOBMSG_TYPE_ARRAY },
+ [OCI_PROCESS_OOMSCOREADJ] = { "oomScoreAdj", BLOBMSG_TYPE_INT32 },
+ [OCI_PROCESS_NONEWPRIVILEGES] = { "noNewPrivileges", BLOBMSG_TYPE_BOOL },
+ [OCI_PROCESS_RLIMITS] = { "rlimits", BLOBMSG_TYPE_ARRAY },
+ [OCI_PROCESS_TERMINAL] = { "terminal", BLOBMSG_TYPE_BOOL },
+ [OCI_PROCESS_USER] = { "user", BLOBMSG_TYPE_TABLE },
+};
+
+
+/*
+ * Parse the OCI "process" object into opts: command line, console and
+ * no-new-privileges flags, working directory, environment, user,
+ * capabilities, resource limits and OOM score adjustment.
+ *
+ * Returns 0 on success, ENOENT when "args" is missing, or the first
+ * error reported by one of the sub-parsers.
+ */
+static int parseOCIprocess(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_PROCESS_MAX];
+	struct blob_attr *lim;
+	int left, err;
+
+	blobmsg_parse(oci_process_policy, __OCI_PROCESS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	/* the command line is the only mandatory member */
+	if (!tb[OCI_PROCESS_ARGS])
+		return ENOENT;
+
+	err = parseOCIenvarray(tb[OCI_PROCESS_ARGS], &opts.jail_argv);
+	if (err)
+		return err;
+
+	if (tb[OCI_PROCESS_TERMINAL])
+		opts.console = blobmsg_get_bool(tb[OCI_PROCESS_TERMINAL]);
+
+	if (tb[OCI_PROCESS_NONEWPRIVILEGES])
+		opts.no_new_privs = blobmsg_get_bool(tb[OCI_PROCESS_NONEWPRIVILEGES]);
+
+	if (tb[OCI_PROCESS_CWD])
+		opts.cwd = strdup(blobmsg_get_string(tb[OCI_PROCESS_CWD]));
+
+	if (tb[OCI_PROCESS_ENV]) {
+		err = parseOCIenvarray(tb[OCI_PROCESS_ENV], &opts.envp);
+		if (err)
+			return err;
+	}
+
+	if (tb[OCI_PROCESS_USER]) {
+		err = parseOCIprocessuser(tb[OCI_PROCESS_USER]);
+		if (err)
+			return err;
+	}
+
+	if (tb[OCI_PROCESS_CAPABILITIES]) {
+		err = parseOCIcapabilities(&opts.capset, tb[OCI_PROCESS_CAPABILITIES]);
+		if (err)
+			return err;
+	}
+
+	if (tb[OCI_PROCESS_RLIMITS]) {
+		blobmsg_for_each_attr(lim, tb[OCI_PROCESS_RLIMITS], left) {
+			err = parseOCIrlimit(lim);
+			if (err)
+				return err;
+		}
+	}
+
+	if (tb[OCI_PROCESS_OOMSCOREADJ]) {
+		opts.oom_score_adj = blobmsg_get_u32(tb[OCI_PROCESS_OOMSCOREADJ]);
+		opts.set_oom_score_adj = true;
+	}
+
+	return 0;
+}
+
+/* Indices into the attribute table parsed from one OCI "namespaces" entry. */
+enum {
+ OCI_LINUX_NAMESPACE_TYPE,
+ OCI_LINUX_NAMESPACE_PATH,
+ __OCI_LINUX_NAMESPACE_MAX,
+};
+
+/* Key names and expected blobmsg types of one OCI "namespaces" entry. */
+static const struct blobmsg_policy oci_linux_namespace_policy[] = {
+ [OCI_LINUX_NAMESPACE_TYPE] = { "type", BLOBMSG_TYPE_STRING },
+ [OCI_LINUX_NAMESPACE_PATH] = { "path", BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * Translate a namespace type name into the matching CLONE_NEW* flag.
+ *
+ * Both "network" and "net" select the network namespace; "time" is
+ * only known when CLONE_NEWTIME is defined at build time.
+ *
+ * Returns the flag, or 0 when the name is not recognized.
+ */
+static int resolve_nstype(char *type) {
+	static const struct {
+		const char *name;
+		int flag;
+	} nstypes[] = {
+		{ "pid", CLONE_NEWPID },
+		{ "network", CLONE_NEWNET },
+		{ "net", CLONE_NEWNET },
+		{ "mount", CLONE_NEWNS },
+		{ "ipc", CLONE_NEWIPC },
+		{ "uts", CLONE_NEWUTS },
+		{ "user", CLONE_NEWUSER },
+		{ "cgroup", CLONE_NEWCGROUP },
+#ifdef CLONE_NEWTIME
+		{ "time", CLONE_NEWTIME },
+#endif
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(nstypes) / sizeof(nstypes[0]); ++i)
+		if (strcmp(nstypes[i].name, type) == 0)
+			return nstypes[i].flag;
+
+	return 0;
+}
+
+/*
+ * Parse one entry of the OCI "namespaces" array.
+ *
+ * With a "path" the namespace file is opened, verified to be of the
+ * requested type and its descriptor stored in the slot returned by
+ * get_namespace_fd(); without a "path" the type's flag is added to
+ * opts.namespace.
+ *
+ * Returns 0 on success, EINVAL for a missing or unknown type or a path
+ * of the wrong namespace type, ENOTUNIQ when the type was already
+ * requested, EFAULT when get_namespace_fd() returns no slot, or the
+ * errno of open() (ESTALE if errno is 0).
+ */
+static int parseOCIlinuxns(struct blob_attr *msg)
+{
+ struct blob_attr *tb[__OCI_LINUX_NAMESPACE_MAX];
+ int nstype;
+ int *setns;
+ int fd;
+
+ blobmsg_parse(oci_linux_namespace_policy, __OCI_LINUX_NAMESPACE_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+ if (!tb[OCI_LINUX_NAMESPACE_TYPE])
+ return EINVAL;
+
+ nstype = resolve_nstype(blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_TYPE]));
+ if (!nstype)
+ return EINVAL;
+
+ /* already requested as a new namespace */
+ if (opts.namespace & nstype)
+ return ENOTUNIQ;
+
+ setns = get_namespace_fd(nstype);
+
+ if (!setns)
+ return EFAULT;
+
+ /* already set up to join an existing namespace of this type */
+ if (*setns != -1)
+ return ENOTUNIQ;
+
+ if (tb[OCI_LINUX_NAMESPACE_PATH]) {
+ DEBUG("opening existing %s namespace from path %s\n",
+ blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_TYPE]),
+ blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_PATH]));
+
+ fd = open(blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_PATH]), O_RDONLY);
+ if (fd < 0)
+ return errno?:ESTALE;
+
+ /* the file must refer to a namespace of the requested type */
+ if (ioctl(fd, NS_GET_NSTYPE) != nstype) {
+ close(fd);
+ return EINVAL;
+ }
+
+ DEBUG("opened existing %s namespace got filehandler %u\n",
+ blobmsg_get_string(tb[OCI_LINUX_NAMESPACE_TYPE]),
+ fd);
+
+ *setns = fd;
+ } else {
+ opts.namespace |= nstype;
+ }
+
+ return 0;
+}
+
+/*
+ * join namespace of existing PID
+ * The string argument is the reference PID followed by ':' and a
+ * ',' separated list of namespaces to join.
+ *
+ * The argument is modified in place: the ':' and every ',' are
+ * overwritten with '\0'. For each listed type /proc/<pid>/ns/<type> is
+ * opened and the descriptor stored in the slot returned by
+ * get_namespace_fd().
+ *
+ * Returns 0 on success, EINVAL when ':' is missing or a type is
+ * unknown, ENOTUNIQ when a type was already requested, EFAULT when
+ * get_namespace_fd() returns no slot, ENOMEM, or the errno of open()
+ * (ESTALE if errno is 0). Descriptors stored before a failure are left
+ * in place.
+ */
+static int jail_join_ns(char *arg)
+{
+ pid_t pid;
+ int fd;
+ int nstype;
+ char *tmp, *etmp, *nspath;
+ int *setns;
+
+ tmp = strchr(arg, ':');
+ if (!tmp)
+ return EINVAL;
+
+ /* terminate the PID part so atoi() only sees the number */
+ *tmp = '\0';
+ pid = atoi(arg);
+
+ do {
+ /* step over the separator in front of the next type name */
+ ++tmp;
+ etmp = strchr(tmp, ',');
+ if (etmp)
+ *etmp = '\0';
+
+ nstype = resolve_nstype(tmp);
+ if (!nstype)
+ return EINVAL;
+
+ if (opts.namespace & nstype)
+ return ENOTUNIQ;
+
+ setns = get_namespace_fd(nstype);
+
+ if (!setns)
+ return EFAULT;
+
+ if (*setns != -1)
+ return ENOTUNIQ;
+
+ if (asprintf(&nspath, "/proc/%d/ns/%s", pid, tmp) < 0)
+ return ENOMEM;
+
+ fd = open(nspath, O_RDONLY);
+ free(nspath);
+
+ if (fd < 0)
+ return errno?:ESTALE;
+
+ *setns = fd;
+
+ /* continue behind the ',' or stop after the last entry */
+ if (etmp)
+ tmp = etmp;
+ else
+ tmp = NULL;
+ } while (tmp);
+
+ return 0;
+}
+
+/*
+ * Remember in opts.root_map_uid the host ID of a UID mapping entry that
+ * starts at container ID 0 and covers at least one ID. GID mapping
+ * entries are ignored.
+ */
+static void get_jail_root_user(bool is_gidmap, uint32_t container_id, uint32_t host_id, uint32_t size)
+{
+	if (is_gidmap)
+		return;
+
+	if (container_id != 0 || size < 1)
+		return;
+
+	opts.root_map_uid = host_id;
+}
+
+/* Indices into the attribute table parsed from one uid/gid mapping entry. */
+enum {
+ OCI_LINUX_UIDGIDMAP_CONTAINERID,
+ OCI_LINUX_UIDGIDMAP_HOSTID,
+ OCI_LINUX_UIDGIDMAP_SIZE,
+ __OCI_LINUX_UIDGIDMAP_MAX,
+};
+
+/* Key names and expected blobmsg types of one uid/gid mapping entry. */
+static const struct blobmsg_policy oci_linux_uidgidmap_policy[] = {
+ [OCI_LINUX_UIDGIDMAP_CONTAINERID] = { "containerID", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_UIDGIDMAP_HOSTID] = { "hostID", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_UIDGIDMAP_SIZE] = { "size", BLOBMSG_TYPE_INT32 },
+};
+
+/*
+ * Turn an OCI "uidMappings"/"gidMappings" array into one string with a
+ * "<containerID> <hostID> <size>\n" line per entry and store it in
+ * opts.gidmap (is_gidmap set) or opts.uidmap.
+ *
+ * The IDs are read as unsigned 32-bit values and are therefore
+ * formatted as unsigned. An empty array yields an empty string.
+ *
+ * Returns 0 on success, EINVAL when an entry lacks one of the three
+ * members, or ENOMEM.
+ */
+static int parseOCIuidgidmappings(struct blob_attr *msg, bool is_gidmap)
+{
+	struct blob_attr *tb[__OCI_LINUX_UIDGIDMAP_MAX];
+	struct blob_attr *cur;
+	int rem;
+	char *map;
+	size_t len, pos, totallen = 0;
+
+	/* first pass: validate every entry and add up the line lengths */
+	blobmsg_for_each_attr(cur, msg, rem) {
+		blobmsg_parse(oci_linux_uidgidmap_policy, __OCI_LINUX_UIDGIDMAP_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+
+		if (!tb[OCI_LINUX_UIDGIDMAP_CONTAINERID] ||
+		    !tb[OCI_LINUX_UIDGIDMAP_HOSTID] ||
+		    !tb[OCI_LINUX_UIDGIDMAP_SIZE])
+			return EINVAL;
+
+		/* count length */
+		totallen += snprintf(NULL, 0, "%u %u %u\n",
+			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]),
+			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]),
+			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE]));
+	}
+
+	/* allocate combined mapping string */
+	map = malloc(totallen + 1);
+	if (!map)
+		return ENOMEM;
+
+	/* keep the result a valid string even when there are no entries */
+	map[0] = '\0';
+
+	/* second pass: entries were validated above, so tb[] is complete */
+	pos = 0;
+	blobmsg_for_each_attr(cur, msg, rem) {
+		blobmsg_parse(oci_linux_uidgidmap_policy, __OCI_LINUX_UIDGIDMAP_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+
+		get_jail_root_user(is_gidmap, blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]),
+			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]),
+			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE]));
+
+		/* write mapping line into pre-allocated string */
+		len = snprintf(&map[pos], totallen + 1, "%u %u %u\n",
+			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]),
+			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]),
+			blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE]));
+		pos += len;
+		totallen -= len;
+	}
+
+	/* both passes must have produced the same number of characters */
+	assert(totallen == 0);
+
+	if (is_gidmap)
+		opts.gidmap = map;
+	else
+		opts.uidmap = map;
+
+	return 0;
+}
+
+/* Indices into the attribute table parsed from one OCI "devices" entry. */
+enum {
+	OCI_DEVICES_TYPE,
+	OCI_DEVICES_PATH,
+	OCI_DEVICES_MAJOR,
+	OCI_DEVICES_MINOR,
+	OCI_DEVICES_FILEMODE,
+	OCI_DEVICES_UID,
+	OCI_DEVICES_GID,
+	__OCI_DEVICES_MAX,
+};
+
+/*
+ * Key names and expected blobmsg types of one OCI "devices" entry.
+ * The group owner is read from the "gid" key.
+ */
+static const struct blobmsg_policy oci_devices_policy[] = {
+	[OCI_DEVICES_TYPE] = { "type", BLOBMSG_TYPE_STRING },
+	[OCI_DEVICES_PATH] = { "path", BLOBMSG_TYPE_STRING },
+	[OCI_DEVICES_MAJOR] = { "major", BLOBMSG_TYPE_INT32 },
+	[OCI_DEVICES_MINOR] = { "minor", BLOBMSG_TYPE_INT32 },
+	[OCI_DEVICES_FILEMODE] = { "fileMode", BLOBMSG_TYPE_INT32 },
+	[OCI_DEVICES_UID] = { "uid", BLOBMSG_TYPE_INT32 },
+	[OCI_DEVICES_GID] = { "gid", BLOBMSG_TYPE_INT32 },
+};
+
+/*
+ * Translate a one-letter device type into the file type bits for
+ * mknod(): "c" and "u" are character devices, "b" a block device and
+ * "p" a FIFO.
+ *
+ * Returns 0 for any other string.
+ */
+static mode_t resolve_devtype(char *tstr)
+{
+	/* only strings of exactly one character can match */
+	if (tstr[0] == '\0' || tstr[1] != '\0')
+		return 0;
+
+	switch (tstr[0]) {
+	case 'c':
+	case 'u':
+		return S_IFCHR;
+	case 'b':
+		return S_IFBLK;
+	case 'p':
+		return S_IFIFO;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Parse the OCI "devices" array into the NULL-terminated list
+ * opts.devices of mknod arguments.
+ *
+ * Each entry needs "type" and "path"; everything but a FIFO also needs
+ * "major" and "minor". "fileMode" may only contain permission bits and
+ * defaults to 0600; a missing "uid"/"gid" is stored as -1.
+ *
+ * Returns 0 on success, ENODATA for a missing mandatory member, EINVAL
+ * for an unknown type or invalid file mode, or ENOMEM. Entries parsed
+ * before a failure stay in opts.devices.
+ */
+static int parseOCIdevices(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_DEVICES_MAX];
+	struct blob_attr *cur;
+	int rem;
+	size_t cnt = 0;
+	struct mknod_args *tmp;
+
+	blobmsg_for_each_attr(cur, msg, rem)
+		++cnt;
+
+	/* zeroed, so the list is NULL-terminated at every stage */
+	opts.devices = calloc(cnt + 1, sizeof(struct mknod_args *));
+	if (!opts.devices)
+		return ENOMEM;
+
+	cnt = 0;
+	blobmsg_for_each_attr(cur, msg, rem) {
+		blobmsg_parse(oci_devices_policy, __OCI_DEVICES_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+		if (!tb[OCI_DEVICES_TYPE] ||
+		    !tb[OCI_DEVICES_PATH])
+			return ENODATA;
+
+		tmp = calloc(1, sizeof(struct mknod_args));
+		if (!tmp)
+			return ENOMEM;
+
+		tmp->mode = resolve_devtype(blobmsg_get_string(tb[OCI_DEVICES_TYPE]));
+		if (!tmp->mode) {
+			free(tmp);
+			return EINVAL;
+		}
+
+		/* a FIFO has no device number */
+		if (tmp->mode != S_IFIFO) {
+			if (!tb[OCI_DEVICES_MAJOR] || !tb[OCI_DEVICES_MINOR]) {
+				free(tmp);
+				return ENODATA;
+			}
+
+			tmp->dev = makedev(blobmsg_get_u32(tb[OCI_DEVICES_MAJOR]),
+					   blobmsg_get_u32(tb[OCI_DEVICES_MINOR]));
+		}
+
+		if (tb[OCI_DEVICES_FILEMODE]) {
+			/* reject anything but rwx permission bits */
+			if (~(S_IRWXU|S_IRWXG|S_IRWXO) & blobmsg_get_u32(tb[OCI_DEVICES_FILEMODE])) {
+				free(tmp);
+				return EINVAL;
+			}
+
+			tmp->mode |= blobmsg_get_u32(tb[OCI_DEVICES_FILEMODE]);
+		} else {
+			tmp->mode |= (S_IRUSR|S_IWUSR); /* 0600 */
+		}
+
+		tmp->path = strdup(blobmsg_get_string(tb[OCI_DEVICES_PATH]));
+		if (!tmp->path) {
+			free(tmp);
+			return ENOMEM;
+		}
+
+		if (tb[OCI_DEVICES_UID])
+			tmp->uid = blobmsg_get_u32(tb[OCI_DEVICES_UID]);
+		else
+			tmp->uid = -1;
+
+		if (tb[OCI_DEVICES_GID])
+			tmp->gid = blobmsg_get_u32(tb[OCI_DEVICES_GID]);
+		else
+			tmp->gid = -1;
+
+		DEBUG("read device %s (%s)\n", blobmsg_get_string(tb[OCI_DEVICES_PATH]), blobmsg_get_string(tb[OCI_DEVICES_TYPE]));
+		opts.devices[cnt++] = tmp;
+	}
+
+	opts.devices[cnt] = NULL;
+
+	return 0;
+}
+
+/*
+ * Parse the OCI "sysctl" table into the NULL-terminated list
+ * opts.sysctl. Each key is stored with every '.' replaced by '/'
+ * together with a copy of its string value.
+ *
+ * Returns 0 on success (opts.sysctl is left untouched for an empty
+ * table), EINVAL when an entry has no name or value, or ENOMEM. On
+ * ENOMEM the list contains only the completely built entries.
+ */
+static int parseOCIsysctl(struct blob_attr *msg)
+{
+	struct blob_attr *cur;
+	int rem;
+	char *tmp, *tc, *val;
+	size_t cnt = 0;
+
+	blobmsg_for_each_attr(cur, msg, rem) {
+		if (!blobmsg_name(cur) || !blobmsg_get_string(cur))
+			return EINVAL;
+
+		++cnt;
+	}
+
+	if (!cnt)
+		return 0;
+
+	/* zeroed, so the list is NULL-terminated at every stage */
+	opts.sysctl = calloc(cnt + 1, sizeof(struct sysctl_val *));
+	if (!opts.sysctl)
+		return ENOMEM;
+
+	cnt = 0;
+	blobmsg_for_each_attr(cur, msg, rem) {
+		/* build both strings first so no half-filled entry is linked */
+		tmp = strdup(blobmsg_name(cur));
+		if (!tmp)
+			return ENOMEM;
+
+		val = strdup(blobmsg_get_string(cur));
+		if (!val) {
+			free(tmp);
+			return ENOMEM;
+		}
+
+		opts.sysctl[cnt] = malloc(sizeof(struct sysctl_val));
+		if (!opts.sysctl[cnt]) {
+			free(val);
+			free(tmp);
+			return ENOMEM;
+		}
+
+		/* replace '.' with '/' in entry name */
+		tc = tmp;
+		while ((tc = strchr(tc, '.')))
+			*tc = '/';
+
+		opts.sysctl[cnt]->value = val;
+		opts.sysctl[cnt]->entry = tmp;
+
+		++cnt;
+	}
+
+	opts.sysctl[cnt] = NULL;
+
+	return 0;
+}
+
+
+/* Indices into the attribute table parsed from the OCI "linux" object. */
+enum {
+ OCI_LINUX_CGROUPSPATH,
+ OCI_LINUX_RESOURCES,
+ OCI_LINUX_SECCOMP,
+ OCI_LINUX_SYSCTL,
+ OCI_LINUX_NAMESPACES,
+ OCI_LINUX_DEVICES,
+ OCI_LINUX_UIDMAPPINGS,
+ OCI_LINUX_GIDMAPPINGS,
+ OCI_LINUX_MASKEDPATHS,
+ OCI_LINUX_READONLYPATHS,
+ OCI_LINUX_ROOTFSPROPAGATION,
+ __OCI_LINUX_MAX,
+};
+
+/* Key names and expected blobmsg types of the OCI "linux" object. */
+static const struct blobmsg_policy oci_linux_policy[] = {
+ [OCI_LINUX_CGROUPSPATH] = { "cgroupsPath", BLOBMSG_TYPE_STRING },
+ [OCI_LINUX_RESOURCES] = { "resources", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_SECCOMP] = { "seccomp", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_SYSCTL] = { "sysctl", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_NAMESPACES] = { "namespaces", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_UIDMAPPINGS] = { "uidMappings", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_GIDMAPPINGS] = { "gidMappings", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_MASKEDPATHS] = { "maskedPaths", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_READONLYPATHS] = { "readonlyPaths", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_ROOTFSPROPAGATION] = { "rootfsPropagation", BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * Parse the OCI "linux" object: namespaces, uid/gid mappings, read-only
+ * and masked paths, sysctls, seccomp, devices, the cgroup path and the
+ * cgroup resources.
+ *
+ * The cgroup path is built below "/sys/fs/cgroup": an absolute
+ * "cgroupsPath" is appended as is, a relative one is placed below
+ * "containers/<opts.name>/", and without "cgroupsPath" the path is
+ * "containers/<opts.name>/<opts.name>". cgroups_init() is called with
+ * the result in every case.
+ *
+ * Returns 0 on success, E2BIG when the cgroup path does not fit,
+ * EINVAL when the seccomp section cannot be parsed, or the first error
+ * reported by one of the sub-parsers or add_mount().
+ */
+static int parseOCIlinux(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_LINUX_MAX];
+	struct blob_attr *cur;
+	int rem;
+	int res = 0;
+	char *cgpath;
+	char cgfullpath[256] = "/sys/fs/cgroup";
+
+	blobmsg_parse(oci_linux_policy, __OCI_LINUX_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (tb[OCI_LINUX_NAMESPACES]) {
+		blobmsg_for_each_attr(cur, tb[OCI_LINUX_NAMESPACES], rem) {
+			res = parseOCIlinuxns(cur);
+			if (res)
+				return res;
+		}
+	}
+
+	if (tb[OCI_LINUX_UIDMAPPINGS]) {
+		/* the UID map must be built from "uidMappings" itself */
+		res = parseOCIuidgidmappings(tb[OCI_LINUX_UIDMAPPINGS], 0);
+		if (res)
+			return res;
+	}
+
+	if (tb[OCI_LINUX_GIDMAPPINGS]) {
+		res = parseOCIuidgidmappings(tb[OCI_LINUX_GIDMAPPINGS], 1);
+		if (res)
+			return res;
+	}
+
+	if (tb[OCI_LINUX_READONLYPATHS]) {
+		blobmsg_for_each_attr(cur, tb[OCI_LINUX_READONLYPATHS], rem) {
+			res = add_mount(NULL, blobmsg_get_string(cur), NULL, MS_BIND | MS_REC | MS_RDONLY, 0, NULL, 0);
+			if (res)
+				return res;
+		}
+	}
+
+	if (tb[OCI_LINUX_MASKEDPATHS]) {
+		blobmsg_for_each_attr(cur, tb[OCI_LINUX_MASKEDPATHS], rem) {
+			/* NOTE(review): (void *)(-1) as source looks like add_mount()'s marker for a masked path - confirm */
+			res = add_mount((void *)(-1), blobmsg_get_string(cur), NULL, 0, 0, NULL, 0);
+			if (res)
+				return res;
+		}
+	}
+
+	if (tb[OCI_LINUX_SYSCTL]) {
+		res = parseOCIsysctl(tb[OCI_LINUX_SYSCTL]);
+		if (res)
+			return res;
+	}
+
+	if (tb[OCI_LINUX_SECCOMP]) {
+		opts.ociseccomp = parseOCIlinuxseccomp(tb[OCI_LINUX_SECCOMP]);
+		if (!opts.ociseccomp)
+			return EINVAL;
+	}
+
+	if (tb[OCI_LINUX_DEVICES]) {
+		res = parseOCIdevices(tb[OCI_LINUX_DEVICES]);
+		if (res)
+			return res;
+	}
+
+	if (tb[OCI_LINUX_CGROUPSPATH]) {
+		cgpath = blobmsg_get_string(tb[OCI_LINUX_CGROUPSPATH]);
+		if (cgpath[0] == '/') {
+			/* absolute: directly below the cgroup mount point */
+			if (strlen(cgpath) + 1 >= (sizeof(cgfullpath) - strlen(cgfullpath)))
+				return E2BIG;
+
+			strcat(cgfullpath, cgpath);
+		} else {
+			/* relative: below containers/<name>/ */
+			strcat(cgfullpath, "/containers/");
+			if (strlen(opts.name) + strlen(cgpath) + 2 >= (sizeof(cgfullpath) - strlen(cgfullpath)))
+				return E2BIG;
+
+			strcat(cgfullpath, opts.name); /* should be container name rather than jail name */
+			strcat(cgfullpath, "/");
+			strcat(cgfullpath, cgpath);
+		}
+	} else {
+		/* default: containers/<name>/<name> */
+		strcat(cgfullpath, "/containers/");
+		if (2 * strlen(opts.name) + 2 >= (sizeof(cgfullpath) - strlen(cgfullpath)))
+			return E2BIG;
+
+		strcat(cgfullpath, opts.name); /* should be container name rather than jail name */
+		strcat(cgfullpath, "/");
+		strcat(cgfullpath, opts.name); /* should be container instance name rather than jail name */
+	}
+
+	cgroups_init(cgfullpath);
+
+	if (tb[OCI_LINUX_RESOURCES]) {
+		res = parseOCIlinuxcgroups(tb[OCI_LINUX_RESOURCES]);
+		if (res)
+			return res;
+	}
+
+	return 0;
+}
+
+/* Indices into the attribute table parsed from the top level of the OCI spec. */
+enum {
+ OCI_VERSION,
+ OCI_HOSTNAME,
+ OCI_PROCESS,
+ OCI_ROOT,
+ OCI_MOUNTS,
+ OCI_HOOKS,
+ OCI_LINUX,
+ OCI_ANNOTATIONS,
+ __OCI_MAX,
+};
+
+/* Key names and expected blobmsg types of the top level of the OCI spec. */
+static const struct blobmsg_policy oci_policy[] = {
+ [OCI_VERSION] = { "ociVersion", BLOBMSG_TYPE_STRING },
+ [OCI_HOSTNAME] = { "hostname", BLOBMSG_TYPE_STRING },
+ [OCI_PROCESS] = { "process", BLOBMSG_TYPE_TABLE },
+ [OCI_ROOT] = { "root", BLOBMSG_TYPE_TABLE },
+ [OCI_MOUNTS] = { "mounts", BLOBMSG_TYPE_ARRAY },
+ [OCI_HOOKS] = { "hooks", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX] = { "linux", BLOBMSG_TYPE_TABLE },
+ [OCI_ANNOTATIONS] = { "annotations", BLOBMSG_TYPE_TABLE },
+};
+
+/*
+ * Load the OCI runtime spec from jsonfile into ocibuf and parse it into
+ * opts.
+ *
+ * "ociVersion" (must start with "1.0"), "process", "root" and "mounts"
+ * are mandatory; "hostname", "linux", "hooks" and "annotations" are
+ * optional. ocibuf is released before returning, on success as well as
+ * on failure.
+ *
+ * Returns 0 on success, ENOENT when the file cannot be loaded as JSON,
+ * ENOMSG without "ociVersion", ENOTSUP for an unsupported version,
+ * ENODATA when a mandatory section is missing, or the first error
+ * reported by one of the section parsers.
+ */
+static int parseOCI(const char *jsonfile)
+{
+ struct blob_attr *tb[__OCI_MAX];
+ struct blob_attr *cur;
+ int rem;
+ int res;
+
+ blob_buf_init(&ocibuf, 0);
+
+ if (!blobmsg_add_json_from_file(&ocibuf, jsonfile)) {
+ res=ENOENT;
+ goto errout;
+ }
+
+ blobmsg_parse(oci_policy, __OCI_MAX, tb, blob_data(ocibuf.head), blob_len(ocibuf.head));
+
+ if (!tb[OCI_VERSION]) {
+ res=ENOMSG;
+ goto errout;
+ }
+
+ if (strncmp("1.0", blobmsg_get_string(tb[OCI_VERSION]), 3)) {
+ ERROR("unsupported ociVersion %s\n", blobmsg_get_string(tb[OCI_VERSION]));
+ res=ENOTSUP;
+ goto errout;
+ }
+
+ if (tb[OCI_HOSTNAME])
+ opts.hostname = strdup(blobmsg_get_string(tb[OCI_HOSTNAME]));
+
+ if (!tb[OCI_PROCESS]) {
+ res=ENODATA;
+ goto errout;
+ }
+
+ if ((res = parseOCIprocess(tb[OCI_PROCESS])))
+ goto errout;
- if (opts.procfs) {
- mkdir("/proc", 0755);
- mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0);
+ if (!tb[OCI_ROOT]) {
+ res=ENODATA;
+ goto errout;
}
- if (opts.sysfs) {
- mkdir("/sys", 0755);
- mount("sysfs", "/sys", "sysfs", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0);
+ if ((res = parseOCIroot(jsonfile, tb[OCI_ROOT])))
+ goto errout;
+
+ if (!tb[OCI_MOUNTS]) {
+ res=ENODATA;
+ goto errout;
}
- if (opts.ronly)
- mount(NULL, "/", NULL, MS_RDONLY | MS_REMOUNT, 0);
- return 0;
+ blobmsg_for_each_attr(cur, tb[OCI_MOUNTS], rem)
+ if ((res = parseOCImount(cur)))
+ goto errout;
+
+ if (tb[OCI_LINUX] && (res = parseOCIlinux(tb[OCI_LINUX])))
+ goto errout;
+
+ if (tb[OCI_HOOKS] && (res = parseOCIhooks(tb[OCI_HOOKS])))
+ goto errout;
+
+ if (tb[OCI_ANNOTATIONS])
+ opts.annotations = blob_memdup(tb[OCI_ANNOTATIONS]);
+
+ /* the success path falls through; res is 0 from the last parser run */
+errout:
+ blob_buf_free(&ocibuf);
+
+ return res;
}
-#define MAX_ENVP 8
-static char** build_envp(const char *seccomp)
+/*
+ * Write opts.oom_score_adj to /proc/<jail_process.pid>/oom_score_adj.
+ * Does nothing unless opts.set_oom_score_adj is set.
+ *
+ * Returns 0 on success or when nothing is to be done, otherwise the
+ * errno of the failed open(). The result of the write itself is not
+ * checked.
+ */
+static int set_oom_score_adj(void)
{
- static char *envp[MAX_ENVP];
- static char preload_var[PATH_MAX];
- static char seccomp_var[PATH_MAX];
- static char debug_var[] = "LD_DEBUG=all";
- const char *preload_lib = find_lib("libpreload-seccomp.so");
- int count = 0;
+ int f;
+ char fname[32];
- if (seccomp && !preload_lib) {
- ERROR("failed to add preload-lib to env\n");
- return NULL;
- }
- if (seccomp) {
- snprintf(seccomp_var, sizeof(seccomp_var), "SECCOMP_FILE=%s", seccomp);
- envp[count++] = seccomp_var;
- snprintf(preload_var, sizeof(preload_var), "LD_PRELOAD=%s", preload_lib);
- envp[count++] = preload_var;
- }
- if (debug > 1)
- envp[count++] = debug_var;
+ if (!opts.set_oom_score_adj)
+ return 0;
- return envp;
+ snprintf(fname, sizeof(fname), "/proc/%u/oom_score_adj", jail_process.pid);
+ f = open(fname, O_WRONLY | O_TRUNC);
+ if (f < 0)
+ return errno;
+
+ dprintf(f, "%d", opts.oom_score_adj);
+ close(f);
+
+ return 0;
}
-static void usage(void)
+
+/* Container life cycle states reported by the "state" ubus method. */
+enum {
+ OCI_STATE_CREATING,
+ OCI_STATE_CREATED,
+ OCI_STATE_RUNNING,
+ OCI_STATE_STOPPED,
+};
+
+/* Current container state; initialized to "created". */
+static int jail_oci_state = OCI_STATE_CREATED;
+static void pipe_send_start_container(struct uloop_timeout *t);
+/* Timeout whose callback is pipe_send_start_container(); armed by handle_start(). */
+static struct uloop_timeout start_container_timeout = {
+ .cb = pipe_send_start_container,
+};
+
+/*
+ * ubus "start" method: adds start_container_timeout to the uloop, but
+ * only while the container is in the "created" state.
+ *
+ * Returns UBUS_STATUS_OK, or UBUS_STATUS_INVALID_ARGUMENT in any other
+ * state. The request arguments are not used.
+ */
+static int handle_start(struct ubus_context *ctx, struct ubus_object *obj,
+ struct ubus_request_data *req, const char *method,
+ struct blob_attr *msg)
{
- fprintf(stderr, "ujail <options> -- <binary> <params ...>\n");
- fprintf(stderr, " -d <num>\tshow debug log (increase num to increase verbosity)\n");
- fprintf(stderr, " -S <file>\tseccomp filter config\n");
- fprintf(stderr, " -C <file>\tcapabilities drop config\n");
- fprintf(stderr, " -c\t\tset PR_SET_NO_NEW_PRIVS\n");
- fprintf(stderr, " -n <name>\tthe name of the jail\n");
- fprintf(stderr, "namespace jail options:\n");
- fprintf(stderr, " -h <hostname>\tchange the hostname of the jail\n");
- fprintf(stderr, " -r <file>\treadonly files that should be staged\n");
- fprintf(stderr, " -w <file>\twriteable files that should be staged\n");
- fprintf(stderr, " -p\t\tjail has /proc\n");
- fprintf(stderr, " -s\t\tjail has /sys\n");
- fprintf(stderr, " -l\t\tjail has /dev/log\n");
- fprintf(stderr, " -u\t\tjail has a ubus socket\n");
- fprintf(stderr, " -o\t\tremont jail root (/) read only\n");
- fprintf(stderr, "\nWarning: by default root inside the jail is the same\n\
-and he has the same powers as root outside the jail,\n\
-thus he can escape the jail and/or break stuff.\n\
-Please use seccomp/capabilities (-S/-C) to restrict his powers\n\n\
-If you use none of the namespace jail options,\n\
-ujail will not use namespace/build a jail,\n\
-and will only drop capabilities/apply seccomp filter.\n\n");
+ if (jail_oci_state != OCI_STATE_CREATED)
+ return UBUS_STATUS_INVALID_ARGUMENT;
+
+ uloop_timeout_add(&start_container_timeout);
+
+ return UBUS_STATUS_OK;
}
-static int exec_jail(void *_notused)
+/* Reply buffer reused by handle_state(). */
+static struct blob_buf bb;
+/*
+ * ubus "state" method: replies with "ociVersion", "id" (opts.name),
+ * "status", "bundle" (opts.ocibundle) and, when present, the
+ * annotations blob. "pid" is only included in the "created" and
+ * "running" states.
+ *
+ * Always returns UBUS_STATUS_OK.
+ */
+static int handle_state(struct ubus_context *ctx, struct ubus_object *obj,
+ struct ubus_request_data *req, const char *method,
+ struct blob_attr *msg)
{
- if (opts.capabilities && drop_capabilities(opts.capabilities))
- exit(EXIT_FAILURE);
+ char *statusstr;
- if (opts.no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
- ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
- exit(EXIT_FAILURE);
+ switch (jail_oci_state) {
+ case OCI_STATE_CREATING:
+ statusstr = "creating";
+ break;
+ case OCI_STATE_CREATED:
+ statusstr = "created";
+ break;
+ case OCI_STATE_RUNNING:
+ statusstr = "running";
+ break;
+ case OCI_STATE_STOPPED:
+ statusstr = "stopped";
+ break;
+ default:
+ statusstr = "unknown";
}
- if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0
- && sethostname(opts.hostname, strlen(opts.hostname))) {
- ERROR("sethostname(%s) failed: %m\n", opts.hostname);
- exit(EXIT_FAILURE);
- }
+ blob_buf_init(&bb, 0);
+ blobmsg_add_string(&bb, "ociVersion", OCI_VERSION_STRING);
+ blobmsg_add_string(&bb, "id", opts.name);
+ blobmsg_add_string(&bb, "status", statusstr);
+ if (jail_oci_state == OCI_STATE_CREATED ||
+ jail_oci_state == OCI_STATE_RUNNING)
+ blobmsg_add_u32(&bb, "pid", jail_process.pid);
- if (opts.namespace && build_jail_fs()) {
- ERROR("failed to build jail fs\n");
- exit(EXIT_FAILURE);
- }
+ blobmsg_add_string(&bb, "bundle", opts.ocibundle);
- char **envp = build_envp(opts.seccomp);
- if (!envp)
- exit(EXIT_FAILURE);
+ if (opts.annotations)
+ blobmsg_add_blob(&bb, opts.annotations);
- INFO("exec-ing %s\n", *opts.jail_argv);
- execve(*opts.jail_argv, opts.jail_argv, envp);
- /* we get there only if execve fails */
- ERROR("failed to execve %s: %m\n", *opts.jail_argv);
- exit(EXIT_FAILURE);
+ ubus_send_reply(ctx, req, bb.head);
+
+ return UBUS_STATUS_OK;
}
-static int jail_running = 1;
-static int jail_return_code = 0;
+/* Indices into the attribute table parsed from the "kill" ubus request. */
+enum {
+ CONTAINER_KILL_ATTR_SIGNAL,
+ __CONTAINER_KILL_ATTR_MAX,
+};
-static void jail_process_timeout_cb(struct uloop_timeout *t);
-static struct uloop_timeout jail_process_timeout = {
- .cb = jail_process_timeout_cb,
+/* Argument policy of the "kill" ubus method: an optional "signal" number. */
+static const struct blobmsg_policy container_kill_attrs[__CONTAINER_KILL_ATTR_MAX] = {
+ [CONTAINER_KILL_ATTR_SIGNAL] = { "signal", BLOBMSG_TYPE_INT32 },
};
-static void jail_process_handler(struct uloop_process *c, int ret)
+/*
+ * ubus "kill" method: sends a signal to jail_process.pid. The signal
+ * is taken from the "signal" argument and defaults to SIGTERM.
+ *
+ * Returns 0 on success, UBUS_STATUS_NOT_FOUND while the container is
+ * still in the "creating" state, or a ubus status derived from the
+ * errno of kill() (UBUS_STATUS_UNKNOWN_ERROR for unmapped values).
+ */
+static int
+container_handle_kill(struct ubus_context *ctx, struct ubus_object *obj,
+ struct ubus_request_data *req, const char *method,
+ struct blob_attr *msg)
{
- uloop_timeout_cancel(&jail_process_timeout);
- if (WIFEXITED(ret)) {
- jail_return_code = WEXITSTATUS(ret);
- INFO("jail (%d) exited with exit: %d\n", c->pid, jail_return_code);
- } else {
- jail_return_code = WTERMSIG(ret);
- INFO("jail (%d) exited with signal: %d\n", c->pid, jail_return_code);
+ struct blob_attr *tb[__CONTAINER_KILL_ATTR_MAX], *cur;
+ int sig = SIGTERM;
+
+ blobmsg_parse(container_kill_attrs, __CONTAINER_KILL_ATTR_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));
+
+ cur = tb[CONTAINER_KILL_ATTR_SIGNAL];
+ if (cur)
+ sig = blobmsg_get_u32(cur);
+
+ if (jail_oci_state == OCI_STATE_CREATING)
+ return UBUS_STATUS_NOT_FOUND;
+
+ if (kill(jail_process.pid, sig) == 0)
+ return 0;
+
+ /* translate the kill() failure into a ubus status */
+ switch (errno) {
+ case EINVAL: return UBUS_STATUS_INVALID_ARGUMENT;
+ case EPERM: return UBUS_STATUS_PERMISSION_DENIED;
+ case ESRCH: return UBUS_STATUS_NOT_FOUND;
}
- jail_running = 0;
- uloop_end();
-}
-static struct uloop_process jail_process = {
- .cb = jail_process_handler,
-};
+ return UBUS_STATUS_UNKNOWN_ERROR;
+}
-static void jail_process_timeout_cb(struct uloop_timeout *t)
+/*
+ * Write pid followed by a newline to the file named by opts.pidfile.
+ * Does nothing when no pidfile is configured.
+ *
+ * Returns 0 on success or when nothing is to be done, otherwise errno.
+ * NOTE(review): on the fprintf() failure path errno is read after
+ * fclose(), which may have changed it.
+ */
+static int
+jail_writepid(pid_t pid)
{
- DEBUG("jail process failed to stop, sending SIGKILL\n");
- kill(jail_process.pid, SIGKILL);
+ FILE *_pidfile;
+
+ if (!opts.pidfile)
+ return 0;
+
+ _pidfile = fopen(opts.pidfile, "w");
+ if (_pidfile == NULL)
+ return errno;
+
+ if (fprintf(_pidfile, "%d\n", pid) < 0) {
+ fclose(_pidfile);
+ return errno;
+ }
+
+ if (fclose(_pidfile))
+ return errno;
+
+ return 0;
}
-static void jail_handle_signal(int signo)
+/*
+ * Check that path can be opened as a directory; the descriptor is
+ * closed again right away.
+ *
+ * Returns 0 on success, -1 (after logging an error) when open() fails.
+ */
+static int checkpath(const char *path)
{
- DEBUG("forwarding signal %d to the jailed process\n", signo);
- kill(jail_process.pid, signo);
+ int dirfd = open(path, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ if (dirfd < 0) {
+ ERROR("path %s open failed %m\n", path);
+ return -1;
+ }
+ close(dirfd);
+
+ return 0;
}
+/* ubus methods of the container object: "start", "state" and "kill". */
+static struct ubus_method container_methods[] = {
+ UBUS_METHOD_NOARG("start", handle_start),
+ UBUS_METHOD_NOARG("state", handle_state),
+ UBUS_METHOD("kill", container_handle_kill, container_kill_attrs),
+};
+
+/* ubus object type "container" built from the method table above. */
+static struct ubus_object_type container_object_type =
+ UBUS_OBJECT_TYPE("container", container_methods);
+
+/* The ubus object itself; no name is set in this initializer. */
+static struct ubus_object container_object = {
+ .type = &container_object_type,
+ .methods = container_methods,
+ .n_methods = ARRAY_SIZE(container_methods),
+};
+
+static void post_main(struct uloop_timeout *t);
+/* Timeout whose callback is post_main(). */
+static struct uloop_timeout post_main_timeout = {
+ .cb = post_main,
+};
+/* NOTE(review): presumably descriptors of the jail's namespaces; their use is outside this chunk - confirm */
+static int netns_fd;
+static int pidns_fd;
+#ifdef CLONE_NEWTIME
+static int timens_fd;
+#endif
+static void post_create_runtime(void);
+
+/* List node holding one argument given with the -e command line option. */
+struct env_e {
+ struct list_head list;
+ char *envarg;
+};
+
int main(int argc, char **argv)
{
- sigset_t sigmask;
uid_t uid = getuid();
- char log[] = "/dev/log";
- char ubus[] = "/var/run/ubus.sock";
- int ch, i;
+ const char log[] = "/dev/log";
+ const char ubus[] = "/var/run/ubus/ubus.sock";
+ int ret = EXIT_FAILURE;
+ int ch;
+ char *tmp;
+ struct list_head envl = LIST_HEAD_INIT(envl);
+ struct env_e *enve, *tmpenve;
+ unsigned short int envn = 0, envc = 0;
if (uid) {
ERROR("not root, aborting: %m\n");
return EXIT_FAILURE;
}
+ /* these are file descriptors, so -1 indicates unused */
+ opts.setns.pid = -1;
+ opts.setns.net = -1;
+ opts.setns.ns = -1;
+ opts.setns.ipc = -1;
+ opts.setns.uts = -1;
+ opts.setns.user = -1;
+ opts.setns.cgroup = -1;
+#ifdef CLONE_NEWTIME
+ opts.setns.time = -1;
+#endif
+
+ /* default 5 seconds timeout after SIGTERM before SIGKILL is sent */
+ opts.term_timeout = 5;
+
umask(022);
mount_list_init();
init_library_search();
+ cgroups_prepare();
+ exit_from_child = false;
while ((ch = getopt(argc, argv, OPT_ARGS)) != -1) {
switch (ch) {
case 'd':
debug = atoi(optarg);
break;
+		case 'e':
+			/* remember the variable name only; its value is looked up after option parsing */
+			enve = calloc(1, sizeof(*enve));
+			if (!enve) {
+				/* out of memory: bail out instead of dereferencing NULL */
+				ret = -ENOMEM;
+				goto errout;
+			}
+			enve->envarg = optarg;
+			list_add_tail(&enve->list, &envl);
+			break;
case 'p':
- opts.namespace = 1;
+ opts.namespace |= CLONE_NEWNS;
opts.procfs = 1;
break;
case 'o':
- opts.namespace = 1;
+ opts.namespace |= CLONE_NEWNS;
opts.ronly = 1;
break;
+ case 'f':
+ opts.namespace |= CLONE_NEWUSER;
+ break;
+ case 'F':
+ opts.namespace |= CLONE_NEWCGROUP;
+ break;
+ case 'R':
+ opts.extroot = realpath(optarg, NULL);
+ break;
case 's':
- opts.namespace = 1;
+ opts.namespace |= CLONE_NEWNS;
opts.sysfs = 1;
break;
case 'S':
opts.seccomp = optarg;
- add_mount(optarg, 1, -1);
+ add_mount_bind(optarg, 1, -1);
break;
case 'C':
opts.capabilities = optarg;
case 'n':
opts.name = optarg;
break;
+ case 'N':
+ opts.namespace |= CLONE_NEWNET;
+ break;
case 'h':
- opts.hostname = optarg;
+ opts.namespace |= CLONE_NEWUTS;
+ opts.hostname = strdup(optarg);
+ break;
+ case 'j':
+ jail_join_ns(optarg);
break;
case 'r':
- opts.namespace = 1;
- add_path_and_deps(optarg, 1, 0, 0);
+ opts.namespace |= CLONE_NEWNS;
+ tmp = strchr(optarg, ':');
+ if (tmp) {
+ *(tmp++) = '\0';
+ add_2paths_and_deps(optarg, tmp, 1, 0, 0);
+ } else {
+ add_path_and_deps(optarg, 1, 0, 0);
+ }
break;
case 'w':
- opts.namespace = 1;
- add_path_and_deps(optarg, 0, 0, 0);
+ opts.namespace |= CLONE_NEWNS;
+ tmp = strchr(optarg, ':');
+ if (tmp) {
+ *(tmp++) = '\0';
+ add_2paths_and_deps(optarg, tmp, 0, 0, 0);
+ } else {
+ add_path_and_deps(optarg, 0, 0, 0);
+ }
break;
case 'u':
- opts.namespace = 1;
- add_mount(ubus, 0, -1);
+ opts.namespace |= CLONE_NEWNS;
+ add_mount_bind(ubus, 0, -1);
break;
case 'l':
- opts.namespace = 1;
- add_mount(log, 0, -1);
+ opts.namespace |= CLONE_NEWNS;
+ add_mount_bind(log, 0, -1);
+ break;
+ case 'U':
+ opts.user = optarg;
+ break;
+ case 'G':
+ opts.group = optarg;
break;
+ case 'O':
+ opts.overlaydir = realpath(optarg, NULL);
+ break;
+ case 't':
+ opts.term_timeout = atoi(optarg);
+ break;
+ case 'T':
+ opts.tmpoverlaysize = optarg;
+ break;
+ case 'E':
+ opts.require_jail = 1;
+ break;
+ case 'y':
+ opts.console = 1;
+ break;
+ case 'J':
+ opts.ocibundle = optarg;
+ break;
+ case 'i':
+ opts.immediately = true;
+ break;
+ case 'P':
+ opts.pidfile = optarg;
+ break;
+ }
+ }
+
+ if (opts.namespace && !opts.ocibundle)
+ opts.namespace |= CLONE_NEWIPC | CLONE_NEWPID;
+
+ /*
+ * env import from cmdline is not available for OCI containers
+ */
+ if (opts.ocibundle && !list_empty(&envl)) {
+ ret=-ENOTSUP;
+ goto errout;
+ }
+
+ /*
+ * prepare list of env variables to import for slim containers
+ */
+	if (!list_empty(&envl)) {
+		/* count the requested variables to size the array */
+		list_for_each_entry(enve, &envl, list)
+			++envn;
+
+		/* one extra slot for the terminating NULL pointer */
+		opts.envp = calloc(1 + envn, sizeof(char*));
+		if (!opts.envp) {
+			ret = -ENOMEM;
+			goto errout;
+		}
+
+		/* variables that are unset in our own environment are skipped */
+		list_for_each_entry_safe(enve, tmpenve, &envl, list) {
+			tmp = getenv(enve->envarg);
+			if (tmp) {
+				ret = asprintf(&opts.envp[envc++], "%s=%s", enve->envarg, tmp);
+				if (ret < 0) {
+					ERROR("filed to handle envargs %s\n", tmp);
+					free(enve);
+					goto errout;
+				}
+			}
+
+			list_del(&enve->list);
+			free(enve);
+		}
+
+		opts.envp[envc] = NULL;
+	}
+
+ /*
+ * uid in parent user namespace representing root user in new
+ * user namespace, defaults to nobody unless specified in uidMappings
+ */
+ opts.root_map_uid = 65534;
+
+ if (opts.capabilities && parseOCIcapabilities_from_file(&opts.capset, opts.capabilities)) {
+ ERROR("failed to read capabilities from file %s\n", opts.capabilities);
+ ret=-1;
+ goto errout;
+ }
+
+ if (opts.ocibundle) {
+ char *jsonfile;
+ int ocires;
+
+ if (!opts.name) {
+ ERROR("OCI bundle needs a named jail\n");
+ ret=-1;
+ goto errout;
+ }
+ if (asprintf(&jsonfile, "%s/config.json", opts.ocibundle) < 0) {
+ ret=-ENOMEM;
+ goto errout;
+ }
+ ocires = parseOCI(jsonfile);
+ free(jsonfile);
+ if (ocires) {
+ ERROR("parsing of OCI JSON spec has failed: %s (%d)\n", strerror(ocires), ocires);
+ ret=ocires;
+ goto errout;
}
}
+ if (opts.namespace & CLONE_NEWNET) {
+ if (!opts.name) {
+ ERROR("netns needs a named jail\n");
+ ret=-1;
+ goto errout;
+ }
+ }
+
+
+ if (opts.tmpoverlaysize && strlen(opts.tmpoverlaysize) > 8) {
+ ERROR("size parameter too long: \"%s\"\n", opts.tmpoverlaysize);
+ ret=-1;
+ goto errout;
+ }
+
+ if (opts.extroot && checkpath(opts.extroot)) {
+ ERROR("invalid rootfs path '%s'", opts.extroot);
+ ret=-1;
+ goto errout;
+ }
+
+ if (opts.overlaydir && checkpath(opts.overlaydir)) {
+ ERROR("invalid rootfs overlay path '%s'", opts.overlaydir);
+ ret=-1;
+ goto errout;
+ }
+
/* no <binary> param found */
- if (argc - optind < 1) {
+ if (!opts.ocibundle && (argc - optind < 1)) {
usage();
- return EXIT_FAILURE;
+ ret=EXIT_FAILURE;
+ goto errout;
}
- if (!(opts.namespace||opts.capabilities||opts.seccomp)) {
+ if (!(opts.ocibundle||opts.namespace||opts.capabilities||opts.seccomp||
+ (opts.setns.net != -1) ||
+ (opts.setns.ns != -1) ||
+ (opts.setns.ipc != -1) ||
+ (opts.setns.uts != -1) ||
+ (opts.setns.user != -1) ||
+ (opts.setns.cgroup != -1))) {
ERROR("Not using namespaces, capabilities or seccomp !!!\n\n");
usage();
- return EXIT_FAILURE;
+ ret=EXIT_FAILURE;
+ goto errout;
}
- DEBUG("Using namespaces(%d), capabilities(%d), seccomp(%d)\n",
+ DEBUG("Using namespaces(0x%08x), capabilities(%d), seccomp(%d)\n",
opts.namespace,
- opts.capabilities != 0,
- opts.seccomp != 0);
+ opts.capset.apply,
+ opts.seccomp != 0 || opts.ociseccomp != 0);
- opts.jail_argv = &argv[optind];
+ uloop_init();
+ signals_init();
- if (opts.namespace && add_path_and_deps(*opts.jail_argv, 1, -1, 0)) {
- ERROR("failed to load dependencies\n");
- return -1;
+	parent_ctx = ubus_connect(NULL);
+	if (parent_ctx) {
+		ubus_add_uloop(parent_ctx);
+	} else if (opts.ocibundle || (opts.namespace & CLONE_NEWNET)) {
+		/*
+		 * The container object below and the managed network namespace
+		 * (jail_network_start) both use this connection, so fail cleanly
+		 * instead of passing a NULL context on.
+		 */
+		ERROR("failed to connect to ubus\n");
+		ret=-1;
+		goto errout;
+	}
+
+	if (opts.ocibundle) {
+		char *objname;
+		/* the object is published as "container.<jail name>" */
+		if (asprintf(&objname, "container.%s", opts.name) < 0) {
+			ret=-ENOMEM;
+			goto errout;
+		}
+
+		container_object.name = objname;
+		ret = ubus_add_object(parent_ctx, &container_object);
+		if (ret) {
+			ERROR("Failed to add object: %s\n", ubus_strerror(ret));
+			ret=-1;
+			goto errout;
+		}
+	}
+
+ /* deliberately not using 'else' on unrelated conditional branches */
+ if (!opts.ocibundle) {
+ /* allocate NULL-terminated array for argv */
+ opts.jail_argv = calloc(1 + argc - optind, sizeof(void *));
+ if (!opts.jail_argv) {
+ ret=EXIT_FAILURE;
+ goto errout;
+ }
+ for (size_t s = optind; s < argc; s++)
+ opts.jail_argv[s - optind] = strdup(argv[s]);
+
+ if (opts.namespace & CLONE_NEWUSER)
+ get_jail_user(&opts.pw_uid, &opts.pw_gid, &opts.gr_gid);
+ }
+
+ if (!opts.extroot) {
+ if (opts.namespace && add_path_and_deps(*opts.jail_argv, 1, -1, 0)) {
+ ERROR("failed to load dependencies\n");
+ ret=-1;
+ goto errout;
+ }
}
if (opts.namespace && opts.seccomp && add_path_and_deps("libpreload-seccomp.so", 1, -1, 1)) {
ERROR("failed to load libpreload-seccomp.so\n");
- return -1;
+ opts.seccomp = 0;
+ if (opts.require_jail) {
+ ret=-1;
+ goto errout;
+ }
}
- if (opts.name)
- prctl(PR_SET_NAME, opts.name, NULL, NULL, NULL);
+ uloop_timeout_add(&post_main_timeout);
+ uloop_run();
- uloop_init();
+errout:
+ if (opts.ocibundle)
+ cgroups_free();
- sigfillset(&sigmask);
- for (i = 0; i < _NSIG; i++) {
- struct sigaction s = { 0 };
+ free_opts(true);
- if (!sigismember(&sigmask, i))
- continue;
- if ((i == SIGCHLD) || (i == SIGPIPE))
- continue;
+ return ret;
+}
- s.sa_handler = jail_handle_signal;
- sigaction(i, &s, NULL);
+static void post_main(struct uloop_timeout *t)
+{
+ if (apply_rlimits()) {
+ ERROR("error applying resource limits\n");
+ free_and_exit(EXIT_FAILURE);
}
- if (opts.namespace) {
- add_mount("/dev/full", 0, -1);
- add_mount("/dev/null", 0, -1);
- add_mount("/dev/urandom", 0, -1);
- add_mount("/dev/zero", 0, -1);
+ if (opts.name)
+ prctl(PR_SET_NAME, opts.name, NULL, NULL, NULL);
+
+ if (pipe(&pipes[0]) < 0 || pipe(&pipes[2]) < 0)
+ free_and_exit(-1);
+
+ if (has_namespaces()) {
+ if (opts.namespace & CLONE_NEWNS) {
+ if (!opts.extroot && (opts.user || opts.group)) {
+ add_mount_bind("/etc/passwd", 1, -1);
+ add_mount_bind("/etc/group", 1, -1);
+ }
+
+#if defined(__GLIBC__)
+ if (!opts.extroot)
+ add_mount_bind("/etc/nsswitch.conf", 1, -1);
+#endif
+ if (opts.setns.ns == -1) {
+ if (!(opts.namespace & CLONE_NEWNET)) {
+ add_mount_bind("/etc/resolv.conf", 1, 0);
+ } else {
+ /* new mount namespace to provide /dev/resolv.conf.d */
+ char hostdir[PATH_MAX];
+
+ snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name);
+ mkdir_p(hostdir, 0755);
+ add_mount(hostdir, "/dev/resolv.conf.d", NULL,
+ MS_BIND | MS_NOEXEC | MS_NOATIME | MS_NOSUID | MS_NODEV | MS_RDONLY, 0, NULL, 0);
+ }
+ }
+ /* default mounts */
+ add_mount(NULL, "/dev", "tmpfs", MS_NOATIME | MS_NOEXEC | MS_NOSUID, 0, "size=1M", -1);
+ add_mount(NULL, "/dev/pts", "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, 0, "newinstance,ptmxmode=0666,mode=0620,gid=5", 0);
+
+ if (opts.procfs || opts.ocibundle) {
+ add_mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0, NULL, -1);
+
+ /*
+ * hack to make /proc/sys/net read-write while the rest of /proc/sys is read-only
+ * which cannot be expressed with OCI spec, but happens to be very useful.
+ * Only apply it if '/proc/sys' is not already listed as mount, maskedPath or
+ * readonlyPath.
+ * If not running in a new network namespace, only make /proc/sys read-only.
+ * If running in a new network namespace, temporarily stash (ie. mount-bind)
+ * /proc/sys/net into (totally unrelated, but surely existing) /proc/self/net.
+ * Then we mount-bind /proc/sys read-only and then mount-move /proc/self/net into
+ * /proc/sys/net.
+ * This works because mounts are executed in incrementing strcmp() order and
+ * /proc/self/net appears there before /proc/sys/net and hence the operation
+ * succeeds as the bind-mount of /proc/self/net is performed first and then
+ * move-mount of /proc/sys/net follows because 'e' precedes 'y' in the ASCII
+ * table (and in the alphabet).
+ */
+ if (!add_mount(NULL, "/proc/sys", NULL, MS_BIND | MS_RDONLY, 0, NULL, -1))
+ if (opts.namespace & CLONE_NEWNET)
+ if (!add_mount_inner("/proc/self/net", "/proc/sys/net", NULL, MS_MOVE, 0, NULL, -1))
+ add_mount_inner("/proc/sys/net", "/proc/self/net", NULL, MS_BIND, 0, NULL, -1);
+
+ }
+ if (opts.sysfs || opts.ocibundle)
+ add_mount("sysfs", "/sys", "sysfs", MS_RELATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, 0, NULL, -1);
+
+ if (opts.ocibundle)
+ add_mount("shm", "/dev/shm", "tmpfs", MS_NOSUID | MS_NOEXEC | MS_NODEV, 0, "mode=1777", -1);
+
+ }
+
+ if (opts.setns.pid != -1) {
+ pidns_fd = ns_open_pid("pid", getpid());
+ setns_open(CLONE_NEWPID);
+ } else {
+ pidns_fd = -1;
+ }
+
+#ifdef CLONE_NEWTIME
+ if (opts.setns.time != -1) {
+ timens_fd = ns_open_pid("time", getpid());
+ setns_open(CLONE_NEWTIME);
+ } else {
+ timens_fd = -1;
+ }
+#endif
+
+ if (opts.namespace & CLONE_NEWUSER) {
+ if (prctl(PR_SET_SECUREBITS, SECBIT_NO_SETUID_FIXUP)) {
+ ERROR("prctl(PR_SET_SECUREBITS) failed: %m\n");
+ free_and_exit(EXIT_FAILURE);
+ }
+ if (seteuid(opts.root_map_uid)) {
+ ERROR("seteuid(%d) failed: %m\n", opts.root_map_uid);
+ free_and_exit(EXIT_FAILURE);
+ }
+ }
- int flags = CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWIPC | SIGCHLD;
- if (opts.hostname)
- flags |= CLONE_NEWUTS;
- jail_process.pid = clone(exec_jail, child_stack + STACK_SIZE, flags, NULL);
+ jail_process.pid = clone(exec_jail, child_stack + STACK_SIZE, SIGCHLD | (opts.namespace & (~CLONE_NEWCGROUP)), NULL);
} else {
jail_process.pid = fork();
}
if (jail_process.pid > 0) {
/* parent process */
+ char sig_buf[1];
+
uloop_process_add(&jail_process);
- uloop_run();
- if (jail_running) {
- DEBUG("uloop interrupted, killing jail process\n");
- kill(jail_process.pid, SIGTERM);
- uloop_timeout_set(&jail_process_timeout, 1000);
- uloop_run();
- }
- uloop_done();
- return jail_return_code;
+ jail_running = 1;
+		/*
+		 * Switch the effective uid back to root; it was changed to
+		 * opts.root_map_uid above when a new user namespace is requested.
+		 * The message reports the uid that was actually requested here.
+		 */
+		if (seteuid(0)) {
+			ERROR("seteuid(%d) failed: %m\n", 0);
+			free_and_exit(EXIT_FAILURE);
+		}
+
+ prctl(PR_SET_SECUREBITS, 0);
+
+ if (pidns_fd != -1) {
+ setns(pidns_fd, CLONE_NEWPID);
+ close(pidns_fd);
+ }
+#ifdef CLONE_NEWTIME
+ if (timens_fd != -1) {
+ setns(timens_fd, CLONE_NEWTIME);
+ close(timens_fd);
+ }
+#endif
+ if (opts.setns.net != -1)
+ close(opts.setns.net);
+ if (opts.setns.ns != -1)
+ close(opts.setns.ns);
+ if (opts.setns.ipc != -1)
+ close(opts.setns.ipc);
+ if (opts.setns.uts != -1)
+ close(opts.setns.uts);
+ if (opts.setns.user != -1)
+ close(opts.setns.user);
+ if (opts.setns.cgroup != -1)
+ close(opts.setns.cgroup);
+ close(pipes[1]);
+ close(pipes[2]);
+ if (read(pipes[0], sig_buf, 1) < 1) {
+ ERROR("can't read from child\n");
+ free_and_exit(-1);
+ }
+ close(pipes[0]);
+ set_oom_score_adj();
+
+ if (opts.ocibundle)
+ cgroups_apply(jail_process.pid);
+
+ if (opts.namespace & CLONE_NEWUSER) {
+ if (write_setgroups(jail_process.pid, true)) {
+ ERROR("can't write setgroups\n");
+ free_and_exit(-1);
+ }
+ if (!opts.uidmap) {
+ bool has_gr = (opts.gr_gid != -1);
+ if (opts.pw_uid != -1) {
+ write_single_uid_gid_map(jail_process.pid, 0, opts.pw_uid);
+ write_single_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:opts.pw_gid);
+ } else {
+ write_single_uid_gid_map(jail_process.pid, 0, 65534);
+ write_single_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:65534);
+ }
+ } else {
+ write_uid_gid_map(jail_process.pid, 0, opts.uidmap);
+ if (opts.gidmap)
+ write_uid_gid_map(jail_process.pid, 1, opts.gidmap);
+ }
+ }
+
+ if (opts.namespace & CLONE_NEWNET)
+ jail_network_start(parent_ctx, opts.name, jail_process.pid);
+
+ if (jail_writepid(jail_process.pid)) {
+ ERROR("failed to write pidfile: %m\n");
+ free_and_exit(-1);
+ }
} else if (jail_process.pid == 0) {
/* fork child process */
- return exec_jail(NULL);
+ free_and_exit(exec_jail(NULL));
} else {
ERROR("failed to clone/fork: %m\n");
- return EXIT_FAILURE;
+ free_and_exit(EXIT_FAILURE);
+ }
+ run_hooks(opts.hooks.createRuntime, post_create_runtime);
+}
+
+static void post_poststart(void);
+/*
+ * Passed to run_hooks() together with the createRuntime hooks: writes 'O'
+ * to pipes[3], records the "created" state and then either waits for the
+ * ubus 'start' command or starts the container right away.
+ */
+static void post_create_runtime(void)
+{
+	const char token = 'O';
+
+	if (write(pipes[3], &token, 1) < 0) {
+		ERROR("can't write to child\n");
+		free_and_exit(-1);
+	}
+
+	jail_oci_state = OCI_STATE_CREATED;
+
+	/* slim jails and OCI containers started with 'immediately' do not wait */
+	if (!opts.ocibundle || opts.immediately) {
+		pipe_send_start_container(NULL);
+		return;
+	}
+
+	uloop_run(); /* wait for 'start' command via ubus */
+}
+
+/*
+ * Record the "running" state, write '!' to pipes[3], close that pipe end
+ * and continue with the poststart hooks. The timeout argument is unused.
+ */
+static void pipe_send_start_container(struct uloop_timeout *t)
+{
+	const char token = '!';
+
+	jail_oci_state = OCI_STATE_RUNNING;
+
+	if (write(pipes[3], &token, 1) < 0) {
+		ERROR("can't write to child\n");
+		free_and_exit(-1);
+	}
+
+	/* nothing more is sent on this pipe */
+	close(pipes[3]);
+	run_hooks(opts.hooks.poststart, post_poststart);
+}
+
+/*
+ * Passed to run_hooks() together with the poststart hooks: runs the event
+ * loop while the jail is alive and, once the loop returns, makes sure the
+ * jailed process is asked to terminate before moving on to poststop().
+ */
+static void post_poststart(void)
+{
+	uloop_run(); /* idle here while jail is running */
+	if (jail_running) {
+		/* the loop ended while the jail is still flagged as running */
+		DEBUG("uloop interrupted, killing jail process\n");
+		kill(jail_process.pid, SIGTERM);
+		/* arm jail_process_timeout for 1000 ms and run the loop again */
+		uloop_timeout_set(&jail_process_timeout, 1000);
+		uloop_run();
+	}
+	uloop_done();
+	poststop();
+}
+
+static void post_poststop(void);
+/*
+ * Stop the jail network (only when a new network namespace was requested)
+ * and then run the poststop hooks, continuing in post_poststop().
+ */
+static void poststop(void) {
+	if (opts.namespace & CLONE_NEWNET) {
+		/* switch to the namespace referred to by netns_fd before stopping the jail network */
+		setns(netns_fd, CLONE_NEWNET);
+		jail_network_stop();
+		close(netns_fd);
 	}
+	run_hooks(opts.hooks.poststop, post_poststop);
+}
+
+/*
+ * Passed to run_hooks() together with the poststop hooks: releases the
+ * options and the ubus connection (if any), then exits the process with
+ * jail_return_code.
+ */
+static void post_poststop(void)
+{
+	free_opts(true);
+	if (parent_ctx)
+		ubus_free(parent_ctx);
+
+	exit(jail_return_code);
 }
#define _JAIL_JAIL_H_
int mount_bind(const char *root, const char *path, int readonly, int error);
+int ns_open_pid(const char *nstype, const pid_t target_ns);
#endif
#define INFO(fmt, ...) do { \
printf("jail: "fmt, ## __VA_ARGS__); \
} while (0)
+/* log a "jail: "-prefixed warning to syslog (LOG_WARNING) and echo it on stdout */
+#define WARNING(fmt, ...) do { \
+	syslog(LOG_WARNING, "jail: "fmt, ## __VA_ARGS__); \
+	printf("jail: "fmt, ## __VA_ARGS__); \
+	} while (0)
#define ERROR(fmt, ...) do { \
syslog(LOG_ERR, "jail: "fmt, ## __VA_ARGS__); \
fprintf(stderr,"jail: "fmt, ## __VA_ARGS__); \
--- /dev/null
+/*
+ * Copyright (C) 2021 Daniel Golle <daniel@makrotopia.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * launch private ubus and netifd instances for containers with managed
+ * network namespace.
+ */
+
+#define _GNU_SOURCE /* See feature_test_macros(7) */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <libgen.h>
+#include <fcntl.h>
+
+#include <sys/inotify.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <pwd.h>
+
+#include <linux/limits.h>
+
+#include <libubox/uloop.h>
+#include <libubox/utils.h>
+#include <libubus.h>
+#include <libubox/blobmsg.h>
+#include <libubox/blobmsg_json.h>
+#include <uci.h>
+
+#include "netifd.h"
+#include "log.h"
+#include "jail.h"
+
+#define INOTIFY_SZ (sizeof(struct inotify_event) + PATH_MAX + 1)
+
+static const char ubusd_path[] = "/sbin/ubusd";
+static const char netifd_path[] = "/sbin/netifd";
+static const char uci_net[] = "network";
+static const char ubus_sock_name[] = "ubus.sock";
+
+static char *jail_name, *ubus_sock_path, *ubus_sock_dir, *uci_config_network = NULL;
+
+static char *inotify_buffer;
+static struct uloop_fd fd_inotify_read;
+static struct passwd *ubus_pw;
+static pid_t ns_pid;
+
+static struct ubus_context *host_ubus_ctx = NULL;
+static struct ubus_context *jail_ubus_ctx = NULL;
+
+static struct ubus_subscriber config_watch_subscribe;
+
+/* generate /etc/config/network for jail'ed netifd */
+/*
+ * Generate the network UCI configuration for the jailed netifd.
+ *
+ * Loads the "network" package, deletes every section whose 'jail' option
+ * does not name this jail, strips the 'jail' option from the matching
+ * ones, renames 'jail_device' (or 'jail_ifname') to 'device' and the
+ * previous 'device'/'ifname' to 'host_device', adds a 'loopback'
+ * interface section unless an interface already uses device 'lo', and
+ * exports the result to the file named by uci_config_network.
+ *
+ * Returns 0 on success or when no config file path is set, otherwise an
+ * errno value or the result of uci_export().
+ */
+static int gen_jail_uci_network(void)
+{
+	struct uci_context *uci_ctx;
+	struct uci_package *pkg = NULL;
+	struct uci_element *e, *t;
+	bool has_loopback = false;
+	int ret = 0;
+	FILE *ucinetf;
+
+	/* if no network configuration is active just return */
+	if (!uci_config_network)
+		return 0;
+
+	/* allocate the context only when needed and never use a NULL one */
+	uci_ctx = uci_alloc_context();
+	if (!uci_ctx)
+		return ENOMEM;
+
+	/* open output uci network config file */
+	ucinetf = fopen(uci_config_network, "w");
+	if (!ucinetf) {
+		ret = errno;
+		goto uci_out;
+	}
+
+	/* load network uci package */
+	if (uci_load(uci_ctx, uci_net, &pkg) != UCI_OK) {
+		char *err;
+		uci_get_errorstr(uci_ctx, &err, uci_net);
+		fprintf(stderr, "unable to load configuration (%s)\n", err);
+		free(err);
+		ret = EIO;
+		goto ucinetf_out;
+	}
+
+	/* remove all sections which don't match jail */
+	uci_foreach_element_safe(&pkg->sections, t, e) {
+		struct uci_section *s = uci_to_section(e);
+		struct uci_option *o = uci_lookup_option(uci_ctx, s, "jail");
+		/* without .o set, the uci_delete() below removes the whole section */
+		struct uci_ptr ptr = { .p = pkg, .s = s };
+
+		/* keep match, but remove 'jail' option and rename 'jail_ifname' */
+		if (o && o->type == UCI_TYPE_STRING && !strcmp(o->v.string, jail_name)) {
+			ptr.o = o;
+			struct uci_option *jio = uci_lookup_option(uci_ctx, s, "jail_device");
+			if (!jio)
+				jio = uci_lookup_option(uci_ctx, s, "jail_ifname");
+
+			if (jio) {
+				struct uci_ptr ren_ptr = { .p = pkg, .s = s, .o = jio, .value = "device" };
+				struct uci_option *host_device = uci_lookup_option(uci_ctx, s, "device");
+				struct uci_option *legacy_ifname = uci_lookup_option(uci_ctx, s, "ifname");
+				if (host_device && legacy_ifname) {
+					struct uci_ptr delif_ptr = { .p = pkg, .s = s, .o = legacy_ifname };
+					uci_delete(uci_ctx, &delif_ptr);
+				}
+
+				/*
+				 * Only rename the host-side option if one exists; an
+				 * option-less pointer would no longer refer to an option.
+				 */
+				if (host_device || legacy_ifname) {
+					struct uci_ptr renif_ptr = { .p = pkg, .s = s, .o = host_device?:legacy_ifname, .value = "host_device" };
+					uci_rename(uci_ctx, &renif_ptr);
+				}
+				uci_rename(uci_ctx, &ren_ptr);
+			}
+		}
+
+		uci_delete(uci_ctx, &ptr);
+	}
+
+	/* check if device 'lo' is defined by any remaining interfaces */
+	uci_foreach_element(&pkg->sections, e) {
+		struct uci_section *s = uci_to_section(e);
+		if (strcmp(s->type, "interface"))
+			continue;
+
+		const char *devname = uci_lookup_option_string(uci_ctx, s, "device");
+		if (devname && !strcmp(devname, "lo")) {
+			has_loopback = true;
+			break;
+		}
+	}
+
+	/* create loopback interface section if not defined */
+	if (!has_loopback) {
+		struct uci_ptr ptr = { .p = pkg, .section = "loopback", .value = "interface" };
+		uci_set(uci_ctx, &ptr);
+		uci_reorder_section(uci_ctx, ptr.s, 0);
+		struct uci_ptr ptr1 = { .p = pkg, .s = ptr.s, .option = "device", .value = "lo" };
+		struct uci_ptr ptr2 = { .p = pkg, .s = ptr.s, .option = "proto", .value = "static" };
+		struct uci_ptr ptr3 = { .p = pkg, .s = ptr.s, .option = "ipaddr", .value = "127.0.0.1" };
+		struct uci_ptr ptr4 = { .p = pkg, .s = ptr.s, .option = "netmask", .value = "255.0.0.0" };
+		uci_set(uci_ctx, &ptr1);
+		uci_set(uci_ctx, &ptr2);
+		uci_set(uci_ctx, &ptr3);
+		uci_set(uci_ctx, &ptr4);
+	}
+
+	ret = uci_export(uci_ctx, ucinetf, pkg, false);
+
+ucinetf_out:
+	fclose(ucinetf);
+
+uci_out:
+	uci_free_context(uci_ctx);
+
+	return ret;
+}
+
+/*
+ * Timeout callback: ask the host's "container" ubus object to add an
+ * instance named "ubus" which runs ubusd on the jail's private socket.
+ */
+static void run_ubusd(struct uloop_timeout *t)
+{
+	static struct blob_buf req;
+	void *instances, *instance, *command;
+	uint32_t container_id;
+
+	blob_buf_init(&req, 0);
+	blobmsg_add_string(&req, "name", jail_name);
+
+	instances = blobmsg_open_table(&req, "instances");
+	instance = blobmsg_open_table(&req, "ubus");
+
+	/* command line: <ubusd_path> -s <ubus_sock_path> */
+	command = blobmsg_open_array(&req, "command");
+	blobmsg_add_string(&req, "", ubusd_path);
+	blobmsg_add_string(&req, "", "-s");
+	blobmsg_add_string(&req, "", ubus_sock_path);
+	blobmsg_close_array(&req, command);
+
+	/* request the dedicated account only if the "ubus" user was found */
+	if (ubus_pw != NULL) {
+		blobmsg_add_string(&req, "user", "ubus");
+		blobmsg_add_string(&req, "group", "ubus");
+	}
+
+	blobmsg_close_table(&req, instance);
+	blobmsg_close_table(&req, instances);
+
+	if (ubus_lookup_id(host_ubus_ctx, "container", &container_id) == 0)
+		ubus_invoke(host_ubus_ctx, container_id, "add", req.head, NULL, NULL, 3000);
+
+	blob_buf_free(&req);
+}
+
+/*
+ * Timeout callback armed by inotify_read_handler() once the jail's ubus
+ * socket shows up: connects to the jail's ubusd, generates the jail's
+ * network configuration in a temporary directory and asks the host's
+ * "container" ubus object to add a jailed "netifd" instance.
+ *
+ * Every exit path calls uloop_end(), so the uloop_run() in
+ * jail_network_start() returns on failure as well. uci_config_network is
+ * reset to NULL whenever it is released here, because
+ * jail_network_stop() and gen_jail_uci_network() test it later.
+ */
+static void run_netifd(struct uloop_timeout *t)
+{
+	static struct blob_buf req;
+	void *ins, *in, *cmd, *jail, *setns, *setnso, *namespaces, *mount, *pathenv;
+	char *resolvconf_dir, *resolvconf, *ucimount, *ubusmount;
+	char uci_dir[] = "/var/containers/ujail-uci-XXXXXX";
+
+	uint32_t id;
+	bool running = false;
+
+	/* the inotify watch has done its job */
+	uloop_fd_delete(&fd_inotify_read);
+	close(fd_inotify_read.fd);
+
+	jail_ubus_ctx = ubus_connect(ubus_sock_path);
+	if (!jail_ubus_ctx)
+		goto netifd_out;
+
+	if (asprintf(&resolvconf_dir, "/tmp/resolv.conf-%s.d", jail_name) == -1)
+		goto netifd_out;
+
+	if (asprintf(&resolvconf, "%s/resolv.conf.auto", resolvconf_dir) == -1)
+		goto netifd_out_resolvconf_dir;
+
+	if (!mkdtemp(uci_dir))
+		goto netifd_out_resolvconf;
+
+	if (asprintf(&uci_config_network, "%s/network", uci_dir) == -1) {
+		/* the pointer is undefined after a failed asprintf() */
+		uci_config_network = NULL;
+		goto netifd_out_ucidir;
+	}
+
+	if (asprintf(&ucimount, "%s:/etc/config", uci_dir) == -1)
+		goto netifd_out_ucinetconf;
+
+	if (asprintf(&ubusmount, "%s:/var/run/ubus", ubus_sock_dir) == -1)
+		goto netifd_out_ucimount;
+
+	if (gen_jail_uci_network())
+		goto netifd_out_ubusmount;
+
+	blob_buf_init(&req, 0);
+	blobmsg_add_string(&req, "name", jail_name);
+	ins = blobmsg_open_table(&req, "instances");
+	in = blobmsg_open_table(&req, "netifd");
+
+	cmd = blobmsg_open_array(&req, "command");
+	blobmsg_add_string(&req, "", netifd_path);
+	blobmsg_add_string(&req, "", "-r");
+	blobmsg_add_string(&req, "", resolvconf);
+	blobmsg_close_array(&req, cmd);
+
+	pathenv = blobmsg_open_table(&req, "env");
+	blobmsg_add_string(&req, "PATH", "/usr/sbin:/usr/bin:/sbin:/bin");
+	blobmsg_close_table(&req, pathenv);
+
+	jail = blobmsg_open_table(&req, "jail");
+
+	/* join the net, ipc and uts namespaces of process ns_pid */
+	setns = blobmsg_open_array(&req, "setns");
+	setnso = blobmsg_open_table(&req, "");
+	blobmsg_add_u32(&req, "pid", ns_pid);
+	namespaces = blobmsg_open_array(&req, "namespaces");
+	blobmsg_add_string(&req, "", "net");
+	blobmsg_add_string(&req, "", "ipc");
+	blobmsg_add_string(&req, "", "uts");
+	blobmsg_close_array(&req, namespaces);
+	blobmsg_close_table(&req, setnso);
+	blobmsg_close_array(&req, setns);
+
+	mount = blobmsg_open_table(&req, "mount");
+	blobmsg_add_string(&req, ubusmount, "1");
+	blobmsg_add_string(&req, resolvconf_dir, "1");
+	blobmsg_add_string(&req, ucimount, "0");
+	blobmsg_add_string(&req, "/bin/cat", "0");
+	blobmsg_add_string(&req, "/bin/ipcalc.sh", "0");
+	blobmsg_add_string(&req, "/bin/kill", "0");
+	blobmsg_add_string(&req, "/bin/ubus", "0");
+	blobmsg_add_string(&req, "/etc/hotplug.d", "0");
+	blobmsg_add_string(&req, "/lib/functions", "0");
+	blobmsg_add_string(&req, "/lib/functions.sh", "0");
+	blobmsg_add_string(&req, "/lib/netifd", "0");
+	blobmsg_add_string(&req, "/lib/network", "0");
+	blobmsg_add_string(&req, "/usr/bin/awk", "0");
+	blobmsg_add_string(&req, "/usr/bin/killall", "0");
+	blobmsg_add_string(&req, "/usr/bin/logger", "0");
+	blobmsg_add_string(&req, "/usr/bin/jshn", "0");
+	blobmsg_add_string(&req, "/usr/share/libubox/jshn.sh", "0");
+	blobmsg_add_string(&req, "/sbin/hotplug-call", "0");
+	blobmsg_add_string(&req, "/sbin/udhcpc", "0");
+	blobmsg_close_table(&req, mount);
+
+	blobmsg_add_u8(&req, "log", 1);
+	blobmsg_add_u8(&req, "procfs", 1);
+	blobmsg_add_u8(&req, "sysfs", 1);
+
+	blobmsg_add_u8(&req, "requirejail", 1);
+
+	blobmsg_close_table(&req, jail);
+
+	blobmsg_add_u8(&req, "stdout", 1);
+	blobmsg_add_u8(&req, "stderr", 1);
+
+	blobmsg_close_table(&req, in);
+	blobmsg_close_table(&req, ins);
+
+	if (!ubus_lookup_id(host_ubus_ctx, "container", &id))
+		running = !ubus_invoke(host_ubus_ctx, id, "add", req.head, NULL, NULL, 3000);
+
+	/* the request is complete once ubus_invoke() returned, so always release the buffer */
+	blob_buf_free(&req);
+netifd_out_ubusmount:
+	free(ubusmount);
+netifd_out_ucimount:
+	free(ucimount);
+netifd_out_ucinetconf:
+	if (!running) {
+		unlink(uci_config_network);
+		free(uci_config_network);
+		/* do not leave a dangling pointer behind for later users */
+		uci_config_network = NULL;
+	}
+netifd_out_ucidir:
+	if (!running)
+		rmdir(uci_dir);
+netifd_out_resolvconf:
+	free(resolvconf);
+netifd_out_resolvconf_dir:
+	free(resolvconf_dir);
+netifd_out:
+	uloop_end();
+}
+
+static struct uloop_timeout netifd_start_timeout = { .cb = run_netifd, };
+
+/*
+ * uloop fd callback for the inotify descriptor: reads the pending events
+ * and arms netifd_start_timeout when an event names the ubus socket.
+ */
+static void inotify_read_handler(struct uloop_fd *u, unsigned int events)
+{
+	struct inotify_event *ev;
+	char *cur;
+	int len;
+
+	/* read inotify events, retrying when interrupted */
+	do {
+		len = read(u->fd, inotify_buffer, INOTIFY_SZ);
+	} while (len == -1 && errno == EINTR);
+
+	if (len <= 0)
+		return;
+
+	/* walk the variable-length event records in the buffer */
+	cur = inotify_buffer;
+	while (len - (cur - inotify_buffer) >= (int)sizeof(struct inotify_event)) {
+		ev = (struct inotify_event*)cur;
+		/* advance first so that 'continue' moves on to the next record */
+		cur += sizeof(struct inotify_event) + ev->len;
+
+		if (ev->len < 4)
+			continue;
+
+		if (!strncmp(ubus_sock_name, ev->name, ev->len))
+			uloop_timeout_add(&netifd_start_timeout);
+	}
+}
+
+/*
+ * Invoke "netns_updown" on the "network" object reachable through 'ubus',
+ * passing 'netns_fd' along with the request. 'name' (optional) is sent as
+ * "jail" and 'start' as "start". Does nothing when 'ubus' is NULL.
+ */
+static void netns_updown(struct ubus_context *ubus, const char *name, bool start, int netns_fd)
+{
+	static struct blob_buf req;
+	uint32_t network_id;
+	bool failed;
+
+	if (ubus == NULL)
+		return;
+
+	blob_buf_init(&req, 0);
+	if (name != NULL)
+		blobmsg_add_string(&req, "jail", name);
+
+	blobmsg_add_u8(&req, "start", start);
+
+	/* the invoke is only attempted when the lookup succeeded */
+	failed = ubus_lookup_id(ubus, "network", &network_id) != 0;
+	if (!failed)
+		failed = ubus_invoke_fd(ubus, network_id, "netns_updown", req.head, NULL, NULL, 3000, netns_fd) != 0;
+
+	if (failed)
+		INFO("ubus request failed\n");
+
+	blob_buf_free(&req);
+}
+
+/*
+ * Timeout callback: regenerate the jail's network configuration and ask
+ * the "network" object on the jail's ubus to reload. Silently gives up if
+ * there is no jail ubus connection or any step fails.
+ */
+static void jail_network_reload(struct uloop_timeout *t)
+{
+	uint32_t network_id;
+
+	/* the config is only regenerated when a jail ubus connection exists */
+	if (!jail_ubus_ctx || gen_jail_uci_network())
+		return;
+
+	if (ubus_lookup_id(jail_ubus_ctx, "network", &network_id) == 0)
+		ubus_invoke(jail_ubus_ctx, network_id, "reload", NULL, NULL, NULL, 3000);
+}
+
+static const struct blobmsg_policy service_watch_policy = { "config", BLOBMSG_TYPE_STRING };
+static struct uloop_timeout jail_network_reload_timeout = { .cb = jail_network_reload, };
+
+/*
+ * Subscriber callback: on a "config.change" notification whose "config"
+ * string equals "network", arm jail_network_reload_timeout.
+ *
+ * Returns 1 when the notification carries no "config" string, 0 otherwise.
+ */
+static int config_watch_notify_cb(struct ubus_context *ctx, struct ubus_object *obj,
+				  struct ubus_request_data *req, const char *method,
+				  struct blob_attr *msg)
+{
+	struct blob_attr *attr;
+
+	/* other notifications are of no interest */
+	if (strcmp(method, "config.change") != 0)
+		return 0;
+
+	blobmsg_parse(&service_watch_policy, 1, &attr, blob_data(msg), blob_len(msg));
+	if (attr == NULL)
+		return 1;
+
+	if (!strcmp(blobmsg_get_string(attr), "network"))
+		uloop_timeout_add(&jail_network_reload_timeout);
+
+	return 0;
+}
+
+/*
+ * Register config_watch_subscribe on the host ubus and subscribe it to
+ * the "service" object so config_watch_notify_cb() gets its notifications.
+ */
+static void watch_ubus_service(void)
+{
+	uint32_t service_id;
+
+	config_watch_subscribe.cb = config_watch_notify_cb;
+
+	if (ubus_register_subscriber(host_ubus_ctx, &config_watch_subscribe) != 0) {
+		ERROR("failed to register ubus subscriber\n");
+		return;
+	}
+
+	/* a missing "service" object is not reported */
+	if (ubus_lookup_id(host_ubus_ctx, "service", &service_id) != 0)
+		return;
+
+	if (ubus_subscribe(host_ubus_ctx, &config_watch_subscribe, service_id))
+		ERROR("failed to subscribe %d\n", service_id);
+}
+
+static struct uloop_timeout ubus_start_timeout = { .cb = run_ubusd, };
+
+/*
+ * Start the managed network for a jail.
+ *
+ * Creates the directory for the jail's private ubus socket, watches it
+ * with inotify (run_netifd() is triggered once the socket appears),
+ * subscribes to the host's "service" object, announces the jail's network
+ * namespace to the host's "network" object, schedules run_ubusd() and
+ * then runs the event loop until uloop_end() is called.
+ *
+ * new_ctx:       host ubus connection, kept for later requests
+ * new_jail_name: jail name, kept by reference (not copied)
+ * new_ns_pid:    PID whose namespaces are used
+ *
+ * Returns 0 on success or an errno value. On failure everything allocated
+ * here is released and the file-scope pointers are reset to NULL, so
+ * jail_network_stop() does not free them a second time.
+ */
+int jail_network_start(struct ubus_context *new_ctx, char *new_jail_name, pid_t new_ns_pid)
+{
+	ubus_pw = getpwnam("ubus");
+	int ret = 0;
+	int netns_fd;
+
+	host_ubus_ctx = new_ctx;
+	ns_pid = new_ns_pid;
+	jail_name = new_jail_name;
+
+	if (asprintf(&ubus_sock_dir, "/var/containers/ubus-%s", jail_name) == -1) {
+		/* the pointer is undefined after a failed asprintf() */
+		ubus_sock_dir = NULL;
+		ret = ENOMEM;
+		goto errout_dir;
+	}
+
+	if (asprintf(&ubus_sock_path, "%s/%s", ubus_sock_dir, ubus_sock_name) == -1) {
+		ubus_sock_path = NULL;
+		ret = ENOMEM;
+		goto errout_path;
+	}
+
+	mkdir_p(ubus_sock_dir, 0755);
+	if (ubus_pw) {
+		/* hand the socket directory to the "ubus" user */
+		ret = chown(ubus_sock_dir, ubus_pw->pw_uid, ubus_pw->pw_gid);
+		if (ret) {
+			ret = errno;
+			goto errout;
+		}
+	}
+
+	fd_inotify_read.fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
+	fd_inotify_read.cb = inotify_read_handler;
+	if (fd_inotify_read.fd == -1) {
+		ERROR("failed to initialize inotify handler\n");
+		ret = EIO;
+		goto errout;
+	}
+	uloop_fd_add(&fd_inotify_read, ULOOP_READ);
+
+	inotify_buffer = calloc(1, INOTIFY_SZ);
+	if (!inotify_buffer) {
+		ret = ENOMEM;
+		goto errout_inotify;
+	}
+
+	if (inotify_add_watch(fd_inotify_read.fd, ubus_sock_dir, IN_CREATE) == -1) {
+		ERROR("failed to add inotify watch on %s\n", ubus_sock_dir);
+		ret = EIO;
+		goto errout_inotify;
+	}
+
+	watch_ubus_service();
+
+	netns_fd = ns_open_pid("net", ns_pid);
+	if (netns_fd < 0) {
+		ret = ESRCH;
+		goto errout_inotify;
+	}
+
+	netns_updown(host_ubus_ctx, jail_name, true, netns_fd);
+
+	close(netns_fd);
+	uloop_timeout_add(&ubus_start_timeout);
+	/* returns once run_netifd() has called uloop_end() */
+	uloop_run();
+
+	return 0;
+
+errout_inotify:
+	/* free(NULL) is fine when the buffer allocation itself failed */
+	free(inotify_buffer);
+	inotify_buffer = NULL;
+	/* unregister from uloop before closing the descriptor */
+	uloop_fd_delete(&fd_inotify_read);
+	close(fd_inotify_read.fd);
+errout:
+	free(ubus_sock_path);
+	ubus_sock_path = NULL;
+errout_path:
+	free(ubus_sock_dir);
+	ubus_sock_dir = NULL;
+errout_dir:
+	return ret;
+}
+
+/*
+ * Ask the host's "container" ubus object to delete 'instance' of this
+ * jail. Returns -1 if the object cannot be found, otherwise the result of
+ * ubus_invoke().
+ */
+static int jail_delete_instance(const char *instance)
+{
+	static struct blob_buf req;
+	uint32_t id;
+	int ret;
+
+	if (ubus_lookup_id(host_ubus_ctx, "container", &id))
+		return -1;
+
+	blob_buf_init(&req, 0);
+	blobmsg_add_string(&req, "name", jail_name);
+	blobmsg_add_string(&req, "instance", instance);
+
+	ret = ubus_invoke(host_ubus_ctx, id, "delete", req.head, NULL, NULL, 3000);
+
+	/* release the request buffer like the other request helpers in this file do */
+	blob_buf_free(&req);
+
+	return ret;
+}
+
+/*
+ * Stop the managed network of the jail and release its resources.
+ *
+ * Sends a "netns_updown" stop request (carrying an fd of the caller's
+ * current network namespace) over the jail's ubus, frees that connection,
+ * deletes the "netifd" and "ubus" instances and removes the generated
+ * configuration and socket directory.
+ *
+ * Cleanup is performed even if /proc/self/ns/net cannot be opened; in
+ * that case the errno of the failed open() is returned, otherwise 0.
+ * Released pointers are reset to NULL so a repeated call is harmless.
+ */
+int jail_network_stop(void)
+{
+	int host_netns = open("/proc/self/ns/net", O_RDONLY);
+	int ret = 0;
+
+	if (host_netns < 0) {
+		/* remember the error but still clean up below */
+		ret = errno;
+	} else {
+		netns_updown(jail_ubus_ctx, NULL, false, host_netns);
+		close(host_netns);
+	}
+
+	/* the connection only exists if run_netifd() managed to connect */
+	if (jail_ubus_ctx) {
+		ubus_free(jail_ubus_ctx);
+		jail_ubus_ctx = NULL;
+	}
+
+	jail_delete_instance("netifd");
+	jail_delete_instance("ubus");
+
+	if (uci_config_network) {
+		unlink(uci_config_network);
+		rmdir(dirname(uci_config_network));
+		free(uci_config_network);
+		uci_config_network = NULL;
+	}
+
+	free(ubus_sock_path);
+	ubus_sock_path = NULL;
+
+	if (ubus_sock_dir) {
+		rmdir(ubus_sock_dir);
+		free(ubus_sock_dir);
+		ubus_sock_dir = NULL;
+	}
+
+	return ret;
+}
--- /dev/null
+/*
+ * Copyright (C) 2021 Daniel Golle <daniel@makrotopia.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _JAIL_NETIFD_H
+#define _JAIL_NETIFD_H
+#include <libubus.h>
+
+/*
+ * Start the managed network of jail 'new_jail_name' using the host ubus
+ * connection 'new_ctx'; 'new_ns_pid' is the PID whose namespaces are used.
+ * Returns 0 on success or an errno value.
+ */
+int jail_network_start(struct ubus_context *new_ctx, char *new_jail_name, pid_t new_ns_pid);
+/* Stop the jail's managed network and clean up; returns 0 or an errno value. */
+int jail_network_stop(void);
+
+#endif
#include <string.h>
#include <dlfcn.h>
+#include "log.h"
#include "seccomp.h"
#include "../preload.h"
static main_t __main__;
+int debug;
static int __preload_main__(int argc, char **argv, char **envp)
{
char *env_file = getenv("SECCOMP_FILE");
+ char *env_debug = getenv("SECCOMP_DEBUG");
if (!env_file || !env_file[0]) {
ERROR("SECCOMP_FILE not specified\n");
return -1;
}
+ if (env_debug)
+ debug = atoi(env_debug);
+ else
+ debug = 0;
+
if (install_syscall_filter(*argv, env_file))
return -1;
unsetenv("LD_PRELOAD");
+ unsetenv("SECCOMP_DEBUG");
unsetenv("SECCOMP_FILE");
return (*__main__)(argc, argv, envp);
start_main_t __start_main__;
__start_main__ = dlsym(RTLD_NEXT, "__libc_start_main");
- if (!__start_main__)
+ if (!__start_main__) {
INFO("failed to find __libc_start_main %s\n", dlerror());
+ return -1;
+ }
__main__ = main;
uClibc_main __start_main__;
__start_main__ = dlsym(RTLD_NEXT, "__uClibc_main");
- if (!__start_main__)
+ if (!__start_main__) {
INFO("failed to find __uClibc_main %s\n", dlerror());
+ return;
+ }
__main__ = main;
#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
#define SECCOMP_RET_LOG 0x00070000U
+#define SECCOMP_RET_LOGALLOW 0x7ffc0000U
#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+#define SECCOMP_RET_KILLPROCESS 0x80000000U
#define SECCOMP_RET_ERROR(x) (SECCOMP_RET_ERRNO | ((x) & 0x0000ffffU))
#define SECCOMP_RET_LOGGER(x) (SECCOMP_RET_LOG | ((x) & 0x0000ffffU))
#define syscall_nr (offsetof(struct seccomp_data, nr))
#define arch_nr (offsetof(struct seccomp_data, arch))
+#define syscall_arg(x) (offsetof(struct seccomp_data, args[x]))
-#if defined(__i386__)
-# define REG_SYSCALL REG_EAX
-# define ARCH_NR AUDIT_ARCH_I386
-#elif defined(__x86_64__)
+#if defined(__aarch64__)
+# define REG_SYSCALL regs.regs[8]
+# define ARCH_NR AUDIT_ARCH_AARCH64
+#elif defined(__amd64__)
# define REG_SYSCALL REG_RAX
# define ARCH_NR AUDIT_ARCH_X86_64
-#elif defined(__mips__)
-# define REG_SYSCALL regs[2]
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define ARCH_NR AUDIT_ARCH_MIPSEL
-# else
-# define ARCH_NR AUDIT_ARCH_MIPS
-# endif
#elif defined(__arm__) && (defined(__ARM_EABI__) || defined(__thumb__))
# define REG_SYSCALL regs.uregs[7]
# if __BYTE_ORDER == __LITTLE_ENDIAN
# else
# define ARCH_NR AUDIT_ARCH_ARMEB
# endif
+#elif defined(__i386__)
+# define REG_SYSCALL REG_EAX
+# define ARCH_NR AUDIT_ARCH_I386
+#elif defined(__mips__)
+# define REG_SYSCALL regs[2]
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+# define ARCH_NR AUDIT_ARCH_MIPSEL
+# else
+# define ARCH_NR AUDIT_ARCH_MIPS
+# endif
#elif defined(__PPC__)
# define REG_SYSCALL regs.gpr[0]
# define ARCH_NR AUDIT_ARCH_PPC
--- /dev/null
+/*
+ * parse and setup OCI seccomp filter
+ * Copyright (c) 2020 Daniel Golle <daniel@makrotopia.org>
+ * seccomp example with syscall reporting
+ * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ * Authors:
+ * Kees Cook <keescook@chromium.org>
+ * Will Drewry <wad@chromium.org>
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ *
+ * BPF control flow
+ *
+ * (check_arch)<t>---(check_syscall)<f>---+----[...]<f>---(return default_action)
+ * |<f> |<t> |
+ * KILL (check_argument)<f>--+
+ * |<t>
+ * [...]
+ * |<t>
+ * (return action)
+ */
+#define _GNU_SOURCE 1
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <libubox/utils.h>
+#include <libubox/blobmsg.h>
+#include <libubox/blobmsg_json.h>
+
+#include "log.h"
+#include "seccomp-bpf.h"
+#include "seccomp-oci.h"
+#include "../syscall-names.h"
+#include "seccomp-syscalls-helpers.h"
+
+/*
+ * Translate an OCI seccomp action name (e.g. "SCMP_ACT_ALLOW") into the
+ * corresponding SECCOMP_RET_* value.
+ *
+ * A missing (NULL) or unknown name is reported and mapped to
+ * SECCOMP_RET_KILL, the most restrictive action. The NULL case matters
+ * because callers pass blobmsg_get_string() of an optional attribute.
+ */
+static uint32_t resolve_action(char *actname)
+{
+	static const struct {
+		const char *name;
+		uint32_t ret;
+	} actions[] = {
+		{ "SCMP_ACT_KILL",		SECCOMP_RET_KILL },
+		{ "SCMP_ACT_KILL_PROCESS",	SECCOMP_RET_KILLPROCESS },
+		{ "SCMP_ACT_TRAP",		SECCOMP_RET_TRAP },
+		{ "SCMP_ACT_ERRNO",		SECCOMP_RET_ERRNO },
+		{ "SCMP_ACT_ERROR",		SECCOMP_RET_ERRNO },
+		{ "SCMP_ACT_TRACE",		SECCOMP_RET_TRACE },
+		{ "SCMP_ACT_ALLOW",		SECCOMP_RET_ALLOW },
+		{ "SCMP_ACT_LOG",		SECCOMP_RET_LOGALLOW },
+	};
+	size_t i;
+
+	if (!actname) {
+		ERROR("missing seccomp action\n");
+		return SECCOMP_RET_KILL;
+	}
+
+	for (i = 0; i < sizeof(actions) / sizeof(actions[0]); i++) {
+		if (!strcmp(actname, actions[i].name))
+			return actions[i].ret;
+	}
+
+	ERROR("unknown seccomp action %s\n", actname);
+	return SECCOMP_RET_KILL;
+}
+
+/*
+ * Map an OCI comparison operator name to the BPF jump instruction used to
+ * implement it. NE, LT and LE are implemented as the inverted EQ, GE and GT
+ * comparisons respectively (see resolve_op_inv()); MASKED_EQ is an EQ on the
+ * masked value.
+ *
+ * Returns 0 (and logs an error) for an unknown operator.
+ */
+static uint8_t resolve_op_ins(const char *op)
+{
+	if (!strcmp(op, "SCMP_CMP_EQ") ||
+	    !strcmp(op, "SCMP_CMP_NE") ||
+	    !strcmp(op, "SCMP_CMP_MASKED_EQ"))
+		return BPF_JEQ;
+
+	if (!strcmp(op, "SCMP_CMP_GE") ||
+	    !strcmp(op, "SCMP_CMP_LT"))
+		return BPF_JGE;
+
+	if (!strcmp(op, "SCMP_CMP_GT") ||
+	    !strcmp(op, "SCMP_CMP_LE"))
+		return BPF_JGT;
+
+	ERROR("unknown seccomp op %s\n", op);
+	return 0;
+}
+
+/* Tell whether the operator is SCMP_CMP_MASKED_EQ, which needs a mask step. */
+static bool resolve_op_is_masked(const char *op)
+{
+	return strcmp(op, "SCMP_CMP_MASKED_EQ") == 0;
+}
+
+/*
+ * Tell whether the operator is implemented by inverting the outcome of its
+ * BPF comparison (NE, LT and LE).
+ */
+static bool resolve_op_inv(const char *op)
+{
+	static const char *const inverted[] = {
+		"SCMP_CMP_NE",
+		"SCMP_CMP_LT",
+		"SCMP_CMP_LE",
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(inverted) / sizeof(inverted[0]); i++) {
+		if (strcmp(op, inverted[i]) == 0)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Translate an OCI architecture name (e.g. "SCMP_ARCH_ARM") into the
+ * matching AUDIT_ARCH_* constant.
+ *
+ * Returns 0 for a NULL name, for SCMP_ARCH_X32 (not supported) and for an
+ * unknown name; only the unknown case is logged as an error.
+ */
+static uint32_t resolve_architecture(char *archname)
+{
+	static const struct {
+		const char *name;
+		uint32_t arch;
+	} archs[] = {
+		{ "SCMP_ARCH_X86",		AUDIT_ARCH_I386 },
+		{ "SCMP_ARCH_X86_64",		AUDIT_ARCH_X86_64 },
+		/*
+		 * would be AUDIT_ARCH_X86_64, but 32-bit userland on
+		 * 64-bit kernel is not supported yet
+		 */
+		{ "SCMP_ARCH_X32",		0 },
+		{ "SCMP_ARCH_ARM",		AUDIT_ARCH_ARM },
+		{ "SCMP_ARCH_AARCH64",		AUDIT_ARCH_AARCH64 },
+		{ "SCMP_ARCH_MIPS",		AUDIT_ARCH_MIPS },
+		{ "SCMP_ARCH_MIPS64",		AUDIT_ARCH_MIPS64 },
+		{ "SCMP_ARCH_MIPS64N32",	AUDIT_ARCH_MIPS64N32 },
+		{ "SCMP_ARCH_MIPSEL",		AUDIT_ARCH_MIPSEL },
+		{ "SCMP_ARCH_MIPSEL64",		AUDIT_ARCH_MIPSEL64 },
+		{ "SCMP_ARCH_MIPSEL64N32",	AUDIT_ARCH_MIPSEL64N32 },
+		{ "SCMP_ARCH_PPC",		AUDIT_ARCH_PPC },
+		{ "SCMP_ARCH_PPC64",		AUDIT_ARCH_PPC64 },
+		{ "SCMP_ARCH_PPC64LE",		AUDIT_ARCH_PPC64LE },
+		{ "SCMP_ARCH_S390",		AUDIT_ARCH_S390 },
+		{ "SCMP_ARCH_S390X",		AUDIT_ARCH_S390X },
+		{ "SCMP_ARCH_PARISC",		AUDIT_ARCH_PARISC },
+		{ "SCMP_ARCH_PARISC64",		AUDIT_ARCH_PARISC64 },
+	};
+	size_t i;
+
+	if (archname == NULL)
+		return 0;
+
+	for (i = 0; i < sizeof(archs) / sizeof(archs[0]); i++) {
+		if (strcmp(archname, archs[i].name) == 0)
+			return archs[i].arch;
+	}
+
+	ERROR("unknown seccomp architecture %s\n", archname);
+	return 0;
+}
+
+/* Indices of the top-level keys parsed from the OCI seccomp object. */
+enum {
+ OCI_LINUX_SECCOMP_DEFAULTACTION,
+ OCI_LINUX_SECCOMP_ARCHITECTURES,
+ OCI_LINUX_SECCOMP_FLAGS,
+ OCI_LINUX_SECCOMP_SYSCALLS,
+ __OCI_LINUX_SECCOMP_MAX,
+};
+
+/* blobmsg policy: key name and expected type for each top-level key. */
+static const struct blobmsg_policy oci_linux_seccomp_policy[] = {
+ [OCI_LINUX_SECCOMP_DEFAULTACTION] = { "defaultAction", BLOBMSG_TYPE_STRING },
+ [OCI_LINUX_SECCOMP_ARCHITECTURES] = { "architectures", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_SECCOMP_FLAGS] = { "flags", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_SECCOMP_SYSCALLS] = { "syscalls", BLOBMSG_TYPE_ARRAY },
+};
+
+/* Indices of the keys parsed from each entry of the "syscalls" array. */
+enum {
+ OCI_LINUX_SECCOMP_SYSCALLS_NAMES,
+ OCI_LINUX_SECCOMP_SYSCALLS_ACTION,
+ OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET,
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS,
+ __OCI_LINUX_SECCOMP_SYSCALLS_MAX
+};
+
+/*
+ * blobmsg policy for one "syscalls" entry. The designated initializers are
+ * listed in a different order than the enum; the indices decide placement.
+ */
+static const struct blobmsg_policy oci_linux_seccomp_syscalls_policy[] = {
+ [OCI_LINUX_SECCOMP_SYSCALLS_NAMES] = { "names", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET] = { "errnoRet", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ACTION] = { "action", BLOBMSG_TYPE_STRING },
+};
+
+/* Indices of the keys parsed from each entry of a rule's "args" array. */
+enum {
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX,
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE,
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO,
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP,
+ __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX
+};
+
+/*
+ * blobmsg policy for one argument condition: which syscall argument to test
+ * ("index"), the comparison operand(s) ("value", "valueTwo") and the
+ * operator name ("op").
+ */
+static const struct blobmsg_policy oci_linux_seccomp_syscalls_args_policy[] = {
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] = { "index", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] = { "value", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO] = { "valueTwo", BLOBMSG_CAST_INT64 },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP] = { "op", BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * Build a seccomp BPF program from an OCI "seccomp" object.
+ *
+ * The object is walked twice: a first pass validates the rules and computes
+ * the number of BPF instructions, a second pass emits them. Each rule is
+ * laid out as
+ *
+ *   [load syscall nr][one JEQ per known name][arg checks...][return action]
+ *
+ * preceded by a three-instruction architecture check and followed by a
+ * final "return defaultAction".
+ *
+ * Unknown syscall names are ignored. A rule in which no name can be
+ * resolved is skipped entirely: without any syscall comparison its action
+ * would otherwise be applied to every syscall. Rules too long for the 8-bit
+ * BPF jump offsets are rejected rather than emitted with truncated jumps.
+ *
+ * Returns a malloc()ed program (with a calloc()ed filter array) which the
+ * caller owns, or NULL on invalid input or allocation failure.
+ */
+struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_LINUX_SECCOMP_MAX];
+	struct blob_attr *tbn[__OCI_LINUX_SECCOMP_SYSCALLS_MAX];
+	struct blob_attr *tba[__OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX];
+	struct blob_attr *cur, *curn, *curarg;
+	int rem, remn, remargs, sc;
+	struct sock_filter *filter;
+	struct sock_fprog *prog;
+	int sz = 4, idx = 0; /* 3 arch-check instructions + default return */
+	uint32_t default_policy = 0;
+	uint32_t seccomp_arch;
+	bool arch_matched;
+	char *op_str;
+
+	blobmsg_parse(oci_linux_seccomp_policy, __OCI_LINUX_SECCOMP_MAX,
+		      tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[OCI_LINUX_SECCOMP_DEFAULTACTION]) {
+		ERROR("seccomp: no default action set\n");
+		return NULL;
+	}
+
+	default_policy = resolve_action(blobmsg_get_string(tb[OCI_LINUX_SECCOMP_DEFAULTACTION]));
+
+	/* verify architecture while ignoring the x86_64 anomaly for now */
+	if (tb[OCI_LINUX_SECCOMP_ARCHITECTURES]) {
+		arch_matched = false;
+		blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_ARCHITECTURES], rem) {
+			seccomp_arch = resolve_architecture(blobmsg_get_string(cur));
+			if (ARCH_NR == seccomp_arch) {
+				arch_matched = true;
+				break;
+			}
+		}
+		if (!arch_matched) {
+			ERROR("seccomp architecture doesn't match system\n");
+			return NULL;
+		}
+	}
+
+	/* pass 1: validate the rules and compute the program size */
+	blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
+		int nnames = 0;		/* syscall names known on this system */
+		int nargs_insn = 0;	/* instructions needed for arg checks */
+
+		blobmsg_parse(oci_linux_seccomp_syscalls_policy,
+			      __OCI_LINUX_SECCOMP_SYSCALLS_MAX,
+			      tbn, blobmsg_data(cur), blobmsg_len(cur));
+
+		/* resolve_action() is handed this string in pass 2 */
+		if (!tbn[OCI_LINUX_SECCOMP_SYSCALLS_ACTION]) {
+			ERROR("seccomp: syscall rule without action\n");
+			return NULL;
+		}
+
+		blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
+			sc = find_syscall(blobmsg_get_string(curn));
+			if (sc == -1) {
+				DEBUG("unknown syscall '%s'\n", blobmsg_get_string(curn));
+				/* TODO: support run.oci.seccomp_fail_unknown_syscall=1 annotation */
+				continue;
+			}
+			++nnames;
+		}
+
+		if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS]) {
+			blobmsg_for_each_attr(curarg, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remargs) {
+				nargs_insn += 2; /* load and compare */
+
+				blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
+					      __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
+					      tba, blobmsg_data(curarg), blobmsg_len(curarg));
+				if (!tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] ||
+				    !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] ||
+				    !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP])
+					return NULL;
+
+				if (blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]) > 5)
+					return NULL;
+
+				op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
+				if (!resolve_op_ins(op_str))
+					return NULL;
+
+				if (resolve_op_is_masked(op_str))
+					++nargs_insn; /* SCMP_CMP_MASKED_EQ needs an extra BPF_AND op */
+			}
+		}
+
+		/* nothing to match on this system: the rule emits no code */
+		if (!nnames)
+			continue;
+
+		/*
+		 * jt/jf are 8 bit wide: the longest forward jumps of a rule
+		 * are (nnames - 1) from the first name check and
+		 * (nargs_insn + 1) from the last name check to the next rule.
+		 */
+		if (nnames - 1 > 255 || nargs_insn + 1 > 255) {
+			ERROR("seccomp: rule too long for BPF jump offsets\n");
+			return NULL;
+		}
+
+		sz += 2 + nnames + nargs_insn; /* load + names + args + return */
+	}
+
+	if (sz < 6)
+		return NULL;
+
+	prog = malloc(sizeof(struct sock_fprog));
+	if (!prog)
+		return NULL;
+
+	filter = calloc(sz, sizeof(struct sock_filter));
+	if (!filter) {
+		ERROR("failed to allocate memory for seccomp filter\n");
+		goto errout2;
+	}
+
+	/* validate arch */
+	set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, arch_nr);
+	set_filter(&filter[idx++], BPF_JMP + BPF_JEQ + BPF_K, 1, 0, ARCH_NR);
+	set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_KILL);
+
+	/* pass 2: emit the instructions of every rule */
+	blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
+		uint32_t action;
+		uint32_t op_idx;
+		uint8_t op_ins;
+		bool op_inv, op_masked;
+		uint64_t op_val, op_val2;
+		int start_rule_idx;	/* first instruction after the name checks */
+		int next_rule_idx;	/* first instruction of the following rule */
+		int nnames = 0;
+
+		blobmsg_parse(oci_linux_seccomp_syscalls_policy,
+			      __OCI_LINUX_SECCOMP_SYSCALLS_MAX,
+			      tbn, blobmsg_data(cur), blobmsg_len(cur));
+		action = resolve_action(blobmsg_get_string(
+				tbn[OCI_LINUX_SECCOMP_SYSCALLS_ACTION]));
+		if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]) {
+			/* errnoRet only makes sense together with an ERRNO action */
+			if (action != SECCOMP_RET_ERRNO)
+				goto errout1;
+
+			action = SECCOMP_RET_ERROR(blobmsg_get_u32(
+					tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]));
+		} else if (action == SECCOMP_RET_ERRNO)
+			action = SECCOMP_RET_ERROR(EPERM);
+
+		/* get number of (known) syscall names */
+		blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
+			if (find_syscall(blobmsg_get_string(curn)) == -1)
+				continue;
+
+			++nnames;
+		}
+
+		/* must mirror pass 1: rules without known names emit nothing */
+		if (!nnames)
+			continue;
+
+		/* load syscall */
+		set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_nr);
+
+		start_rule_idx = idx + nnames;
+		next_rule_idx = start_rule_idx;
+
+		/* calculate length of argument filter rules */
+		blobmsg_for_each_attr(curarg, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remargs) {
+			blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
+				      __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
+				      tba, blobmsg_data(curarg), blobmsg_len(curarg));
+			next_rule_idx += 2;
+			op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
+			if (resolve_op_is_masked(op_str))
+				++next_rule_idx;
+		}
+
+		++next_rule_idx; /* account for return action */
+
+		blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
+			sc = find_syscall(blobmsg_get_string(curn));
+			if (sc == -1)
+				continue;
+			/*
+			 * check syscall, skip other syscall checks if match is found.
+			 * if no match is found, jump to next section
+			 */
+			set_filter(&filter[idx], BPF_JMP + BPF_JEQ + BPF_K,
+				   start_rule_idx - (idx + 1),
+				   ((idx + 1) == start_rule_idx)?(next_rule_idx - (idx + 1)):0,
+				   sc);
+			++idx;
+		}
+
+		assert(idx == start_rule_idx);
+
+		/* generate argument filter rules */
+		blobmsg_for_each_attr(curarg, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remargs) {
+			blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
+				      __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
+				      tba, blobmsg_data(curarg), blobmsg_len(curarg));
+
+			op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
+			op_ins = resolve_op_ins(op_str);
+			op_inv = resolve_op_inv(op_str);
+			op_masked = resolve_op_is_masked(op_str);
+			op_idx = blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]);
+			op_val = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE]);
+			if (tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO])
+				op_val2 = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO]);
+			else
+				op_val2 = 0;
+
+			/*
+			 * load argument
+			 * NOTE(review): this is a BPF_W load and set_filter()
+			 * takes a __u32 operand, so the 64-bit "value" and
+			 * "valueTwo" appear to be compared on 32 bits only;
+			 * confirm this is intended.
+			 */
+			set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_arg(op_idx));
+
+			/* apply mask */
+			if (op_masked)
+				set_filter(&filter[idx++], BPF_ALU + BPF_K + BPF_AND, 0, 0, op_val);
+
+			/* a failed condition jumps to the next rule */
+			set_filter(&filter[idx], BPF_JMP + op_ins + BPF_K,
+				   op_inv?(next_rule_idx - (idx + 1)):0,
+				   op_inv?0:(next_rule_idx - (idx + 1)),
+				   op_masked?op_val2:op_val);
+			++idx;
+		}
+
+		/* if we have reached until here, all conditions were met and we can return */
+		set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, action);
+
+		assert(idx == next_rule_idx);
+	}
+
+	set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, default_policy);
+
+	assert(idx == sz);
+
+	prog->len = (unsigned short) idx;
+	prog->filter = filter;
+
+	DEBUG("generated seccomp-bpf program:\n");
+	if (debug) {
+		fprintf(stderr, " [idx]\tcode\t jt\t jf\tk\n");
+		for (idx=0; idx<sz; idx++)
+			fprintf(stderr, " [%03d]\t%04hx\t%3hhu\t%3hhu\t%08x\n", idx,
+				filter[idx].code,
+				filter[idx].jt,
+				filter[idx].jf,
+				filter[idx].k);
+	}
+
+	return prog;
+
+errout1:
+	/* prog->filter has not been assigned yet, release the array itself */
+	free(filter);
+errout2:
+	free(prog);
+	return NULL;
+}
+
+
+/*
+ * Install a seccomp filter program for the calling process and release it.
+ *
+ * Sets PR_SET_NO_NEW_PRIVS and then installs the filter with
+ * PR_SET_SECCOMP. The program and its filter array are freed in every case;
+ * the caller must not use prog afterwards.
+ *
+ * Returns 0 on success or the errno value of the failing prctl() call.
+ */
+int applyOCIlinuxseccomp(struct sock_fprog *prog)
+{
+	int ret = 0;
+
+	/* errno is saved right away: logging may overwrite it */
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		ret = errno;
+		ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
+	} else if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog)) {
+		ret = errno;
+		ERROR("prctl(PR_SET_SECCOMP) failed: %m\n");
+	}
+
+	/* the kernel keeps its own copy of an installed filter */
+	free(prog->filter);
+	free(prog);
+
+	return ret;
+}
--- /dev/null
+/*
+ * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#ifndef _JAIL_SECCOMP_OCI_H_
+#define _JAIL_SECCOMP_OCI_H_
+
+#include <errno.h>
+#include <stddef.h>
+#include <linux/filter.h>
+
+struct blob_attr;
+
+#ifdef SECCOMP_SUPPORT
+/*
+ * Build a seccomp BPF program from an OCI seccomp object.
+ * Returns NULL on failure.
+ */
+struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg);
+/*
+ * Install the program for the calling process.
+ * Returns 0 on success or an errno value.
+ */
+int applyOCIlinuxseccomp(struct sock_fprog *prog);
+#else
+/*
+ * Stubs for builds without seccomp support. They are static inline so that
+ * including this header from several source files does not produce
+ * multiple definitions at link time.
+ */
+static inline struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg) {
+	return NULL;
+}
+
+static inline int applyOCIlinuxseccomp(struct sock_fprog *prog) {
+	return ENOTSUP;
+}
+#endif
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#ifndef _JAIL_SECCOMP_HELPERS_H_
+#define _JAIL_SECCOMP_HELPERS_H_
+
+/*
+ * Look up a syscall number by name by scanning all SYSCALL_COUNT table
+ * indices. Returns the syscall number, or -1 if no syscall has that name.
+ */
+static int find_syscall(const char *name)
+{
+	int pos = 0;
+
+	while (pos < SYSCALL_COUNT) {
+		int nr = syscall_index_to_number(pos++);
+		const char *candidate = syscall_name(nr);
+
+		/* table slots without a name are skipped */
+		if (candidate != NULL && strcmp(candidate, name) == 0)
+			return nr;
+	}
+
+	return -1;
+}
+
+/* Fill in one BPF instruction: opcode, true/false jump offsets and operand. */
+static void set_filter(struct sock_filter *filter, __u16 code, __u8 jt, __u8 jf, __u32 k)
+{
+	*filter = (struct sock_filter) {
+		.code = code,
+		.jt = jt,
+		.jf = jf,
+		.k = k,
+	};
+}
+
+#endif
#include <libubox/blobmsg.h>
#include <libubox/blobmsg_json.h>
-#include "seccomp-bpf.h"
+#include "log.h"
#include "seccomp.h"
-#include "../syscall-names.h"
-
-static int find_syscall(const char *name)
-{
- int i;
-
- for (i = 0; i < SYSCALL_COUNT; i++) {
- int sc = syscall_index_to_number(i);
- if (syscall_name(sc) && !strcmp(syscall_name(sc), name))
- return sc;
- }
-
- return -1;
-}
-
-static void set_filter(struct sock_filter *filter, __u16 code, __u8 jt, __u8 jf, __u32 k)
-{
- filter->code = code;
- filter->jt = jt;
- filter->jf = jf;
- filter->k = k;
-}
+#include "seccomp-oci.h"
int install_syscall_filter(const char *argv, const char *file)
{
- enum {
- SECCOMP_WHITELIST,
- SECCOMP_POLICY,
- __SECCOMP_MAX
- };
- static const struct blobmsg_policy policy[__SECCOMP_MAX] = {
- [SECCOMP_WHITELIST] = { .name = "whitelist", .type = BLOBMSG_TYPE_ARRAY },
- [SECCOMP_POLICY] = { .name = "policy", .type = BLOBMSG_TYPE_INT32 },
- };
struct blob_buf b = { 0 };
- struct blob_attr *tb[__SECCOMP_MAX];
- struct blob_attr *cur;
- int rem;
+ struct sock_fprog *prog = NULL;
- struct sock_filter *filter;
- struct sock_fprog prog = { 0 };
- int sz = 5, idx = 0, default_policy = 0;
-
- INFO("%s: setting up syscall filter\n", argv);
+ DEBUG("%s: setting up syscall filter\n", argv);
blob_buf_init(&b, 0);
if (!blobmsg_add_json_from_file(&b, file)) {
return -1;
}
- blobmsg_parse(policy, __SECCOMP_MAX, tb, blob_data(b.head), blob_len(b.head));
- if (!tb[SECCOMP_WHITELIST]) {
- ERROR("%s: %s is missing the syscall table\n", argv, file);
+ prog = parseOCIlinuxseccomp(b.head);
+ if (!prog) {
+ ERROR("%s: failed to parse seccomp filter rules %s\n", argv, file);
return -1;
}
- if (tb[SECCOMP_POLICY])
- default_policy = blobmsg_get_u32(tb[SECCOMP_POLICY]);
-
- blobmsg_for_each_attr(cur, tb[SECCOMP_WHITELIST], rem)
- sz += 2;
-
- filter = calloc(sz, sizeof(struct sock_filter));
- if (!filter) {
- ERROR("failed to allocate filter memory\n");
- return -1;
- }
-
- /* validate arch */
- set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, arch_nr);
- set_filter(&filter[idx++], BPF_JMP + BPF_JEQ + BPF_K, 1, 0, ARCH_NR);
- set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_KILL);
-
- /* get syscall */
- set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_nr);
-
- blobmsg_for_each_attr(cur, tb[SECCOMP_WHITELIST], rem) {
- char *name = blobmsg_get_string(cur);
- int nr;
-
- if (!name) {
- INFO("%s: invalid syscall name\n", argv);
- continue;
- }
-
- nr = find_syscall(name);
- if (nr == -1) {
- INFO("%s: unknown syscall %s\n", argv, name);
- continue;
- }
-
- /* add whitelist */
- set_filter(&filter[idx++], BPF_JMP + BPF_JEQ + BPF_K, 0, 1, nr);
- set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_ALLOW);
- }
-
- if (default_policy)
- /* notify tracer; without tracer return -1 and set errno to ENOSYS */
- set_filter(&filter[idx], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_TRACE);
- else
- /* kill the process */
- set_filter(&filter[idx], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_KILL);
-
- if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
- ERROR("%s: prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n", argv);
- return errno;
- }
-
- prog.len = (unsigned short) idx + 1;
- prog.filter = filter;
-
- if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
- ERROR("%s: prctl(PR_SET_SECCOMP) failed: %m\n", argv);
- return errno;
- }
- return 0;
+ return applyOCIlinuxseccomp(prog);
}
#include <stdio.h>
#include <syslog.h>
-#define INFO(fmt, ...) do { \
- syslog(LOG_INFO,"preload-seccomp: "fmt, ## __VA_ARGS__); \
- fprintf(stderr,"preload-seccomp: "fmt, ## __VA_ARGS__); \
- } while (0)
-#define ERROR(fmt, ...) do { \
- syslog(LOG_ERR,"preload-seccomp: "fmt, ## __VA_ARGS__); \
- fprintf(stderr,"preload-seccomp: "fmt, ## __VA_ARGS__); \
- } while (0)
-
int install_syscall_filter(const char *argv, const char *file);
#endif
ulog(LOG_NOTICE, fmt, ## __VA_ARGS__); \
} } while (0)
+/*
+ * Level-gated debug message: when the global debug level is at least
+ * `level` the message goes to ulog() with LOG_NOTICE priority, otherwise it
+ * is passed to procd_udebug_printf() instead.
+ */
+#define P_DEBUG(level, fmt, ...) do { \
+ if (debug >= level) { \
+ ulog(LOG_NOTICE, fmt, ## __VA_ARGS__); \
+ } else { \
+ procd_udebug_printf(fmt, ## __VA_ARGS__); \
+ } } while (0)
+
#define LOG ULOG_INFO
#define ERROR ULOG_ERR
echo "#include <asm/unistd.h>"
echo "static const char *__syscall_names[] = {"
-echo "#include <sys/syscall.h>" | ${CC} -E -dM - | grep '^#define __NR_' | \
- LC_ALL=C sed -r -n -e 's/^\#define[ \t]+__NR_([a-z0-9_]+)[ \t]+([ ()+0-9a-zNR_Linux]+)(.*)/ [\2] = "\1",/p'
+echo "#include <sys/syscall.h>" | ${CC} -E -dM - | grep '^#define __NR_[a-z0-9_]\+[ \t].*[0-9].*$' | \
+ LC_ALL=C sed -r -n -e 's/^\#define[ \t]+__NR_([a-z0-9_]+)[ \t]+([ ()+0-9a-zNR_LSYCABE]+)(.*)/ [\2] = "\1",/p'
echo "};"
-extra_syscalls="$(echo "#include <sys/syscall.h>" | ${CC} -E -dM - | sed -n -e '/^#define __ARM_NR_/ s///p')"
+extra_syscalls="$(echo "#include <sys/syscall.h>" | ${CC} -E -dM - | sed -r -n -e 's/^#define __ARM_NR_([a-z0-9_]+)/\1/p')"
cat <<EOF
static inline const char *syscall_name(unsigned i) {
switch (i) {
EOF
echo "$extra_syscalls" | \
- LC_ALL=C sed -r -n -e 's/^([a-z0-9_]+)[ \t]+([ ()+0-9a-zNR_Linux]+)(.*)/ case \2: return "\1";/p'
+ LC_ALL=C sed -r -n -e 's/^([a-z0-9_]+)[ \t]+([ ()+0-9a-zNR_LAMBSE]+)(.*)/ case \2: return "\1";/p'
cat <<EOF
default: return (void*)0;
}
switch (i) {
EOF
echo "$extra_syscalls" | \
- LC_ALL=C perl -ne 'print " case $2: return ARRAY_SIZE(__syscall_names) + ", $. - 1, ";\n" if /^([a-z0-9_]+)[ \t]+([ ()+0-9a-zNR_Linux]+)(.*)/;'
+ LC_ALL=C perl -ne 'print " case $2: return ARRAY_SIZE(__syscall_names) + ", $. - 1, ";\n" if /^([a-z0-9_]+)[ \t]+([ ()+0-9a-zNR_LAMBSE]+)(.*)/;'
cat <<EOF
default: return -1;
}
switch (i) {
EOF
echo "$extra_syscalls" | \
- LC_ALL=C perl -ne 'print " case ARRAY_SIZE(__syscall_names) + ", $. - 1, ": return $2;\n" if /^([a-z0-9_]+)[ \t]+([ ()+0-9a-zNR_Linux]+)(.*)/;'
+ LC_ALL=C perl -ne 'print " case ARRAY_SIZE(__syscall_names) + ", $. - 1, ": return $2;\n" if /^([a-z0-9_]+)[ \t]+([ ()+0-9a-zNR_LAMBSE]+)(.*)/;'
cat <<EOF
default: return -1;
}
#include <sys/types.h>
#include <sys/mount.h>
+#include <stdlib.h>
#include <unistd.h>
#include "../procd.h"
static void coldplug_complete(struct uloop_timeout *t)
{
- DEBUG(4, "Coldplug complete\n");
+ P_DEBUG(4, "Coldplug complete\n");
hotplug_last_event(NULL);
procd_state_next();
}
static void udevtrigger_complete(struct uloop_process *proc, int ret)
{
- DEBUG(4, "Finished udevtrigger\n");
+ P_DEBUG(4, "Finished udevtrigger\n");
hotplug_last_event(coldplug_complete);
}
if (!is_container()) {
umount2("/dev/pts", MNT_DETACH);
umount2("/dev/", MNT_DETACH);
- mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755,size=512K");
+ mount("tmpfs", "/dev", "tmpfs", MS_NOATIME | MS_NOEXEC | MS_NOSUID, "mode=0755,size=512K");
mkdir("/dev/pts", 0755);
- mount("devpts", "/dev/pts", "devpts", MS_NOEXEC | MS_NOSUID, 0);
+ mount("devpts", "/dev/pts", "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, 0);
}
ignore(symlink("/tmp/shm", "/dev/shm"));
if (!udevtrigger.pid) {
execvp(argv[0], argv);
ERROR("Failed to start coldplug: %m\n");
- exit(-1);
+ exit(EXIT_FAILURE);
}
if (udevtrigger.pid <= 0) {
uloop_process_add(&udevtrigger);
- DEBUG(4, "Launched coldplug instance, pid=%d\n", (int) udevtrigger.pid);
+ P_DEBUG(4, "Launched coldplug instance, pid=%d\n", (int) udevtrigger.pid);
}
#include <libubox/blobmsg_json.h>
#include <libubox/json_script.h>
#include <libubox/uloop.h>
+#include <libubox/utils.h>
#include <json-c/json.h>
+#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
return NULL;
}
-static void mkdir_p(char *dir)
+static void chgrp_error(const char *group, const char *target, const char *failed)
{
- char *l = strrchr(dir, '/');
+ ERROR("cannot set group %s for %s (%s: %d)\n",
+ group, target, failed, errno);
+}
- if (l) {
- *l = '\0';
- mkdir_p(dir);
- *l = '/';
- mkdir(dir, 0755);
- }
+/*
+ * Set the group of `btarget` (a path) to the group named by `bgroup`; the
+ * owner is set to uid 0. Failures of getgrnam() or chown() are reported via
+ * chgrp_error().
+ */
+static void chgrp_target(struct blob_attr *bgroup, struct blob_attr *btarget)
+{
+	const char *group = blobmsg_get_string(bgroup);
+	const char *target = blobmsg_get_string(btarget);
+	struct group *grp;
+
+	/* cleared so that chgrp_error() reports an errno set by the calls below */
+	errno = 0;
+
+	grp = getgrnam(group);
+	if (grp == NULL) {
+		chgrp_error(group, target, "getgrnam");
+		return;
+	}
+
+	if (chown(target, 0, grp->gr_gid) < 0)
+		chgrp_error(group, target, "chown");
}
static void handle_makedev(struct blob_attr *msg, struct blob_attr *data)
char *minor = hotplug_msg_find_var(msg, "MINOR");
char *major = hotplug_msg_find_var(msg, "MAJOR");
char *subsystem = hotplug_msg_find_var(msg, "SUBSYSTEM");
- int ret = 0;
blobmsg_parse_array(mkdev_policy, 3, tb, blobmsg_data(data), blobmsg_data_len(data));
if (tb[0] && tb[1] && minor && major && subsystem) {
char *d = strdup(blobmsg_get_string(tb[0]));
d = dirname(d);
- mkdir_p(d);
+ mkdir_p(d, 0755);
free(d);
if (!strcmp(subsystem, "block"))
mknod(blobmsg_get_string(tb[0]),
m | strtoul(blobmsg_data(tb[1]), NULL, 8),
makedev(atoi(major), atoi(minor)));
- if (tb[2]) {
- struct group *g = getgrnam(blobmsg_get_string(tb[2]));
-
- if (g)
- ret = chown(blobmsg_get_string(tb[0]), 0, g->gr_gid);
-
- if (!g || ret < 0)
- ERROR("cannot set group %s for %s\n",
- blobmsg_get_string(tb[2]),
- blobmsg_get_string(tb[0]));
- }
+ if (tb[2])
+ chgrp_target(tb[2], tb[0]);
}
umask(oldumask);
}
argv[i] = NULL;
execvp(argv[0], &argv[0]);
}
- exit(-1);
+ exit(EXIT_FAILURE);
}
static void handle_button_start(struct blob_attr *msg, struct blob_attr *data)
int fw, src, load, len;
static char buf[4096];
- DEBUG(2, "Firmware request for %s/%s\n", dir, file);
+ P_DEBUG(2, "Firmware request for %s/%s\n", dir, file);
if (!file || !dir || !dev) {
ERROR("Request for unknown firmware %s/%s\n", dir, file);
- exit(-1);
+ exit(EXIT_FAILURE);
}
path = alloca(strlen(dir) + strlen(file) + 2);
load = open(loadpath, O_WRONLY);
if (!load) {
ERROR("Failed to open %s: %m\n", loadpath);
- exit(-1);
+ exit(EXIT_FAILURE);
}
if (write(load, "1", 1) == -1) {
ERROR("Failed to write to %s: %m\n", loadpath);
- exit(-1);
+ exit(EXIT_FAILURE);
}
close(load);
fw = open(syspath, O_WRONLY);
if (fw < 0) {
ERROR("Failed to open %s: %m\n", syspath);
- exit(-1);
+ exit(EXIT_FAILURE);
}
len = s.st_size;
ERROR("failed to write to %s: %m\n", loadpath);
close(load);
- DEBUG(2, "Done loading %s\n", path);
+ P_DEBUG(2, "Done loading %s\n", path);
- exit(-1);
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * "start-console" hotplug handler: runs the "respawn" and "askfirst"
+ * inittab actions. The first element of `data` is only used for the debug
+ * messages.
+ *
+ * NOTE(review): the handler always exits with EXIT_FAILURE, even after both
+ * actions were run; confirm the exit status is intended.
+ */
+static void handle_start_console(struct blob_attr *msg, struct blob_attr *data)
+{
+	struct blob_attr *first = blobmsg_data(data);
+	char *console = blobmsg_get_string(first);
+
+	P_DEBUG(2, "Start console request for %s\n", console);
+
+	procd_inittab_run("respawn");
+	procd_inittab_run("askfirst");
+
+	P_DEBUG(2, "Done starting console for %s\n", console);
+
+	exit(EXIT_FAILURE);
+}
enum {
HANDLER_EXEC,
HANDLER_BUTTON,
HANDLER_FW,
+ HANDLER_START_CONSOLE,
};
static struct cmd_handler {
.name = "load-firmware",
.handler = handle_firmware,
},
+ [HANDLER_START_CONSOLE] = {
+ .name = "start-console",
+ .handler = handle_start_console,
+ },
};
static void queue_next(void)
uloop_process_add(&queue_proc);
- DEBUG(4, "Launched hotplug exec instance, pid=%d\n", (int) queue_proc.pid);
+ P_DEBUG(4, "Launched hotplug exec instance, pid=%d\n", (int) queue_proc.pid);
}
static void queue_proc_cb(struct uloop_process *c, int ret)
{
- DEBUG(4, "Finished hotplug exec instance, pid=%d\n", (int) c->pid);
+ P_DEBUG(4, "Finished hotplug exec instance, pid=%d\n", (int) c->pid);
if (current) {
current->complete(current->msg, current->data, ret);
int rem, i;
if (debug > 3) {
- DEBUG(4, "Command: %s\n", name);
+ P_DEBUG(4, "Command: %s\n", name);
blobmsg_for_each_attr(cur, data, rem)
- DEBUG(4, " %s\n", (char *) blobmsg_data(cur));
+ P_DEBUG(4, " %s\n", (char *) blobmsg_data(cur));
- DEBUG(4, "Message:\n");
+ P_DEBUG(4, "Message:\n");
blobmsg_for_each_attr(cur, vars, rem)
- DEBUG(4, " %s=%s\n", blobmsg_name(cur), (char *) blobmsg_data(cur));
+ P_DEBUG(4, " %s=%s\n", blobmsg_name(cur), (char *) blobmsg_data(cur));
}
for (i = 0; i < ARRAY_SIZE(handlers); i++)
return;
str = blobmsg_format_json(data, true);
- DEBUG(3, "%s\n", str);
+ P_DEBUG(3, "%s\n", str);
free(str);
}
rule_file = strdup(rules);
nls.nl_family = AF_NETLINK;
- nls.nl_pid = getpid();
+ nls.nl_pid = 0;
nls.nl_groups = -1;
if ((hotplug_fd.fd = socket(PF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_KOBJECT_UEVENT)) == -1) {
unsigned int debug;
+static struct udebug ud;
+static struct udebug_buf udb;
+static bool udebug_enabled;
+
+/* Add one formatted entry to procd's udebug buffer; no-op while disabled. */
+static void procd_udebug_vprintf(const char *format, va_list ap)
+{
+	if (udebug_enabled) {
+		udebug_entry_init(&udb);
+		udebug_entry_vprintf(&udb, format, ap);
+		udebug_entry_add(&udb);
+	}
+}
+
+/* printf-style front end of procd_udebug_vprintf(). */
+void procd_udebug_printf(const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+	procd_udebug_vprintf(format, args);
+	va_end(args);
+}
+
+/*
+ * Switch procd's udebug logging on or off. Calling it with the current
+ * state is a no-op.
+ *
+ * Enabling initialises the udebug context, connects it, creates the
+ * "procd_log" buffer and hands it to ulog; disabling detaches ulog and
+ * frees the buffer and the context.
+ */
+void procd_udebug_set_enabled(bool val)
+{
+	static const struct udebug_buf_meta meta = {
+		.name = "procd_log",
+		.format = UDEBUG_FORMAT_STRING,
+	};
+
+	if (val == udebug_enabled)
+		return;
+
+	udebug_enabled = val;
+
+	if (val) {
+		udebug_init(&ud);
+		udebug_auto_connect(&ud, NULL);
+		udebug_buf_init(&udb, 1024, 64 * 1024);
+		udebug_buf_add(&ud, &udb, &meta);
+		ulog_udebug(&udb);
+		return;
+	}
+
+	ulog_udebug(NULL);
+	udebug_buf_free(&udb);
+	udebug_free(&ud);
+}
+
+
static int usage(const char *prog)
{
fprintf(stderr, "Usage: %s [options]\n"
setsid();
uloop_init();
procd_signal();
+ procd_udebug_set_enabled(true);
if (getpid() != 1)
procd_connect_ubus();
else
#include <libubox/uloop.h>
#include <libubox/utils.h>
#include <libubus.h>
+#include <udebug.h>
#include <stdio.h>
#include <syslog.h>
void procd_connect_ubus(void);
void procd_reconnect_ubus(int reconnect);
+void ubus_init_hotplug(struct ubus_context *ctx);
void ubus_init_service(struct ubus_context *ctx);
void ubus_init_system(struct ubus_context *ctx);
void procd_signal_preinit(void);
void procd_inittab(void);
void procd_inittab_run(const char *action);
+void procd_inittab_kill(void);
void procd_bcast_event(char *event, struct blob_attr *msg);
struct trigger;
void watch_del(void *id);
void watch_ubus(struct ubus_context *ctx);
+void procd_udebug_printf(const char *format, ...);
+void procd_udebug_set_enabled(bool val);
+
#endif
#include <libubox/uloop.h>
#include <libubox/runqueue.h>
+#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
ts_res.tv_nsec += 1000000000;
}
- DEBUG(2, "stop %s %s - took %lu.%09lus\n", s->file, s->param, ts_res.tv_sec, ts_res.tv_nsec);
+ DEBUG(2, "stop %s %s - took %" PRId64 ".%09" PRId64 "s\n", s->file, s->param, (int64_t)ts_res.tv_sec, (int64_t)ts_res.tv_nsec);
ustream_free(&s->fd.stream);
close(s->fd.fd.fd);
free(s);
#include "service.h"
#include "instance.h"
+#define UJAIL_BIN_PATH "/sbin/ujail"
+#define CGROUP_BASEDIR "/sys/fs/cgroup/services"
enum {
INSTANCE_ATTR_COMMAND,
INSTANCE_ATTR_JAIL,
INSTANCE_ATTR_TRACE,
INSTANCE_ATTR_SECCOMP,
+ INSTANCE_ATTR_CAPABILITIES,
INSTANCE_ATTR_PIDFILE,
INSTANCE_ATTR_RELOADSIG,
INSTANCE_ATTR_TERMTIMEOUT,
INSTANCE_ATTR_FACILITY,
+ INSTANCE_ATTR_EXTROOT,
+ INSTANCE_ATTR_OVERLAYDIR,
+ INSTANCE_ATTR_TMPOVERLAYSIZE,
+ INSTANCE_ATTR_BUNDLE,
+ INSTANCE_ATTR_WATCHDOG,
__INSTANCE_ATTR_MAX
};
[INSTANCE_ATTR_JAIL] = { "jail", BLOBMSG_TYPE_TABLE },
[INSTANCE_ATTR_TRACE] = { "trace", BLOBMSG_TYPE_BOOL },
[INSTANCE_ATTR_SECCOMP] = { "seccomp", BLOBMSG_TYPE_STRING },
+ [INSTANCE_ATTR_CAPABILITIES] = { "capabilities", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_RELOADSIG] = { "reload_signal", BLOBMSG_TYPE_INT32 },
[INSTANCE_ATTR_TERMTIMEOUT] = { "term_timeout", BLOBMSG_TYPE_INT32 },
[INSTANCE_ATTR_FACILITY] = { "facility", BLOBMSG_TYPE_STRING },
+ [INSTANCE_ATTR_EXTROOT] = { "extroot", BLOBMSG_TYPE_STRING },
+ [INSTANCE_ATTR_OVERLAYDIR] = { "overlaydir", BLOBMSG_TYPE_STRING },
+ [INSTANCE_ATTR_TMPOVERLAYSIZE] = { "tmpoverlaysize", BLOBMSG_TYPE_STRING },
+ [INSTANCE_ATTR_BUNDLE] = { "bundle", BLOBMSG_TYPE_STRING },
+ [INSTANCE_ATTR_WATCHDOG] = { "watchdog", BLOBMSG_TYPE_ARRAY },
};
enum {
JAIL_ATTR_LOG,
JAIL_ATTR_RONLY,
JAIL_ATTR_MOUNT,
+ JAIL_ATTR_NETNS,
+ JAIL_ATTR_USERNS,
+ JAIL_ATTR_CGROUPSNS,
+ JAIL_ATTR_CONSOLE,
+ JAIL_ATTR_REQUIREJAIL,
+ JAIL_ATTR_IMMEDIATELY,
+ JAIL_ATTR_PIDFILE,
+ JAIL_ATTR_SETNS,
__JAIL_ATTR_MAX,
};
[JAIL_ATTR_LOG] = { "log", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_RONLY] = { "ronly", BLOBMSG_TYPE_BOOL },
[JAIL_ATTR_MOUNT] = { "mount", BLOBMSG_TYPE_TABLE },
+ [JAIL_ATTR_NETNS] = { "netns", BLOBMSG_TYPE_BOOL },
+ [JAIL_ATTR_USERNS] = { "userns", BLOBMSG_TYPE_BOOL },
+ [JAIL_ATTR_CGROUPSNS] = { "cgroupsns", BLOBMSG_TYPE_BOOL },
+ [JAIL_ATTR_CONSOLE] = { "console", BLOBMSG_TYPE_BOOL },
+ [JAIL_ATTR_REQUIREJAIL] = { "requirejail", BLOBMSG_TYPE_BOOL },
+ [JAIL_ATTR_IMMEDIATELY] = { "immediately", BLOBMSG_TYPE_BOOL },
+ [JAIL_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING },
+ [JAIL_ATTR_SETNS] = { "setns", BLOBMSG_TYPE_ARRAY },
+};
+
+/* Indices of the fields parsed from one entry of the jail "setns" array. */
+enum {
+ JAIL_SETNS_ATTR_PID,
+ JAIL_SETNS_ATTR_NS,
+ __JAIL_SETNS_ATTR_MAX,
+};
+
+/*
+ * blobmsg policy for one "setns" entry: an int32 "pid" and an array
+ * "namespaces" (its elements are expected to be strings by
+ * instance_gen_setns_argstr()).
+ */
+static const struct blobmsg_policy jail_setns_attr[__JAIL_SETNS_ATTR_MAX] = {
+ [JAIL_SETNS_ATTR_PID] = { "pid", BLOBMSG_TYPE_INT32 },
+ [JAIL_SETNS_ATTR_NS] = { "namespaces", BLOBMSG_TYPE_ARRAY },
};
struct instance_netdev {
}
}
+/*
+ * Build the argument string for one "setns" entry in the form
+ * "<pid>:<ns>,<ns>,...".
+ *
+ * attr: blobmsg container holding the "pid" and "namespaces" fields
+ *       described by jail_setns_attr.
+ *
+ * Returns a malloc()ed, NUL-terminated string owned by the caller, or
+ * NULL if either field is missing, a namespace entry is not a string,
+ * or the allocation fails.
+ */
+static char *
+instance_gen_setns_argstr(struct blob_attr *attr)
+{
+ struct blob_attr *tb[__JAIL_SETNS_ATTR_MAX];
+ struct blob_attr *cur;
+ int rem, len, total;
+ char *ret;
+
+ blobmsg_parse(jail_setns_attr, __JAIL_SETNS_ATTR_MAX, tb,
+ blobmsg_data(attr), blobmsg_data_len(attr));
+
+ if (!tb[JAIL_SETNS_ATTR_PID] || !tb[JAIL_SETNS_ATTR_NS])
+ return NULL;
+
+ /* first pass: length of the "<pid>:" prefix, not counting a terminator */
+ len = snprintf(NULL, 0, "%d:", blobmsg_get_u32(tb[JAIL_SETNS_ATTR_PID]));
+
+ blobmsg_for_each_attr(cur, tb[JAIL_SETNS_ATTR_NS], rem) {
+ char *tmp;
+
+ if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING)
+ return NULL;
+
+ tmp = blobmsg_get_string(cur);
+ if (!tmp)
+ return NULL;
+
+ /* name plus one byte: the ',' separator, or the terminator after the last name */
+ len += strlen(tmp) + 1;
+ }
+
+ /*
+ * NOTE(review): with an empty "namespaces" array total only covers
+ * "<pid>:" without a terminator, so the ':' ends up overwritten by
+ * the final NUL below - confirm whether an empty list can occur.
+ */
+ total = len;
+ ret = malloc(total);
+ if (!ret)
+ return NULL;
+
+ /* second pass: write the prefix, then every name followed by ',' */
+ len = snprintf(ret, total, "%d:", blobmsg_get_u32(tb[JAIL_SETNS_ATTR_PID]));
+
+ blobmsg_for_each_attr(cur, tb[JAIL_SETNS_ATTR_NS], rem) {
+ strncpy(&ret[len], blobmsg_get_string(cur), total - len);
+ len += strlen(blobmsg_get_string(cur));
+ ret[len++] = ',';
+ }
+ /* the last byte holds the trailing ','; turn it into the terminator */
+ ret[total - 1] = '\0';
+
+ return ret;
+}
+
static inline int
jail_run(struct service_instance *in, char **argv)
{
+ char *term_timeout_str;
struct blobmsg_list_node *var;
struct jail *jail = &in->jail;
int argc = 0;
- argv[argc++] = "/sbin/ujail";
+ argv[argc++] = UJAIL_BIN_PATH;
+
+ if (asprintf(&term_timeout_str, "%d", in->term_timeout) == -1)
+ exit(ENOMEM);
+
+ argv[argc++] = "-t";
+ argv[argc++] = term_timeout_str;
if (jail->name) {
argv[argc++] = "-n";
argv[argc++] = in->seccomp;
}
+ if (in->user) {
+ argv[argc++] = "-U";
+ argv[argc++] = in->user;
+ }
+
+ if (in->group) {
+ argv[argc++] = "-G";
+ argv[argc++] = in->group;
+ }
+
+ if (in->capabilities) {
+ argv[argc++] = "-C";
+ argv[argc++] = in->capabilities;
+ }
+
if (in->no_new_privs)
argv[argc++] = "-c";
if (jail->ronly)
argv[argc++] = "-o";
+ if (jail->netns)
+ argv[argc++] = "-N";
+
+ if (jail->userns)
+ argv[argc++] = "-f";
+
+ if (jail->cgroupsns)
+ argv[argc++] = "-F";
+
+ if (jail->console)
+ argv[argc++] = "-y";
+
+ if (in->extroot) {
+ argv[argc++] = "-R";
+ argv[argc++] = in->extroot;
+ }
+
+ if (in->overlaydir) {
+ argv[argc++] = "-O";
+ argv[argc++] = in->overlaydir;
+ }
+
+ if (in->tmpoverlaysize) {
+ argv[argc++] = "-T";
+ argv[argc++] = in->tmpoverlaysize;
+ }
+
+ if (in->immediately)
+ argv[argc++] = "-i";
+
+ if (jail->pidfile) {
+ argv[argc++] = "-P";
+ argv[argc++] = jail->pidfile;
+ }
+
+ if (in->bundle) {
+ argv[argc++] = "-J";
+ argv[argc++] = in->bundle;
+ }
+
+ if (in->require_jail)
+ argv[argc++] = "-E";
+
+ blobmsg_list_for_each(&in->env, var) {
+ argv[argc++] = "-e";
+ argv[argc++] = (char *) blobmsg_name(var->data);
+ }
+
blobmsg_list_for_each(&jail->mount, var) {
const char *type = blobmsg_data(var->data);
argv[argc++] = (char *) blobmsg_name(var->data);
}
+ blobmsg_list_for_each(&jail->setns, var) {
+ char *setns_arg = instance_gen_setns_argstr(var->data);
+
+ if (setns_arg) {
+ argv[argc++] = "-j";
+ argv[argc++] = setns_arg;
+ }
+ }
+
argv[argc++] = "--";
return argc;
if (!in->pidfile)
return 0;
if (unlink(in->pidfile)) {
- ERROR("Failed to removed pidfile: %s: %m\n", in->pidfile);
+ ERROR("Failed to remove pidfile: %s: %m\n", in->pidfile);
return 1;
}
return 0;
ULOG_WARN("Seccomp support for %s::%s not available\n", in->srv->name, in->name);
#endif
- if (in->has_jail)
+ if (in->has_jail) {
argc = jail_run(in, argv);
+ if (argc != in->jail.argc)
+ ULOG_WARN("expected %i jail params, used %i for %s::%s\n",
+ in->jail.argc, argc, in->srv->name, in->name);
+ }
blobmsg_for_each_attr(cur, in->command, rem)
argv[argc++] = blobmsg_data(cur);
closefd(_stderr);
}
- if (in->user && in->pw_gid && initgroups(in->user, in->pw_gid)) {
+ if (!in->has_jail && in->user && in->pw_gid && initgroups(in->user, in->pw_gid)) {
ERROR("failed to initgroups() for user %s: %m\n", in->user);
exit(127);
}
- if (in->gr_gid && setgid(in->gr_gid)) {
+ if (!in->has_jail && in->gr_gid && setgid(in->gr_gid)) {
ERROR("failed to set group id %d: %m\n", in->gr_gid);
exit(127);
}
- if (in->uid && setuid(in->uid)) {
+ if (!in->has_jail && in->uid && setuid(in->uid)) {
ERROR("failed to set user id %d: %m\n", in->uid);
exit(127);
}
exit(127);
}
+/*
+ * Move the calling process into the cgroup
+ * CGROUP_BASEDIR/<service>/<instance>, creating the directories on the
+ * way. Does nothing when /sys/fs/cgroup/cgroup.subtree_control cannot
+ * be stat()ed.
+ *
+ * This is best effort: failures of mkdir()/open() are not reported, and
+ * names too long for the path buffer make the function return without
+ * touching the buffer beyond its size.
+ *
+ * service:  service name, used as first directory level
+ * instance: instance name, used as second directory level
+ */
+static void
+instance_add_cgroup(const char *service, const char *instance)
+{
+	static const char procs_file[] = "/cgroup.procs";
+	struct stat sb;
+	char cgnamebuf[256];
+	int fd, len;
+
+	if (stat("/sys/fs/cgroup/cgroup.subtree_control", &sb))
+		return;
+
+	mkdir(CGROUP_BASEDIR, 0700);
+
+	len = snprintf(cgnamebuf, sizeof(cgnamebuf), "%s/%s", CGROUP_BASEDIR, service);
+	if (len < 0 || (size_t)len >= sizeof(cgnamebuf))
+		return;
+	mkdir(cgnamebuf, 0700);
+
+	len = snprintf(cgnamebuf, sizeof(cgnamebuf), "%s/%s/%s", CGROUP_BASEDIR, service, instance);
+	/* sizeof(procs_file) includes the terminator, so this reserves room for suffix + NUL */
+	if (len < 0 || (size_t)len + sizeof(procs_file) > sizeof(cgnamebuf))
+		return;
+	mkdir(cgnamebuf, 0700);
+
+	/* bounded append instead of strcat(): cannot run past cgnamebuf */
+	snprintf(&cgnamebuf[len], sizeof(cgnamebuf) - len, "%s", procs_file);
+
+	fd = open(cgnamebuf, O_WRONLY);
+	if (fd == -1)
+		return;
+
+	/* writing our own pid to cgroup.procs moves this process into the cgroup */
+	dprintf(fd, "%d", getpid());
+	close(fd);
+}
+
static void
instance_free_stdio(struct service_instance *in)
{
close(in->_stderr.fd.fd);
in->_stderr.fd.fd = -1;
}
+
+ if (in->console.fd.fd > -1) {
+ ustream_free(&in->console.stream);
+ close(in->console.fd.fd);
+ in->console.fd.fd = -1;
+ }
+
+ if (in->console_client.fd.fd > -1) {
+ ustream_free(&in->console_client.stream);
+ close(in->console_client.fd.fd);
+ in->console_client.fd.fd = -1;
+ }
}
void
return;
}
- if (!in->command) {
+ if (!in->bundle && !in->command) {
LOG("Not starting instance %s::%s, command not set\n", in->srv->name, in->name);
return;
}
uloop_done();
closefd(opipe[0]);
closefd(epipe[0]);
+ instance_add_cgroup(in->srv->name, in->name);
instance_run(in, opipe[1], epipe[1]);
return;
}
- DEBUG(2, "Started instance %s::%s[%d]\n", in->srv->name, in->name, pid);
+ P_DEBUG(2, "Started instance %s::%s[%d]\n", in->srv->name, in->name, pid);
in->proc.pid = pid;
instance_writepid(in);
clock_gettime(CLOCK_MONOTONIC, &in->start);
fcntl(epipe[0], F_SETFD, FD_CLOEXEC);
}
+ if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) {
+ uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000);
+ P_DEBUG(2, "Started instance %s::%s watchdog timer : timeout = %d\n", in->srv->name, in->name, in->watchdog.freq);
+ }
+
service_event("instance.start", in->srv->name, in->name);
}
container_of(s, struct service_instance, _stdout.stream));
}
+/*
+ * Read callback of the instance console stream: every pending chunk is
+ * logged with an "out: " prefix, copied to the console client stream if
+ * that one has a valid fd, and then consumed.
+ */
+static void
+instance_console(struct ustream *s, int bytes)
+{
+	struct service_instance *in = container_of(s, struct service_instance, console.stream);
+	char *chunk;
+	int chunk_len;
+
+	while ((chunk = ustream_get_read_buf(s, &chunk_len)) != NULL) {
+		ulog(LOG_INFO, "out: %s\n", chunk);
+
+		/* forward only while a console client is attached */
+		if (in->console_client.fd.fd > -1)
+			ustream_write(&in->console_client.stream, chunk, chunk_len, false);
+
+		ustream_consume(s, chunk_len);
+	}
+}
+
+/*
+ * Read callback of the console client stream: every pending chunk is
+ * logged with an "in: " prefix, written to the instance console stream
+ * and then consumed.
+ */
+static void
+instance_console_client(struct ustream *s, int bytes)
+{
+	struct service_instance *in = container_of(s, struct service_instance, console_client.stream);
+	char *chunk;
+	int chunk_len;
+
+	while ((chunk = ustream_get_read_buf(s, &chunk_len)) != NULL) {
+		ulog(LOG_INFO, "in: %s\n", chunk);
+		ustream_write(&in->console.stream, chunk, chunk_len, false);
+		ustream_consume(s, chunk_len);
+	}
+}
+
static void
instance_stderr(struct ustream *s, int bytes)
{
service_stopped(s);
}
+/*
+ * Translate a wait status into the exit code stored for an instance:
+ *  - normal exit:       the process' exit status
+ *  - killed by signal:  SIGNALLED_OFFSET + signal number
+ *  - stopped:           the stop signal number
+ *  - anything else:     1
+ */
+static int
+instance_exit_code(int ret)
+{
+	int code = 1;
+
+	if (WIFEXITED(ret))
+		code = WEXITSTATUS(ret);
+	else if (WIFSIGNALED(ret))
+		code = SIGNALLED_OFFSET + WTERMSIG(ret);
+	else if (WIFSTOPPED(ret))
+		code = WSTOPSIG(ret);
+
+	return code;
+}
+
static void
instance_exit(struct uloop_process *p, int ret)
{
clock_gettime(CLOCK_MONOTONIC, &tp);
runtime = tp.tv_sec - in->start.tv_sec;
- DEBUG(2, "Instance %s::%s exit with error code %d after %ld seconds\n", in->srv->name, in->name, ret, runtime);
+ P_DEBUG(2, "Instance %s::%s exit with error code %d after %ld seconds\n", in->srv->name, in->name, ret, runtime);
+ in->exit_code = instance_exit_code(ret);
uloop_timeout_cancel(&in->timeout);
+ uloop_timeout_cancel(&in->watchdog.timeout);
service_event("instance.stop", in->srv->name, in->name);
if (in->halt) {
in->halt = halt;
in->restart = in->respawn = false;
kill(in->proc.pid, SIGTERM);
- uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
+ if (!in->has_jail)
+ uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
}
static void
in->halt = true;
in->restart = true;
kill(in->proc.pid, SIGTERM);
- uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
+ if (!in->has_jail)
+ uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
+}
+
+/*
+ * Timeout callback of the per-instance watchdog timer: an instance with
+ * respawn set is restarted, any other instance is stopped with halt set.
+ */
+static void
+instance_watchdog(struct uloop_timeout *t)
+{
+	struct service_instance *inst = container_of(t, struct service_instance, watchdog.timeout);
+
+	P_DEBUG(3, "instance %s::%s watchdog timer expired\n", inst->srv->name, inst->name);
+
+	if (!inst->respawn) {
+		instance_stop(inst, true);
+		return;
+	}
+
+	instance_restart(inst);
}
static bool string_changed(const char *a, const char *b)
if (!blob_attr_equal(in->command, in_new->command))
return true;
+ if (string_changed(in->bundle, in_new->bundle))
+ return true;
+
+ if (string_changed(in->extroot, in_new->extroot))
+ return true;
+
+ if (string_changed(in->overlaydir, in_new->overlaydir))
+ return true;
+
+ if (string_changed(in->tmpoverlaysize, in_new->tmpoverlaysize))
+ return true;
+
if (!blobmsg_list_equal(&in->env, &in_new->env))
return true;
if (in->pw_gid != in_new->pw_gid)
return true;
+ if (in->gr_gid != in_new->gr_gid)
+ return true;
+
if (string_changed(in->pidfile, in_new->pidfile))
return true;
if (in->respawn_timeout != in_new->respawn_timeout)
return true;
- if ((!in->seccomp && in_new->seccomp) ||
- (in->seccomp && !in_new->seccomp) ||
- (in->seccomp && in_new->seccomp && strcmp(in->seccomp, in_new->seccomp)))
+ if (in->reload_signal != in_new->reload_signal)
+ return true;
+
+ if (in->term_timeout != in_new->term_timeout)
+ return true;
+
+ if (string_changed(in->seccomp, in_new->seccomp))
+ return true;
+
+ if (string_changed(in->capabilities, in_new->capabilities))
return true;
if (!blobmsg_list_equal(&in->limits, &in_new->limits))
if (!blobmsg_list_equal(&in->jail.mount, &in_new->jail.mount))
return true;
+ if (!blobmsg_list_equal(&in->jail.setns, &in_new->jail.setns))
+ return true;
+
if (!blobmsg_list_equal(&in->errors, &in_new->errors))
return true;
+ if (in->has_jail != in_new->has_jail)
+ return true;
+
+ if (in->trace != in_new->trace)
+ return true;
+
+ if (in->require_jail != in_new->require_jail)
+ return true;
+
+ if (in->immediately != in_new->immediately)
+ return true;
+
+ if (in->no_new_privs != in_new->no_new_privs)
+ return true;
+
+ if (string_changed(in->jail.name, in_new->jail.name))
+ return true;
+
+ if (string_changed(in->jail.hostname, in_new->jail.hostname))
+ return true;
+
+ if (string_changed(in->jail.pidfile, in_new->jail.pidfile))
+ return true;
+
+ if (in->jail.procfs != in_new->jail.procfs)
+ return true;
+
+ if (in->jail.sysfs != in_new->jail.sysfs)
+ return true;
+
+ if (in->jail.ubus != in_new->jail.ubus)
+ return true;
+
+ if (in->jail.log != in_new->jail.log)
+ return true;
+
+ if (in->jail.ronly != in_new->jail.ronly)
+ return true;
+
+ if (in->jail.netns != in_new->jail.netns)
+ return true;
+
+ if (in->jail.userns != in_new->jail.userns)
+ return true;
+
+ if (in->jail.cgroupsns != in_new->jail.cgroupsns)
+ return true;
+
+ if (in->jail.console != in_new->jail.console)
+ return true;
+
+ if (in->watchdog.mode != in_new->watchdog.mode)
+ return true;
+
+ if (in->watchdog.freq != in_new->watchdog.freq)
+ return true;
+
return false;
}
{
struct blob_attr *tb[__JAIL_ATTR_MAX];
struct jail *jail = &in->jail;
- struct stat s;
-
- if (stat("/sbin/ujail", &s))
- return 0;
+ struct blobmsg_list_node *var;
blobmsg_parse(jail_attr, __JAIL_ATTR_MAX, tb,
blobmsg_data(attr), blobmsg_data_len(attr));
- jail->argc = 2;
+ jail->argc = 4;
+ if (tb[JAIL_ATTR_REQUIREJAIL] && blobmsg_get_bool(tb[JAIL_ATTR_REQUIREJAIL])) {
+ in->require_jail = true;
+ jail->argc++;
+ }
+ if (tb[JAIL_ATTR_IMMEDIATELY] && blobmsg_get_bool(tb[JAIL_ATTR_IMMEDIATELY])) {
+ in->immediately = true;
+ jail->argc++;
+ }
if (tb[JAIL_ATTR_NAME]) {
- jail->name = blobmsg_get_string(tb[JAIL_ATTR_NAME]);
+ jail->name = strdup(blobmsg_get_string(tb[JAIL_ATTR_NAME]));
jail->argc += 2;
}
if (tb[JAIL_ATTR_HOSTNAME]) {
- jail->hostname = blobmsg_get_string(tb[JAIL_ATTR_HOSTNAME]);
+ jail->hostname = strdup(blobmsg_get_string(tb[JAIL_ATTR_HOSTNAME]));
jail->argc += 2;
}
- if (tb[JAIL_ATTR_PROCFS]) {
- jail->procfs = blobmsg_get_bool(tb[JAIL_ATTR_PROCFS]);
+ if (tb[JAIL_ATTR_PROCFS] && blobmsg_get_bool(tb[JAIL_ATTR_PROCFS])) {
+ jail->procfs = true;
+ jail->argc++;
+ }
+ if (tb[JAIL_ATTR_SYSFS] && blobmsg_get_bool(tb[JAIL_ATTR_SYSFS])) {
+ jail->sysfs = true;
+ jail->argc++;
+ }
+ if (tb[JAIL_ATTR_UBUS] && blobmsg_get_bool(tb[JAIL_ATTR_UBUS])) {
+ jail->ubus = true;
+ jail->argc++;
+ }
+ if (tb[JAIL_ATTR_LOG] && blobmsg_get_bool(tb[JAIL_ATTR_LOG])) {
+ jail->log = true;
+ jail->argc++;
+ }
+ if (tb[JAIL_ATTR_RONLY] && blobmsg_get_bool(tb[JAIL_ATTR_RONLY])) {
+ jail->ronly = true;
jail->argc++;
}
- if (tb[JAIL_ATTR_SYSFS]) {
- jail->sysfs = blobmsg_get_bool(tb[JAIL_ATTR_SYSFS]);
+ if (tb[JAIL_ATTR_NETNS] && blobmsg_get_bool(tb[JAIL_ATTR_NETNS])) {
+ jail->netns = true;
jail->argc++;
}
- if (tb[JAIL_ATTR_UBUS]) {
- jail->ubus = blobmsg_get_bool(tb[JAIL_ATTR_UBUS]);
+ if (tb[JAIL_ATTR_USERNS] && blobmsg_get_bool(tb[JAIL_ATTR_USERNS])) {
+ jail->userns = true;
jail->argc++;
}
- if (tb[JAIL_ATTR_LOG]) {
- jail->log = blobmsg_get_bool(tb[JAIL_ATTR_LOG]);
+ if (tb[JAIL_ATTR_CGROUPSNS] && blobmsg_get_bool(tb[JAIL_ATTR_CGROUPSNS])) {
+ jail->cgroupsns = true;
jail->argc++;
}
- if (tb[JAIL_ATTR_RONLY]) {
- jail->ronly = blobmsg_get_bool(tb[JAIL_ATTR_RONLY]);
+ if (tb[JAIL_ATTR_CONSOLE] && blobmsg_get_bool(tb[JAIL_ATTR_CONSOLE])) {
+ jail->console = true;
jail->argc++;
}
+ if (tb[JAIL_ATTR_PIDFILE]) {
+ jail->pidfile = strdup(blobmsg_get_string(tb[JAIL_ATTR_PIDFILE]));
+ jail->argc += 2;
+ }
+
+ if (tb[JAIL_ATTR_SETNS]) {
+ struct blob_attr *cur;
+ int rem;
+
+ blobmsg_for_each_attr(cur, tb[JAIL_ATTR_SETNS], rem)
+ jail->argc += 2;
+ blobmsg_list_fill(&jail->setns, blobmsg_data(tb[JAIL_ATTR_SETNS]),
+ blobmsg_data_len(tb[JAIL_ATTR_SETNS]), true);
+ }
+
if (tb[JAIL_ATTR_MOUNT]) {
struct blob_attr *cur;
int rem;
jail->argc += 2;
instance_fill_array(&jail->mount, tb[JAIL_ATTR_MOUNT], NULL, false);
}
+
+ blobmsg_list_for_each(&in->env, var)
+ jail->argc += 2;
+
if (in->seccomp)
jail->argc += 2;
- return 1;
+ if (in->capabilities)
+ jail->argc += 2;
+
+ if (in->user)
+ jail->argc += 2;
+
+ if (in->group)
+ jail->argc += 2;
+
+ if (in->extroot)
+ jail->argc += 2;
+
+ if (in->overlaydir)
+ jail->argc += 2;
+
+ if (in->tmpoverlaysize)
+ jail->argc += 2;
+
+ if (in->no_new_privs)
+ jail->argc++;
+
+ if (in->bundle)
+ jail->argc += 2;
+
+ return true;
}
static bool
{
struct blob_attr *tb[__INSTANCE_ATTR_MAX];
struct blob_attr *cur, *cur2;
- int rem;
+ struct stat s;
+ int rem, r;
blobmsg_parse(instance_attr, __INSTANCE_ATTR_MAX, tb,
blobmsg_data(in->config), blobmsg_data_len(in->config));
- if (!instance_config_parse_command(in, tb))
- return false;
+ if (!tb[INSTANCE_ATTR_BUNDLE] && !instance_config_parse_command(in, tb))
+ return false;
if (tb[INSTANCE_ATTR_TERMTIMEOUT])
in->term_timeout = blobmsg_get_u32(tb[INSTANCE_ATTR_TERMTIMEOUT]);
blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_WATCH], rem) {
if (blobmsg_type(cur2) != BLOBMSG_TYPE_STRING)
continue;
- DEBUG(3, "watch for %s\n", blobmsg_get_string(cur2));
+ P_DEBUG(3, "watch for %s\n", blobmsg_get_string(cur2));
watch_add(blobmsg_get_string(cur2), in);
}
}
in->no_new_privs = blobmsg_get_bool(tb[INSTANCE_ATTR_NO_NEW_PRIVS]);
if (!in->trace && tb[INSTANCE_ATTR_SECCOMP])
- in->seccomp = blobmsg_get_string(tb[INSTANCE_ATTR_SECCOMP]);
+ in->seccomp = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_SECCOMP]));
+
+ if (tb[INSTANCE_ATTR_CAPABILITIES])
+ in->capabilities = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_CAPABILITIES]));
+
+ if (tb[INSTANCE_ATTR_EXTROOT])
+ in->extroot = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_EXTROOT]));
+
+ if (tb[INSTANCE_ATTR_OVERLAYDIR])
+ in->overlaydir = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_OVERLAYDIR]));
+
+ if (tb[INSTANCE_ATTR_TMPOVERLAYSIZE])
+ in->tmpoverlaysize = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_TMPOVERLAYSIZE]));
+
+ if (tb[INSTANCE_ATTR_BUNDLE])
+ in->bundle = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_BUNDLE]));
if (tb[INSTANCE_ATTR_PIDFILE]) {
char *pidfile = blobmsg_get_string(tb[INSTANCE_ATTR_PIDFILE]);
if (pidfile)
- in->pidfile = pidfile;
+ in->pidfile = strdup(pidfile);
}
if (tb[INSTANCE_ATTR_RELOADSIG])
in->reload_signal = blobmsg_get_u32(tb[INSTANCE_ATTR_RELOADSIG]);
- if (!in->trace && tb[INSTANCE_ATTR_JAIL])
- in->has_jail = instance_jail_parse(in, tb[INSTANCE_ATTR_JAIL]);
-
if (tb[INSTANCE_ATTR_STDOUT] && blobmsg_get_bool(tb[INSTANCE_ATTR_STDOUT]))
in->_stdout.fd.fd = -1;
int facility = syslog_facility_str_to_int(blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
if (facility != -1) {
in->syslog_facility = facility;
- DEBUG(3, "setting facility '%s'\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
+ P_DEBUG(3, "setting facility '%s'\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
} else
- DEBUG(3, "unknown syslog facility '%s' given, using default (LOG_DAEMON)\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
+ P_DEBUG(3, "unknown syslog facility '%s' given, using default (LOG_DAEMON)\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
+ }
+
+	/*
+	 * "watchdog" is an array of up to two strings: [0] the mode (numeric
+	 * value of instance_watchdog_mode_t), [1] the timeout, which is later
+	 * multiplied by 1000 when the uloop timer is armed. Missing or
+	 * invalid entries fall back to mode 0 and a timeout of 30.
+	 */
+	if (tb[INSTANCE_ATTR_WATCHDOG]) {
+		/* signed on purpose: atoi() may yield negative values which must be rejected */
+		int vals[2] = { 0, 30 };
+		int i = 0;
+
+		blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_WATCHDOG], rem) {
+			if (i >= 2)
+				break;
+
+			/* only string elements may be passed to atoi(); others keep the default */
+			if (blobmsg_type(cur2) == BLOBMSG_TYPE_STRING)
+				vals[i] = atoi(blobmsg_get_string(cur2));
+			i++;
+		}
+
+		if (vals[0] >= 0 && vals[0] < __INSTANCE_WATCHDOG_MODE_MAX) {
+			in->watchdog.mode = vals[0];
+			P_DEBUG(3, "setting watchdog mode (%d)\n", vals[0]);
+		} else {
+			in->watchdog.mode = 0;
+			P_DEBUG(3, "unknown watchdog mode (%d) given, using default (0)\n", vals[0]);
+		}
+
+		if (vals[1] > 0) {
+			in->watchdog.freq = vals[1];
+			/* report the timeout itself, not the mode */
+			P_DEBUG(3, "setting watchdog timeout (%d)\n", vals[1]);
+		} else {
+			in->watchdog.freq = 30;
+			P_DEBUG(3, "invalid watchdog timeout (%d) given, using default (30)\n", vals[1]);
+		}
+	}
+
+ if (!in->trace && tb[INSTANCE_ATTR_JAIL])
+ in->has_jail = instance_jail_parse(in, tb[INSTANCE_ATTR_JAIL]);
+
+ if (in->has_jail) {
+ r = stat(UJAIL_BIN_PATH, &s);
+ if (r < 0) {
+ if (in->require_jail) {
+ ERROR("Cannot jail service %s::%s. %s: %m (%d)\n",
+ in->srv->name, in->name, UJAIL_BIN_PATH, r);
+ return false;
+ }
+ P_DEBUG(2, "unable to find %s: %m (%d)\n", UJAIL_BIN_PATH, r);
+ in->has_jail = false;
+ }
}
return true;
blobmsg_list_free(&in->limits);
blobmsg_list_free(&in->errors);
blobmsg_list_free(&in->jail.mount);
+ blobmsg_list_free(&in->jail.setns);
+}
+
+/*
+ * Replace the string owned by *dst with a private copy of src.
+ * The previous value of *dst is freed; a NULL src leaves *dst NULL.
+ */
+static void
+instance_config_move_strdup(char **dst, char *src)
+{
+	/* free(NULL) is a no-op, so no check of the old value is needed */
+	free(*dst);
+	*dst = src ? strdup(src) : NULL;
}
static void
blobmsg_list_move(&in->limits, &in_src->limits);
blobmsg_list_move(&in->errors, &in_src->errors);
blobmsg_list_move(&in->jail.mount, &in_src->jail.mount);
+ blobmsg_list_move(&in->jail.setns, &in_src->jail.setns);
in->trigger = in_src->trigger;
in->command = in_src->command;
- in->pidfile = in_src->pidfile;
in->respawn = in_src->respawn;
in->respawn_retry = in_src->respawn_retry;
in->respawn_threshold = in_src->respawn_threshold;
in->respawn_timeout = in_src->respawn_timeout;
+ in->reload_signal = in_src->reload_signal;
+ in->term_timeout = in_src->term_timeout;
+ in->watchdog.mode = in_src->watchdog.mode;
+ in->watchdog.freq = in_src->watchdog.freq;
+ in->watchdog.timeout = in_src->watchdog.timeout;
in->name = in_src->name;
+ in->nice = in_src->nice;
in->trace = in_src->trace;
- in->seccomp = in_src->seccomp;
in->node.avl.key = in_src->node.avl.key;
in->syslog_facility = in_src->syslog_facility;
+ in->require_jail = in_src->require_jail;
+ in->no_new_privs = in_src->no_new_privs;
+ in->immediately = in_src->immediately;
+ in->uid = in_src->uid;
+ in->pw_gid = in_src->pw_gid;
+ in->gr_gid = in_src->gr_gid;
+
+ in->has_jail = in_src->has_jail;
+ in->jail.procfs = in_src->jail.procfs;
+ in->jail.sysfs = in_src->jail.sysfs;
+ in->jail.ubus = in_src->jail.ubus;
+ in->jail.log = in_src->jail.log;
+ in->jail.ronly = in_src->jail.ronly;
+ in->jail.netns = in_src->jail.netns;
+ in->jail.cgroupsns = in_src->jail.cgroupsns;
+ in->jail.console = in_src->jail.console;
+ in->jail.argc = in_src->jail.argc;
+
+ instance_config_move_strdup(&in->pidfile, in_src->pidfile);
+ instance_config_move_strdup(&in->seccomp, in_src->seccomp);
+ instance_config_move_strdup(&in->capabilities, in_src->capabilities);
+ instance_config_move_strdup(&in->bundle, in_src->bundle);
+ instance_config_move_strdup(&in->extroot, in_src->extroot);
+ instance_config_move_strdup(&in->overlaydir, in_src->overlaydir);
+ instance_config_move_strdup(&in->tmpoverlaysize, in_src->tmpoverlaysize);
+ instance_config_move_strdup(&in->user, in_src->user);
+ instance_config_move_strdup(&in->group, in_src->group);
+ instance_config_move_strdup(&in->jail.name, in_src->jail.name);
+ instance_config_move_strdup(&in->jail.hostname, in_src->jail.hostname);
+ instance_config_move_strdup(&in->jail.pidfile, in_src->jail.pidfile);
free(in->config);
in->config = in_src->config;
instance_free_stdio(in);
uloop_process_delete(&in->proc);
uloop_timeout_cancel(&in->timeout);
+ uloop_timeout_cancel(&in->watchdog.timeout);
trigger_del(in);
watch_del(in);
instance_config_cleanup(in);
free(in->config);
free(in->user);
free(in->group);
+ free(in->extroot);
+ free(in->overlaydir);
+ free(in->tmpoverlaysize);
+ free(in->bundle);
+ free(in->jail.name);
+ free(in->jail.hostname);
+ free(in->jail.pidfile);
+ free(in->seccomp);
+ free(in->capabilities);
+ free(in->pidfile);
free(in);
}
in->proc.cb = instance_exit;
in->term_timeout = 5;
in->syslog_facility = LOG_DAEMON;
+ in->exit_code = 0;
+ in->require_jail = false;
+ in->immediately = false;
in->_stdout.fd.fd = -2;
in->_stdout.stream.string_data = true;
in->_stderr.stream.string_data = true;
in->_stderr.stream.notify_read = instance_stderr;
+ in->console.fd.fd = -2;
+ in->console.stream.string_data = true;
+ in->console.stream.notify_read = instance_console;
+
+ in->console_client.fd.fd = -2;
+ in->console_client.stream.string_data = true;
+ in->console_client.stream.notify_read = instance_console_client;
+
blobmsg_list_init(&in->netdev, struct instance_netdev, node, instance_netdev_cmp);
blobmsg_list_init(&in->file, struct instance_file, node, instance_file_cmp);
blobmsg_list_simple_init(&in->env);
blobmsg_list_simple_init(&in->limits);
blobmsg_list_simple_init(&in->errors);
blobmsg_list_simple_init(&in->jail.mount);
+ blobmsg_list_simple_init(&in->jail.setns);
+
+ in->watchdog.timeout.cb = instance_watchdog;
+
in->valid = instance_config_parse(in);
}
blobmsg_add_u32(b, "pid", in->proc.pid);
if (in->command)
blobmsg_add_blob(b, in->command);
+ if (in->bundle)
+ blobmsg_add_string(b, "bundle", in->bundle);
blobmsg_add_u32(b, "term_timeout", in->term_timeout);
+ if (!in->proc.pending)
+ blobmsg_add_u32(b, "exit_code", in->exit_code);
if (!avl_is_empty(&in->errors.avl)) {
struct blobmsg_list_node *var;
blobmsg_close_table(b, e);
}
+ if (!avl_is_empty(&in->netdev.avl)) {
+ struct blobmsg_list_node *var;
+ void *n = blobmsg_open_array(b, "netdev");
+
+ blobmsg_list_for_each(&in->netdev, var)
+ blobmsg_add_string(b, NULL, blobmsg_data(var->data));
+ blobmsg_close_array(b, n);
+ }
+
if (in->reload_signal)
blobmsg_add_u32(b, "reload_signal", in->reload_signal);
if (in->seccomp)
blobmsg_add_string(b, "seccomp", in->seccomp);
+ if (in->capabilities)
+ blobmsg_add_string(b, "capabilities", in->capabilities);
+
if (in->pidfile)
blobmsg_add_string(b, "pidfile", in->pidfile);
void *r = blobmsg_open_table(b, "jail");
if (in->jail.name)
blobmsg_add_string(b, "name", in->jail.name);
- if (in->jail.hostname)
- blobmsg_add_string(b, "hostname", in->jail.hostname);
- blobmsg_add_u8(b, "procfs", in->jail.procfs);
- blobmsg_add_u8(b, "sysfs", in->jail.sysfs);
- blobmsg_add_u8(b, "ubus", in->jail.ubus);
- blobmsg_add_u8(b, "log", in->jail.log);
- blobmsg_add_u8(b, "ronly", in->jail.ronly);
+ if (!in->bundle) {
+ if (in->jail.hostname)
+ blobmsg_add_string(b, "hostname", in->jail.hostname);
+
+ blobmsg_add_u8(b, "procfs", in->jail.procfs);
+ blobmsg_add_u8(b, "sysfs", in->jail.sysfs);
+ blobmsg_add_u8(b, "ubus", in->jail.ubus);
+ blobmsg_add_u8(b, "log", in->jail.log);
+ blobmsg_add_u8(b, "ronly", in->jail.ronly);
+ blobmsg_add_u8(b, "netns", in->jail.netns);
+ blobmsg_add_u8(b, "userns", in->jail.userns);
+ blobmsg_add_u8(b, "cgroupsns", in->jail.cgroupsns);
+ } else {
+ if (in->jail.pidfile)
+ blobmsg_add_string(b, "pidfile", in->jail.pidfile);
+
+ blobmsg_add_u8(b, "immediately", in->immediately);
+ }
+ blobmsg_add_u8(b, "console", (in->console.fd.fd > -1));
blobmsg_close_table(b, r);
if (!avl_is_empty(&in->jail.mount.avl)) {
struct blobmsg_list_node *var;
blobmsg_add_string(b, blobmsg_name(var->data), blobmsg_data(var->data));
blobmsg_close_table(b, e);
}
+
+ if (!avl_is_empty(&in->jail.setns.avl)) {
+ struct blobmsg_list_node *var;
+ void *s = blobmsg_open_array(b, "setns");
+ blobmsg_list_for_each(&in->jail.setns, var)
+ blobmsg_add_blob(b, var->data);
+ blobmsg_close_array(b, s);
+ }
}
+ if (in->extroot)
+ blobmsg_add_string(b, "extroot", in->extroot);
+ if (in->overlaydir)
+ blobmsg_add_string(b, "overlaydir", in->overlaydir);
+ if (in->tmpoverlaysize)
+ blobmsg_add_string(b, "tmpoverlaysize", in->tmpoverlaysize);
+
if (verbose && in->trigger)
blobmsg_add_blob(b, in->trigger);
+ if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) {
+ void *r = blobmsg_open_table(b, "watchdog");
+ blobmsg_add_u32(b, "mode", in->watchdog.mode);
+ blobmsg_add_u32(b, "timeout", in->watchdog.freq);
+ blobmsg_close_table(b, r);
+ }
+
blobmsg_close_table(b, i);
}
#include "../utils/utils.h"
#define RESPAWN_ERROR (5 * 60)
+#define SIGNALLED_OFFSET 128
struct jail {
bool procfs;
bool ubus;
bool log;
bool ronly;
+ bool netns;
+ bool userns;
+ bool cgroupsns;
+ bool console;
char *name;
char *hostname;
+ char *pidfile;
struct blobmsg_list mount;
+ struct blobmsg_list setns;
int argc;
};
+/*
+ * Watchdog mode of a service instance. Only the DISABLED value is
+ * special-cased in instance.c (no timer is armed, nothing is dumped).
+ * NOTE(review): the difference between PASSIVE and ACTIVE is not visible
+ * from here - confirm against the code handling the watchdog ubus calls.
+ */
+typedef enum instance_watchdog {
+ INSTANCE_WATCHDOG_MODE_DISABLED,
+ INSTANCE_WATCHDOG_MODE_PASSIVE,
+ INSTANCE_WATCHDOG_MODE_ACTIVE,
+ __INSTANCE_WATCHDOG_MODE_MAX, /* number of valid modes, used for range checks */
+} instance_watchdog_mode_t;
+
+/* Per-instance watchdog state. */
+struct watchdog {
+ instance_watchdog_mode_t mode;
+ uint32_t freq; /* timer period; multiplied by 1000 when passed to uloop_timeout_set() */
+ struct uloop_timeout timeout; /* timer whose callback is instance_watchdog() */
+};
+
struct service_instance {
struct vlist_node node;
struct service *srv;
bool trace;
bool has_jail;
+ bool require_jail;
+ bool immediately;
bool no_new_privs;
struct jail jail;
char *seccomp;
+ char *capabilities;
char *pidfile;
+ char *extroot;
+ char *overlaydir;
+ char *tmpoverlaysize;
+ char *bundle;
int syslog_facility;
+ int exit_code;
uint32_t term_timeout;
uint32_t respawn_timeout;
struct uloop_timeout timeout;
struct ustream_fd _stdout;
struct ustream_fd _stderr;
+ struct ustream_fd console;
+ struct ustream_fd console_client;
struct blob_attr *command;
struct blob_attr *trigger;
struct blobmsg_list file;
struct blobmsg_list limits;
struct blobmsg_list errors;
+
+ struct watchdog watchdog;
};
void instance_start(struct service_instance *in);
* GNU General Public License for more details.
*/
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <unistd.h>
+#include <sched.h>
+
#include <libubox/blobmsg_json.h>
#include <libubox/avl-cmp.h>
#include "../rcS.h"
AVL_TREE(services, avl_strcmp, false, NULL);
+AVL_TREE(containers, avl_strcmp, false, NULL);
static struct blob_buf b;
static struct ubus_context *ctx;
static struct ubus_object main_object;
in_n = container_of(node_new, struct service_instance, node);
if (in_o && in_n) {
- DEBUG(2, "Update instance %s::%s\n", in_o->srv->name, in_o->name);
+ P_DEBUG(2, "Update instance %s::%s\n", in_o->srv->name, in_o->name);
instance_update(in_o, in_n);
instance_free(in_n);
} else if (in_o) {
- DEBUG(2, "Stop instance %s::%s\n", in_o->srv->name, in_o->name);
+ P_DEBUG(2, "Stop instance %s::%s\n", in_o->srv->name, in_o->name);
instance_stop(in_o, true);
} else if (in_n && in_n->srv->autostart) {
- DEBUG(2, "Start instance %s::%s\n", in_n->srv->name, in_n->name);
+ P_DEBUG(2, "Start instance %s::%s\n", in_n->srv->name, in_n->name);
instance_start(in_n);
}
blob_buf_init(&b, 0);
}
static void
-service_delete(struct service *s)
+service_delete(struct service *s, bool container)
{
blobmsg_list_free(&s->data_blob);
free(s->data);
[DATA_TYPE] = { "type", BLOBMSG_TYPE_STRING },
};
+/* Attribute indices for the container "console_set" / "console_attach" calls. */
+enum {
+ CONTAINER_CONSOLE_NAME,
+ CONTAINER_CONSOLE_INSTANCE,
+ __CONTAINER_CONSOLE_MAX,
+};
+
+/* blobmsg policy for the console calls: "name" and "instance" are both strings. */
+static const struct blobmsg_policy container_console_policy[__CONTAINER_CONSOLE_MAX] = {
+ [CONTAINER_CONSOLE_NAME] = { "name", BLOBMSG_TYPE_STRING },
+ [CONTAINER_CONSOLE_INSTANCE] = { "instance", BLOBMSG_TYPE_STRING },
+};
+
+/* Tell whether @obj is the ubus object named "container"; false for NULL. */
+static inline bool is_container_obj(struct ubus_object *obj)
+{
+	if (!obj)
+		return false;
+
+	return !strcmp(obj->name, "container");
+}
+
+/*
+ * Append @name as an unnamed string to @b when /proc/self/ns/<name> can be
+ * stat()ed; otherwise add nothing.
+ */
+static inline void put_namespace(struct blob_buf *b, char *name)
+{
+	struct stat st;
+	char path[32];
+
+	snprintf(path, sizeof(path), "/proc/self/ns/%s", name);
+	if (stat(path, &st) != 0)
+		return;
+
+	blobmsg_add_string(b, NULL, name);
+}
+
+/*
+ * Append every entry of /sys/fs/cgroup/cgroup.controllers to @b as an
+ * unnamed string. The file is expected to hold space-separated names
+ * terminated by a new-line. Nothing is added when the file cannot be
+ * opened or fewer than two bytes could be read.
+ */
+static void put_cgroups(struct blob_buf *b)
+{
+	int fd, ret;
+	static char buf[512] = "";
+	char *t, *z;
+
+	fd = open("/sys/fs/cgroup/cgroup.controllers", O_RDONLY);
+	if (fd == -1)
+		return;
+
+	/* keep one byte spare for the terminator */
+	ret = read(fd, buf, sizeof(buf) - 1);
+	close(fd);
+
+	if (ret < 2)
+		return;
+
+	/*
+	 * Terminate right behind the bytes actually read: buf is static, so
+	 * anything past that point may be left over from an earlier call.
+	 */
+	buf[ret] = '\0';
+
+	t = buf;
+	while (t) {
+		z = t;
+		/* replace space with \0 and direct next entry */
+		t = strchr(z, ' ');
+		if (t) {
+			*(t++) = '\0';
+		} else { /* replace trailing new-line with \0 */
+			t = strchr(z, '\n');
+			if (!t) /* shouldn't happen, but don't segfault if it does */
+				break;
+
+			*t = '\0';
+			t = NULL;
+		}
+		blobmsg_add_string(b, NULL, z);
+	}
+}
+
+/*
+ * ubus "get_features" handler of the container object. Replies with the
+ * machine name from uname(), whether seccomp support was compiled in, the
+ * cgroup controllers and the namespaces found under /proc/self/ns.
+ */
+static int
+container_handle_features(struct ubus_context *ctx, struct ubus_object *obj,
+			  struct ubus_request_data *req, const char *method,
+			  struct blob_attr *msg)
+{
+	/* namespaces to probe, in reply order */
+	static char *ns_names[] = {
+		"cgroup", "ipc", "mnt", "net", "pid",
+#ifdef CLONE_NEWTIME
+		"time",
+#endif
+		"user", "uts",
+	};
+#ifdef SECCOMP_SUPPORT
+	const bool has_seccomp = true;
+#else
+	const bool has_seccomp = false;
+#endif
+	struct utsname uts;
+	struct stat st;
+	void *cookie;
+	size_t i;
+
+	/* the call is reported as unsupported when /sbin/ujail is missing */
+	if (stat("/sbin/ujail", &st) != 0)
+		return UBUS_STATUS_NOT_SUPPORTED;
+
+	if (uname(&uts) < 0)
+		return UBUS_STATUS_UNKNOWN_ERROR;
+
+	blob_buf_init(&b, 0);
+	blobmsg_add_string(&b, "machine", uts.machine);
+	blobmsg_add_u8(&b, "seccomp", has_seccomp);
+
+	cookie = blobmsg_open_array(&b, "cgroup");
+	put_cgroups(&b);
+	blobmsg_close_array(&b, cookie);
+
+	cookie = blobmsg_open_array(&b, "namespaces");
+	for (i = 0; i < ARRAY_SIZE(ns_names); i++)
+		put_namespace(&b, ns_names[i]);
+	blobmsg_close_array(&b, cookie);
+
+	ubus_send_reply(ctx, req, b.head);
+
+	return UBUS_STATUS_OK;
+}
+
static int
service_handle_set(struct ubus_context *ctx, struct ubus_object *obj,
struct ubus_request_data *req, const char *method,
struct blob_attr *tb[__SERVICE_SET_MAX], *cur;
struct service *s = NULL;
const char *name;
+ bool container = is_container_obj(obj);
bool add = !strcmp(method, "add");
int ret;
name = blobmsg_data(cur);
- s = avl_find_element(&services, name, s, avl);
+ if (container)
+ s = avl_find_element(&containers, name, s, avl);
+ else
+ s = avl_find_element(&services, name, s, avl);
+
if (s) {
- DEBUG(2, "Update service %s\n", name);
+ P_DEBUG(2, "Update service %s\n", name);
return service_update(s, tb, add);
}
- DEBUG(2, "Create service %s\n", name);
+ P_DEBUG(2, "Create service %s\n", name);
s = service_alloc(name);
if (!s)
return UBUS_STATUS_UNKNOWN_ERROR;
+ s->container = container;
+
ret = service_update(s, tb, add);
if (ret)
return ret;
- avl_insert(&services, &s->avl);
+ if (container) {
+ avl_insert(&containers, &s->avl);
- service_event("service.start", s->name, NULL);
+ service_event("container.start", s->name, NULL);
+ } else {
+ avl_insert(&services, &s->avl);
+ service_event("service.start", s->name, NULL);
+ }
return 0;
}
struct service *s;
const char *name = NULL;
bool verbose = false;
+ bool container = is_container_obj(obj);
+ const struct avl_tree *tree = container?&containers:&services;
blobmsg_parse(service_list_attrs, __SERVICE_LIST_ATTR_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));
name = blobmsg_get_string(tb[SERVICE_LIST_ATTR_NAME]);
blob_buf_init(&b, 0);
- avl_for_each_element(&services, s, avl) {
+ avl_for_each_element(tree, s, avl) {
if (name && strcmp(s->name, name) != 0)
continue;
struct blob_attr *tb[__SERVICE_DEL_ATTR_MAX], *cur;
struct service *s;
struct service_instance *in;
+ bool container = is_container_obj(obj);
blobmsg_parse(service_del_attrs, __SERVICE_DEL_ATTR_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));
if (!cur)
return UBUS_STATUS_NOT_FOUND;
- s = avl_find_element(&services, blobmsg_data(cur), s, avl);
+ if (container)
+ s = avl_find_element(&containers, blobmsg_data(cur), s, avl);
+ else
+ s = avl_find_element(&services, blobmsg_data(cur), s, avl);
+
if (!s)
return UBUS_STATUS_NOT_FOUND;
cur = tb[SERVICE_DEL_ATTR_INSTANCE];
if (!cur) {
- service_delete(s);
+ service_delete(s, container);
return 0;
}
struct blob_attr *tb[__SERVICE_SIGNAL_ATTR_MAX], *cur;
struct service *s;
struct service_instance *in;
+ bool container = is_container_obj(obj);
int sig = SIGHUP;
int rv = 0;
if (!cur)
return UBUS_STATUS_NOT_FOUND;
- s = avl_find_element(&services, blobmsg_data(cur), s, avl);
+ if (container)
+ s = avl_find_element(&containers, blobmsg_data(cur), s, avl);
+ else
+ s = avl_find_element(&services, blobmsg_data(cur), s, avl);
+
if (!s)
return UBUS_STATUS_NOT_FOUND;
struct blob_attr *tb[__SERVICE_STATE_ATTR_MAX];
struct service *s;
struct service_instance *in;
+ bool container = is_container_obj(obj);
int spawn;
blobmsg_parse(service_state_attrs, __SERVICE_STATE_ATTR_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));
if (!tb[SERVICE_STATE_ATTR_NAME])
return UBUS_STATUS_NOT_FOUND;
- s = avl_find_element(&services, blobmsg_data(tb[SERVICE_STATE_ATTR_NAME]), s, avl);
+ if (container)
+ s = avl_find_element(&containers, blobmsg_data(tb[SERVICE_STATE_ATTR_NAME]), s, avl);
+ else
+ s = avl_find_element(&services, blobmsg_data(tb[SERVICE_STATE_ATTR_NAME]), s, avl);
+
if (!s)
return UBUS_STATUS_NOT_FOUND;
return UBUS_STATUS_OK;
}
+/*
+ * Stop every instance of every service stored in @sctree. *@term_timeout is
+ * raised to the largest term_timeout seen among the stopped instances.
+ */
+static void
+service_avl_stop_all(struct avl_tree *sctree, unsigned int *term_timeout)
+{
+	struct service_instance *inst, *next;
+	struct service *svc;
+
+	avl_for_each_element(sctree, svc, avl) {
+		vlist_for_each_element_safe(&svc->instances, inst, node, next) {
+			unsigned int wanted = inst->term_timeout;
+
+			if (*term_timeout < wanted)
+				*term_timeout = wanted;
+
+			instance_stop(inst, true);
+		}
+	}
+}
+
+/*
+ * Stop all container and service instances, kill the inittab processes and
+ * then sleep for the longest term_timeout found among the instances.
+ */
+void
+service_stop_all(void)
+{
+	unsigned int grace = 0;
+
+	/* containers first, then regular services */
+	service_avl_stop_all(&containers, &grace);
+	service_avl_stop_all(&services, &grace);
+
+	procd_inittab_kill();
+
+	sleep(grace);
+}
+
static int
service_handle_update(struct ubus_context *ctx, struct ubus_object *obj,
struct ubus_request_data *req, const char *method,
{
struct blob_attr *tb[__SERVICE_ATTR_MAX], *cur;
struct service *s;
+ bool container = is_container_obj(obj);
blobmsg_parse(service_attrs, __SERVICE_ATTR_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));
if (!cur)
return UBUS_STATUS_INVALID_ARGUMENT;
- s = avl_find_element(&services, blobmsg_data(cur), s, avl);
+ if (container)
+ s = avl_find_element(&containers, blobmsg_data(cur), s, avl);
+ else
+ s = avl_find_element(&services, blobmsg_data(cur), s, avl);
+
if (!s)
return UBUS_STATUS_NOT_FOUND;
return 0;
}
+/*
+ * ubus handler shared by "console_set" and "console_attach" of the container
+ * object. The caller passes a file descriptor along with the request:
+ *  - "console_set" installs it as the instance's console stream;
+ *  - "console_attach" installs it as the console client stream, replacing a
+ *    previously attached client, and needs a console to be set already.
+ *
+ * "name" selects the container; "instance" is optional and defaults to the
+ * first instance of that container. On every failure path the received
+ * descriptor is closed before returning.
+ */
+static int
+container_handle_console(struct ubus_context *ctx, struct ubus_object *obj,
+			 struct ubus_request_data *req, const char *method,
+			 struct blob_attr *msg)
+{
+	bool attach = !strcmp(method, "console_attach");
+	struct blob_attr *tb[__CONTAINER_CONSOLE_MAX];
+	struct service *s;
+	struct service_instance *in = NULL;
+	int console_fd = -1;
+
+	console_fd = ubus_request_get_caller_fd(req);
+	if (console_fd < 0)
+		return UBUS_STATUS_INVALID_ARGUMENT;
+
+	if (!msg)
+		goto err_console_fd;
+
+	blobmsg_parse(container_console_policy, __CONTAINER_CONSOLE_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));
+	if (!tb[CONTAINER_CONSOLE_NAME])
+		goto err_console_fd;
+
+	s = avl_find_element(&containers, blobmsg_data(tb[CONTAINER_CONSOLE_NAME]), s, avl);
+	if (!s)
+		goto err_console_fd;
+
+	if (tb[CONTAINER_CONSOLE_INSTANCE]) {
+		in = vlist_find(&s->instances, blobmsg_data(tb[CONTAINER_CONSOLE_INSTANCE]), in, node);
+	} else if (!avl_is_empty(&s->instances.avl)) {
+		/*
+		 * Use first element in instances list. The emptiness check
+		 * matters: on an empty list the iteration macro leaves its
+		 * cursor pointing at the list head rather than at NULL.
+		 */
+		vlist_for_each_element(&s->instances, in, node)
+			break;
+	}
+	if (!in)
+		goto err_console_fd;
+
+	if (attach) {
+		/* nothing to attach to unless a console has been set */
+		if (in->console.fd.fd < 0) {
+			close(console_fd);
+			return UBUS_STATUS_NOT_SUPPORTED;
+		}
+
+		/* close and replace existing attached console */
+		if (in->console_client.fd.fd > -1)
+			close(in->console_client.fd.fd);
+
+		ustream_fd_init(&in->console_client, console_fd);
+	} else {
+		ustream_fd_init(&in->console, console_fd);
+	}
+
+	return UBUS_STATUS_OK;
+err_console_fd:
+	close(console_fd);
+	return UBUS_STATUS_INVALID_ARGUMENT;
+}
+
+/* Attribute indices for the service "watchdog" ubus call. */
+enum {
+ SERVICE_WATCHDOG_MODE,
+ SERVICE_WATCHDOG_TIMEOUT,
+ SERVICE_WATCHDOG_NAME,
+ SERVICE_WATCHDOG_INSTANCE,
+ __SERVICE_WATCHDOG_MAX,
+};
+
+/*
+ * blobmsg policy for the "watchdog" call: "mode" and "timeout" are 32-bit
+ * integers, "name" and "instance" are strings. Entries use designated
+ * initializers, so their order here need not follow the enum.
+ */
+static const struct blobmsg_policy service_watchdog_policy[__SERVICE_WATCHDOG_MAX] = {
+ [SERVICE_WATCHDOG_MODE] = { "mode", BLOBMSG_TYPE_INT32 },
+ [SERVICE_WATCHDOG_NAME] = { "name", BLOBMSG_TYPE_STRING },
+ [SERVICE_WATCHDOG_TIMEOUT] = { "timeout", BLOBMSG_TYPE_INT32 },
+ [SERVICE_WATCHDOG_INSTANCE] = { "instance", BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * ubus "watchdog" handler: looks up instance "instance" of service "name",
+ * optionally updates its watchdog mode and timeout, (re)arms or cancels the
+ * watchdog timer accordingly and replies with the resulting settings.
+ * Returns UBUS_STATUS_NOT_FOUND when either attribute is missing or the
+ * service / instance does not exist.
+ */
+static int
+service_handle_watchdog(struct ubus_context *ctx, struct ubus_object *obj,
+			struct ubus_request_data *req, const char *method,
+			struct blob_attr *msg)
+{
+	struct blob_attr *tb[__SERVICE_WATCHDOG_MAX] = {0};
+	struct blob_attr *name_attr, *inst_attr;
+	struct service_instance *in;
+	struct service *s;
+
+	blobmsg_parse(service_watchdog_policy, __SERVICE_WATCHDOG_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));
+
+	name_attr = tb[SERVICE_WATCHDOG_NAME];
+	if (!name_attr)
+		return UBUS_STATUS_NOT_FOUND;
+
+	s = avl_find_element(&services, blobmsg_data(name_attr), s, avl);
+	if (!s)
+		return UBUS_STATUS_NOT_FOUND;
+
+	inst_attr = tb[SERVICE_WATCHDOG_INSTANCE];
+	if (!inst_attr)
+		return UBUS_STATUS_NOT_FOUND;
+
+	in = vlist_find(&s->instances, blobmsg_data(inst_attr), in, node);
+	if (!in) {
+		ERROR("instance %s not found\n", blobmsg_get_string(inst_attr));
+		return UBUS_STATUS_NOT_FOUND;
+	}
+
+	/* both settings are optional; absent ones keep their current value */
+	if (tb[SERVICE_WATCHDOG_MODE])
+		in->watchdog.mode = blobmsg_get_u32(tb[SERVICE_WATCHDOG_MODE]);
+
+	if (tb[SERVICE_WATCHDOG_TIMEOUT])
+		in->watchdog.freq = blobmsg_get_u32(tb[SERVICE_WATCHDOG_TIMEOUT]);
+
+	/* freq is multiplied by 1000 before being handed to the uloop timer */
+	if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED)
+		uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000);
+	else
+		uloop_timeout_cancel(&in->watchdog.timeout);
+
+	blob_buf_init(&b, 0);
+	blobmsg_add_string(&b, "name", blobmsg_get_string(name_attr));
+	blobmsg_add_string(&b, "instance", blobmsg_get_string(inst_attr));
+	blobmsg_add_u32(&b, "mode", in->watchdog.mode);
+	blobmsg_add_u32(&b, "timeout", in->watchdog.freq);
+
+	ubus_send_reply(ctx, req, b.head);
+
+	return UBUS_STATUS_OK;
+}
+
static struct ubus_method main_object_methods[] = {
UBUS_METHOD("set", service_handle_set, service_set_attrs),
UBUS_METHOD("add", service_handle_set, service_set_attrs),
UBUS_METHOD("validate", service_handle_validate, validate_policy),
UBUS_METHOD("get_data", service_get_data, get_data_policy),
UBUS_METHOD("state", service_handle_state, service_state_attrs),
+ UBUS_METHOD("watchdog", service_handle_watchdog, service_watchdog_policy),
};
static struct ubus_object_type main_object_type =
};
int
-service_start_early(char *name, char *cmdline)
+service_start_early(char *name, char *cmdline, char *user, char *group)
{
void *instances, *instance, *command, *respawn;
char *t;
blobmsg_add_string(&b, NULL, "1");
blobmsg_add_string(&b, NULL, "0");
blobmsg_close_array(&b, respawn);
+ if (user)
+ blobmsg_add_string(&b, "user", user);
+ if (group)
+ blobmsg_add_string(&b, "group", group);
+
blobmsg_close_table(&b, instance);
blobmsg_close_table(&b, instances);
void service_stopped(struct service *s)
{
if (s->deleted && avl_is_empty(&s->instances.avl)) {
- service_event("service.stop", s->name, NULL);
- avl_delete(&services, &s->avl);
+ if (s->container) {
+ service_event("container.stop", s->name, NULL);
+ avl_delete(&containers, &s->avl);
+ } else {
+ service_event("service.stop", s->name, NULL);
+ avl_delete(&services, &s->avl);
+ }
trigger_del(s);
service_validate_del(s);
free(s->trigger);
ubus_event_bcast(type, "service", service, "instance", instance);
}
+/*
+ * Methods of the "container" ubus object. set/add/list/delete/state reuse
+ * the service handlers; get_features and the console calls are specific to
+ * this object.
+ */
+static struct ubus_method container_object_methods[] = {
+ UBUS_METHOD("set", service_handle_set, service_set_attrs),
+ UBUS_METHOD("add", service_handle_set, service_set_attrs),
+ UBUS_METHOD("list", service_handle_list, service_list_attrs),
+ UBUS_METHOD("delete", service_handle_delete, service_del_attrs),
+ UBUS_METHOD("state", service_handle_state, service_state_attrs),
+ UBUS_METHOD_NOARG("get_features", container_handle_features),
+ UBUS_METHOD("console_set", container_handle_console, container_console_policy),
+ UBUS_METHOD("console_attach", container_handle_console, container_console_policy),
+};
+
+static struct ubus_object_type container_object_type =
+ UBUS_OBJECT_TYPE("container", container_object_methods);
+
+/* The object name "container" is what is_container_obj() compares against. */
+static struct ubus_object container_object = {
+ .name = "container",
+ .type = &container_object_type,
+ .methods = container_object_methods,
+ .n_methods = ARRAY_SIZE(container_object_methods),
+};
+
+/*
+ * Remember the ubus context and register the main service object. The
+ * container object is registered only when /sbin/ujail can be stat()ed.
+ */
void ubus_init_service(struct ubus_context *_ctx)
{
+ struct stat statbuf;
+
 ctx = _ctx;
 ubus_add_object(ctx, &main_object);
+
+ /* register the container object only if ujail is present */
+ if (!stat("/sbin/ujail", &statbuf))
+ ubus_add_object(ctx, &container_object);
}
#include "../utils/utils.h"
extern struct avl_tree services;
+extern struct avl_tree containers;
struct vrule {
struct avl_node avl;
const char *name;
bool deleted;
bool autostart;
+ bool container;
struct blob_attr *trigger;
struct vlist_tree instances;
void service_validate_add(struct service *s, struct blob_attr *attr);
void service_validate_dump(struct blob_buf *b, struct service *s);
void service_validate_dump_all(struct blob_buf *b, char *p, char *s);
-int service_start_early(char *name, char *cmdline);
+int service_start_early(char *name, char *cmdline, char *user, char *group);
void service_stopped(struct service *s);
void service_validate_del(struct service *s);
void service_event(const char *type, const char *service, const char *instance);
+void service_stop_all(void);
pid_t pid;
int n = 0;
int rem;
+ int fd;
pid = fork();
if (pid < 0) {
return;
}
- if (debug < 3) {
- close(STDIN_FILENO);
- close(STDOUT_FILENO);
- close(STDERR_FILENO);
+ if (debug < 3 && (fd = open("/dev/null", O_RDWR)) >= 0) {
+ dup2(fd, STDIN_FILENO);
+ dup2(fd, STDOUT_FILENO);
+ dup2(fd, STDERR_FILENO);
+ if (fd > STDERR_FILENO)
+ close(fd);
}
blobmsg_for_each_attr(cur, cmd->data, rem)
static void trigger_command_add(struct trigger *t, struct blob_attr *data)
{
struct trigger_command *cmd;
- int remaining;
+ int64_t remaining;
cmd = avl_find_element(&trigger_pending, data, cmd, avl);
if (cmd) {
}
/* Extend timer if trigger timeout is bigger than remaining time */
- remaining = uloop_timeout_remaining(&cmd->delay);
+ remaining = uloop_timeout_remaining64(&cmd->delay);
if (remaining < t->timeout)
uloop_timeout_set(&cmd->delay, t->timeout);
struct blob_attr *attr;
const char *path;
- DEBUG(3, "ubus event %s\n", type);
+ P_DEBUG(3, "ubus event %s\n", type);
if (strcmp(type, "ubus.object.add") != 0)
return;
return;
path = blobmsg_data(attr);
- DEBUG(3, "ubus path %s\n", path);
+ P_DEBUG(3, "ubus path %s\n", path);
list_for_each_entry(o, &watch_objects, list) {
unsigned int id;
char *str;
str = blobmsg_format_json(msg, true);
- DEBUG(3, "Received ubus notify '%s': %s\n", method, str);
+ P_DEBUG(3, "Received ubus notify '%s': %s\n", method, str);
free(str);
}
*/
#include <fcntl.h>
+#include <pwd.h>
#include <sys/reboot.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <signal.h>
+#include "container.h"
#include "procd.h"
#include "syslog.h"
#include "plug/hotplug.h"
set_stdio(tty);
}
+/*
+ * Log the pending action, give the message a second to drain and then issue
+ * reboot(reboot_event). This function does not return: inside a container
+ * the process exits after the reboot() call, otherwise reboot() is issued
+ * from a vfork()ed child while the caller sleeps forever.
+ */
+static void perform_halt(void)
+{
+	if (reboot_event == RB_POWER_OFF)
+		LOG("- power down -\n");
+	else
+		LOG("- reboot -\n");
+
+	/* Allow time for last message to reach serial console, etc */
+	sleep(1);
+
+	if (is_container()) {
+		reboot(reboot_event);
+		exit(EXIT_SUCCESS);
+	}
+
+	/* We have to fork here, since the kernel calls do_exit(EXIT_SUCCESS)
+	 * in linux/kernel/sys.c, which can cause the machine to panic when
+	 * the init process exits... */
+	if (!vfork()) { /* child */
+		reboot(reboot_event);
+		_exit(EXIT_SUCCESS);
+	}
+
+	while (1)
+		sleep(1);
+}
+
static void state_enter(void)
{
char ubus_cmd[] = "/sbin/ubusd";
+ struct passwd *p;
switch (state) {
case STATE_EARLY:
// try to reopen incase the wdt was not available before coldplug
watchdog_init(0);
set_stdio("console");
- LOG("- ubus -\n");
+ p = getpwnam("ubus");
+ if (p) {
+ int ret;
+ LOG("- ubus -\n");
+ mkdir(p->pw_dir, 0755);
+ ret = chown(p->pw_dir, p->pw_uid, p->pw_gid);
+ if (ret)
+ LOG("- ubus - failed to chown(%s)\n", p->pw_dir);
+ } else {
+ LOG("- ubus (running as root!) -\n");
+ }
+
procd_connect_ubus();
- service_start_early("ubus", ubus_cmd);
+ service_start_early("ubus", ubus_cmd, p?"ubus":NULL, p?"ubus":NULL);
break;
case STATE_INIT:
sync();
sleep(1);
#ifndef DISABLE_INIT
- if (reboot_event == RB_POWER_OFF)
- LOG("- power down -\n");
- else
- LOG("- reboot -\n");
-
- /* Allow time for last message to reach serial console, etc */
- sleep(1);
-
- /* We have to fork here, since the kernel calls do_exit(EXIT_SUCCESS)
- * in linux/kernel/sys.c, which can cause the machine to panic when
- * the init process exits... */
- if (!vfork( )) { /* child */
- reboot(reboot_event);
- _exit(EXIT_SUCCESS);
- }
- while (1)
- sleep(1);
+ perform_halt();
#else
- exit(0);
+ exit(EXIT_SUCCESS);
#endif
break;
#include <sys/types.h>
#include <sys/reboot.h>
#include <sys/stat.h>
+#include <sys/statvfs.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <stdlib.h>
+#include <json-c/json_tokener.h>
+#include <libubox/blobmsg_json.h>
#include <libubox/uloop.h>
#include "procd.h"
#include "sysupgrade.h"
#include "watchdog.h"
+#include "service/service.h"
static struct blob_buf b;
static int notify;
static struct ubus_context *_ctx;
+static int initramfs;
+
+/* Result of feeding validation-script output to the JSON tokener. */
+enum vjson_state {
+ VJSON_ERROR, /* failure; an error message has been set */
+ VJSON_CONTINUE, /* tokener needs more input */
+ VJSON_SUCCESS, /* a JSON object was parsed and added to the blob buffer */
+};
+
+/*
+ * Determine the filesystem type of the root filesystem.
+ *
+ * Returns "initramfs" when the file-level initramfs flag is set. Otherwise
+ * /proc/self/mounts is scanned for the first entry mounted on "/" and its
+ * type field is returned; if that type is "overlay" the scan restarts
+ * looking for "/rom" instead. The result is cached in a static buffer (at
+ * most 15 characters) and reused on later calls. Returns NULL when the
+ * mounts file cannot be opened or no usable entry is found.
+ */
+static const char *system_rootfs_type(void) {
+ const char proc_mounts[] = "/proc/self/mounts";
+ static char fstype[16] = { 0 };
+ char *mountstr = NULL, *mp = "/", *pos, *tmp;
+ FILE *mounts;
+ size_t len = 0;
+ bool found = false;
+
+ if (initramfs)
+ return "initramfs";
+
+ /* cached result of an earlier successful lookup */
+ if (fstype[0])
+ return fstype;
+
+ mounts = fopen(proc_mounts, "r");
+ if (!mounts)
+ return NULL;
+
+ while (getline(&mountstr, &len, mounts) != -1) {
+ /* skip the first space-separated field */
+ pos = strchr(mountstr, ' ');
+ if (!pos)
+ continue;
+
+ /* second field: compared against the wanted mount point */
+ tmp = pos + 1;
+ pos = strchr(tmp, ' ');
+ if (!pos)
+ continue;
+
+ *pos = '\0';
+ if (strcmp(tmp, mp))
+ continue;
+
+ /* third field: the filesystem type */
+ tmp = pos + 1;
+ pos = strchr(tmp, ' ');
+ if (!pos)
+ continue;
+
+ *pos = '\0';
+
+ if (!strcmp(tmp, "overlay")) {
+ /*
+ * there is no point in parsing overlay option string for
+ * lowerdir, as that can point to "/" being a previous
+ * overlay mount (after firstboot or sysupgrade config
+ * restore). Hence just assume the lowerdir is "/rom" and
+ * restart searching for that instead if that's not
+ * already the case.
+ */
+ if (!strcmp(mp, "/rom"))
+ break;
+
+ mp = "/rom";
+ fseek(mounts, 0, SEEK_SET);
+ continue;
+ }
+
+ found = true;
+ break;
+ }
+
+ /* tmp still points into mountstr here, so copy before freeing it */
+ if (found)
+ strncpy(fstype, tmp, sizeof(fstype) - 1);
+
+ fstype[sizeof(fstype) - 1]= '\0';
+ free(mountstr);
+ fclose(mounts);
+
+ if (found)
+ return fstype;
+ else
+ return NULL;
+}
static int system_board(struct ubus_context *ctx, struct ubus_object *obj,
struct ubus_request_data *req, const char *method,
void *c;
char line[256];
char *key, *val, *next;
+ const char *rootfs_type = system_rootfs_type();
struct utsname utsname;
FILE *f;
if (!key || !val)
continue;
+#ifdef __aarch64__
+ if (!strcasecmp(key, "CPU revision")) {
+ snprintf(line, sizeof(line), "ARMv8 Processor rev %lu", strtoul(val + 2, NULL, 16));
+ blobmsg_add_string(&b, "system", line);
+ break;
+ }
+#elif __riscv
+ if (!strcasecmp(key, "isa")) {
+ snprintf(line, sizeof(line), "RISC-V (%s)", val + 2);
+ blobmsg_add_string(&b, "system", line);
+ break;
+ }
+#else
if (!strcasecmp(key, "system type") ||
!strcasecmp(key, "processor") ||
!strcasecmp(key, "cpu") ||
break;
}
}
+#endif
}
fclose(f);
fclose(f);
}
+ if (rootfs_type)
+ blobmsg_add_string(&b, "rootfs_type", rootfs_type);
+
if ((f = fopen("/etc/openwrt_release", "r")) != NULL)
{
c = blobmsg_open_table(&b, "release");
return UBUS_STATUS_OK;
}
+/*
+ * Convert @b blocks of @bs bytes each into KiB, rounded to the nearest KiB.
+ * The product is computed in unsigned long long before scaling.
+ */
+static unsigned long
+kscale(unsigned long b, unsigned long bs)
+{
+	unsigned long long bytes = (unsigned long long) b * bs;
+
+	return (bytes + 512) / 1024;
+}
+
static int system_info(struct ubus_context *ctx, struct ubus_object *obj,
struct ubus_request_data *req, const char *method,
struct blob_attr *msg)
#ifdef linux
struct sysinfo info;
void *c;
+ char line[256];
+ char *key, *val;
+ unsigned long long available, cached;
+ FILE *f;
+ int i;
+ struct statvfs s;
+ const char *fslist[] = {
+ "/", "root",
+ "/tmp", "tmp",
+ };
if (sysinfo(&info))
return UBUS_STATUS_UNKNOWN_ERROR;
+
+ if ((f = fopen("/proc/meminfo", "r")) == NULL)
+ return UBUS_STATUS_UNKNOWN_ERROR;
+
+ /* if linux < 3.14 MemAvailable is not in meminfo */
+ available = 0;
+ cached = 0;
+
+ while (fgets(line, sizeof(line), f))
+ {
+ key = strtok(line, " :");
+ val = strtok(NULL, " ");
+
+ if (!key || !val)
+ continue;
+
+ if (!strcasecmp(key, "MemAvailable"))
+ available = 1024 * atoll(val);
+ else if (!strcasecmp(key, "Cached"))
+ cached = 1024 * atoll(val);
+ }
+
+ fclose(f);
#endif
now = time(NULL);
blobmsg_close_array(&b, c);
c = blobmsg_open_table(&b, "memory");
- blobmsg_add_u64(&b, "total", info.mem_unit * info.totalram);
- blobmsg_add_u64(&b, "free", info.mem_unit * info.freeram);
- blobmsg_add_u64(&b, "shared", info.mem_unit * info.sharedram);
- blobmsg_add_u64(&b, "buffered", info.mem_unit * info.bufferram);
+ blobmsg_add_u64(&b, "total",
+ (uint64_t)info.mem_unit * (uint64_t)info.totalram);
+ blobmsg_add_u64(&b, "free",
+ (uint64_t)info.mem_unit * (uint64_t)info.freeram);
+ blobmsg_add_u64(&b, "shared",
+ (uint64_t)info.mem_unit * (uint64_t)info.sharedram);
+ blobmsg_add_u64(&b, "buffered",
+ (uint64_t)info.mem_unit * (uint64_t)info.bufferram);
+ blobmsg_add_u64(&b, "available", available);
+ blobmsg_add_u64(&b, "cached", cached);
blobmsg_close_table(&b, c);
+ for (i = 0; i < sizeof(fslist) / sizeof(fslist[0]); i += 2) {
+ if (statvfs(fslist[i], &s))
+ continue;
+
+ c = blobmsg_open_table(&b, fslist[i+1]);
+
+ if (!s.f_frsize)
+ s.f_frsize = s.f_bsize;
+
+ blobmsg_add_u64(&b, "total", kscale(s.f_blocks, s.f_frsize));
+ blobmsg_add_u64(&b, "free", kscale(s.f_bfree, s.f_frsize));
+ blobmsg_add_u64(&b, "used", kscale(s.f_blocks - s.f_bfree, s.f_frsize));
+ blobmsg_add_u64(&b, "avail", kscale(s.f_bavail, s.f_frsize));
+
+ blobmsg_close_table(&b, c);
+ }
+
c = blobmsg_open_table(&b, "swap");
- blobmsg_add_u64(&b, "total", info.mem_unit * info.totalswap);
- blobmsg_add_u64(&b, "free", info.mem_unit * info.freeswap);
+ blobmsg_add_u64(&b, "total",
+ (uint64_t)info.mem_unit * (uint64_t)info.totalswap);
+ blobmsg_add_u64(&b, "free",
+ (uint64_t)info.mem_unit * (uint64_t)info.freeswap);
blobmsg_close_table(&b, c);
#endif
if (timeout <= frequency)
timeout = frequency * 2;
- watchdog_timeout(timeout);
+ watchdog_timeout(timeout);
}
if (tb[WDT_MAGICCLOSE])
return 0;
}
+/*
+ * Format an error message into a static buffer, prefixed with
+ * "Firmware image couldn't be validated: ", and store a pointer to it in *@b.
+ * If formatting itself fails, *@b is pointed at a fixed fallback string.
+ * Always returns VJSON_ERROR so callers can write "return vjson_error(...)".
+ * The buffer is shared between calls; each call overwrites the previous
+ * message.
+ */
+__attribute__((format (printf, 2, 3)))
+static enum vjson_state vjson_error(char **b, const char *fmt, ...)
+{
+	static char buf[256] = { 0 };
+	const char *pfx = "Firmware image couldn't be validated: ";
+	va_list va;
+	int r;
+
+	r = snprintf(buf, sizeof(buf), "%s", pfx);
+	if (r < 0) {
+		*b = "vjson_error() snprintf failed";
+		return VJSON_ERROR;
+	}
+
+	va_start(va, fmt);
+	r = vsnprintf(buf+r, sizeof(buf)-r, fmt, va);
+	/* release the argument list on every path, including the failure below */
+	va_end(va);
+	if (r < 0) {
+		*b = "vjson_error() vsnprintf failed";
+		return VJSON_ERROR;
+	}
+
+	*b = buf;
+	return VJSON_ERROR;
+}
+
+/*
+ * Feed @len bytes from @buf to the tokener @tok.
+ * Returns VJSON_CONTINUE when the tokener wants more input, VJSON_SUCCESS
+ * after a JSON object was parsed and added to the global blob buffer b, and
+ * VJSON_ERROR (with *@err set) on a parse error or a non-object result.
+ */
+static enum vjson_state vjson_parse_token(json_tokener *tok, char *buf, ssize_t len, char **err)
+{
+	json_object *parsed = json_tokener_parse_ex(tok, buf, len);
+	enum json_tokener_error jerr = json_tokener_get_error(tok);
+
+	switch (jerr) {
+	case json_tokener_continue:
+		return VJSON_CONTINUE;
+	case json_tokener_success:
+		break;
+	default:
+		return vjson_error(err, "failed to parse JSON: %s (%d)",
+				   json_tokener_error_desc(jerr),
+				   jerr);
+	}
+
+	if (json_object_get_type(parsed) != json_type_object) {
+		json_object_put(parsed);
+		return vjson_error(err, "result is not an JSON object");
+	}
+
+	blobmsg_add_object(&b, parsed);
+	json_object_put(parsed);
+
+	return VJSON_SUCCESS;
+}
+
+/*
+ * Read JSON from @fd in 64-byte chunks and feed it to a tokener until a
+ * result other than VJSON_CONTINUE is produced or read() reports end of file.
+ * Returns the state of the last chunk parsed; if nothing could be parsed the
+ * initial VJSON_ERROR is returned. *@err is set to a message describing the
+ * failure (incomplete input, no input, read error or parse error).
+ */
+static enum vjson_state vjson_parse(int fd, char **err)
+{
+ enum vjson_state r = VJSON_ERROR;
+ size_t read_count = 0;
+ char buf[64] = { 0 };
+ json_tokener *tok;
+ ssize_t len;
+ int _errno;
+
+ tok = json_tokener_new();
+ if (!tok)
+ return vjson_error(err, "json_tokener_new() failed");
+
+ /* default message, kept if the input ends before parsing completes */
+ vjson_error(err, "incomplete JSON input");
+
+ while ((len = read(fd, buf, sizeof(buf)))) {
+ /* interrupted by a signal: retry the read */
+ if (len < 0 && errno == EINTR)
+ continue;
+
+ if (len < 0) {
+ /* save errno before json_tokener_free() can change it */
+ _errno = errno;
+ json_tokener_free(tok);
+ return vjson_error(err, "read() failed: %s (%d)",
+ strerror(_errno), _errno);
+ }
+
+ read_count += len;
+ r = vjson_parse_token(tok, buf, len, err);
+ if (r != VJSON_CONTINUE)
+ break;
+
+ memset(buf, 0, sizeof(buf));
+ }
+
+ /* nothing was read at all: replace the default message */
+ if (read_count == 0)
+ vjson_error(err, "no JSON input");
+
+ json_tokener_free(tok);
+ return r;
+}
+
+/**
+ * validate_firmware_image_call - perform validation & store result in global b
+ *
+ * @file: firmware image path
+ * @err: set to a human readable message when validation could not be run
+ *
+ * Runs /usr/libexec/validate_firmware_image with @file as its argument in a
+ * forked child whose stdout is a pipe, and parses the JSON it prints.
+ * Returns VJSON_SUCCESS when a JSON object was parsed into b; any other
+ * value means failure.
+ */
+static enum vjson_state validate_firmware_image_call(const char *file, char **err)
+{
+	const char *path = "/usr/libexec/validate_firmware_image";
+	enum vjson_state ret = VJSON_ERROR;
+	int _errno;
+	int fds[2];
+	int fd;
+
+	blob_buf_init(&b, 0);
+	vjson_error(err, "unhandled error");
+
+	if (pipe(fds)) {
+		_errno = errno;
+		return vjson_error(err, "pipe() failed: %s (%d)",
+				   strerror(_errno), _errno);
+	}
+
+	switch (fork()) {
+	case -1:
+		_errno = errno;
+
+		close(fds[0]);
+		close(fds[1]);
+
+		return vjson_error(err, "fork() failed: %s (%d)",
+				   strerror(_errno), _errno);
+	case 0:
+		/* Set stdin & stderr to /dev/null */
+		fd = open("/dev/null", O_RDWR);
+		if (fd >= 0) {
+			dup2(fd, 0);
+			dup2(fd, 2);
+			/* don't close a descriptor that now is stdin/stdout/stderr */
+			if (fd > 2)
+				close(fd);
+		}
+
+		/* Set stdout to the shared pipe */
+		dup2(fds[1], 1);
+		close(fds[0]);
+		close(fds[1]);
+
+		execl(path, path, file, NULL);
+		/*
+		 * exec failed: leave with _exit() so the child neither runs
+		 * the parent's atexit handlers nor flushes its stdio buffers.
+		 */
+		_exit(errno);
+	}
+
+	/* Parent process */
+	close(fds[1]);
+
+	ret = vjson_parse(fds[0], err);
+	close(fds[0]);
+
+	return ret;
+}
+
+/* Attribute indices for the "validate_firmware_image" ubus call. */
+enum {
+ VALIDATE_FIRMWARE_IMAGE_PATH,
+ __VALIDATE_FIRMWARE_IMAGE_MAX,
+};
+
+/* blobmsg policy: the only attribute is the string "path". */
+static const struct blobmsg_policy validate_firmware_image_policy[__VALIDATE_FIRMWARE_IMAGE_MAX] = {
+ [VALIDATE_FIRMWARE_IMAGE_PATH] = { .name = "path", .type = BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * ubus "validate_firmware_image" handler: runs the validation helper on the
+ * image given by "path" and replies with the JSON object it produced.
+ * Returns UBUS_STATUS_INVALID_ARGUMENT when the message or "path" is missing
+ * and UBUS_STATUS_UNKNOWN_ERROR when validation did not yield a result.
+ */
+static int validate_firmware_image(struct ubus_context *ctx,
+				   struct ubus_object *obj,
+				   struct ubus_request_data *req,
+				   const char *method, struct blob_attr *msg)
+{
+	struct blob_attr *tb[__VALIDATE_FIRMWARE_IMAGE_MAX];
+	struct blob_attr *path_attr;
+	char *err;
+
+	if (!msg)
+		return UBUS_STATUS_INVALID_ARGUMENT;
+
+	blobmsg_parse(validate_firmware_image_policy, __VALIDATE_FIRMWARE_IMAGE_MAX, tb, blob_data(msg), blob_len(msg));
+
+	path_attr = tb[VALIDATE_FIRMWARE_IMAGE_PATH];
+	if (!path_attr)
+		return UBUS_STATUS_INVALID_ARGUMENT;
+
+	/* on success the parsed result sits in the global blob buffer b */
+	if (validate_firmware_image_call(blobmsg_get_string(path_attr), &err) != VJSON_SUCCESS)
+		return UBUS_STATUS_UNKNOWN_ERROR;
+
+	ubus_send_reply(ctx, req, b.head);
+
+	return UBUS_STATUS_OK;
+}
+
enum {
SYSUPGRADE_PATH,
+ SYSUPGRADE_FORCE,
+ SYSUPGRADE_BACKUP,
SYSUPGRADE_PREFIX,
SYSUPGRADE_COMMAND,
+ SYSUPGRADE_OPTIONS,
__SYSUPGRADE_MAX
};
static const struct blobmsg_policy sysupgrade_policy[__SYSUPGRADE_MAX] = {
[SYSUPGRADE_PATH] = { .name = "path", .type = BLOBMSG_TYPE_STRING },
+ [SYSUPGRADE_FORCE] = { .name = "force", .type = BLOBMSG_TYPE_BOOL },
+ [SYSUPGRADE_BACKUP] = { .name = "backup", .type = BLOBMSG_TYPE_STRING },
[SYSUPGRADE_PREFIX] = { .name = "prefix", .type = BLOBMSG_TYPE_STRING },
[SYSUPGRADE_COMMAND] = { .name = "command", .type = BLOBMSG_TYPE_STRING },
+ [SYSUPGRADE_OPTIONS] = { .name = "options", .type = BLOBMSG_TYPE_TABLE },
};
+/*
+ * Reply to @req with { "error": { "message": @message } }.
+ * Reinitialises the global blob buffer b to build the reply.
+ */
+static void sysupgrade_error(struct ubus_context *ctx,
+			     struct ubus_request_data *req,
+			     const char *message)
+{
+	void *tbl;
+
+	blob_buf_init(&b, 0);
+
+	tbl = blobmsg_open_table(&b, "error");
+	blobmsg_add_string(&b, "message", message);
+	blobmsg_close_table(&b, tbl);
+
+	ubus_send_reply(ctx, req, b.head);
+}
+
static int sysupgrade(struct ubus_context *ctx, struct ubus_object *obj,
struct ubus_request_data *req, const char *method,
struct blob_attr *msg)
{
+ enum {
+ VALIDATION_VALID,
+ VALIDATION_FORCEABLE,
+ VALIDATION_ALLOW_BACKUP,
+ __VALIDATION_MAX
+ };
+ static const struct blobmsg_policy validation_policy[__VALIDATION_MAX] = {
+ [VALIDATION_VALID] = { .name = "valid", .type = BLOBMSG_TYPE_BOOL },
+ [VALIDATION_FORCEABLE] = { .name = "forceable", .type = BLOBMSG_TYPE_BOOL },
+ [VALIDATION_ALLOW_BACKUP] = { .name = "allow_backup", .type = BLOBMSG_TYPE_BOOL },
+ };
+ struct blob_attr *validation[__VALIDATION_MAX];
struct blob_attr *tb[__SYSUPGRADE_MAX];
+ bool valid, forceable, allow_backup;
+ enum vjson_state ret = VJSON_ERROR;
+ char *err;
if (!msg)
return UBUS_STATUS_INVALID_ARGUMENT;
if (!tb[SYSUPGRADE_PATH] || !tb[SYSUPGRADE_PREFIX])
return UBUS_STATUS_INVALID_ARGUMENT;
+ ret = validate_firmware_image_call(blobmsg_get_string(tb[SYSUPGRADE_PATH]), &err);
+ if (ret != VJSON_SUCCESS) {
+ sysupgrade_error(ctx, req, err);
+ return UBUS_STATUS_UNKNOWN_ERROR;
+ }
+
+ blobmsg_parse(validation_policy, __VALIDATION_MAX, validation, blob_data(b.head), blob_len(b.head));
+
+ if (!validation[VALIDATION_VALID] || !validation[VALIDATION_FORCEABLE] ||
+ !validation[VALIDATION_ALLOW_BACKUP]) {
+ sysupgrade_error(ctx, req, "Validation script provided invalid input");
+ return UBUS_STATUS_INVALID_ARGUMENT;
+ }
+
+ valid = validation[VALIDATION_VALID] && blobmsg_get_bool(validation[VALIDATION_VALID]);
+ forceable = validation[VALIDATION_FORCEABLE] && blobmsg_get_bool(validation[VALIDATION_FORCEABLE]);
+ allow_backup = validation[VALIDATION_ALLOW_BACKUP] && blobmsg_get_bool(validation[VALIDATION_ALLOW_BACKUP]);
+
+ if (!valid) {
+ if (!forceable) {
+ sysupgrade_error(ctx, req, "Firmware image is broken and cannot be installed");
+ return UBUS_STATUS_NOT_SUPPORTED;
+ } else if (!tb[SYSUPGRADE_FORCE] || !blobmsg_get_bool(tb[SYSUPGRADE_FORCE])) {
+ sysupgrade_error(ctx, req, "Firmware image is invalid");
+ return UBUS_STATUS_NOT_SUPPORTED;
+ }
+ } else if (!allow_backup && tb[SYSUPGRADE_BACKUP]) {
+ sysupgrade_error(ctx, req, "Firmware image doesn't allow preserving a backup");
+ return UBUS_STATUS_NOT_SUPPORTED;
+ }
+
+ service_stop_all();
+
sysupgrade_exec_upgraded(blobmsg_get_string(tb[SYSUPGRADE_PREFIX]),
blobmsg_get_string(tb[SYSUPGRADE_PATH]),
- tb[SYSUPGRADE_COMMAND] ? blobmsg_get_string(tb[SYSUPGRADE_COMMAND]) : NULL);
+ tb[SYSUPGRADE_BACKUP] ? blobmsg_get_string(tb[SYSUPGRADE_BACKUP]) : NULL,
+ tb[SYSUPGRADE_COMMAND] ? blobmsg_get_string(tb[SYSUPGRADE_COMMAND]) : NULL,
+ tb[SYSUPGRADE_OPTIONS]);
/* sysupgrade_exec_upgraded() will never return unless something has gone wrong */
return UBUS_STATUS_UNKNOWN_ERROR;
UBUS_METHOD_NOARG("reboot", system_reboot),
UBUS_METHOD("watchdog", watchdog_set, watchdog_policy),
UBUS_METHOD("signal", proc_signal, signal_policy),
+ UBUS_METHOD("validate_firmware_image", validate_firmware_image, validate_firmware_image_policy),
UBUS_METHOD("sysupgrade", sysupgrade, sysupgrade_policy),
};
int ret;
_ctx = ctx;
+
+ initramfs = !!getenv("INITRAMFS");
+ if (initramfs)
+ unsetenv("INITRAMFS");
+
ret = ubus_add_object(ctx, &system_object);
if (ret)
ERROR("Failed to add object: %s\n", ubus_strerror(ret));
*/
+#define _GNU_SOURCE
#include "watchdog.h"
#include "sysupgrade.h"
+#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#include <libubox/blobmsg.h>
-void sysupgrade_exec_upgraded(const char *prefix, char *path, char *command)
+void sysupgrade_exec_upgraded(const char *prefix, char *path,
+ const char *backup, char *command,
+ struct blob_attr *options)
{
char *wdt_fd = watchdog_fd();
char *argv[] = { "/sbin/upgraded", NULL, NULL, NULL};
+ struct blob_attr *option;
+ int rem;
int ret;
ret = chroot(prefix);
watchdog_set_cloexec(false);
setenv("WDTFD", wdt_fd, 1);
}
+
+ if (backup)
+ setenv("UPGRADE_BACKUP", backup, 1);
+
+	/*
+	 * Export every integer entry of "options" to the upgraded process as
+	 * an UPGRADE_OPT_<NAME> environment variable. Names may only contain
+	 * alphanumerics and '_' and are upper-cased; entries with other
+	 * characters or non-integer types are reported on stderr and skipped.
+	 */
+	blobmsg_for_each_attr(option, options, rem) {
+		const char *prefix = "UPGRADE_OPT_";
+		char value[11];	/* 10 decimal digits of a 32-bit value + NUL */
+		char *name;
+		char *c;
+		unsigned int tmp;
+
+		if (asprintf(&name, "%s%s", prefix, blobmsg_name(option)) <= 0)
+			continue;
+		for (c = name + strlen(prefix); *c; c++) {
+			/*
+			 * The <ctype.h> functions require a value representable
+			 * as unsigned char; a negative plain char is undefined.
+			 */
+			unsigned char ch = (unsigned char)*c;
+
+			if (isalnum(ch) || ch == '_') {
+				*c = toupper(ch);
+			} else {
+				/* NULL marks the name as rejected */
+				c = NULL;
+				break;
+			}
+		}
+
+		if (!c) {
+			fprintf(stderr, "Option \"%s\" contains invalid characters\n",
+				blobmsg_name(option));
+			free(name);
+			continue;
+		}
+
+		switch (blobmsg_type(option)) {
+		case BLOBMSG_TYPE_INT32:
+			tmp = blobmsg_get_u32(option);
+			break;
+		case BLOBMSG_TYPE_INT16:
+			tmp = blobmsg_get_u16(option);
+			break;
+		case BLOBMSG_TYPE_INT8:
+			tmp = blobmsg_get_u8(option);
+			break;
+		default:
+			fprintf(stderr, "Option \"%s\" has unsupported type: %d\n",
+				blobmsg_name(option), blobmsg_type(option));
+			free(name);
+			continue;
+		}
+		/* tmp is unsigned so that it matches the %u conversion */
+		snprintf(value, sizeof(value), "%u", tmp);
+
+		setenv(name, value, 1);
+
+		free(name);
+	}
+
execvp(argv[0], argv);
/* Cleanup on failure */
#ifndef __PROCD_SYSUPGRADE_H
#define __PROCD_SYSUPGRADE_H
+struct blob_attr;
-void sysupgrade_exec_upgraded(const char *prefix, char *path, char *command);
+void sysupgrade_exec_upgraded(const char *prefix, char *path,
+ const char *backup, char *command,
+ struct blob_attr *options);
#endif
start_main_t __start_main__;
__start_main__ = dlsym(RTLD_NEXT, "__libc_start_main");
- if (!__start_main__)
+ if (!__start_main__) {
ERROR("failed to find __libc_start_main %s\n", dlerror());
-
+ return -1;
+ }
__main__ = main;
return (*__start_main__)(__preload_main__, argc, argv, auxvec,
uClibc_main __start_main__;
__start_main__ = dlsym(RTLD_NEXT, "__uClibc_main");
- if (!__start_main__)
+ if (!__start_main__) {
ERROR("failed to find __uClibc_main %s\n", dlerror());
+ return;
+ }
__main__ = main;
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
-#include <string.h>
+#include <libgen.h>
#include <syslog.h>
#include <limits.h>
#define PTRACE_EVENT_STOP 128
#endif
+#ifndef PTRACE_EVENT_SECCOMP
+/* undefined with uClibc-ng */
+#define PTRACE_EVENT_SECCOMP 7
+#endif
+
#include <libubox/ulog.h>
#include <libubox/uloop.h>
#include <libubox/blobmsg.h>
#define _offsetof(a, b) __builtin_offsetof(a,b)
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-#ifdef __amd64__
+#if defined (__aarch64__)
+#include <linux/ptrace.h>
+#elif defined(__amd64__)
#define reg_syscall_nr _offsetof(struct user, regs.orig_rax)
+#elif defined(__arm__)
+#include <asm/ptrace.h> /* for PTRACE_SET_SYSCALL */
+#define reg_syscall_nr _offsetof(struct user, regs.uregs[7])
+# if defined(__ARM_EABI__)
+# define reg_retval_nr _offsetof(struct user, regs.uregs[0])
+# endif
#elif defined(__i386__)
#define reg_syscall_nr _offsetof(struct user, regs.orig_eax)
#elif defined(__mips)
# define EF_REG2 8
# endif
#define reg_syscall_nr (EF_REG2 / 4)
-#elif defined(__arm__)
-#include <asm/ptrace.h> /* for PTRACE_SET_SYSCALL */
-#define reg_syscall_nr _offsetof(struct user, regs.uregs[7])
-# if defined(__ARM_EABI__)
-# define reg_retval_nr _offsetof(struct user, regs.uregs[0])
-# endif
#elif defined(__PPC__)
#define reg_syscall_nr _offsetof(struct user, regs.gpr[0])
#define reg_retval_nr _offsetof(struct user, regs.gpr[3])
static void print_syscalls(int policy, const char *json)
{
- void *c;
+ void *c, *d, *e;
int i;
+ char *tmp;
if (mode == UTRACE) {
set_syscall("rt_sigaction", 1);
qsort(sorted, SYSCALL_COUNT, sizeof(sorted[0]), cmp_count);
blob_buf_init(&b, 0);
- c = blobmsg_open_array(&b, "whitelist");
+ blobmsg_add_string(&b, "defaultAction", "SCMP_ACT_KILL_PROCESS");
+ c = blobmsg_open_array(&b, "syscalls");
+ d = blobmsg_open_table(&b, "");
+ e = blobmsg_open_array(&b, "names");
for (i = 0; i < SYSCALL_COUNT; i++) {
int sc = sorted[i].syscall;
ULOG_ERR("no name found for syscall(%d)\n", sc);
}
}
+ blobmsg_close_array(&b, e);
+ blobmsg_add_string(&b, "action", "SCMP_ACT_ALLOW");
+ blobmsg_close_table(&b, d);
blobmsg_close_array(&b, c);
- blobmsg_add_u32(&b, "policy", policy);
if (json) {
FILE *fp = fopen(json, "w");
if (fp) {
- fprintf(fp, "%s", blobmsg_format_json_indent(b.head, true, 0));
+ tmp = blobmsg_format_json_indent(b.head, true, 0);
+ if (!tmp) {
+ fclose(fp);
+ return;
+ }
+
+ fprintf(fp, "%s\n", tmp);
+ free(tmp);
fclose(fp);
ULOG_INFO("saving syscall trace to %s\n", json);
} else {
ULOG_ERR("failed to open %s\n", json);
}
} else {
- printf("%s\n",
- blobmsg_format_json_indent(b.head, true, 0));
- }
+ tmp = blobmsg_format_json_indent(b.head, true, 0);
+ if (!tmp)
+ return;
+ printf("%s\n", tmp);
+ free(tmp);
+ }
}
static void report_seccomp_vialation(pid_t pid, unsigned syscall)
char buf[200];
snprintf(buf, sizeof(buf), "/proc/%d/cmdline", pid);
int f = open(buf, O_RDONLY);
+ if (f < 0)
+ return;
+
int r = read(f, buf, sizeof(buf) - 1);
+ buf[sizeof(buf) - 1] = '\0';
+
if (r >= 0)
buf[r] = 0;
else
if (WIFSTOPPED(ret) || (ret >> 16)) {
if (WSTOPSIG(ret) & 0x80) {
if (!tracee->in_syscall) {
+#ifdef __aarch64__
+ int syscall = -1;
+ struct ptrace_syscall_info ptsi = {.op=PTRACE_SYSCALL_INFO_ENTRY};
+ if (ptrace(PTRACE_GET_SYSCALL_INFO, c->pid, sizeof(ptsi), &ptsi) != -1)
+ syscall = ptsi.entry.nr;
+#else
int syscall = ptrace(PTRACE_PEEKUSER, c->pid, reg_syscall_nr);
+#endif
int i = syscall_index(syscall);
if (i >= 0) {
syscall_count[i]++;
} else if ((ret >> 16) == PTRACE_EVENT_STOP) {
/* Nothing special to do here */
} else if ((ret >> 8) == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
+#ifdef __aarch64__
+ int syscall = -1;
+ struct ptrace_syscall_info ptsi = {.op=PTRACE_SYSCALL_INFO_SECCOMP};
+ if (ptrace(PTRACE_GET_SYSCALL_INFO, c->pid, sizeof(ptsi), &ptsi) != -1)
+ syscall = ptsi.entry.nr;
+#else
int syscall = ptrace(PTRACE_PEEKUSER, c->pid, reg_syscall_nr);
#if defined(__arm__)
ptrace(PTRACE_SET_SYSCALL, c->pid, 0, -1);
ptrace(PTRACE_POKEUSER, c->pid, reg_retval_nr, -ENOSYS);
#else
ptrace(PTRACE_POKEUSER, c->pid, reg_syscall_nr, -1);
+#endif
#endif
report_seccomp_vialation(c->pid, syscall);
} else {
ULOG_ERR("failed to exec %s: %m\n", _argv[0]);
free(_argv);
+ if (_envp[0])
+ free(_envp[0]);
+ if (newenv == 2 && _envp[1])
+ free(_envp[1]);
free(_envp);
return ret;
}
static struct ubus_context *ctx;
static struct uloop_timeout ubus_timer;
static int timeout;
+static struct udebug_ubus udebug;
+
+/*
+ * Callback registered with udebug_ubus_init(): forwards the "enabled" flag
+ * to procd_udebug_set_enabled(). The ctx and data arguments are not used.
+ */
+static void
+procd_udebug_cb(struct udebug_ubus *ctx, struct blob_attr *data, bool enabled)
+{
+ procd_udebug_set_enabled(enabled);
+}
static void reset_timeout(void)
{
return;
}
+ udebug_ubus_init(&udebug, ctx, "procd", procd_udebug_cb);
ctx->connection_lost = ubus_disconnect_cb;
+ ubus_init_hotplug(ctx);
ubus_init_service(ctx);
ubus_init_system(ctx);
watch_ubus(ctx);
INCLUDE_DIRECTORIES(${ubox_include_dir})
ADD_DEFINITIONS(-Os -ggdb -Wall -Werror --std=gnu99 -Wmissing-declarations)
ADD_EXECUTABLE(upgraded upgraded.c ../watchdog.c)
-TARGET_LINK_LIBRARIES(upgraded ubox)
+TARGET_LINK_LIBRARIES(upgraded ${ubox})
INSTALL(TARGETS upgraded
RUNTIME DESTINATION sbin
)
/* Child */
execvp(args[0], args);
fprintf(stderr, "Failed to exec sysupgrade\n");
- _exit(-1);
+ _exit(EXIT_FAILURE);
}
uloop_process_add(&upgrade_proc);
return true;
}
-char* get_cmdline_val(const char* name, char* out, int len)
+/*
+ * Copy the first line of /sys/class/tty/console/active into out.
+ *
+ * out: destination buffer
+ * len: size of out in bytes; the result is always NUL-terminated and
+ *      truncated if it does not fit
+ *
+ * Returns out on success, NULL if the file cannot be opened or read, if it
+ * contains nothing but newlines, or if out/len are unusable.
+ */
+char *get_active_console(char *out, int len)
+{
+	char line[CMDLINE_SIZE + 1];
+	char *name, *saveptr;
+	ssize_t r;
+	int fd;
+
+	if (!out || len <= 0)
+		return NULL;
+
+	fd = open("/sys/class/tty/console/active", O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	r = read(fd, line, sizeof(line) - 1);
+	close(fd);
+
+	if (r <= 0)
+		return NULL;
+
+	/* terminate right after the bytes that were actually read */
+	line[r] = '\0';
+
+	/* The active file is terminated by a newline which we need to strip */
+	name = strtok_r(line, "\n", &saveptr);
+	if (!name)
+		return NULL;
+
+	/* strncpy() does not terminate on truncation, so do it explicitly */
+	strncpy(out, name, len);
+	out[len - 1] = '\0';
+
+	return out;
+}
+
+char *get_cmdline_val_offset(const char *name, char *out, int len, int offset)
{
char line[CMDLINE_SIZE + 1], *c, *sptr;
- int fd = open("/proc/cmdline", O_RDONLY);
+ int i, fd = open("/proc/cmdline", O_RDONLY);
ssize_t r = read(fd, line, sizeof(line) - 1);
close(fd);
line[r] = 0;
- for (c = strtok_r(line, " \t\n", &sptr); c;
+ for (i = 0, c = strtok_r(line, " \t\n", &sptr); c;
c = strtok_r(NULL, " \t\n", &sptr)) {
char *sep = strchr(c, '=');
+ if (sep == NULL)
+ continue;
+
ssize_t klen = sep - c;
- if (klen < 0 || strncmp(name, c, klen) || name[klen] != 0)
+ if (strncmp(name, c, klen) || name[klen] != 0)
continue;
+ if (i++ < offset)
+ continue;
strncpy(out, &sep[1], len);
out[len-1] = 0;
return out;
void blobmsg_list_free(struct blobmsg_list *list);
bool blobmsg_list_equal(struct blobmsg_list *l1, struct blobmsg_list *l2);
void blobmsg_list_move(struct blobmsg_list *list, struct blobmsg_list *src);
-char* get_cmdline_val(const char* name, char* out, int len);
+char *get_cmdline_val_offset(const char *name, char *out, int len, int offset);
+char *get_active_console(char *out, int len);
+
+#define get_cmdline_val(name, out, len) \
+ get_cmdline_val_offset(name, out, len, 0)
int patch_fd(const char *device, int fd, int flags);
int patch_stdio(const char *device);
--- /dev/null
+/*
+ * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <glob.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <signal.h>
+#include <termios.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <libubus.h>
+#include <libubox/avl-cmp.h>
+#include <libubox/blobmsg.h>
+#include <libubox/blobmsg_json.h>
+#include <libubox/ustream.h>
+
+#include "log.h"
+
+#define UXC_VERSION "0.3"
+#define OCI_VERSION_STRING "1.0.2"
+#define UXC_ETC_CONFDIR "/etc/uxc"
+#define UXC_VOL_CONFDIR "/tmp/run/uvol/.meta/uxc"
+
+static bool verbose = false;
+static bool json_output = false;
+static char *confdir = UXC_ETC_CONFDIR;
+static struct ustream_fd cufd;
+static struct ustream_fd lufd;
+
+
+/*
+ * One entry of the "runtime" AVL tree. Entries are created by list_cb() from
+ * the reply to the "list" call on the "container" ubus object and released
+ * by runtime_free().
+ */
+struct runtime_state {
+ struct avl_node avl;
+ /* tree key; stored in the same allocation as the struct (runtime_alloc) */
+ char *container_name;
+ /* strdup()ed in list_cb(), free()d in runtime_free() */
+ char *instance_name;
+ char *jail_name;
+ /* "running" flag of the instance; false when the reply omits it */
+ bool running;
+ /* "pid" and "exit_code" of the instance, -1 when the reply omits them */
+ int runtime_pid;
+ int exitcode;
+ /* copy of the "state" reply made by ocistate_cb(), or NULL */
+ struct blob_attr *ocistate;
+};
+
+/*
+ * One entry of the "settings" AVL tree, filled by settings_add() from the
+ * parsed settings files held in settingsbuf.
+ */
+struct settings {
+ struct avl_node avl;
+ /* tree key; stored in the same allocation as the struct (settings_alloc) */
+ char *container_name;
+ /* name of the blobmsg table the settings were read from */
+ const char *fname;
+ /* both point into settingsbuf and are not owned by this struct; NULL if unset */
+ char *tmprwsize;
+ char *writepath;
+ /* -1 when the settings file has no "autostart" entry, otherwise 0 or 1 */
+ signed char autostart;
+ /* "volumes" array attribute inside settingsbuf, or NULL */
+ struct blob_attr *volumes;
+};
+
+enum uxc_cmd {
+ CMD_ATTACH,
+ CMD_LIST,
+ CMD_BOOT,
+ CMD_START,
+ CMD_STATE,
+ CMD_KILL,
+ CMD_ENABLE,
+ CMD_DISABLE,
+ CMD_DELETE,
+ CMD_CREATE,
+ CMD_UNKNOWN
+};
+
+#define OPT_ARGS "ab:fjm:p:t:vVw:"
+static struct option long_options[] = {
+ {"autostart", no_argument, 0, 'a' },
+ {"console", no_argument, 0, 'c' },
+ {"bundle", required_argument, 0, 'b' },
+ {"force", no_argument, 0, 'f' },
+ {"json", no_argument, 0, 'j' },
+ {"mounts", required_argument, 0, 'm' },
+ {"pid-file", required_argument, 0, 'p' },
+ {"temp-overlay-size", required_argument, 0, 't' },
+ {"write-overlay-path", required_argument, 0, 'w' },
+ {"verbose", no_argument, 0, 'v' },
+ {"version", no_argument, 0, 'V' },
+ {0, 0, 0, 0 }
+};
+
+AVL_TREE(runtime, avl_strcmp, false, NULL);
+AVL_TREE(settings, avl_strcmp, false, NULL);
+static struct blob_buf conf;
+static struct blob_buf settingsbuf;
+static struct blob_attr *blockinfo;
+static struct blob_attr *fstabinfo;
+static struct ubus_context *ctx;
+
+/*
+ * Print the command synopsis to stdout.
+ * Always returns -EINVAL so callers can use it directly as their error value.
+ */
+static int usage(void) {
+ printf("syntax: uxc <command> [parameters ...]\n");
+ printf("commands:\n");
+ printf("\tlist [--json]\t\t\t\tlist all configured containers\n");
+ printf("\tattach <conf>\t\t\t\tattach to container console\n");
+ printf("\tcreate <conf>\t\t\t\t(re-)create <conf>\n");
+ printf("\t\t[--bundle <path>]\t\t\tOCI bundle at <path>\n");
+ printf("\t\t[--autostart]\t\t\t\tstart on boot\n");
+ printf("\t\t[--temp-overlay-size <size>]\t\tuse tmpfs overlay with {size}\n");
+ printf("\t\t[--write-overlay-path <path>]\t\tuse overlay on {path}\n");
+ printf("\t\t[--mounts <v1>,<v2>,...,<vN>]\t\trequire filesystems to be available\n");
+ printf("\tstart [--console] <conf>\t\tstart container <conf>\n");
+ printf("\tstate <conf>\t\t\t\tget state of container <conf>\n");
+ printf("\tkill <conf> [<signal>]\t\t\tsend signal to container <conf>\n");
+ printf("\tenable <conf>\t\t\t\tstart container <conf> on boot\n");
+ printf("\tdisable <conf>\t\t\t\tdon't start container <conf> on boot\n");
+ printf("\tdelete <conf> [--force]\t\t\tdelete <conf>\n");
+ return -EINVAL;
+}
+
+enum {
+ CONF_NAME,
+ CONF_PATH,
+ CONF_JAIL,
+ CONF_AUTOSTART,
+ CONF_PIDFILE,
+ CONF_TEMP_OVERLAY_SIZE,
+ CONF_WRITE_OVERLAY_PATH,
+ CONF_VOLUMES,
+ __CONF_MAX,
+};
+
+static const struct blobmsg_policy conf_policy[__CONF_MAX] = {
+ [CONF_NAME] = { .name = "name", .type = BLOBMSG_TYPE_STRING },
+ [CONF_PATH] = { .name = "path", .type = BLOBMSG_TYPE_STRING },
+ [CONF_JAIL] = { .name = "jail", .type = BLOBMSG_TYPE_STRING },
+ [CONF_AUTOSTART] = { .name = "autostart", .type = BLOBMSG_TYPE_BOOL },
+ [CONF_PIDFILE] = { .name = "pidfile", .type = BLOBMSG_TYPE_STRING },
+ [CONF_TEMP_OVERLAY_SIZE] = { .name = "temp-overlay-size", .type = BLOBMSG_TYPE_STRING },
+ [CONF_WRITE_OVERLAY_PATH] = { .name = "write-overlay-path", .type = BLOBMSG_TYPE_STRING },
+ [CONF_VOLUMES] = { .name = "volumes", .type = BLOBMSG_TYPE_ARRAY },
+};
+
+/*
+ * Load all "*.json" files from UXC_ETC_CONFDIR and, if that directory
+ * exists, from UXC_VOL_CONFDIR into a blob buffer.
+ *
+ * load_settings: false reads the container configurations into "conf",
+ *                true reads the "settings/" sub-directories into
+ *                "settingsbuf".
+ *
+ * Every file becomes one table named after its path inside an anonymous
+ * outer table. A file that fails to load is reported and leaves its table
+ * without the loaded content.
+ *
+ * Returns 0, or -ENOMEM if a glob pattern could not be allocated.
+ */
+static int conf_load(bool load_settings)
+{
+	const char *subdir = load_settings ? "settings/" : "";
+	int gl_flags = GLOB_NOESCAPE | GLOB_MARK;
+	struct blob_buf *target;
+	struct stat sb;
+	char *globstr;
+	void *c, *o;
+	glob_t gl;
+	size_t j;
+
+	if (asprintf(&globstr, "%s/%s*.json", UXC_ETC_CONFDIR, subdir) == -1)
+		return -ENOMEM;
+
+	/* only append to the first result set if there is one */
+	if (glob(globstr, gl_flags, NULL, &gl) == 0)
+		gl_flags |= GLOB_APPEND;
+
+	free(globstr);
+
+	if (!stat(UXC_VOL_CONFDIR, &sb) && S_ISDIR(sb.st_mode)) {
+		if (asprintf(&globstr, "%s/%s*.json", UXC_VOL_CONFDIR, subdir) == -1) {
+			globfree(&gl);
+			return -ENOMEM;
+		}
+
+		/*
+		 * glob() returns 0 or a positive GLOB_* code; a failure here
+		 * simply adds no further paths.
+		 */
+		glob(globstr, gl_flags, NULL, &gl);
+		free(globstr);
+	}
+
+	target = load_settings ? &settingsbuf : &conf;
+	blob_buf_init(target, 0);
+	c = blobmsg_open_table(target, NULL);
+
+	for (j = 0; j < gl.gl_pathc; j++) {
+		/* the name is copied into the buffer, so no strdup() is needed */
+		o = blobmsg_open_table(target, gl.gl_pathv[j]);
+		if (!blobmsg_add_json_from_file(target, gl.gl_pathv[j]))
+			ERROR("uxc: failed to load %s\n", gl.gl_pathv[j]);
+
+		/*
+		 * Close the table even on failure, otherwise all following
+		 * files would end up nested inside the broken one.
+		 */
+		blobmsg_close_table(target, o);
+	}
+	blobmsg_close_table(target, c);
+	globfree(&gl);
+
+	return 0;
+}
+
+/*
+ * Allocate a settings entry keyed by a private copy of container_name.
+ * The name lives in the same allocation as the struct, so a single free()
+ * releases both. autostart starts out as -1 ("not configured").
+ *
+ * Returns the new entry, or NULL if the allocation failed.
+ */
+static struct settings *
+settings_alloc(const char *container_name)
+{
+	struct settings *s;
+	char *new_name;
+
+	s = calloc_a(sizeof(*s), &new_name, strlen(container_name) + 1);
+	if (!s)
+		return NULL;
+
+	strcpy(new_name, container_name);
+	s->container_name = new_name;
+	s->avl.key = s->container_name;
+	s->autostart = -1;
+	s->tmprwsize = NULL;
+	s->writepath = NULL;
+	s->volumes = NULL;
+	return s;
+}
+
+/*
+ * Rebuild the "settings" tree from the entries in settingsbuf.
+ *
+ * Entries without a "name" are skipped. The string and array members of
+ * each entry point into settingsbuf and are not copied.
+ *
+ * Returns 0 on success, -EINVAL if an entry sets both "temp-overlay-size"
+ * and "write-overlay-path", or -ENOMEM if an entry cannot be allocated.
+ * Entries inserted before an error stay in the tree.
+ */
+static int settings_add(void)
+{
+	struct blob_attr *cur, *tb[__CONF_MAX];
+	struct settings *s;
+	int rem, err;
+
+	avl_init(&settings, avl_strcmp, false, NULL);
+
+	blobmsg_for_each_attr(cur, blob_data(settingsbuf.head), rem) {
+		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+		if (!tb[CONF_NAME])
+			continue;
+
+		/* the two overlay flavours are mutually exclusive */
+		if (tb[CONF_TEMP_OVERLAY_SIZE] && tb[CONF_WRITE_OVERLAY_PATH])
+			return -EINVAL;
+
+		s = settings_alloc(blobmsg_get_string(tb[CONF_NAME]));
+		if (!s)
+			return -ENOMEM;
+
+		if (tb[CONF_AUTOSTART])
+			s->autostart = blobmsg_get_bool(tb[CONF_AUTOSTART]);
+
+		if (tb[CONF_TEMP_OVERLAY_SIZE])
+			s->tmprwsize = blobmsg_get_string(tb[CONF_TEMP_OVERLAY_SIZE]);
+
+		if (tb[CONF_WRITE_OVERLAY_PATH])
+			s->writepath = blobmsg_get_string(tb[CONF_WRITE_OVERLAY_PATH]);
+
+		s->volumes = tb[CONF_VOLUMES];
+		s->fname = blobmsg_name(cur);
+
+		err = avl_insert(&settings, &s->avl);
+		if (err) {
+			fprintf(stderr, "error adding settings for %s\n", blobmsg_get_string(tb[CONF_NAME]));
+			free(s);
+		}
+	}
+
+	return 0;
+}
+
+/* Remove every entry from the "settings" tree and release it. */
+static void settings_free(void)
+{
+	struct settings *entry, *next;
+
+	avl_for_each_element_safe(&settings, entry, avl, next) {
+		/* unlink first; the key is part of the same allocation */
+		avl_delete(&settings, &entry->avl);
+		free(entry);
+	}
+}
+
+enum {
+ LIST_INSTANCES,
+ __LIST_MAX,
+};
+
+static const struct blobmsg_policy list_policy[__LIST_MAX] = {
+ [LIST_INSTANCES] = { .name = "instances", .type = BLOBMSG_TYPE_TABLE },
+};
+
+enum {
+ INSTANCE_RUNNING,
+ INSTANCE_PID,
+ INSTANCE_EXITCODE,
+ INSTANCE_JAIL,
+ __INSTANCE_MAX,
+};
+
+static const struct blobmsg_policy instance_policy[__INSTANCE_MAX] = {
+ [INSTANCE_RUNNING] = { .name = "running", .type = BLOBMSG_TYPE_BOOL },
+ [INSTANCE_PID] = { .name = "pid", .type = BLOBMSG_TYPE_INT32 },
+ [INSTANCE_EXITCODE] = { .name = "exit_code", .type = BLOBMSG_TYPE_INT32 },
+ [INSTANCE_JAIL] = { .name = "jail", .type = BLOBMSG_TYPE_TABLE },
+};
+
+enum {
+ JAIL_NAME,
+ __JAIL_MAX,
+};
+
+static const struct blobmsg_policy jail_policy[__JAIL_MAX] = {
+ [JAIL_NAME] = { .name = "name", .type = BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * Allocate a zero-initialised runtime_state keyed by a private copy of
+ * container_name. The name lives in the same allocation as the struct.
+ *
+ * Returns the new entry, or NULL if the allocation failed.
+ */
+static struct runtime_state *
+runtime_alloc(const char *container_name)
+{
+	struct runtime_state *s;
+	char *new_name;
+
+	s = calloc_a(sizeof(*s), &new_name, strlen(container_name) + 1);
+	if (!s)
+		return NULL;
+
+	strcpy(new_name, container_name);
+	s->container_name = new_name;
+	s->avl.key = s->container_name;
+	return s;
+}
+
+enum {
+ STATE_OCIVERSION,
+ STATE_ID,
+ STATE_STATUS,
+ STATE_PID,
+ STATE_BUNDLE,
+ STATE_ANNOTATIONS,
+ __STATE_MAX,
+};
+
+static const struct blobmsg_policy state_policy[__STATE_MAX] = {
+ [STATE_OCIVERSION] = { .name = "ociVersion", .type = BLOBMSG_TYPE_STRING },
+ [STATE_ID] = { .name = "id", .type = BLOBMSG_TYPE_STRING },
+ [STATE_STATUS] = { .name = "status", .type = BLOBMSG_TYPE_STRING },
+ [STATE_PID] = { .name = "pid", .type = BLOBMSG_TYPE_INT32 },
+ [STATE_BUNDLE] = { .name = "bundle", .type = BLOBMSG_TYPE_STRING },
+ [STATE_ANNOTATIONS] = { .name = "annotations", .type = BLOBMSG_TYPE_TABLE },
+};
+
+
+/*
+ * ubus data callback for the "state" call issued by get_ocistate().
+ * Stores a heap copy of the reply in the pointer passed as req->priv, but
+ * only if the reply carries ociVersion, id, status and bundle.
+ */
+static void ocistate_cb(struct ubus_request *req, int type, struct blob_attr *msg)
+{
+	static const int required[] = {
+		STATE_OCIVERSION, STATE_ID, STATE_STATUS, STATE_BUNDLE,
+	};
+	struct blob_attr **result = (struct blob_attr **)req->priv;
+	struct blob_attr *fields[__STATE_MAX];
+	size_t i;
+
+	blobmsg_parse(state_policy, __STATE_MAX, fields, blobmsg_data(msg), blobmsg_len(msg));
+
+	for (i = 0; i < sizeof(required) / sizeof(required[0]); i++) {
+		if (!fields[required[i]])
+			return;
+	}
+
+	*result = blob_memdup(msg);
+}
+
+/*
+ * Query the "state" method of the ubus object "container.<name>" and leave
+ * the result in *ocistate. *ocistate is NULL if the object does not exist
+ * or the reply was not accepted by ocistate_cb(). Exits the process with
+ * ENOMEM if the object name cannot be allocated.
+ */
+static void get_ocistate(struct blob_attr **ocistate, const char *name)
+{
+	unsigned int obj_id;
+	char *obj_path;
+	int err;
+
+	*ocistate = NULL;
+
+	if (asprintf(&obj_path, "container.%s", name) == -1)
+		exit(ENOMEM);
+
+	err = ubus_lookup_id(ctx, obj_path, &obj_id);
+	free(obj_path);
+
+	if (!err)
+		ubus_invoke(ctx, obj_id, "state", NULL, ocistate_cb, ocistate, 3000);
+}
+
+/*
+ * ubus data callback for the "list" call on the "container" object.
+ *
+ * Walks every container in msg and each of its instances that carries a
+ * "jail" table with a "name", and records it in the "runtime" tree keyed
+ * by container name. Missing "pid"/"exit_code" values are stored as -1.
+ *
+ * An entry that cannot be inserted (duplicate container name or failed
+ * string copy) is released again; the walk stops if the entry itself
+ * cannot be allocated.
+ */
+static void list_cb(struct ubus_request *req, int type, struct blob_attr *msg)
+{
+	struct blob_attr *cur, *curi, *tl[__LIST_MAX], *ti[__INSTANCE_MAX], *tj[__JAIL_MAX];
+	int rem, remi;
+	const char *container_name, *instance_name, *jail_name;
+	bool running;
+	int pid, exitcode;
+	struct runtime_state *rs;
+
+	blobmsg_for_each_attr(cur, msg, rem) {
+		container_name = blobmsg_name(cur);
+		blobmsg_parse(list_policy, __LIST_MAX, tl, blobmsg_data(cur), blobmsg_len(cur));
+		if (!tl[LIST_INSTANCES])
+			continue;
+
+		blobmsg_for_each_attr(curi, tl[LIST_INSTANCES], remi) {
+			instance_name = blobmsg_name(curi);
+			blobmsg_parse(instance_policy, __INSTANCE_MAX, ti, blobmsg_data(curi), blobmsg_len(curi));
+
+			if (!ti[INSTANCE_JAIL])
+				continue;
+
+			blobmsg_parse(jail_policy, __JAIL_MAX, tj, blobmsg_data(ti[INSTANCE_JAIL]), blobmsg_len(ti[INSTANCE_JAIL]));
+			if (!tj[JAIL_NAME])
+				continue;
+
+			jail_name = blobmsg_get_string(tj[JAIL_NAME]);
+
+			running = ti[INSTANCE_RUNNING] && blobmsg_get_bool(ti[INSTANCE_RUNNING]);
+
+			if (ti[INSTANCE_PID])
+				pid = blobmsg_get_u32(ti[INSTANCE_PID]);
+			else
+				pid = -1;
+
+			if (ti[INSTANCE_EXITCODE])
+				exitcode = blobmsg_get_u32(ti[INSTANCE_EXITCODE]);
+			else
+				exitcode = -1;
+
+			rs = runtime_alloc(container_name);
+			if (!rs)
+				return;
+
+			rs->instance_name = strdup(instance_name);
+			rs->jail_name = strdup(jail_name);
+			rs->runtime_pid = pid;
+			rs->exitcode = exitcode;
+			rs->running = running;
+
+			/* do not leak the entry if it cannot become part of the tree */
+			if (!rs->instance_name || !rs->jail_name ||
+			    avl_insert(&runtime, &rs->avl)) {
+				free(rs->instance_name);
+				free(rs->jail_name);
+				free(rs);
+			}
+		}
+	}
+}
+
+/*
+ * Rebuild the "runtime" tree from the "list" method of the "container"
+ * ubus object, then fetch the OCI state of every listed jail.
+ *
+ * Returns 0 on success, -EIO if the object lookup or the call fails.
+ */
+static int runtime_load(void)
+{
+	struct runtime_state *entry, *next;
+	uint32_t obj_id;
+
+	avl_init(&runtime, avl_strcmp, false, NULL);
+
+	if (ubus_lookup_id(ctx, "container", &obj_id))
+		return -EIO;
+
+	if (ubus_invoke(ctx, obj_id, "list", NULL, list_cb, &runtime, 3000))
+		return -EIO;
+
+	avl_for_each_element_safe(&runtime, entry, avl, next)
+		get_ocistate(&entry->ocistate, entry->jail_name);
+
+	return 0;
+}
+
+/* Empty the "runtime" tree, releasing every entry and the data it owns. */
+static void runtime_free(void)
+{
+	struct runtime_state *entry, *next;
+
+	avl_for_each_element_safe(&runtime, entry, avl, next) {
+		avl_delete(&runtime, &entry->avl);
+
+		/* these three are separate heap allocations */
+		free(entry->instance_name);
+		free(entry->jail_name);
+		free(entry->ocistate);
+
+		free(entry);
+	}
+}
+
+/*
+ * Save the terminal attributes of fd in *oldtios and switch fd to the
+ * settings used for console forwarding (no echo, no canonical mode, no
+ * signal keys, no flow control).
+ *
+ * Returns 0 on success, -EIO if fd is not a terminal, or -errno if the
+ * attributes cannot be read or written.
+ */
+static inline int setup_tios(int fd, struct termios *oldtios)
+{
+	struct termios tios;
+
+	if (!isatty(fd))
+		return -EIO;
+
+	/* remember the current settings for the caller */
+	if (tcgetattr(fd, oldtios) < 0)
+		return -errno;
+
+	tios = *oldtios;
+
+	/* We use the same settings that ssh does. */
+	tios.c_iflag = (tios.c_iflag | IGNPAR) &
+		       ~(ISTRIP | INLCR | IGNCR | ICRNL | IXON | IXANY | IXOFF);
+	tios.c_lflag &= ~(TOSTOP | ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHONL);
+	tios.c_oflag = (tios.c_oflag & ~ONLCR) | OPOST;
+	tios.c_cc[VMIN] = 1;
+	tios.c_cc[VTIME] = 0;
+
+	/* apply the modified settings */
+	if (tcsetattr(fd, TCSAFLUSH, &tios) < 0)
+		return -errno;
+
+	return 0;
+}
+
+
+/*
+ * Read notification for the container side of the console: forward
+ * everything buffered on s to the local terminal stream (lufd).
+ *
+ * Keeps going as long as each chunk is accepted completely; stops when no
+ * data is left, or on a write error or short write.
+ */
+static void client_cb(struct ustream *s, int bytes)
+{
+	char *buf;
+	int len, rv;
+
+	do {
+		buf = ustream_get_read_buf(s, &len);
+		if (!buf)
+			break;
+
+		rv = ustream_write(&lufd.stream, buf, len, false);
+
+		if (rv > 0)
+			ustream_consume(s, rv);
+
+		/* rv never exceeds len, so "<=" would end the loop every time */
+		if (rv < len)
+			break;
+	} while(1);
+}
+
+/*
+ * Read notification for the local terminal: forward everything buffered
+ * on s to the container console stream (cufd).
+ *
+ * A chunk starting with byte 2 (CTRL+B) requests the end of the uloop;
+ * the chunk is still forwarded. Keeps going as long as each chunk is
+ * accepted completely; stops when no data is left, or on a write error or
+ * short write.
+ */
+static void local_cb(struct ustream *s, int bytes)
+{
+	char *buf;
+	int len, rv;
+
+	do {
+		buf = ustream_get_read_buf(s, &len);
+		if (!buf)
+			break;
+
+		if ((len > 0) && (buf[0] == 2))
+			uloop_end();
+
+		rv = ustream_write(&cufd.stream, buf, len, false);
+
+		if (rv > 0)
+			ustream_consume(s, rv);
+
+		/* rv never exceeds len, so "<=" would end the loop every time */
+		if (rv < len)
+			break;
+	} while(1);
+}
+
+/*
+ * Attach the local terminal to the console of container_name.
+ *
+ * Opens a pseudo-terminal pair, hands the slave end to the
+ * "console_attach" method of the "container" ubus object and then
+ * forwards data between /dev/tty and the master end until local_cb()
+ * ends the uloop. Uses its own ubus connection.
+ *
+ * The local terminal's original attributes are restored on every exit
+ * path after they were changed.
+ *
+ * Returns 0 after a completed session, -ECONNREFUSED if ubus is not
+ * reachable, -EIO if a terminal cannot be set up, or -ENXIO if the ubus
+ * request fails.
+ */
+static int uxc_attach(const char *container_name)
+{
+	struct ubus_context *ctx;
+	uint32_t id;
+	static struct blob_buf req;
+	int client_fd, server_fd = -1, tty_fd = -1;
+	/* only the local tty's settings are kept for restoring later */
+	struct termios oldtermios, ptytermios;
+	bool tty_saved = false;
+	char *pts = NULL;
+	int ret;
+
+	ctx = ubus_connect(NULL);
+	if (!ctx) {
+		fprintf(stderr, "can't connect to ubus!\n");
+		return -ECONNREFUSED;
+	}
+
+	/* open pseudo-terminal pair */
+	client_fd = posix_openpt(O_RDWR | O_NOCTTY);
+	if (client_fd < 0) {
+		fprintf(stderr, "can't create virtual console!\n");
+		ubus_free(ctx);
+		return -EIO;
+	}
+	setup_tios(client_fd, &ptytermios);
+
+	if (!grantpt(client_fd) && !unlockpt(client_fd))
+		pts = ptsname(client_fd);
+
+	if (pts)
+		server_fd = open(pts, O_RDWR | O_NOCTTY);
+
+	if (server_fd < 0) {
+		fprintf(stderr, "can't open virtual console!\n");
+		ret = -EIO;
+		goto err;
+	}
+	setup_tios(server_fd, &ptytermios);
+
+	tty_fd = open("/dev/tty", O_RDWR);
+	if (tty_fd < 0) {
+		fprintf(stderr, "can't open local console!\n");
+		ret = -EIO;
+		goto err;
+	}
+	tty_saved = !setup_tios(tty_fd, &oldtermios);
+
+	/* register server-side with procd */
+	blob_buf_init(&req, 0);
+	blobmsg_add_string(&req, "name", container_name);
+	blobmsg_add_string(&req, "instance", container_name);
+
+	if (ubus_lookup_id(ctx, "container", &id) ||
+	    ubus_invoke_fd(ctx, id, "console_attach", req.head, NULL, NULL, 3000, server_fd)) {
+		fprintf(stderr, "ubus request failed\n");
+		blob_buf_free(&req);
+		ret = -ENXIO;
+		goto err;
+	}
+
+	close(server_fd);
+	blob_buf_free(&req);
+	ubus_free(ctx);
+
+	uloop_init();
+
+	/* forward between stdio and client_fd until detach is requested */
+	lufd.stream.notify_read = local_cb;
+	ustream_fd_init(&lufd, tty_fd);
+
+	cufd.stream.notify_read = client_cb;
+	/* ToDo: handle remote close and other events */
+	ustream_fd_init(&cufd, client_fd);
+
+	fprintf(stderr, "attaching to jail console. press [CTRL]+[B] to exit.\n");
+	close(0);
+	close(1);
+	close(2);
+	uloop_run();
+
+	if (tty_saved)
+		tcsetattr(tty_fd, TCSAFLUSH, &oldtermios);
+	ustream_free(&lufd.stream);
+	ustream_free(&cufd.stream);
+	close(client_fd);
+	close(tty_fd);
+
+	return 0;
+
+err:
+	if (tty_fd >= 0) {
+		/* do not leave the user's terminal in raw mode */
+		if (tty_saved)
+			tcsetattr(tty_fd, TCSAFLUSH, &oldtermios);
+		close(tty_fd);
+	}
+	if (server_fd >= 0)
+		close(server_fd);
+	close(client_fd);
+	ubus_free(ctx);
+
+	return ret;
+}
+
+/*
+ * Print the OCI state of container "name" as JSON on stdout.
+ *
+ * If the runtime reported a state it is printed as is. Otherwise a
+ * minimal state is built from the configuration, with status "stopped"
+ * when a runtime entry exists and "uninitialized" when it does not.
+ *
+ * Returns 0 on success, -ENOENT if the container is not configured, or
+ * -ENOMEM if the JSON text cannot be produced.
+ */
+static int uxc_state(char *name)
+{
+	struct runtime_state *rt = avl_find_element(&runtime, name, rt, avl);
+	struct blob_attr *entry, *fields[__CONF_MAX];
+	static struct blob_buf buf;
+	char *bundle_path = NULL;
+	char *id = NULL;
+	char *json;
+	int left;
+	int ret = 0;
+
+	/* a state reported by the runtime takes precedence */
+	if (rt && rt->ocistate) {
+		json = blobmsg_format_json_indent(rt->ocistate, true, 0);
+		if (!json)
+			return -ENOMEM;
+
+		printf("%s\n", json);
+		free(json);
+		return 0;
+	}
+
+	blobmsg_for_each_attr(entry, blob_data(conf.head), left) {
+		blobmsg_parse(conf_policy, __CONF_MAX, fields, blobmsg_data(entry), blobmsg_len(entry));
+		if (!fields[CONF_NAME] || !fields[CONF_PATH])
+			continue;
+
+		if (strcmp(name, blobmsg_get_string(fields[CONF_NAME])))
+			continue;
+
+		/* the jail name defaults to the container name */
+		id = fields[CONF_JAIL] ? blobmsg_get_string(fields[CONF_JAIL]) : name;
+		bundle_path = blobmsg_get_string(fields[CONF_PATH]);
+		break;
+	}
+
+	if (!bundle_path)
+		return -ENOENT;
+
+	blob_buf_init(&buf, 0);
+	blobmsg_add_string(&buf, "ociVersion", OCI_VERSION_STRING);
+	blobmsg_add_string(&buf, "id", id);
+	blobmsg_add_string(&buf, "status", rt ? "stopped" : "uninitialized");
+	blobmsg_add_string(&buf, "bundle", bundle_path);
+
+	json = blobmsg_format_json_indent(buf.head, true, 0);
+	if (json) {
+		printf("%s\n", json);
+		free(json);
+	} else {
+		ret = -ENOMEM;
+	}
+
+	blob_buf_free(&buf);
+
+	return ret;
+}
+
+/*
+ * List every configured container on stdout, either as one text line per
+ * container or, with json_output set, as a JSON array.
+ *
+ * Each entry shows name, status and autostart (the value from the
+ * settings tree overrides the one from the configuration). The exit code
+ * is added for stopped containers, the runtime and container pids for
+ * running ones. The status is taken from the OCI state if present,
+ * otherwise it is "creating" for a running instance and "stopped" for
+ * anything else.
+ *
+ * Returns 0, or -ENOMEM if the JSON text cannot be produced.
+ */
+static int uxc_list(void)
+{
+	struct blob_attr *cur, *tb[__CONF_MAX], *ts[__STATE_MAX];
+	int rem;
+	struct runtime_state *rsstate = NULL;
+	struct settings *usettings = NULL;
+	char *name, *ocistatus, *tmp;
+	const char *status;
+	int container_pid;
+	bool autostart;
+	static struct blob_buf buf;
+	void *arr = NULL, *obj = NULL;
+
+	if (json_output) {
+		blob_buf_init(&buf, 0);
+		arr = blobmsg_open_array(&buf, "");
+	}
+
+	blobmsg_for_each_attr(cur, blob_data(conf.head), rem) {
+		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+		if (!tb[CONF_NAME] || !tb[CONF_PATH])
+			continue;
+
+		autostart = tb[CONF_AUTOSTART] && blobmsg_get_bool(tb[CONF_AUTOSTART]);
+
+		ocistatus = NULL;
+		/* -1 means "unknown", so that the >= 0 check below is meaningful */
+		container_pid = -1;
+		name = blobmsg_get_string(tb[CONF_NAME]);
+		rsstate = avl_find_element(&runtime, name, rsstate, avl);
+
+		if (rsstate && rsstate->ocistate) {
+			blobmsg_parse(state_policy, __STATE_MAX, ts, blobmsg_data(rsstate->ocistate), blobmsg_len(rsstate->ocistate));
+			if (ts[STATE_STATUS])
+				ocistatus = blobmsg_get_string(ts[STATE_STATUS]);
+			/* "pid" is not required by ocistate_cb() and may be absent */
+			if (ts[STATE_PID])
+				container_pid = blobmsg_get_u32(ts[STATE_PID]);
+		}
+
+		status = ocistatus?:(rsstate && rsstate->running)?"creating":"stopped";
+
+		usettings = avl_find_element(&settings, name, usettings, avl);
+
+		if (usettings && (usettings->autostart >= 0))
+			autostart = !!(usettings->autostart);
+
+		if (json_output) {
+			obj = blobmsg_open_table(&buf, "");
+			blobmsg_add_string(&buf, "name", name);
+			blobmsg_add_string(&buf, "status", status);
+			blobmsg_add_u8(&buf, "autostart", autostart);
+		} else {
+			printf("[%c] %s %s", autostart?'*':' ', name, status);
+		}
+
+		if (rsstate && !rsstate->running && (rsstate->exitcode >= 0)) {
+			if (json_output)
+				blobmsg_add_u32(&buf, "exitcode", rsstate->exitcode);
+			else
+				printf(" exitcode: %d (%s)", rsstate->exitcode, strerror(rsstate->exitcode));
+		}
+
+		if (rsstate && rsstate->running && (rsstate->runtime_pid >= 0)) {
+			if (json_output)
+				blobmsg_add_u32(&buf, "runtime_pid", rsstate->runtime_pid);
+			else
+				printf(" runtime pid: %d", rsstate->runtime_pid);
+		}
+
+		if (rsstate && rsstate->running && (container_pid >= 0)) {
+			if (json_output)
+				blobmsg_add_u32(&buf, "container_pid", container_pid);
+			else
+				printf(" container pid: %d", container_pid);
+		}
+
+		if (!json_output)
+			printf("\n");
+		else
+			blobmsg_close_table(&buf, obj);
+	}
+
+	if (json_output) {
+		blobmsg_close_array(&buf, arr);
+		tmp = blobmsg_format_json_indent(buf.head, true, 0);
+		if (!tmp) {
+			blob_buf_free(&buf);
+			return -ENOMEM;
+		}
+		printf("%s\n", tmp);
+		free(tmp);
+		blob_buf_free(&buf);
+	}
+
+	return 0;
+}
+
+/*
+ * Check whether container "name" is currently running.
+ * Returns -EEXIST if the runtime tree lists it as running, 0 otherwise.
+ */
+static int uxc_exists(char *name)
+{
+	struct runtime_state *rt = NULL;
+
+	rt = avl_find_element(&runtime, name, rt, avl);
+	if (!rt || !rt->running)
+		return 0;
+
+	return -EEXIST;
+}
+
+/*
+ * Register the configured container "name" through the "add" method of
+ * the "container" ubus object.
+ *
+ * name:        container name as found in the configuration
+ * immediately: passed on as the "immediately" flag of the jail
+ *
+ * Overlay options from the settings tree replace those from the
+ * configuration; if the settings carry both a write path and a tmpfs
+ * size, the tmpfs size wins.
+ *
+ * Returns 0 on success, -ENOENT if the container is not configured,
+ * -ENOMEM if the verbose dump cannot be produced, or -EIO if the ubus
+ * lookup or call fails. The request buffer is released on every path.
+ */
+static int uxc_create(char *name, bool immediately)
+{
+	static struct blob_buf req;
+	struct blob_attr *cur, *tb[__CONF_MAX];
+	int rem, ret = 0;
+	uint32_t id;
+	struct settings *usettings = NULL;
+	char *path = NULL, *jailname = NULL, *pidfile = NULL, *tmprwsize = NULL, *writepath = NULL;
+
+	void *in, *ins, *j;
+	bool found = false;
+
+	blobmsg_for_each_attr(cur, blob_data(conf.head), rem) {
+		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+		if (!tb[CONF_NAME] || !tb[CONF_PATH])
+			continue;
+
+		if (strcmp(name, blobmsg_get_string(tb[CONF_NAME])))
+			continue;
+
+		found = true;
+		break;
+	}
+
+	if (!found)
+		return -ENOENT;
+
+	/* tb still holds the attributes of the matching entry */
+	path = blobmsg_get_string(tb[CONF_PATH]);
+
+	if (tb[CONF_PIDFILE])
+		pidfile = blobmsg_get_string(tb[CONF_PIDFILE]);
+
+	if (tb[CONF_TEMP_OVERLAY_SIZE])
+		tmprwsize = blobmsg_get_string(tb[CONF_TEMP_OVERLAY_SIZE]);
+
+	if (tb[CONF_WRITE_OVERLAY_PATH])
+		writepath = blobmsg_get_string(tb[CONF_WRITE_OVERLAY_PATH]);
+
+	if (tb[CONF_JAIL])
+		jailname = blobmsg_get_string(tb[CONF_JAIL]);
+
+	usettings = avl_find_element(&settings, blobmsg_get_string(tb[CONF_NAME]), usettings, avl);
+	if (usettings) {
+		if (usettings->writepath) {
+			writepath = usettings->writepath;
+			tmprwsize = NULL;
+		}
+		if (usettings->tmprwsize) {
+			tmprwsize = usettings->tmprwsize;
+			writepath = NULL;
+		}
+	}
+
+	blob_buf_init(&req, 0);
+	blobmsg_add_string(&req, "name", name);
+	ins = blobmsg_open_table(&req, "instances");
+	in = blobmsg_open_table(&req, name);
+	blobmsg_add_string(&req, "bundle", path);
+	j = blobmsg_open_table(&req, "jail");
+	blobmsg_add_string(&req, "name", jailname?:name);
+	blobmsg_add_u8(&req, "immediately", immediately);
+
+	if (pidfile)
+		blobmsg_add_string(&req, "pidfile", pidfile);
+
+	blobmsg_close_table(&req, j);
+
+	if (writepath)
+		blobmsg_add_string(&req, "overlaydir", writepath);
+
+	if (tmprwsize)
+		blobmsg_add_string(&req, "tmpoverlaysize", tmprwsize);
+
+	blobmsg_close_table(&req, in);
+	blobmsg_close_table(&req, ins);
+
+	if (verbose) {
+		char *tmp;
+		tmp = blobmsg_format_json_indent(req.head, true, 1);
+		if (!tmp) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		fprintf(stderr, "adding container to procd:\n\t%s\n", tmp);
+		free(tmp);
+	}
+
+	if (ubus_lookup_id(ctx, "container", &id) ||
+	    ubus_invoke(ctx, id, "add", req.head, NULL, NULL, 3000))
+		ret = -EIO;
+
+out:
+	/* the request is no longer needed once the call has completed */
+	blob_buf_free(&req);
+
+	return ret;
+}
+
+/*
+ * Start the container 'name' through its "container.<name>" ubus object.
+ *
+ * With 'console' set the process forks first: the parent calls
+ * uxc_attach(name) and exits with its result, while the child carries on and
+ * sends the start request. If fork() fails the request is still sent by the
+ * calling process, just without uxc_attach() being run.
+ *
+ * Returns -ENOMEM when the object name cannot be allocated, -ENOENT when the
+ * ubus object cannot be looked up, otherwise the result of the "start" call.
+ */
+static int uxc_start(const char *name, bool console)
+{
+	char *objname;
+	unsigned int id;
+	pid_t pid;
+	int ret;
+
+	if (console) {
+		pid = fork();
+		if (pid > 0)
+			exit(uxc_attach(name));
+	}
+
+	if (asprintf(&objname, "container.%s", name) == -1)
+		return -ENOMEM;
+
+	/* release the name before evaluating the lookup so no path leaks it */
+	ret = ubus_lookup_id(ctx, objname, &id);
+	free(objname);
+	if (ret)
+		return -ENOENT;
+
+	return ubus_invoke(ctx, id, "start", NULL, NULL, NULL, 3000);
+}
+
+/*
+ * Send 'signal' to the running container 'name'.
+ *
+ * The container has to be present in the loaded configuration (an entry
+ * carrying both a name and a path) and has to be marked running in the
+ * runtime tree. The request goes to the "kill" method of the
+ * "container.<name>" ubus object.
+ *
+ * Returns 0 on success, -ENOENT if the container is unknown, not running or
+ * has no ubus object, -ENOMEM if the object name cannot be allocated and
+ * -EIO if the ubus call fails.
+ */
+static int uxc_kill(char *name, int signal)
+{
+	static struct blob_buf req;
+	struct blob_attr *entry, *fields[__CONF_MAX];
+	struct runtime_state *state = NULL;
+	char *ubus_path;
+	unsigned int obj_id;
+	bool configured = false;
+	int left, lookup;
+
+	/* look for a complete configuration entry with a matching name */
+	blobmsg_for_each_attr(entry, blob_data(conf.head), left) {
+		blobmsg_parse(conf_policy, __CONF_MAX, fields, blobmsg_data(entry), blobmsg_len(entry));
+		if (fields[CONF_NAME] && fields[CONF_PATH] &&
+		    !strcmp(name, blobmsg_get_string(fields[CONF_NAME]))) {
+			configured = true;
+			break;
+		}
+	}
+
+	if (!configured)
+		return -ENOENT;
+
+	state = avl_find_element(&runtime, name, state, avl);
+	if (!(state && state->running))
+		return -ENOENT;
+
+	blob_buf_init(&req, 0);
+	blobmsg_add_u32(&req, "signal", signal);
+	blobmsg_add_string(&req, "name", name);
+
+	if (asprintf(&ubus_path, "container.%s", name) == -1)
+		return -ENOMEM;
+
+	lookup = ubus_lookup_id(ctx, ubus_path, &obj_id);
+	free(ubus_path);
+	if (lookup != 0)
+		return -ENOENT;
+
+	return ubus_invoke(ctx, obj_id, "kill", req.head, NULL, NULL, 3000) ? -EIO : 0;
+}
+
+
+/*
+ * Open (create/truncate) "<directory of cfname>/settings/<name>.json",
+ * creating the "settings" directory first when it does not exist yet.
+ *
+ * Returns the open file descriptor, or a negative errno value.
+ */
+static int uxc_open_sibling_settings(const char *cfname, const char *name)
+{
+	char *dir, *slash, *target = NULL;
+	int fd = -ENOMEM;
+
+	dir = strdup(cfname);
+	if (!dir)
+		return -ENOMEM;
+
+	slash = strrchr(dir, '/');
+	if (!slash) {
+		fd = -EINVAL;
+		goto out;
+	}
+	*slash = '\0';
+
+	if (asprintf(&target, "%s/settings", dir) == -1) {
+		target = NULL; /* contents are undefined after a failed asprintf */
+		goto out;
+	}
+
+	/* mkdir() reports failure through errno, an existing directory is fine */
+	if (mkdir(target, 0755) && errno != EEXIST) {
+		fd = -errno;
+		goto out;
+	}
+
+	free(target);
+	if (asprintf(&target, "%s/settings/%s.json", dir, name) == -1) {
+		target = NULL;
+		goto out;
+	}
+
+	fd = open(target, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (fd < 0)
+		fd = -errno;
+
+out:
+	free(target);
+	free(dir);
+	return fd;
+}
+
+/*
+ * Create the configuration of a new container or update the settings of an
+ * existing one, and write the result out as a JSON file.
+ *
+ * name           - container name
+ * path           - bundle directory; non-NULL means "create a new container"
+ *                  and the JSON goes to "<confdir>/<name>.json". NULL means
+ *                  "update": the JSON goes to the existing settings file, or
+ *                  to "settings/<name>.json" next to the container's
+ *                  configuration file.
+ * autostart      - negative: leave unset (or take over the stored setting),
+ *                  otherwise written as a boolean
+ * pidfile, tmprwsize, writepath - optional strings, NULL when not given; if
+ *                  neither overlay option is given the stored settings are
+ *                  taken over
+ * requiredmounts - optional list of volumes separated by ',' or ';'; the
+ *                  string is split in place. When NULL, volumes from the
+ *                  stored settings are carried over.
+ *
+ * Returns 0 when there is nothing to do, 1 after the file was written and a
+ * negative errno value on failure (-EEXIST: container already configured,
+ * -ENOENT: unknown container or missing bundle, -ENOTDIR: bundle is not a
+ * directory, -EIO: settings already exist for a container being created).
+ */
+static int uxc_set(char *name, char *path, signed char autostart, char *pidfile, char *tmprwsize, char *writepath, char *requiredmounts)
+{
+	static struct blob_buf req;
+	struct settings *usettings = NULL;
+	struct blob_attr *cur, *tb[__CONF_MAX];
+	int rem, ret;
+	const char *cfname = NULL;
+	const char *sfname = NULL;
+	char *fname = NULL;
+	char *curvol, *saveptr, *mnttok, *json;
+	void *mntarr;
+	int f;
+	struct stat sb;
+
+	/* nothing to do */
+	if (!path && (autostart<0) && !pidfile && !tmprwsize && !writepath && !requiredmounts)
+		return 0;
+
+	blobmsg_for_each_attr(cur, blob_data(conf.head), rem) {
+		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+		if (!tb[CONF_NAME] || !tb[CONF_PATH])
+			continue;
+
+		if (strcmp(name, blobmsg_get_string(tb[CONF_NAME])))
+			continue;
+
+		cfname = blobmsg_name(cur);
+		break;
+	}
+
+	if (cfname && path)
+		return -EEXIST;
+
+	if (!cfname && !path)
+		return -ENOENT;
+
+	if (path) {
+		if (stat(path, &sb) == -1)
+			return -ENOENT;
+
+		if ((sb.st_mode & S_IFMT) != S_IFDIR)
+			return -ENOTDIR;
+	}
+
+	/*
+	 * Key the lookup on 'name': tb[] only describes the requested container
+	 * when the loop above found it, which is never the case when creating.
+	 */
+	usettings = avl_find_element(&settings, name, usettings, avl);
+	if (path && usettings)
+		return -EIO;
+
+	if (usettings) {
+		sfname = usettings->fname;
+		if (!tmprwsize && !writepath) {
+			if (usettings->tmprwsize) {
+				tmprwsize = usettings->tmprwsize;
+				writepath = NULL;
+			}
+			if (usettings->writepath) {
+				writepath = usettings->writepath;
+				tmprwsize = NULL;
+			}
+		}
+		if (usettings->autostart >= 0 && autostart < 0)
+			autostart = !!(usettings->autostart);
+	}
+
+	if (path) {
+		if (mkdir(confdir, 0755) && errno != EEXIST)
+			return -errno;
+
+		if (asprintf(&fname, "%s/%s.json", confdir, name) == -1)
+			return -ENOMEM;
+
+		f = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+		ret = errno; /* keep the open() error across free() */
+		free(fname);
+		if (f < 0)
+			return -ret;
+	} else if (sfname) {
+		f = open(sfname, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+		if (f < 0)
+			return -errno;
+	} else {
+		f = uxc_open_sibling_settings(cfname, name);
+		if (f < 0)
+			return f;
+	}
+
+	blob_buf_init(&req, 0);
+	blobmsg_add_string(&req, "name", name);
+	if (path)
+		blobmsg_add_string(&req, "path", path);
+
+	if (autostart >= 0)
+		blobmsg_add_u8(&req, "autostart", !!autostart);
+
+	if (pidfile)
+		blobmsg_add_string(&req, "pidfile", pidfile);
+
+	if (tmprwsize)
+		blobmsg_add_string(&req, "temp-overlay-size", tmprwsize);
+
+	if (writepath)
+		blobmsg_add_string(&req, "write-overlay-path", writepath);
+
+	if (!requiredmounts && usettings && usettings->volumes)
+		blobmsg_add_blob(&req, usettings->volumes);
+
+	if (requiredmounts) {
+		mntarr = blobmsg_open_array(&req, "volumes");
+		for (mnttok = requiredmounts; ; mnttok = NULL) {
+			curvol = strtok_r(mnttok, ",;", &saveptr);
+			if (!curvol)
+				break;
+
+			blobmsg_add_string(&req, NULL, curvol);
+		}
+		blobmsg_close_array(&req, mntarr);
+	}
+
+	/* report a failed serialisation or write instead of claiming success */
+	json = blobmsg_format_json_indent(req.head, true, 0);
+	if (!json) {
+		ret = -ENOMEM;
+	} else {
+		if (dprintf(f, "%s\n", json) < 0)
+			ret = errno ? -errno : -EIO;
+		else
+			ret = 1;
+
+		free(json);
+	}
+
+	blob_buf_free(&req);
+	close(f);
+
+	return ret;
+}
+
+enum { /* indices of the fields described by block_info_policy[] */
+ BLOCK_INFO_DEVICE,
+ BLOCK_INFO_UUID,
+ BLOCK_INFO_TARGET,
+ BLOCK_INFO_TYPE,
+ BLOCK_INFO_MOUNT,
+ __BLOCK_INFO_MAX, /* number of fields; sizes the policy table and the tb[] array in checkblock() */
+};
+
+static const struct blobmsg_policy block_info_policy[__BLOCK_INFO_MAX] = { /* string fields parsed from each entry of blockinfo */
+ [BLOCK_INFO_DEVICE] = { .name = "device", .type = BLOBMSG_TYPE_STRING },
+ [BLOCK_INFO_UUID] = { .name = "uuid", .type = BLOBMSG_TYPE_STRING },
+ [BLOCK_INFO_TARGET] = { .name = "target", .type = BLOBMSG_TYPE_STRING },
+ [BLOCK_INFO_TYPE] = { .name = "type", .type = BLOBMSG_TYPE_STRING },
+ [BLOCK_INFO_MOUNT] = { .name = "mount", .type = BLOBMSG_TYPE_STRING }, /* checkblock() only accepts entries that carry this field */
+};
+
+
+/*
+ * Look for 'uuid' among the entries of blockinfo.
+ *
+ * Note the polarity: returns false when an entry with this uuid and a
+ * "mount" field exists, and true when no such entry is found.
+ */
+static bool checkblock(const char *uuid)
+{
+	struct blob_attr *fields[__BLOCK_INFO_MAX];
+	struct blob_attr *entry;
+	int left;
+
+	blobmsg_for_each_attr(entry, blockinfo, left) {
+		blobmsg_parse(block_info_policy, __BLOCK_INFO_MAX, fields, blobmsg_data(entry), blobmsg_len(entry));
+
+		/* only entries carrying both a uuid and a mount field count */
+		if (fields[BLOCK_INFO_UUID] && fields[BLOCK_INFO_MOUNT] &&
+		    strcmp(uuid, blobmsg_get_string(fields[BLOCK_INFO_UUID])) == 0)
+			return false;
+	}
+
+	return true;
+}
+
+enum { /* indices of the fields described by uci_fstab_policy[] */
+ UCI_FSTAB_UUID,
+ UCI_FSTAB_ANONYMOUS,
+ __UCI_FSTAB_MAX, /* number of fields; sizes the policy table and the tb[] array in resolveuuid() */
+};
+
+static const struct blobmsg_policy uci_fstab_policy[__UCI_FSTAB_MAX] = { /* fields parsed from each entry of fstabinfo */
+ [UCI_FSTAB_UUID] = { .name = "uuid", .type = BLOBMSG_TYPE_STRING },
+ [UCI_FSTAB_ANONYMOUS] = { .name = ".anonymous", .type = BLOBMSG_TYPE_BOOL }, /* resolveuuid() skips entries where this is true */
+};
+
+/*
+ * Compare a volume name with a section name, treating every '-' in the
+ * volume name as '_'. Works in place, so no temporary copy is needed.
+ */
+static bool uxc_volname_matches(const char *volname, const char *section)
+{
+	for (; *volname; ++volname, ++section) {
+		char c = (*volname == '-') ? '_' : *volname;
+
+		if (c != *section)
+			return false;
+	}
+
+	return *section == '\0';
+}
+
+/*
+ * Translate a volume name into a UUID using the entries of fstabinfo.
+ *
+ * An entry is considered when it has a "uuid" field, is not flagged
+ * ".anonymous" and has a name. The first such entry whose name equals
+ * 'volname' with '-' replaced by '_' wins and its "uuid" string is returned.
+ * If nothing matches, 'volname' itself is returned unchanged.
+ */
+static const char *resolveuuid(const char *volname)
+{
+	struct blob_attr *tb[__UCI_FSTAB_MAX];
+	struct blob_attr *cur;
+	const char *mntname;
+	int rem;
+
+	blobmsg_for_each_attr(cur, fstabinfo, rem) {
+		blobmsg_parse(uci_fstab_policy, __UCI_FSTAB_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+
+		if (!tb[UCI_FSTAB_UUID])
+			continue;
+
+		if (tb[UCI_FSTAB_ANONYMOUS] && blobmsg_get_bool(tb[UCI_FSTAB_ANONYMOUS]))
+			continue;
+
+		mntname = blobmsg_name(cur);
+		if (!mntname)
+			continue;
+
+		if (uxc_volname_matches(volname, mntname))
+			return blobmsg_get_string(tb[UCI_FSTAB_UUID]);
+	}
+
+	return volname;
+}
+
+/*
+ * Run every entry of 'volumes' through resolveuuid() and checkblock().
+ *
+ * Returns true as soon as checkblock() returns true for one volume (no
+ * matching mounted entry in blockinfo), false when that never happens.
+ */
+static bool checkvolumes(struct blob_attr *volumes)
+{
+	struct blob_attr *vol;
+	bool unavailable = false;
+	int left;
+
+	blobmsg_for_each_attr(vol, volumes, left) {
+		const char *uuid = resolveuuid(blobmsg_get_string(vol));
+
+		if (checkblock(uuid)) {
+			unavailable = true;
+			break;
+		}
+	}
+
+	return unavailable;
+}
+
+/*
+ * Data callback for the ubus "block" "info" call issued by uxc_boot():
+ * stores a duplicate of the attribute found at the start of the reply
+ * payload in the global 'blockinfo'.
+ */
+static void block_cb(struct ubus_request *req, int type, struct blob_attr *msg)
+{
+	struct blob_attr *payload = blobmsg_data(msg);
+
+	blockinfo = blob_memdup(payload);
+}
+
+/*
+ * Data callback for the ubus "uci" "get" call issued by uxc_boot():
+ * stores a duplicate of the attribute found at the start of the reply
+ * payload in the global 'fstabinfo'.
+ */
+static void fstab_cb(struct ubus_request *req, int type, struct blob_attr *msg)
+{
+	struct blob_attr *payload = blobmsg_data(msg);
+
+	fstabinfo = blob_memdup(payload);
+}
+
+/*
+ * Create every configured container that is marked for autostart.
+ *
+ * First the ubus "block" "info" reply and the "mount" sections of the UCI
+ * "fstab" config are fetched (into blockinfo / fstabinfo through block_cb /
+ * fstab_cb). Then each complete configuration entry is visited; an entry is
+ * skipped when
+ *  - it already has an entry in the runtime tree,
+ *  - autostart is not enabled (a stored setting overrides the configuration
+ *    value; with neither present the container is not started),
+ *  - checkvolumes() returns true for its configured or stored volumes, or
+ *  - uxc_exists() returns non-zero for it.
+ *
+ * Returns the number of containers for which uxc_create() failed (0 when all
+ * went well), -ENOENT if the "block" or "uci" ubus object is missing, -ENXIO
+ * if the block info call fails, the ubus status if the uci call fails, and
+ * -ENOMEM if a container name cannot be duplicated.
+ */
+static int uxc_boot(void)
+{
+	struct blob_attr *cur, *tb[__CONF_MAX];
+	struct runtime_state *rsstate = NULL;
+	struct settings *usettings = NULL;
+	static struct blob_buf req;
+	int rem, ret = 0;
+	char *name;
+	unsigned int id;
+	bool autostart;
+
+	ret = ubus_lookup_id(ctx, "block", &id);
+	if (ret)
+		return -ENOENT;
+
+	ret = ubus_invoke(ctx, id, "info", NULL, block_cb, NULL, 3000);
+	if (ret)
+		return -ENXIO;
+
+	ret = ubus_lookup_id(ctx, "uci", &id);
+	if (ret)
+		return -ENOENT;
+
+	blob_buf_init(&req, 0);
+	blobmsg_add_string(&req, "config", "fstab");
+	blobmsg_add_string(&req, "type", "mount");
+
+	ret = ubus_invoke(ctx, id, "get", req.head, fstab_cb, NULL, 3000);
+	/* the request is no longer needed once the call has returned */
+	blob_buf_free(&req);
+	if (ret)
+		return ret;
+
+	blobmsg_for_each_attr(cur, blob_data(conf.head), rem) {
+		blobmsg_parse(conf_policy, __CONF_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+		if (!tb[CONF_NAME] || !tb[CONF_PATH])
+			continue;
+
+		rsstate = avl_find_element(&runtime, blobmsg_get_string(tb[CONF_NAME]), rsstate, avl);
+		if (rsstate)
+			continue;
+
+		/* start from a defined value so nothing carries over between entries */
+		autostart = false;
+		if (tb[CONF_AUTOSTART])
+			autostart = blobmsg_get_bool(tb[CONF_AUTOSTART]);
+
+		usettings = avl_find_element(&settings, blobmsg_get_string(tb[CONF_NAME]), usettings, avl);
+		if (usettings && (usettings->autostart >= 0))
+			autostart = !!(usettings->autostart);
+
+		if (!autostart)
+			continue;
+
+		/* make sure all volumes are ready before starting */
+		if (tb[CONF_VOLUMES])
+			if (checkvolumes(tb[CONF_VOLUMES]))
+				continue;
+
+		if (usettings && usettings->volumes)
+			if (checkvolumes(usettings->volumes))
+				continue;
+
+		name = strdup(blobmsg_get_string(tb[CONF_NAME]));
+		if (!name)
+			return -ENOMEM;
+
+		if (uxc_exists(name)) {
+			free(name);
+			continue;
+		}
+
+		if (uxc_create(name, true))
+			++ret;
+
+		free(name);
+	}
+
+	return ret;
+}
+
+/*
+ * Remove the container 'name': delete its runtime instance through the
+ * "container" ubus object (if it has a runtime entry), then unlink its
+ * settings file (if any) and its configuration file.
+ *
+ * A running container is only removed when 'force' is set, in which case it
+ * is sent SIGKILL through uxc_kill() first; otherwise -EWOULDBLOCK is
+ * returned.
+ *
+ * Returns 0 on success, -ENOENT if the container is not configured or one of
+ * its files is missing, -EIO if the ubus "delete" call fails, -errno if an
+ * unlink() fails, or the non-zero result of uxc_kill() / ubus_lookup_id().
+ */
+static int uxc_delete(char *name, bool force)
+{
+	struct blob_attr *entry, *fields[__CONF_MAX];
+	struct runtime_state *state = NULL;
+	struct settings *stored = NULL;
+	static struct blob_buf req;
+	const char *conf_file = NULL;
+	const char *settings_file = NULL;
+	struct stat st;
+	uint32_t obj_id;
+	int left, status = 0;
+
+	/* locate the configuration file that defines this container */
+	blobmsg_for_each_attr(entry, blob_data(conf.head), left) {
+		blobmsg_parse(conf_policy, __CONF_MAX, fields, blobmsg_data(entry), blobmsg_len(entry));
+		if (fields[CONF_NAME] && fields[CONF_PATH] &&
+		    !strcmp(name, blobmsg_get_string(fields[CONF_NAME]))) {
+			conf_file = blobmsg_name(entry);
+			break;
+		}
+	}
+
+	if (!conf_file)
+		return -ENOENT;
+
+	state = avl_find_element(&runtime, name, state, avl);
+	if (state) {
+		if (state->running) {
+			if (!force)
+				return -EWOULDBLOCK;
+
+			status = uxc_kill(name, SIGKILL);
+			if (status)
+				return status;
+		}
+
+		status = ubus_lookup_id(ctx, "container", &obj_id);
+		if (status)
+			return status;
+
+		blob_buf_init(&req, 0);
+		blobmsg_add_string(&req, "name", state->container_name);
+		blobmsg_add_string(&req, "instance", state->instance_name);
+
+		if (ubus_invoke(ctx, obj_id, "delete", req.head, NULL, NULL, 3000)) {
+			blob_buf_free(&req);
+			return -EIO;
+		}
+	}
+
+	stored = avl_find_element(&settings, name, stored, avl);
+	if (stored)
+		settings_file = stored->fname;
+
+	if (settings_file) {
+		if (stat(settings_file, &st) == -1)
+			return -ENOENT;
+
+		if (unlink(settings_file) == -1)
+			return -errno;
+	}
+
+	if (stat(conf_file, &st) == -1)
+		return -ENOENT;
+
+	return (unlink(conf_file) == -1) ? -errno : 0;
+}
+
+static void reload_conf(void) /* discard the loaded configuration and settings, then load both again */
+{
+ blob_buf_free(&conf);
+ conf_load(false); /* NOTE(review): result ignored here, whereas main() treats a negative result as fatal */
+ settings_free(); /* drop the old settings entries before their backing buffer is released */
+ blob_buf_free(&settingsbuf);
+ conf_load(true); /* presumably refills settingsbuf, mirroring the load order in main() - confirm */
+ settings_add();
+}
+
+/*
+ * uxc entry point: connect to ubus, load configuration, settings and runtime
+ * state, parse the command line options and run the requested sub-command.
+ *
+ * Sub-commands: list, attach, boot, start, state, kill, enable, disable,
+ * delete, create. Every command except list and boot takes the container
+ * name as the next argument; kill accepts an optional signal number after
+ * the name (default SIGTERM).
+ *
+ * The return value of the command handler becomes the exit status; for
+ * negative values the matching strerror() text is printed to stderr first.
+ * enable/disable map the positive "file written" result of uxc_set() to 0 so
+ * that a successful change does not exit with a failure status.
+ */
+int main(int argc, char **argv)
+{
+	static const struct {
+		const char *name;
+		enum uxc_cmd cmd;
+	} commands[] = {
+		{ "list", CMD_LIST },
+		{ "attach", CMD_ATTACH },
+		{ "boot", CMD_BOOT },
+		{ "start", CMD_START },
+		{ "state", CMD_STATE },
+		{ "kill", CMD_KILL },
+		{ "enable", CMD_ENABLE },
+		{ "disable", CMD_DISABLE },
+		{ "delete", CMD_DELETE },
+		{ "create", CMD_CREATE },
+	};
+	enum uxc_cmd cmd = CMD_UNKNOWN;
+	int ret = -EINVAL;
+	char *bundle = NULL;
+	char *pidfile = NULL;
+	char *tmprwsize = NULL;
+	char *writepath = NULL;
+	char *requiredmounts = NULL;
+	signed char autostart = -1;
+	bool force = false;
+	bool console = false;
+	int signal = SIGTERM;
+	size_t i;
+	int c;
+
+	if (argc < 2)
+		return usage();
+
+	ctx = ubus_connect(NULL);
+	if (!ctx)
+		return -ENODEV;
+
+	/* each load step has a matching cleanup label at the end of main() */
+	ret = conf_load(false);
+	if (ret < 0)
+		goto out;
+
+	ret = conf_load(true);
+	if (ret < 0)
+		goto conf_out;
+
+	ret = settings_add();
+	if (ret < 0)
+		goto settings_out;
+
+	ret = runtime_load();
+	if (ret)
+		goto settings_avl_out;
+
+	while (true) {
+		int option_index = 0;
+		c = getopt_long(argc, argv, OPT_ARGS, long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'a':
+			autostart = 1;
+			break;
+
+		case 'b':
+			bundle = optarg;
+			break;
+
+		case 'c':
+			console = true;
+			break;
+
+		case 'f':
+			force = true;
+			break;
+
+		case 'j':
+			json_output = true;
+			break;
+
+		case 'p':
+			pidfile = optarg;
+			break;
+
+		case 't':
+			tmprwsize = optarg;
+			break;
+
+		case 'v':
+			verbose = true;
+			break;
+
+		case 'V':
+			printf("uxc %s\n", UXC_VERSION);
+			exit(0);
+
+		case 'w':
+			writepath = optarg;
+			break;
+
+		case 'm':
+			requiredmounts = optarg;
+			break;
+		}
+	}
+
+	if (optind == argc)
+		goto usage_out;
+
+	/* an unrecognised word leaves cmd at CMD_UNKNOWN and ends in usage() */
+	for (i = 0; i < sizeof(commands) / sizeof(commands[0]); i++) {
+		if (!strcmp(commands[i].name, argv[optind])) {
+			cmd = commands[i].cmd;
+			break;
+		}
+	}
+
+	switch (cmd) {
+	case CMD_ATTACH:
+		if (optind != argc - 2)
+			goto usage_out;
+
+		ret = uxc_attach(argv[optind + 1]);
+		break;
+
+	case CMD_LIST:
+		ret = uxc_list();
+		break;
+
+	case CMD_BOOT:
+		ret = uxc_boot();
+		break;
+
+	case CMD_START:
+		if (optind != argc - 2)
+			goto usage_out;
+
+		ret = uxc_start(argv[optind + 1], console);
+		break;
+
+	case CMD_STATE:
+		if (optind != argc - 2)
+			goto usage_out;
+
+		ret = uxc_state(argv[optind + 1]);
+		break;
+
+	case CMD_KILL:
+		if (optind == (argc - 3))
+			signal = atoi(argv[optind + 2]);
+		else if (optind > argc - 2)
+			goto usage_out;
+
+		ret = uxc_kill(argv[optind + 1], signal);
+		break;
+
+	case CMD_ENABLE:
+		if (optind != argc - 2)
+			goto usage_out;
+
+		ret = uxc_set(argv[optind + 1], NULL, 1, NULL, NULL, NULL, NULL);
+		/* a positive result only means "file written", not an error */
+		if (ret > 0)
+			ret = 0;
+		break;
+
+	case CMD_DISABLE:
+		if (optind != argc - 2)
+			goto usage_out;
+
+		ret = uxc_set(argv[optind + 1], NULL, 0, NULL, NULL, NULL, NULL);
+		/* a positive result only means "file written", not an error */
+		if (ret > 0)
+			ret = 0;
+		break;
+
+	case CMD_DELETE:
+		if (optind != argc - 2)
+			goto usage_out;
+
+		ret = uxc_delete(argv[optind + 1], force);
+		break;
+
+	case CMD_CREATE:
+		if (optind != argc - 2)
+			goto usage_out;
+
+		ret = uxc_exists(argv[optind + 1]);
+		if (ret)
+			goto runtime_out;
+
+		ret = uxc_set(argv[optind + 1], bundle, autostart, pidfile, tmprwsize, writepath, requiredmounts);
+		if (ret < 0)
+			goto runtime_out;
+
+		/* pick up the file uxc_set() has just written before creating */
+		if (ret > 0)
+			reload_conf();
+
+		ret = uxc_create(argv[optind + 1], false);
+		break;
+
+	default:
+		goto usage_out;
+	}
+
+	goto runtime_out;
+
+usage_out:
+	ret = usage();
+runtime_out:
+	runtime_free();
+settings_avl_out:
+	settings_free();
+settings_out:
+	blob_buf_free(&settingsbuf);
+conf_out:
+	blob_buf_free(&conf);
+out:
+	ubus_free(ctx);
+
+	if (ret < 0)
+		fprintf(stderr, "uxc error: %s\n", strerror(-ret));
+
+	return ret;
+}
return wdt_fd;
}
-static void watchdog_close(void)
+static void watchdog_close(bool with_release)
{
if (wdt_fd < 0)
return;
- if (write(wdt_fd, "V", 1) < 0)
- ERROR("WDT failed to write release: %m\n");
+ if (with_release) {
+ if (write(wdt_fd, "V", 1) < 0)
+ ERROR("WDT failed to write release: %m\n");
+ }
if (close(wdt_fd) == -1)
ERROR("WDT failed to close watchdog: %m\n");
return ioctl(wdt_fd, WDIOC_SETTIMEOUT, &wdt_drv_timeout);
}
+/*
+ * Log whether the watchdog reports that it caused the previous reset.
+ *
+ * Does nothing when no watchdog device is open. The boot status is only
+ * queried when the driver advertises WDIOF_CARDRESET among its options;
+ * every failed step is reported at debug level 2 and ends the function.
+ */
+static void watchdog_print_status(void)
+{
+	struct watchdog_info info;
+	int status;
+
+	if (wdt_fd < 0)
+		return;
+
+	if (ioctl(wdt_fd, WDIOC_GETSUPPORT, &info) != 0) {
+		DEBUG(2, "Watchdog GETSUPPORT failed\n");
+		return;
+	}
+
+	if ((info.options & WDIOF_CARDRESET) == 0) {
+		DEBUG(2, "Watchdog does not have CARDRESET support\n");
+		return;
+	}
+
+	if (ioctl(wdt_fd, WDIOC_GETBOOTSTATUS, &status) != 0) {
+		DEBUG(2, "Watchdog GETBOOTSTATUS failed\n");
+		return;
+	}
+
+	if ((status & WDIOF_CARDRESET) == 0) {
+		DEBUG(2, "Watchdog did not previously reset the system\n");
+	} else {
+		LOG("Watchdog has previously reset the system\n");
+	}
+}
+
void watchdog_set_magicclose(bool val)
{
wdt_magicclose = val;
if (val) {
uloop_timeout_cancel(&wdt_timeout);
- if (wdt_magicclose)
- watchdog_close();
+ watchdog_close(wdt_magicclose);
}
else {
watchdog_open(true);
watchdog_timeout_cb(&wdt_timeout);
DEBUG(4, "Opened watchdog with timeout %ds\n", watchdog_timeout(0));
+
+ watchdog_print_status();
}