2 * parse and setup OCI seccomp filter
3 * Copyright (c) 2020 Daniel Golle <daniel@makrotopia.org>
4 * seccomp example with syscall reporting
5 * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
7 * Kees Cook <keescook@chromium.org>
8 * Will Drewry <wad@chromium.org>
10 * Use of this source code is governed by a BSD-style license that can be
11 * found in the LICENSE file.
15 * (check_arch)<t>---(check_syscall)<f>---+----[...]<f>---(return default_action)
17 * KILL (check_argument)<f>--+
29 #include <libubox/utils.h>
30 #include <libubox/blobmsg.h>
31 #include <libubox/blobmsg_json.h>
34 #include "seccomp-bpf.h"
35 #include "seccomp-oci.h"
36 #include "../syscall-names.h"
37 #include "seccomp-syscalls-helpers.h"
39 static uint32_t resolve_action(char *actname
)
41 if (!strcmp(actname
, "SCMP_ACT_KILL"))
42 return SECCOMP_RET_KILL
;
43 else if (!strcmp(actname
, "SCMP_ACT_KILL_PROCESS"))
44 return SECCOMP_RET_KILLPROCESS
;
45 else if (!strcmp(actname
, "SCMP_ACT_TRAP"))
46 return SECCOMP_RET_TRAP
;
47 else if (!strcmp(actname
, "SCMP_ACT_ERRNO"))
48 return SECCOMP_RET_ERRNO
;
49 else if (!strcmp(actname
, "SCMP_ACT_ERROR"))
50 return SECCOMP_RET_ERRNO
;
51 else if (!strcmp(actname
, "SCMP_ACT_TRACE"))
52 return SECCOMP_RET_TRACE
;
53 else if (!strcmp(actname
, "SCMP_ACT_ALLOW"))
54 return SECCOMP_RET_ALLOW
;
55 else if (!strcmp(actname
, "SCMP_ACT_LOG"))
56 return SECCOMP_RET_LOGALLOW
;
58 ERROR("unknown seccomp action %s\n", actname
);
59 return SECCOMP_RET_KILL
;
/*
 * Select the BPF comparison for an OCI argument operator name.
 * The names handled are SCMP_CMP_NE, LT, LE, EQ, GE, GT and MASKED_EQ;
 * NE, LT and LE are annotated as inversions of EQ, GE and GT.
 * Any other name is reported through ERROR().
 * NOTE(review): the return statements are not visible in this view;
 * presumably each branch returns a BPF jump opcode and the fall-through
 * returns 0 (the caller tests the result with `!`) -- confirm against
 * the complete file.
 */
63 static uint8_t resolve_op_ins(const char *op
)
65 if (!strcmp(op
, "SCMP_CMP_NE")) /* invert EQ */
67 else if (!strcmp(op
, "SCMP_CMP_LT")) /* invert GE */
69 else if (!strcmp(op
, "SCMP_CMP_LE")) /* invert GT */
71 else if (!strcmp(op
, "SCMP_CMP_EQ"))
73 else if (!strcmp(op
, "SCMP_CMP_GE"))
75 else if (!strcmp(op
, "SCMP_CMP_GT"))
77 else if (!strcmp(op
, "SCMP_CMP_MASKED_EQ"))
80 ERROR("unknown seccomp op %s\n", op
);
/*
 * Tell whether op names the masked comparison "SCMP_CMP_MASKED_EQ".
 * NOTE(review): the return statements are not visible in this view;
 * presumably true on a match and false otherwise -- confirm.
 */
85 static bool resolve_op_is_masked(const char *op
)
87 if (!strcmp(op
, "SCMP_CMP_MASKED_EQ"))
/*
 * Tell whether op is one of SCMP_CMP_NE, SCMP_CMP_LT or SCMP_CMP_LE,
 * i.e. the operators that resolve_op_ins() annotates as inverted.
 * NOTE(review): the return statements are not visible in this view;
 * presumably true for these three names and false otherwise -- confirm.
 */
93 static bool resolve_op_inv(const char *op
)
95 if (!strcmp(op
, "SCMP_CMP_NE") ||
96 !strcmp(op
, "SCMP_CMP_LT") ||
97 !strcmp(op
, "SCMP_CMP_LE"))
/*
 * Translate an OCI architecture name (SCMP_ARCH_*) into the matching
 * AUDIT_ARCH_* constant.
 * SCMP_ARCH_X32 is recognised but has no visible AUDIT_ARCH_* return;
 * the remark in that branch says 32-bit userland on a 64-bit kernel is
 * not supported yet.
 * A name matching none of the branches is reported through ERROR().
 * NOTE(review): the lines right after the signature, the body of the
 * X32 branch and the final return are not visible in this view; the
 * value returned for unsupported or unknown names must be confirmed.
 */
103 static uint32_t resolve_architecture(char *archname
)
108 if (!strcmp(archname
, "SCMP_ARCH_X86"))
109 return AUDIT_ARCH_I386
;
110 else if (!strcmp(archname
, "SCMP_ARCH_X86_64"))
111 return AUDIT_ARCH_X86_64
;
112 else if (!strcmp(archname
, "SCMP_ARCH_X32"))
114 * return AUDIT_ARCH_X86_64;
115 * 32-bit userland on 64-bit kernel is not supported yet
118 else if (!strcmp(archname
, "SCMP_ARCH_ARM"))
119 return AUDIT_ARCH_ARM
;
120 else if (!strcmp(archname
, "SCMP_ARCH_AARCH64"))
121 return AUDIT_ARCH_AARCH64
;
122 else if (!strcmp(archname
, "SCMP_ARCH_MIPS"))
123 return AUDIT_ARCH_MIPS
;
124 else if (!strcmp(archname
, "SCMP_ARCH_MIPS64"))
125 return AUDIT_ARCH_MIPS64
;
126 else if (!strcmp(archname
, "SCMP_ARCH_MIPS64N32"))
127 return AUDIT_ARCH_MIPS64N32
;
128 else if (!strcmp(archname
, "SCMP_ARCH_MIPSEL"))
129 return AUDIT_ARCH_MIPSEL
;
130 else if (!strcmp(archname
, "SCMP_ARCH_MIPSEL64"))
131 return AUDIT_ARCH_MIPSEL64
;
132 else if (!strcmp(archname
, "SCMP_ARCH_MIPSEL64N32"))
133 return AUDIT_ARCH_MIPSEL64N32
;
134 else if (!strcmp(archname
, "SCMP_ARCH_PPC"))
135 return AUDIT_ARCH_PPC
;
136 else if (!strcmp(archname
, "SCMP_ARCH_PPC64"))
137 return AUDIT_ARCH_PPC64
;
138 else if (!strcmp(archname
, "SCMP_ARCH_PPC64LE"))
139 return AUDIT_ARCH_PPC64LE
;
140 else if (!strcmp(archname
, "SCMP_ARCH_S390"))
141 return AUDIT_ARCH_S390
;
142 else if (!strcmp(archname
, "SCMP_ARCH_S390X"))
143 return AUDIT_ARCH_S390X
;
144 else if (!strcmp(archname
, "SCMP_ARCH_PARISC"))
145 return AUDIT_ARCH_PARISC
;
146 else if (!strcmp(archname
, "SCMP_ARCH_PARISC64"))
147 return AUDIT_ARCH_PARISC64
;
149 ERROR("unknown seccomp architecture %s\n", archname
);
/*
 * Indices of the top-level seccomp attributes. They select entries of
 * oci_linux_seccomp_policy[] and of the tb[] array filled by
 * blobmsg_parse(); __OCI_LINUX_SECCOMP_MAX is the number of entries.
 * NOTE(review): the line opening this enumeration is not visible here.
 */
155 OCI_LINUX_SECCOMP_DEFAULTACTION
,
156 OCI_LINUX_SECCOMP_ARCHITECTURES
,
157 OCI_LINUX_SECCOMP_FLAGS
,
158 OCI_LINUX_SECCOMP_SYSCALLS
,
159 __OCI_LINUX_SECCOMP_MAX
,
162 static const struct blobmsg_policy oci_linux_seccomp_policy
[] = {
163 [OCI_LINUX_SECCOMP_DEFAULTACTION
] = { "defaultAction", BLOBMSG_TYPE_STRING
},
164 [OCI_LINUX_SECCOMP_ARCHITECTURES
] = { "architectures", BLOBMSG_TYPE_ARRAY
},
165 [OCI_LINUX_SECCOMP_FLAGS
] = { "flags", BLOBMSG_TYPE_ARRAY
},
166 [OCI_LINUX_SECCOMP_SYSCALLS
] = { "syscalls", BLOBMSG_TYPE_ARRAY
},
/*
 * Indices of the attributes of one "syscalls" entry. They select entries
 * of oci_linux_seccomp_syscalls_policy[] and of the tbn[] array filled by
 * blobmsg_parse(); __OCI_LINUX_SECCOMP_SYSCALLS_MAX is the entry count.
 * NOTE(review): the line opening this enumeration is not visible here.
 */
170 OCI_LINUX_SECCOMP_SYSCALLS_NAMES
,
171 OCI_LINUX_SECCOMP_SYSCALLS_ACTION
,
172 OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET
,
173 OCI_LINUX_SECCOMP_SYSCALLS_ARGS
,
174 __OCI_LINUX_SECCOMP_SYSCALLS_MAX
177 static const struct blobmsg_policy oci_linux_seccomp_syscalls_policy
[] = {
178 [OCI_LINUX_SECCOMP_SYSCALLS_NAMES
] = { "names", BLOBMSG_TYPE_ARRAY
},
179 [OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET
] = { "errnoRet", BLOBMSG_TYPE_INT32
},
180 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS
] = { "args", BLOBMSG_TYPE_ARRAY
},
181 [OCI_LINUX_SECCOMP_SYSCALLS_ACTION
] = { "action", BLOBMSG_TYPE_STRING
},
/*
 * Indices of the attributes of one "args" entry. They select entries of
 * oci_linux_seccomp_syscalls_args_policy[] and of the tba[] array filled
 * by blobmsg_parse(); __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX is the count.
 * NOTE(review): the line opening this enumeration is not visible here.
 */
185 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX
,
186 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE
,
187 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO
,
188 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP
,
189 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX
192 static const struct blobmsg_policy oci_linux_seccomp_syscalls_args_policy
[] = {
193 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX
] = { "index", BLOBMSG_TYPE_INT32
},
194 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE
] = { "value", BLOBMSG_CAST_INT64
},
195 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO
] = { "valueTwo", BLOBMSG_CAST_INT64
},
196 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP
] = { "op", BLOBMSG_TYPE_STRING
},
/*
 * Build a seccomp BPF program (struct sock_fprog) from an OCI seccomp
 * description held in the blob attribute msg.
 *
 * msg is split with oci_linux_seccomp_policy. A missing "defaultAction"
 * is reported through ERROR(). When "architectures" is present, every
 * entry is resolved and compared with ARCH_NR. A first walk over
 * "syscalls" accumulates the instruction count in sz; prog and filter
 * are then allocated and a second walk emits the instructions with
 * set_filter(). The last emitted instruction returns the default action,
 * and prog->len / prog->filter are filled in before a debug dump.
 *
 * NOTE(review): a number of lines of this function (braces, error
 * returns, some declarations such as sz and idx) are not visible in this
 * view; the comments only describe what the visible lines show.
 */
199 struct sock_fprog
*parseOCIlinuxseccomp(struct blob_attr
*msg
)
201 struct blob_attr
*tb
[__OCI_LINUX_SECCOMP_MAX
];
202 struct blob_attr
*tbn
[__OCI_LINUX_SECCOMP_SYSCALLS_MAX
];
203 struct blob_attr
*tba
[__OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX
];
204 struct blob_attr
*cur
, *curn
, *curarg
;
205 int rem
, remn
, remargs
, sc
;
206 struct sock_filter
*filter
;
207 struct sock_fprog
*prog
;
209 uint32_t default_policy
= 0;
210 uint32_t seccomp_arch
;
/* split the top-level object into tb[] */
214 blobmsg_parse(oci_linux_seccomp_policy
, __OCI_LINUX_SECCOMP_MAX
,
215 tb
, blobmsg_data(msg
), blobmsg_len(msg
));
/* "defaultAction" is mandatory */
217 if (!tb
[OCI_LINUX_SECCOMP_DEFAULTACTION
]) {
218 ERROR("seccomp: no default action set\n");
222 default_policy
= resolve_action(blobmsg_get_string(tb
[OCI_LINUX_SECCOMP_DEFAULTACTION
]));
224 /* verify architecture while ignoring the x86_64 anomaly for now */
225 if (tb
[OCI_LINUX_SECCOMP_ARCHITECTURES
]) {
226 arch_matched
= false;
227 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_SECCOMP_ARCHITECTURES
], rem
) {
228 seccomp_arch
= resolve_architecture(blobmsg_get_string(cur
));
229 if (ARCH_NR
== seccomp_arch
) {
235 ERROR("seccomp architecture doesn't match system\n");
/* pass 1: count the instructions needed so the filter can be allocated */
240 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_SECCOMP_SYSCALLS
], rem
) {
241 sz
+= 2; /* load and return */
243 blobmsg_parse(oci_linux_seccomp_syscalls_policy
,
244 __OCI_LINUX_SECCOMP_SYSCALLS_MAX
,
245 tbn
, blobmsg_data(cur
), blobmsg_len(cur
));
246 blobmsg_for_each_attr(curn
, tbn
[OCI_LINUX_SECCOMP_SYSCALLS_NAMES
], remn
) {
247 sc
= find_syscall(blobmsg_get_string(curn
));
249 DEBUG("unknown syscall '%s'\n", blobmsg_get_string(curn
));
250 /* TODO: support run.oci.seccomp_fail_unknown_syscall=1 annotation */
256 if (tbn
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS
]) {
257 blobmsg_for_each_attr(curarg
, tbn
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS
], remargs
) {
258 sz
+= 2; /* load and compare */
260 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy
,
261 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX
,
262 tba
, blobmsg_data(curarg
), blobmsg_len(curarg
));
/* "index", "value" and "op" are tested for presence here; the handling line is not visible */
263 if (!tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX
] ||
264 !tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE
] ||
265 !tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP
])
/* argument indices above 5 are singled out here; the handling line is not visible */
268 if (blobmsg_get_u32(tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX
]) > 5)
271 op_str
= blobmsg_get_string(tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP
]);
/* a zero result from resolve_op_ins() is singled out here; the handling line is not visible */
272 if (!resolve_op_ins(op_str
))
275 if (resolve_op_is_masked(op_str
))
276 ++sz
; /* SCMP_CMP_MASKED_EQ needs an extra BPF_AND op */
/* allocate the program header and the instruction array of sz entries */
284 prog
= malloc(sizeof(struct sock_fprog
));
288 filter
= calloc(sz
, sizeof(struct sock_filter
));
290 ERROR("failed to allocate memory for seccomp filter\n");
/*
 * prologue: load arch_nr, compare it with ARCH_NR, then a
 * SECCOMP_RET_KILL return. Presumably set_filter() takes
 * (insn, code, jt, jf, k), so a match skips the kill -- confirm.
 */
295 set_filter(&filter
[idx
++], BPF_LD
+ BPF_W
+ BPF_ABS
, 0, 0, arch_nr
);
296 set_filter(&filter
[idx
++], BPF_JMP
+ BPF_JEQ
+ BPF_K
, 1, 0, ARCH_NR
);
297 set_filter(&filter
[idx
++], BPF_RET
+ BPF_K
, 0, 0, SECCOMP_RET_KILL
);
/* pass 2: emit one section per "syscalls" entry */
299 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_SECCOMP_SYSCALLS
], rem
) {
303 bool op_inv
, op_masked
;
304 uint64_t op_val
, op_val2
;
308 blobmsg_parse(oci_linux_seccomp_syscalls_policy
,
309 __OCI_LINUX_SECCOMP_SYSCALLS_MAX
,
310 tbn
, blobmsg_data(cur
), blobmsg_len(cur
));
/*
 * NOTE(review): no check that the "action" attribute is present is
 * visible before blobmsg_get_string(); confirm a missing action
 * cannot reach resolve_action() as NULL.
 */
311 action
= resolve_action(blobmsg_get_string(
312 tbn
[OCI_LINUX_SECCOMP_SYSCALLS_ACTION
]));
/*
 * an explicit "errnoRet" is folded into the action through
 * SECCOMP_RET_ERROR(); an ERRNO action without it uses EPERM
 */
313 if (tbn
[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET
]) {
314 if (action
!= SECCOMP_RET_ERRNO
)
317 action
= SECCOMP_RET_ERROR(blobmsg_get_u32(
318 tbn
[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET
]));
319 } else if (action
== SECCOMP_RET_ERRNO
)
320 action
= SECCOMP_RET_ERROR(EPERM
);
/* each section starts by loading syscall_nr */
323 set_filter(&filter
[idx
++], BPF_LD
+ BPF_W
+ BPF_ABS
, 0, 0, syscall_nr
);
325 /* get number of syscall names */
327 blobmsg_for_each_attr(curn
, tbn
[OCI_LINUX_SECCOMP_SYSCALLS_NAMES
], remn
) {
328 if (find_syscall(blobmsg_get_string(curn
)) == -1)
333 start_rule_idx
= next_rule_idx
;
335 /* calculate length of argument filter rules */
336 blobmsg_for_each_attr(curn
, tbn
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS
], remn
) {
337 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy
,
338 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX
,
339 tba
, blobmsg_data(curn
), blobmsg_len(curn
));
341 op_str
= blobmsg_get_string(tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP
]);
342 if (resolve_op_is_masked(op_str
))
346 ++next_rule_idx
; /* account for return action */
348 blobmsg_for_each_attr(curn
, tbn
[OCI_LINUX_SECCOMP_SYSCALLS_NAMES
], remn
) {
349 sc
= find_syscall(blobmsg_get_string(curn
));
353 * check syscall, skip other syscall checks if match is found.
354 * if no match is found, jump to next section
356 set_filter(&filter
[idx
], BPF_JMP
+ BPF_JEQ
+ BPF_K
,
357 start_rule_idx
- (idx
+ 1),
358 ((idx
+ 1) == start_rule_idx
)?(next_rule_idx
- (idx
+ 1)):0,
/*
 * NOTE(review): the assert below uses `=` (assignment), not `==`.
 * As written it overwrites idx and only checks that start_rule_idx
 * is non-zero, and the assignment disappears under NDEBUG. Compare
 * with the later `assert(idx == next_rule_idx)`; `==` looks intended.
 */
363 assert(idx
= start_rule_idx
);
365 /* generate argument filter rules */
366 blobmsg_for_each_attr(curn
, tbn
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS
], remn
) {
367 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy
,
368 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX
,
369 tba
, blobmsg_data(curn
), blobmsg_len(curn
));
371 op_str
= blobmsg_get_string(tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP
]);
372 op_ins
= resolve_op_ins(op_str
);
373 op_inv
= resolve_op_inv(op_str
);
374 op_masked
= resolve_op_is_masked(op_str
);
375 op_idx
= blobmsg_get_u32(tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX
]);
376 op_val
= blobmsg_cast_u64(tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE
]);
/*
 * NOTE(review): op_val2 is assigned only when "valueTwo" is present,
 * yet it is the comparison operand whenever op_masked is set; no
 * other assignment is visible in this view -- confirm it cannot be
 * read uninitialised.
 */
377 if (tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO
])
378 op_val2
= blobmsg_cast_u64(tba
[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO
]);
/*
 * NOTE(review): the argument is loaded with BPF_W while op_val and
 * op_val2 are uint64_t; presumably only 32 bits take part in the
 * comparison -- confirm against set_filter() and syscall_arg().
 */
383 set_filter(&filter
[idx
++], BPF_LD
+ BPF_W
+ BPF_ABS
, 0, 0, syscall_arg(op_idx
));
/* AND with op_val; the guarding condition is not visible (presumably op_masked) */
387 set_filter(&filter
[idx
++], BPF_ALU
+ BPF_K
+ BPF_AND
, 0, 0, op_val
);
/* inverted operators swap the two jump offsets so a failed condition skips to next_rule_idx */
389 set_filter(&filter
[idx
], BPF_JMP
+ op_ins
+ BPF_K
,
390 op_inv
?(next_rule_idx
- (idx
+ 1)):0,
391 op_inv
?0:(next_rule_idx
- (idx
+ 1)),
392 op_masked
?op_val2
:op_val
);
396 /* if we have reached until here, all conditions were met and we can return */
397 set_filter(&filter
[idx
++], BPF_RET
+ BPF_K
, 0, 0, action
);
399 assert(idx
== next_rule_idx
);
/* epilogue: nothing matched, return the default action */
402 set_filter(&filter
[idx
++], BPF_RET
+ BPF_K
, 0, 0, default_policy
);
406 prog
->len
= (unsigned short) idx
;
407 prog
->filter
= filter
;
409 DEBUG("generated seccomp-bpf program:\n");
/* dump of the generated program to stderr; the loop is bounded by sz, not by prog->len */
411 fprintf(stderr
, " [idx]\tcode\t jt\t jf\tk\n");
412 for (idx
=0; idx
<sz
; idx
++)
413 fprintf(stderr
, " [%03d]\t%04hx\t%3hhu\t%3hhu\t%08x\n", idx
,
/*
 * Install prog as a seccomp filter through prctl(): first
 * PR_SET_NO_NEW_PRIVS is set to 1, then PR_SET_SECCOMP is called with
 * SECCOMP_MODE_FILTER and prog. A failure of either prctl() call is
 * reported through ERROR().
 * NOTE(review): the return statements and the end of the body are not
 * visible in this view; the returned values must be confirmed.
 */
430 int applyOCIlinuxseccomp(struct sock_fprog
*prog
)
432 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0)) {
433 ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
437 if (prctl(PR_SET_SECCOMP
, SECCOMP_MODE_FILTER
, prog
)) {
438 ERROR("prctl(PR_SET_SECCOMP) failed: %m\n");