jail: parse OCI cgroups resources
[project/procd.git] / jail / cgroups.c
1 /*
2 * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License version 2.1
6 * as published by the Free Software Foundation
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * reads unified cgroup config as proposed in
14 * https://github.com/opencontainers/runtime-spec/pull/1040
15 * attempt conversion from cgroup1 -> cgroup2
16 * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2
17 *
18 * ToDo:
19 * - convert cgroup1 devices to eBPF program
20 * - convert cgroup1 net_prio and net_cls to eBPF program
21 * - rdma (anyone?) intelrdt (anyone?)
22 */
23
24 #define _GNU_SOURCE
25
26 #include <assert.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <sys/stat.h>
33 #include <sys/mman.h>
34 #include <unistd.h>
35 #include <libgen.h>
36 #include <inttypes.h>
37
38 #include <libubox/avl.h>
39 #include <libubox/avl-cmp.h>
40 #include <libubox/blobmsg.h>
41 #include <libubox/list.h>
42
43 #include "fs.h"
44 #include "log.h"
45 #include "cgroups.h"
46
/* Root of the cgroup filesystem as used by this file (note the trailing slash). */
#define CGROUP_ROOT		"/sys/fs/cgroup/"

/* Largest weight value the blockIO conversion accepts before failing with ERANGE. */
#define CGROUP_IO_WEIGHT_MAX	10000
49
50 struct cgval {
51 struct avl_node avl;
52 char *val;
53 };
54
55 struct avl_tree cgvals;
56 static char *cgroup_path;
57
58 void cgroups_init(const char *p) {
59 avl_init(&cgvals, avl_strcmp, false, NULL);
60 cgroup_path = strdup(p);
61 }
62
63 static void cgroups_set(const char *key, const char *val)
64 {
65 struct cgval *valp;
66
67 valp = avl_find_element(&cgvals, key, valp, avl);
68 if (!valp) {
69 valp = malloc(sizeof(struct cgval));
70 assert(valp != NULL);
71 valp->avl.key = strdup(key);
72 avl_insert(&cgvals, &valp->avl);
73 } else {
74 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key, valp->val);
75 free(valp->val);
76 }
77
78 valp->val = strdup(val);
79 }
80
81 void cgroups_free(void)
82 {
83 struct cgval *valp, *tmp;
84 avl_for_each_element_safe(&cgvals, valp, avl, tmp) {
85 avl_delete(&cgvals, &valp->avl);
86 free((void *)(valp->avl.key));
87 free(valp->val);
88 free(valp);
89 }
90 free(cgroup_path);
91 }
92
93 void cgroups_apply(pid_t pid)
94 {
95 struct cgval *valp;
96 char *cdir, *ent;
97 int fd;
98 size_t maxlen = strlen("cgroup.subtree_control");
99
100 bool cpuset = false,
101 cpu = false,
102 hugetlb = false,
103 io = false,
104 memory = false,
105 pids = false,
106 rdma = false;
107
108 char subtree_control[64] = { 0 };
109
110 DEBUG("using cgroup path %s\n", cgroup_path);
111 mkdir_p(cgroup_path, 0700);
112
113 /* find which controllers need to be enabled */
114 avl_for_each_element(&cgvals, valp, avl) {
115 ent = (char *)valp->avl.key;
116 if (strlen(ent) > maxlen)
117 maxlen = strlen(ent);
118
119 if (!strncmp("cpuset.", ent, 7))
120 cpuset = true;
121 else if (!strncmp("cpu.", ent, 4))
122 cpu = true;
123 else if (!strncmp("hugetlb.", ent, 8))
124 hugetlb = true;
125 else if (!strncmp("io.", ent, 3))
126 io = true;
127 else if (!strncmp("memory.", ent, 7))
128 memory = true;
129 else if (!strncmp("pids.", ent, 5))
130 pids = true;
131 else if (!strncmp("rdma.", ent, 5))
132 pids = true;
133 }
134
135 maxlen += strlen(cgroup_path) + 2;
136
137 if (cpuset)
138 strcat(subtree_control, "+cpuset ");
139
140 if (cpu)
141 strcat(subtree_control, "+cpu ");
142
143 if (hugetlb)
144 strcat(subtree_control, "+hugetlb ");
145
146 if (io)
147 strcat(subtree_control, "+io ");
148
149 if (memory)
150 strcat(subtree_control, "+memory ");
151
152 if (pids)
153 strcat(subtree_control, "+pids ");
154
155 if (rdma)
156 strcat(subtree_control, "+rdma ");
157
158 /* remove trailing space */
159 ent = strchr(subtree_control, '\0') - 1;
160 *ent = '\0';
161
162 ent = malloc(maxlen);
163 assert(ent != 0);
164
165 DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control);
166 cdir = &cgroup_path[strlen(CGROUP_ROOT) - 2];
167 while ((cdir = strchr(cdir + 1, '/'))) {
168 *cdir = '\0';
169 snprintf(ent, maxlen, "%s/cgroup.subtree_control", cgroup_path);
170 DEBUG(" * %s\n", ent);
171 fd = open(ent, O_WRONLY);
172 assert(fd != -1);
173 write(fd, subtree_control, strlen(subtree_control));
174 close(fd);
175 *cdir = '/';
176 }
177
178 avl_for_each_element(&cgvals, valp, avl) {
179 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp->avl.key, valp->val);
180 snprintf(ent, maxlen, "%s/%s", cgroup_path, (char *)valp->avl.key);
181 fd = open(ent, O_WRONLY);
182 if (fd == -1) {
183 ERROR("can't open %s: %m\n", ent);
184 continue;
185 }
186 if (dprintf(fd, "%s", valp->val) < 0) {
187 ERROR("can't write to %s: %m\n", ent);
188 };
189 close(fd);
190 }
191
192 snprintf(ent, maxlen, "%s/%s", cgroup_path, "cgroup.procs");
193 fd = open(ent, O_WRONLY);
194 assert(fd != -1);
195 dprintf(fd, "%d", pid);
196 close(fd);
197
198 free(ent);
199
200 cgroups_free();
201 }
202
/* Attribute indices for one entry of the blockIO "weightDevice" array. */
enum {
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR = 0,
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR = 1,
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT = 2,
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT = 3,
	__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX = 4,	/* number of attributes */
};
210
211 static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy[] = {
212 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
213 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
214 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
215 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
216 };
217
/* Attribute indices for one entry of a blockIO "throttle*Device" array. */
enum {
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR = 0,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR = 1,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE = 2,
	__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX = 3,	/* number of attributes */
};
224
225 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy[] = {
226 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
227 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
228 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE] = { "rate", BLOBMSG_CAST_INT64 },
229 };
230
/* Attribute indices for the OCI "blockIO" resources object. */
enum {
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT = 0,
	OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT = 1,
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE = 2,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE = 3,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE = 4,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE = 5,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE = 6,
	__OCI_LINUX_CGROUPS_BLOCKIO_MAX = 7,	/* number of attributes */
};
241
242 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy[] = {
243 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
244 [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
245 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE] = { "weightDevice", BLOBMSG_TYPE_ARRAY },
246 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY },
247 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY },
248 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY },
249 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY },
250 };
251
/* Device number pair; used as the lookup key for per-device io.max lines. */
struct posix_dev {
	uint64_t major;	/* device major number */
	uint64_t minor;	/* device minor number */
};
256
257 struct iomax_line {
258 struct avl_node avl;
259 struct posix_dev dev;
260 uint64_t rbps;
261 uint64_t wbps;
262 uint64_t riops;
263 uint64_t wiops;
264 };
265
266 static int avl_devcmp(const void *k1, const void *k2, void *ptr)
267 {
268 struct posix_dev *d1 = (struct posix_dev *)k1, *d2 = (struct posix_dev *)k2;
269
270 if (d1->major < d2->major)
271 return -1;
272
273 if (d1->major > d2->major)
274 return 1;
275
276 if (d1->minor < d2->minor)
277 return -1;
278
279 if (d1->minor > d2->minor)
280 return 1;
281
282 return 0;
283 }
284
285 static struct iomax_line *get_iomax_line(struct avl_tree *iomax, uint64_t major, uint64_t minor)
286 {
287 struct iomax_line *l;
288 struct posix_dev d;
289 d.major = major;
290 d.minor = minor;
291 l = avl_find_element(iomax, &d, l, avl);
292 if (!l) {
293 l = malloc(sizeof(struct iomax_line));
294 assert(l != NULL);
295 l->dev.major = d.major;
296 l->dev.minor = d.minor;
297 l->avl.key = &l->dev;
298 l->rbps = -1;
299 l->wbps = -1;
300 l->riops = -1;
301 l->wiops = -1;
302 avl_insert(iomax, &l->avl);
303 }
304
305 return l;
306 }
307
308 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr *msg)
309 {
310 struct blob_attr *tb[__OCI_LINUX_CGROUPS_BLOCKIO_MAX],
311 *tbwd[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX],
312 *tbtd[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX],
313 *cur;
314 int rem;
315 int weight = -1, leafweight = -1;
316 size_t numweightstrs = 0, numiomaxstrs = 0, strtotlen = 1;
317 char **weightstrs = NULL, **iomaxstrs = NULL, **curstr;
318 char *weightstr, *iomaxstr;
319 struct avl_tree iomax;
320 struct iomax_line *curiomax, *tmp;
321
322 blobmsg_parse(oci_linux_cgroups_blockio_policy, __OCI_LINUX_CGROUPS_BLOCKIO_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
323
324 if (tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]) {
325 weight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
326 ++numweightstrs;
327 }
328
329 if (weight > CGROUP_IO_WEIGHT_MAX)
330 return ERANGE;
331
332 if (tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT])
333 leafweight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
334
335 if (leafweight > CGROUP_IO_WEIGHT_MAX)
336 return ERANGE;
337
338 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem)
339 ++numweightstrs;
340
341 weightstrs = calloc(numweightstrs + 1, sizeof(char *));
342 assert(weightstrs != 0);
343 numweightstrs = 0;
344
345 if (weight > -1)
346 asprintf(&weightstrs[numweightstrs++], "default %d", weight);
347
348 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) {
349 uint64_t major, minor;
350 int devweight = weight, devleafweight = leafweight;
351
352 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, tbwd, blobmsg_data(cur), blobmsg_len(cur));
353 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] ||
354 !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR])
355 return ENODATA;
356
357 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] &&
358 !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
359 return ENODATA;
360
361 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT])
362 devweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]);
363
364 if (devweight > CGROUP_IO_WEIGHT_MAX)
365 return ERANGE;
366
367 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
368 devleafweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]);
369
370 if (devleafweight > CGROUP_IO_WEIGHT_MAX)
371 return ERANGE;
372
373 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
374 return ENOTSUP;
375
376 major = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR]);
377 minor = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]);
378
379 asprintf(&weightstrs[numweightstrs++], "%" PRIu64 ":%" PRIu64 " %u", major, minor, devweight);
380 }
381
382 if (numweightstrs) {
383 curstr = weightstrs;
384 while (*curstr)
385 strtotlen += strlen(*(curstr++)) + 1;
386
387 weightstr = calloc(strtotlen, sizeof(char));
388 assert(weightstr != 0);
389
390 curstr = weightstrs;
391 while (*curstr) {
392 strcat(weightstr, *curstr);
393 strcat(weightstr, "\n");
394 free(*(curstr++));
395 }
396
397 cgroups_set("io.bfq.weight", weightstr);
398 free(weightstr);
399 };
400
401 free(weightstrs);
402
403 avl_init(&iomax, avl_devcmp, false, NULL);
404
405 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE], rem) {
406 struct iomax_line *l;
407
408 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
409
410 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
411 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
412 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
413 return ENODATA;
414
415 l = get_iomax_line(&iomax,
416 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
417 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
418
419 l->rbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
420 }
421
422 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE], rem) {
423 struct iomax_line *l;
424
425 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
426
427 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
428 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
429 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
430 return ENODATA;
431
432 l = get_iomax_line(&iomax,
433 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
434 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
435
436 l->wbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
437 }
438
439 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE], rem) {
440 struct iomax_line *l;
441
442 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
443
444 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
445 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
446 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
447 return ENODATA;
448
449 l = get_iomax_line(&iomax,
450 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
451 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
452
453 l->riops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
454 }
455
456 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE], rem) {
457 struct iomax_line *l;
458
459 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
460
461 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
462 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
463 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
464 return ENODATA;
465
466 l = get_iomax_line(&iomax,
467 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
468 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
469
470 l->wiops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
471 }
472
473 avl_for_each_element(&iomax, curiomax, avl)
474 ++numiomaxstrs;
475
476 if (!numiomaxstrs)
477 return 0;
478
479 iomaxstrs = calloc(numiomaxstrs + 1, sizeof(char *));
480 assert(iomaxstrs != 0);
481 numiomaxstrs = 0;
482
483 avl_for_each_element(&iomax, curiomax, avl) {
484 char iomaxlstr[160];
485 char lstr[32];
486
487 sprintf(iomaxlstr, "%" PRIu64 ":%" PRIu64 " ", curiomax->dev.major, curiomax->dev.minor);
488
489 if (curiomax->rbps != -1) {
490 sprintf(lstr, "rbps=%" PRIu64 " ", curiomax->rbps);
491 strcat(iomaxlstr, lstr);
492 }
493 if (curiomax->wbps != -1) {
494 sprintf(lstr, "wbps=%" PRIu64 " ", curiomax->wbps);
495 strcat(iomaxlstr, lstr);
496 }
497 if (curiomax->riops != -1) {
498 sprintf(lstr, "riops=%" PRIu64 " ", curiomax->riops);
499 strcat(iomaxlstr, lstr);
500 }
501 if (curiomax->wiops != -1) {
502 sprintf(lstr, "wiops=%" PRIu64 " ", curiomax->wiops);
503 strcat(iomaxlstr, lstr);
504 }
505
506 iomaxstrs[numiomaxstrs++] = strdup(iomaxlstr);
507 }
508
509 avl_for_each_element_safe(&iomax, curiomax, avl, tmp) {
510 avl_delete(&iomax, &curiomax->avl);
511 free(curiomax);
512 }
513
514 strtotlen = 1; /* 1 accounts for \0 at end of string */
515 if (numiomaxstrs) {
516 curstr = iomaxstrs;
517 while (*curstr)
518 strtotlen += strlen(*(curstr++)) + 1; /* +1 accounts for \n at end of line */
519
520 iomaxstr = calloc(strtotlen, sizeof(char));
521 assert(iomaxstr != 0);
522 curstr = iomaxstrs;
523
524 while (*curstr) {
525 strcat(iomaxstr, *curstr);
526 strcat(iomaxstr, "\n");
527 free(*(curstr++));
528 }
529
530 cgroups_set("io.max", iomaxstr);
531 free(iomaxstr);
532 };
533
534 free(iomaxstrs);
535
536 return 0;
537 }
538
539
/* Attribute indices for the OCI "cpu" resources object. */
enum {
	OCI_LINUX_CGROUPS_CPU_SHARES = 0,
	OCI_LINUX_CGROUPS_CPU_PERIOD = 1,
	OCI_LINUX_CGROUPS_CPU_QUOTA = 2,
	OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME = 3,
	OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD = 4,
	OCI_LINUX_CGROUPS_CPU_CPUS = 5,
	OCI_LINUX_CGROUPS_CPU_MEMS = 6,
	__OCI_LINUX_CGROUPS_CPU_MAX = 7,	/* number of attributes */
};
550
551 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy[] = {
552 [OCI_LINUX_CGROUPS_CPU_SHARES] = { "shares", BLOBMSG_CAST_INT64 },
553 [OCI_LINUX_CGROUPS_CPU_PERIOD] = { "period", BLOBMSG_CAST_INT64 },
554 [OCI_LINUX_CGROUPS_CPU_QUOTA] = { "quota", BLOBMSG_CAST_INT64 }, /* signed int64! */
555 [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] = { "realtimePeriod", BLOBMSG_CAST_INT64 },
556 [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME] = { "realtimeRuntime", BLOBMSG_CAST_INT64 },
557 [OCI_LINUX_CGROUPS_CPU_CPUS] = { "cpus", BLOBMSG_TYPE_STRING },
558 [OCI_LINUX_CGROUPS_CPU_MEMS] = { "mems", BLOBMSG_TYPE_STRING },
559 };
560
561 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr *msg)
562 {
563 struct blob_attr *tb[__OCI_LINUX_CGROUPS_CPU_MAX];
564 uint64_t shares, period = 0;
565 int64_t quota = -2; /* unset */
566 char tmp[32] = { 0 };
567
568 blobmsg_parse(oci_linux_cgroups_cpu_policy, __OCI_LINUX_CGROUPS_CPU_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
569
570 if (tb[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] ||
571 tb[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME])
572 return ENOTSUP; /* no equivalent in cgroup2 */
573
574 if (tb[OCI_LINUX_CGROUPS_CPU_SHARES]) {
575 shares = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_SHARES]);
576 if ((shares < 2) || (shares > 262144))
577 return ERANGE;
578
579 snprintf(tmp, sizeof(tmp), "%" PRIu64, (((uint64_t)1) + ((shares - 2) * 9999) / 262142));
580 cgroups_set("cpu.weight", tmp);
581 tmp[0] = '\0';
582 }
583
584 if (tb[OCI_LINUX_CGROUPS_CPU_QUOTA])
585 quota = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_CPU_QUOTA]);
586
587 if (tb[OCI_LINUX_CGROUPS_CPU_PERIOD])
588 period = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_PERIOD]);
589
590 if (period) {
591 if (quota >= 0)
592 snprintf(tmp, sizeof(tmp), "%" PRId64 " %" PRIu64 , quota, period);
593 else
594 snprintf(tmp, sizeof(tmp), "max %" PRIu64, period); /* assume default */
595 } else if (quota >= 0) {
596 snprintf(tmp, sizeof(tmp), "%" PRId64, quota);
597 } else if (quota == -1) {
598 strcpy(tmp, "max");
599 }
600
601 if (tmp[0])
602 cgroups_set("cpu.max", tmp);
603
604 if (tb[OCI_LINUX_CGROUPS_CPU_CPUS])
605 cgroups_set("cpuset.cpus", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_CPUS]));
606
607 if (tb[OCI_LINUX_CGROUPS_CPU_MEMS])
608 cgroups_set("cpuset.mems", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_MEMS]));
609
610 return 0;
611 }
612
613
/* Attribute indices for the OCI "memory" resources object. */
enum {
	OCI_LINUX_CGROUPS_MEMORY_LIMIT = 0,
	OCI_LINUX_CGROUPS_MEMORY_RESERVATION = 1,
	OCI_LINUX_CGROUPS_MEMORY_SWAP = 2,
	OCI_LINUX_CGROUPS_MEMORY_KERNEL = 3,
	OCI_LINUX_CGROUPS_MEMORY_KERNELTCP = 4,
	OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS = 5,
	OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER = 6,
	OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY = 7,
	__OCI_LINUX_CGROUPS_MEMORY_MAX = 8,	/* number of attributes */
};
625
626 static const struct blobmsg_policy oci_linux_cgroups_memory_policy[] = {
627 [OCI_LINUX_CGROUPS_MEMORY_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, /* signed int64! */
628 [OCI_LINUX_CGROUPS_MEMORY_RESERVATION] = { "reservation", BLOBMSG_CAST_INT64 }, /* signed int64! */
629 [OCI_LINUX_CGROUPS_MEMORY_SWAP] = { "swap", BLOBMSG_CAST_INT64 }, /* signed int64! */
630 [OCI_LINUX_CGROUPS_MEMORY_KERNEL] = { "kernel", BLOBMSG_CAST_INT64 }, /* signed int64! */
631 [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] = { "kernelTCP", BLOBMSG_CAST_INT64 }, /* signed int64! */
632 [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] = { "swappiness", BLOBMSG_CAST_INT64 },
633 [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL },
634 [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY] { "useHierarchy", BLOBMSG_TYPE_BOOL },
635 };
636
637 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr *msg)
638 {
639 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
640 char tmp[32] = { 0 };
641 int64_t limit, swap, reservation;
642
643 blobmsg_parse(oci_linux_cgroups_memory_policy, __OCI_LINUX_CGROUPS_MEMORY_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
644
645 if (tb[OCI_LINUX_CGROUPS_MEMORY_KERNEL] ||
646 tb[OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] ||
647 tb[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] ||
648 tb[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] ||
649 tb[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY])
650 return ENOTSUP; /* no equivalent in cgroup2 */
651
652 if (tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]) {
653 limit = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]);
654 if (limit == -1)
655 strcpy(tmp, "max");
656 else
657 snprintf(tmp, sizeof(tmp), "%" PRId64, limit);
658
659 cgroups_set("memory.max", tmp);
660 }
661
662 if (tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]) {
663 reservation = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]);
664
665 if (reservation == -1)
666 strcpy(tmp, "max");
667 else
668 snprintf(tmp, sizeof(tmp), "%" PRId64, reservation);
669
670 cgroups_set("memory.low", tmp);
671 }
672
673 if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]) {
674 swap = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]);
675
676 if (swap == -1)
677 strcpy(tmp, "max");
678 else
679 snprintf(tmp, sizeof(tmp), "%" PRId64, swap);
680
681 cgroups_set("memory.swap_max", tmp);
682 }
683
684 return 0;
685 }
686
687
/* Attribute indices for the OCI "pids" resources object. */
enum {
	OCI_LINUX_CGROUPS_PIDS_LIMIT = 0,
	__OCI_LINUX_CGROUPS_PIDS_MAX = 1,	/* number of attributes */
};
692
693 static const struct blobmsg_policy oci_linux_cgroups_pids_policy[] = {
694 [OCI_LINUX_CGROUPS_PIDS_LIMIT] = { "limit", BLOBMSG_CAST_INT64 },
695 };
696
697 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr *msg)
698 {
699 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
700 char tmp[32] = { 0 };
701
702 blobmsg_parse(oci_linux_cgroups_pids_policy, __OCI_LINUX_CGROUPS_PIDS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
703
704 if (!tb[OCI_LINUX_CGROUPS_PIDS_LIMIT])
705 return EINVAL;
706
707 snprintf(tmp, sizeof(tmp), "%" PRIu64, blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_PIDS_LIMIT]));
708
709 cgroups_set("pids.max", tmp);
710
711 return 0;
712 }
713
714 static int parseOCIlinuxcgroups_unified(struct blob_attr *msg)
715 {
716 struct blob_attr *cur;
717 int rem;
718
719 blobmsg_for_each_attr(cur, msg, rem) {
720 if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING)
721 return EINVAL;
722
723 cgroups_set(blobmsg_name(cur), blobmsg_get_string(cur));
724 }
725
726 return 0;
727 }
728
/* Attribute indices for the top-level OCI linux "resources" object. */
enum {
	OCI_LINUX_CGROUPS_BLOCKIO = 0,
	OCI_LINUX_CGROUPS_CPU = 1,
	OCI_LINUX_CGROUPS_DEVICES = 2,
	OCI_LINUX_CGROUPS_HUGEPAGELIMITS = 3,
	OCI_LINUX_CGROUPS_INTELRDT = 4,
	OCI_LINUX_CGROUPS_MEMORY = 5,
	OCI_LINUX_CGROUPS_NETWORK = 6,
	OCI_LINUX_CGROUPS_PIDS = 7,
	OCI_LINUX_CGROUPS_RDMA = 8,
	OCI_LINUX_CGROUPS_UNIFIED = 9,
	__OCI_LINUX_CGROUPS_MAX = 10,	/* number of attributes */
};
742
743 static const struct blobmsg_policy oci_linux_cgroups_policy[] = {
744 [OCI_LINUX_CGROUPS_BLOCKIO] = { "blockIO", BLOBMSG_TYPE_TABLE },
745 [OCI_LINUX_CGROUPS_CPU] = { "cpu", BLOBMSG_TYPE_TABLE },
746 [OCI_LINUX_CGROUPS_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY },
747 [OCI_LINUX_CGROUPS_HUGEPAGELIMITS] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY },
748 [OCI_LINUX_CGROUPS_INTELRDT] = { "intelRdt", BLOBMSG_TYPE_TABLE },
749 [OCI_LINUX_CGROUPS_MEMORY] = { "memory", BLOBMSG_TYPE_TABLE },
750 [OCI_LINUX_CGROUPS_NETWORK] = { "network", BLOBMSG_TYPE_TABLE },
751 [OCI_LINUX_CGROUPS_PIDS] = { "pids", BLOBMSG_TYPE_TABLE },
752 [OCI_LINUX_CGROUPS_RDMA] = { "rdma", BLOBMSG_TYPE_TABLE },
753 [OCI_LINUX_CGROUPS_UNIFIED] = { "unified", BLOBMSG_TYPE_TABLE },
754 };
755
756 int parseOCIlinuxcgroups(struct blob_attr *msg)
757 {
758 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MAX];
759 int ret;
760
761 blobmsg_parse(oci_linux_cgroups_policy, __OCI_LINUX_CGROUPS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
762
763 if (tb[OCI_LINUX_CGROUPS_DEVICES] ||
764 tb[OCI_LINUX_CGROUPS_HUGEPAGELIMITS] ||
765 tb[OCI_LINUX_CGROUPS_INTELRDT] ||
766 tb[OCI_LINUX_CGROUPS_NETWORK] ||
767 tb[OCI_LINUX_CGROUPS_RDMA])
768 return ENOTSUP;
769
770 if (tb[OCI_LINUX_CGROUPS_BLOCKIO]) {
771 ret = parseOCIlinuxcgroups_legacy_blockio(tb[OCI_LINUX_CGROUPS_BLOCKIO]);
772 if (ret)
773 return ret;
774 }
775
776 if (tb[OCI_LINUX_CGROUPS_CPU]) {
777 ret = parseOCIlinuxcgroups_legacy_cpu(tb[OCI_LINUX_CGROUPS_CPU]);
778 if (ret)
779 return ret;
780 }
781
782 if (tb[OCI_LINUX_CGROUPS_MEMORY]) {
783 ret = parseOCIlinuxcgroups_legacy_memory(tb[OCI_LINUX_CGROUPS_MEMORY]);
784 if (ret)
785 return ret;
786 }
787
788 if (tb[OCI_LINUX_CGROUPS_PIDS]) {
789 ret = parseOCIlinuxcgroups_legacy_pids(tb[OCI_LINUX_CGROUPS_PIDS]);
790 if (ret)
791 return ret;
792 }
793
794 if (tb[OCI_LINUX_CGROUPS_UNIFIED]) {
795 ret = parseOCIlinuxcgroups_unified(tb[OCI_LINUX_CGROUPS_UNIFIED]);
796 if (ret)
797 return ret;
798 }
799
800 return 0;
801 }