97583b3e0e53adca4d074ef78e7692fd970b1037
[project/procd.git] / jail / cgroups.c
1 /*
2 * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License version 2.1
6 * as published by the Free Software Foundation
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * reads unified cgroup config as proposed in
14 * https://github.com/opencontainers/runtime-spec/pull/1040
15 * attempt conversion from cgroup1 -> cgroup2
16 * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2
17 *
18 * ToDo:
19 * - convert cgroup1 devices to eBPF program
20 * - convert cgroup1 net_prio and net_cls to eBPF program
21 * - rdma (anyone?) intelrdt (anyone?)
22 */
23
24 #define _GNU_SOURCE
25
26 #include <assert.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <sys/stat.h>
33 #include <sys/mman.h>
34 #include <unistd.h>
35 #include <libgen.h>
36 #include <inttypes.h>
37
38 #include <libubox/avl.h>
39 #include <libubox/avl-cmp.h>
40 #include <libubox/blobmsg.h>
41 #include <libubox/list.h>
42
43 #include "fs.h"
44 #include "log.h"
45 #include "cgroups.h"
46
/* mount point prefix (with trailing slash) used when walking cgroup_path in cgroups_apply() */
#define CGROUP_ROOT		"/sys/fs/cgroup/"
/* upper bound accepted for (legacy) blockIO weight values */
#define CGROUP_IO_WEIGHT_MAX	10000
49
50 struct cgval {
51 struct avl_node avl;
52 char *val;
53 };
54
55 struct avl_tree cgvals;
56 static char *cgroup_path;
57
58 void cgroups_init(const char *p) {
59 avl_init(&cgvals, avl_strcmp, false, NULL);
60 cgroup_path = strdup(p);
61 }
62
63 static void cgroups_set(const char *key, const char *val)
64 {
65 struct cgval *valp;
66
67 valp = avl_find_element(&cgvals, key, valp, avl);
68 if (!valp) {
69 valp = malloc(sizeof(struct cgval));
70 assert(valp != NULL);
71 valp->avl.key = strdup(key);
72 avl_insert(&cgvals, &valp->avl);
73 } else {
74 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key, valp->val);
75 free(valp->val);
76 }
77
78 valp->val = strdup(val);
79 }
80
81 void cgroups_free(void)
82 {
83 struct cgval *valp, *tmp;
84
85 avl_for_each_element_safe(&cgvals, valp, avl, tmp) {
86 avl_delete(&cgvals, &valp->avl);
87 free((void *)(valp->avl.key));
88 free(valp->val);
89 free(valp);
90 }
91 free(cgroup_path);
92 }
93
94 void cgroups_apply(pid_t pid)
95 {
96 struct cgval *valp;
97 char *cdir, *ent;
98 int fd;
99 size_t maxlen = strlen("cgroup.subtree_control");
100
101 bool cpuset = false,
102 cpu = false,
103 hugetlb = false,
104 io = false,
105 memory = false,
106 pids = false,
107 rdma = false;
108
109 char subtree_control[64] = { 0 };
110
111 DEBUG("using cgroup path %s\n", cgroup_path);
112 mkdir_p(cgroup_path, 0700);
113
114 /* find which controllers need to be enabled */
115 avl_for_each_element(&cgvals, valp, avl) {
116 ent = (char *)valp->avl.key;
117 if (strlen(ent) > maxlen)
118 maxlen = strlen(ent);
119
120 if (!strncmp("cpuset.", ent, 7))
121 cpuset = true;
122 else if (!strncmp("cpu.", ent, 4))
123 cpu = true;
124 else if (!strncmp("hugetlb.", ent, 8))
125 hugetlb = true;
126 else if (!strncmp("io.", ent, 3))
127 io = true;
128 else if (!strncmp("memory.", ent, 7))
129 memory = true;
130 else if (!strncmp("pids.", ent, 5))
131 pids = true;
132 else if (!strncmp("rdma.", ent, 5))
133 pids = true;
134 }
135
136 maxlen += strlen(cgroup_path) + 2;
137
138 if (cpuset)
139 strcat(subtree_control, "+cpuset ");
140
141 if (cpu)
142 strcat(subtree_control, "+cpu ");
143
144 if (hugetlb)
145 strcat(subtree_control, "+hugetlb ");
146
147 if (io)
148 strcat(subtree_control, "+io ");
149
150 if (memory)
151 strcat(subtree_control, "+memory ");
152
153 if (pids)
154 strcat(subtree_control, "+pids ");
155
156 if (rdma)
157 strcat(subtree_control, "+rdma ");
158
159 /* remove trailing space */
160 ent = strchr(subtree_control, '\0') - 1;
161 *ent = '\0';
162
163 ent = malloc(maxlen);
164 assert(ent != 0);
165
166 DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control);
167 cdir = &cgroup_path[strlen(CGROUP_ROOT) - 2];
168 while ((cdir = strchr(cdir + 1, '/'))) {
169 *cdir = '\0';
170 snprintf(ent, maxlen, "%s/cgroup.subtree_control", cgroup_path);
171 DEBUG(" * %s\n", ent);
172 fd = open(ent, O_WRONLY);
173 assert(fd != -1);
174 write(fd, subtree_control, strlen(subtree_control));
175 close(fd);
176 *cdir = '/';
177 }
178
179 avl_for_each_element(&cgvals, valp, avl) {
180 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp->avl.key, valp->val);
181 snprintf(ent, maxlen, "%s/%s", cgroup_path, (char *)valp->avl.key);
182 fd = open(ent, O_WRONLY);
183 if (fd == -1) {
184 ERROR("can't open %s: %m\n", ent);
185 continue;
186 }
187 if (dprintf(fd, "%s", valp->val) < 0) {
188 ERROR("can't write to %s: %m\n", ent);
189 };
190 close(fd);
191 }
192
193 snprintf(ent, maxlen, "%s/%s", cgroup_path, "cgroup.procs");
194 fd = open(ent, O_WRONLY);
195 assert(fd != -1);
196 dprintf(fd, "%d", pid);
197 close(fd);
198
199 free(ent);
200
201 cgroups_free();
202 }
203
/* indices into the parsed fields of one blockIO "weightDevice" entry */
enum {
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR = 0,
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR = 1,
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT = 2,
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT = 3,
	__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX = 4,
};
211
212 static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy[] = {
213 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
214 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
215 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
216 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
217 };
218
/* indices into the parsed fields of one blockIO throttle*Device entry */
enum {
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR = 0,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR = 1,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE = 2,
	__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX = 3,
};
225
226 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy[] = {
227 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
228 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
229 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE] = { "rate", BLOBMSG_CAST_INT64 },
230 };
231
/* indices into the parsed fields of the legacy "blockIO" table */
enum {
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT = 0,
	OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT = 1,
	OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE = 2,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE = 3,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE = 4,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE = 5,
	OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE = 6,
	__OCI_LINUX_CGROUPS_BLOCKIO_MAX = 7,
};
242
243 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy[] = {
244 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
245 [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
246 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE] = { "weightDevice", BLOBMSG_TYPE_ARRAY },
247 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY },
248 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY },
249 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY },
250 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY },
251 };
252
/* device number pair; used as the AVL key of struct iomax_line */
struct posix_dev {
	uint64_t major;
	uint64_t minor;
};
257
258 struct iomax_line {
259 struct avl_node avl;
260 struct posix_dev dev;
261 uint64_t rbps;
262 uint64_t wbps;
263 uint64_t riops;
264 uint64_t wiops;
265 };
266
267 static int avl_devcmp(const void *k1, const void *k2, void *ptr)
268 {
269 struct posix_dev *d1 = (struct posix_dev *)k1, *d2 = (struct posix_dev *)k2;
270
271 if (d1->major < d2->major)
272 return -1;
273
274 if (d1->major > d2->major)
275 return 1;
276
277 if (d1->minor < d2->minor)
278 return -1;
279
280 if (d1->minor > d2->minor)
281 return 1;
282
283 return 0;
284 }
285
286 static struct iomax_line *get_iomax_line(struct avl_tree *iomax, uint64_t major, uint64_t minor)
287 {
288 struct iomax_line *l;
289 struct posix_dev d;
290 d.major = major;
291 d.minor = minor;
292 l = avl_find_element(iomax, &d, l, avl);
293 if (!l) {
294 l = malloc(sizeof(struct iomax_line));
295 assert(l != NULL);
296 l->dev.major = d.major;
297 l->dev.minor = d.minor;
298 l->avl.key = &l->dev;
299 l->rbps = -1;
300 l->wbps = -1;
301 l->riops = -1;
302 l->wiops = -1;
303 avl_insert(iomax, &l->avl);
304 }
305
306 return l;
307 }
308
309 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr *msg)
310 {
311 struct blob_attr *tb[__OCI_LINUX_CGROUPS_BLOCKIO_MAX],
312 *tbwd[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX],
313 *tbtd[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX],
314 *cur;
315 int rem;
316 int weight = -1, leafweight = -1;
317 size_t numweightstrs = 0, numiomaxstrs = 0, strtotlen = 1;
318 char **weightstrs = NULL, **iomaxstrs = NULL, **curstr;
319 char *weightstr, *iomaxstr;
320 struct avl_tree iomax;
321 struct iomax_line *curiomax, *tmp;
322
323 blobmsg_parse(oci_linux_cgroups_blockio_policy, __OCI_LINUX_CGROUPS_BLOCKIO_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
324
325 if (tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]) {
326 weight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
327 ++numweightstrs;
328 }
329
330 if (weight > CGROUP_IO_WEIGHT_MAX)
331 return ERANGE;
332
333 if (tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT])
334 leafweight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
335
336 if (leafweight > CGROUP_IO_WEIGHT_MAX)
337 return ERANGE;
338
339 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem)
340 ++numweightstrs;
341
342 weightstrs = calloc(numweightstrs + 1, sizeof(char *));
343 assert(weightstrs != 0);
344 numweightstrs = 0;
345
346 if (weight > -1)
347 asprintf(&weightstrs[numweightstrs++], "default %d", weight);
348
349 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) {
350 uint64_t major, minor;
351 int devweight = weight, devleafweight = leafweight;
352
353 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, tbwd, blobmsg_data(cur), blobmsg_len(cur));
354 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] ||
355 !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR])
356 return ENODATA;
357
358 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] &&
359 !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
360 return ENODATA;
361
362 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT])
363 devweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]);
364
365 if (devweight > CGROUP_IO_WEIGHT_MAX)
366 return ERANGE;
367
368 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
369 devleafweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]);
370
371 if (devleafweight > CGROUP_IO_WEIGHT_MAX)
372 return ERANGE;
373
374 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
375 return ENOTSUP;
376
377 major = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR]);
378 minor = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]);
379
380 asprintf(&weightstrs[numweightstrs++], "%" PRIu64 ":%" PRIu64 " %u", major, minor, devweight);
381 }
382
383 if (numweightstrs) {
384 curstr = weightstrs;
385 while (*curstr)
386 strtotlen += strlen(*(curstr++)) + 1;
387
388 weightstr = calloc(strtotlen, sizeof(char));
389 assert(weightstr != 0);
390
391 curstr = weightstrs;
392 while (*curstr) {
393 strcat(weightstr, *curstr);
394 strcat(weightstr, "\n");
395 free(*(curstr++));
396 }
397
398 cgroups_set("io.bfq.weight", weightstr);
399 free(weightstr);
400 };
401
402 free(weightstrs);
403
404 avl_init(&iomax, avl_devcmp, false, NULL);
405
406 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE], rem) {
407 struct iomax_line *l;
408
409 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
410
411 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
412 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
413 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
414 return ENODATA;
415
416 l = get_iomax_line(&iomax,
417 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
418 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
419
420 l->rbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
421 }
422
423 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE], rem) {
424 struct iomax_line *l;
425
426 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
427
428 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
429 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
430 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
431 return ENODATA;
432
433 l = get_iomax_line(&iomax,
434 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
435 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
436
437 l->wbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
438 }
439
440 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE], rem) {
441 struct iomax_line *l;
442
443 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
444
445 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
446 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
447 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
448 return ENODATA;
449
450 l = get_iomax_line(&iomax,
451 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
452 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
453
454 l->riops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
455 }
456
457 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE], rem) {
458 struct iomax_line *l;
459
460 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
461
462 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
463 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
464 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
465 return ENODATA;
466
467 l = get_iomax_line(&iomax,
468 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
469 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
470
471 l->wiops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
472 }
473
474 avl_for_each_element(&iomax, curiomax, avl)
475 ++numiomaxstrs;
476
477 if (!numiomaxstrs)
478 return 0;
479
480 iomaxstrs = calloc(numiomaxstrs + 1, sizeof(char *));
481 assert(iomaxstrs != 0);
482 numiomaxstrs = 0;
483
484 avl_for_each_element(&iomax, curiomax, avl) {
485 char iomaxlstr[160];
486 char lstr[32];
487
488 sprintf(iomaxlstr, "%" PRIu64 ":%" PRIu64 " ", curiomax->dev.major, curiomax->dev.minor);
489
490 if (curiomax->rbps != -1) {
491 sprintf(lstr, "rbps=%" PRIu64 " ", curiomax->rbps);
492 strcat(iomaxlstr, lstr);
493 }
494 if (curiomax->wbps != -1) {
495 sprintf(lstr, "wbps=%" PRIu64 " ", curiomax->wbps);
496 strcat(iomaxlstr, lstr);
497 }
498 if (curiomax->riops != -1) {
499 sprintf(lstr, "riops=%" PRIu64 " ", curiomax->riops);
500 strcat(iomaxlstr, lstr);
501 }
502 if (curiomax->wiops != -1) {
503 sprintf(lstr, "wiops=%" PRIu64 " ", curiomax->wiops);
504 strcat(iomaxlstr, lstr);
505 }
506
507 iomaxstrs[numiomaxstrs++] = strdup(iomaxlstr);
508 }
509
510 avl_for_each_element_safe(&iomax, curiomax, avl, tmp) {
511 avl_delete(&iomax, &curiomax->avl);
512 free(curiomax);
513 }
514
515 strtotlen = 1; /* 1 accounts for \0 at end of string */
516 if (numiomaxstrs) {
517 curstr = iomaxstrs;
518 while (*curstr)
519 strtotlen += strlen(*(curstr++)) + 1; /* +1 accounts for \n at end of line */
520
521 iomaxstr = calloc(strtotlen, sizeof(char));
522 assert(iomaxstr != 0);
523 curstr = iomaxstrs;
524
525 while (*curstr) {
526 strcat(iomaxstr, *curstr);
527 strcat(iomaxstr, "\n");
528 free(*(curstr++));
529 }
530
531 cgroups_set("io.max", iomaxstr);
532 free(iomaxstr);
533 };
534
535 free(iomaxstrs);
536
537 return 0;
538 }
539
540
/* indices into the parsed fields of the legacy "cpu" table */
enum {
	OCI_LINUX_CGROUPS_CPU_SHARES = 0,
	OCI_LINUX_CGROUPS_CPU_PERIOD = 1,
	OCI_LINUX_CGROUPS_CPU_QUOTA = 2,
	OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME = 3,
	OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD = 4,
	OCI_LINUX_CGROUPS_CPU_CPUS = 5,
	OCI_LINUX_CGROUPS_CPU_MEMS = 6,
	__OCI_LINUX_CGROUPS_CPU_MAX = 7,
};
551
552 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy[] = {
553 [OCI_LINUX_CGROUPS_CPU_SHARES] = { "shares", BLOBMSG_CAST_INT64 },
554 [OCI_LINUX_CGROUPS_CPU_PERIOD] = { "period", BLOBMSG_CAST_INT64 },
555 [OCI_LINUX_CGROUPS_CPU_QUOTA] = { "quota", BLOBMSG_CAST_INT64 }, /* signed int64! */
556 [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] = { "realtimePeriod", BLOBMSG_CAST_INT64 },
557 [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME] = { "realtimeRuntime", BLOBMSG_CAST_INT64 },
558 [OCI_LINUX_CGROUPS_CPU_CPUS] = { "cpus", BLOBMSG_TYPE_STRING },
559 [OCI_LINUX_CGROUPS_CPU_MEMS] = { "mems", BLOBMSG_TYPE_STRING },
560 };
561
562 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr *msg)
563 {
564 struct blob_attr *tb[__OCI_LINUX_CGROUPS_CPU_MAX];
565 uint64_t shares, period = 0;
566 int64_t quota = -2; /* unset */
567 char tmp[32] = { 0 };
568
569 blobmsg_parse(oci_linux_cgroups_cpu_policy, __OCI_LINUX_CGROUPS_CPU_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
570
571 if (tb[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] ||
572 tb[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME])
573 return ENOTSUP; /* no equivalent in cgroup2 */
574
575 if (tb[OCI_LINUX_CGROUPS_CPU_SHARES]) {
576 shares = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_SHARES]);
577 if ((shares < 2) || (shares > 262144))
578 return ERANGE;
579
580 snprintf(tmp, sizeof(tmp), "%" PRIu64, (((uint64_t)1) + ((shares - 2) * 9999) / 262142));
581 cgroups_set("cpu.weight", tmp);
582 tmp[0] = '\0';
583 }
584
585 if (tb[OCI_LINUX_CGROUPS_CPU_QUOTA])
586 quota = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_CPU_QUOTA]);
587
588 if (tb[OCI_LINUX_CGROUPS_CPU_PERIOD])
589 period = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_PERIOD]);
590
591 if (period) {
592 if (quota >= 0)
593 snprintf(tmp, sizeof(tmp), "%" PRId64 " %" PRIu64 , quota, period);
594 else
595 snprintf(tmp, sizeof(tmp), "max %" PRIu64, period); /* assume default */
596 } else if (quota >= 0) {
597 snprintf(tmp, sizeof(tmp), "%" PRId64, quota);
598 } else if (quota == -1) {
599 strcpy(tmp, "max");
600 }
601
602 if (tmp[0])
603 cgroups_set("cpu.max", tmp);
604
605 if (tb[OCI_LINUX_CGROUPS_CPU_CPUS])
606 cgroups_set("cpuset.cpus", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_CPUS]));
607
608 if (tb[OCI_LINUX_CGROUPS_CPU_MEMS])
609 cgroups_set("cpuset.mems", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_MEMS]));
610
611 return 0;
612 }
613
614
/* indices into the parsed fields of the legacy "memory" table */
enum {
	OCI_LINUX_CGROUPS_MEMORY_LIMIT = 0,
	OCI_LINUX_CGROUPS_MEMORY_RESERVATION = 1,
	OCI_LINUX_CGROUPS_MEMORY_SWAP = 2,
	OCI_LINUX_CGROUPS_MEMORY_KERNEL = 3,
	OCI_LINUX_CGROUPS_MEMORY_KERNELTCP = 4,
	OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS = 5,
	OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER = 6,
	OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY = 7,
	__OCI_LINUX_CGROUPS_MEMORY_MAX = 8,
};
626
627 static const struct blobmsg_policy oci_linux_cgroups_memory_policy[] = {
628 [OCI_LINUX_CGROUPS_MEMORY_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, /* signed int64! */
629 [OCI_LINUX_CGROUPS_MEMORY_RESERVATION] = { "reservation", BLOBMSG_CAST_INT64 }, /* signed int64! */
630 [OCI_LINUX_CGROUPS_MEMORY_SWAP] = { "swap", BLOBMSG_CAST_INT64 }, /* signed int64! */
631 [OCI_LINUX_CGROUPS_MEMORY_KERNEL] = { "kernel", BLOBMSG_CAST_INT64 }, /* signed int64! */
632 [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] = { "kernelTCP", BLOBMSG_CAST_INT64 }, /* signed int64! */
633 [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] = { "swappiness", BLOBMSG_CAST_INT64 },
634 [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL },
635 [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY] { "useHierarchy", BLOBMSG_TYPE_BOOL },
636 };
637
638 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr *msg)
639 {
640 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
641 char tmp[32] = { 0 };
642 int64_t limit, swap, reservation;
643
644 blobmsg_parse(oci_linux_cgroups_memory_policy, __OCI_LINUX_CGROUPS_MEMORY_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
645
646 if (tb[OCI_LINUX_CGROUPS_MEMORY_KERNEL] ||
647 tb[OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] ||
648 tb[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] ||
649 tb[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] ||
650 tb[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY])
651 return ENOTSUP; /* no equivalent in cgroup2 */
652
653 if (tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]) {
654 limit = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]);
655 if (limit == -1)
656 strcpy(tmp, "max");
657 else
658 snprintf(tmp, sizeof(tmp), "%" PRId64, limit);
659
660 cgroups_set("memory.max", tmp);
661 }
662
663 if (tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]) {
664 reservation = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]);
665
666 if (reservation == -1)
667 strcpy(tmp, "max");
668 else
669 snprintf(tmp, sizeof(tmp), "%" PRId64, reservation);
670
671 cgroups_set("memory.low", tmp);
672 }
673
674 if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]) {
675 swap = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]);
676
677 if (swap == -1)
678 strcpy(tmp, "max");
679 else
680 snprintf(tmp, sizeof(tmp), "%" PRId64, swap);
681
682 cgroups_set("memory.swap_max", tmp);
683 }
684
685 return 0;
686 }
687
688
/* indices into the parsed fields of the legacy "pids" table */
enum {
	OCI_LINUX_CGROUPS_PIDS_LIMIT = 0,
	__OCI_LINUX_CGROUPS_PIDS_MAX = 1,
};
693
694 static const struct blobmsg_policy oci_linux_cgroups_pids_policy[] = {
695 [OCI_LINUX_CGROUPS_PIDS_LIMIT] = { "limit", BLOBMSG_CAST_INT64 },
696 };
697
698 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr *msg)
699 {
700 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
701 char tmp[32] = { 0 };
702
703 blobmsg_parse(oci_linux_cgroups_pids_policy, __OCI_LINUX_CGROUPS_PIDS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
704
705 if (!tb[OCI_LINUX_CGROUPS_PIDS_LIMIT])
706 return EINVAL;
707
708 snprintf(tmp, sizeof(tmp), "%" PRIu64, blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_PIDS_LIMIT]));
709
710 cgroups_set("pids.max", tmp);
711
712 return 0;
713 }
714
715 static int parseOCIlinuxcgroups_unified(struct blob_attr *msg)
716 {
717 struct blob_attr *cur;
718 int rem;
719
720 blobmsg_for_each_attr(cur, msg, rem) {
721 if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING)
722 return EINVAL;
723
724 cgroups_set(blobmsg_name(cur), blobmsg_get_string(cur));
725 }
726
727 return 0;
728 }
729
/* indices into the parsed sections of the OCI linux cgroups resources object */
enum {
	OCI_LINUX_CGROUPS_BLOCKIO = 0,
	OCI_LINUX_CGROUPS_CPU = 1,
	OCI_LINUX_CGROUPS_DEVICES = 2,
	OCI_LINUX_CGROUPS_HUGEPAGELIMITS = 3,
	OCI_LINUX_CGROUPS_INTELRDT = 4,
	OCI_LINUX_CGROUPS_MEMORY = 5,
	OCI_LINUX_CGROUPS_NETWORK = 6,
	OCI_LINUX_CGROUPS_PIDS = 7,
	OCI_LINUX_CGROUPS_RDMA = 8,
	OCI_LINUX_CGROUPS_UNIFIED = 9,
	__OCI_LINUX_CGROUPS_MAX = 10,
};
743
744 static const struct blobmsg_policy oci_linux_cgroups_policy[] = {
745 [OCI_LINUX_CGROUPS_BLOCKIO] = { "blockIO", BLOBMSG_TYPE_TABLE },
746 [OCI_LINUX_CGROUPS_CPU] = { "cpu", BLOBMSG_TYPE_TABLE },
747 [OCI_LINUX_CGROUPS_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY },
748 [OCI_LINUX_CGROUPS_HUGEPAGELIMITS] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY },
749 [OCI_LINUX_CGROUPS_INTELRDT] = { "intelRdt", BLOBMSG_TYPE_TABLE },
750 [OCI_LINUX_CGROUPS_MEMORY] = { "memory", BLOBMSG_TYPE_TABLE },
751 [OCI_LINUX_CGROUPS_NETWORK] = { "network", BLOBMSG_TYPE_TABLE },
752 [OCI_LINUX_CGROUPS_PIDS] = { "pids", BLOBMSG_TYPE_TABLE },
753 [OCI_LINUX_CGROUPS_RDMA] = { "rdma", BLOBMSG_TYPE_TABLE },
754 [OCI_LINUX_CGROUPS_UNIFIED] = { "unified", BLOBMSG_TYPE_TABLE },
755 };
756
757 int parseOCIlinuxcgroups(struct blob_attr *msg)
758 {
759 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MAX];
760 int ret;
761
762 blobmsg_parse(oci_linux_cgroups_policy, __OCI_LINUX_CGROUPS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
763
764 if (tb[OCI_LINUX_CGROUPS_DEVICES] ||
765 tb[OCI_LINUX_CGROUPS_HUGEPAGELIMITS] ||
766 tb[OCI_LINUX_CGROUPS_INTELRDT] ||
767 tb[OCI_LINUX_CGROUPS_NETWORK] ||
768 tb[OCI_LINUX_CGROUPS_RDMA])
769 return ENOTSUP;
770
771 if (tb[OCI_LINUX_CGROUPS_BLOCKIO]) {
772 ret = parseOCIlinuxcgroups_legacy_blockio(tb[OCI_LINUX_CGROUPS_BLOCKIO]);
773 if (ret)
774 return ret;
775 }
776
777 if (tb[OCI_LINUX_CGROUPS_CPU]) {
778 ret = parseOCIlinuxcgroups_legacy_cpu(tb[OCI_LINUX_CGROUPS_CPU]);
779 if (ret)
780 return ret;
781 }
782
783 if (tb[OCI_LINUX_CGROUPS_MEMORY]) {
784 ret = parseOCIlinuxcgroups_legacy_memory(tb[OCI_LINUX_CGROUPS_MEMORY]);
785 if (ret)
786 return ret;
787 }
788
789 if (tb[OCI_LINUX_CGROUPS_PIDS]) {
790 ret = parseOCIlinuxcgroups_legacy_pids(tb[OCI_LINUX_CGROUPS_PIDS]);
791 if (ret)
792 return ret;
793 }
794
795 if (tb[OCI_LINUX_CGROUPS_UNIFIED]) {
796 ret = parseOCIlinuxcgroups_unified(tb[OCI_LINUX_CGROUPS_UNIFIED]);
797 if (ret)
798 return ret;
799 }
800
801 return 0;
802 }