2 * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License version 2.1
6 * as published by the Free Software Foundation
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * reads unified cgroup config as proposed in
14 * https://github.com/opencontainers/runtime-spec/pull/1040
15 * attempt conversion from cgroup1 -> cgroup2
16 * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2
19 * - convert cgroup1 net_prio and net_cls to eBPF program
20 * - rdma (anyone?) intelrdt (anyone?)
36 #include <libubox/avl.h>
37 #include <libubox/avl-cmp.h>
38 #include <libubox/blobmsg.h>
39 #include <libubox/list.h>
40 #include <libubox/utils.h>
44 #include "cgroups-bpf.h"
/* Prefix of the cgroup hierarchy; cgroups_apply() uses its length to skip this part of cgroup_path. */
46 #define CGROUP_ROOT "/sys/fs/cgroup/"
/* Upper bound the blockIO parser compares weight values against. */
47 #define CGROUP_IO_WEIGHT_MAX 10000
/* AVL tree of pending cgroup2 key/value assignments (struct cgval nodes, string keys). */
54 struct avl_tree cgvals
;
/* Heap copy of the cgroup directory path, assigned in cgroups_init(). */
55 static char *cgroup_path
;
/* NOTE(review): no reader or writer of this flag is visible in this view. */
56 static bool initialized
;
58 void cgroups_prepare(void) {
/*
 * Set up the cgroup value store.
 *
 * Initialises the cgvals AVL tree with avl_strcmp as comparator (third
 * avl_init() argument is false) and stores a strdup() copy of p in
 * cgroup_path.
 *
 * NOTE(review): the strdup() result is not checked in the lines visible here.
 */
62 void cgroups_init(const char *p
) {
63 avl_init(&cgvals
, avl_strcmp
, false, NULL
);
64 cgroup_path
= strdup(p
);
/*
 * Record the assignment key=val in cgvals.
 *
 * The key is looked up first. The visible lines show two paths: a new
 * struct cgval is allocated, keyed with a strdup() copy of key and inserted
 * into the tree; or a debug message reports that a previous value is being
 * overwritten. The condition selecting between them is on lines not visible
 * here (presumably "if (!valp)" -- TODO confirm). Afterwards valp->val is
 * set to a strdup() copy of val.
 */
68 static void cgroups_set(const char *key
, const char *val
)
72 valp
= avl_find_element(&cgvals
, key
, valp
, avl
);
74 valp
= malloc(sizeof(struct cgval
));
/* the tree node owns its own copy of the key string */
78 valp
->avl
.key
= strdup(key
);
79 avl_insert(&cgvals
, &valp
->avl
);
81 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key
, valp
->val
);
/* NOTE(review): strdup() result is not checked in the visible lines */
85 valp
->val
= strdup(val
);
/*
 * Empty the cgvals tree.
 *
 * Iterates with avl_remove_all_elements() and frees each node's key string
 * (the cast drops the const qualifier of avl.key). Any further per-node or
 * global cleanup is on lines not visible here.
 */
88 void cgroups_free(void)
90 struct cgval
*valp
, *tmp
;
93 avl_remove_all_elements(&cgvals
, valp
, avl
, tmp
) {
94 free((void *)(valp
->avl
.key
));
/*
 * Apply the collected cgroup2 settings and place process pid in the cgroup.
 *
 * Steps visible in this view:
 *  - create cgroup_path with mkdir_p() (mode 0700);
 *  - scan the keys in cgvals to find the longest key and, by key prefix,
 *    which controllers are referenced;
 *  - build a "+controller ..." string in subtree_control and write it to
 *    cgroup.subtree_control files while iterating over the '/' separators
 *    of cgroup_path below CGROUP_ROOT;
 *  - write every stored value to "<cgroup_path>/<key>";
 *  - open cgroup_path as a directory and pass it to attach_cgroups_ebpf();
 *  - write pid to "<cgroup_path>/cgroup.procs".
 *
 * Fix: the error message for a failed open() of the cgroup.procs file now
 * reports ent (the path that was opened) instead of cgroup_path.
 */
102 void cgroups_apply(pid_t pid
)
/* ent buffer must at least fit the longest file name written below */
107 size_t maxlen
= strlen("cgroup.subtree_control");
117 char subtree_control
[64] = { 0 };
119 DEBUG("using cgroup path %s\n", cgroup_path
);
120 mkdir_p(cgroup_path
, 0700);
122 /* find which controllers need to be enabled */
123 avl_for_each_element(&cgvals
, valp
, avl
) {
124 ent
= (char *)valp
->avl
.key
;
125 if (strlen(ent
) > maxlen
)
126 maxlen
= strlen(ent
);
/* "cpuset." is tested before "cpu." so the shorter prefix cannot shadow it */
128 if (!strncmp("cpuset.", ent
, 7))
130 else if (!strncmp("cpu.", ent
, 4))
132 else if (!strncmp("hugetlb.", ent
, 8))
134 else if (!strncmp("io.", ent
, 3))
136 else if (!strncmp("memory.", ent
, 7))
138 else if (!strncmp("pids.", ent
, 5))
140 else if (!strncmp("rdma.", ent
, 5))
/* room for "<cgroup_path>/<name>" plus separator and terminating NUL */
144 maxlen
+= strlen(cgroup_path
) + 2;
147 strcat(subtree_control
, "+cpuset ");
150 strcat(subtree_control
, "+cpu ");
153 strcat(subtree_control
, "+hugetlb ");
156 strcat(subtree_control
, "+io ");
159 strcat(subtree_control
, "+memory ");
162 strcat(subtree_control
, "+pids ");
165 strcat(subtree_control
, "+rdma ");
167 /* remove trailing space */
/* NOTE(review): if no controller was appended this points one byte before subtree_control -- confirm a guard exists */
168 ent
= strchr(subtree_control
, '\0') - 1;
171 ent
= malloc(maxlen
);
175 DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control
);
/* start the separator search near the end of the CGROUP_ROOT prefix */
176 cdir
= &cgroup_path
[strlen(CGROUP_ROOT
) - 2];
177 while ((cdir
= strchr(cdir
+ 1, '/'))) {
/* NOTE(review): cgroup_path is presumably truncated at cdir on a line not visible here -- TODO confirm */
179 snprintf(ent
, maxlen
, "%s/cgroup.subtree_control", cgroup_path
);
180 DEBUG(" * %s\n", ent
);
181 if ((fd
= open(ent
, O_WRONLY
)) < 0) {
182 ERROR("can't open %s: %m\n", ent
);
186 if (write(fd
, subtree_control
, strlen(subtree_control
)) == -1) {
187 ERROR("can't write to %s: %m\n", ent
);
/* write each stored value into its cgroup2 interface file */
196 avl_for_each_element(&cgvals
, valp
, avl
) {
197 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp
->avl
.key
, valp
->val
);
198 snprintf(ent
, maxlen
, "%s/%s", cgroup_path
, (char *)valp
->avl
.key
);
199 fd
= open(ent
, O_WRONLY
);
201 ERROR("can't open %s: %m\n", ent
);
204 if (dprintf(fd
, "%s", valp
->val
) < 0) {
205 ERROR("can't write to %s: %m\n", ent
);
210 int dirfd
= open(cgroup_path
, O_DIRECTORY
);
212 ERROR("can't open %s: %m\n", cgroup_path
);
214 attach_cgroups_ebpf(dirfd
);
/* finally move the process into the cgroup */
218 snprintf(ent
, maxlen
, "%s/%s", cgroup_path
, "cgroup.procs");
219 fd
= open(ent
, O_WRONLY
);
/* report the file that was actually opened (ent), not the directory */
221 ERROR("can't open %s: %m\n", ent
);
223 dprintf(fd
, "%d", pid
);
231 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
,
232 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
,
233 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
,
234 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
,
235 __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX
,
/*
 * blobmsg policy for one entry of the blockIO "weightDevice" array:
 * device numbers are accepted as any integer castable to 64 bit, the
 * weights must be 32-bit integers.
 */
238 static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy
[] = {
239 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
] = { "major", BLOBMSG_CAST_INT64
},
240 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
] = { "minor", BLOBMSG_CAST_INT64
},
241 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
] = { "weight", BLOBMSG_TYPE_INT32
},
242 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
] = { "leafWeight", BLOBMSG_TYPE_INT32
},
246 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
,
247 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
,
248 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
,
249 __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
,
/*
 * blobmsg policy shared by the entries of all four blockIO throttle arrays:
 * "major", "minor" and "rate", each accepted as an integer castable to 64 bit.
 */
252 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy
[] = {
253 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] = { "major", BLOBMSG_CAST_INT64
},
254 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] = { "minor", BLOBMSG_CAST_INT64
},
255 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
] = { "rate", BLOBMSG_CAST_INT64
},
259 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
,
260 OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
,
261 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
,
262 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE
,
263 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE
,
264 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE
,
265 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE
,
266 __OCI_LINUX_CGROUPS_BLOCKIO_MAX
,
/*
 * blobmsg policy for the OCI "blockIO" table: two scalar weights and five
 * per-device arrays, parsed by parseOCIlinuxcgroups_legacy_blockio().
 */
269 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy
[] = {
270 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
] = { "weight", BLOBMSG_TYPE_INT32
},
271 [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
] = { "leafWeight", BLOBMSG_TYPE_INT32
},
272 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
] = { "weightDevice", BLOBMSG_TYPE_ARRAY
},
273 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE
] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY
},
274 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE
] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY
},
275 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE
] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY
},
276 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE
] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY
},
286 struct posix_dev dev
;
/*
 * AVL comparator for keys of type struct posix_dev.
 *
 * Compares the major numbers first and the minor numbers second. The
 * return statements are on lines not visible here (presumably negative,
 * positive and zero in the usual comparator convention -- TODO confirm).
 * ptr is not used in the visible lines.
 */
293 static int avl_devcmp(const void *k1
, const void *k2
, void *ptr
)
295 struct posix_dev
*d1
= (struct posix_dev
*)k1
, *d2
= (struct posix_dev
*)k2
;
297 if (d1
->major
< d2
->major
)
300 if (d1
->major
> d2
->major
)
/* majors are equal from here on: decide by minor */
303 if (d1
->minor
< d2
->minor
)
306 if (d1
->minor
> d2
->minor
)
/*
 * Find the iomax_line for device major:minor in the tree iomax, creating
 * and inserting one when needed.
 *
 * The lookup key d is built on lines not visible here. For a new entry the
 * device numbers are copied into l->dev, which also serves as the node's
 * AVL key. Initialisation of the rate fields and the return statement are
 * not visible (callers compare the fields against -1, so they are presumably
 * preset to -1 -- TODO confirm).
 */
312 static struct iomax_line
*get_iomax_line(struct avl_tree
*iomax
, uint64_t major
, uint64_t minor
)
314 struct iomax_line
*l
;
318 l
= avl_find_element(iomax
, &d
, l
, avl
);
320 l
= malloc(sizeof(struct iomax_line
));
324 l
->dev
.major
= d
.major
;
325 l
->dev
.minor
= d
.minor
;
/* key points into the node itself, so it lives exactly as long as the node */
326 l
->avl
.key
= &l
->dev
;
331 avl_insert(iomax
, &l
->avl
);
/*
 * Convert the OCI (cgroup1-style) "blockIO" table to cgroup2 settings.
 *
 * Visible behaviour:
 *  - "weight" and "leafWeight" are read and compared against
 *    CGROUP_IO_WEIGHT_MAX;
 *  - a "default <weight>" line plus one "<major>:<minor> <weight>" line per
 *    "weightDevice" entry are built, joined with '\n' and stored under
 *    "io.bfq.weight";
 *  - the four throttle arrays are merged per device in a temporary AVL tree
 *    (see get_iomax_line()); for every device a line
 *    "<major>:<minor> rbps=.. wbps=.. riops=.. wiops=.. " is built, leaving
 *    out fields that compare equal to -1; the lines are joined with '\n' and
 *    stored under "io.max".
 *
 * Fix: "leafWeight" was read from tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]; when
 * only leafWeight was configured that attribute is NULL. It is now read from
 * tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT], the attribute that was tested.
 *
 * Return values and the error paths are on lines not visible here.
 */
337 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr
*msg
)
339 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_BLOCKIO_MAX
],
340 *tbwd
[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX
],
341 *tbtd
[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
],
/* -1 marks "not configured" */
344 int weight
= -1, leafweight
= -1;
345 size_t numweightstrs
= 0, numiomaxstrs
= 0, strtotlen
= 1;
346 char **weightstrs
= NULL
, **iomaxstrs
= NULL
, **curstr
;
347 char *weightstr
, *iomaxstr
;
348 struct avl_tree iomax
;
349 struct iomax_line
*curiomax
, *tmp
;
351 blobmsg_parse(oci_linux_cgroups_blockio_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
353 if (tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
]) {
354 weight
= blobmsg_get_u32(tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
]);
358 if (weight
> CGROUP_IO_WEIGHT_MAX
)
361 if (tb
[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
])
/* read the attribute that was just tested (was: ..._BLOCKIO_WEIGHT) */
362 leafweight
= blobmsg_get_u32(tb
[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
]);
364 if (leafweight
> CGROUP_IO_WEIGHT_MAX
)
/* first pass over weightDevice (body not visible; presumably counts entries) */
367 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
], rem
)
/* +1 keeps a NULL terminator slot at the end of the string array */
370 weightstrs
= calloc(numweightstrs
+ 1, sizeof(char *));
377 if (asprintf(&weightstrs
[numweightstrs
++], "default %d", weight
) < 0)
380 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
], rem
) {
381 uint64_t major
, minor
;
/* per-device values fall back to the table-wide ones */
382 int devweight
= weight
, devleafweight
= leafweight
;
384 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX
, tbwd
, blobmsg_data(cur
), blobmsg_len(cur
));
385 if (!tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
] ||
386 !tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
])
389 if (!tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
] &&
390 !tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
])
393 if (tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
])
394 devweight
= blobmsg_get_u32(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
]);
396 if (devweight
> CGROUP_IO_WEIGHT_MAX
)
399 if (tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
])
400 devleafweight
= blobmsg_get_u32(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
]);
402 if (devleafweight
> CGROUP_IO_WEIGHT_MAX
)
405 if (tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
])
408 major
= blobmsg_cast_u64(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
]);
409 minor
= blobmsg_cast_u64(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
]);
/* NOTE(review): devweight is an int printed with %u; it is still -1 when neither weight was configured */
411 if (asprintf(&weightstrs
[numweightstrs
++], "%" PRIu64
":%" PRIu64
" %u", major
, minor
, devweight
) < 0)
418 strtotlen
+= strlen(*(curstr
++)) + 1;
420 weightstr
= calloc(strtotlen
, sizeof(char));
426 strcat(weightstr
, *curstr
);
427 strcat(weightstr
, "\n");
431 cgroups_set("io.bfq.weight", weightstr
);
/* throttle settings: collect all four kinds per device, keyed by major:minor */
437 avl_init(&iomax
, avl_devcmp
, false, NULL
);
439 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE
], rem
) {
440 struct iomax_line
*l
;
442 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
444 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
445 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
446 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
449 l
= get_iomax_line(&iomax
,
450 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
451 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
453 l
->rbps
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
456 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE
], rem
) {
457 struct iomax_line
*l
;
459 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
461 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
462 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
463 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
466 l
= get_iomax_line(&iomax
,
467 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
468 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
470 l
->wbps
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
473 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE
], rem
) {
474 struct iomax_line
*l
;
476 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
478 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
479 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
480 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
483 l
= get_iomax_line(&iomax
,
484 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
485 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
487 l
->riops
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
490 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE
], rem
) {
491 struct iomax_line
*l
;
493 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
495 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
496 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
497 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
500 l
= get_iomax_line(&iomax
,
501 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
502 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
504 l
->wiops
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
/* first pass over the merged devices (body not visible; presumably counts them) */
507 avl_for_each_element(&iomax
, curiomax
, avl
)
513 iomaxstrs
= calloc(numiomaxstrs
+ 1, sizeof(char *));
519 avl_for_each_element(&iomax
, curiomax
, avl
) {
/* NOTE(review): iomaxlstr and lstr are declared on lines not visible here; their sizes bound the sprintf/strcat calls below */
523 sprintf(iomaxlstr
, "%" PRIu64
":%" PRIu64
" ", curiomax
->dev
.major
, curiomax
->dev
.minor
);
525 if (curiomax
->rbps
!= -1) {
526 sprintf(lstr
, "rbps=%" PRIu64
" ", curiomax
->rbps
);
527 strcat(iomaxlstr
, lstr
);
529 if (curiomax
->wbps
!= -1) {
530 sprintf(lstr
, "wbps=%" PRIu64
" ", curiomax
->wbps
);
531 strcat(iomaxlstr
, lstr
);
533 if (curiomax
->riops
!= -1) {
534 sprintf(lstr
, "riops=%" PRIu64
" ", curiomax
->riops
);
535 strcat(iomaxlstr
, lstr
);
537 if (curiomax
->wiops
!= -1) {
538 sprintf(lstr
, "wiops=%" PRIu64
" ", curiomax
->wiops
);
539 strcat(iomaxlstr
, lstr
);
542 iomaxstrs
[numiomaxstrs
++] = strdup(iomaxlstr
);
/* the temporary tree is no longer needed once the lines are built */
545 avl_for_each_element_safe(&iomax
, curiomax
, avl
, tmp
) {
546 avl_delete(&iomax
, &curiomax
->avl
);
550 strtotlen
= 1; /* 1 accounts for \0 at end of string */
554 strtotlen
+= strlen(*(curstr
++)) + 1; /* +1 accounts for \n at end of line */
556 iomaxstr
= calloc(strtotlen
, sizeof(char));
563 strcat(iomaxstr
, *curstr
);
564 strcat(iomaxstr
, "\n");
568 cgroups_set("io.max", iomaxstr
);
579 OCI_LINUX_CGROUPS_CPU_SHARES
,
580 OCI_LINUX_CGROUPS_CPU_PERIOD
,
581 OCI_LINUX_CGROUPS_CPU_QUOTA
,
582 OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME
,
583 OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD
,
584 OCI_LINUX_CGROUPS_CPU_CPUS
,
585 OCI_LINUX_CGROUPS_CPU_MEMS
,
586 __OCI_LINUX_CGROUPS_CPU_MAX
,
/*
 * blobmsg policy for the OCI "cpu" table. Numeric members are accepted as
 * any integer castable to 64 bit; "cpus" and "mems" are strings.
 * Entries use designated initializers, so their order here need not match
 * the enum order.
 */
589 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy
[] = {
590 [OCI_LINUX_CGROUPS_CPU_SHARES
] = { "shares", BLOBMSG_CAST_INT64
},
591 [OCI_LINUX_CGROUPS_CPU_PERIOD
] = { "period", BLOBMSG_CAST_INT64
},
592 [OCI_LINUX_CGROUPS_CPU_QUOTA
] = { "quota", BLOBMSG_CAST_INT64
}, /* signed int64! */
593 [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD
] = { "realtimePeriod", BLOBMSG_CAST_INT64
},
594 [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME
] = { "realtimeRuntime", BLOBMSG_CAST_INT64
},
595 [OCI_LINUX_CGROUPS_CPU_CPUS
] = { "cpus", BLOBMSG_TYPE_STRING
},
596 [OCI_LINUX_CGROUPS_CPU_MEMS
] = { "mems", BLOBMSG_TYPE_STRING
},
/*
 * Convert the OCI (cgroup1-style) "cpu" table to cgroup2 settings.
 *
 * Visible behaviour:
 *  - returns ENOTSUP when realtimePeriod or realtimeRuntime is present;
 *  - "shares" is range-checked against [2, 262144] (action on failure not
 *    visible) and mapped to "cpu.weight" as
 *    1 + ((shares - 2) * 9999) / 262142;
 *  - "quota" and "period" are combined into a "cpu.max" value; the
 *    conditions selecting between the formats are partly on lines not
 *    visible here;
 *  - "cpus" and "mems" are passed through to "cpuset.cpus"/"cpuset.mems".
 *
 * The final return statement is not visible.
 */
599 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr
*msg
)
601 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_CPU_MAX
];
/* shares is only assigned and read inside the "shares present" branch */
602 uint64_t shares
, period
= 0;
603 int64_t quota
= -2; /* unset */
604 char tmp
[32] = { 0 };
606 blobmsg_parse(oci_linux_cgroups_cpu_policy
, __OCI_LINUX_CGROUPS_CPU_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
608 if (tb
[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD
] ||
609 tb
[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME
])
610 return ENOTSUP
; /* no equivalent in cgroup2 */
612 if (tb
[OCI_LINUX_CGROUPS_CPU_SHARES
]) {
613 shares
= blobmsg_cast_u64(tb
[OCI_LINUX_CGROUPS_CPU_SHARES
]);
614 if ((shares
< 2) || (shares
> 262144))
/* linear map of the shares range 2..262144 onto the weight range 1..10000 */
617 snprintf(tmp
, sizeof(tmp
), "%" PRIu64
, (((uint64_t)1) + ((shares
- 2) * 9999) / 262142));
618 cgroups_set("cpu.weight", tmp
);
622 if (tb
[OCI_LINUX_CGROUPS_CPU_QUOTA
])
623 quota
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_CPU_QUOTA
]);
625 if (tb
[OCI_LINUX_CGROUPS_CPU_PERIOD
])
626 period
= blobmsg_cast_u64(tb
[OCI_LINUX_CGROUPS_CPU_PERIOD
]);
/* cpu.max value: "<quota> <period>", "max <period>", or quota alone */
630 snprintf(tmp
, sizeof(tmp
), "%" PRId64
" %" PRIu64
, quota
, period
);
632 snprintf(tmp
, sizeof(tmp
), "max %" PRIu64
, period
); /* assume default */
633 } else if (quota
>= 0) {
634 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, quota
);
635 } else if (quota
== -1) {
/* NOTE(review): tmp was also used for cpu.weight above; whether it is cleared in between is on a line not visible here */
640 cgroups_set("cpu.max", tmp
);
642 if (tb
[OCI_LINUX_CGROUPS_CPU_CPUS
])
643 cgroups_set("cpuset.cpus", blobmsg_get_string(tb
[OCI_LINUX_CGROUPS_CPU_CPUS
]));
645 if (tb
[OCI_LINUX_CGROUPS_CPU_MEMS
])
646 cgroups_set("cpuset.mems", blobmsg_get_string(tb
[OCI_LINUX_CGROUPS_CPU_MEMS
]));
653 OCI_LINUX_CGROUPS_MEMORY_LIMIT
,
654 OCI_LINUX_CGROUPS_MEMORY_RESERVATION
,
655 OCI_LINUX_CGROUPS_MEMORY_SWAP
,
656 OCI_LINUX_CGROUPS_MEMORY_KERNEL
,
657 OCI_LINUX_CGROUPS_MEMORY_KERNELTCP
,
658 OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS
,
659 OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER
,
660 OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY
,
661 __OCI_LINUX_CGROUPS_MEMORY_MAX
,
/*
 * blobmsg policy for the OCI "memory" table. Sizes are accepted as any
 * integer castable to 64 bit; the two flags must be booleans.
 *
 * Fix: the "useHierarchy" entry lacked the '=' of a standard designated
 * initializer (the form without '=' is an obsolete GNU extension) and now
 * matches every other entry.
 */
664 static const struct blobmsg_policy oci_linux_cgroups_memory_policy
[] = {
665 [OCI_LINUX_CGROUPS_MEMORY_LIMIT
] = { "limit", BLOBMSG_CAST_INT64
}, /* signed int64! */
666 [OCI_LINUX_CGROUPS_MEMORY_RESERVATION
] = { "reservation", BLOBMSG_CAST_INT64
}, /* signed int64! */
667 [OCI_LINUX_CGROUPS_MEMORY_SWAP
] = { "swap", BLOBMSG_CAST_INT64
}, /* signed int64! */
668 [OCI_LINUX_CGROUPS_MEMORY_KERNEL
] = { "kernel", BLOBMSG_CAST_INT64
}, /* signed int64! ignored */
669 [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP
] = { "kernelTCP", BLOBMSG_CAST_INT64
}, /* signed int64! ignored */
670 [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS
] = { "swappiness", BLOBMSG_CAST_INT64
},
671 [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER
] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL
},
672 [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY
] = { "useHierarchy", BLOBMSG_TYPE_BOOL
},
/*
 * Convert the OCI (cgroup1-style) "memory" table to cgroup2 settings.
 *
 * Visible behaviour:
 *  - presence of swappiness, disableOOMKiller or useHierarchy is tested
 *    (the action taken is on a line not visible here);
 *  - "limit" is written to "memory.max", "reservation" to "memory.low";
 *  - "swap" is written to "memory.swap.max".
 *
 * Fixes:
 *  - the swap key was "memory.swap_max", which is not a cgroup2 interface
 *    file; the file is named "memory.swap.max";
 *  - OCI "swap" is memory+swap while cgroup2 counts swap alone, so the value
 *    is now swap - limit (was limit - swap). When no memory limit is known,
 *    or the configured limit exceeds swap (which would give a negative
 *    result), the raw swap value is written instead.
 *
 * Handling of -1 ("unlimited") for limit and swap and the return statements
 * are on lines not visible here.
 */
675 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr
*msg
)
677 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_MEMORY_MAX
];
678 char tmp
[32] = { 0 };
679 int64_t limit
= -1, swap
, reservation
;
681 blobmsg_parse(oci_linux_cgroups_memory_policy
, __OCI_LINUX_CGROUPS_MEMORY_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
684 * not all properties of the OCI memory section can be mapped to cgroup2
685 * kernel memory accounting is always enabled and included in the set
686 * memory limit, hence these options can be ignored
687 * disableOOMKiller could be emulated using oom_score_adj + seccomp eBPF
688 * preventing self-upgrade (but allow downgrade)
690 * see also https://github.com/opencontainers/runtime-spec/issues/1005
692 if (tb
[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS
] ||
693 tb
[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER
] ||
694 tb
[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY
])
698 if (tb
[OCI_LINUX_CGROUPS_MEMORY_LIMIT
]) {
699 limit
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_MEMORY_LIMIT
]);
703 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, limit
);
705 cgroups_set("memory.max", tmp
);
708 if (tb
[OCI_LINUX_CGROUPS_MEMORY_RESERVATION
]) {
709 reservation
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_MEMORY_RESERVATION
]);
711 if (reservation
== -1)
714 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, reservation
);
716 cgroups_set("memory.low", tmp
);
719 /* OCI 'swap' accounts for memory+swap */
720 if (tb
[OCI_LINUX_CGROUPS_MEMORY_SWAP
]) {
721 swap
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_MEMORY_SWAP
]);
/* no usable memory limit to subtract: pass the raw value through */
725 else if (limit
== -1 || (limit
> swap
))
726 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, swap
);
/* cgroup2 swap allowance = (memory+swap) - memory, never negative here */
728 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, swap
- limit
);
730 cgroups_set("memory.swap.max", tmp
);
738 OCI_LINUX_CGROUPS_PIDS_LIMIT
,
739 __OCI_LINUX_CGROUPS_PIDS_MAX
,
/* blobmsg policy for the OCI "pids" table: a single "limit" integer castable to 64 bit. */
742 static const struct blobmsg_policy oci_linux_cgroups_pids_policy
[] = {
743 [OCI_LINUX_CGROUPS_PIDS_LIMIT
] = { "limit", BLOBMSG_CAST_INT64
},
/*
 * Convert the OCI "pids" table to the cgroup2 "pids.max" setting.
 *
 * Parses msg with oci_linux_cgroups_pids_policy, tests for a missing
 * "limit" (the action taken is on a line not visible here) and stores the
 * limit, formatted as an unsigned 64-bit decimal, under "pids.max".
 *
 * Fix: tb[] was dimensioned with __OCI_LINUX_CGROUPS_MEMORY_MAX; it is now
 * sized with __OCI_LINUX_CGROUPS_PIDS_MAX, the count passed to
 * blobmsg_parse() for this policy.
 */
746 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr
*msg
)
748 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_PIDS_MAX
];
749 char tmp
[32] = { 0 };
751 blobmsg_parse(oci_linux_cgroups_pids_policy
, __OCI_LINUX_CGROUPS_PIDS_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
753 if (!tb
[OCI_LINUX_CGROUPS_PIDS_LIMIT
])
756 snprintf(tmp
, sizeof(tmp
), "%" PRIu64
, blobmsg_cast_u64(tb
[OCI_LINUX_CGROUPS_PIDS_LIMIT
]));
758 cgroups_set("pids.max", tmp
);
/*
 * Take over the entries of the OCI "unified" table as cgroup2 settings.
 *
 * Each attribute is tested for being a string, for a '/' in its name and
 * for the names cgroup.subtree_control, cgroup.procs, cgroup.threads and
 * cgroup.freeze; the action taken when a test matches is on lines not
 * visible here (presumably the entry is rejected -- TODO confirm).
 * Entries reaching the end of the loop body are stored with cgroups_set()
 * under their own name.
 */
763 static int parseOCIlinuxcgroups_unified(struct blob_attr
*msg
)
765 struct blob_attr
*cur
;
768 blobmsg_for_each_attr(cur
, msg
, rem
) {
769 if (blobmsg_type(cur
) != BLOBMSG_TYPE_STRING
)
/* names with a path separator and the cgroup core control files get special treatment */
773 if (strchr(blobmsg_name(cur
), '/') ||
774 !strcmp(blobmsg_name(cur
), "cgroup.subtree_control") ||
775 !strcmp(blobmsg_name(cur
), "cgroup.procs") ||
776 !strcmp(blobmsg_name(cur
), "cgroup.threads") ||
777 !strcmp(blobmsg_name(cur
), "cgroup.freeze"))
780 cgroups_set(blobmsg_name(cur
), blobmsg_get_string(cur
));
787 OCI_LINUX_CGROUPS_BLOCKIO
,
788 OCI_LINUX_CGROUPS_CPU
,
789 OCI_LINUX_CGROUPS_DEVICES
,
790 OCI_LINUX_CGROUPS_HUGEPAGELIMITS
,
791 OCI_LINUX_CGROUPS_INTELRDT
,
792 OCI_LINUX_CGROUPS_MEMORY
,
793 OCI_LINUX_CGROUPS_NETWORK
,
794 OCI_LINUX_CGROUPS_PIDS
,
795 OCI_LINUX_CGROUPS_RDMA
,
796 OCI_LINUX_CGROUPS_UNIFIED
,
797 __OCI_LINUX_CGROUPS_MAX
,
/*
 * Top-level blobmsg policy for the OCI linux cgroup resources object:
 * "devices" and "hugepageLimits" are arrays, all other members are tables.
 */
800 static const struct blobmsg_policy oci_linux_cgroups_policy
[] = {
801 [OCI_LINUX_CGROUPS_BLOCKIO
] = { "blockIO", BLOBMSG_TYPE_TABLE
},
802 [OCI_LINUX_CGROUPS_CPU
] = { "cpu", BLOBMSG_TYPE_TABLE
},
803 [OCI_LINUX_CGROUPS_DEVICES
] = { "devices", BLOBMSG_TYPE_ARRAY
},
804 [OCI_LINUX_CGROUPS_HUGEPAGELIMITS
] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY
},
805 [OCI_LINUX_CGROUPS_INTELRDT
] = { "intelRdt", BLOBMSG_TYPE_TABLE
},
806 [OCI_LINUX_CGROUPS_MEMORY
] = { "memory", BLOBMSG_TYPE_TABLE
},
807 [OCI_LINUX_CGROUPS_NETWORK
] = { "network", BLOBMSG_TYPE_TABLE
},
808 [OCI_LINUX_CGROUPS_PIDS
] = { "pids", BLOBMSG_TYPE_TABLE
},
809 [OCI_LINUX_CGROUPS_RDMA
] = { "rdma", BLOBMSG_TYPE_TABLE
},
810 [OCI_LINUX_CGROUPS_UNIFIED
] = { "unified", BLOBMSG_TYPE_TABLE
},
/*
 * Entry point: parse the OCI linux cgroup resources object in msg.
 *
 * Presence of hugepageLimits, intelRdt, network or rdma is tested first
 * (the action taken is on a line not visible here; presumably these are
 * rejected as unsupported -- TODO confirm). Each remaining section that is
 * present is handed to its parser in this order: blockIO, cpu, devices,
 * memory, pids, unified. Every result is stored in ret; how ret is checked
 * and what is returned is on lines not visible here.
 */
813 int parseOCIlinuxcgroups(struct blob_attr
*msg
)
815 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_MAX
];
818 blobmsg_parse(oci_linux_cgroups_policy
, __OCI_LINUX_CGROUPS_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
820 if (tb
[OCI_LINUX_CGROUPS_HUGEPAGELIMITS
] ||
821 tb
[OCI_LINUX_CGROUPS_INTELRDT
] ||
822 tb
[OCI_LINUX_CGROUPS_NETWORK
] ||
823 tb
[OCI_LINUX_CGROUPS_RDMA
])
826 if (tb
[OCI_LINUX_CGROUPS_BLOCKIO
]) {
827 ret
= parseOCIlinuxcgroups_legacy_blockio(tb
[OCI_LINUX_CGROUPS_BLOCKIO
]);
832 if (tb
[OCI_LINUX_CGROUPS_CPU
]) {
833 ret
= parseOCIlinuxcgroups_legacy_cpu(tb
[OCI_LINUX_CGROUPS_CPU
]);
/* the devices parser is not defined in this view */
838 if (tb
[OCI_LINUX_CGROUPS_DEVICES
]) {
839 ret
= parseOCIlinuxcgroups_devices(tb
[OCI_LINUX_CGROUPS_DEVICES
]);
844 if (tb
[OCI_LINUX_CGROUPS_MEMORY
]) {
845 ret
= parseOCIlinuxcgroups_legacy_memory(tb
[OCI_LINUX_CGROUPS_MEMORY
]);
850 if (tb
[OCI_LINUX_CGROUPS_PIDS
]) {
851 ret
= parseOCIlinuxcgroups_legacy_pids(tb
[OCI_LINUX_CGROUPS_PIDS
]);
/* "unified" is handled last, after all legacy sections */
856 if (tb
[OCI_LINUX_CGROUPS_UNIFIED
]) {
857 ret
= parseOCIlinuxcgroups_unified(tb
[OCI_LINUX_CGROUPS_UNIFIED
]);