2 * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License version 2.1
6 * as published by the Free Software Foundation
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * reads unified cgroup config as proposed in
14 * https://github.com/opencontainers/runtime-spec/pull/1040
15 * attempt conversion from cgroup1 -> cgroup2
16 * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2
19 * - convert cgroup1 devices to eBPF program
20 * - convert cgroup1 net_prio and net_cls to eBPF program
21 * - rdma (anyone?) intelrdt (anyone?)
38 #include <libubox/avl.h>
39 #include <libubox/avl-cmp.h>
40 #include <libubox/blobmsg.h>
41 #include <libubox/list.h>
42 #include <libubox/utils.h>
47 #define CGROUP_ROOT "/sys/fs/cgroup/"
48 #define CGROUP_IO_WEIGHT_MAX 10000
55 struct avl_tree cgvals
;
56 static char *cgroup_path
;
57 static bool initialized
;
59 void cgroups_prepare(void) {
63 void cgroups_init(const char *p
) {
64 avl_init(&cgvals
, avl_strcmp
, false, NULL
);
65 cgroup_path
= strdup(p
);
69 static void cgroups_set(const char *key
, const char *val
)
73 valp
= avl_find_element(&cgvals
, key
, valp
, avl
);
75 valp
= malloc(sizeof(struct cgval
));
77 valp
->avl
.key
= strdup(key
);
78 avl_insert(&cgvals
, &valp
->avl
);
80 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key
, valp
->val
);
84 valp
->val
= strdup(val
);
87 void cgroups_free(void)
89 struct cgval
*valp
, *tmp
;
92 avl_remove_all_elements(&cgvals
, valp
, avl
, tmp
) {
93 free((void *)(valp
->avl
.key
));
101 void cgroups_apply(pid_t pid
)
106 size_t maxlen
= strlen("cgroup.subtree_control");
116 char subtree_control
[64] = { 0 };
118 DEBUG("using cgroup path %s\n", cgroup_path
);
119 mkdir_p(cgroup_path
, 0700);
121 /* find which controllers need to be enabled */
122 avl_for_each_element(&cgvals
, valp
, avl
) {
123 ent
= (char *)valp
->avl
.key
;
124 if (strlen(ent
) > maxlen
)
125 maxlen
= strlen(ent
);
127 if (!strncmp("cpuset.", ent
, 7))
129 else if (!strncmp("cpu.", ent
, 4))
131 else if (!strncmp("hugetlb.", ent
, 8))
133 else if (!strncmp("io.", ent
, 3))
135 else if (!strncmp("memory.", ent
, 7))
137 else if (!strncmp("pids.", ent
, 5))
139 else if (!strncmp("rdma.", ent
, 5))
143 maxlen
+= strlen(cgroup_path
) + 2;
146 strcat(subtree_control
, "+cpuset ");
149 strcat(subtree_control
, "+cpu ");
152 strcat(subtree_control
, "+hugetlb ");
155 strcat(subtree_control
, "+io ");
158 strcat(subtree_control
, "+memory ");
161 strcat(subtree_control
, "+pids ");
164 strcat(subtree_control
, "+rdma ");
166 /* remove trailing space */
167 ent
= strchr(subtree_control
, '\0') - 1;
170 ent
= malloc(maxlen
);
173 DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control
);
174 cdir
= &cgroup_path
[strlen(CGROUP_ROOT
) - 2];
175 while ((cdir
= strchr(cdir
+ 1, '/'))) {
177 snprintf(ent
, maxlen
, "%s/cgroup.subtree_control", cgroup_path
);
178 DEBUG(" * %s\n", ent
);
179 fd
= open(ent
, O_WRONLY
);
181 write(fd
, subtree_control
, strlen(subtree_control
));
186 avl_for_each_element(&cgvals
, valp
, avl
) {
187 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp
->avl
.key
, valp
->val
);
188 snprintf(ent
, maxlen
, "%s/%s", cgroup_path
, (char *)valp
->avl
.key
);
189 fd
= open(ent
, O_WRONLY
);
191 ERROR("can't open %s: %m\n", ent
);
194 if (dprintf(fd
, "%s", valp
->val
) < 0) {
195 ERROR("can't write to %s: %m\n", ent
);
200 snprintf(ent
, maxlen
, "%s/%s", cgroup_path
, "cgroup.procs");
201 fd
= open(ent
, O_WRONLY
);
203 dprintf(fd
, "%d", pid
);
210 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
,
211 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
,
212 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
,
213 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
,
214 __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX
,
217 static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy
[] = {
218 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
] = { "major", BLOBMSG_CAST_INT64
},
219 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
] = { "minor", BLOBMSG_CAST_INT64
},
220 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
] = { "weight", BLOBMSG_TYPE_INT32
},
221 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
] = { "leafWeight", BLOBMSG_TYPE_INT32
},
225 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
,
226 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
,
227 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
,
228 __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
,
231 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy
[] = {
232 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] = { "major", BLOBMSG_CAST_INT64
},
233 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] = { "minor", BLOBMSG_CAST_INT64
},
234 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
] = { "rate", BLOBMSG_CAST_INT64
},
238 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
,
239 OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
,
240 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
,
241 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE
,
242 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE
,
243 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE
,
244 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE
,
245 __OCI_LINUX_CGROUPS_BLOCKIO_MAX
,
248 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy
[] = {
249 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
] = { "weight", BLOBMSG_TYPE_INT32
},
250 [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
] = { "leafWeight", BLOBMSG_TYPE_INT32
},
251 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
] = { "weightDevice", BLOBMSG_TYPE_ARRAY
},
252 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE
] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY
},
253 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE
] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY
},
254 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE
] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY
},
255 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE
] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY
},
265 struct posix_dev dev
;
272 static int avl_devcmp(const void *k1
, const void *k2
, void *ptr
)
274 struct posix_dev
*d1
= (struct posix_dev
*)k1
, *d2
= (struct posix_dev
*)k2
;
276 if (d1
->major
< d2
->major
)
279 if (d1
->major
> d2
->major
)
282 if (d1
->minor
< d2
->minor
)
285 if (d1
->minor
> d2
->minor
)
291 static struct iomax_line
*get_iomax_line(struct avl_tree
*iomax
, uint64_t major
, uint64_t minor
)
293 struct iomax_line
*l
;
297 l
= avl_find_element(iomax
, &d
, l
, avl
);
299 l
= malloc(sizeof(struct iomax_line
));
301 l
->dev
.major
= d
.major
;
302 l
->dev
.minor
= d
.minor
;
303 l
->avl
.key
= &l
->dev
;
308 avl_insert(iomax
, &l
->avl
);
314 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr
*msg
)
316 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_BLOCKIO_MAX
],
317 *tbwd
[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX
],
318 *tbtd
[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
],
321 int weight
= -1, leafweight
= -1;
322 size_t numweightstrs
= 0, numiomaxstrs
= 0, strtotlen
= 1;
323 char **weightstrs
= NULL
, **iomaxstrs
= NULL
, **curstr
;
324 char *weightstr
, *iomaxstr
;
325 struct avl_tree iomax
;
326 struct iomax_line
*curiomax
, *tmp
;
328 blobmsg_parse(oci_linux_cgroups_blockio_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
330 if (tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
]) {
331 weight
= blobmsg_get_u32(tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
]);
335 if (weight
> CGROUP_IO_WEIGHT_MAX
)
338 if (tb
[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT
])
339 leafweight
= blobmsg_get_u32(tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT
]);
341 if (leafweight
> CGROUP_IO_WEIGHT_MAX
)
344 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
], rem
)
347 weightstrs
= calloc(numweightstrs
+ 1, sizeof(char *));
348 assert(weightstrs
!= 0);
352 asprintf(&weightstrs
[numweightstrs
++], "default %d", weight
);
354 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE
], rem
) {
355 uint64_t major
, minor
;
356 int devweight
= weight
, devleafweight
= leafweight
;
358 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX
, tbwd
, blobmsg_data(cur
), blobmsg_len(cur
));
359 if (!tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
] ||
360 !tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
])
363 if (!tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
] &&
364 !tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
])
367 if (tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
])
368 devweight
= blobmsg_get_u32(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT
]);
370 if (devweight
> CGROUP_IO_WEIGHT_MAX
)
373 if (tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
])
374 devleafweight
= blobmsg_get_u32(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
]);
376 if (devleafweight
> CGROUP_IO_WEIGHT_MAX
)
379 if (tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT
])
382 major
= blobmsg_cast_u64(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR
]);
383 minor
= blobmsg_cast_u64(tbwd
[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR
]);
385 asprintf(&weightstrs
[numweightstrs
++], "%" PRIu64
":%" PRIu64
" %u", major
, minor
, devweight
);
391 strtotlen
+= strlen(*(curstr
++)) + 1;
393 weightstr
= calloc(strtotlen
, sizeof(char));
394 assert(weightstr
!= 0);
398 strcat(weightstr
, *curstr
);
399 strcat(weightstr
, "\n");
403 cgroups_set("io.bfq.weight", weightstr
);
409 avl_init(&iomax
, avl_devcmp
, false, NULL
);
411 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE
], rem
) {
412 struct iomax_line
*l
;
414 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
416 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
417 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
418 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
421 l
= get_iomax_line(&iomax
,
422 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
423 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
425 l
->rbps
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
428 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE
], rem
) {
429 struct iomax_line
*l
;
431 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
433 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
434 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
435 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
438 l
= get_iomax_line(&iomax
,
439 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
440 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
442 l
->wbps
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
445 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE
], rem
) {
446 struct iomax_line
*l
;
448 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
450 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
451 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
452 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
455 l
= get_iomax_line(&iomax
,
456 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
457 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
459 l
->riops
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
462 blobmsg_for_each_attr(cur
, tb
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE
], rem
) {
463 struct iomax_line
*l
;
465 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy
, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX
, tbtd
, blobmsg_data(cur
), blobmsg_len(cur
));
467 if (!tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
] ||
468 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
] ||
469 !tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
])
472 l
= get_iomax_line(&iomax
,
473 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR
]),
474 blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR
]));
476 l
->wiops
= blobmsg_cast_u64(tbtd
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE
]);
479 avl_for_each_element(&iomax
, curiomax
, avl
)
485 iomaxstrs
= calloc(numiomaxstrs
+ 1, sizeof(char *));
486 assert(iomaxstrs
!= 0);
489 avl_for_each_element(&iomax
, curiomax
, avl
) {
493 sprintf(iomaxlstr
, "%" PRIu64
":%" PRIu64
" ", curiomax
->dev
.major
, curiomax
->dev
.minor
);
495 if (curiomax
->rbps
!= -1) {
496 sprintf(lstr
, "rbps=%" PRIu64
" ", curiomax
->rbps
);
497 strcat(iomaxlstr
, lstr
);
499 if (curiomax
->wbps
!= -1) {
500 sprintf(lstr
, "wbps=%" PRIu64
" ", curiomax
->wbps
);
501 strcat(iomaxlstr
, lstr
);
503 if (curiomax
->riops
!= -1) {
504 sprintf(lstr
, "riops=%" PRIu64
" ", curiomax
->riops
);
505 strcat(iomaxlstr
, lstr
);
507 if (curiomax
->wiops
!= -1) {
508 sprintf(lstr
, "wiops=%" PRIu64
" ", curiomax
->wiops
);
509 strcat(iomaxlstr
, lstr
);
512 iomaxstrs
[numiomaxstrs
++] = strdup(iomaxlstr
);
515 avl_for_each_element_safe(&iomax
, curiomax
, avl
, tmp
) {
516 avl_delete(&iomax
, &curiomax
->avl
);
520 strtotlen
= 1; /* 1 accounts for \0 at end of string */
524 strtotlen
+= strlen(*(curstr
++)) + 1; /* +1 accounts for \n at end of line */
526 iomaxstr
= calloc(strtotlen
, sizeof(char));
527 assert(iomaxstr
!= 0);
531 strcat(iomaxstr
, *curstr
);
532 strcat(iomaxstr
, "\n");
536 cgroups_set("io.max", iomaxstr
);
547 OCI_LINUX_CGROUPS_CPU_SHARES
,
548 OCI_LINUX_CGROUPS_CPU_PERIOD
,
549 OCI_LINUX_CGROUPS_CPU_QUOTA
,
550 OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME
,
551 OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD
,
552 OCI_LINUX_CGROUPS_CPU_CPUS
,
553 OCI_LINUX_CGROUPS_CPU_MEMS
,
554 __OCI_LINUX_CGROUPS_CPU_MAX
,
557 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy
[] = {
558 [OCI_LINUX_CGROUPS_CPU_SHARES
] = { "shares", BLOBMSG_CAST_INT64
},
559 [OCI_LINUX_CGROUPS_CPU_PERIOD
] = { "period", BLOBMSG_CAST_INT64
},
560 [OCI_LINUX_CGROUPS_CPU_QUOTA
] = { "quota", BLOBMSG_CAST_INT64
}, /* signed int64! */
561 [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD
] = { "realtimePeriod", BLOBMSG_CAST_INT64
},
562 [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME
] = { "realtimeRuntime", BLOBMSG_CAST_INT64
},
563 [OCI_LINUX_CGROUPS_CPU_CPUS
] = { "cpus", BLOBMSG_TYPE_STRING
},
564 [OCI_LINUX_CGROUPS_CPU_MEMS
] = { "mems", BLOBMSG_TYPE_STRING
},
567 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr
*msg
)
569 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_CPU_MAX
];
570 uint64_t shares
, period
= 0;
571 int64_t quota
= -2; /* unset */
572 char tmp
[32] = { 0 };
574 blobmsg_parse(oci_linux_cgroups_cpu_policy
, __OCI_LINUX_CGROUPS_CPU_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
576 if (tb
[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD
] ||
577 tb
[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME
])
578 return ENOTSUP
; /* no equivalent in cgroup2 */
580 if (tb
[OCI_LINUX_CGROUPS_CPU_SHARES
]) {
581 shares
= blobmsg_cast_u64(tb
[OCI_LINUX_CGROUPS_CPU_SHARES
]);
582 if ((shares
< 2) || (shares
> 262144))
585 snprintf(tmp
, sizeof(tmp
), "%" PRIu64
, (((uint64_t)1) + ((shares
- 2) * 9999) / 262142));
586 cgroups_set("cpu.weight", tmp
);
590 if (tb
[OCI_LINUX_CGROUPS_CPU_QUOTA
])
591 quota
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_CPU_QUOTA
]);
593 if (tb
[OCI_LINUX_CGROUPS_CPU_PERIOD
])
594 period
= blobmsg_cast_u64(tb
[OCI_LINUX_CGROUPS_CPU_PERIOD
]);
598 snprintf(tmp
, sizeof(tmp
), "%" PRId64
" %" PRIu64
, quota
, period
);
600 snprintf(tmp
, sizeof(tmp
), "max %" PRIu64
, period
); /* assume default */
601 } else if (quota
>= 0) {
602 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, quota
);
603 } else if (quota
== -1) {
608 cgroups_set("cpu.max", tmp
);
610 if (tb
[OCI_LINUX_CGROUPS_CPU_CPUS
])
611 cgroups_set("cpuset.cpus", blobmsg_get_string(tb
[OCI_LINUX_CGROUPS_CPU_CPUS
]));
613 if (tb
[OCI_LINUX_CGROUPS_CPU_MEMS
])
614 cgroups_set("cpuset.mems", blobmsg_get_string(tb
[OCI_LINUX_CGROUPS_CPU_MEMS
]));
621 OCI_LINUX_CGROUPS_MEMORY_LIMIT
,
622 OCI_LINUX_CGROUPS_MEMORY_RESERVATION
,
623 OCI_LINUX_CGROUPS_MEMORY_SWAP
,
624 OCI_LINUX_CGROUPS_MEMORY_KERNEL
,
625 OCI_LINUX_CGROUPS_MEMORY_KERNELTCP
,
626 OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS
,
627 OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER
,
628 OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY
,
629 __OCI_LINUX_CGROUPS_MEMORY_MAX
,
632 static const struct blobmsg_policy oci_linux_cgroups_memory_policy
[] = {
633 [OCI_LINUX_CGROUPS_MEMORY_LIMIT
] = { "limit", BLOBMSG_CAST_INT64
}, /* signed int64! */
634 [OCI_LINUX_CGROUPS_MEMORY_RESERVATION
] = { "reservation", BLOBMSG_CAST_INT64
}, /* signed int64! */
635 [OCI_LINUX_CGROUPS_MEMORY_SWAP
] = { "swap", BLOBMSG_CAST_INT64
}, /* signed int64! */
636 [OCI_LINUX_CGROUPS_MEMORY_KERNEL
] = { "kernel", BLOBMSG_CAST_INT64
}, /* signed int64! ignored */
637 [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP
] = { "kernelTCP", BLOBMSG_CAST_INT64
}, /* signed int64! ignored */
638 [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS
] = { "swappiness", BLOBMSG_CAST_INT64
},
639 [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER
] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL
},
640 [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY
] { "useHierarchy", BLOBMSG_TYPE_BOOL
},
643 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr
*msg
)
645 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_MEMORY_MAX
];
646 char tmp
[32] = { 0 };
647 int64_t limit
, swap
, reservation
;
649 blobmsg_parse(oci_linux_cgroups_memory_policy
, __OCI_LINUX_CGROUPS_MEMORY_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
652 * not all properties of the OCI memory section can be mapped to cgroup2
653 * kernel memory accounting is always enabled and included in the set
654 * memory limit, hence these options can be ignored
655 * disableOOMKiller could be emulated using oom_score_adj + seccomp eBPF
656 * preventing self-upgrade (but allow downgrade)
658 * see also https://github.com/opencontainers/runtime-spec/issues/1005
660 if (tb
[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS
] ||
661 tb
[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER
] ||
662 tb
[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY
])
666 if (tb
[OCI_LINUX_CGROUPS_MEMORY_LIMIT
]) {
667 limit
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_MEMORY_LIMIT
]);
671 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, limit
);
673 cgroups_set("memory.max", tmp
);
676 if (tb
[OCI_LINUX_CGROUPS_MEMORY_RESERVATION
]) {
677 reservation
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_MEMORY_RESERVATION
]);
679 if (reservation
== -1)
682 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, reservation
);
684 cgroups_set("memory.low", tmp
);
687 /* OCI 'swap' acounts for memory+swap */
688 if (tb
[OCI_LINUX_CGROUPS_MEMORY_SWAP
]) {
689 swap
= blobmsg_cast_s64(tb
[OCI_LINUX_CGROUPS_MEMORY_SWAP
]);
693 else if (limit
== -1 || (limit
< swap
))
694 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, swap
);
696 snprintf(tmp
, sizeof(tmp
), "%" PRId64
, limit
- swap
);
698 cgroups_set("memory.swap_max", tmp
);
706 OCI_LINUX_CGROUPS_PIDS_LIMIT
,
707 __OCI_LINUX_CGROUPS_PIDS_MAX
,
710 static const struct blobmsg_policy oci_linux_cgroups_pids_policy
[] = {
711 [OCI_LINUX_CGROUPS_PIDS_LIMIT
] = { "limit", BLOBMSG_CAST_INT64
},
714 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr
*msg
)
716 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_MEMORY_MAX
];
717 char tmp
[32] = { 0 };
719 blobmsg_parse(oci_linux_cgroups_pids_policy
, __OCI_LINUX_CGROUPS_PIDS_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
721 if (!tb
[OCI_LINUX_CGROUPS_PIDS_LIMIT
])
724 snprintf(tmp
, sizeof(tmp
), "%" PRIu64
, blobmsg_cast_u64(tb
[OCI_LINUX_CGROUPS_PIDS_LIMIT
]));
726 cgroups_set("pids.max", tmp
);
731 static int parseOCIlinuxcgroups_unified(struct blob_attr
*msg
)
733 struct blob_attr
*cur
;
736 blobmsg_for_each_attr(cur
, msg
, rem
) {
737 if (blobmsg_type(cur
) != BLOBMSG_TYPE_STRING
)
741 if (strchr(blobmsg_name(cur
), '/') ||
742 !strcmp(blobmsg_name(cur
), "cgroup.subtree_control") ||
743 !strcmp(blobmsg_name(cur
), "cgroup.procs") ||
744 !strcmp(blobmsg_name(cur
), "cgroup.threads") ||
745 !strcmp(blobmsg_name(cur
), "cgroup.freeze"))
748 cgroups_set(blobmsg_name(cur
), blobmsg_get_string(cur
));
755 OCI_LINUX_CGROUPS_BLOCKIO
,
756 OCI_LINUX_CGROUPS_CPU
,
757 OCI_LINUX_CGROUPS_DEVICES
,
758 OCI_LINUX_CGROUPS_HUGEPAGELIMITS
,
759 OCI_LINUX_CGROUPS_INTELRDT
,
760 OCI_LINUX_CGROUPS_MEMORY
,
761 OCI_LINUX_CGROUPS_NETWORK
,
762 OCI_LINUX_CGROUPS_PIDS
,
763 OCI_LINUX_CGROUPS_RDMA
,
764 OCI_LINUX_CGROUPS_UNIFIED
,
765 __OCI_LINUX_CGROUPS_MAX
,
768 static const struct blobmsg_policy oci_linux_cgroups_policy
[] = {
769 [OCI_LINUX_CGROUPS_BLOCKIO
] = { "blockIO", BLOBMSG_TYPE_TABLE
},
770 [OCI_LINUX_CGROUPS_CPU
] = { "cpu", BLOBMSG_TYPE_TABLE
},
771 [OCI_LINUX_CGROUPS_DEVICES
] = { "devices", BLOBMSG_TYPE_ARRAY
},
772 [OCI_LINUX_CGROUPS_HUGEPAGELIMITS
] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY
},
773 [OCI_LINUX_CGROUPS_INTELRDT
] = { "intelRdt", BLOBMSG_TYPE_TABLE
},
774 [OCI_LINUX_CGROUPS_MEMORY
] = { "memory", BLOBMSG_TYPE_TABLE
},
775 [OCI_LINUX_CGROUPS_NETWORK
] = { "network", BLOBMSG_TYPE_TABLE
},
776 [OCI_LINUX_CGROUPS_PIDS
] = { "pids", BLOBMSG_TYPE_TABLE
},
777 [OCI_LINUX_CGROUPS_RDMA
] = { "rdma", BLOBMSG_TYPE_TABLE
},
778 [OCI_LINUX_CGROUPS_UNIFIED
] = { "unified", BLOBMSG_TYPE_TABLE
},
781 int parseOCIlinuxcgroups(struct blob_attr
*msg
)
783 struct blob_attr
*tb
[__OCI_LINUX_CGROUPS_MAX
];
786 blobmsg_parse(oci_linux_cgroups_policy
, __OCI_LINUX_CGROUPS_MAX
, tb
, blobmsg_data(msg
), blobmsg_len(msg
));
788 if (tb
[OCI_LINUX_CGROUPS_DEVICES
] ||
789 tb
[OCI_LINUX_CGROUPS_HUGEPAGELIMITS
] ||
790 tb
[OCI_LINUX_CGROUPS_INTELRDT
] ||
791 tb
[OCI_LINUX_CGROUPS_NETWORK
] ||
792 tb
[OCI_LINUX_CGROUPS_RDMA
])
795 if (tb
[OCI_LINUX_CGROUPS_BLOCKIO
]) {
796 ret
= parseOCIlinuxcgroups_legacy_blockio(tb
[OCI_LINUX_CGROUPS_BLOCKIO
]);
801 if (tb
[OCI_LINUX_CGROUPS_CPU
]) {
802 ret
= parseOCIlinuxcgroups_legacy_cpu(tb
[OCI_LINUX_CGROUPS_CPU
]);
807 if (tb
[OCI_LINUX_CGROUPS_MEMORY
]) {
808 ret
= parseOCIlinuxcgroups_legacy_memory(tb
[OCI_LINUX_CGROUPS_MEMORY
]);
813 if (tb
[OCI_LINUX_CGROUPS_PIDS
]) {
814 ret
= parseOCIlinuxcgroups_legacy_pids(tb
[OCI_LINUX_CGROUPS_PIDS
]);
819 if (tb
[OCI_LINUX_CGROUPS_UNIFIED
]) {
820 ret
= parseOCIlinuxcgroups_unified(tb
[OCI_LINUX_CGROUPS_UNIFIED
]);