mvebu: harmonize GL.iNet GL-MV1000 MTD partitions layout with vendor
[openwrt/staging/hauke.git] / target / linux / generic / backport-5.15 / 020-v6.1-08-mm-multigenerational-lru-user-interface.patch
1 From 5cc7fdec54e87e32b4fb0f07d84b21769d5f8d92 Mon Sep 17 00:00:00 2001
2 From: Yu Zhao <yuzhao@google.com>
3 Date: Mon, 25 Jan 2021 21:38:02 -0700
4 Subject: [PATCH 08/10] mm: multigenerational lru: user interface
5
6 Add /sys/kernel/mm/lru_gen/enabled to enable and disable the
7 multigenerational lru at runtime.
8
9 Add /sys/kernel/mm/lru_gen/min_ttl_ms to protect the working set of a
10 given number of milliseconds. The OOM killer is invoked if this
11 working set cannot be kept in memory.
12
13 Add /sys/kernel/debug/lru_gen to monitor the multigenerational lru and
14 invoke the aging and the eviction. This file has the following output:
15 memcg memcg_id memcg_path
16 node node_id
17 min_gen birth_time anon_size file_size
18 ...
19 max_gen birth_time anon_size file_size
20
21 min_gen is the oldest generation number and max_gen is the youngest
22 generation number. birth_time is in milliseconds. anon_size and
23 file_size are in pages.
24
25 This file takes the following input:
26 + memcg_id node_id max_gen [swappiness] [use_bloom_filter]
27 - memcg_id node_id min_gen [swappiness] [nr_to_reclaim]
28
29 The first command line invokes the aging, which scans PTEs for
30 accessed pages and then creates the next generation max_gen+1. A swap
31 file and a non-zero swappiness, which overrides vm.swappiness, are
32 required to scan PTEs mapping anon pages. The second command line
33 invokes the eviction, which evicts generations less than or equal to
34 min_gen. min_gen should be less than max_gen-1 as max_gen and
35 max_gen-1 are not fully aged and therefore cannot be evicted.
36 Setting nr_to_reclaim to N limits the number of pages to evict.
37 Setting use_bloom_filter to 0 overrides the default behavior which
38 only scans PTE tables found populated. Multiple command lines are
39 supported, as is concatenation with delimiters "," and ";".
40
41 Signed-off-by: Yu Zhao <yuzhao@google.com>
42 Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
43 Change-Id: I4448e60029badbe347aa3b624f429b280cc3a3d3
44 ---
45 include/linux/nodemask.h | 1 +
46 mm/vmscan.c | 415 +++++++++++++++++++++++++++++++++++++++
47 2 files changed, 416 insertions(+)
48
49 --- a/include/linux/nodemask.h
50 +++ b/include/linux/nodemask.h
51 @@ -485,6 +485,7 @@ static inline int num_node_state(enum no
52 #define first_online_node 0
53 #define first_memory_node 0
54 #define next_online_node(nid) (MAX_NUMNODES)
55 +#define next_memory_node(nid) (MAX_NUMNODES) /* ignores nid; same expansion as next_online_node() above */
56 #define nr_node_ids 1U
57 #define nr_online_nodes 1U
58
59 --- a/mm/vmscan.c
60 +++ b/mm/vmscan.c
61 @@ -53,6 +53,8 @@
62 #include <linux/memory.h>
63 #include <linux/pagewalk.h>
64 #include <linux/shmem_fs.h>
65 +#include <linux/ctype.h>
66 +#include <linux/debugfs.h>
67
68 #include <asm/tlbflush.h>
69 #include <asm/div64.h>
70 @@ -4817,6 +4819,413 @@ unlock:
71 }
72
73 /******************************************************************************
74 + * sysfs interface
75 + ******************************************************************************/
76 +
77 +static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf) /* sysfs show: report lru_gen_min_ttl converted from jiffies to milliseconds */
78 +{
79 + return sysfs_emit(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); /* sysfs_emit() bounds the write to the sysfs buffer, unlike sprintf() */
80 +}
81 +
82 +static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, /* sysfs store: parse a decimal millisecond count into lru_gen_min_ttl */
83 + const char *buf, size_t len)
84 +{
85 + unsigned int msecs;
86 +
87 + if (kstrtouint(buf, 10, &msecs)) /* base 10 only */
88 + return -EINVAL; /* every parse failure is reported as -EINVAL */
89 +
90 + WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs)); /* kept in jiffies; show_min_ttl() converts back */
91 +
92 + return len; /* report the whole write as consumed */
93 +}
94 +
95 +static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR( /* attribute "min_ttl_ms", mode 0644, backed by show_min_ttl()/store_min_ttl() */
96 + min_ttl_ms, 0644, show_min_ttl, store_min_ttl
97 +);
98 +
99 +static ssize_t show_enable(struct kobject *kobj, struct kobj_attribute *attr, char *buf) /* sysfs show: print lru_gen_enabled() as a decimal integer */
100 +{
101 + return sysfs_emit(buf, "%d\n", lru_gen_enabled()); /* sysfs_emit() applies the PAGE_SIZE bound itself */
102 +}
103 +
104 +static ssize_t store_enable(struct kobject *kobj, struct kobj_attribute *attr, /* sysfs store: parse a boolean and pass it to lru_gen_change_state() */
105 + const char *buf, size_t len)
106 +{
107 + bool enable;
108 +
109 + if (kstrtobool(buf, &enable))
110 + return -EINVAL; /* input was not accepted by kstrtobool() */
111 +
112 + lru_gen_change_state(enable, true, false); /* NOTE(review): meaning of the two fixed flags is defined elsewhere - confirm */
113 +
114 + return len; /* report the whole write as consumed */
115 +}
116 +
117 +static struct kobj_attribute lru_gen_enabled_attr = __ATTR( /* attribute "enabled", mode 0644, backed by show_enable()/store_enable() */
118 + enabled, 0644, show_enable, store_enable
119 +);
120 +
121 +static struct attribute *lru_gen_attrs[] = { /* attributes exposed through lru_gen_attr_group */
122 + &lru_gen_min_ttl_attr.attr,
123 + &lru_gen_enabled_attr.attr,
124 + NULL /* list terminator */
125 +};
126 +
127 +static struct attribute_group lru_gen_attr_group = { /* group registered under mm_kobj by init_lru_gen() */
128 + .name = "lru_gen", /* name of the group */
129 + .attrs = lru_gen_attrs,
130 +};
131 +
132 +/******************************************************************************
133 + * debugfs interface
134 + ******************************************************************************/
135 +
136 +static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos) /* seq_file .start: position on the (*pos)-th (memcg, node) pair */
137 +{
138 + struct mem_cgroup *memcg;
139 + loff_t nr_to_skip = *pos;
140 +
141 + m->private = kvmalloc(PATH_MAX, GFP_KERNEL); /* path scratch buffer used by lru_gen_seq_show(); freed in lru_gen_seq_stop() */
142 + if (!m->private)
143 + return ERR_PTR(-ENOMEM);
144 +
145 + memcg = mem_cgroup_iter(NULL, NULL, NULL); /* first memcg of the walk */
146 + do {
147 + int nid;
148 +
149 + for_each_node_state(nid, N_MEMORY) {
150 + if (!nr_to_skip--) /* *pos pairs have been skipped: this is the one to show */
151 + return get_lruvec(nid, memcg);
152 + }
153 + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
154 +
155 + return NULL; /* *pos is past the last pair */
156 +}
157 +
158 +static void lru_gen_seq_stop(struct seq_file *m, void *v) /* seq_file .stop: end the memcg walk and free the path buffer */
159 +{
160 + if (!IS_ERR_OR_NULL(v)) /* v may be NULL or the ERR_PTR returned by lru_gen_seq_start() */
161 + mem_cgroup_iter_break(NULL, lruvec_memcg(v)); /* presumably releases what the unfinished walk still holds - confirm */
162 +
163 + kvfree(m->private); /* buffer allocated in lru_gen_seq_start() */
164 + m->private = NULL;
165 +}
166 +
167 +static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos) /* seq_file .next: advance to the next node, then to the next memcg */
168 +{
169 + int nid = lruvec_pgdat(v)->node_id; /* v is the lruvec returned by the previous start/next call */
170 + struct mem_cgroup *memcg = lruvec_memcg(v);
171 +
172 + ++*pos;
173 +
174 + nid = next_memory_node(nid);
175 + if (nid == MAX_NUMNODES) { /* no further node for this memcg */
176 + memcg = mem_cgroup_iter(NULL, memcg, NULL);
177 + if (!memcg)
178 + return NULL; /* walk finished */
179 +
180 + nid = first_memory_node; /* restart the node scan for the new memcg */
181 + }
182 +
183 + return get_lruvec(nid, memcg);
184 +}
185 +
186 +static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, /* print per-tier and mm_walk statistics for generation seq; called by lru_gen_seq_show() when full is set */
187 + unsigned long max_seq, unsigned long *min_seq,
188 + unsigned long seq)
189 +{
190 + int i;
191 + int type, tier;
192 + int hist = lru_hist_from_seq(seq); /* index into the per-history arrays below */
193 + struct lrugen *lrugen = &lruvec->evictable;
194 +
195 + for (tier = 0; tier < MAX_NR_TIERS; tier++) { /* one output line per tier */
196 + seq_printf(m, " %10d", tier);
197 + for (type = 0; type < ANON_AND_FILE; type++) {
198 + unsigned long n[3] = {};
199 +
200 + if (seq == max_seq) { /* youngest generation: averaged counters, n[2] stays 0 */
201 + n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
202 + n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
203 +
204 + seq_printf(m, " %10luR %10luT %10lu ", n[0], n[1], n[2]);
205 + } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { /* per-history refaulted/evicted/protected counters */
206 + n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
207 + n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
208 + if (tier) /* protected[] is indexed by tier - 1, so tier 0 keeps n[2] == 0 */
209 + n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]);
210 +
211 + seq_printf(m, " %10lur %10lue %10lup", n[0], n[1], n[2]);
212 + } else
213 + seq_puts(m, " 0 0 0 "); /* nothing to report for this generation and type */
214 + }
215 + seq_putc(m, '\n');
216 + }
217 +
218 + seq_puts(m, " ");
219 + for (i = 0; i < NR_MM_STATS; i++) { /* mm_walk statistics, one column per stat code */
220 + if (seq == max_seq && NR_HIST_GENS == 1)
221 + seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_walk.stats[hist][i]),
222 + toupper(MM_STAT_CODES[i])); /* code letter upper-cased in this case */
223 + else if (seq != max_seq && NR_HIST_GENS > 1)
224 + seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_walk.stats[hist][i]),
225 + MM_STAT_CODES[i]); /* code letter printed as stored */
226 + else
227 + seq_puts(m, " 0 ");
228 + }
229 + seq_putc(m, '\n');
230 +}
231 +
232 +static int lru_gen_seq_show(struct seq_file *m, void *v) /* seq_file .show: print one lruvec, i.e. one memcg on one node */
233 +{
234 + unsigned long seq;
235 + bool full = !debugfs_real_fops(m->file)->write; /* fops without .write (lru_gen_ro_fops) select the full output */
236 + struct lruvec *lruvec = v;
237 + struct lrugen *lrugen = &lruvec->evictable;
238 + int nid = lruvec_pgdat(lruvec)->node_id;
239 + struct mem_cgroup *memcg = lruvec_memcg(lruvec);
240 + DEFINE_MAX_SEQ(lruvec);
241 + DEFINE_MIN_SEQ(lruvec);
242 +
243 + if (nid == first_memory_node) { /* memcg header line is printed only for the first memory node */
244 + const char *path = memcg ? m->private : ""; /* m->private is the buffer from lru_gen_seq_start() */
245 +
246 +#ifdef CONFIG_MEMCG
247 + if (memcg)
248 + cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
249 +#endif
250 + seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path);
251 + }
252 +
253 + seq_printf(m, " node %5d\n", nid);
254 +
255 + if (!full)
256 + seq = min_seq[0]; /* brief output starts at min_seq[0] */
257 + else if (max_seq >= MAX_NR_GENS)
258 + seq = max_seq - MAX_NR_GENS + 1; /* full output: the last MAX_NR_GENS sequence numbers */
259 + else
260 + seq = 0;
261 +
262 + for (; seq <= max_seq; seq++) { /* one line per generation, oldest first */
263 + int gen, type, zone;
264 + unsigned int msecs;
265 +
266 + gen = lru_gen_from_seq(seq);
267 + msecs = jiffies_to_msecs(jiffies - READ_ONCE(lrugen->timestamps[gen])); /* time elapsed since the generation's timestamp */
268 +
269 + seq_printf(m, " %10lu %10u", seq, msecs);
270 +
271 + for (type = 0; type < ANON_AND_FILE; type++) {
272 + long size = 0;
273 +
274 + if (seq < min_seq[type]) { /* below this type's min_seq: print a placeholder */
275 + seq_puts(m, " -0 ");
276 + continue;
277 + }
278 +
279 + for (zone = 0; zone < MAX_NR_ZONES; zone++) /* sum the sizes over all zones */
280 + size += READ_ONCE(lrugen->sizes[gen][type][zone]);
281 +
282 + seq_printf(m, " %10lu ", max(size, 0L)); /* a negative sum is printed as 0 */
283 + }
284 +
285 + seq_putc(m, '\n');
286 +
287 + if (full)
288 + lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq);
289 + }
290 +
291 + return 0;
292 +}
293 +
294 +static const struct seq_operations lru_gen_seq_ops = { /* iterator callbacks handed to seq_open() by lru_gen_seq_open() */
295 + .start = lru_gen_seq_start,
296 + .stop = lru_gen_seq_stop,
297 + .next = lru_gen_seq_next,
298 + .show = lru_gen_seq_show,
299 +};
300 +
301 +static int run_aging(struct lruvec *lruvec, struct scan_control *sc, int swappiness, /* '+' command: run try_to_inc_max_seq() when seq equals the current max_seq */
302 + unsigned long seq, bool use_filter)
303 +{
304 + DEFINE_MAX_SEQ(lruvec);
305 +
306 + if (seq == max_seq)
307 + try_to_inc_max_seq(lruvec, sc, swappiness, max_seq, use_filter);
308 +
309 + return seq > max_seq ? -EINVAL : 0; /* seq below max_seq does nothing and returns 0 */
310 +}
311 +
312 +static int run_eviction(struct lruvec *lruvec, struct scan_control *sc, int swappiness, /* '-' command: call evict_pages() repeatedly until one of the stop conditions below holds */
313 + unsigned long seq, unsigned long nr_to_reclaim)
314 +{
315 + struct blk_plug plug;
316 + int err = -EINTR; /* returned when a pending signal ends the loop */
317 + DEFINE_MAX_SEQ(lruvec);
318 +
319 + if (seq >= max_seq - 1) /* max_seq and max_seq - 1 are rejected */
320 + return -EINVAL;
321 +
322 + sc->nr_reclaimed = 0;
323 +
324 + blk_start_plug(&plug);
325 +
326 + while (!signal_pending(current)) {
327 + DEFINE_MIN_SEQ(lruvec); /* re-read on every iteration */
328 +
329 + if (seq < min_seq[!swappiness] || sc->nr_reclaimed >= nr_to_reclaim || /* min_seq index is 1 when swappiness is 0, else 0 */
330 + !evict_pages(lruvec, sc, swappiness)) {
331 + err = 0; /* target reached or evict_pages() returned 0 */
332 + break;
333 + }
334 +
335 + cond_resched();
336 + }
337 +
338 + blk_finish_plug(&plug);
339 +
340 + return err;
341 +}
342 +
343 +static int run_cmd(char cmd, int memcg_id, int nid, struct scan_control *sc, /* resolve (memcg_id, nid) to a lruvec and dispatch cmd '+' or '-' */
344 + int swappiness, unsigned long seq, unsigned long opt)
345 +{
346 + struct lruvec *lruvec;
347 + int err = -EINVAL; /* result for every validation failure below */
348 + struct mem_cgroup *memcg = NULL;
349 +
350 + if (!mem_cgroup_disabled()) {
351 + rcu_read_lock();
352 + memcg = mem_cgroup_from_id(memcg_id);
353 +#ifdef CONFIG_MEMCG
354 + if (memcg && !css_tryget(&memcg->css)) /* treat as not found when css_tryget() fails */
355 + memcg = NULL;
356 +#endif
357 + rcu_read_unlock();
358 +
359 + if (!memcg)
360 + goto done;
361 + }
362 + if (memcg_id != mem_cgroup_id(memcg)) /* also reached with memcg == NULL when memcgs are disabled */
363 + goto done;
364 +
365 + if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) /* range-check nid before node_state() */
366 + goto done;
367 +
368 + lruvec = get_lruvec(nid, memcg);
369 +
370 + if (swappiness < 0) /* negative: fall back to get_swappiness() */
371 + swappiness = get_swappiness(memcg);
372 + else if (swappiness > 200)
373 + goto done;
374 +
375 + switch (cmd) { /* any other cmd leaves err at -EINVAL */
376 + case '+':
377 + err = run_aging(lruvec, sc, swappiness, seq, opt); /* opt is passed as use_filter */
378 + break;
379 + case '-':
380 + err = run_eviction(lruvec, sc, swappiness, seq, opt); /* opt is passed as nr_to_reclaim */
381 + break;
382 + }
383 +done:
384 + mem_cgroup_put(memcg); /* memcg may be NULL here */
385 +
386 + return err;
387 +}
388 +
389 +static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, /* debugfs write: run each command in the user buffer in order, stopping at the first error */
390 + size_t len, loff_t *pos)
391 +{
392 + void *buf;
393 + char *cur, *next;
394 + unsigned int flags;
395 + int err = 0;
396 + struct scan_control sc = { /* shared by every command of this write */
397 + .may_writepage = 1,
398 + .may_unmap = 1,
399 + .may_swap = 1,
400 + .reclaim_idx = MAX_NR_ZONES - 1,
401 + .gfp_mask = GFP_KERNEL,
402 + };
403 +
404 + buf = kvmalloc(len + 1, GFP_KERNEL); /* one extra byte for the terminating NUL */
405 + if (!buf)
406 + return -ENOMEM;
407 +
408 + if (copy_from_user(buf, src, len)) {
409 + kvfree(buf);
410 + return -EFAULT;
411 + }
412 +
413 + next = buf;
414 + next[len] = '\0';
415 +
416 + sc.reclaim_state.mm_walk_args = alloc_mm_walk_args();
417 + if (!sc.reclaim_state.mm_walk_args) {
418 + kvfree(buf);
419 + return -ENOMEM;
420 + }
421 +
422 + flags = memalloc_noreclaim_save(); /* restored after the command loop */
423 + set_task_reclaim_state(current, &sc.reclaim_state);
424 +
425 + while ((cur = strsep(&next, ",;\n"))) { /* commands are separated by ',', ';' or newline */
426 + int n;
427 + int end;
428 + char cmd;
429 + unsigned int memcg_id;
430 + unsigned int nid;
431 + unsigned long seq;
432 + unsigned int swappiness = -1; /* optional field; this value is used when it is omitted */
433 + unsigned long opt = -1; /* optional field; this value is used when it is omitted */
434 +
435 + cur = skip_spaces(cur);
436 + if (!*cur) /* empty command */
437 + continue;
438 +
439 + n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid, /* each %n stores the offset reached so far in end */
440 + &seq, &end, &swappiness, &end, &opt, &end);
441 + if (n < 4 || cur[end]) { /* need the four mandatory fields and no unparsed trailing text */
442 + err = -EINVAL;
443 + break;
444 + }
445 +
446 + err = run_cmd(cmd, memcg_id, nid, &sc, swappiness, seq, opt);
447 + if (err)
448 + break;
449 + }
450 +
451 + set_task_reclaim_state(current, NULL);
452 + memalloc_noreclaim_restore(flags);
453 +
454 + free_mm_walk_args(sc.reclaim_state.mm_walk_args);
455 + kvfree(buf);
456 +
457 + return err ? : len; /* first error, otherwise the full length */
458 +}
459 +
460 +static int lru_gen_seq_open(struct inode *inode, struct file *file) /* open handler shared by both debugfs files */
461 +{
462 + return seq_open(file, &lru_gen_seq_ops);
463 +}
464 +
465 +static const struct file_operations lru_gen_rw_fops = { /* file operations with .write; registered as "lru_gen" by init_lru_gen() */
466 + .open = lru_gen_seq_open,
467 + .read = seq_read,
468 + .write = lru_gen_seq_write, /* its presence makes lru_gen_seq_show() use the brief output */
469 + .llseek = seq_lseek,
470 + .release = seq_release,
471 +};
472 +
473 +static const struct file_operations lru_gen_ro_fops = { /* no .write, so lru_gen_seq_show() prints the full output; registered as "lru_gen_full" */
474 + .open = lru_gen_seq_open,
475 + .read = seq_read,
476 + .llseek = seq_lseek,
477 + .release = seq_release,
478 +};
479 +
480 +/******************************************************************************
481 * initialization
482 ******************************************************************************/
483
484 @@ -4886,6 +5295,12 @@ static int __init init_lru_gen(void)
485 BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
486 BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1);
487
488 + if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
489 + pr_err("lru_gen: failed to create sysfs group\n");
490 +
491 + debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops);
492 + debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops);
493 +
494 return 0;
495 };
496 late_initcall(init_lru_gen);