From 93147736b5b3a21bea24313bfc7a696829932009 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:05 -0700
Subject: [PATCH 27/29] mm: multi-gen LRU: clarify scan_control flags

Among the flags in scan_control:
1. sc->may_swap, which indicates swap constraint due to memsw.max, is
   supported as usual.
2. sc->proactive, which indicates reclaim by memory.reclaim, may not
   opportunistically skip the aging path, since it is considered less
   latency sensitive.
3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
   swappiness to prioritize file LRU, since clean file folios are more
   likely to exist.
4. sc->may_writepage and sc->may_unmap, which indicate opportunistic
   reclaim, are rejected, since unmapped clean folios are already
   prioritized. Scanning for more of them is likely futile and can
   cause high reclaim latency when there is a large number of memcgs.

The rest are handled by the existing code.
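
Concretely, the policy above reduces to something like the following
standalone sketch. It is an illustration only: the struct, the helper
names and the stubbed __GFP_IO bit below are stand-ins, not the
kernel's definitions.

  #include <stdbool.h>

  #define SKETCH_GFP_IO 0x40u  /* stand-in for __GFP_IO */

  struct sc_sketch {
          unsigned int gfp_mask;
          bool may_swap;       /* 1: swap constraint due to memsw.max */
          bool proactive;      /* 2: reclaim by memory.reclaim */
          bool may_writepage;  /* 4: cleared for opportunistic reclaim */
          bool may_unmap;      /* 4: cleared for opportunistic reclaim */
  };

  /* 4: opportunistic reclaim is rejected outright */
  static bool reject_reclaim(const struct sc_sketch *sc)
  {
          return !sc->may_writepage || !sc->may_unmap;
  }

  /* 1 and 3: the swap and IO constraints shape swappiness */
  static int effective_swappiness(const struct sc_sketch *sc, int swappiness)
  {
          if (!sc->may_swap)
                  return 0;    /* scan the file LRU only */
          if (swappiness && !(sc->gfp_mask & SKETCH_GFP_IO))
                  return 1;    /* clean file folios are more likely to exist */
          return swappiness;
  }

  /* 2: only proactive reclaim asks for a walk buffer up front */
  static bool walk_force_alloc(const struct sc_sketch *sc)
  {
          return sc->proactive;
  }
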
Link: https://lkml.kernel.org/r/20221222041905.2431096-8-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
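The set_mm_walk() signature change is the mechanical core of the
patch: the pgdat argument now only selects the preallocated per-node
buffer for kswapd, and a new force_alloc flag decides whether anyone
else may allocate one. A rough standalone model of that contract
(the names here are illustrative, not the kernel's):

  #include <stdbool.h>
  #include <stdlib.h>

  struct walk_sketch { int depth; };
  struct node_sketch { struct walk_sketch mm_walk; };

  /* models set_mm_walk(pgdat, force_alloc) after this patch */
  static struct walk_sketch *pick_walk(struct node_sketch *pgdat, bool force_alloc,
                                       struct walk_sketch *cur, bool is_kswapd)
  {
          if (pgdat && is_kswapd)
                  return &pgdat->mm_walk;          /* preallocated, never fails */
          if (!cur && force_alloc)
                  return calloc(1, sizeof(*cur));  /* may fail quietly */
          return cur;  /* reuse, or NULL: fall back to nowalk aging */
  }

Per the hunks below, kswapd passes the pgdat in lru_gen_shrink_node(),
memcg reclaim passes NULL with force_alloc = sc->proactive in
lru_gen_shrink_lruvec(), and try_to_inc_max_seq() and the debugfs
writer force the allocation.
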
 mm/vmscan.c | 55 +++++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3185,6 +3185,9 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
+	if (!sc->may_swap)
+		return 0;
+
 	if (!can_demote(pgdat->node_id, sc) &&
 	    mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
 		return 0;
@@ -4226,7 +4229,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm,
 	} while (err == -EAGAIN);
 }
 
-static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
+static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
 {
 	struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
 
@@ -4234,7 +4237,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
 		VM_WARN_ON_ONCE(walk);
 
 		walk = &pgdat->mm_walk;
-	} else if (!pgdat && !walk) {
+	} else if (!walk && force_alloc) {
 		VM_WARN_ON_ONCE(current_is_kswapd());
 
 		walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
@@ -4420,7 +4423,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
 		goto done;
 	}
 
-	walk = set_mm_walk(NULL);
+	walk = set_mm_walk(NULL, true);
 	if (!walk) {
 		success = iterate_mm_list_nowalk(lruvec, max_seq);
 		goto done;
@@ -4489,8 +4492,6 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc,
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	DEFINE_MIN_SEQ(lruvec);
 
-	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
-
 	/* see the comment on lru_gen_folio */
 	gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
 	birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
@@ -4746,12 +4747,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio,
 {
 	bool success;
 
-	/* unmapping inhibited */
-	if (!sc->may_unmap && folio_mapped(folio))
-		return false;
-
 	/* swapping inhibited */
-	if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
+	if (!(sc->gfp_mask & __GFP_IO) &&
 	    (folio_test_dirty(folio) ||
 	     (folio_test_anon(folio) && !folio_test_swapcache(folio))))
 		return false;
@@ -4848,9 +4845,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 	__count_vm_events(PGSCAN_ANON + type, isolated);
 
 	/*
-	 * There might not be eligible pages due to reclaim_idx, may_unmap and
-	 * may_writepage. Check the remaining to prevent livelock if it's not
-	 * making progress.
+	 * There might not be eligible pages due to reclaim_idx. Check the
+	 * remaining to prevent livelock if it's not making progress.
 	 */
 	return isolated || !remaining ? scanned : 0;
 }
@@ -5110,8 +5106,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	DEFINE_MAX_SEQ(lruvec);
 
-	if (mem_cgroup_below_min(memcg) ||
-	    (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
+	if (mem_cgroup_below_min(memcg))
 		return 0;
 
 	if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
@@ -5139,17 +5134,14 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	long nr_to_scan;
 	unsigned long scanned = 0;
 	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+	int swappiness = get_swappiness(lruvec, sc);
+
+	/* clean file folios are more likely to exist */
+	if (swappiness && !(sc->gfp_mask & __GFP_IO))
+		swappiness = 1;
 
 	while (true) {
 		int delta;
-		int swappiness;
-
-		if (sc->may_swap)
-			swappiness = get_swappiness(lruvec, sc);
-		else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
-			swappiness = 1;
-		else
-			swappiness = 0;
 
 		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
 		if (nr_to_scan <= 0)
@@ -5279,12 +5271,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	struct blk_plug plug;
 
 	VM_WARN_ON_ONCE(global_reclaim(sc));
+	VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap);
 
 	lru_add_drain();
 
 	blk_start_plug(&plug);
 
-	set_mm_walk(lruvec_pgdat(lruvec));
+	set_mm_walk(NULL, sc->proactive);
 
 	if (try_to_shrink_lruvec(lruvec, sc))
 		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
@@ -5340,11 +5333,19 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
 
 	VM_WARN_ON_ONCE(!global_reclaim(sc));
 
+	/*
+	 * Unmapped clean folios are already prioritized. Scanning for more of
+	 * them is likely futile and can cause high reclaim latency when there
+	 * is a large number of memcgs.
+	 */
+	if (!sc->may_writepage || !sc->may_unmap)
+		goto done;
+
 	lru_add_drain();
 
 	blk_start_plug(&plug);
 
-	set_mm_walk(pgdat);
+	set_mm_walk(pgdat, sc->proactive);
 
 	set_initial_priority(pgdat, sc);
 
@@ -5362,7 +5363,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
 	clear_mm_walk();
 
 	blk_finish_plug(&plug);
-
+done:
 	/* kswapd should never fail */
 	pgdat->kswapd_failures = 0;
 }
@@ -5934,7 +5935,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
 	set_task_reclaim_state(current, &sc.reclaim_state);
 	flags = memalloc_noreclaim_save();
 	blk_start_plug(&plug);
-	if (!set_mm_walk(NULL)) {
+	if (!set_mm_walk(NULL, true)) {
 		err = -ENOMEM;
 		goto done;
 	}