target/linux/generic/backport-5.15/020-v6.3-27-mm-multi-gen-LRU-clarify-scan_control-flags.patch
From 93147736b5b3a21bea24313bfc7a696829932009 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:05 -0700
Subject: [PATCH 27/29] mm: multi-gen LRU: clarify scan_control flags

Among the flags in scan_control:
1. sc->may_swap, which indicates swap constraint due to memsw.max, is
   supported as usual.
2. sc->proactive, which indicates reclaim by memory.reclaim, may not
   opportunistically skip the aging path, since it is considered less
   latency sensitive.
3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
   swappiness to prioritize file LRU, since clean file pages are more
   likely to exist.
4. sc->may_writepage and sc->may_unmap, which indicate opportunistic
   reclaim, are rejected, since unmapped clean pages are already
   prioritized. Scanning for more of them is likely futile and can
   cause high reclaim latency when there is a large number of memcgs.

The rest are handled by the existing code.

Link: https://lkml.kernel.org/r/20221222041905.2431096-8-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 55 +++++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

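The flag handling summarized in the message above can be sketched as a small
userspace model. This is not upstream code: sc_model, model_swappiness() and
model_should_scan() are made-up stand-ins for the real scan_control and
mm/vmscan.c logic, kept only to illustrate how the flags are interpreted
after this patch.

#include <stdbool.h>
#include <stdio.h>

struct sc_model {
	bool may_swap;		/* swap allowed; cleared for memsw.max constraint */
	bool may_unmap;		/* cleared for opportunistic reclaim */
	bool may_writepage;	/* cleared for opportunistic reclaim */
	bool gfp_io;		/* models sc->gfp_mask & __GFP_IO */
};

/* Models get_swappiness() returning 0 when !sc->may_swap, plus the
 * clamp to 1 in try_to_shrink_lruvec() under an IO constraint. */
static int model_swappiness(const struct sc_model *sc, int memcg_swappiness)
{
	if (!sc->may_swap)
		return 0;
	if (memcg_swappiness && !sc->gfp_io)
		return 1;	/* clean file pages are more likely to exist */
	return memcg_swappiness;
}

/* Models the new bail-out in lru_gen_shrink_node(): opportunistic
 * reclaim (!may_writepage || !may_unmap) is rejected outright. */
static bool model_should_scan(const struct sc_model *sc)
{
	return sc->may_writepage && sc->may_unmap;
}

int main(void)
{
	struct sc_model opportunistic = {
		.may_swap = true, .may_unmap = false,
		.may_writepage = true, .gfp_io = true,
	};
	struct sc_model io_constrained = {
		.may_swap = true, .may_unmap = true,
		.may_writepage = true, .gfp_io = false,
	};

	printf("opportunistic reclaim scanned: %s\n",
	       model_should_scan(&opportunistic) ? "yes" : "no");
	printf("swappiness under IO constraint: %d\n",
	       model_swappiness(&io_constrained, 60));
	return 0;
}

Compiled with any C99 compiler, the two sample settings print "no"
(opportunistic reclaim is rejected) and "1" (swappiness is lowered to prefer
the file LRU), matching the behaviour the hunks below introduce.
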
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2905,6 +2905,9 @@ static int get_swappiness(struct lruvec
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
+	if (!sc->may_swap)
+		return 0;
+
 	if (!can_demote(pgdat->node_id, sc) &&
 	    mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
 		return 0;
@@ -3952,7 +3955,7 @@ static void walk_mm(struct lruvec *lruve
 	} while (err == -EAGAIN);
 }
 
-static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
+static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
 {
 	struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
 
@@ -3960,7 +3963,7 @@ static struct lru_gen_mm_wa
 		VM_WARN_ON_ONCE(walk);
 
 		walk = &pgdat->mm_walk;
-	} else if (!pgdat && !walk) {
+	} else if (!walk && force_alloc) {
 		VM_WARN_ON_ONCE(current_is_kswapd());
 
 		walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
@@ -4146,7 +4149,7 @@ static bool try_to_inc_max_seq(struct lr
 		goto done;
 	}
 
-	walk = set_mm_walk(NULL);
+	walk = set_mm_walk(NULL, true);
 	if (!walk) {
 		success = iterate_mm_list_nowalk(lruvec, max_seq);
 		goto done;
@@ -4215,8 +4218,6 @@ static bool lruvec_is_reclaimable(struct
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	DEFINE_MIN_SEQ(lruvec);
 
-	VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
-
 	/* see the comment on lru_gen_page */
 	gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
 	birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
@@ -4472,12 +4473,8 @@ static bool isolate_page(struct lruvec *
 {
 	bool success;
 
-	/* unmapping inhibited */
-	if (!sc->may_unmap && page_mapped(page))
-		return false;
-
 	/* swapping inhibited */
-	if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
+	if (!(sc->gfp_mask & __GFP_IO) &&
 	    (PageDirty(page) ||
 	     (PageAnon(page) && !PageSwapCache(page))))
 		return false;
@@ -4574,9 +4571,8 @@ static int scan_pages(struct lru
 	__count_vm_events(PGSCAN_ANON + type, isolated);
 
 	/*
-	 * There might not be eligible pages due to reclaim_idx, may_unmap and
-	 * may_writepage. Check the remaining to prevent livelock if it's not
-	 * making progress.
+	 * There might not be eligible pages due to reclaim_idx. Check the
+	 * remaining to prevent livelock if it's not making progress.
 	 */
 	return isolated || !remaining ? scanned : 0;
 }
@@ -4836,8 +4832,7 @@ static long get_nr_to_scan(struct lruvec
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	DEFINE_MAX_SEQ(lruvec);
 
-	if (mem_cgroup_below_min(memcg) ||
-	    (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
+	if (mem_cgroup_below_min(memcg))
 		return 0;
 
 	if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
@@ -4865,17 +4860,14 @@ static bool try_to_shrink_lruvec(struct
 	long nr_to_scan;
 	unsigned long scanned = 0;
 	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+	int swappiness = get_swappiness(lruvec, sc);
+
+	/* clean file pages are more likely to exist */
+	if (swappiness && !(sc->gfp_mask & __GFP_IO))
+		swappiness = 1;
 
 	while (true) {
 		int delta;
-		int swappiness;
-
-		if (sc->may_swap)
-			swappiness = get_swappiness(lruvec, sc);
-		else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
-			swappiness = 1;
-		else
-			swappiness = 0;
 
 		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
 		if (nr_to_scan <= 0)
@@ -5005,12 +4997,13 @@ static void lru_gen_shrink_lruvec(struct
 	struct blk_plug plug;
 
 	VM_WARN_ON_ONCE(global_reclaim(sc));
+	VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap);
 
 	lru_add_drain();
 
 	blk_start_plug(&plug);
 
-	set_mm_walk(lruvec_pgdat(lruvec));
+	set_mm_walk(NULL, false);
 
 	if (try_to_shrink_lruvec(lruvec, sc))
 		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
@@ -5066,11 +5059,19 @@ static void lru_gen_shrink_node(struct p
 
 	VM_WARN_ON_ONCE(!global_reclaim(sc));
 
+	/*
+	 * Unmapped clean pages are already prioritized. Scanning for more of
+	 * them is likely futile and can cause high reclaim latency when there
+	 * is a large number of memcgs.
+	 */
+	if (!sc->may_writepage || !sc->may_unmap)
+		goto done;
+
 	lru_add_drain();
 
 	blk_start_plug(&plug);
 
-	set_mm_walk(pgdat);
+	set_mm_walk(pgdat, false);
 
 	set_initial_priority(pgdat, sc);
 
@@ -5088,7 +5089,7 @@ static void lru_gen_shrink_node(struct p
 	clear_mm_walk();
 
 	blk_finish_plug(&plug);
-
+done:
 	/* kswapd should never fail */
 	pgdat->kswapd_failures = 0;
 }
@@ -5656,7 +5657,7 @@ static ssize_t lru_gen_seq_write(struct
 	set_task_reclaim_state(current, &sc.reclaim_state);
 	flags = memalloc_noreclaim_save();
 	blk_start_plug(&plug);
-	if (!set_mm_walk(NULL)) {
+	if (!set_mm_walk(NULL, true)) {
 		err = -ENOMEM;
 		goto done;
 	}