target/linux/generic/backport-5.15/020-v6.1-03-mm-vmscan.c-refactor-shrink_node.patch
From 9e17efd11450d3d2069adaa3c58db9ac8ebd1c66 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Sun, 18 Sep 2022 02:00:00 -0600
Subject: [PATCH 03/29] mm/vmscan.c: refactor shrink_node()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch refactors shrink_node() to improve readability for the upcoming
changes to mm/vmscan.c.

Link: https://lkml.kernel.org/r/20220918080010.2920238-4-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 198 +++++++++++++++++++++++++++-------------------------
 1 file changed, 104 insertions(+), 94 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 201acea81804..dc5f0381513f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2497,6 +2497,109 @@ enum scan_balance {
 SCAN_FILE,
 };

+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
+{
+ unsigned long file;
+ struct lruvec *target_lruvec;
+
+ target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
+
+ /*
+ * Flush the memory cgroup stats, so that we read accurate per-memcg
+ * lruvec stats for heuristics.
+ */
+ mem_cgroup_flush_stats();
+
+ /*
+ * Determine the scan balance between anon and file LRUs.
+ */
+ spin_lock_irq(&target_lruvec->lru_lock);
+ sc->anon_cost = target_lruvec->anon_cost;
+ sc->file_cost = target_lruvec->file_cost;
+ spin_unlock_irq(&target_lruvec->lru_lock);
+
+ /*
+ * Target desirable inactive:active list ratios for the anon
+ * and file LRU lists.
+ */
+ if (!sc->force_deactivate) {
+ unsigned long refaults;
+
+ refaults = lruvec_page_state(target_lruvec,
+ WORKINGSET_ACTIVATE_ANON);
+ if (refaults != target_lruvec->refaults[0] ||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
+ sc->may_deactivate |= DEACTIVATE_ANON;
+ else
+ sc->may_deactivate &= ~DEACTIVATE_ANON;
+
+ /*
+ * When refaults are being observed, it means a new
+ * workingset is being established. Deactivate to get
+ * rid of any stale active pages quickly.
+ */
+ refaults = lruvec_page_state(target_lruvec,
+ WORKINGSET_ACTIVATE_FILE);
+ if (refaults != target_lruvec->refaults[1] ||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
+ sc->may_deactivate |= DEACTIVATE_FILE;
+ else
+ sc->may_deactivate &= ~DEACTIVATE_FILE;
+ } else
+ sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
+
+ /*
+ * If we have plenty of inactive file pages that aren't
+ * thrashing, try to reclaim those first before touching
+ * anonymous pages.
+ */
+ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
+ if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
+ sc->cache_trim_mode = 1;
+ else
+ sc->cache_trim_mode = 0;
+
+ /*
+ * Prevent the reclaimer from falling into the cache trap: as
+ * cache pages start out inactive, every cache fault will tip
+ * the scan balance towards the file LRU. And as the file LRU
+ * shrinks, so does the window for rotation from references.
+ * This means we have a runaway feedback loop where a tiny
+ * thrashing file LRU becomes infinitely more attractive than
+ * anon pages. Try to detect this based on file LRU size.
+ */
+ if (!cgroup_reclaim(sc)) {
+ unsigned long total_high_wmark = 0;
+ unsigned long free, anon;
+ int z;
+
+ free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
+ file = node_page_state(pgdat, NR_ACTIVE_FILE) +
+ node_page_state(pgdat, NR_INACTIVE_FILE);
+
+ for (z = 0; z < MAX_NR_ZONES; z++) {
+ struct zone *zone = &pgdat->node_zones[z];
+
+ if (!managed_zone(zone))
+ continue;
+
+ total_high_wmark += high_wmark_pages(zone);
+ }
+
+ /*
+ * Consider anon: if that's low too, this isn't a
+ * runaway file reclaim problem, but rather just
+ * extreme pressure. Reclaim as per usual then.
+ */
+ anon = node_page_state(pgdat, NR_INACTIVE_ANON);
+
+ sc->file_is_tiny =
+ file + free <= total_high_wmark &&
+ !(sc->may_deactivate & DEACTIVATE_ANON) &&
+ anon >> sc->priority;
+ }
+}
+
 /*
 * Determine how aggressively the anon and file LRU lists should be
 * scanned. The relative value of each set of LRU lists is determined
@@ -2965,109 +3068,16 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 unsigned long nr_reclaimed, nr_scanned;
 struct lruvec *target_lruvec;
 bool reclaimable = false;
- unsigned long file;

 target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);

again:
- /*
- * Flush the memory cgroup stats, so that we read accurate per-memcg
- * lruvec stats for heuristics.
- */
- mem_cgroup_flush_stats();
-
 memset(&sc->nr, 0, sizeof(sc->nr));

 nr_reclaimed = sc->nr_reclaimed;
 nr_scanned = sc->nr_scanned;

- /*
- * Determine the scan balance between anon and file LRUs.
- */
- spin_lock_irq(&target_lruvec->lru_lock);
- sc->anon_cost = target_lruvec->anon_cost;
- sc->file_cost = target_lruvec->file_cost;
- spin_unlock_irq(&target_lruvec->lru_lock);
-
- /*
- * Target desirable inactive:active list ratios for the anon
- * and file LRU lists.
- */
- if (!sc->force_deactivate) {
- unsigned long refaults;
-
- refaults = lruvec_page_state(target_lruvec,
- WORKINGSET_ACTIVATE_ANON);
- if (refaults != target_lruvec->refaults[0] ||
- inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
- sc->may_deactivate |= DEACTIVATE_ANON;
- else
- sc->may_deactivate &= ~DEACTIVATE_ANON;
-
- /*
- * When refaults are being observed, it means a new
- * workingset is being established. Deactivate to get
- * rid of any stale active pages quickly.
- */
- refaults = lruvec_page_state(target_lruvec,
- WORKINGSET_ACTIVATE_FILE);
- if (refaults != target_lruvec->refaults[1] ||
- inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
- sc->may_deactivate |= DEACTIVATE_FILE;
- else
- sc->may_deactivate &= ~DEACTIVATE_FILE;
- } else
- sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
-
- /*
- * If we have plenty of inactive file pages that aren't
- * thrashing, try to reclaim those first before touching
- * anonymous pages.
- */
- file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
- if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
- sc->cache_trim_mode = 1;
- else
- sc->cache_trim_mode = 0;
-
- /*
- * Prevent the reclaimer from falling into the cache trap: as
- * cache pages start out inactive, every cache fault will tip
- * the scan balance towards the file LRU. And as the file LRU
- * shrinks, so does the window for rotation from references.
- * This means we have a runaway feedback loop where a tiny
- * thrashing file LRU becomes infinitely more attractive than
- * anon pages. Try to detect this based on file LRU size.
- */
- if (!cgroup_reclaim(sc)) {
- unsigned long total_high_wmark = 0;
- unsigned long free, anon;
- int z;
-
- free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
- file = node_page_state(pgdat, NR_ACTIVE_FILE) +
- node_page_state(pgdat, NR_INACTIVE_FILE);
-
- for (z = 0; z < MAX_NR_ZONES; z++) {
- struct zone *zone = &pgdat->node_zones[z];
- if (!managed_zone(zone))
- continue;
-
- total_high_wmark += high_wmark_pages(zone);
- }
-
- /*
- * Consider anon: if that's low too, this isn't a
- * runaway file reclaim problem, but rather just
- * extreme pressure. Reclaim as per usual then.
- */
- anon = node_page_state(pgdat, NR_INACTIVE_ANON);
-
- sc->file_is_tiny =
- file + free <= total_high_wmark &&
- !(sc->may_deactivate & DEACTIVATE_ANON) &&
- anon >> sc->priority;
- }
+ prepare_scan_count(pgdat, sc);

 shrink_node_memcgs(pgdat, sc);

--
2.40.0
