1 From 9e17efd11450d3d2069adaa3c58db9ac8ebd1c66 Mon Sep 17 00:00:00 2001
2 From: Yu Zhao <yuzhao@google.com>
3 Date: Sun, 18 Sep 2022 02:00:00 -0600
4 Subject: [PATCH 03/29] mm/vmscan.c: refactor shrink_node()
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
9 This patch refactors shrink_node() by moving its scan-balance setup into
10 prepare_scan_count(), to improve readability for upcoming mm/vmscan.c changes.
12 Link: https://lkml.kernel.org/r/20220918080010.2920238-4-yuzhao@google.com
13 Signed-off-by: Yu Zhao <yuzhao@google.com>
14 Reviewed-by: Barry Song <baohua@kernel.org>
15 Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
16 Acked-by: Brian Geffon <bgeffon@google.com>
17 Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
18 Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
19 Acked-by: Steven Barrett <steven@liquorix.net>
20 Acked-by: Suleiman Souhlal <suleiman@google.com>
21 Tested-by: Daniel Byrne <djbyrne@mtu.edu>
22 Tested-by: Donald Carr <d@chaos-reins.com>
23 Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
24 Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
25 Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
26 Tested-by: Sofia Trinh <sofia.trinh@edi.works>
27 Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
28 Cc: Andi Kleen <ak@linux.intel.com>
29 Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
30 Cc: Catalin Marinas <catalin.marinas@arm.com>
31 Cc: Dave Hansen <dave.hansen@linux.intel.com>
32 Cc: Hillf Danton <hdanton@sina.com>
33 Cc: Jens Axboe <axboe@kernel.dk>
34 Cc: Johannes Weiner <hannes@cmpxchg.org>
35 Cc: Jonathan Corbet <corbet@lwn.net>
36 Cc: Linus Torvalds <torvalds@linux-foundation.org>
37 Cc: Matthew Wilcox <willy@infradead.org>
38 Cc: Mel Gorman <mgorman@suse.de>
39 Cc: Michael Larabel <Michael@MichaelLarabel.com>
40 Cc: Michal Hocko <mhocko@kernel.org>
41 Cc: Mike Rapoport <rppt@kernel.org>
42 Cc: Mike Rapoport <rppt@linux.ibm.com>
43 Cc: Peter Zijlstra <peterz@infradead.org>
44 Cc: Qi Zheng <zhengqi.arch@bytedance.com>
45 Cc: Tejun Heo <tj@kernel.org>
46 Cc: Vlastimil Babka <vbabka@suse.cz>
47 Cc: Will Deacon <will@kernel.org>
48 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
50 mm/vmscan.c | 198 +++++++++++++++++++++++++++-------------------------
51 1 file changed, 104 insertions(+), 94 deletions(-)
53 diff --git a/mm/vmscan.c b/mm/vmscan.c
54 index 201acea81804..dc5f0381513f 100644
57 @@ -2497,6 +2497,109 @@ enum scan_balance {
61 +static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
64 + struct lruvec *target_lruvec;
66 + target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
69 + * Flush the memory cgroup stats, so that we read accurate per-memcg
70 + * lruvec stats for heuristics.
72 + mem_cgroup_flush_stats();
75 + * Determine the scan balance between anon and file LRUs.
77 + spin_lock_irq(&target_lruvec->lru_lock);
78 + sc->anon_cost = target_lruvec->anon_cost;
79 + sc->file_cost = target_lruvec->file_cost;
80 + spin_unlock_irq(&target_lruvec->lru_lock);
83 + * Target desirable inactive:active list ratios for the anon
84 + * and file LRU lists.
86 + if (!sc->force_deactivate) {
87 + unsigned long refaults;
89 + refaults = lruvec_page_state(target_lruvec,
90 + WORKINGSET_ACTIVATE_ANON);
91 + if (refaults != target_lruvec->refaults[0] ||
92 + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
93 + sc->may_deactivate |= DEACTIVATE_ANON;
95 + sc->may_deactivate &= ~DEACTIVATE_ANON;
98 + * When refaults are being observed, it means a new
99 + * workingset is being established. Deactivate to get
100 + * rid of any stale active pages quickly.
102 + refaults = lruvec_page_state(target_lruvec,
103 + WORKINGSET_ACTIVATE_FILE);
104 + if (refaults != target_lruvec->refaults[1] ||
105 + inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
106 + sc->may_deactivate |= DEACTIVATE_FILE;
108 + sc->may_deactivate &= ~DEACTIVATE_FILE;
110 + sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
113 + * If we have plenty of inactive file pages that aren't
114 + * thrashing, try to reclaim those first before touching
117 + file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
118 + if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
119 + sc->cache_trim_mode = 1;
121 + sc->cache_trim_mode = 0;
124 + * Prevent the reclaimer from falling into the cache trap: as
125 + * cache pages start out inactive, every cache fault will tip
126 + * the scan balance towards the file LRU. And as the file LRU
127 + * shrinks, so does the window for rotation from references.
128 + * This means we have a runaway feedback loop where a tiny
129 + * thrashing file LRU becomes infinitely more attractive than
130 + * anon pages. Try to detect this based on file LRU size.
132 + if (!cgroup_reclaim(sc)) {
133 + unsigned long total_high_wmark = 0;
134 + unsigned long free, anon;
137 + free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
138 + file = node_page_state(pgdat, NR_ACTIVE_FILE) +
139 + node_page_state(pgdat, NR_INACTIVE_FILE);
141 + for (z = 0; z < MAX_NR_ZONES; z++) {
142 + struct zone *zone = &pgdat->node_zones[z];
144 + if (!managed_zone(zone))
147 + total_high_wmark += high_wmark_pages(zone);
151 + * Consider anon: if that's low too, this isn't a
152 + * runaway file reclaim problem, but rather just
153 + * extreme pressure. Reclaim as per usual then.
155 + anon = node_page_state(pgdat, NR_INACTIVE_ANON);
158 + file + free <= total_high_wmark &&
159 + !(sc->may_deactivate & DEACTIVATE_ANON) &&
160 + anon >> sc->priority;
165 * Determine how aggressively the anon and file LRU lists should be
166 * scanned. The relative value of each set of LRU lists is determined
167 @@ -2965,109 +3068,16 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
168 unsigned long nr_reclaimed, nr_scanned;
169 struct lruvec *target_lruvec;
170 bool reclaimable = false;
171 - unsigned long file;
173 target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
177 - * Flush the memory cgroup stats, so that we read accurate per-memcg
178 - * lruvec stats for heuristics.
180 - mem_cgroup_flush_stats();
182 memset(&sc->nr, 0, sizeof(sc->nr));
184 nr_reclaimed = sc->nr_reclaimed;
185 nr_scanned = sc->nr_scanned;
188 - * Determine the scan balance between anon and file LRUs.
190 - spin_lock_irq(&target_lruvec->lru_lock);
191 - sc->anon_cost = target_lruvec->anon_cost;
192 - sc->file_cost = target_lruvec->file_cost;
193 - spin_unlock_irq(&target_lruvec->lru_lock);
196 - * Target desirable inactive:active list ratios for the anon
197 - * and file LRU lists.
199 - if (!sc->force_deactivate) {
200 - unsigned long refaults;
202 - refaults = lruvec_page_state(target_lruvec,
203 - WORKINGSET_ACTIVATE_ANON);
204 - if (refaults != target_lruvec->refaults[0] ||
205 - inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
206 - sc->may_deactivate |= DEACTIVATE_ANON;
208 - sc->may_deactivate &= ~DEACTIVATE_ANON;
211 - * When refaults are being observed, it means a new
212 - * workingset is being established. Deactivate to get
213 - * rid of any stale active pages quickly.
215 - refaults = lruvec_page_state(target_lruvec,
216 - WORKINGSET_ACTIVATE_FILE);
217 - if (refaults != target_lruvec->refaults[1] ||
218 - inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
219 - sc->may_deactivate |= DEACTIVATE_FILE;
221 - sc->may_deactivate &= ~DEACTIVATE_FILE;
223 - sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
226 - * If we have plenty of inactive file pages that aren't
227 - * thrashing, try to reclaim those first before touching
230 - file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
231 - if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
232 - sc->cache_trim_mode = 1;
234 - sc->cache_trim_mode = 0;
237 - * Prevent the reclaimer from falling into the cache trap: as
238 - * cache pages start out inactive, every cache fault will tip
239 - * the scan balance towards the file LRU. And as the file LRU
240 - * shrinks, so does the window for rotation from references.
241 - * This means we have a runaway feedback loop where a tiny
242 - * thrashing file LRU becomes infinitely more attractive than
243 - * anon pages. Try to detect this based on file LRU size.
245 - if (!cgroup_reclaim(sc)) {
246 - unsigned long total_high_wmark = 0;
247 - unsigned long free, anon;
250 - free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
251 - file = node_page_state(pgdat, NR_ACTIVE_FILE) +
252 - node_page_state(pgdat, NR_INACTIVE_FILE);
254 - for (z = 0; z < MAX_NR_ZONES; z++) {
255 - struct zone *zone = &pgdat->node_zones[z];
256 - if (!managed_zone(zone))
259 - total_high_wmark += high_wmark_pages(zone);
263 - * Consider anon: if that's low too, this isn't a
264 - * runaway file reclaim problem, but rather just
265 - * extreme pressure. Reclaim as per usual then.
267 - anon = node_page_state(pgdat, NR_INACTIVE_ANON);
270 - file + free <= total_high_wmark &&
271 - !(sc->may_deactivate & DEACTIVATE_ANON) &&
272 - anon >> sc->priority;
274 + prepare_scan_count(pgdat, sc);
276 shrink_node_memcgs(pgdat, sc);