target/linux/generic/backport-5.15/020-v6.1-03-mm-vmscan.c-refactor-shrink_node.patch
From 9e17efd11450d3d2069adaa3c58db9ac8ebd1c66 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Sun, 18 Sep 2022 02:00:00 -0600
Subject: [PATCH 03/29] mm/vmscan.c: refactor shrink_node()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch refactors shrink_node() to improve readability for the upcoming
changes to mm/vmscan.c.

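The change is mechanical: the scan-balance heuristics that previously ran
inline at the top of shrink_node() move, essentially unchanged, into a new
helper, prepare_scan_count(), so the retry loop in shrink_node() reduces to
the shape below (a condensed sketch of the post-patch code, mirroring the
hunks that follow):

again:
	memset(&sc->nr, 0, sizeof(sc->nr));

	nr_reclaimed = sc->nr_reclaimed;
	nr_scanned = sc->nr_scanned;

	prepare_scan_count(pgdat, sc);

	shrink_node_memcgs(pgdat, sc);
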
Link: https://lkml.kernel.org/r/20220918080010.2920238-4-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: Brian Geffon <bgeffon@google.com>
Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Acked-by: Steven Barrett <steven@liquorix.net>
Acked-by: Suleiman Souhlal <suleiman@google.com>
Tested-by: Daniel Byrne <djbyrne@mtu.edu>
Tested-by: Donald Carr <d@chaos-reins.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
Tested-by: Sofia Trinh <sofia.trinh@edi.works>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 198 +++++++++++++++++++++++++++-------------------------
 1 file changed, 104 insertions(+), 94 deletions(-)

--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2497,6 +2497,109 @@ enum scan_balance {
 	SCAN_FILE,
 };
 
+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
+{
+	unsigned long file;
+	struct lruvec *target_lruvec;
+
+	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
+
+	/*
+	 * Flush the memory cgroup stats, so that we read accurate per-memcg
+	 * lruvec stats for heuristics.
+	 */
+	mem_cgroup_flush_stats();
+
+	/*
+	 * Determine the scan balance between anon and file LRUs.
+	 */
+	spin_lock_irq(&target_lruvec->lru_lock);
+	sc->anon_cost = target_lruvec->anon_cost;
+	sc->file_cost = target_lruvec->file_cost;
+	spin_unlock_irq(&target_lruvec->lru_lock);
+
+	/*
+	 * Target desirable inactive:active list ratios for the anon
+	 * and file LRU lists.
+	 */
+	if (!sc->force_deactivate) {
+		unsigned long refaults;
+
+		refaults = lruvec_page_state(target_lruvec,
+				WORKINGSET_ACTIVATE_ANON);
+		if (refaults != target_lruvec->refaults[0] ||
+			inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
+			sc->may_deactivate |= DEACTIVATE_ANON;
+		else
+			sc->may_deactivate &= ~DEACTIVATE_ANON;
+
+		/*
+		 * When refaults are being observed, it means a new
+		 * workingset is being established. Deactivate to get
+		 * rid of any stale active pages quickly.
+		 */
+		refaults = lruvec_page_state(target_lruvec,
+				WORKINGSET_ACTIVATE_FILE);
+		if (refaults != target_lruvec->refaults[1] ||
+			inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
+			sc->may_deactivate |= DEACTIVATE_FILE;
+		else
+			sc->may_deactivate &= ~DEACTIVATE_FILE;
+	} else
+		sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
+
+	/*
+	 * If we have plenty of inactive file pages that aren't
+	 * thrashing, try to reclaim those first before touching
+	 * anonymous pages.
+	 */
+	file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
+	if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
+		sc->cache_trim_mode = 1;
+	else
+		sc->cache_trim_mode = 0;
+
+	/*
+	 * Prevent the reclaimer from falling into the cache trap: as
+	 * cache pages start out inactive, every cache fault will tip
+	 * the scan balance towards the file LRU. And as the file LRU
+	 * shrinks, so does the window for rotation from references.
+	 * This means we have a runaway feedback loop where a tiny
+	 * thrashing file LRU becomes infinitely more attractive than
+	 * anon pages. Try to detect this based on file LRU size.
+	 */
+	if (!cgroup_reclaim(sc)) {
+		unsigned long total_high_wmark = 0;
+		unsigned long free, anon;
+		int z;
+
+		free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
+		file = node_page_state(pgdat, NR_ACTIVE_FILE) +
+			   node_page_state(pgdat, NR_INACTIVE_FILE);
+
+		for (z = 0; z < MAX_NR_ZONES; z++) {
+			struct zone *zone = &pgdat->node_zones[z];
+
+			if (!managed_zone(zone))
+				continue;
+
+			total_high_wmark += high_wmark_pages(zone);
+		}
+
+		/*
+		 * Consider anon: if that's low too, this isn't a
+		 * runaway file reclaim problem, but rather just
+		 * extreme pressure. Reclaim as per usual then.
+		 */
+		anon = node_page_state(pgdat, NR_INACTIVE_ANON);
+
+		sc->file_is_tiny =
+			file + free <= total_high_wmark &&
+			!(sc->may_deactivate & DEACTIVATE_ANON) &&
+			anon >> sc->priority;
+	}
+}
+
 /*
  * Determine how aggressively the anon and file LRU lists should be
  * scanned. The relative value of each set of LRU lists is determined
@@ -2965,109 +3068,16 @@ static void shrink_node(pg_data_t *pgdat
 	unsigned long nr_reclaimed, nr_scanned;
 	struct lruvec *target_lruvec;
 	bool reclaimable = false;
-	unsigned long file;
 
 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 
 again:
-	/*
-	 * Flush the memory cgroup stats, so that we read accurate per-memcg
-	 * lruvec stats for heuristics.
-	 */
-	mem_cgroup_flush_stats();
-
 	memset(&sc->nr, 0, sizeof(sc->nr));
 
 	nr_reclaimed = sc->nr_reclaimed;
 	nr_scanned = sc->nr_scanned;
 
-	/*
-	 * Determine the scan balance between anon and file LRUs.
-	 */
-	spin_lock_irq(&target_lruvec->lru_lock);
-	sc->anon_cost = target_lruvec->anon_cost;
-	sc->file_cost = target_lruvec->file_cost;
-	spin_unlock_irq(&target_lruvec->lru_lock);
-
-	/*
-	 * Target desirable inactive:active list ratios for the anon
-	 * and file LRU lists.
-	 */
-	if (!sc->force_deactivate) {
-		unsigned long refaults;
-
-		refaults = lruvec_page_state(target_lruvec,
-				WORKINGSET_ACTIVATE_ANON);
-		if (refaults != target_lruvec->refaults[0] ||
-			inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
-			sc->may_deactivate |= DEACTIVATE_ANON;
-		else
-			sc->may_deactivate &= ~DEACTIVATE_ANON;
-
-		/*
-		 * When refaults are being observed, it means a new
-		 * workingset is being established. Deactivate to get
-		 * rid of any stale active pages quickly.
-		 */
-		refaults = lruvec_page_state(target_lruvec,
-				WORKINGSET_ACTIVATE_FILE);
-		if (refaults != target_lruvec->refaults[1] ||
-			inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
-			sc->may_deactivate |= DEACTIVATE_FILE;
-		else
-			sc->may_deactivate &= ~DEACTIVATE_FILE;
-	} else
-		sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
-
-	/*
-	 * If we have plenty of inactive file pages that aren't
-	 * thrashing, try to reclaim those first before touching
-	 * anonymous pages.
-	 */
-	file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
-	if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
-		sc->cache_trim_mode = 1;
-	else
-		sc->cache_trim_mode = 0;
-
-	/*
-	 * Prevent the reclaimer from falling into the cache trap: as
-	 * cache pages start out inactive, every cache fault will tip
-	 * the scan balance towards the file LRU. And as the file LRU
-	 * shrinks, so does the window for rotation from references.
-	 * This means we have a runaway feedback loop where a tiny
-	 * thrashing file LRU becomes infinitely more attractive than
-	 * anon pages. Try to detect this based on file LRU size.
-	 */
-	if (!cgroup_reclaim(sc)) {
-		unsigned long total_high_wmark = 0;
-		unsigned long free, anon;
-		int z;
-
-		free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
-		file = node_page_state(pgdat, NR_ACTIVE_FILE) +
-			   node_page_state(pgdat, NR_INACTIVE_FILE);
-
-		for (z = 0; z < MAX_NR_ZONES; z++) {
-			struct zone *zone = &pgdat->node_zones[z];
-			if (!managed_zone(zone))
-				continue;
-
-			total_high_wmark += high_wmark_pages(zone);
-		}
-
-		/*
-		 * Consider anon: if that's low too, this isn't a
-		 * runaway file reclaim problem, but rather just
-		 * extreme pressure. Reclaim as per usual then.
-		 */
-		anon = node_page_state(pgdat, NR_INACTIVE_ANON);
-
-		sc->file_is_tiny =
-			file + free <= total_high_wmark &&
-			!(sc->may_deactivate & DEACTIVATE_ANON) &&
-			anon >> sc->priority;
-	}
+	prepare_scan_count(pgdat, sc);
 
 	shrink_node_memcgs(pgdat, sc);
 