From 6c7f552a48b49a8612786a28a2239fbc24fac289 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Fri, 30 Dec 2022 14:52:51 -0700
Subject: [PATCH 19/29] mm: add vma_has_recency()

Add vma_has_recency() to indicate whether a VMA may exhibit temporal
locality that the LRU algorithm relies on.

This function returns false for VMAs marked by VM_SEQ_READ or
VM_RAND_READ. While the former flag indicates linear access, i.e., a
special case of spatial locality, both flags indicate a lack of temporal
locality, i.e., the reuse of an area within a relatively small duration.
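
For context, a mapping typically picks up these flags via madvise():
MADV_SEQUENTIAL sets VM_SEQ_READ and MADV_RANDOM sets VM_RAND_READ, so
either advice makes vma_has_recency() return false for that VMA. A
minimal userspace sketch, not part of this patch (the file path is only
a placeholder):

  #include <fcntl.h>
  #include <sys/mman.h>
  #include <sys/stat.h>
  #include <unistd.h>

  int main(void)
  {
          struct stat st;
          int fd = open("/tmp/example", O_RDONLY);  /* placeholder file */

          if (fd < 0 || fstat(fd, &st) < 0)
                  return 1;

          void *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
          if (p == MAP_FAILED)
                  return 1;

          /* advise sequential access: the kernel sets VM_SEQ_READ on the VMA */
          madvise(p, st.st_size, MADV_SEQUENTIAL);

          /* ... stream through the mapping once ... */

          munmap(p, st.st_size);
          close(fd);
          return 0;
  }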

"Recency" is chosen over "locality" to avoid confusion between temporal
and spatial localities.

Before this patch, the active/inactive LRU only ignored the accessed bit
from VMAs marked by VM_SEQ_READ. After this patch, the active/inactive
LRU and MGLRU share the same logic: they both ignore the accessed bit if
vma_has_recency() returns false.
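
Concretely, that shared check reduces to the helper below and, for the
active/inactive LRU, to its new call site in zap_pte_range(); both
snippets are taken from the hunks later in this patch:

  static inline bool vma_has_recency(struct vm_area_struct *vma)
  {
          if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
                  return false;

          return true;
  }

  /* in zap_pte_range() */
  if (pte_young(ptent) && likely(vma_has_recency(vma)))
          mark_page_accessed(page);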

For the active/inactive LRU, the following fio test showed a [6, 8]%
increase in IOPS when randomly accessing mapped files under memory
pressure.

  kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo)
  kb=$((kb - 8*1024*1024))

  modprobe brd rd_nr=1 rd_size=$kb
  dd if=/dev/zero of=/dev/ram0 bs=1M

  mkfs.ext4 /dev/ram0
  mount /dev/ram0 /mnt/
  swapoff -a

  fio --name=test --directory=/mnt/ --ioengine=mmap --numjobs=8 \
      --size=8G --rw=randrw --time_based --runtime=10m \
      --group_reporting

The discussion that led to this patch is here [1]. Additional test
results are available in that thread.

[1] https://lore.kernel.org/r/Y31s%2FK8T85jh05wH@google.com/

Link: https://lkml.kernel.org/r/20221230215252.2628425-1-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Righi <andrea.righi@canonical.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h |  9 +++++++++
 mm/memory.c               |  8 ++++----
 mm/rmap.c                 | 42 +++++++++++++++++----------------------
 mm/vmscan.c               |  5 ++++-
 4 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e095c1c24311..e8c723053a52 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -333,4 +333,13 @@ static __always_inline void del_page_from_lru_list(struct page *page,
 	update_lru_size(lruvec, page_lru(page), page_zonenum(page),
 			-thp_nr_pages(page));
 }
+
+static inline bool vma_has_recency(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
+		return false;
+
+	return true;
+}
+
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 7d5be951de9e..1306b1ff0c10 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -41,6 +41,7 @@
 
 #include <linux/kernel_stat.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/coredump.h>
 #include <linux/sched/numa_balancing.h>
@@ -1353,8 +1354,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 					force_flush = 1;
 					set_page_dirty(page);
 				}
-				if (pte_young(ptent) &&
-				    likely(!(vma->vm_flags & VM_SEQ_READ)))
+				if (pte_young(ptent) && likely(vma_has_recency(vma)))
 					mark_page_accessed(page);
 			}
 			rss[mm_counter(page)]--;
@@ -4781,8 +4781,8 @@ static inline void mm_account_fault(struct pt_regs *regs,
 #ifdef CONFIG_LRU_GEN
 static void lru_gen_enter_fault(struct vm_area_struct *vma)
 {
-	/* the LRU algorithm doesn't apply to sequential or random reads */
-	current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
+	/* the LRU algorithm only applies to accesses with recency */
+	current->in_lru_fault = vma_has_recency(vma);
 }
 
 static void lru_gen_exit_fault(void)
diff --git a/mm/rmap.c b/mm/rmap.c
index 22a86122732e..53df47753f3c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -794,25 +794,14 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		if (pvmw.pte) {
-			if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
-			    !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
+			if (lru_gen_enabled() && pte_young(*pvmw.pte)) {
 				lru_gen_look_around(&pvmw);
 				referenced++;
 			}
 
 			if (ptep_clear_flush_young_notify(vma, address,
-						pvmw.pte)) {
-				/*
-				 * Don't treat a reference through
-				 * a sequentially read mapping as such.
-				 * If the page has been used in another mapping,
-				 * we will catch it; if this other mapping is
-				 * already gone, the unmap path will have set
-				 * PG_referenced or activated the page.
-				 */
-				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
-					referenced++;
-			}
+						pvmw.pte))
+				referenced++;
 		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 			if (pmdp_clear_flush_young_notify(vma, address,
 						pvmw.pmd))
@@ -846,7 +835,20 @@ static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
 	struct page_referenced_arg *pra = arg;
 	struct mem_cgroup *memcg = pra->memcg;
 
-	if (!mm_match_cgroup(vma->vm_mm, memcg))
+	/*
+	 * Ignore references from this mapping if it has no recency. If the
+	 * page has been used in another mapping, we will catch it; if this
+	 * other mapping is already gone, the unmap path will have set the
+	 * referenced flag or activated the page in zap_pte_range().
+	 */
+	if (!vma_has_recency(vma))
+		return true;
+
+	/*
+	 * If we are reclaiming on behalf of a cgroup, skip counting on behalf
+	 * of references from different cgroups.
+	 */
+	if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 		return true;
 
 	return false;
@@ -876,6 +878,7 @@ int page_referenced(struct page *page,
 		.rmap_one = page_referenced_one,
 		.arg = (void *)&pra,
 		.anon_lock = page_lock_anon_vma_read,
+		.invalid_vma = invalid_page_referenced_vma,
 	};
 
 	*vm_flags = 0;
@@ -891,15 +894,6 @@ int page_referenced(struct page *page,
 		return 1;
 	}
 
-	/*
-	 * If we are reclaiming on behalf of a cgroup, skip
-	 * counting on behalf of references from different
-	 * cgroups
-	 */
-	if (memcg) {
-		rwc.invalid_vma = invalid_page_referenced_vma;
-	}
-
 	rmap_walk(page, &rwc);
 	*vm_flags = pra.vm_flags;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 96f1af44bb77..4ab376abeaae 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3486,7 +3486,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
 	if (is_vm_hugetlb_page(vma))
 		return true;
 
-	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ))
+	if (!vma_has_recency(vma))
+		return true;
+
+	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))
 		return true;
 
 	if (vma == get_gate_vma(vma->vm_mm))
-- 
2.40.0