From 6c7f552a48b49a8612786a28a2239fbc24fac289 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Fri, 30 Dec 2022 14:52:51 -0700
Subject: [PATCH 19/29] mm: add vma_has_recency()

Add vma_has_recency() to indicate whether a VMA may exhibit temporal
locality that the LRU algorithm relies on.

This function returns false for VMAs marked by VM_SEQ_READ or
VM_RAND_READ. While the former flag indicates linear access, i.e., a
special case of spatial locality, both flags indicate a lack of temporal
locality, i.e., the reuse of an area within a relatively small duration.
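
For context, a mapping typically picks up these flags via madvise():
MADV_SEQUENTIAL sets VM_SEQ_READ and MADV_RANDOM sets VM_RAND_READ, so
either advice makes vma_has_recency() return false for that VMA. A
minimal userspace sketch, not part of this patch (the file path is only
a placeholder):

  #include <fcntl.h>
  #include <sys/mman.h>
  #include <sys/stat.h>
  #include <unistd.h>

  int main(void)
  {
          struct stat st;
          int fd = open("/tmp/example", O_RDONLY);  /* placeholder file */

          if (fd < 0 || fstat(fd, &st) < 0)
                  return 1;

          void *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
          if (p == MAP_FAILED)
                  return 1;

          /* advise sequential access: the kernel sets VM_SEQ_READ on the VMA */
          madvise(p, st.st_size, MADV_SEQUENTIAL);

          /* ... stream through the mapping once ... */

          munmap(p, st.st_size);
          close(fd);
          return 0;
  }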

"Recency" is chosen over "locality" to avoid confusion between temporal
and spatial localities.

Before this patch, the active/inactive LRU only ignored the accessed bit
from VMAs marked by VM_SEQ_READ. After this patch, the active/inactive
LRU and MGLRU share the same logic: they both ignore the accessed bit if
vma_has_recency() returns false.
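
Concretely, that shared check reduces to the helper below and, for the
active/inactive LRU, to its new call site in zap_pte_range(); both
snippets are taken from the hunks later in this patch:

  static inline bool vma_has_recency(struct vm_area_struct *vma)
  {
          if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
                  return false;

          return true;
  }

  /* in zap_pte_range() */
  if (pte_young(ptent) && likely(vma_has_recency(vma)))
          mark_page_accessed(page);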

For the active/inactive LRU, the following fio test showed a [6, 8]%
increase in IOPS when randomly accessing mapped files under memory
pressure.

  kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo)
  kb=$((kb - 8*1024*1024))

  modprobe brd rd_nr=1 rd_size=$kb
  dd if=/dev/zero of=/dev/ram0 bs=1M

  mkfs.ext4 /dev/ram0
  mount /dev/ram0 /mnt/
  swapoff -a

  fio --name=test --directory=/mnt/ --ioengine=mmap --numjobs=8 \
      --size=8G --rw=randrw --time_based --runtime=10m \
      --group_reporting

The discussion that led to this patch is here [1]. Additional test
results are available in that thread.

[1] https://lore.kernel.org/r/Y31s%2FK8T85jh05wH@google.com/

Link: https://lkml.kernel.org/r/20221230215252.2628425-1-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Righi <andrea.righi@canonical.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_inline.h |  9 +++++++++
 mm/memory.c               |  8 ++++----
 mm/rmap.c                 | 42 +++++++++++++++++----------------------
 mm/vmscan.c               |  5 ++++-
 4 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e095c1c24311..e8c723053a52 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -333,4 +333,13 @@ static __always_inline void del_page_from_lru_list(struct page *page,
 	update_lru_size(lruvec, page_lru(page), page_zonenum(page),
 			-thp_nr_pages(page));
 }
+
+static inline bool vma_has_recency(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
+		return false;
+
+	return true;
+}
+
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 7d5be951de9e..1306b1ff0c10 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -41,6 +41,7 @@
 
 #include <linux/kernel_stat.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/coredump.h>
 #include <linux/sched/numa_balancing.h>
@@ -1353,8 +1354,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 					force_flush = 1;
 					set_page_dirty(page);
 				}
-				if (pte_young(ptent) &&
-				    likely(!(vma->vm_flags & VM_SEQ_READ)))
+				if (pte_young(ptent) && likely(vma_has_recency(vma)))
 					mark_page_accessed(page);
 			}
 			rss[mm_counter(page)]--;
@@ -4781,8 +4781,8 @@ static inline void mm_account_fault(struct pt_regs *regs,
 #ifdef CONFIG_LRU_GEN
 static void lru_gen_enter_fault(struct vm_area_struct *vma)
 {
-	/* the LRU algorithm doesn't apply to sequential or random reads */
-	current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
+	/* the LRU algorithm only applies to accesses with recency */
+	current->in_lru_fault = vma_has_recency(vma);
 }
 
 static void lru_gen_exit_fault(void)
diff --git a/mm/rmap.c b/mm/rmap.c
index 22a86122732e..53df47753f3c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -794,25 +794,14 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		if (pvmw.pte) {
-			if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
-			    !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
+			if (lru_gen_enabled() && pte_young(*pvmw.pte)) {
 				lru_gen_look_around(&pvmw);
 				referenced++;
 			}
 
 			if (ptep_clear_flush_young_notify(vma, address,
-						pvmw.pte)) {
-				/*
-				 * Don't treat a reference through
-				 * a sequentially read mapping as such.
-				 * If the page has been used in another mapping,
-				 * we will catch it; if this other mapping is
-				 * already gone, the unmap path will have set
-				 * PG_referenced or activated the page.
-				 */
-				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
-					referenced++;
-			}
+						pvmw.pte))
+				referenced++;
 		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 			if (pmdp_clear_flush_young_notify(vma, address,
 						pvmw.pmd))
@@ -846,7 +835,20 @@ static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
 	struct page_referenced_arg *pra = arg;
 	struct mem_cgroup *memcg = pra->memcg;
 
-	if (!mm_match_cgroup(vma->vm_mm, memcg))
+	/*
+	 * Ignore references from this mapping if it has no recency. If the
+	 * page has been used in another mapping, we will catch it; if this
+	 * other mapping is already gone, the unmap path will have set the
+	 * referenced flag or activated the page in zap_pte_range().
+	 */
+	if (!vma_has_recency(vma))
+		return true;
+
+	/*
+	 * If we are reclaiming on behalf of a cgroup, skip counting on behalf
+	 * of references from different cgroups.
+	 */
+	if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 		return true;
 
 	return false;
@@ -876,6 +878,7 @@ int page_referenced(struct page *page,
 		.rmap_one = page_referenced_one,
 		.arg = (void *)&pra,
 		.anon_lock = page_lock_anon_vma_read,
+		.invalid_vma = invalid_page_referenced_vma,
 	};
 
 	*vm_flags = 0;
@@ -891,15 +894,6 @@ int page_referenced(struct page *page,
 		return 1;
 	}
 
-	/*
-	 * If we are reclaiming on behalf of a cgroup, skip
-	 * counting on behalf of references from different
-	 * cgroups
-	 */
-	if (memcg) {
-		rwc.invalid_vma = invalid_page_referenced_vma;
-	}
-
 	rmap_walk(page, &rwc);
 	*vm_flags = pra.vm_flags;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 96f1af44bb77..4ab376abeaae 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3486,7 +3486,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
 	if (is_vm_hugetlb_page(vma))
 		return true;
 
-	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ))
+	if (!vma_has_recency(vma))
+		return true;
+
+	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))
 		return true;
 
 	if (vma == get_gate_vma(vma->vm_mm))
-- 
2.40.0