4c6fcecdac1434ff49eff9c34a8dd8cc3b0c9ade
[openwrt/staging/jow.git] / target / linux / generic / backport-6.1 / 020-v6.3-11-UPSTREAM-mm-support-POSIX_FADV_NOREUSE.patch
1 From 9ca4e437a24dfc4ec6c362f319eb9850b9eca497 Mon Sep 17 00:00:00 2001
2 From: Yu Zhao <yuzhao@google.com>
3 Date: Fri, 30 Dec 2022 14:52:52 -0700
4 Subject: [PATCH 11/19] UPSTREAM: mm: support POSIX_FADV_NOREUSE
5
6 This patch adds POSIX_FADV_NOREUSE to vma_has_recency() so that the LRU
7 algorithm can ignore access to mapped files marked by this flag.
8
9 The advantages of POSIX_FADV_NOREUSE are:
10 1. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not alter the
11 default readahead behavior.
12 2. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not split VMAs and
13 therefore does not take mmap_lock.
14 3. Unlike MADV_COLD, setting it has a negligible cost, regardless of
15 how many pages it affects.
16
17 Its limitations are:
18 1. Like POSIX_FADV_RANDOM and POSIX_FADV_SEQUENTIAL, it currently does
19 not support a range. In other words, its scope is the entire file.
20 2. It currently does not ignore access through file descriptors.
21 Specifically, for the active/inactive LRU, given a file page shared
22 by two users and one of them having set POSIX_FADV_NOREUSE on the
23 file, this page will be activated upon the second user accessing
24 it. This corner case can be covered by checking POSIX_FADV_NOREUSE
25 before calling folio_mark_accessed() on the read path. But it is
26 considered not worth the effort.
27
28 There have been a few attempts to support POSIX_FADV_NOREUSE, e.g., [1].
29 This time the goal is to fill a niche: a few desktop applications, e.g.,
30 large file transferring and video encoding/decoding, want fast file
31 streaming with mmap() rather than direct IO. Among those applications, an
32 SVT-AV1 regression was reported when running with MGLRU [2]. The
33 following test can reproduce that regression.
34
35 kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo)
36 kb=$((kb - 8*1024*1024))
37
38 modprobe brd rd_nr=1 rd_size=$kb
39 dd if=/dev/zero of=/dev/ram0 bs=1M
40
41 mkfs.ext4 /dev/ram0
42 mount /dev/ram0 /mnt/
43 swapoff -a
44
45 fallocate -l 8G /mnt/swapfile
46 mkswap /mnt/swapfile
47 swapon /mnt/swapfile
48
49 wget http://ultravideo.cs.tut.fi/video/Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z
50 7z e -o/mnt/ Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z
51 SvtAv1EncApp --preset 12 -w 3840 -h 2160 \
52 -i /mnt/Bosphorus_3840x2160.y4m
53
54 For MGLRU, the following change showed a [9-11]% increase in FPS,
55 which makes it on par with the active/inactive LRU.
56
57 patch Source/App/EncApp/EbAppMain.c <<EOF
58 31a32
59 > #include <fcntl.h>
60 35d35
61 < #include <fcntl.h> /* _O_BINARY */
62 117a118
63 > posix_fadvise(config->mmap.fd, 0, 0, POSIX_FADV_NOREUSE);
64 EOF
65
66 [1] https://lore.kernel.org/r/1308923350-7932-1-git-send-email-andrea@betterlinux.com/
67 [2] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57
68
69 Link: https://lkml.kernel.org/r/20221230215252.2628425-2-yuzhao@google.com
70 Change-Id: I0b7f5f971d78014ea1ba44cee6a8ec902a4330d0
71 Signed-off-by: Yu Zhao <yuzhao@google.com>
72 Cc: Alexander Viro <viro@zeniv.linux.org.uk>
73 Cc: Andrea Righi <andrea.righi@canonical.com>
74 Cc: Johannes Weiner <hannes@cmpxchg.org>
75 Cc: Michael Larabel <Michael@MichaelLarabel.com>
76 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
77 (cherry picked from commit 17e810229cb3068b692fa078bd9b3a6527e0866a)
78 Bug: 274865848
79 Signed-off-by: T.J. Mercier <tjmercier@google.com>
80 ---
81 include/linux/fs.h | 2 ++
82 include/linux/mm_inline.h | 3 +++
83 mm/fadvise.c | 5 ++++-
84 3 files changed, 9 insertions(+), 1 deletion(-)
85
86 diff --git a/include/linux/fs.h b/include/linux/fs.h
87 index f14ecbeab2a9d..97f9c41c1a43a 100644
88 --- a/include/linux/fs.h
89 +++ b/include/linux/fs.h
90 @@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
91 /* File supports DIRECT IO */
92 #define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000)
93
94 +#define FMODE_NOREUSE ((__force fmode_t)0x800000)
95 +
96 /* File was opened by fanotify and shouldn't generate fanotify events */
97 #define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
98
99 diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
100 index 7bb2e5f94734c..9a8e2049333c0 100644
101 --- a/include/linux/mm_inline.h
102 +++ b/include/linux/mm_inline.h
103 @@ -600,6 +600,9 @@ static inline bool vma_has_recency(struct vm_area_struct *vma)
104 if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
105 return false;
106
107 + if (vma->vm_file && (vma->vm_file->f_mode & FMODE_NOREUSE))
108 + return false;
109 +
110 return true;
111 }
112
113 diff --git a/mm/fadvise.c b/mm/fadvise.c
114 index c76ee665355a4..2ba24d865bf5f 100644
115 --- a/mm/fadvise.c
116 +++ b/mm/fadvise.c
117 @@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
118 case POSIX_FADV_NORMAL:
119 file->f_ra.ra_pages = bdi->ra_pages;
120 spin_lock(&file->f_lock);
121 - file->f_mode &= ~FMODE_RANDOM;
122 + file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE);
123 spin_unlock(&file->f_lock);
124 break;
125 case POSIX_FADV_RANDOM:
126 @@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
127 force_page_cache_readahead(mapping, file, start_index, nrpages);
128 break;
129 case POSIX_FADV_NOREUSE:
130 + spin_lock(&file->f_lock);
131 + file->f_mode |= FMODE_NOREUSE;
132 + spin_unlock(&file->f_lock);
133 break;
134 case POSIX_FADV_DONTNEED:
135 __filemap_fdatawrite_range(mapping, offset, endbyte,
136 --
137 2.40.1
138