1 From 46cbda7b65998a5af4493f745d94417af697bd68 Mon Sep 17 00:00:00 2001
2 From: Juergen Gross <jgross@suse.com>
3 Date: Wed, 23 Nov 2022 07:45:10 +0100
4 Subject: [PATCH 18/29] mm: introduce arch_has_hw_nonleaf_pmd_young()
6 When running as a Xen PV guest, commit eed9a328aa1a ("mm: x86: add
7 CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") can cause a protection violation in
8 pmdp_test_and_clear_young():
10 BUG: unable to handle page fault for address: ffff8880083374d0
11 #PF: supervisor write access in kernel mode
12 #PF: error_code(0x0003) - permissions violation
13 PGD 3026067 P4D 3026067 PUD 3027067 PMD 7fee5067 PTE 8010000008337065
14 Oops: 0003 [#1] PREEMPT SMP NOPTI
15 CPU: 7 PID: 158 Comm: kswapd0 Not tainted 6.1.0-rc5-20221118-doflr+ #1
16 RIP: e030:pmdp_test_and_clear_young+0x25/0x40
18 This happens because the Xen hypervisor can't emulate direct writes to
19 page table entries other than PTEs.
21 This can easily be fixed by introducing arch_has_hw_nonleaf_pmd_young()
22 similar to arch_has_hw_pte_young() and testing that instead of
23 CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG.
25 Link: https://lkml.kernel.org/r/20221123064510.16225-1-jgross@suse.com
26 Fixes: eed9a328aa1a ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG")
27 Signed-off-by: Juergen Gross <jgross@suse.com>
28 Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
29 Acked-by: Yu Zhao <yuzhao@google.com>
30 Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
31 Acked-by: David Hildenbrand <david@redhat.com> [core changes]
32 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
34 arch/x86/include/asm/pgtable.h | 8 ++++++++
35 include/linux/pgtable.h | 11 +++++++++++
36 mm/vmscan.c | 10 +++++-----
37 3 files changed, 24 insertions(+), 5 deletions(-)
39 diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
40 index c4b64ee357fd..d8363c676496 100644
41 --- a/arch/x86/include/asm/pgtable.h
42 +++ b/arch/x86/include/asm/pgtable.h
43 @@ -1405,6 +1405,14 @@ static inline bool arch_has_hw_pte_young(void)
48 +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
49 +static inline bool arch_has_hw_nonleaf_pmd_young(void)
51 + return !cpu_feature_enabled(X86_FEATURE_XENPV);
55 #endif /* __ASSEMBLY__ */
57 #endif /* _ASM_X86_PGTABLE_H */
58 diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
59 index dec3d890e814..562b4cc82b33 100644
60 --- a/include/linux/pgtable.h
61 +++ b/include/linux/pgtable.h
62 @@ -266,6 +266,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
63 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
66 +#ifndef arch_has_hw_nonleaf_pmd_young
68 + * Return whether the accessed bit in non-leaf PMD entries is supported on the
71 +static inline bool arch_has_hw_nonleaf_pmd_young(void)
73 + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
77 #ifndef arch_has_hw_pte_young
79 * Return whether the accessed bit is supported on the local CPU.
80 diff --git a/mm/vmscan.c b/mm/vmscan.c
81 index d310e0b9e520..96f1af44bb77 100644
84 @@ -3730,7 +3730,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
87 if (!pmd_trans_huge(pmd[i])) {
88 - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
89 + if (arch_has_hw_nonleaf_pmd_young() &&
90 get_cap(LRU_GEN_NONLEAF_YOUNG))
91 pmdp_test_and_clear_young(vma, addr, pmd + i);
93 @@ -3828,14 +3828,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
95 walk->mm_stats[MM_NONLEAF_TOTAL]++;
97 -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
98 - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
99 + if (arch_has_hw_nonleaf_pmd_young() &&
100 + get_cap(LRU_GEN_NONLEAF_YOUNG)) {
104 walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
108 if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
111 @@ -5135,7 +5135,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c
112 if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
113 caps |= BIT(LRU_GEN_MM_WALK);
115 - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
116 + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
117 caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
119 return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);