1 Flushing the L1 D cache with a test/clean/invalidate loop is very
2 easy in software, but it is not the quickest way of doing it, as
3 there is a lot of overhead involved in re-scanning the cache from
4 the beginning every time we hit a dirty line.
6 This patch makes proc-feroceon.S use "clean+invalidate by set/way"
7 loops that match the possible cache configurations of Feroceon CPUs
8 (either direct-mapped or 4-way set associative).
10 [nico: optimized the assembly a bit]
12 Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
13 Signed-off-by: Nicolas Pitre <nico@marvell.com>
15 arch/arm/mm/proc-feroceon.S | 53 ++++++++++++++++++++++++++++++++++---------
16 1 files changed, 42 insertions(+), 11 deletions(-)
18 --- a/arch/arm/mm/proc-feroceon.S
19 +++ b/arch/arm/mm/proc-feroceon.S
22 #define CACHE_DLINESIZE 32
31 + .word __cache_params_loc
34 * cpu_feroceon_proc_init()
36 ENTRY(cpu_feroceon_proc_init)
37 + mrc p15, 0, r0, c0, c0, 1 @ read cache type register
38 + ldr r1, __cache_params
40 + tst r0, #(1 << 16) @ get way
41 + mov r0, r0, lsr #18 @ get cache size order
42 + movne r3, #((4 - 1) << 30) @ 4-way
44 + moveq r3, #0 @ 1-way
45 + mov r2, r2, lsl r0 @ actual cache size
46 + movne r2, r2, lsr #2 @ turned into # of sets
47 + sub r2, r2, #(1 << 5)
54 ENTRY(feroceon_flush_kern_cache_all)
59 -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
61 + ldr r1, __cache_params
64 +2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way
65 + subs ip, ip, #(1 << 30) @ next way
67 + subs r1, r1, #(1 << 5) @ next set
72 mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
73 mcrne p15, 0, ip, c7, c10, 4 @ drain WB
78 ENTRY(feroceon_flush_user_cache_range)
80 sub r3, r1, r0 @ calculate total size
82 bgt __flush_whole_cache
88 mcrne p15, 0, ip, c7, c10, 4 @ drain WB
93 ENTRY(cpu_feroceon_switch_mm)
96 -@ && 'Clean & Invalidate whole DCache'
97 -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
99 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
100 - mcr p15, 0, ip, c7, c10, 4 @ drain WB
101 + mov r2, lr @ abuse r2 to preserve lr
102 + bl __flush_whole_cache
103 + @ if r2 contains the VM_EXEC bit then the next 2 ops are done already
105 + mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache
106 + mcreq p15, 0, ip, c7, c10, 4 @ drain WB
108 mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
109 mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
117 * cpu_feroceon_set_pte_ext(ptep, pte, ext)