add arm mach_types update again
[openwrt/svn-archive/archive.git] / target / linux / orion / patches / 002-feroceon__speed_up_flushing_of_the_entire_cache.patch
1 Flushing the L1 D cache with a test/clean/invalidate loop is very
2 easy in software, but it is not the quickest way of doing it, as
3 there is a lot of overhead involved in re-scanning the cache from
4 the beginning every time we hit a dirty line.
5
6 This patch makes proc-feroceon.S use "clean+invalidate by set/way"
7 loops, sized according to the possible cache configurations of
8 Feroceon CPUs (either direct-mapped or 4-way set associative).
9
10 [nico: optimized the assembly a bit]
11
12 Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
13 Signed-off-by: Nicolas Pitre <nico@marvell.com>
14 ---
15 arch/arm/mm/proc-feroceon.S | 53 ++++++++++++++++++++++++++++++++++---------
16 1 files changed, 42 insertions(+), 11 deletions(-)
17
18 --- a/arch/arm/mm/proc-feroceon.S
19 +++ b/arch/arm/mm/proc-feroceon.S
20 @@ -44,11 +44,31 @@
21 */
22 #define CACHE_DLINESIZE 32
23
24 + .bss
25 + .align 3
26 +__cache_params_loc:
27 + .space 8
28 +
29 .text
30 +__cache_params:
31 + .word __cache_params_loc
32 +
33 /*
34 * cpu_feroceon_proc_init()
35 */
36 ENTRY(cpu_feroceon_proc_init)
37 + mrc p15, 0, r0, c0, c0, 1 @ read cache type register
38 + ldr r1, __cache_params
39 + mov r2, #(16 << 5)
40 + tst r0, #(1 << 16) @ get way
41 + mov r0, r0, lsr #18 @ get cache size order
42 + movne r3, #((4 - 1) << 30) @ 4-way
43 + and r0, r0, #0xf
44 + moveq r3, #0 @ 1-way
45 + mov r2, r2, lsl r0 @ actual cache size
46 + movne r2, r2, lsr #2 @ turned into # of sets
47 + sub r2, r2, #(1 << 5)
48 + stmia r1, {r2, r3}
49 mov pc, lr
50
51 /*
52 @@ -117,11 +137,19 @@
53 */
54 ENTRY(feroceon_flush_kern_cache_all)
55 mov r2, #VM_EXEC
56 - mov ip, #0
57 +
58 __flush_whole_cache:
59 -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
60 - bne 1b
61 + ldr r1, __cache_params
62 + ldmia r1, {r1, r3}
63 +1: orr ip, r1, r3
64 +2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way
65 + subs ip, ip, #(1 << 30) @ next way
66 + bcs 2b
67 + subs r1, r1, #(1 << 5) @ next set
68 + bcs 1b
69 +
70 tst r2, #VM_EXEC
71 + mov ip, #0
72 mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
73 mcrne p15, 0, ip, c7, c10, 4 @ drain WB
74 mov pc, lr
75 @@ -138,7 +166,6 @@
76 */
77 .align 5
78 ENTRY(feroceon_flush_user_cache_range)
79 - mov ip, #0
80 sub r3, r1, r0 @ calculate total size
81 cmp r3, #CACHE_DLIMIT
82 bgt __flush_whole_cache
83 @@ -152,6 +179,7 @@
84 cmp r0, r1
85 blo 1b
86 tst r2, #VM_EXEC
87 + mov ip, #0
88 mcrne p15, 0, ip, c7, c10, 4 @ drain WB
89 mov pc, lr
90
91 @@ -306,16 +334,19 @@
92 .align 5
93 ENTRY(cpu_feroceon_switch_mm)
94 #ifdef CONFIG_MMU
95 - mov ip, #0
96 -@ && 'Clean & Invalidate whole DCache'
97 -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
98 - bne 1b
99 - mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
100 - mcr p15, 0, ip, c7, c10, 4 @ drain WB
101 + mov r2, lr @ abuse r2 to preserve lr
102 + bl __flush_whole_cache
103 + @ if r2 contains the VM_EXEC bit then the next 2 ops are done already
104 + tst r2, #VM_EXEC
105 + mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache
106 + mcreq p15, 0, ip, c7, c10, 4 @ drain WB
107 +
108 mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
109 mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
110 -#endif
111 + mov pc, r2
112 +#else
113 mov pc, lr
114 +#endif
115
116 /*
117 * cpu_feroceon_set_pte_ext(ptep, pte, ext)