brcm2708: update 4.1 patches
[openwrt/openwrt.git] target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch
From aefbd80ab33d9bbf786bd314da8929a7347dfc4d Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.org>
Date: Wed, 8 Jul 2015 14:48:57 +0100
Subject: [PATCH 106/222] vchiq_arm: Two cacheing fixes

1) Make fragment size vary with cache line size
Without this patch, non-cache-line-aligned transfers may corrupt
(or be corrupted by) adjacent data structures.

Both ARM and VC need to be updated to enable this feature. This is
ensured by having the loader apply a new DT parameter -
cache-line-size. The existence of this parameter guarantees that the
kernel is capable, and the parameter will only be modified from the
safe default if the loader is capable.

2) Flush/invalidate vmalloc'd memory, and invalidate after reads
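
For reference, a minimal sketch of the property lookup this change relies on
(the helper name and its exact shape are illustrative only; the driver itself
keeps the value in a global and derives the fragment size from it, as the
diff below shows):

	#include <linux/device.h>
	#include <linux/of.h>

	/*
	 * Illustrative only: pick up the loader-provided "cache-line-size"
	 * property, keep a safe default when it is absent, and size each
	 * fragment at two cache lines - one for the head of a misaligned
	 * read and one for its tail.
	 */
	static unsigned int vchiq_fragment_size(struct device *dev)
	{
		u32 cache_line_size = 32;	/* safe default, as in the DT */

		/* optional property - an old loader leaves the default */
		(void)of_property_read_u32(dev->of_node, "cache-line-size",
					   &cache_line_size);

		return 2 * cache_line_size;
	}

Fragments are two cache lines long because a misaligned read can leave a
partial line at both the head and the tail of the buffer, and each partial
line needs its own cache-line-sized bounce area.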
---
 arch/arm/boot/dts/bcm2708_common.dtsi    |   5 +
 .../interface/vchiq_arm/vchiq_2835_arm.c | 112 +++++++++++++--------
 2 files changed, 77 insertions(+), 40 deletions(-)

--- a/arch/arm/boot/dts/bcm2708_common.dtsi
+++ b/arch/arm/boot/dts/bcm2708_common.dtsi
@@ -218,6 +218,7 @@
 			compatible = "brcm,bcm2835-vchiq";
 			reg = <0x7e00b840 0xf>;
 			interrupts = <0 2>;
+			cache-line-size = <32>;
 		};
 
 		thermal: thermal {
@@ -270,4 +271,8 @@
 			clock-frequency = <126000000>;
 		};
 	};
+
+	__overrides__ {
+		cache_line_size = <&vchiq>, "cache-line-size:0";
+	};
 };
--- a/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
+++ b/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
@@ -42,6 +42,7 @@
 #include <linux/platform_data/mailbox-bcm2708.h>
 #include <linux/platform_device.h>
 #include <linux/uaccess.h>
+#include <linux/of.h>
 #include <asm/pgtable.h>
 
 #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
@@ -64,8 +65,10 @@ typedef struct vchiq_2835_state_struct {
 } VCHIQ_2835_ARM_STATE_T;
 
 static void __iomem *g_regs;
-static FRAGMENTS_T *g_fragments_base;
-static FRAGMENTS_T *g_free_fragments;
+static unsigned int g_cache_line_size = sizeof(CACHE_LINE_SIZE);
+static unsigned int g_fragments_size;
+static char *g_fragments_base;
+static char *g_free_fragments;
 static struct semaphore g_free_fragments_sema;
 static unsigned long g_virt_to_bus_offset;
 
@@ -95,9 +98,13 @@ int vchiq_platform_init(struct platform_
 
 	g_virt_to_bus_offset = virt_to_dma(dev, (void *)0);
 
+	(void)of_property_read_u32(dev->of_node, "cache-line-size",
+				   &g_cache_line_size);
+	g_fragments_size = 2 * g_cache_line_size;
+
 	/* Allocate space for the channels in coherent memory */
 	slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
-	frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS);
+	frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
 
 	slot_mem = dmam_alloc_coherent(dev, slot_mem_size + frag_mem_size,
 				       &slot_phys, GFP_KERNEL);
@@ -117,15 +124,15 @@ int vchiq_platform_init(struct platform_
 	vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
 		MAX_FRAGMENTS;
 
-	g_fragments_base = (FRAGMENTS_T *)(slot_mem + slot_mem_size);
+	g_fragments_base = (char *)slot_mem + slot_mem_size;
 	slot_mem_size += frag_mem_size;
 
 	g_free_fragments = g_fragments_base;
 	for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
-		*(FRAGMENTS_T **)&g_fragments_base[i] =
-			&g_fragments_base[i + 1];
+		*(char **)&g_fragments_base[i*g_fragments_size] =
+			&g_fragments_base[(i + 1)*g_fragments_size];
 	}
-	*(FRAGMENTS_T **)&g_fragments_base[i] = NULL;
+	*(char **)&g_fragments_base[i * g_fragments_size] = NULL;
 	sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
 
 	if (vchiq_init_state(state, vchiq_slot_zero, 0) != VCHIQ_SUCCESS)
@@ -344,7 +351,7 @@ vchiq_doorbell_irq(int irq, void *dev_id
 ** cached area.
 
 ** N.B. This implementation plays slightly fast and loose with the Linux
-** driver programming rules, e.g. its use of __virt_to_bus instead of
+** driver programming rules, e.g. its use of dmac_map_area instead of
 ** dma_map_single, but it isn't a multi-platform driver and it benefits
 ** from increased speed as a result.
 */
@@ -355,7 +362,6 @@ create_pagelist(char __user *buf, size_t
 {
 	PAGELIST_T *pagelist;
 	struct page **pages;
-	struct page *page;
 	unsigned long *addrs;
 	unsigned int num_pages, offset, i;
 	char *addr, *base_addr, *next_addr;
@@ -386,10 +392,25 @@ create_pagelist(char __user *buf, size_t
 	pages = (struct page **)(addrs + num_pages + 1);
 
 	if (is_vmalloc_addr(buf)) {
-		for (actual_pages = 0; actual_pages < num_pages; actual_pages++) {
-			pages[actual_pages] = vmalloc_to_page(buf + (actual_pages * PAGE_SIZE));
+		int dir = (type == PAGELIST_WRITE) ?
+			DMA_TO_DEVICE : DMA_FROM_DEVICE;
+		unsigned long length = pagelist->length;
+		unsigned int offset = pagelist->offset;
+
+		for (actual_pages = 0; actual_pages < num_pages;
+		     actual_pages++) {
+			struct page *pg = vmalloc_to_page(buf + (actual_pages *
+								 PAGE_SIZE));
+			size_t bytes = PAGE_SIZE - offset;
+
+			if (bytes > length)
+				bytes = length;
+			pages[actual_pages] = pg;
+			dmac_map_area(page_address(pg) + offset, bytes, dir);
+			length -= bytes;
+			offset = 0;
 		}
-               *need_release = 0; /* do not try and release vmalloc pages */
+		*need_release = 0; /* do not try and release vmalloc pages */
 	} else {
 		down_read(&task->mm->mmap_sem);
 		actual_pages = get_user_pages(task, task->mm,
@@ -418,7 +439,7 @@ create_pagelist(char __user *buf, size_t
 				actual_pages = -ENOMEM;
 			return actual_pages;
 		}
-               *need_release = 1; /* release user pages */
+		*need_release = 1; /* release user pages */
 	}
 
 	pagelist->length = count;
@@ -451,10 +472,10 @@ create_pagelist(char __user *buf, size_t
 
 	/* Partial cache lines (fragments) require special measures */
 	if ((type == PAGELIST_READ) &&
-		((pagelist->offset & (CACHE_LINE_SIZE - 1)) ||
+		((pagelist->offset & (g_cache_line_size - 1)) ||
 		((pagelist->offset + pagelist->length) &
-		(CACHE_LINE_SIZE - 1)))) {
-		FRAGMENTS_T *fragments;
+		(g_cache_line_size - 1)))) {
+		char *fragments;
 
 		if (down_interruptible(&g_free_fragments_sema) != 0) {
 			kfree(pagelist);
@@ -464,19 +485,15 @@ create_pagelist(char __user *buf, size_t
 		WARN_ON(g_free_fragments == NULL);
 
 		down(&g_free_fragments_mutex);
-		fragments = (FRAGMENTS_T *) g_free_fragments;
+		fragments = g_free_fragments;
 		WARN_ON(fragments == NULL);
-		g_free_fragments = *(FRAGMENTS_T **) g_free_fragments;
+		g_free_fragments = *(char **) g_free_fragments;
 		up(&g_free_fragments_mutex);
-		pagelist->type =
-		    PAGELIST_READ_WITH_FRAGMENTS + (fragments -
-						    g_fragments_base);
+		pagelist->type = PAGELIST_READ_WITH_FRAGMENTS +
+			(fragments - g_fragments_base) / g_fragments_size;
 	}
 
-	for (page = virt_to_page(pagelist);
-	     page <= virt_to_page(addrs + num_pages - 1); page++) {
-		flush_dcache_page(page);
-	}
+	dmac_flush_range(pagelist, addrs + num_pages);
 
 	*ppagelist = pagelist;
 
@@ -502,13 +519,14 @@ free_pagelist(PAGELIST_T *pagelist, int
 
 	/* Deal with any partial cache lines (fragments) */
 	if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
-		FRAGMENTS_T *fragments = g_fragments_base +
-		    (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
+		char *fragments = g_fragments_base +
+			(pagelist->type - PAGELIST_READ_WITH_FRAGMENTS) *
+			g_fragments_size;
 		int head_bytes, tail_bytes;
-		head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
-		    (CACHE_LINE_SIZE - 1);
+		head_bytes = (g_cache_line_size - pagelist->offset) &
+			(g_cache_line_size - 1);
 		tail_bytes = (pagelist->offset + actual) &
-		    (CACHE_LINE_SIZE - 1);
+			(g_cache_line_size - 1);
 
 		if ((actual >= 0) && (head_bytes != 0)) {
 			if (head_bytes > actual)
@@ -516,32 +534,46 @@ free_pagelist(PAGELIST_T *pagelist, int
 
 			memcpy((char *)page_address(pages[0]) +
 				pagelist->offset,
-				fragments->headbuf,
+				fragments,
 				head_bytes);
 		}
 		if ((actual >= 0) && (head_bytes < actual) &&
 			(tail_bytes != 0)) {
 			memcpy((char *)page_address(pages[num_pages - 1]) +
 				((pagelist->offset + actual) &
-				(PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)),
-				fragments->tailbuf, tail_bytes);
+				(PAGE_SIZE - 1) & ~(g_cache_line_size - 1)),
+				fragments + g_cache_line_size,
+				tail_bytes);
 		}
 
 		down(&g_free_fragments_mutex);
-		*(FRAGMENTS_T **) fragments = g_free_fragments;
+		*(char **)fragments = g_free_fragments;
 		g_free_fragments = fragments;
 		up(&g_free_fragments_mutex);
 		up(&g_free_fragments_sema);
 	}
 
-       if (*need_release) {
+	if (*need_release) {
+		unsigned int length = pagelist->length;
+		unsigned int offset = pagelist->offset;
+
 		for (i = 0; i < num_pages; i++) {
-			if (pagelist->type != PAGELIST_WRITE)
-				set_page_dirty(pages[i]);
+			struct page *pg = pages[i];
 
-			page_cache_release(pages[i]);
+			if (pagelist->type != PAGELIST_WRITE) {
+				unsigned int bytes = PAGE_SIZE - offset;
+
+				if (bytes > length)
+					bytes = length;
+				dmac_unmap_area(page_address(pg) + offset,
+						bytes, DMA_FROM_DEVICE);
+				length -= bytes;
+				offset = 0;
+				set_page_dirty(pg);
+			}
+			page_cache_release(pg);
 		}
-       }
+	}
 
 	kfree(pagelist);
 }