1 From edb21286ac7e246dfe7c9ee05101880f719e00e8 Mon Sep 17 00:00:00 2001
2 From: Phil Elwell <phil@raspberrypi.org>
3 Date: Wed, 8 Jul 2015 14:48:57 +0100
4 Subject: [PATCH 106/121] vchiq_arm: Two caching fixes
6 1) Make fragment size vary with cache line size
7 Without this patch, non-cache-line-aligned transfers may corrupt
8 (or be corrupted by) adjacent data structures.
10 Both ARM and VC need to be updated to enable this feature. This is
11 ensured by having the loader apply a new DT parameter -
12 cache-line-size. The existence of this parameter guarantees that the
13 kernel is capable, and the parameter will only be modified from the
14 safe default if the loader is capable.
16 2) Flush/invalidate vmalloc'd memory, and invalidate after reads
18 arch/arm/boot/dts/bcm2708_common.dtsi | 5 +
19 .../interface/vchiq_arm/vchiq_2835_arm.c | 112 +++++++++++++--------
20 2 files changed, 77 insertions(+), 40 deletions(-)
22 --- a/arch/arm/boot/dts/bcm2708_common.dtsi
23 +++ b/arch/arm/boot/dts/bcm2708_common.dtsi
25 compatible = "brcm,bcm2835-vchiq";
26 reg = <0x7e00b840 0xf>;
28 + cache-line-size = <32>;
33 clock-frequency = <126000000>;
38 + cache_line_size = <&vchiq>, "cache-line-size:0";
41 --- a/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
42 +++ b/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
44 #include <linux/platform_data/mailbox-bcm2708.h>
45 #include <linux/platform_device.h>
46 #include <linux/uaccess.h>
47 +#include <linux/of.h>
48 #include <asm/pgtable.h>
50 #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32)
51 @@ -64,8 +65,10 @@ typedef struct vchiq_2835_state_struct {
52 } VCHIQ_2835_ARM_STATE_T;
54 static void __iomem *g_regs;
55 -static FRAGMENTS_T *g_fragments_base;
56 -static FRAGMENTS_T *g_free_fragments;
57 +/* Default cache line size; overridden by the "cache-line-size" DT property.
57 + * Must be CACHE_LINE_SIZE itself, not sizeof(CACHE_LINE_SIZE): sizeof of an
57 + * integer constant yields sizeof(int) == 4, which would break fragment
57 + * sizing and the partial-cache-line alignment checks. */
57 +static unsigned int g_cache_line_size = CACHE_LINE_SIZE;
58 +static unsigned int g_fragments_size;
59 +static char *g_fragments_base;
60 +static char *g_free_fragments;
61 static struct semaphore g_free_fragments_sema;
62 static unsigned long g_virt_to_bus_offset;
64 @@ -95,9 +98,13 @@ int vchiq_platform_init(struct platform_
66 g_virt_to_bus_offset = virt_to_dma(dev, (void *)0);
68 + (void)of_property_read_u32(dev->of_node, "cache-line-size",
69 + &g_cache_line_size);
70 + g_fragments_size = 2 * g_cache_line_size;
72 /* Allocate space for the channels in coherent memory */
73 slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
74 - frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS);
75 + frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
77 slot_mem = dmam_alloc_coherent(dev, slot_mem_size + frag_mem_size,
78 &slot_phys, GFP_KERNEL);
79 @@ -117,15 +124,15 @@ int vchiq_platform_init(struct platform_
80 vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
83 - g_fragments_base = (FRAGMENTS_T *)(slot_mem + slot_mem_size);
84 + g_fragments_base = (char *)slot_mem + slot_mem_size;
85 slot_mem_size += frag_mem_size;
87 g_free_fragments = g_fragments_base;
88 for (i = 0; i < (MAX_FRAGMENTS - 1); i++) {
89 - *(FRAGMENTS_T **)&g_fragments_base[i] =
90 - &g_fragments_base[i + 1];
91 + *(char **)&g_fragments_base[i*g_fragments_size] =
92 + &g_fragments_base[(i + 1)*g_fragments_size];
94 - *(FRAGMENTS_T **)&g_fragments_base[i] = NULL;
95 + *(char **)&g_fragments_base[i * g_fragments_size] = NULL;
96 sema_init(&g_free_fragments_sema, MAX_FRAGMENTS);
98 if (vchiq_init_state(state, vchiq_slot_zero, 0) != VCHIQ_SUCCESS)
99 @@ -344,7 +351,7 @@ vchiq_doorbell_irq(int irq, void *dev_id
102 ** N.B. This implementation plays slightly fast and loose with the Linux
103 -** driver programming rules, e.g. its use of __virt_to_bus instead of
104 +** driver programming rules, e.g. its use of dmac_map_area instead of
105 ** dma_map_single, but it isn't a multi-platform driver and it benefits
106 ** from increased speed as a result.
108 @@ -355,7 +362,6 @@ create_pagelist(char __user *buf, size_t
110 PAGELIST_T *pagelist;
113 unsigned long *addrs;
114 unsigned int num_pages, offset, i;
115 char *addr, *base_addr, *next_addr;
116 @@ -386,10 +392,25 @@ create_pagelist(char __user *buf, size_t
117 pages = (struct page **)(addrs + num_pages + 1);
119 if (is_vmalloc_addr(buf)) {
120 - for (actual_pages = 0; actual_pages < num_pages; actual_pages++) {
121 - pages[actual_pages] = vmalloc_to_page(buf + (actual_pages * PAGE_SIZE));
122 + int dir = (type == PAGELIST_WRITE) ?
123 + DMA_TO_DEVICE : DMA_FROM_DEVICE;
124 + unsigned long length = pagelist->length;
125 + unsigned int offset = pagelist->offset;
127 + for (actual_pages = 0; actual_pages < num_pages;
129 + struct page *pg = vmalloc_to_page(buf + (actual_pages *
131 + size_t bytes = PAGE_SIZE - offset;
133 + if (bytes > length)
135 + pages[actual_pages] = pg;
136 + dmac_map_area(page_address(pg) + offset, bytes, dir);
140 - *need_release = 0; /* do not try and release vmalloc pages */
141 + *need_release = 0; /* do not try and release vmalloc pages */
143 down_read(&task->mm->mmap_sem);
144 actual_pages = get_user_pages(task, task->mm,
145 @@ -418,7 +439,7 @@ create_pagelist(char __user *buf, size_t
146 actual_pages = -ENOMEM;
149 - *need_release = 1; /* release user pages */
150 + *need_release = 1; /* release user pages */
153 pagelist->length = count;
154 @@ -451,10 +472,10 @@ create_pagelist(char __user *buf, size_t
156 /* Partial cache lines (fragments) require special measures */
157 if ((type == PAGELIST_READ) &&
158 - ((pagelist->offset & (CACHE_LINE_SIZE - 1)) ||
159 + ((pagelist->offset & (g_cache_line_size - 1)) ||
160 ((pagelist->offset + pagelist->length) &
161 - (CACHE_LINE_SIZE - 1)))) {
162 - FRAGMENTS_T *fragments;
163 + (g_cache_line_size - 1)))) {
166 if (down_interruptible(&g_free_fragments_sema) != 0) {
168 @@ -464,19 +485,15 @@ create_pagelist(char __user *buf, size_t
169 WARN_ON(g_free_fragments == NULL);
171 down(&g_free_fragments_mutex);
172 - fragments = (FRAGMENTS_T *) g_free_fragments;
173 + fragments = g_free_fragments;
174 WARN_ON(fragments == NULL);
175 - g_free_fragments = *(FRAGMENTS_T **) g_free_fragments;
176 + g_free_fragments = *(char **) g_free_fragments;
177 up(&g_free_fragments_mutex);
179 - PAGELIST_READ_WITH_FRAGMENTS + (fragments -
181 + pagelist->type = PAGELIST_READ_WITH_FRAGMENTS +
182 + (fragments - g_fragments_base) / g_fragments_size;
185 - for (page = virt_to_page(pagelist);
186 - page <= virt_to_page(addrs + num_pages - 1); page++) {
187 - flush_dcache_page(page);
189 + dmac_flush_range(pagelist, addrs + num_pages);
191 *ppagelist = pagelist;
193 @@ -502,13 +519,14 @@ free_pagelist(PAGELIST_T *pagelist, int
195 /* Deal with any partial cache lines (fragments) */
196 if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) {
197 - FRAGMENTS_T *fragments = g_fragments_base +
198 - (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS);
199 + char *fragments = g_fragments_base +
200 + (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS) *
202 int head_bytes, tail_bytes;
203 - head_bytes = (CACHE_LINE_SIZE - pagelist->offset) &
204 - (CACHE_LINE_SIZE - 1);
205 + head_bytes = (g_cache_line_size - pagelist->offset) &
206 + (g_cache_line_size - 1);
207 tail_bytes = (pagelist->offset + actual) &
208 - (CACHE_LINE_SIZE - 1);
209 + (g_cache_line_size - 1);
211 if ((actual >= 0) && (head_bytes != 0)) {
212 if (head_bytes > actual)
213 @@ -516,32 +534,46 @@ free_pagelist(PAGELIST_T *pagelist, int
215 memcpy((char *)page_address(pages[0]) +
217 - fragments->headbuf,
221 if ((actual >= 0) && (head_bytes < actual) &&
223 memcpy((char *)page_address(pages[num_pages - 1]) +
224 ((pagelist->offset + actual) &
225 - (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)),
226 - fragments->tailbuf, tail_bytes);
227 + (PAGE_SIZE - 1) & ~(g_cache_line_size - 1)),
228 + fragments + g_cache_line_size,
232 down(&g_free_fragments_mutex);
233 - *(FRAGMENTS_T **) fragments = g_free_fragments;
234 + *(char **)fragments = g_free_fragments;
235 g_free_fragments = fragments;
236 up(&g_free_fragments_mutex);
237 up(&g_free_fragments_sema);
240 - if (*need_release) {
241 + if (*need_release) {
242 + unsigned int length = pagelist->length;
243 + unsigned int offset = pagelist->offset;
245 for (i = 0; i < num_pages; i++) {
246 - if (pagelist->type != PAGELIST_WRITE)
247 - set_page_dirty(pages[i]);
248 + struct page *pg = pages[i];
250 - page_cache_release(pages[i]);
251 + if (pagelist->type != PAGELIST_WRITE) {
252 + unsigned int bytes = PAGE_SIZE - offset;
254 + if (bytes > length)
256 + dmac_unmap_area(page_address(pg) + offset,
257 + bytes, DMA_FROM_DEVICE);
260 + set_page_dirty(pg);
262 + page_cache_release(pg);