bcm27xx: add support for linux v5.15
[openwrt/staging/chunkeey.git] / target / linux / bcm27xx / patches-5.15 / 950-0472-staging-vchiq_arm-Add-36-bit-address-support.patch
1 From 3087dcaf4bfed16ac0e1a124a3196ed3384f5ef3 Mon Sep 17 00:00:00 2001
2 From: Phil Elwell <phil@raspberrypi.com>
3 Date: Thu, 1 Nov 2018 17:31:37 +0000
4 Subject: [PATCH] staging: vchiq_arm: Add 36-bit address support
5
6 Conditional on a new compatible string, change the pagelist encoding
7 such that the top 24 bits are the pfn, leaving the low 8 bits for the
8 run length (-1); with a 12-bit page offset this gives a 36-bit address range.
9
10 Manage the split between addresses for the VPU and addresses for the
11 40-bit DMA controller with a dedicated DMA device pointer, which on
12 platforms other than BCM2711 is the same as the main VCHIQ device. This
13 allows the VCHIQ node to stay in the usual place in the DT.
14
15 Signed-off-by: Phil Elwell <phil@raspberrypi.com>
16 ---
17 .../interface/vchiq_arm/vchiq_arm.c | 126 ++++++++++++------
18 .../interface/vchiq_arm/vchiq_arm.h | 1 +
19 2 files changed, 89 insertions(+), 38 deletions(-)
20
21 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
22 +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
23 @@ -79,6 +79,11 @@ static struct vchiq_drvdata bcm2836_drvd
24 .cache_line_size = 64,
25 };
26
27 +static struct vchiq_drvdata bcm2711_drvdata = {
28 + .cache_line_size = 64,
29 + .use_36bit_addrs = true,
30 +};
31 +
32 struct vchiq_2835_state {
33 int inited;
34 struct vchiq_arm_state arm_state;
35 @@ -108,11 +113,13 @@ static void __iomem *g_regs;
36 * of 32.
37 */
38 static unsigned int g_cache_line_size = 32;
39 +static unsigned int g_use_36bit_addrs = 0;
40 static unsigned int g_fragments_size;
41 static char *g_fragments_base;
42 static char *g_free_fragments;
43 static struct semaphore g_free_fragments_sema;
44 static struct device *g_dev;
45 +static struct device *g_dma_dev;
46
47 static DEFINE_SEMAPHORE(g_free_fragments_mutex);
48
49 @@ -142,7 +149,7 @@ static void
50 cleanup_pagelistinfo(struct vchiq_pagelist_info *pagelistinfo)
51 {
52 if (pagelistinfo->scatterlist_mapped) {
53 - dma_unmap_sg(g_dev, pagelistinfo->scatterlist,
54 + dma_unmap_sg(g_dma_dev, pagelistinfo->scatterlist,
55 pagelistinfo->num_pages, pagelistinfo->dma_dir);
56 }
57
58 @@ -291,7 +298,7 @@ create_pagelist(char *buf, char __user *
59 count -= len;
60 }
61
62 - dma_buffers = dma_map_sg(g_dev,
63 + dma_buffers = dma_map_sg(g_dma_dev,
64 scatterlist,
65 num_pages,
66 pagelistinfo->dma_dir);
67 @@ -305,25 +312,60 @@ create_pagelist(char *buf, char __user *
68
69 /* Combine adjacent blocks for performance */
70 k = 0;
71 - for_each_sg(scatterlist, sg, dma_buffers, i) {
72 - u32 len = sg_dma_len(sg);
73 - u32 addr = sg_dma_address(sg);
74 -
75 - /* Note: addrs is the address + page_count - 1
76 - * The firmware expects blocks after the first to be page-
77 - * aligned and a multiple of the page size
78 - */
79 - WARN_ON(len == 0);
80 - WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
81 - WARN_ON(i && (addr & ~PAGE_MASK));
82 - if (k > 0 &&
83 - ((addrs[k - 1] & PAGE_MASK) +
84 - (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT))
85 - == (addr & PAGE_MASK))
86 - addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT);
87 - else
88 - addrs[k++] = (addr & PAGE_MASK) |
89 - (((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1);
90 + if (g_use_36bit_addrs) {
91 + for_each_sg(scatterlist, sg, dma_buffers, i) {
92 + u32 len = sg_dma_len(sg);
93 + u64 addr = sg_dma_address(sg);
94 + u32 page_id = (u32)((addr >> 4) & ~0xff);
95 + u32 sg_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
96 +
97 + /* Note: addrs is the address + page_count - 1
98 + * The firmware expects blocks after the first to be page-
99 + * aligned and a multiple of the page size
100 + */
101 + WARN_ON(len == 0);
102 + WARN_ON(i &&
103 + (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
104 + WARN_ON(i && (addr & ~PAGE_MASK));
105 + WARN_ON(upper_32_bits(addr) > 0xf);
106 + if (k > 0 &&
107 + ((addrs[k - 1] & ~0xff) +
108 + (((addrs[k - 1] & 0xff) + 1) << 8)
109 + == page_id)) {
110 + u32 inc_pages = min(sg_pages,
111 + 0xff - (addrs[k - 1] & 0xff));
112 + addrs[k - 1] += inc_pages;
113 + page_id += inc_pages << 8;
114 + sg_pages -= inc_pages;
115 + }
116 + while (sg_pages) {
117 + u32 inc_pages = min(sg_pages, 0x100u);
118 + addrs[k++] = page_id | (inc_pages - 1);
119 + page_id += inc_pages << 8;
120 + sg_pages -= inc_pages;
121 + }
122 + }
123 + } else {
124 + for_each_sg(scatterlist, sg, dma_buffers, i) {
125 + u32 len = sg_dma_len(sg);
126 + u32 addr = sg_dma_address(sg);
127 + u32 new_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
128 +
129 + /* Note: addrs is the address + page_count - 1
130 + * The firmware expects blocks after the first to be page-
131 + * aligned and a multiple of the page size
132 + */
133 + WARN_ON(len == 0);
134 + WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK));
135 + WARN_ON(i && (addr & ~PAGE_MASK));
136 + if (k > 0 &&
137 + ((addrs[k - 1] & PAGE_MASK) +
138 + (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT))
139 + == (addr & PAGE_MASK))
140 + addrs[k - 1] += new_pages;
141 + else
142 + addrs[k++] = (addr & PAGE_MASK) | (new_pages - 1);
143 + }
144 }
145
146 /* Partial cache lines (fragments) require special measures */
147 @@ -367,7 +409,7 @@ free_pagelist(struct vchiq_pagelist_info
148 * NOTE: dma_unmap_sg must be called before the
149 * cpu can touch any of the data/pages.
150 */
151 - dma_unmap_sg(g_dev, pagelistinfo->scatterlist,
152 + dma_unmap_sg(g_dma_dev, pagelistinfo->scatterlist,
153 pagelistinfo->num_pages, pagelistinfo->dma_dir);
154 pagelistinfo->scatterlist_mapped = 0;
155
156 @@ -425,6 +467,7 @@ free_pagelist(struct vchiq_pagelist_info
157 int vchiq_platform_init(struct platform_device *pdev, struct vchiq_state *state)
158 {
159 struct device *dev = &pdev->dev;
160 + struct device *dma_dev = NULL;
161 struct vchiq_drvdata *drvdata = platform_get_drvdata(pdev);
162 struct rpi_firmware *fw = drvdata->fw;
163 struct vchiq_slot_zero *vchiq_slot_zero;
164 @@ -446,6 +489,24 @@ int vchiq_platform_init(struct platform_
165 g_cache_line_size = drvdata->cache_line_size;
166 g_fragments_size = 2 * g_cache_line_size;
167
168 + if (drvdata->use_36bit_addrs) {
169 + struct device_node *dma_node =
170 + of_find_compatible_node(NULL, NULL, "brcm,bcm2711-dma");
171 +
172 + if (dma_node) {
173 + struct platform_device *pdev;
174 +
175 + pdev = of_find_device_by_node(dma_node);
176 + if (pdev)
177 + dma_dev = &pdev->dev;
178 + of_node_put(dma_node);
179 + g_use_36bit_addrs = true;
180 + } else {
181 + dev_err(dev, "40-bit DMA controller not found\n");
182 + return -EINVAL;
183 + }
184 + }
185 +
186 /* Allocate space for the channels in coherent memory */
187 slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE);
188 frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS);
189 @@ -458,13 +519,14 @@ int vchiq_platform_init(struct platform_
190 }
191
192 WARN_ON(((unsigned long)slot_mem & (PAGE_SIZE - 1)) != 0);
193 + channelbase = slot_phys;
194
195 vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size);
196 if (!vchiq_slot_zero)
197 return -EINVAL;
198
199 vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] =
200 - (int)slot_phys + slot_mem_size;
201 + channelbase + slot_mem_size;
202 vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] =
203 MAX_FRAGMENTS;
204
205 @@ -498,7 +560,6 @@ int vchiq_platform_init(struct platform_
206 }
207
208 /* Send the base address of the slots to VideoCore */
209 - channelbase = slot_phys;
210 err = rpi_firmware_property(fw, RPI_FIRMWARE_VCHIQ_INIT,
211 &channelbase, sizeof(channelbase));
212 if (err || channelbase) {
213 @@ -507,6 +568,8 @@ int vchiq_platform_init(struct platform_
214 }
215
216 g_dev = dev;
217 + g_dma_dev = dma_dev ?: dev;
218 +
219 vchiq_log_info(vchiq_arm_log_level,
220 "vchiq_init - done (slots %pK, phys %pad)",
221 vchiq_slot_zero, &slot_phys);
222 @@ -1768,6 +1831,7 @@ void vchiq_platform_conn_state_changed(s
223 static const struct of_device_id vchiq_of_match[] = {
224 { .compatible = "brcm,bcm2835-vchiq", .data = &bcm2835_drvdata },
225 { .compatible = "brcm,bcm2836-vchiq", .data = &bcm2836_drvdata },
226 + { .compatible = "brcm,bcm2711-vchiq", .data = &bcm2711_drvdata },
227 {},
228 };
229 MODULE_DEVICE_TABLE(of, vchiq_of_match);
230 @@ -1800,22 +1864,8 @@ vchiq_register_child(struct platform_dev
231
232 child->dev.of_node = np;
233
234 - /*
235 - * We want the dma-ranges etc to be copied from a device with the
236 - * correct dma-ranges for the VPU.
237 - * VCHIQ on Pi4 is now under scb which doesn't get those dma-ranges.
238 - * Take the "dma" node as going to be suitable as it sees the world
239 - * through the same eyes as the VPU.
240 - */
241 - np = of_find_node_by_path("dma");
242 - if (!np)
243 - np = pdev->dev.of_node;
244 -
245 of_dma_configure(&child->dev, np, true);
246
247 - if (np != pdev->dev.of_node)
248 - of_node_put(np);
249 -
250 return child;
251 }
252
253 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
254 +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h
255 @@ -61,6 +61,7 @@ struct vchiq_arm_state {
256
257 struct vchiq_drvdata {
258 const unsigned int cache_line_size;
259 + const bool use_36bit_addrs;
260 struct rpi_firmware *fw;
261 };
262