target/linux/brcm2708/patches-4.19/950-0531-pcie-brcmstb-Changes-for-BCM2711.patch
1 From 545951be6cabac8b1df85771c44335a0eaaa3c5d Mon Sep 17 00:00:00 2001
2 From: Phil Elwell <phil@raspberrypi.org>
3 Date: Tue, 19 Feb 2019 22:06:59 +0000
4 Subject: [PATCH 531/806] pcie-brcmstb: Changes for BCM2711
5
6 The initial brcmstb PCIe driver - originally taken from the V3(?)
7 patch set - has been modified significantly for the BCM2711.
8
9 Signed-off-by: Phil Elwell <phil@raspberrypi.org>
10 ---
11 drivers/dma/bcm2835-dma.c | 107 ++++
12 drivers/pci/controller/Makefile | 4 +
13 drivers/pci/controller/pcie-brcmstb-bounce.c | 564 +++++++++++++++++++
14 drivers/pci/controller/pcie-brcmstb-bounce.h | 32 ++
15 drivers/pci/controller/pcie-brcmstb.c | 237 ++++----
16 drivers/soc/bcm/brcmstb/Makefile | 2 +-
17 drivers/soc/bcm/brcmstb/memory.c | 158 ++++++
18 7 files changed, 996 insertions(+), 108 deletions(-)
19 create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce.c
20 create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce.h
21 create mode 100644 drivers/soc/bcm/brcmstb/memory.c
22
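Note (illustration, not part of the patch itself): the pieces below fit together as follows - pcie-brcmstb.c registers endpoint devices with the new bounce helper, which preallocates a coherent pool and switches the device's dma_map_ops so that buffers whose mapping extends above a threshold are copied through the pool by the 40-bit DMA memcpy channel claimed in bcm2835-dma.c. A minimal sketch of that registration, mirroring what brcmstb_platform_notifier() does with the default module parameters; the wrapper function and the device pointer are hypothetical:

    /* Illustration only, assuming "dev" is an endpoint device behind the RC. */
    #include <linux/device.h>
    #include "pcie-brcmstb-bounce.h"

    static int example_enable_bounce(struct device *dev)
    {
    	/* 32 MB bounce pool; bounce mappings that extend above 0xc0000000 */
    	int ret = brcm_pcie_bounce_register_dev(dev, 32 * 1024 * 1024,
    						(dma_addr_t)0xc0000000);
    	if (ret)
    		return ret;

    	/*
    	 * From here on, dma_map_page()/dma_unmap_page() on this device go
    	 * through dmabounce_map_page()/dmabounce_unmap_page(), which copy
    	 * offending buffers via bcm2838_dma40_memcpy() on DMA channel 14.
    	 */
    	return 0;
    }
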
23 --- a/drivers/dma/bcm2835-dma.c
24 +++ b/drivers/dma/bcm2835-dma.c
25 @@ -68,6 +68,17 @@ struct bcm2835_dma_cb {
26 uint32_t pad[2];
27 };
28
29 +struct bcm2838_dma40_scb {
30 + uint32_t ti;
31 + uint32_t src;
32 + uint32_t srci;
33 + uint32_t dst;
34 + uint32_t dsti;
35 + uint32_t len;
36 + uint32_t next_cb;
37 + uint32_t rsvd;
38 +};
39 +
40 struct bcm2835_cb_entry {
41 struct bcm2835_dma_cb *cb;
42 dma_addr_t paddr;
43 @@ -185,6 +196,45 @@ struct bcm2835_desc {
44 #define MAX_DMA_LEN SZ_1G
45 #define MAX_LITE_DMA_LEN (SZ_64K - 4)
46
47 +/* 40-bit DMA support */
48 +#define BCM2838_DMA40_CS 0x00
49 +#define BCM2838_DMA40_CB 0x04
50 +#define BCM2838_DMA40_DEBUG 0x0c
51 +#define BCM2838_DMA40_TI 0x10
52 +#define BCM2838_DMA40_SRC 0x14
53 +#define BCM2838_DMA40_SRCI 0x18
54 +#define BCM2838_DMA40_DEST 0x1c
55 +#define BCM2838_DMA40_DESTI 0x20
56 +#define BCM2838_DMA40_LEN 0x24
57 +#define BCM2838_DMA40_NEXT_CB 0x28
58 +#define BCM2838_DMA40_DEBUG2 0x2c
59 +
60 +#define BCM2838_DMA40_CS_ACTIVE BIT(0)
61 +#define BCM2838_DMA40_CS_END BIT(1)
62 +
63 +#define BCM2838_DMA40_CS_QOS(x) (((x) & 0x1f) << 16)
64 +#define BCM2838_DMA40_CS_PANIC_QOS(x) (((x) & 0x1f) << 20)
65 +#define BCM2838_DMA40_CS_WRITE_WAIT BIT(28)
66 +
67 +#define BCM2838_DMA40_BURST_LEN(x) ((((x) - 1) & 0xf) << 8)
68 +#define BCM2838_DMA40_INC BIT(12)
69 +#define BCM2838_DMA40_SIZE_128 (2 << 13)
70 +
71 +#define BCM2838_DMA40_MEMCPY_QOS \
72 + (BCM2838_DMA40_CS_QOS(0x0) | \
73 + BCM2838_DMA40_CS_PANIC_QOS(0x0) | \
74 + BCM2838_DMA40_CS_WRITE_WAIT)
75 +
76 +#define BCM2838_DMA40_MEMCPY_XFER_INFO \
77 + (BCM2838_DMA40_SIZE_128 | \
78 + BCM2838_DMA40_INC | \
79 + BCM2838_DMA40_BURST_LEN(16))
80 +
81 +static void __iomem *memcpy_chan;
82 +static struct bcm2838_dma40_scb *memcpy_scb;
83 +static dma_addr_t memcpy_scb_dma;
84 +DEFINE_SPINLOCK(memcpy_lock);
85 +
86 static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c)
87 {
88 /* lite and normal channels have different max frame length */
89 @@ -868,6 +918,56 @@ static void bcm2835_dma_free(struct bcm2
90 }
91 }
92
93 +int bcm2838_dma40_memcpy_init(struct device *dev)
94 +{
95 + if (memcpy_scb)
96 + return 0;
97 +
98 + memcpy_scb = dma_alloc_coherent(dev, sizeof(*memcpy_scb),
99 + &memcpy_scb_dma, GFP_KERNEL);
100 +
101 + if (!memcpy_scb) {
102 + pr_err("bcm2838_dma40_memcpy_init failed!\n");
103 + return -ENOMEM;
104 + }
105 +
106 + return 0;
107 +}
108 +EXPORT_SYMBOL(bcm2838_dma40_memcpy_init);
109 +
110 +void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size)
111 +{
112 + struct bcm2838_dma40_scb *scb = memcpy_scb;
113 + unsigned long flags;
114 +
115 + if (!scb) {
116 + pr_err("bcm2838_dma40_memcpy not initialised!\n");
117 + return;
118 + }
119 +
120 + spin_lock_irqsave(&memcpy_lock, flags);
121 +
122 + scb->ti = 0;
123 + scb->src = lower_32_bits(src);
124 + scb->srci = upper_32_bits(src) | BCM2838_DMA40_MEMCPY_XFER_INFO;
125 + scb->dst = lower_32_bits(dst);
126 + scb->dsti = upper_32_bits(dst) | BCM2838_DMA40_MEMCPY_XFER_INFO;
127 + scb->len = size;
128 + scb->next_cb = 0;
129 +
130 + writel((u32)(memcpy_scb_dma >> 5), memcpy_chan + BCM2838_DMA40_CB);
131 + writel(BCM2838_DMA40_MEMCPY_QOS + BCM2838_DMA40_CS_ACTIVE,
132 + memcpy_chan + BCM2838_DMA40_CS);
133 + /* Poll for completion */
134 + while (!(readl(memcpy_chan + BCM2838_DMA40_CS) & BCM2838_DMA40_CS_END))
135 + cpu_relax();
136 +
137 + writel(BCM2838_DMA40_CS_END, memcpy_chan + BCM2838_DMA40_CS);
138 +
139 + spin_unlock_irqrestore(&memcpy_lock, flags);
140 +}
141 +EXPORT_SYMBOL(bcm2838_dma40_memcpy);
142 +
143 static const struct of_device_id bcm2835_dma_of_match[] = {
144 { .compatible = "brcm,bcm2835-dma", },
145 {},
146 @@ -966,6 +1066,13 @@ static int bcm2835_dma_probe(struct plat
147 /* Channel 0 is used by the legacy API */
148 chans_available &= ~BCM2835_DMA_BULK_MASK;
149
150 + /* We can't use channels 11-13 yet */
151 + chans_available &= ~(BIT(11) | BIT(12) | BIT(13));
152 +
153 + /* Grab channel 14 for the 40-bit DMA memcpy */
154 + chans_available &= ~BIT(14);
155 + memcpy_chan = BCM2835_DMA_CHANIO(base, 14);
156 +
157 /* get irqs for each channel that we support */
158 for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
159 /* skip masked out channels */
160 --- a/drivers/pci/controller/Makefile
161 +++ b/drivers/pci/controller/Makefile
162 @@ -29,6 +29,10 @@ obj-$(CONFIG_PCIE_MEDIATEK) += pcie-medi
163 obj-$(CONFIG_PCIE_MOBIVEIL) += pcie-mobiveil.o
164 obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o
165 obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o
166 +ifdef CONFIG_ARM
167 +obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
168 +endif
169 +
170 obj-$(CONFIG_VMD) += vmd.o
171 # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
172 obj-y += dwc/
173 --- /dev/null
174 +++ b/drivers/pci/controller/pcie-brcmstb-bounce.c
175 @@ -0,0 +1,564 @@
176 +/*
177 + * This code started out as a version of arch/arm/common/dmabounce.c,
178 + * modified to cope with highmem pages. Now it has been changed heavily -
179 + * it now preallocates a large block (currently 4MB) and carves it up
180 + * sequentially in ring fashion, and DMA is used to copy the data - to the
181 + * point where very little of the original remains.
182 + *
183 + * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
184 + *
185 + * Original version by Brad Parker (brad@heeltoe.com)
186 + * Re-written by Christopher Hoover <ch@murgatroid.com>
187 + * Made generic by Deepak Saxena <dsaxena@plexity.net>
188 + *
189 + * Copyright (C) 2002 Hewlett Packard Company.
190 + * Copyright (C) 2004 MontaVista Software, Inc.
191 + *
192 + * This program is free software; you can redistribute it and/or
193 + * modify it under the terms of the GNU General Public License
194 + * version 2 as published by the Free Software Foundation.
195 + */
196 +
197 +#include <linux/module.h>
198 +#include <linux/init.h>
199 +#include <linux/slab.h>
200 +#include <linux/page-flags.h>
201 +#include <linux/device.h>
202 +#include <linux/dma-mapping.h>
203 +#include <linux/dmapool.h>
204 +#include <linux/list.h>
205 +#include <linux/scatterlist.h>
206 +#include <linux/bitmap.h>
207 +
208 +#include <asm/cacheflush.h>
209 +#include <asm/dma-iommu.h>
210 +
211 +#define STATS
212 +
213 +#ifdef STATS
214 +#define DO_STATS(X) do { X ; } while (0)
215 +#else
216 +#define DO_STATS(X) do { } while (0)
217 +#endif
218 +
219 +/* ************************************************** */
220 +
221 +struct safe_buffer {
222 + struct list_head node;
223 +
224 + /* original request */
225 + size_t size;
226 + int direction;
227 +
228 + struct dmabounce_pool *pool;
229 + void *safe;
230 + dma_addr_t unsafe_dma_addr;
231 + dma_addr_t safe_dma_addr;
232 +};
233 +
234 +struct dmabounce_pool {
235 + unsigned long pages;
236 + void *virt_addr;
237 + dma_addr_t dma_addr;
238 + unsigned long *alloc_map;
239 + unsigned long alloc_pos;
240 + spinlock_t lock;
241 + struct device *dev;
242 + unsigned long num_pages;
243 +#ifdef STATS
244 + size_t max_size;
245 + unsigned long num_bufs;
246 + unsigned long max_bufs;
247 + unsigned long max_pages;
248 +#endif
249 +};
250 +
251 +struct dmabounce_device_info {
252 + struct device *dev;
253 + dma_addr_t threshold;
254 + struct list_head safe_buffers;
255 + struct dmabounce_pool pool;
256 + rwlock_t lock;
257 +#ifdef STATS
258 + unsigned long map_count;
259 + unsigned long unmap_count;
260 + unsigned long sync_dev_count;
261 + unsigned long sync_cpu_count;
262 + unsigned long fail_count;
263 + int attr_res;
264 +#endif
265 +};
266 +
267 +static struct dmabounce_device_info *g_dmabounce_device_info;
268 +
269 +extern int bcm2838_dma40_memcpy_init(struct device *dev);
270 +extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
271 +
272 +#ifdef STATS
273 +static ssize_t
274 +bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
275 +{
276 + struct dmabounce_device_info *device_info = g_dmabounce_device_info;
277 + return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
278 + device_info->map_count,
279 + device_info->unmap_count,
280 + device_info->sync_dev_count,
281 + device_info->sync_cpu_count,
282 + device_info->fail_count,
283 + device_info->pool.max_size,
284 + device_info->pool.num_bufs,
285 + device_info->pool.max_bufs,
286 + device_info->pool.num_pages * PAGE_SIZE,
287 + device_info->pool.max_pages * PAGE_SIZE);
288 +}
289 +
290 +static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
291 +#endif
292 +
293 +static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
294 + unsigned long buffer_size)
295 +{
296 + int ret = -ENOMEM;
297 + pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;
298 + pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
299 + if (!pool->alloc_map)
300 + goto err_bitmap;
301 + pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
302 + &pool->dma_addr, GFP_KERNEL);
303 + if (!pool->virt_addr)
304 + goto err_dmabuf;
305 +
306 + pool->alloc_pos = 0;
307 + spin_lock_init(&pool->lock);
308 + pool->dev = dev;
309 + pool->num_pages = 0;
310 +
311 + DO_STATS(pool->max_size = 0);
312 + DO_STATS(pool->num_bufs = 0);
313 + DO_STATS(pool->max_bufs = 0);
314 + DO_STATS(pool->max_pages = 0);
315 +
316 + return 0;
317 +
318 +err_dmabuf:
319 + bitmap_free(pool->alloc_map);
320 +err_bitmap:
321 + return ret;
322 +}
323 +
324 +static void bounce_destroy(struct dmabounce_pool *pool)
325 +{
326 + dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
327 + pool->dma_addr);
328 +
329 + bitmap_free(pool->alloc_map);
330 +}
331 +
332 +static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
333 + dma_addr_t *dmaaddrp)
334 +{
335 + unsigned long pages;
336 + unsigned long flags;
337 + unsigned long pos;
338 +
339 + pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
340 +
341 + DO_STATS(pool->max_size = max(size, pool->max_size));
342 +
343 + spin_lock_irqsave(&pool->lock, flags);
344 + pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
345 + pool->alloc_pos, pages, 0);
346 + /* If not found, try from the start */
347 + if (pos >= pool->pages && pool->alloc_pos)
348 + pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
349 + 0, pages, 0);
350 +
351 + if (pos >= pool->pages) {
352 + spin_unlock_irqrestore(&pool->lock, flags);
353 + return NULL;
354 + }
355 +
356 + bitmap_set(pool->alloc_map, pos, pages);
357 + pool->alloc_pos = (pos + pages) % pool->pages;
358 + pool->num_pages += pages;
359 +
360 + DO_STATS(pool->num_bufs++);
361 + DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
362 + DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
363 +
364 + spin_unlock_irqrestore(&pool->lock, flags);
365 +
366 + *dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
367 +
368 + return pool->virt_addr + pos * PAGE_SIZE;
369 +}
370 +
371 +static void
372 +bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
373 +{
374 + unsigned long pages;
375 + unsigned long flags;
376 + unsigned long pos;
377 +
378 + pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
379 + pos = (buf - pool->virt_addr)/PAGE_SIZE;
380 +
381 + BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));
382 +
383 + spin_lock_irqsave(&pool->lock, flags);
384 + bitmap_clear(pool->alloc_map, pos, pages);
385 + pool->num_pages -= pages;
386 + if (pool->num_pages == 0)
387 + pool->alloc_pos = 0;
388 + DO_STATS(pool->num_bufs--);
389 + spin_unlock_irqrestore(&pool->lock, flags);
390 +}
391 +
392 +/* allocate a 'safe' buffer and keep track of it */
393 +static struct safe_buffer *
394 +alloc_safe_buffer(struct dmabounce_device_info *device_info,
395 + dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
396 +{
397 + struct safe_buffer *buf;
398 + struct dmabounce_pool *pool = &device_info->pool;
399 + struct device *dev = device_info->dev;
400 + unsigned long flags;
401 +
402 + /*
403 + * Although one might expect this to be called in thread context,
404 + * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
405 + * was previously used to select the appropriate allocation mode,
406 + * but this is unsafe.
407 + */
408 + buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
409 + if (!buf) {
410 + dev_warn(dev, "%s: kmalloc failed\n", __func__);
411 + return NULL;
412 + }
413 +
414 + buf->unsafe_dma_addr = dma_addr;
415 + buf->size = size;
416 + buf->direction = dir;
417 + buf->pool = pool;
418 +
419 + buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
420 +
421 + if (!buf->safe) {
422 + dev_warn(dev,
423 + "%s: could not alloc dma memory (size=%d)\n",
424 + __func__, size);
425 + kfree(buf);
426 + return NULL;
427 + }
428 +
429 + write_lock_irqsave(&device_info->lock, flags);
430 + list_add(&buf->node, &device_info->safe_buffers);
431 + write_unlock_irqrestore(&device_info->lock, flags);
432 +
433 + return buf;
434 +}
435 +
436 +/* determine if a buffer is from our "safe" pool */
437 +static struct safe_buffer *
438 +find_safe_buffer(struct dmabounce_device_info *device_info,
439 + dma_addr_t safe_dma_addr)
440 +{
441 + struct safe_buffer *b, *rb = NULL;
442 + unsigned long flags;
443 +
444 + read_lock_irqsave(&device_info->lock, flags);
445 +
446 + list_for_each_entry(b, &device_info->safe_buffers, node)
447 + if (b->safe_dma_addr <= safe_dma_addr &&
448 + b->safe_dma_addr + b->size > safe_dma_addr) {
449 + rb = b;
450 + break;
451 + }
452 +
453 + read_unlock_irqrestore(&device_info->lock, flags);
454 + return rb;
455 +}
456 +
457 +static void
458 +free_safe_buffer(struct dmabounce_device_info *device_info,
459 + struct safe_buffer *buf)
460 +{
461 + unsigned long flags;
462 +
463 + write_lock_irqsave(&device_info->lock, flags);
464 + list_del(&buf->node);
465 + write_unlock_irqrestore(&device_info->lock, flags);
466 +
467 + bounce_free(buf->pool, buf->safe, buf->size);
468 +
469 + kfree(buf);
470 +}
471 +
472 +/* ************************************************** */
473 +
474 +static struct safe_buffer *
475 +find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
476 +{
477 + if (!dev || !g_dmabounce_device_info)
478 + return NULL;
479 + if (dma_mapping_error(dev, dma_addr)) {
480 + dev_err(dev, "Trying to %s invalid mapping\n", where);
481 + return NULL;
482 + }
483 + return find_safe_buffer(g_dmabounce_device_info, dma_addr);
484 +}
485 +
486 +static dma_addr_t
487 +map_single(struct device *dev, struct safe_buffer *buf, size_t size,
488 + enum dma_data_direction dir, unsigned long attrs)
489 +{
490 + BUG_ON(buf->size != size);
491 + BUG_ON(buf->direction != dir);
492 +
493 + dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
494 + (u64)buf->safe_dma_addr);
495 +
496 + if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
497 + !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
498 + bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
499 + size);
500 +
501 + return buf->safe_dma_addr;
502 +}
503 +
504 +static dma_addr_t
505 +unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
506 + enum dma_data_direction dir, unsigned long attrs)
507 +{
508 + BUG_ON(buf->size != size);
509 + BUG_ON(buf->direction != dir);
510 +
511 + if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
512 + !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
513 + dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
514 + (u64)buf->unsafe_dma_addr);
515 +
516 + bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
517 + size);
518 + }
519 + return buf->unsafe_dma_addr;
520 +}
521 +
522 +/* ************************************************** */
523 +
524 +/*
525 + * see if a buffer address is in an 'unsafe' range. if it is
526 + * allocate a 'safe' buffer and copy the unsafe buffer into it.
527 + * substitute the safe buffer for the unsafe one.
528 + * (basically move the buffer from an unsafe area to a safe one)
529 + */
530 +static dma_addr_t
531 +dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
532 + size_t size, enum dma_data_direction dir,
533 + unsigned long attrs)
534 +{
535 + struct dmabounce_device_info *device_info = g_dmabounce_device_info;
536 + dma_addr_t dma_addr;
537 +
538 + dma_addr = pfn_to_dma(dev, page_to_pfn(page)) + offset;
539 +
540 + arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir);
541 +
542 + if (device_info && (dma_addr + size) > device_info->threshold) {
543 + struct safe_buffer *buf;
544 +
545 + buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
546 + if (!buf) {
547 + DO_STATS(device_info->fail_count++);
548 + return ARM_MAPPING_ERROR;
549 + }
550 +
551 + DO_STATS(device_info->map_count++);
552 +
553 + dma_addr = map_single(dev, buf, size, dir, attrs);
554 + }
555 +
556 + return dma_addr;
557 +}
558 +
559 +/*
560 + * see if a mapped address was really a "safe" buffer and if so, copy
561 + * the data from the safe buffer back to the unsafe buffer and free up
562 + * the safe buffer. (basically return things back to the way they
563 + * should be)
564 + */
565 +static void
566 +dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
567 + enum dma_data_direction dir, unsigned long attrs)
568 +{
569 + struct safe_buffer *buf;
570 +
571 + buf = find_safe_buffer_dev(dev, dma_addr, __func__);
572 + if (buf) {
573 + DO_STATS(g_dmabounce_device_info->unmap_count++);
574 + dma_addr = unmap_single(dev, buf, size, dir, attrs);
575 + free_safe_buffer(g_dmabounce_device_info, buf);
576 + }
577 +
578 + arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir);
579 +}
580 +
581 +/*
582 + * A version of dmabounce_map_page that assumes the mapping has already
583 + * been created - intended for streaming operation.
584 + */
585 +static void
586 +dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
587 + enum dma_data_direction dir)
588 +{
589 + struct safe_buffer *buf;
590 +
591 + arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir);
592 +
593 + buf = find_safe_buffer_dev(dev, dma_addr, __func__);
594 + if (buf) {
595 + DO_STATS(g_dmabounce_device_info->sync_dev_count++);
596 + map_single(dev, buf, size, dir, 0);
597 + }
598 +}
599 +
600 +/*
601 + * A version of dmabounce_unmap_page that doesn't destroy the mapping -
602 + * intended for streaming operation.
603 + */
604 +static void
605 +dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
606 + size_t size, enum dma_data_direction dir)
607 +{
608 + struct safe_buffer *buf;
609 +
610 + buf = find_safe_buffer_dev(dev, dma_addr, __func__);
611 + if (buf) {
612 + DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
613 + dma_addr = unmap_single(dev, buf, size, dir, 0);
614 + }
615 +
616 + arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir);
617 +}
618 +
619 +static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
620 +{
621 + if (g_dmabounce_device_info)
622 + return 0;
623 +
624 + return arm_dma_ops.dma_supported(dev, dma_mask);
625 +}
626 +
627 +static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
628 +{
629 + return arm_dma_ops.mapping_error(dev, dma_addr);
630 +}
631 +
632 +static const struct dma_map_ops dmabounce_ops = {
633 + .alloc = arm_dma_alloc,
634 + .free = arm_dma_free,
635 + .mmap = arm_dma_mmap,
636 + .get_sgtable = arm_dma_get_sgtable,
637 + .map_page = dmabounce_map_page,
638 + .unmap_page = dmabounce_unmap_page,
639 + .sync_single_for_cpu = dmabounce_sync_for_cpu,
640 + .sync_single_for_device = dmabounce_sync_for_device,
641 + .map_sg = arm_dma_map_sg,
642 + .unmap_sg = arm_dma_unmap_sg,
643 + .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu,
644 + .sync_sg_for_device = arm_dma_sync_sg_for_device,
645 + .dma_supported = dmabounce_dma_supported,
646 + .mapping_error = dmabounce_mapping_error,
647 +};
648 +
649 +int brcm_pcie_bounce_register_dev(struct device *dev,
650 + unsigned long buffer_size,
651 + dma_addr_t threshold)
652 +{
653 + struct dmabounce_device_info *device_info;
654 + int ret;
655 +
656 + /* Only support a single client */
657 + if (g_dmabounce_device_info)
658 + return -EBUSY;
659 +
660 + ret = bcm2838_dma40_memcpy_init(dev);
661 + if (ret)
662 + return ret;
663 +
664 + device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
665 + if (!device_info) {
666 + dev_err(dev,
667 + "Could not allocated dmabounce_device_info\n");
668 + return -ENOMEM;
669 + }
670 +
671 + ret = bounce_create(&device_info->pool, dev, buffer_size);
672 + if (ret) {
673 + dev_err(dev,
674 + "dmabounce: could not allocate %ld byte DMA pool\n",
675 + buffer_size);
676 + goto err_bounce;
677 + }
678 +
679 + device_info->dev = dev;
680 + device_info->threshold = threshold;
681 + INIT_LIST_HEAD(&device_info->safe_buffers);
682 + rwlock_init(&device_info->lock);
683 +
684 + DO_STATS(device_info->map_count = 0);
685 + DO_STATS(device_info->unmap_count = 0);
686 + DO_STATS(device_info->sync_dev_count = 0);
687 + DO_STATS(device_info->sync_cpu_count = 0);
688 + DO_STATS(device_info->fail_count = 0);
689 + DO_STATS(device_info->attr_res =
690 + device_create_file(dev, &dev_attr_dmabounce_stats));
691 +
692 + g_dmabounce_device_info = device_info;
693 + set_dma_ops(dev, &dmabounce_ops);
694 +
695 + dev_info(dev, "dmabounce: registered device - %ld kB, threshold %pad\n",
696 + buffer_size / 1024, &threshold);
697 +
698 + return 0;
699 +
700 + err_bounce:
701 + kfree(device_info);
702 + return ret;
703 +}
704 +EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
705 +
706 +void brcm_pcie_bounce_unregister_dev(struct device *dev)
707 +{
708 + struct dmabounce_device_info *device_info = g_dmabounce_device_info;
709 +
710 + g_dmabounce_device_info = NULL;
711 + set_dma_ops(dev, NULL);
712 +
713 + if (!device_info) {
714 + dev_warn(dev,
715 + "Never registered with dmabounce but attempting"
716 + "to unregister!\n");
717 + return;
718 + }
719 +
720 + if (!list_empty(&device_info->safe_buffers)) {
721 + dev_err(dev,
722 + "Removing from dmabounce with pending buffers!\n");
723 + BUG();
724 + }
725 +
726 + bounce_destroy(&device_info->pool);
727 +
728 + DO_STATS(if (device_info->attr_res == 0)
729 + device_remove_file(dev, &dev_attr_dmabounce_stats));
730 +
731 + kfree(device_info);
732 +
733 + dev_info(dev, "dmabounce: device unregistered\n");
734 +}
735 +EXPORT_SYMBOL(brcm_pcie_bounce_unregister_dev);
736 +
737 +MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
738 +MODULE_DESCRIPTION("Dedicate DMA bounce support for pcie-brcmstb");
739 +MODULE_LICENSE("GPL");
740 --- /dev/null
741 +++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
742 @@ -0,0 +1,32 @@
743 +/* SPDX-License-Identifier: GPL-2.0 */
744 +/*
745 + * Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
746 + */
747 +
748 +#ifndef _PCIE_BRCMSTB_BOUNCE_H
749 +#define _PCIE_BRCMSTB_BOUNCE_H
750 +
751 +#ifdef CONFIG_ARM
752 +
753 +int brcm_pcie_bounce_register_dev(struct device *dev, unsigned long buffer_size,
754 + dma_addr_t threshold);
755 +
756 +int brcm_pcie_bounce_unregister_dev(struct device *dev);
757 +
758 +#else
759 +
760 +static inline int brcm_pcie_bounce_register_dev(struct device *dev,
761 + unsigned long buffer_size,
762 + dma_addr_t threshold)
763 +{
764 + return 0;
765 +}
766 +
767 +static inline int brcm_pcie_bounce_unregister_dev(struct device *dev)
768 +{
769 + return 0;
770 +}
771 +
772 +#endif
773 +
774 +#endif /* _PCIE_BRCMSTB_BOUNCE_H */
775 --- a/drivers/pci/controller/pcie-brcmstb.c
776 +++ b/drivers/pci/controller/pcie-brcmstb.c
777 @@ -29,6 +29,7 @@
778 #include <linux/string.h>
779 #include <linux/types.h>
780 #include "../pci.h"
781 +#include "pcie-brcmstb-bounce.h"
782
783 /* BRCM_PCIE_CAP_REGS - Offset for the mandatory capability config regs */
784 #define BRCM_PCIE_CAP_REGS 0x00ac
785 @@ -53,6 +54,7 @@
786 #define PCIE_MISC_MSI_BAR_CONFIG_LO 0x4044
787 #define PCIE_MISC_MSI_BAR_CONFIG_HI 0x4048
788 #define PCIE_MISC_MSI_DATA_CONFIG 0x404c
789 +#define PCIE_MISC_EOI_CTRL 0x4060
790 #define PCIE_MISC_PCIE_CTRL 0x4064
791 #define PCIE_MISC_PCIE_STATUS 0x4068
792 #define PCIE_MISC_REVISION 0x406c
793 @@ -260,12 +262,14 @@ struct brcm_pcie {
794 unsigned int rev;
795 const int *reg_offsets;
796 const int *reg_field_info;
797 + u32 max_burst_size;
798 enum pcie_type type;
799 };
800
801 struct pcie_cfg_data {
802 const int *reg_field_info;
803 const int *offsets;
804 + const u32 max_burst_size;
805 const enum pcie_type type;
806 };
807
808 @@ -288,24 +292,27 @@ static const int pcie_offset_bcm7425[] =
809 static const struct pcie_cfg_data bcm7425_cfg = {
810 .reg_field_info = pcie_reg_field_info,
811 .offsets = pcie_offset_bcm7425,
812 + .max_burst_size = BURST_SIZE_256,
813 .type = BCM7425,
814 };
815
816 static const int pcie_offsets[] = {
817 [RGR1_SW_INIT_1] = 0x9210,
818 [EXT_CFG_INDEX] = 0x9000,
819 - [EXT_CFG_DATA] = 0x9004,
820 + [EXT_CFG_DATA] = 0x8000,
821 };
822
823 static const struct pcie_cfg_data bcm7435_cfg = {
824 .reg_field_info = pcie_reg_field_info,
825 .offsets = pcie_offsets,
826 + .max_burst_size = BURST_SIZE_256,
827 .type = BCM7435,
828 };
829
830 static const struct pcie_cfg_data generic_cfg = {
831 .reg_field_info = pcie_reg_field_info,
832 .offsets = pcie_offsets,
833 + .max_burst_size = BURST_SIZE_128, /* previously BURST_SIZE_512 */
834 .type = GENERIC,
835 };
836
837 @@ -318,6 +325,7 @@ static const int pcie_offset_bcm7278[] =
838 static const struct pcie_cfg_data bcm7278_cfg = {
839 .reg_field_info = pcie_reg_field_info_bcm7278,
840 .offsets = pcie_offset_bcm7278,
841 + .max_burst_size = BURST_SIZE_512,
842 .type = BCM7278,
843 };
844
845 @@ -360,7 +368,6 @@ static struct pci_ops brcm_pcie_ops = {
846 (reg##_##field##_MASK & (field_val << reg##_##field##_SHIFT)))
847
848 static const struct dma_map_ops *arch_dma_ops;
849 -static const struct dma_map_ops *brcm_dma_ops_ptr;
850 static struct of_pci_range *dma_ranges;
851 static int num_dma_ranges;
852
853 @@ -369,6 +376,16 @@ static int num_memc;
854 static int num_pcie;
855 static DEFINE_MUTEX(brcm_pcie_lock);
856
857 +static unsigned int bounce_buffer = 32*1024*1024;
858 +module_param(bounce_buffer, uint, 0644);
859 +MODULE_PARM_DESC(bounce_buffer, "Size of bounce buffer");
860 +
861 +static unsigned int bounce_threshold = 0xc0000000;
862 +module_param(bounce_threshold, uint, 0644);
863 +MODULE_PARM_DESC(bounce_threshold, "Bounce threshold");
864 +
865 +static struct brcm_pcie *g_pcie;
866 +
867 static dma_addr_t brcm_to_pci(dma_addr_t addr)
868 {
869 struct of_pci_range *p;
870 @@ -457,12 +474,10 @@ static int brcm_map_sg(struct device *de
871 struct scatterlist *sg;
872
873 for_each_sg(sgl, sg, nents, i) {
874 -#ifdef CONFIG_NEED_SG_DMA_LENGTH
875 - sg->dma_length = sg->length;
876 -#endif
877 + sg_dma_len(sg) = sg->length;
878 sg->dma_address =
879 - brcm_dma_ops_ptr->map_page(dev, sg_page(sg), sg->offset,
880 - sg->length, dir, attrs);
881 + brcm_map_page(dev, sg_page(sg), sg->offset,
882 + sg->length, dir, attrs);
883 if (dma_mapping_error(dev, sg->dma_address))
884 goto bad_mapping;
885 }
886 @@ -470,8 +485,8 @@ static int brcm_map_sg(struct device *de
887
888 bad_mapping:
889 for_each_sg(sgl, sg, i, j)
890 - brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg),
891 - sg_dma_len(sg), dir, attrs);
892 + brcm_unmap_page(dev, sg_dma_address(sg),
893 + sg_dma_len(sg), dir, attrs);
894 return 0;
895 }
896
897 @@ -484,8 +499,8 @@ static void brcm_unmap_sg(struct device
898 struct scatterlist *sg;
899
900 for_each_sg(sgl, sg, nents, i)
901 - brcm_dma_ops_ptr->unmap_page(dev, sg_dma_address(sg),
902 - sg_dma_len(sg), dir, attrs);
903 + brcm_unmap_page(dev, sg_dma_address(sg),
904 + sg_dma_len(sg), dir, attrs);
905 }
906
907 static void brcm_sync_single_for_cpu(struct device *dev,
908 @@ -531,8 +546,8 @@ void brcm_sync_sg_for_cpu(struct device
909 int i;
910
911 for_each_sg(sgl, sg, nents, i)
912 - brcm_dma_ops_ptr->sync_single_for_cpu(dev, sg_dma_address(sg),
913 - sg->length, dir);
914 + brcm_sync_single_for_cpu(dev, sg_dma_address(sg),
915 + sg->length, dir);
916 }
917
918 void brcm_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
919 @@ -542,9 +557,9 @@ void brcm_sync_sg_for_device(struct devi
920 int i;
921
922 for_each_sg(sgl, sg, nents, i)
923 - brcm_dma_ops_ptr->sync_single_for_device(dev,
924 - sg_dma_address(sg),
925 - sg->length, dir);
926 + brcm_sync_single_for_device(dev,
927 + sg_dma_address(sg),
928 + sg->length, dir);
929 }
930
931 static int brcm_mapping_error(struct device *dev, dma_addr_t dma_addr)
932 @@ -633,17 +648,47 @@ static void brcm_set_dma_ops(struct devi
933 set_dma_ops(dev, &brcm_dma_ops);
934 }
935
936 +static inline void brcm_pcie_perst_set(struct brcm_pcie *pcie,
937 + unsigned int val);
938 static int brcmstb_platform_notifier(struct notifier_block *nb,
939 unsigned long event, void *__dev)
940 {
941 + extern unsigned long max_pfn;
942 struct device *dev = __dev;
943 + const char *rc_name = "0000:00:00.0";
944
945 - brcm_dma_ops_ptr = &brcm_dma_ops;
946 - if (event != BUS_NOTIFY_ADD_DEVICE)
947 - return NOTIFY_DONE;
948 + switch (event) {
949 + case BUS_NOTIFY_ADD_DEVICE:
950 + if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
951 + strcmp(dev->kobj.name, rc_name)) {
952 + int ret;
953 +
954 + ret = brcm_pcie_bounce_register_dev(dev, bounce_buffer,
955 + (dma_addr_t)bounce_threshold);
956 + if (ret) {
957 + dev_err(dev,
958 + "brcm_pcie_bounce_register_dev() failed: %d\n",
959 + ret);
960 + return ret;
961 + }
962 + }
963 + brcm_set_dma_ops(dev);
964 + return NOTIFY_OK;
965 +
966 + case BUS_NOTIFY_DEL_DEVICE:
967 + if (!strcmp(dev->kobj.name, rc_name) && g_pcie) {
968 + /* Force a bus reset */
969 + brcm_pcie_perst_set(g_pcie, 1);
970 + msleep(100);
971 + brcm_pcie_perst_set(g_pcie, 0);
972 + } else if (max_pfn > (bounce_threshold/PAGE_SIZE)) {
973 + brcm_pcie_bounce_unregister_dev(dev);
974 + }
975 + return NOTIFY_OK;
976
977 - brcm_set_dma_ops(dev);
978 - return NOTIFY_OK;
979 + default:
980 + return NOTIFY_DONE;
981 + }
982 }
983
984 static struct notifier_block brcmstb_platform_nb = {
985 @@ -914,6 +959,7 @@ static void brcm_pcie_msi_isr(struct irq
986 }
987 }
988 chained_irq_exit(chip, desc);
989 + bcm_writel(1, msi->base + PCIE_MISC_EOI_CTRL);
990 }
991
992 static void brcm_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
993 @@ -930,7 +976,8 @@ static void brcm_compose_msi_msg(struct
994 static int brcm_msi_set_affinity(struct irq_data *irq_data,
995 const struct cpumask *mask, bool force)
996 {
997 - return -EINVAL;
998 + struct brcm_msi *msi = irq_data_get_irq_chip_data(irq_data);
999 + return __irq_set_affinity(msi->irq, mask, force);
1000 }
1001
1002 static struct irq_chip brcm_msi_bottom_irq_chip = {
1003 @@ -1168,9 +1215,9 @@ static void __iomem *brcm_pcie_map_conf(
1004 return PCI_SLOT(devfn) ? NULL : base + where;
1005
1006 /* For devices, write to the config space index register */
1007 - idx = cfg_index(bus->number, devfn, where);
1008 + idx = cfg_index(bus->number, devfn, 0);
1009 bcm_writel(idx, pcie->base + IDX_ADDR(pcie));
1010 - return base + DATA_ADDR(pcie) + (where & 0x3);
1011 + return base + DATA_ADDR(pcie) + where;
1012 }
1013
1014 static inline void brcm_pcie_bridge_sw_init_set(struct brcm_pcie *pcie,
1015 @@ -1238,20 +1285,6 @@ static int brcm_pcie_parse_map_dma_range
1016 num_dma_ranges++;
1017 }
1018
1019 - for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
1020 - u64 size = brcmstb_memory_memc_size(i);
1021 -
1022 - if (size == (u64)-1) {
1023 - dev_err(pcie->dev, "cannot get memc%d size", i);
1024 - return -EINVAL;
1025 - } else if (size) {
1026 - scb_size[i] = roundup_pow_of_two_64(size);
1027 - num_memc++;
1028 - } else {
1029 - break;
1030 - }
1031 - }
1032 -
1033 return 0;
1034 }
1035
1036 @@ -1275,26 +1308,25 @@ static int brcm_pcie_add_controller(stru
1037 if (ret)
1038 goto done;
1039
1040 - /* Determine num_memc and their sizes */
1041 - for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
1042 - u64 size = brcmstb_memory_memc_size(i);
1043 -
1044 - if (size == (u64)-1) {
1045 - dev_err(dev, "cannot get memc%d size\n", i);
1046 - ret = -EINVAL;
1047 - goto done;
1048 - } else if (size) {
1049 - scb_size[i] = roundup_pow_of_two_64(size);
1050 - num_memc++;
1051 - } else {
1052 - break;
1053 + if (!num_dma_ranges) {
1054 + /* Determine num_memc and their sizes by other means */
1055 + for (i = 0, num_memc = 0; i < BRCM_MAX_SCB; i++) {
1056 + u64 size = brcmstb_memory_memc_size(i);
1057 +
1058 + if (size == (u64)-1) {
1059 + dev_err(dev, "cannot get memc%d size\n", i);
1060 + ret = -EINVAL;
1061 + goto done;
1062 + } else if (size) {
1063 + scb_size[i] = roundup_pow_of_two_64(size);
1064 + } else {
1065 + break;
1066 + }
1067 }
1068 - }
1069 - if (!ret && num_memc == 0) {
1070 - ret = -EINVAL;
1071 - goto done;
1072 + num_memc = i;
1073 }
1074
1075 + g_pcie = pcie;
1076 num_pcie++;
1077 done:
1078 mutex_unlock(&brcm_pcie_lock);
1079 @@ -1307,6 +1339,7 @@ static void brcm_pcie_remove_controller(
1080 if (--num_pcie > 0)
1081 goto out;
1082
1083 + g_pcie = NULL;
1084 if (brcm_unregister_notifier())
1085 dev_err(pcie->dev, "failed to unregister pci bus notifier\n");
1086 kfree(dma_ranges);
1087 @@ -1367,7 +1400,7 @@ static int brcm_pcie_setup(struct brcm_p
1088 void __iomem *base = pcie->base;
1089 unsigned int scb_size_val;
1090 u64 rc_bar2_offset, rc_bar2_size, total_mem_size = 0;
1091 - u32 tmp, burst;
1092 + u32 tmp;
1093 int i, j, ret, limit;
1094 u16 nlw, cls, lnksta;
1095 bool ssc_good = false;
1096 @@ -1400,20 +1433,15 @@ static int brcm_pcie_setup(struct brcm_p
1097 /* Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN */
1098 tmp = INSERT_FIELD(0, PCIE_MISC_MISC_CTRL, SCB_ACCESS_EN, 1);
1099 tmp = INSERT_FIELD(tmp, PCIE_MISC_MISC_CTRL, CFG_READ_UR_MODE, 1);
1100 - burst = (pcie->type == GENERIC || pcie->type == BCM7278)
1101 - ? BURST_SIZE_512 : BURST_SIZE_256;
1102 - tmp = INSERT_FIELD(tmp, PCIE_MISC_MISC_CTRL, MAX_BURST_SIZE, burst);
1103 + tmp = INSERT_FIELD(tmp, PCIE_MISC_MISC_CTRL, MAX_BURST_SIZE,
1104 + pcie->max_burst_size);
1105 bcm_writel(tmp, base + PCIE_MISC_MISC_CTRL);
1106
1107 /*
1108 * Set up inbound memory view for the EP (called RC_BAR2,
1109 * not to be confused with the BARs that are advertised by
1110 * the EP).
1111 - */
1112 - for (i = 0; i < num_memc; i++)
1113 - total_mem_size += scb_size[i];
1114 -
1115 - /*
1116 + *
1117 * The PCIe host controller by design must set the inbound
1118 * viewport to be a contiguous arrangement of all of the
1119 * system's memory. In addition, its size mut be a power of
1120 @@ -1429,55 +1457,49 @@ static int brcm_pcie_setup(struct brcm_p
1121 * the controller will know to send outbound memory downstream
1122 * and everything else upstream.
1123 */
1124 - rc_bar2_size = roundup_pow_of_two_64(total_mem_size);
1125
1126 - if (dma_ranges) {
1127 + if (num_dma_ranges) {
1128 /*
1129 - * The best-case scenario is to place the inbound
1130 - * region in the first 4GB of pcie-space, as some
1131 - * legacy devices can only address 32bits.
1132 - * We would also like to put the MSI under 4GB
1133 - * as well, since some devices require a 32bit
1134 - * MSI target address.
1135 + * Use the base address and size(s) provided in the dma-ranges
1136 + * property.
1137 */
1138 - if (total_mem_size <= 0xc0000000ULL &&
1139 - rc_bar2_size <= 0x100000000ULL) {
1140 - rc_bar2_offset = 0;
1141 - /* If the viewport is less then 4GB we can fit
1142 - * the MSI target address under 4GB. Otherwise
1143 - * put it right below 64GB.
1144 - */
1145 - msi_target_addr =
1146 - (rc_bar2_size == 0x100000000ULL)
1147 - ? BRCM_MSI_TARGET_ADDR_GT_4GB
1148 - : BRCM_MSI_TARGET_ADDR_LT_4GB;
1149 - } else {
1150 - /*
1151 - * The system memory is 4GB or larger so we
1152 - * cannot start the inbound region at location
1153 - * 0 (since we have to allow some space for
1154 - * outbound memory @ 3GB). So instead we
1155 - * start it at the 1x multiple of its size
1156 - */
1157 - rc_bar2_offset = rc_bar2_size;
1158 -
1159 - /* Since we are starting the viewport at 4GB or
1160 - * higher, put the MSI target address below 4GB
1161 - */
1162 - msi_target_addr = BRCM_MSI_TARGET_ADDR_LT_4GB;
1163 - }
1164 - } else {
1165 + for (i = 0; i < num_dma_ranges; i++)
1166 + scb_size[i] = roundup_pow_of_two_64(dma_ranges[i].size);
1167 +
1168 + num_memc = num_dma_ranges;
1169 + rc_bar2_offset = dma_ranges[0].pci_addr;
1170 + } else if (num_memc) {
1171 /*
1172 * Set simple configuration based on memory sizes
1173 - * only. We always start the viewport at address 0,
1174 - * and set the MSI target address accordingly.
1175 + * only. We always start the viewport at address 0.
1176 */
1177 rc_bar2_offset = 0;
1178 + } else {
1179 + return -EINVAL;
1180 + }
1181 +
1182 + for (i = 0; i < num_memc; i++)
1183 + total_mem_size += scb_size[i];
1184 +
1185 + rc_bar2_size = roundup_pow_of_two_64(total_mem_size);
1186
1187 - msi_target_addr = (rc_bar2_size >= 0x100000000ULL)
1188 - ? BRCM_MSI_TARGET_ADDR_GT_4GB
1189 - : BRCM_MSI_TARGET_ADDR_LT_4GB;
1190 + /* Verify the alignment is correct */
1191 + if (rc_bar2_offset & (rc_bar2_size - 1)) {
1192 + dev_err(dev, "inbound window is misaligned\n");
1193 + return -EINVAL;
1194 }
1195 +
1196 + /*
1197 + * Position the MSI target low if possible.
1198 + *
1199 + * TO DO: Consider outbound window when choosing MSI target and
1200 + * verifying configuration.
1201 + */
1202 + msi_target_addr = BRCM_MSI_TARGET_ADDR_LT_4GB;
1203 + if (rc_bar2_offset <= msi_target_addr &&
1204 + rc_bar2_offset + rc_bar2_size > msi_target_addr)
1205 + msi_target_addr = BRCM_MSI_TARGET_ADDR_GT_4GB;
1206 +
1207 pcie->msi_target_addr = msi_target_addr;
1208
1209 tmp = lower_32_bits(rc_bar2_offset);
1210 @@ -1713,6 +1735,7 @@ static int brcm_pcie_probe(struct platfo
1211 data = of_id->data;
1212 pcie->reg_offsets = data->offsets;
1213 pcie->reg_field_info = data->reg_field_info;
1214 + pcie->max_burst_size = data->max_burst_size;
1215 pcie->type = data->type;
1216 pcie->dn = dn;
1217 pcie->dev = &pdev->dev;
1218 @@ -1732,7 +1755,7 @@ static int brcm_pcie_probe(struct platfo
1219
1220 pcie->clk = of_clk_get_by_name(dn, "sw_pcie");
1221 if (IS_ERR(pcie->clk)) {
1222 - dev_err(&pdev->dev, "could not get clock\n");
1223 + dev_warn(&pdev->dev, "could not get clock\n");
1224 pcie->clk = NULL;
1225 }
1226 pcie->base = base;
1227 @@ -1755,7 +1778,8 @@ static int brcm_pcie_probe(struct platfo
1228
1229 ret = clk_prepare_enable(pcie->clk);
1230 if (ret) {
1231 - dev_err(&pdev->dev, "could not enable clock\n");
1232 + if (ret != -EPROBE_DEFER)
1233 + dev_err(&pdev->dev, "could not enable clock\n");
1234 return ret;
1235 }
1236
1237 @@ -1818,7 +1842,6 @@ static struct platform_driver brcm_pcie_
1238 .remove = brcm_pcie_remove,
1239 .driver = {
1240 .name = "brcm-pcie",
1241 - .owner = THIS_MODULE,
1242 .of_match_table = brcm_pcie_match,
1243 .pm = &brcm_pcie_pm_ops,
1244 },
1245 --- a/drivers/soc/bcm/brcmstb/Makefile
1246 +++ b/drivers/soc/bcm/brcmstb/Makefile
1247 @@ -1,2 +1,2 @@
1248 -obj-y += common.o biuctrl.o
1249 +obj-y += common.o biuctrl.o memory.o
1250 obj-$(CONFIG_BRCMSTB_PM) += pm/
1251 --- /dev/null
1252 +++ b/drivers/soc/bcm/brcmstb/memory.c
1253 @@ -0,0 +1,158 @@
1254 +// SPDX-License-Identifier: GPL-2.0
1255 +/* Copyright © 2015-2017 Broadcom */
1256 +
1257 +#include <linux/device.h>
1258 +#include <linux/io.h>
1259 +#include <linux/libfdt.h>
1260 +#include <linux/of_address.h>
1261 +#include <linux/of_fdt.h>
1262 +#include <linux/sizes.h>
1263 +#include <soc/brcmstb/memory_api.h>
1264 +
1265 +/* Macro to help extract property data */
1266 +#define DT_PROP_DATA_TO_U32(b, offs) (fdt32_to_cpu(*(u32 *)(b + offs)))
1267 +
1268 +/* Constants used when retrieving memc info */
1269 +#define NUM_BUS_RANGES 10
1270 +#define BUS_RANGE_ULIMIT_SHIFT 4
1271 +#define BUS_RANGE_LLIMIT_SHIFT 4
1272 +#define BUS_RANGE_PA_SHIFT 12
1273 +
1274 +enum {
1275 + BUSNUM_MCP0 = 0x4,
1276 + BUSNUM_MCP1 = 0x5,
1277 + BUSNUM_MCP2 = 0x6,
1278 +};
1279 +
1280 +/*
1281 + * If the DT nodes are handy, determine which MEMC holds the specified
1282 + * physical address.
1283 + */
1284 +#ifdef CONFIG_ARCH_BRCMSTB
1285 +int __brcmstb_memory_phys_addr_to_memc(phys_addr_t pa, void __iomem *base)
1286 +{
1287 + int memc = -1;
1288 + int i;
1289 +
1290 + for (i = 0; i < NUM_BUS_RANGES; i++, base += 8) {
1291 + const u64 ulimit_raw = readl(base);
1292 + const u64 llimit_raw = readl(base + 4);
1293 + const u64 ulimit =
1294 + ((ulimit_raw >> BUS_RANGE_ULIMIT_SHIFT)
1295 + << BUS_RANGE_PA_SHIFT) | 0xfff;
1296 + const u64 llimit = (llimit_raw >> BUS_RANGE_LLIMIT_SHIFT)
1297 + << BUS_RANGE_PA_SHIFT;
1298 + const u32 busnum = (u32)(ulimit_raw & 0xf);
1299 +
1300 + if (pa >= llimit && pa <= ulimit) {
1301 + if (busnum >= BUSNUM_MCP0 && busnum <= BUSNUM_MCP2) {
1302 + memc = busnum - BUSNUM_MCP0;
1303 + break;
1304 + }
1305 + }
1306 + }
1307 +
1308 + return memc;
1309 +}
1310 +
1311 +int brcmstb_memory_phys_addr_to_memc(phys_addr_t pa)
1312 +{
1313 + int memc = -1;
1314 + struct device_node *np;
1315 + void __iomem *cpubiuctrl;
1316 +
1317 + np = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl");
1318 + if (!np)
1319 + return memc;
1320 +
1321 + cpubiuctrl = of_iomap(np, 0);
1322 + if (!cpubiuctrl)
1323 + goto cleanup;
1324 +
1325 + memc = __brcmstb_memory_phys_addr_to_memc(pa, cpubiuctrl);
1326 + iounmap(cpubiuctrl);
1327 +
1328 +cleanup:
1329 + of_node_put(np);
1330 +
1331 + return memc;
1332 +}
1333 +
1334 +#elif defined(CONFIG_MIPS)
1335 +int brcmstb_memory_phys_addr_to_memc(phys_addr_t pa)
1336 +{
1337 + /* The logic here is fairly simple and hardcoded: if pa <= 0x5000_0000,
1338 + * then this is MEMC0, else MEMC1.
1339 + *
1340 + * For systems with 2GB on MEMC0, MEMC1 starts at 9000_0000, with 1GB
1341 + * on MEMC0, MEMC1 starts at 6000_0000.
1342 + */
1343 + if (pa >= 0x50000000ULL)
1344 + return 1;
1345 + else
1346 + return 0;
1347 +}
1348 +#endif
1349 +
1350 +u64 brcmstb_memory_memc_size(int memc)
1351 +{
1352 + const void *fdt = initial_boot_params;
1353 + const int mem_offset = fdt_path_offset(fdt, "/memory");
1354 + int addr_cells = 1, size_cells = 1;
1355 + const struct fdt_property *prop;
1356 + int proplen, cellslen;
1357 + u64 memc_size = 0;
1358 + int i;
1359 +
1360 + /* Get root size and address cells if specified */
1361 + prop = fdt_get_property(fdt, 0, "#size-cells", &proplen);
1362 + if (prop)
1363 + size_cells = DT_PROP_DATA_TO_U32(prop->data, 0);
1364 +
1365 + prop = fdt_get_property(fdt, 0, "#address-cells", &proplen);
1366 + if (prop)
1367 + addr_cells = DT_PROP_DATA_TO_U32(prop->data, 0);
1368 +
1369 + if (mem_offset < 0)
1370 + return -1;
1371 +
1372 + prop = fdt_get_property(fdt, mem_offset, "reg", &proplen);
1373 + cellslen = (int)sizeof(u32) * (addr_cells + size_cells);
1374 + if ((proplen % cellslen) != 0)
1375 + return -1;
1376 +
1377 + for (i = 0; i < proplen / cellslen; ++i) {
1378 + u64 addr = 0;
1379 + u64 size = 0;
1380 + int memc_idx;
1381 + int j;
1382 +
1383 + for (j = 0; j < addr_cells; ++j) {
1384 + int offset = (cellslen * i) + (sizeof(u32) * j);
1385 +
1386 + addr |= (u64)DT_PROP_DATA_TO_U32(prop->data, offset) <<
1387 + ((addr_cells - j - 1) * 32);
1388 + }
1389 + for (j = 0; j < size_cells; ++j) {
1390 + int offset = (cellslen * i) +
1391 + (sizeof(u32) * (j + addr_cells));
1392 +
1393 + size |= (u64)DT_PROP_DATA_TO_U32(prop->data, offset) <<
1394 + ((size_cells - j - 1) * 32);
1395 + }
1396 +
1397 + if ((phys_addr_t)addr != addr) {
1398 + pr_err("phys_addr_t is smaller than provided address 0x%llx!\n",
1399 + addr);
1400 + return -1;
1401 + }
1402 +
1403 + memc_idx = brcmstb_memory_phys_addr_to_memc((phys_addr_t)addr);
1404 + if (memc_idx == memc)
1405 + memc_size += size;
1406 + }
1407 +
1408 + return memc_size;
1409 +}
1410 +EXPORT_SYMBOL_GPL(brcmstb_memory_memc_size);
1411 +