brcm2708: organize kernel patches
diff --git a/target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch b/target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch
deleted file mode 100644
index a8f0b01..0000000
--- a/target/linux/brcm2708/patches-4.19/950-0767-Ported-pcie-brcmstb-bounce-buffer-implementation-to-.patch
+++ /dev/null
@@ -1,818 +0,0 @@
-From ccd23ce562e8223ba7c6acf7dcb7058ff89ff7ec Mon Sep 17 00:00:00 2001
-From: yaroslavros <yaroslavros@gmail.com>
-Date: Wed, 14 Aug 2019 15:22:55 +0100
-Subject: [PATCH 767/806] Ported pcie-brcmstb bounce buffer implementation to
- ARM64. (#3144)
-
-Ported pcie-brcmstb bounce buffer implementation to ARM64.
-This enables full 4G RAM usage on Raspberry Pi in 64-bit mode.
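-DMA addresses above a configurable threshold are bounced through a pool
-allocated at a DMA-safe address, with the copies performed by a dedicated
-DMA memcpy channel rather than the CPU.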
-
-Signed-off-by: Yaroslav Rosomakho <yaroslavros@gmail.com>
----
- arch/arm64/include/asm/dma-mapping.h          |  21 +
- arch/arm64/mm/dma-mapping.c                   |  50 ++
- drivers/pci/controller/Makefile               |   3 +
- drivers/pci/controller/pcie-brcmstb-bounce.h  |   2 +-
- .../pci/controller/pcie-brcmstb-bounce64.c    | 576 ++++++++++++++++++
- drivers/pci/controller/pcie-brcmstb.c         |  30 +-
- 6 files changed, 658 insertions(+), 24 deletions(-)
- create mode 100644 drivers/pci/controller/pcie-brcmstb-bounce64.c
-
---- a/arch/arm64/include/asm/dma-mapping.h
-+++ b/arch/arm64/include/asm/dma-mapping.h
-@@ -24,6 +24,27 @@
- #include <xen/xen.h>
- #include <asm/xen/hypervisor.h>
-+extern void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-+                         gfp_t gfp, unsigned long attrs);
-+extern void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
-+                       dma_addr_t handle, unsigned long attrs);
-+extern int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
-+                      void *cpu_addr, dma_addr_t dma_addr, size_t size,
-+                      unsigned long attrs);
-+extern int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
-+              void *cpu_addr, dma_addr_t dma_addr, size_t size,
-+              unsigned long attrs);
-+extern int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
-+              enum dma_data_direction dir, unsigned long attrs);
-+extern void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
-+              enum dma_data_direction dir, unsigned long attrs);
-+extern void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
-+              enum dma_data_direction dir);
-+extern void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
-+              enum dma_data_direction dir);
-+
- extern const struct dma_map_ops dummy_dma_ops;
- static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
---- a/arch/arm64/mm/dma-mapping.c
-+++ b/arch/arm64/mm/dma-mapping.c
-@@ -138,6 +138,12 @@ no_mem:
-       return NULL;
- }
-+void *arm64_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-+                           gfp_t gfp, unsigned long attrs)
-+{
-+      return __dma_alloc(dev, size, handle, gfp, attrs);
-+}
-+
- static void __dma_free(struct device *dev, size_t size,
-                      void *vaddr, dma_addr_t dma_handle,
-                      unsigned long attrs)
-@@ -154,6 +160,12 @@ static void __dma_free(struct device *de
-       swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
- }
-+void arm64_dma_free(struct device *dev, size_t size, void *cpu_addr,
-+                         dma_addr_t handle, unsigned long attrs)
-+{
-+      __dma_free(dev, size, cpu_addr, handle, attrs);
-+}
-+
- static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
-                                    unsigned long offset, size_t size,
-                                    enum dma_data_direction dir,
-@@ -197,6 +209,12 @@ static int __swiotlb_map_sg_attrs(struct
-       return ret;
- }
-+int arm64_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
-+                enum dma_data_direction dir, unsigned long attrs)
-+{
-+      return __swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
-+}
-+
- static void __swiotlb_unmap_sg_attrs(struct device *dev,
-                                    struct scatterlist *sgl, int nelems,
-                                    enum dma_data_direction dir,
-@@ -213,6 +231,12 @@ static void __swiotlb_unmap_sg_attrs(str
-       swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
- }
-+void arm64_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, int nelems,
-+                enum dma_data_direction dir, unsigned long attrs)
-+{
-+      __swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
-+}
-+
- static void __swiotlb_sync_single_for_cpu(struct device *dev,
-                                         dma_addr_t dev_addr, size_t size,
-                                         enum dma_data_direction dir)
-@@ -245,6 +269,12 @@ static void __swiotlb_sync_sg_for_cpu(st
-       swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
- }
-+void arm64_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, int nelems,
-+                enum dma_data_direction dir)
-+{
-+      __swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
-+}
-+
- static void __swiotlb_sync_sg_for_device(struct device *dev,
-                                        struct scatterlist *sgl, int nelems,
-                                        enum dma_data_direction dir)
-@@ -259,6 +289,12 @@ static void __swiotlb_sync_sg_for_device
-                                      sg->length, dir);
- }
-+void arm64_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems,
-+                enum dma_data_direction dir)
-+{
-+      __swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
-+}
-+
- static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
-                             unsigned long pfn, size_t size)
- {
-@@ -294,6 +330,13 @@ static int __swiotlb_mmap(struct device
-       return __swiotlb_mmap_pfn(vma, pfn, size);
- }
-+int arm64_dma_mmap(struct device *dev, struct vm_area_struct *vma,
-+                        void *cpu_addr, dma_addr_t dma_addr, size_t size,
-+                        unsigned long attrs)
-+{
-+      return __swiotlb_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
-+}
-+
- static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
-                                     struct page *page, size_t size)
- {
-@@ -314,6 +357,13 @@ static int __swiotlb_get_sgtable(struct
-       return __swiotlb_get_sgtable_page(sgt, page, size);
- }
-+int arm64_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
-+                void *cpu_addr, dma_addr_t dma_addr, size_t size,
-+                unsigned long attrs)
-+{
-+      return __swiotlb_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs);
-+}
-+
- static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
- {
-       if (swiotlb)
---- a/drivers/pci/controller/Makefile
-+++ b/drivers/pci/controller/Makefile
-@@ -32,6 +32,9 @@ obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcms
- ifdef CONFIG_ARM
- obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce.o
- endif
-+ifdef CONFIG_ARM64
-+obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb-bounce64.o
-+endif
- obj-$(CONFIG_VMD) += vmd.o
- # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW
---- a/drivers/pci/controller/pcie-brcmstb-bounce.h
-+++ b/drivers/pci/controller/pcie-brcmstb-bounce.h
-@@ -6,7 +6,7 @@
- #ifndef _PCIE_BRCMSTB_BOUNCE_H
- #define _PCIE_BRCMSTB_BOUNCE_H
--#ifdef CONFIG_ARM
-+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
- int brcm_pcie_bounce_init(struct device *dev, unsigned long buffer_size,
-                         dma_addr_t threshold);
---- /dev/null
-+++ b/drivers/pci/controller/pcie-brcmstb-bounce64.c
-@@ -0,0 +1,576 @@
-+/*
-+ *  This code started out as a version of arch/arm/common/dmabounce.c,
-+ *  modified to cope with highmem pages. Now it has been changed heavily -
-+ *  it now preallocates a large block (currently 4MB) and carves it up
-+ *  sequentially in ring fashion, and DMA is used to copy the data - to the
-+ *  point where very little of the original remains.
-+ *
-+ *  Copyright (C) 2019 Raspberry Pi (Trading) Ltd.
-+ *
-+ *  Original version by Brad Parker (brad@heeltoe.com)
-+ *  Re-written by Christopher Hoover <ch@murgatroid.com>
-+ *  Made generic by Deepak Saxena <dsaxena@plexity.net>
-+ *
-+ *  Copyright (C) 2002 Hewlett Packard Company.
-+ *  Copyright (C) 2004 MontaVista Software, Inc.
-+ *
-+ *  This program is free software; you can redistribute it and/or
-+ *  modify it under the terms of the GNU General Public License
-+ *  version 2 as published by the Free Software Foundation.
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/slab.h>
-+#include <linux/page-flags.h>
-+#include <linux/device.h>
-+#include <linux/dma-mapping.h>
-+#include <linux/dma-direct.h>
-+#include <linux/dmapool.h>
-+#include <linux/list.h>
-+#include <linux/scatterlist.h>
-+#include <linux/bitmap.h>
-+#include <linux/swiotlb.h>
-+
-+#include <asm/cacheflush.h>
-+
-+#define STATS
-+
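-+/*
-+ * DO_STATS() compiles away to a no-op when STATS is not defined above,
-+ * so the statistics bookkeeping sprinkled through this file can be
-+ * switched off at no runtime cost.
-+ */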
-+#ifdef STATS
-+#define DO_STATS(X) do { X ; } while (0)
-+#else
-+#define DO_STATS(X) do { } while (0)
-+#endif
-+
-+/* ************************************************** */
-+
-+struct safe_buffer {
-+      struct list_head node;
-+
-+      /* original request */
-+      size_t          size;
-+      int             direction;
-+
-+      struct dmabounce_pool *pool;
-+      void            *safe;
-+      dma_addr_t      unsafe_dma_addr;
-+      dma_addr_t      safe_dma_addr;
-+};
-+
-+struct dmabounce_pool {
-+      unsigned long   pages;
-+      void            *virt_addr;
-+      dma_addr_t      dma_addr;
-+      unsigned long   *alloc_map;
-+      unsigned long   alloc_pos;
-+      spinlock_t      lock;
-+      struct device   *dev;
-+      unsigned long   num_pages;
-+#ifdef STATS
-+      size_t          max_size;
-+      unsigned long   num_bufs;
-+      unsigned long   max_bufs;
-+      unsigned long   max_pages;
-+#endif
-+};
-+
-+struct dmabounce_device_info {
-+      struct device *dev;
-+      dma_addr_t threshold;
-+      struct list_head safe_buffers;
-+      struct dmabounce_pool pool;
-+      rwlock_t lock;
-+#ifdef STATS
-+      unsigned long map_count;
-+      unsigned long unmap_count;
-+      unsigned long sync_dev_count;
-+      unsigned long sync_cpu_count;
-+      unsigned long fail_count;
-+      int attr_res;
-+#endif
-+};
-+
-+static struct dmabounce_device_info *g_dmabounce_device_info;
-+
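-+/*
-+ * Dedicated 40-bit DMA memcpy channel used for the bounce copies; these
-+ * helpers are provided by the downstream BCM2835 DMA driver.
-+ */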
-+extern int bcm2838_dma40_memcpy_init(void);
-+extern void bcm2838_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size);
-+
-+#ifdef STATS
-+static ssize_t
-+bounce_show(struct device *dev, struct device_attribute *attr, char *buf)
-+{
-+      struct dmabounce_device_info *device_info = g_dmabounce_device_info;
-+      return sprintf(buf, "m:%lu/%lu s:%lu/%lu f:%lu s:%zu b:%lu/%lu a:%lu/%lu\n",
-+              device_info->map_count,
-+              device_info->unmap_count,
-+              device_info->sync_dev_count,
-+              device_info->sync_cpu_count,
-+              device_info->fail_count,
-+              device_info->pool.max_size,
-+              device_info->pool.num_bufs,
-+              device_info->pool.max_bufs,
-+              device_info->pool.num_pages * PAGE_SIZE,
-+              device_info->pool.max_pages * PAGE_SIZE);
-+}
-+
-+static DEVICE_ATTR(dmabounce_stats, 0444, bounce_show, NULL);
-+#endif
-+
-+static int bounce_create(struct dmabounce_pool *pool, struct device *dev,
-+                       unsigned long buffer_size)
-+{
-+      int ret = -ENOMEM;
-+      pool->pages = (buffer_size + PAGE_SIZE - 1)/PAGE_SIZE;
-+      pool->alloc_map = bitmap_zalloc(pool->pages, GFP_KERNEL);
-+      if (!pool->alloc_map)
-+              goto err_bitmap;
-+      pool->virt_addr = dma_alloc_coherent(dev, pool->pages * PAGE_SIZE,
-+                                           &pool->dma_addr, GFP_KERNEL);
-+      if (!pool->virt_addr)
-+              goto err_dmabuf;
-+
-+      pool->alloc_pos = 0;
-+      spin_lock_init(&pool->lock);
-+      pool->dev = dev;
-+      pool->num_pages = 0;
-+
-+      DO_STATS(pool->max_size = 0);
-+      DO_STATS(pool->num_bufs = 0);
-+      DO_STATS(pool->max_bufs = 0);
-+      DO_STATS(pool->max_pages = 0);
-+
-+      return 0;
-+
-+err_dmabuf:
-+      bitmap_free(pool->alloc_map);
-+err_bitmap:
-+      return ret;
-+}
-+
-+static void bounce_destroy(struct dmabounce_pool *pool)
-+{
-+      dma_free_coherent(pool->dev, pool->pages * PAGE_SIZE, pool->virt_addr,
-+                        pool->dma_addr);
-+
-+      bitmap_free(pool->alloc_map);
-+}
-+
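-+/*
-+ * Carve 'size' bytes (rounded up to whole pages) out of the pool.  The
-+ * search starts at alloc_pos, just past the previous allocation, so the
-+ * pool is consumed in ring fashion, wrapping back to the start of the
-+ * bitmap only when the tail is exhausted.
-+ */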
-+static void *bounce_alloc(struct dmabounce_pool *pool, size_t size,
-+                        dma_addr_t *dmaaddrp)
-+{
-+      unsigned long pages;
-+      unsigned long flags;
-+      unsigned long pos;
-+
-+      pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
-+
-+      DO_STATS(pool->max_size = max(size, pool->max_size));
-+
-+      spin_lock_irqsave(&pool->lock, flags);
-+      pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
-+                                       pool->alloc_pos, pages, 0);
-+      /* If not found, try from the start */
-+      if (pos >= pool->pages && pool->alloc_pos)
-+              pos = bitmap_find_next_zero_area(pool->alloc_map, pool->pages,
-+                                               0, pages, 0);
-+
-+      if (pos >= pool->pages) {
-+              spin_unlock_irqrestore(&pool->lock, flags);
-+              return NULL;
-+      }
-+
-+      bitmap_set(pool->alloc_map, pos, pages);
-+      pool->alloc_pos = (pos + pages) % pool->pages;
-+      pool->num_pages += pages;
-+
-+      DO_STATS(pool->num_bufs++);
-+      DO_STATS(pool->max_bufs = max(pool->num_bufs, pool->max_bufs));
-+      DO_STATS(pool->max_pages = max(pool->num_pages, pool->max_pages));
-+
-+      spin_unlock_irqrestore(&pool->lock, flags);
-+
-+      *dmaaddrp = pool->dma_addr + pos * PAGE_SIZE;
-+
-+      return pool->virt_addr + pos * PAGE_SIZE;
-+}
-+
-+static void
-+bounce_free(struct dmabounce_pool *pool, void *buf, size_t size)
-+{
-+      unsigned long pages;
-+      unsigned long flags;
-+      unsigned long pos;
-+
-+      pages = (size + PAGE_SIZE - 1)/PAGE_SIZE;
-+      pos = (buf - pool->virt_addr)/PAGE_SIZE;
-+
-+      BUG_ON((buf - pool->virt_addr) & (PAGE_SIZE - 1));
-+
-+      spin_lock_irqsave(&pool->lock, flags);
-+      bitmap_clear(pool->alloc_map, pos, pages);
-+      pool->num_pages -= pages;
-+      if (pool->num_pages == 0)
-+              pool->alloc_pos = 0;
-+      DO_STATS(pool->num_bufs--);
-+      spin_unlock_irqrestore(&pool->lock, flags);
-+}
-+
-+/* allocate a 'safe' buffer and keep track of it */
-+static struct safe_buffer *
-+alloc_safe_buffer(struct dmabounce_device_info *device_info,
-+                dma_addr_t dma_addr, size_t size, enum dma_data_direction dir)
-+{
-+      struct safe_buffer *buf;
-+      struct dmabounce_pool *pool = &device_info->pool;
-+      struct device *dev = device_info->dev;
-+      unsigned long flags;
-+
-+      /*
-+       * Although one might expect this to be called in thread context,
-+       * using GFP_KERNEL here leads to hard-to-debug lockups. in_atomic()
-+       * was previously used to select the appropriate allocation mode,
-+       * but this is unsafe.
-+       */
-+      buf = kmalloc(sizeof(struct safe_buffer), GFP_ATOMIC);
-+      if (!buf) {
-+              dev_warn(dev, "%s: kmalloc failed\n", __func__);
-+              return NULL;
-+      }
-+
-+      buf->unsafe_dma_addr = dma_addr;
-+      buf->size = size;
-+      buf->direction = dir;
-+      buf->pool = pool;
-+
-+      buf->safe = bounce_alloc(pool, size, &buf->safe_dma_addr);
-+
-+      if (!buf->safe) {
-+              dev_warn(dev,
-+                       "%s: could not alloc dma memory (size=%zu)\n",
-+                       __func__, size);
-+              kfree(buf);
-+              return NULL;
-+      }
-+
-+      write_lock_irqsave(&device_info->lock, flags);
-+      list_add(&buf->node, &device_info->safe_buffers);
-+      write_unlock_irqrestore(&device_info->lock, flags);
-+
-+      return buf;
-+}
-+
-+/* determine if a buffer is from our "safe" pool */
-+static struct safe_buffer *
-+find_safe_buffer(struct dmabounce_device_info *device_info,
-+               dma_addr_t safe_dma_addr)
-+{
-+      struct safe_buffer *b, *rb = NULL;
-+      unsigned long flags;
-+
-+      read_lock_irqsave(&device_info->lock, flags);
-+
-+      list_for_each_entry(b, &device_info->safe_buffers, node)
-+              if (b->safe_dma_addr <= safe_dma_addr &&
-+                  b->safe_dma_addr + b->size > safe_dma_addr) {
-+                      rb = b;
-+                      break;
-+              }
-+
-+      read_unlock_irqrestore(&device_info->lock, flags);
-+      return rb;
-+}
-+
-+static void
-+free_safe_buffer(struct dmabounce_device_info *device_info,
-+               struct safe_buffer *buf)
-+{
-+      unsigned long flags;
-+
-+      write_lock_irqsave(&device_info->lock, flags);
-+      list_del(&buf->node);
-+      write_unlock_irqrestore(&device_info->lock, flags);
-+
-+      bounce_free(buf->pool, buf->safe, buf->size);
-+
-+      kfree(buf);
-+}
-+
-+/* ************************************************** */
-+
-+static struct safe_buffer *
-+find_safe_buffer_dev(struct device *dev, dma_addr_t dma_addr, const char *where)
-+{
-+      if (!dev || !g_dmabounce_device_info)
-+              return NULL;
-+      if (dma_mapping_error(dev, dma_addr)) {
-+              dev_err(dev, "Trying to %s invalid mapping\n", where);
-+              return NULL;
-+      }
-+      return find_safe_buffer(g_dmabounce_device_info, dma_addr);
-+}
-+
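-+/*
-+ * Copy into the bounce buffer when data flows towards the device;
-+ * unmap_single() below performs the reverse copy for DMA_FROM_DEVICE
-+ * and DMA_BIDIRECTIONAL mappings.
-+ */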
-+static dma_addr_t
-+map_single(struct device *dev, struct safe_buffer *buf, size_t size,
-+         enum dma_data_direction dir, unsigned long attrs)
-+{
-+      BUG_ON(buf->size != size);
-+      BUG_ON(buf->direction != dir);
-+
-+      dev_dbg(dev, "map: %llx->%llx\n", (u64)buf->unsafe_dma_addr,
-+              (u64)buf->safe_dma_addr);
-+
-+      if ((dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) &&
-+          !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-+              bcm2838_dma40_memcpy(buf->safe_dma_addr, buf->unsafe_dma_addr,
-+                                   size);
-+
-+      return buf->safe_dma_addr;
-+}
-+
-+static dma_addr_t
-+unmap_single(struct device *dev, struct safe_buffer *buf, size_t size,
-+           enum dma_data_direction dir, unsigned long attrs)
-+{
-+      BUG_ON(buf->size != size);
-+      BUG_ON(buf->direction != dir);
-+
-+      if ((dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
-+          !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
-+              dev_dbg(dev, "unmap: %llx->%llx\n", (u64)buf->safe_dma_addr,
-+                      (u64)buf->unsafe_dma_addr);
-+
-+              bcm2838_dma40_memcpy(buf->unsafe_dma_addr, buf->safe_dma_addr,
-+                                   size);
-+      }
-+      return buf->unsafe_dma_addr;
-+}
-+
-+/* ************************************************** */
-+
-+/*
-+ * See if a buffer address is in an 'unsafe' range.  If it is,
-+ * allocate a 'safe' buffer and copy the unsafe buffer into it.
-+ * Substitute the safe buffer for the unsafe one.
-+ * (basically move the buffer from an unsafe area to a safe one)
-+ */
-+static dma_addr_t
-+dmabounce_map_page(struct device *dev, struct page *page, unsigned long offset,
-+                 size_t size, enum dma_data_direction dir,
-+                 unsigned long attrs)
-+{
-+      struct dmabounce_device_info *device_info = g_dmabounce_device_info;
-+      dma_addr_t dma_addr;
-+
-+      dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
-+
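-+      /*
-+       * Do the normal streaming-DMA cache maintenance up front; the
-+       * bounce copy below is only applied to mappings that cross the
-+       * threshold.
-+       */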
-+      swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
-+      if (!is_device_dma_coherent(dev))
-+              __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
-+
-+      if (device_info && (dma_addr + size) > device_info->threshold) {
-+              struct safe_buffer *buf;
-+
-+              buf = alloc_safe_buffer(device_info, dma_addr, size, dir);
-+              if (!buf) {
-+                      DO_STATS(device_info->fail_count++);
-+                      return (~(dma_addr_t)0x0);
-+              }
-+
-+              DO_STATS(device_info->map_count++);
-+
-+              dma_addr = map_single(dev, buf, size, dir, attrs);
-+      }
-+      return dma_addr;
-+}
-+
-+/*
-+ * See if a mapped address was really a "safe" buffer and, if so, copy
-+ * the data from the safe buffer back to the unsafe buffer and free up
-+ * the safe buffer.  (basically return things back to the way they
-+ * should be)
-+ */
-+static void
-+dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
-+                   enum dma_data_direction dir, unsigned long attrs)
-+{
-+      struct safe_buffer *buf;
-+
-+      buf = find_safe_buffer_dev(dev, dma_addr, __func__);
-+      if (buf) {
-+              DO_STATS(g_dmabounce_device_info->unmap_count++);
-+              dma_addr = unmap_single(dev, buf, size, dir, attrs);
-+              free_safe_buffer(g_dmabounce_device_info, buf);
-+      }
-+
-+      if (!is_device_dma_coherent(dev))
-+              __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
-+      swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
-+}
-+
-+/*
-+ * A version of dmabounce_map_page that assumes the mapping has already
-+ * been created - intended for streaming operation.
-+ */
-+static void
-+dmabounce_sync_for_device(struct device *dev, dma_addr_t dma_addr, size_t size,
-+                        enum dma_data_direction dir)
-+{
-+      struct safe_buffer *buf;
-+
-+      swiotlb_sync_single_for_device(dev, dma_addr, size, dir);
-+      if (!is_device_dma_coherent(dev))
-+              __dma_map_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
-+
-+      buf = find_safe_buffer_dev(dev, dma_addr, __func__);
-+      if (buf) {
-+              DO_STATS(g_dmabounce_device_info->sync_dev_count++);
-+              map_single(dev, buf, size, dir, 0);
-+      }
-+}
-+
-+/*
-+ * A version of dmabounce_unmap_page that doesn't destroy the mapping -
-+ * intended for streaming operation.
-+ */
-+static void
-+dmabounce_sync_for_cpu(struct device *dev, dma_addr_t dma_addr,
-+                     size_t size, enum dma_data_direction dir)
-+{
-+      struct safe_buffer *buf;
-+
-+      buf = find_safe_buffer_dev(dev, dma_addr, __func__);
-+      if (buf) {
-+              DO_STATS(g_dmabounce_device_info->sync_cpu_count++);
-+              dma_addr = unmap_single(dev, buf, size, dir, 0);
-+      }
-+
-+      if (!is_device_dma_coherent(dev))
-+              __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dma_addr)), size, dir);
-+      swiotlb_sync_single_for_cpu(dev, dma_addr, size, dir);
-+}
-+
-+static int dmabounce_dma_supported(struct device *dev, u64 dma_mask)
-+{
-+      if (g_dmabounce_device_info)
-+              return 0;
-+
-+      return swiotlb_dma_supported(dev, dma_mask);
-+}
-+
-+static int dmabounce_mapping_error(struct device *dev, dma_addr_t dma_addr)
-+{
-+      return swiotlb_dma_mapping_error(dev, dma_addr);
-+}
-+
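-+/*
-+ * Only the single-page map/unmap/sync operations are intercepted for
-+ * bouncing; coherent alloc/free, mmap and the scatterlist paths fall
-+ * through to the arm64_dma_* wrappers exported from
-+ * arch/arm64/mm/dma-mapping.c.
-+ */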
-+static const struct dma_map_ops dmabounce_ops = {
-+      .alloc                  = arm64_dma_alloc,
-+      .free                   = arm64_dma_free,
-+      .mmap                   = arm64_dma_mmap,
-+      .get_sgtable            = arm64_dma_get_sgtable,
-+      .map_page               = dmabounce_map_page,
-+      .unmap_page             = dmabounce_unmap_page,
-+      .sync_single_for_cpu    = dmabounce_sync_for_cpu,
-+      .sync_single_for_device = dmabounce_sync_for_device,
-+      .map_sg                 = arm64_dma_map_sg,
-+      .unmap_sg               = arm64_dma_unmap_sg,
-+      .sync_sg_for_cpu        = arm64_dma_sync_sg_for_cpu,
-+      .sync_sg_for_device     = arm64_dma_sync_sg_for_device,
-+      .dma_supported          = dmabounce_dma_supported,
-+      .mapping_error          = dmabounce_mapping_error,
-+};
-+
-+int brcm_pcie_bounce_init(struct device *dev,
-+                        unsigned long buffer_size,
-+                        dma_addr_t threshold)
-+{
-+      struct dmabounce_device_info *device_info;
-+      int ret;
-+
-+      /* Only support a single client */
-+      if (g_dmabounce_device_info)
-+              return -EBUSY;
-+
-+      ret = bcm2838_dma40_memcpy_init();
-+      if (ret)
-+              return ret;
-+
-+      device_info = kmalloc(sizeof(struct dmabounce_device_info), GFP_ATOMIC);
-+      if (!device_info) {
-+              dev_err(dev,
-+                      "Could not allocate dmabounce_device_info\n");
-+              return -ENOMEM;
-+      }
-+
-+      ret = bounce_create(&device_info->pool, dev, buffer_size);
-+      if (ret) {
-+              dev_err(dev,
-+                      "dmabounce: could not allocate %lu byte DMA pool\n",
-+                      buffer_size);
-+              goto err_bounce;
-+      }
-+
-+      device_info->dev = dev;
-+      device_info->threshold = threshold;
-+      INIT_LIST_HEAD(&device_info->safe_buffers);
-+      rwlock_init(&device_info->lock);
-+
-+      DO_STATS(device_info->map_count = 0);
-+      DO_STATS(device_info->unmap_count = 0);
-+      DO_STATS(device_info->sync_dev_count = 0);
-+      DO_STATS(device_info->sync_cpu_count = 0);
-+      DO_STATS(device_info->fail_count = 0);
-+      DO_STATS(device_info->attr_res =
-+               device_create_file(dev, &dev_attr_dmabounce_stats));
-+
-+      g_dmabounce_device_info = device_info;
-+
-+      dev_err(dev, "dmabounce: initialised - %lu kB, threshold %pad\n",
-+               buffer_size / 1024, &threshold);
-+
-+      return 0;
-+
-+ err_bounce:
-+      kfree(device_info);
-+      return ret;
-+}
-+EXPORT_SYMBOL(brcm_pcie_bounce_init);
-+
-+void brcm_pcie_bounce_uninit(struct device *dev)
-+{
-+      struct dmabounce_device_info *device_info = g_dmabounce_device_info;
-+
-+      g_dmabounce_device_info = NULL;
-+
-+      if (!device_info) {
-+              dev_warn(dev,
-+                       "Never registered with dmabounce but attempting "
-+                       "to unregister!\n");
-+              return;
-+      }
-+
-+      if (!list_empty(&device_info->safe_buffers)) {
-+              dev_err(dev,
-+                      "Removing from dmabounce with pending buffers!\n");
-+              BUG();
-+      }
-+
-+      bounce_destroy(&device_info->pool);
-+
-+      DO_STATS(if (device_info->attr_res == 0)
-+                       device_remove_file(dev, &dev_attr_dmabounce_stats));
-+
-+      kfree(device_info);
-+}
-+EXPORT_SYMBOL(brcm_pcie_bounce_uninit);
-+
-+int brcm_pcie_bounce_register_dev(struct device *dev)
-+{
-+      set_dma_ops(dev, &dmabounce_ops);
-+
-+      return 0;
-+}
-+EXPORT_SYMBOL(brcm_pcie_bounce_register_dev);
-+
-+MODULE_AUTHOR("Phil Elwell <phil@raspberrypi.org>");
-+MODULE_DESCRIPTION("Dedicated DMA bounce support for pcie-brcmstb");
-+MODULE_LICENSE("GPL");
---- a/drivers/pci/controller/pcie-brcmstb.c
-+++ b/drivers/pci/controller/pcie-brcmstb.c
-@@ -617,28 +617,6 @@ static const struct dma_map_ops brcm_dma
- static void brcm_set_dma_ops(struct device *dev)
- {
--      int ret;
--
--      if (IS_ENABLED(CONFIG_ARM64)) {
--              /*
--               * We are going to invoke get_dma_ops().  That
--               * function, at this point in time, invokes
--               * get_arch_dma_ops(), and for ARM64 that function
--               * returns a pointer to dummy_dma_ops.  So then we'd
--               * like to call arch_setup_dma_ops(), but that isn't
--               * exported.  Instead, we call of_dma_configure(),
--               * which is exported, and this calls
--               * arch_setup_dma_ops().  Once we do this the call to
--               * get_dma_ops() will work properly because
--               * dev->dma_ops will be set.
--               */
--              ret = of_dma_configure(dev, dev->of_node, true);
--              if (ret) {
--                      dev_err(dev, "of_dma_configure() failed: %d\n", ret);
--                      return;
--              }
--      }
--
-       arch_dma_ops = get_dma_ops(dev);
-       if (!arch_dma_ops) {
-               dev_err(dev, "failed to get arch_dma_ops\n");
-@@ -657,12 +635,12 @@ static int brcmstb_platform_notifier(str
-       extern unsigned long max_pfn;
-       struct device *dev = __dev;
-       const char *rc_name = "0000:00:00.0";
-+      int ret;
-       switch (event) {
-       case BUS_NOTIFY_ADD_DEVICE:
-               if (max_pfn > (bounce_threshold/PAGE_SIZE) &&
-                   strcmp(dev->kobj.name, rc_name)) {
--                      int ret;
-                       ret = brcm_pcie_bounce_register_dev(dev);
-                       if (ret) {
-@@ -671,6 +649,12 @@ static int brcmstb_platform_notifier(str
-                                       ret);
-                               return ret;
-                       }
-+              } else if (IS_ENABLED(CONFIG_ARM64)) {
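-+                      /*
-+                       * No bouncing needed; of_dma_configure() installs
-+                       * the default arch DMA ops for this device, replacing
-+                       * the unconditional call removed from
-+                       * brcm_set_dma_ops() above.
-+                       */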
-+                      ret = of_dma_configure(dev, dev->of_node, true);
-+                      if (ret) {
-+                              dev_err(dev, "of_dma_configure() failed: %d\n", ret);
-+                              return ret;
-+                      }
-               }
-               brcm_set_dma_ops(dev);
-               return NOTIFY_OK;