brcm2708: update to latest patches from RPi Foundation
[openwrt/staging/dedeckeh.git] / target / linux / brcm2708 / patches-4.19 / 950-0576-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch
diff --git a/target/linux/brcm2708/patches-4.19/950-0576-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch b/target/linux/brcm2708/patches-4.19/950-0576-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch
deleted file mode 100644 (file)
index 105309b..0000000
+++ /dev/null
@@ -1,1206 +0,0 @@
-From 275f4673d8c0601e5dbb16e743187d264e7dbed6 Mon Sep 17 00:00:00 2001
-From: Dave Stevenson <dave.stevenson@raspberrypi.org>
-Date: Fri, 21 Dec 2018 16:50:53 +0000
-Subject: [PATCH] staging: vc-sm-cma: Add in allocation for VPU
- requests.
-
-Module has to change from tristate to bool as all CMA functions
-are boolean.
-
-Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.org>
----
- .../staging/vc04_services/vc-sm-cma/Kconfig   |   4 +-
- .../staging/vc04_services/vc-sm-cma/Makefile  |   2 +-
- .../staging/vc04_services/vc-sm-cma/vc_sm.c   | 642 +++++++++++++++---
- .../staging/vc04_services/vc-sm-cma/vc_sm.h   |  30 +-
- .../vc04_services/vc-sm-cma/vc_sm_cma.c       |  99 +++
- .../vc04_services/vc-sm-cma/vc_sm_cma.h       |  39 ++
- .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.c  |  10 +
- .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.h  |   4 +
- .../vc04_services/vc-sm-cma/vc_sm_defs.h      |   2 +
- 9 files changed, 723 insertions(+), 109 deletions(-)
- create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.c
- create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.h
-
---- a/drivers/staging/vc04_services/vc-sm-cma/Kconfig
-+++ b/drivers/staging/vc04_services/vc-sm-cma/Kconfig
-@@ -1,6 +1,6 @@
- config BCM_VC_SM_CMA
--      tristate "VideoCore Shared Memory (CMA) driver"
--      depends on BCM2835_VCHIQ
-+      bool "VideoCore Shared Memory (CMA) driver"
-+      depends on BCM2835_VCHIQ && DMA_CMA
-       select RBTREE
-       select DMA_SHARED_BUFFER
-       help
---- a/drivers/staging/vc04_services/vc-sm-cma/Makefile
-+++ b/drivers/staging/vc04_services/vc-sm-cma/Makefile
-@@ -3,6 +3,6 @@ ccflags-y += -Idrivers/staging/vc04_serv
- ccflags-y += -D__VCCOREVER__=0
- vc-sm-cma-$(CONFIG_BCM_VC_SM_CMA) := \
--      vc_sm.o vc_sm_cma_vchi.o
-+      vc_sm.o vc_sm_cma_vchi.o vc_sm_cma.o
- obj-$(CONFIG_BCM_VC_SM_CMA) += vc-sm-cma.o
---- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
-+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
-@@ -9,10 +9,21 @@
-  * and taking some code for CMA/dmabuf handling from the Android Ion
-  * driver (Google/Linaro).
-  *
-- * This is cut down version to only support import of dma_bufs from
-- * other kernel drivers. A more complete implementation of the old
-- * vmcs_sm functionality can follow later.
-  *
-+ * This driver has 3 main uses:
-+ * 1) Allocating buffers for the kernel or userspace that can be shared with the
-+ *    VPU.
-+ * 2) Importing dmabufs from elsewhere for sharing with the VPU.
-+ * 3) Allocating buffers for use by the VPU.
-+ *
-+ * In the first and second cases the native handle is a dmabuf. Releasing the
-+ * resource inherently comes from releasing the dmabuf, and this will trigger
-+ * unmapping on the VPU. The underlying allocation and our buffer structure are
-+ * retained until the VPU has confirmed that it has finished with it.
-+ *
-+ * For the VPU allocations the VPU is responsible for triggering the release,
-+ * and therefore the released message decrements the dma_buf refcount (with the
-+ * VPU mapping having already been marked as released).
-  */
- /* ---- Include Files ----------------------------------------------------- */
-@@ -39,6 +50,7 @@
- #include "vc_sm_cma_vchi.h"
- #include "vc_sm.h"
-+#include "vc_sm_cma.h"
- #include "vc_sm_knl.h"
- /* ---- Private Constants and Types --------------------------------------- */
-@@ -72,6 +84,7 @@ struct sm_state_t {
-       struct platform_device *pdev;
-       struct sm_instance *sm_handle;  /* Handle for videocore service. */
-+      struct cma *cma_heap;
-       spinlock_t kernelid_map_lock;   /* Spinlock protecting kernelid_map */
-       struct idr kernelid_map;
-@@ -80,6 +93,7 @@ struct sm_state_t {
-       struct list_head buffer_list;   /* List of buffer. */
-       struct vc_sm_privdata_t *data_knl;  /* Kernel internal data tracking. */
-+      struct vc_sm_privdata_t *vpu_allocs; /* All allocations from the VPU */
-       struct dentry *dir_root;        /* Debug fs entries root. */
-       struct sm_pde_t dir_state;      /* Debug fs entries state sub-tree. */
-@@ -89,6 +103,12 @@ struct sm_state_t {
-       u32 int_trans_id;               /* Interrupted transaction. */
- };
-+struct vc_sm_dma_buf_attachment {
-+      struct device *dev;
-+      struct sg_table *table;
-+      struct list_head list;
-+};
-+
- /* ---- Private Variables ----------------------------------------------- */
- static struct sm_state_t *sm_state;
-@@ -172,12 +192,14 @@ static int vc_sm_cma_global_state_show(s
-                                  resource->size);
-                       seq_printf(s, "           DMABUF       %p\n",
-                                  resource->dma_buf);
--                      seq_printf(s, "           ATTACH       %p\n",
--                                 resource->attach);
-+                      if (resource->imported) {
-+                              seq_printf(s, "           ATTACH       %p\n",
-+                                         resource->import.attach);
-+                              seq_printf(s, "           SGT          %p\n",
-+                                         resource->import.sgt);
-+                      }
-                       seq_printf(s, "           SG_TABLE     %p\n",
-                                  resource->sg_table);
--                      seq_printf(s, "           SGT          %p\n",
--                                 resource->sgt);
-                       seq_printf(s, "           DMA_ADDR     %pad\n",
-                                  &resource->dma_addr);
-                       seq_printf(s, "           VC_HANDLE     %08x\n",
-@@ -209,17 +231,33 @@ static void vc_sm_add_resource(struct vc
- }
- /*
-- * Release an allocation.
-- * All refcounting is done via the dma buf object.
-+ * Cleans up imported dmabuf.
-  */
--static void vc_sm_release_resource(struct vc_sm_buffer *buffer, int force)
-+static void vc_sm_clean_up_dmabuf(struct vc_sm_buffer *buffer)
- {
--      mutex_lock(&sm_state->map_lock);
--      mutex_lock(&buffer->lock);
-+      if (!buffer->imported)
-+              return;
--      pr_debug("[%s]: buffer %p (name %s, size %zu)\n",
--               __func__, buffer, buffer->name, buffer->size);
-+      /* Handle cleaning up imported dmabufs */
-+      mutex_lock(&buffer->lock);
-+      if (buffer->import.sgt) {
-+              dma_buf_unmap_attachment(buffer->import.attach,
-+                                       buffer->import.sgt,
-+                                       DMA_BIDIRECTIONAL);
-+              buffer->import.sgt = NULL;
-+      }
-+      if (buffer->import.attach) {
-+              dma_buf_detach(buffer->dma_buf, buffer->import.attach);
-+              buffer->import.attach = NULL;
-+      }
-+      mutex_unlock(&buffer->lock);
-+}
-+/*
-+ * Instructs VPU to decrement the refcount on a buffer.
-+ */
-+static void vc_sm_vpu_free(struct vc_sm_buffer *buffer)
-+{
-       if (buffer->vc_handle && buffer->vpu_state == VPU_MAPPED) {
-               struct vc_sm_free_t free = { buffer->vc_handle, 0 };
-               int status = vc_sm_cma_vchi_free(sm_state->sm_handle, &free,
-@@ -230,17 +268,32 @@ static void vc_sm_release_resource(struc
-               }
-               if (sm_state->require_released_callback) {
--                      /* Need to wait for the VPU to confirm the free */
-+                      /* Need to wait for the VPU to confirm the free. */
-                       /* Retain a reference on this until the VPU has
-                        * released it
-                        */
-                       buffer->vpu_state = VPU_UNMAPPING;
--                      goto defer;
-+              } else {
-+                      buffer->vpu_state = VPU_NOT_MAPPED;
-+                      buffer->vc_handle = 0;
-               }
--              buffer->vpu_state = VPU_NOT_MAPPED;
--              buffer->vc_handle = 0;
-       }
-+}
-+
-+/*
-+ * Release an allocation.
-+ * All refcounting is done via the dma buf object.
-+ *
-+ * Must be called with the mutex held. The function will either release the
-+ * mutex (if defering the release) or destroy it. The caller must therefore not
-+ * reuse the buffer on return.
-+ */
-+static void vc_sm_release_resource(struct vc_sm_buffer *buffer)
-+{
-+      pr_debug("[%s]: buffer %p (name %s, size %zu)\n",
-+               __func__, buffer, buffer->name, buffer->size);
-+
-       if (buffer->vc_handle) {
-               /* We've sent the unmap request but not had the response. */
-               pr_err("[%s]: Waiting for VPU unmap response on %p\n",
-@@ -248,45 +301,43 @@ static void vc_sm_release_resource(struc
-               goto defer;
-       }
-       if (buffer->in_use) {
--              /* Don't release dmabuf here - we await the release */
-+              /* dmabuf still in use - we await the release */
-               pr_err("[%s]: buffer %p is still in use\n",
-                      __func__, buffer);
-               goto defer;
-       }
--      /* Handle cleaning up imported dmabufs */
--      if (buffer->sgt) {
--              dma_buf_unmap_attachment(buffer->attach, buffer->sgt,
--                                       DMA_BIDIRECTIONAL);
--              buffer->sgt = NULL;
--      }
--      if (buffer->attach) {
--              dma_buf_detach(buffer->dma_buf, buffer->attach);
--              buffer->attach = NULL;
--      }
--
--      /* Release the dma_buf (whether ours or imported) */
--      if (buffer->import_dma_buf) {
--              dma_buf_put(buffer->import_dma_buf);
--              buffer->import_dma_buf = NULL;
--              buffer->dma_buf = NULL;
--      } else if (buffer->dma_buf) {
--              dma_buf_put(buffer->dma_buf);
--              buffer->dma_buf = NULL;
-+      /* Release the allocation (whether imported dmabuf or CMA allocation) */
-+      if (buffer->imported) {
-+              pr_debug("%s: Release imported dmabuf %p\n", __func__,
-+                       buffer->import.dma_buf);
-+              if (buffer->import.dma_buf)
-+                      dma_buf_put(buffer->import.dma_buf);
-+              else
-+                      pr_err("%s: Imported dmabuf already been put for buf %p\n",
-+                             __func__, buffer);
-+              buffer->import.dma_buf = NULL;
-+      } else {
-+              if (buffer->sg_table) {
-+                      /* Our own allocation that we need to dma_unmap_sg */
-+                      dma_unmap_sg(&sm_state->pdev->dev,
-+                                   buffer->sg_table->sgl,
-+                                   buffer->sg_table->nents,
-+                                   DMA_BIDIRECTIONAL);
-+              }
-+              pr_debug("%s: Release our allocation\n", __func__);
-+              vc_sm_cma_buffer_free(&buffer->alloc);
-+              pr_debug("%s: Release our allocation - done\n", __func__);
-       }
--      if (buffer->sg_table && !buffer->import_dma_buf) {
--              /* Our own allocation that we need to dma_unmap_sg */
--              dma_unmap_sg(&sm_state->pdev->dev, buffer->sg_table->sgl,
--                           buffer->sg_table->nents, DMA_BIDIRECTIONAL);
--      }
--      /* Free the local resource. Start by removing it from the list */
--      buffer->private = NULL;
-+      /* Free our buffer. Start by removing it from the list */
-+      mutex_lock(&sm_state->map_lock);
-       list_del(&buffer->global_buffer_list);
-+      mutex_unlock(&sm_state->map_lock);
-+      pr_debug("%s: Release our allocation - done\n", __func__);
-       mutex_unlock(&buffer->lock);
--      mutex_unlock(&sm_state->map_lock);
-       mutex_destroy(&buffer->lock);
-@@ -295,7 +346,7 @@ static void vc_sm_release_resource(struc
- defer:
-       mutex_unlock(&buffer->lock);
--      mutex_unlock(&sm_state->map_lock);
-+      return;
- }
- /* Create support for private data tracking. */
-@@ -317,16 +368,267 @@ static struct vc_sm_privdata_t *vc_sm_cm
-       return file_data;
- }
-+static struct sg_table *dup_sg_table(struct sg_table *table)
-+{
-+      struct sg_table *new_table;
-+      int ret, i;
-+      struct scatterlist *sg, *new_sg;
-+
-+      new_table = kzalloc(sizeof(*new_table), GFP_KERNEL);
-+      if (!new_table)
-+              return ERR_PTR(-ENOMEM);
-+
-+      ret = sg_alloc_table(new_table, table->nents, GFP_KERNEL);
-+      if (ret) {
-+              kfree(new_table);
-+              return ERR_PTR(-ENOMEM);
-+      }
-+
-+      new_sg = new_table->sgl;
-+      for_each_sg(table->sgl, sg, table->nents, i) {
-+              memcpy(new_sg, sg, sizeof(*sg));
-+              sg->dma_address = 0;
-+              new_sg = sg_next(new_sg);
-+      }
-+
-+      return new_table;
-+}
-+
-+static void free_duped_table(struct sg_table *table)
-+{
-+      sg_free_table(table);
-+      kfree(table);
-+}
-+
-+/* Dma buf operations for use with our own allocations */
-+
-+static int vc_sm_dma_buf_attach(struct dma_buf *dmabuf,
-+                              struct dma_buf_attachment *attachment)
-+
-+{
-+      struct vc_sm_dma_buf_attachment *a;
-+      struct sg_table *table;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
-+
-+      a = kzalloc(sizeof(*a), GFP_KERNEL);
-+      if (!a)
-+              return -ENOMEM;
-+
-+      table = dup_sg_table(buf->sg_table);
-+      if (IS_ERR(table)) {
-+              kfree(a);
-+              return -ENOMEM;
-+      }
-+
-+      a->table = table;
-+      INIT_LIST_HEAD(&a->list);
-+
-+      attachment->priv = a;
-+
-+      mutex_lock(&buf->lock);
-+      list_add(&a->list, &buf->attachments);
-+      mutex_unlock(&buf->lock);
-+      pr_debug("%s dmabuf %p attachment %p\n", __func__, dmabuf, attachment);
-+
-+      return 0;
-+}
-+
-+static void vc_sm_dma_buf_detatch(struct dma_buf *dmabuf,
-+                                struct dma_buf_attachment *attachment)
-+{
-+      struct vc_sm_dma_buf_attachment *a = attachment->priv;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
-+
-+      pr_debug("%s dmabuf %p attachment %p\n", __func__, dmabuf, attachment);
-+      free_duped_table(a->table);
-+      mutex_lock(&buf->lock);
-+      list_del(&a->list);
-+      mutex_unlock(&buf->lock);
-+
-+      kfree(a);
-+}
-+
-+static struct sg_table *vc_sm_map_dma_buf(struct dma_buf_attachment *attachment,
-+                                        enum dma_data_direction direction)
-+{
-+      struct vc_sm_dma_buf_attachment *a = attachment->priv;
-+      struct sg_table *table;
-+
-+      table = a->table;
-+
-+      if (!dma_map_sg(attachment->dev, table->sgl, table->nents,
-+                      direction))
-+              return ERR_PTR(-ENOMEM);
-+
-+      pr_debug("%s attachment %p\n", __func__, attachment);
-+      return table;
-+}
-+
-+static void vc_sm_unmap_dma_buf(struct dma_buf_attachment *attachment,
-+                              struct sg_table *table,
-+                              enum dma_data_direction direction)
-+{
-+      pr_debug("%s attachment %p\n", __func__, attachment);
-+      dma_unmap_sg(attachment->dev, table->sgl, table->nents, direction);
-+}
-+
-+static int vc_sm_dmabuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
-+{
-+      struct vc_sm_buffer *buf = dmabuf->priv;
-+      struct sg_table *table = buf->sg_table;
-+      unsigned long addr = vma->vm_start;
-+      unsigned long offset = vma->vm_pgoff * PAGE_SIZE;
-+      struct scatterlist *sg;
-+      int i;
-+      int ret = 0;
-+
-+      pr_debug("%s dmabuf %p, buf %p, vm_start %08lX\n", __func__, dmabuf,
-+               buf, addr);
-+
-+      mutex_lock(&buf->lock);
-+
-+      /* now map it to userspace */
-+      for_each_sg(table->sgl, sg, table->nents, i) {
-+              struct page *page = sg_page(sg);
-+              unsigned long remainder = vma->vm_end - addr;
-+              unsigned long len = sg->length;
-+
-+              if (offset >= sg->length) {
-+                      offset -= sg->length;
-+                      continue;
-+              } else if (offset) {
-+                      page += offset / PAGE_SIZE;
-+                      len = sg->length - offset;
-+                      offset = 0;
-+              }
-+              len = min(len, remainder);
-+              ret = remap_pfn_range(vma, addr, page_to_pfn(page), len,
-+                                    vma->vm_page_prot);
-+              if (ret)
-+                      break;
-+              addr += len;
-+              if (addr >= vma->vm_end)
-+                      break;
-+      }
-+      mutex_unlock(&buf->lock);
-+
-+      if (ret)
-+              pr_err("%s: failure mapping buffer to userspace\n",
-+                     __func__);
-+
-+      return ret;
-+}
-+
-+static void vc_sm_dma_buf_release(struct dma_buf *dmabuf)
-+{
-+      struct vc_sm_buffer *buffer;
-+
-+      if (!dmabuf)
-+              return;
-+
-+      buffer = (struct vc_sm_buffer *)dmabuf->priv;
-+
-+      mutex_lock(&buffer->lock);
-+
-+      pr_debug("%s dmabuf %p, buffer %p\n", __func__, dmabuf, buffer);
-+
-+      buffer->in_use = 0;
-+
-+      /* Unmap on the VPU */
-+      vc_sm_vpu_free(buffer);
-+      pr_debug("%s vpu_free done\n", __func__);
-+
-+      /* Unmap our dma_buf object (the vc_sm_buffer remains until released
-+       * on the VPU).
-+       */
-+      vc_sm_clean_up_dmabuf(buffer);
-+      pr_debug("%s clean_up dmabuf done\n", __func__);
-+
-+      vc_sm_release_resource(buffer);
-+      pr_debug("%s done\n", __func__);
-+}
-+
-+static int vc_sm_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-+                                        enum dma_data_direction direction)
-+{
-+      struct vc_sm_buffer *buf;
-+      struct vc_sm_dma_buf_attachment *a;
-+
-+      if (!dmabuf)
-+              return -EFAULT;
-+
-+      buf = dmabuf->priv;
-+      if (!buf)
-+              return -EFAULT;
-+
-+      mutex_lock(&buf->lock);
-+
-+      list_for_each_entry(a, &buf->attachments, list) {
-+              dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
-+                                  direction);
-+      }
-+      mutex_unlock(&buf->lock);
-+
-+      return 0;
-+}
-+
-+static int vc_sm_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-+                                      enum dma_data_direction direction)
-+{
-+      struct vc_sm_buffer *buf;
-+      struct vc_sm_dma_buf_attachment *a;
-+
-+      if (!dmabuf)
-+              return -EFAULT;
-+      buf = dmabuf->priv;
-+      if (!buf)
-+              return -EFAULT;
-+
-+      mutex_lock(&buf->lock);
-+
-+      list_for_each_entry(a, &buf->attachments, list) {
-+              dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
-+                                     direction);
-+      }
-+      mutex_unlock(&buf->lock);
-+
-+      return 0;
-+}
-+
-+static void *vc_sm_dma_buf_kmap(struct dma_buf *dmabuf, unsigned long offset)
-+{
-+      /* FIXME */
-+      return NULL;
-+}
-+
-+static void vc_sm_dma_buf_kunmap(struct dma_buf *dmabuf, unsigned long offset,
-+                               void *ptr)
-+{
-+      /* FIXME */
-+}
-+
-+static const struct dma_buf_ops dma_buf_ops = {
-+      .map_dma_buf = vc_sm_map_dma_buf,
-+      .unmap_dma_buf = vc_sm_unmap_dma_buf,
-+      .mmap = vc_sm_dmabuf_mmap,
-+      .release = vc_sm_dma_buf_release,
-+      .attach = vc_sm_dma_buf_attach,
-+      .detach = vc_sm_dma_buf_detatch,
-+      .begin_cpu_access = vc_sm_dma_buf_begin_cpu_access,
-+      .end_cpu_access = vc_sm_dma_buf_end_cpu_access,
-+      .map = vc_sm_dma_buf_kmap,
-+      .unmap = vc_sm_dma_buf_kunmap,
-+};
- /* Dma_buf operations for chaining through to an imported dma_buf */
- static
- int vc_sm_import_dma_buf_attach(struct dma_buf *dmabuf,
-                               struct dma_buf_attachment *attachment)
- {
--      struct vc_sm_buffer *res = dmabuf->priv;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
--      if (!res->import_dma_buf)
-+      if (!buf->imported)
-               return -EINVAL;
--      return res->import_dma_buf->ops->attach(res->import_dma_buf,
-+      return buf->import.dma_buf->ops->attach(buf->import.dma_buf,
-                                               attachment);
- }
-@@ -334,22 +636,23 @@ static
- void vc_sm_import_dma_buf_detatch(struct dma_buf *dmabuf,
-                                 struct dma_buf_attachment *attachment)
- {
--      struct vc_sm_buffer *res = dmabuf->priv;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
--      if (!res->import_dma_buf)
-+      if (!buf->imported)
-               return;
--      res->import_dma_buf->ops->detach(res->import_dma_buf, attachment);
-+      buf->import.dma_buf->ops->detach(buf->import.dma_buf, attachment);
- }
- static
- struct sg_table *vc_sm_import_map_dma_buf(struct dma_buf_attachment *attachment,
-                                         enum dma_data_direction direction)
- {
--      struct vc_sm_buffer *res = attachment->dmabuf->priv;
-+      struct vc_sm_buffer *buf = attachment->dmabuf->priv;
--      if (!res->import_dma_buf)
-+      if (!buf->imported)
-               return NULL;
--      return res->import_dma_buf->ops->map_dma_buf(attachment, direction);
-+      return buf->import.dma_buf->ops->map_dma_buf(attachment,
-+                                                   direction);
- }
- static
-@@ -357,87 +660,88 @@ void vc_sm_import_unmap_dma_buf(struct d
-                               struct sg_table *table,
-                               enum dma_data_direction direction)
- {
--      struct vc_sm_buffer *res = attachment->dmabuf->priv;
-+      struct vc_sm_buffer *buf = attachment->dmabuf->priv;
--      if (!res->import_dma_buf)
-+      if (!buf->imported)
-               return;
--      res->import_dma_buf->ops->unmap_dma_buf(attachment, table, direction);
-+      buf->import.dma_buf->ops->unmap_dma_buf(attachment, table, direction);
- }
- static
- int vc_sm_import_dmabuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
- {
--      struct vc_sm_buffer *res = dmabuf->priv;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
--      pr_debug("%s: mmap dma_buf %p, res %p, imported db %p\n", __func__,
--               dmabuf, res, res->import_dma_buf);
--      if (!res->import_dma_buf) {
-+      pr_debug("%s: mmap dma_buf %p, buf %p, imported db %p\n", __func__,
-+               dmabuf, buf, buf->import.dma_buf);
-+      if (!buf->imported) {
-               pr_err("%s: mmap dma_buf %p- not an imported buffer\n",
-                      __func__, dmabuf);
-               return -EINVAL;
-       }
--      return res->import_dma_buf->ops->mmap(res->import_dma_buf, vma);
-+      return buf->import.dma_buf->ops->mmap(buf->import.dma_buf, vma);
- }
- static
- void vc_sm_import_dma_buf_release(struct dma_buf *dmabuf)
- {
--      struct vc_sm_buffer *res = dmabuf->priv;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
-       pr_debug("%s: Relasing dma_buf %p\n", __func__, dmabuf);
--      if (!res->import_dma_buf)
-+      mutex_lock(&buf->lock);
-+      if (!buf->imported)
-               return;
--      res->in_use = 0;
-+      buf->in_use = 0;
--      vc_sm_release_resource(res, 0);
-+      vc_sm_vpu_free(buf);
-+
-+      vc_sm_release_resource(buf);
- }
- static
- void *vc_sm_import_dma_buf_kmap(struct dma_buf *dmabuf,
-                               unsigned long offset)
- {
--      struct vc_sm_buffer *res = dmabuf->priv;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
--      if (!res->import_dma_buf)
-+      if (!buf->imported)
-               return NULL;
--      return res->import_dma_buf->ops->map(res->import_dma_buf,
--                                                    offset);
-+      return buf->import.dma_buf->ops->map(buf->import.dma_buf, offset);
- }
- static
- void vc_sm_import_dma_buf_kunmap(struct dma_buf *dmabuf,
-                                unsigned long offset, void *ptr)
- {
--      struct vc_sm_buffer *res = dmabuf->priv;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
--      if (!res->import_dma_buf)
-+      if (!buf->imported)
-               return;
--      res->import_dma_buf->ops->unmap(res->import_dma_buf,
--                                             offset, ptr);
-+      buf->import.dma_buf->ops->unmap(buf->import.dma_buf, offset, ptr);
- }
- static
- int vc_sm_import_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-                                         enum dma_data_direction direction)
- {
--      struct vc_sm_buffer *res = dmabuf->priv;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
--      if (!res->import_dma_buf)
-+      if (!buf->imported)
-               return -EINVAL;
--      return res->import_dma_buf->ops->begin_cpu_access(res->import_dma_buf,
--                                                          direction);
-+      return buf->import.dma_buf->ops->begin_cpu_access(buf->import.dma_buf,
-+                                                        direction);
- }
- static
- int vc_sm_import_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-                                       enum dma_data_direction direction)
- {
--      struct vc_sm_buffer *res = dmabuf->priv;
-+      struct vc_sm_buffer *buf = dmabuf->priv;
--      if (!res->import_dma_buf)
-+      if (!buf->imported)
-               return -EINVAL;
--      return res->import_dma_buf->ops->end_cpu_access(res->import_dma_buf,
-+      return buf->import.dma_buf->ops->end_cpu_access(buf->import.dma_buf,
-                                                         direction);
- }
-@@ -516,9 +820,8 @@ vc_sm_cma_import_dmabuf_internal(struct
-       memcpy(import.name, VC_SM_RESOURCE_NAME_DEFAULT,
-              sizeof(VC_SM_RESOURCE_NAME_DEFAULT));
--      pr_debug("[%s]: attempt to import \"%s\" data - type %u, addr %pad, size %u\n",
--               __func__, import.name, import.type, &dma_addr,
--               import.size);
-+      pr_debug("[%s]: attempt to import \"%s\" data - type %u, addr %pad, size %u.\n",
-+               __func__, import.name, import.type, &dma_addr, import.size);
-       /* Allocate the videocore buffer. */
-       status = vc_sm_cma_vchi_import(sm_state->sm_handle, &import, &result,
-@@ -548,12 +851,14 @@ vc_sm_cma_import_dmabuf_internal(struct
-       buffer->size = import.size;
-       buffer->vpu_state = VPU_MAPPED;
--      buffer->import_dma_buf = dma_buf;
-+      buffer->imported = 1;
-+      buffer->import.dma_buf = dma_buf;
--      buffer->attach = attach;
--      buffer->sgt = sgt;
-+      buffer->import.attach = attach;
-+      buffer->import.sgt = sgt;
-       buffer->dma_addr = dma_addr;
-       buffer->in_use = 1;
-+      buffer->kernel_id = import.kernel_id;
-       /*
-        * We're done - we need to export a new dmabuf chaining through most
-@@ -594,6 +899,91 @@ error:
-       return ret;
- }
-+static int vc_sm_cma_vpu_alloc(u32 size, uint32_t align, const char *name,
-+                             u32 mem_handle, struct vc_sm_buffer **ret_buffer)
-+{
-+      DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
-+      struct vc_sm_buffer *buffer = NULL;
-+      int aligned_size;
-+      int ret = 0;
-+
-+      /* Align to the user requested align */
-+      aligned_size = ALIGN(size, align);
-+      /* and then to a page boundary */
-+      aligned_size = PAGE_ALIGN(aligned_size);
-+
-+      if (!aligned_size)
-+              return -EINVAL;
-+
-+      /* Allocate local buffer to track this allocation. */
-+      buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
-+      if (!buffer)
-+              return -ENOMEM;
-+
-+      mutex_init(&buffer->lock);
-+
-+      if (vc_sm_cma_buffer_allocate(sm_state->cma_heap, &buffer->alloc,
-+                                    aligned_size)) {
-+              pr_err("[%s]: cma alloc of %d bytes failed\n",
-+                     __func__, aligned_size);
-+              ret = -ENOMEM;
-+              goto error;
-+      }
-+      buffer->sg_table = buffer->alloc.sg_table;
-+
-+      pr_debug("[%s]: cma alloc of %d bytes success\n",
-+               __func__, aligned_size);
-+
-+      if (dma_map_sg(&sm_state->pdev->dev, buffer->sg_table->sgl,
-+                     buffer->sg_table->nents, DMA_BIDIRECTIONAL) <= 0) {
-+              pr_err("[%s]: dma_map_sg failed\n", __func__);
-+              goto error;
-+      }
-+
-+      INIT_LIST_HEAD(&buffer->attachments);
-+
-+      memcpy(buffer->name, name,
-+             min(sizeof(buffer->name), strlen(name)));
-+
-+      exp_info.ops = &dma_buf_ops;
-+      exp_info.size = aligned_size;
-+      exp_info.flags = O_RDWR;
-+      exp_info.priv = buffer;
-+
-+      buffer->dma_buf = dma_buf_export(&exp_info);
-+      if (IS_ERR(buffer->dma_buf)) {
-+              ret = PTR_ERR(buffer->dma_buf);
-+              goto error;
-+      }
-+      buffer->dma_addr = (uint32_t)sg_dma_address(buffer->sg_table->sgl);
-+      if ((buffer->dma_addr & 0xC0000000) != 0xC0000000) {
-+              pr_err("%s: Expecting an uncached alias for dma_addr %pad\n",
-+                     __func__, &buffer->dma_addr);
-+              buffer->dma_addr |= 0xC0000000;
-+      }
-+      buffer->private = sm_state->vpu_allocs;
-+
-+      buffer->vc_handle = mem_handle;
-+      buffer->vpu_state = VPU_MAPPED;
-+      buffer->vpu_allocated = 1;
-+      buffer->size = size;
-+      /*
-+       * Create an ID that will be passed along with our message so
-+       * that when we service the release reply, we can look up which
-+       * resource is being released.
-+       */
-+      buffer->kernel_id = get_kernel_id(buffer);
-+
-+      vc_sm_add_resource(sm_state->vpu_allocs, buffer);
-+
-+      *ret_buffer = buffer;
-+      return 0;
-+error:
-+      if (buffer)
-+              vc_sm_release_resource(buffer);
-+      return ret;
-+}
-+
- static void
- vc_sm_vpu_event(struct sm_instance *instance, struct vc_sm_result_t *reply,
-               int reply_len)
-@@ -612,21 +1002,61 @@ vc_sm_vpu_event(struct sm_instance *inst
-               struct vc_sm_released *release = (struct vc_sm_released *)reply;
-               struct vc_sm_buffer *buffer =
-                                       lookup_kernel_id(release->kernel_id);
-+              if (!buffer) {
-+                      pr_err("%s: VC released a buffer that is already released, kernel_id %d\n",
-+                             __func__, release->kernel_id);
-+                      break;
-+              }
-+              mutex_lock(&buffer->lock);
--              /*
--               * FIXME: Need to check buffer is still valid and allocated
--               * before continuing
--               */
-               pr_debug("%s: Released addr %08x, size %u, id %08x, mem_handle %08x\n",
-                        __func__, release->addr, release->size,
-                        release->kernel_id, release->vc_handle);
--              mutex_lock(&buffer->lock);
-+
-               buffer->vc_handle = 0;
-               buffer->vpu_state = VPU_NOT_MAPPED;
--              mutex_unlock(&buffer->lock);
-               free_kernel_id(release->kernel_id);
--              vc_sm_release_resource(buffer, 0);
-+              if (buffer->vpu_allocated) {
-+                      /* VPU allocation, so release the dmabuf which will
-+                       * trigger the clean up.
-+                       */
-+                      mutex_unlock(&buffer->lock);
-+                      dma_buf_put(buffer->dma_buf);
-+              } else {
-+                      vc_sm_release_resource(buffer);
-+              }
-+      }
-+      break;
-+      case VC_SM_MSG_TYPE_VC_MEM_REQUEST:
-+      {
-+              struct vc_sm_buffer *buffer = NULL;
-+              struct vc_sm_vc_mem_request *req =
-+                                      (struct vc_sm_vc_mem_request *)reply;
-+              struct vc_sm_vc_mem_request_result reply;
-+              int ret;
-+
-+              pr_debug("%s: Request %u bytes of memory, align %d name %s, trans_id %08x\n",
-+                       __func__, req->size, req->align, req->name,
-+                       req->trans_id);
-+              ret = vc_sm_cma_vpu_alloc(req->size, req->align, req->name,
-+                                        req->vc_handle, &buffer);
-+
-+              reply.trans_id = req->trans_id;
-+              if (!ret) {
-+                      reply.addr = buffer->dma_addr;
-+                      reply.kernel_id = buffer->kernel_id;
-+                      pr_debug("%s: Allocated resource buffer %p, addr %pad\n",
-+                               __func__, buffer, &buffer->dma_addr);
-+              } else {
-+                      pr_err("%s: Allocation failed size %u, name %s, vc_handle %u\n",
-+                             __func__, req->size, req->name, req->vc_handle);
-+                      reply.addr = 0;
-+                      reply.kernel_id = 0;
-+              }
-+              vc_sm_vchi_client_vc_mem_req_reply(sm_state->sm_handle, &reply,
-+                                                 &sm_state->int_trans_id);
-+              break;
-       }
-       break;
-       default:
-@@ -645,6 +1075,14 @@ static void vc_sm_connected_init(void)
-       pr_info("[%s]: start\n", __func__);
-+      if (vc_sm_cma_add_heaps(&sm_state->cma_heap) ||
-+          !sm_state->cma_heap) {
-+              pr_err("[%s]: failed to initialise CMA heaps\n",
-+                     __func__);
-+              ret = -EIO;
-+              goto err_free_mem;
-+      }
-+
-       /*
-        * Initialize and create a VCHI connection for the shared memory service
-        * running on videocore.
-@@ -696,7 +1134,7 @@ static void vc_sm_connected_init(void)
-               goto err_remove_shared_memory;
-       }
--      version.version = 1;
-+      version.version = 2;
-       ret = vc_sm_cma_vchi_client_version(sm_state->sm_handle, &version,
-                                           &version_result,
-                                           &sm_state->int_trans_id);
-@@ -768,7 +1206,7 @@ static int bcm2835_vc_sm_cma_remove(stru
- int vc_sm_cma_int_handle(void *handle)
- {
-       struct dma_buf *dma_buf = (struct dma_buf *)handle;
--      struct vc_sm_buffer *res;
-+      struct vc_sm_buffer *buf;
-       /* Validate we can work with this device. */
-       if (!sm_state || !handle) {
-@@ -776,8 +1214,8 @@ int vc_sm_cma_int_handle(void *handle)
-               return 0;
-       }
--      res = (struct vc_sm_buffer *)dma_buf->priv;
--      return res->vc_handle;
-+      buf = (struct vc_sm_buffer *)dma_buf->priv;
-+      return buf->vc_handle;
- }
- EXPORT_SYMBOL_GPL(vc_sm_cma_int_handle);
-@@ -804,7 +1242,7 @@ EXPORT_SYMBOL_GPL(vc_sm_cma_free);
- int vc_sm_cma_import_dmabuf(struct dma_buf *src_dmabuf, void **handle)
- {
-       struct dma_buf *new_dma_buf;
--      struct vc_sm_buffer *res;
-+      struct vc_sm_buffer *buf;
-       int ret;
-       /* Validate we can work with this device. */
-@@ -818,7 +1256,7 @@ int vc_sm_cma_import_dmabuf(struct dma_b
-       if (!ret) {
-               pr_debug("%s: imported to ptr %p\n", __func__, new_dma_buf);
--              res = (struct vc_sm_buffer *)new_dma_buf->priv;
-+              buf = (struct vc_sm_buffer *)new_dma_buf->priv;
-               /* Assign valid handle at this time.*/
-               *handle = new_dma_buf;
---- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
-+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
-@@ -21,6 +21,8 @@
- #include <linux/types.h>
- #include <linux/miscdevice.h>
-+#include "vc_sm_cma.h"
-+
- #define VC_SM_MAX_NAME_LEN 32
- enum vc_sm_vpu_mapping_state {
-@@ -29,31 +31,51 @@ enum vc_sm_vpu_mapping_state {
-       VPU_UNMAPPING
- };
-+struct vc_sm_imported {
-+      struct dma_buf *dma_buf;
-+      struct dma_buf_attachment *attach;
-+      struct sg_table *sgt;
-+};
-+
- struct vc_sm_buffer {
-       struct list_head global_buffer_list;    /* Global list of buffers. */
-+      /* Index in the kernel_id idr so that we can find the
-+       * mmal_msg_context again when servicing the VCHI reply.
-+       */
-+      int kernel_id;
-+
-       size_t size;
-       /* Lock over all the following state for this buffer */
-       struct mutex lock;
--      struct sg_table *sg_table;
-       struct list_head attachments;
-       char name[VC_SM_MAX_NAME_LEN];
-       int in_use:1;   /* Kernel is still using this resource */
-+      int imported:1; /* Imported dmabuf */
-+
-+      struct sg_table *sg_table;
-       enum vc_sm_vpu_mapping_state vpu_state;
-       u32 vc_handle;  /* VideoCore handle for this buffer */
-+      int vpu_allocated;      /*
-+                               * The VPU made this allocation. Release the
-+                               * local dma_buf when the VPU releases the
-+                               * resource.
-+                               */
-       /* DMABUF related fields */
--      struct dma_buf *import_dma_buf;
-       struct dma_buf *dma_buf;
--      struct dma_buf_attachment *attach;
--      struct sg_table *sgt;
-       dma_addr_t dma_addr;
-       struct vc_sm_privdata_t *private;
-+
-+      union {
-+              struct vc_sm_cma_alloc_data alloc;
-+              struct vc_sm_imported import;
-+      };
- };
- #endif
---- /dev/null
-+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.c
-@@ -0,0 +1,99 @@
-+// SPDX-License-Identifier: GPL-2.0
-+/*
-+ * VideoCore Shared Memory CMA allocator
-+ *
-+ * Copyright: 2018, Raspberry Pi (Trading) Ltd
-+ *
-+ * Based on the Android ION allocator
-+ * Copyright (C) Linaro 2012
-+ * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
-+ *
-+ */
-+
-+#include <linux/slab.h>
-+#include <linux/errno.h>
-+#include <linux/err.h>
-+#include <linux/cma.h>
-+#include <linux/scatterlist.h>
-+
-+#include "vc_sm_cma.h"
-+
-+/* CMA heap operations functions */
-+int vc_sm_cma_buffer_allocate(struct cma *cma_heap,
-+                            struct vc_sm_cma_alloc_data *buffer,
-+                            unsigned long len)
-+{
-+      /* len should already be page aligned */
-+      unsigned long num_pages = len / PAGE_SIZE;
-+      struct sg_table *table;
-+      struct page *pages;
-+      int ret;
-+
-+      pages = cma_alloc(cma_heap, num_pages, 0, GFP_KERNEL);
-+      if (!pages)
-+              return -ENOMEM;
-+
-+      table = kmalloc(sizeof(*table), GFP_KERNEL);
-+      if (!table)
-+              goto err;
-+
-+      ret = sg_alloc_table(table, 1, GFP_KERNEL);
-+      if (ret)
-+              goto free_mem;
-+
-+      sg_set_page(table->sgl, pages, len, 0);
-+
-+      buffer->priv_virt = pages;
-+      buffer->sg_table = table;
-+      buffer->cma_heap = cma_heap;
-+      buffer->num_pages = num_pages;
-+      return 0;
-+
-+free_mem:
-+      kfree(table);
-+err:
-+      cma_release(cma_heap, pages, num_pages);
-+      return -ENOMEM;
-+}
-+
-+void vc_sm_cma_buffer_free(struct vc_sm_cma_alloc_data *buffer)
-+{
-+      struct cma *cma_heap = buffer->cma_heap;
-+      struct page *pages = buffer->priv_virt;
-+
-+      /* release memory */
-+      if (cma_heap)
-+              cma_release(cma_heap, pages, buffer->num_pages);
-+
-+      /* release sg table */
-+      if (buffer->sg_table) {
-+              sg_free_table(buffer->sg_table);
-+              kfree(buffer->sg_table);
-+              buffer->sg_table = NULL;
-+      }
-+}
-+
-+int __vc_sm_cma_add_heaps(struct cma *cma, void *priv)
-+{
-+      struct cma **heap = (struct cma **)priv;
-+      const char *name = cma_get_name(cma);
-+
-+      if (!(*heap)) {
-+              phys_addr_t phys_addr = cma_get_base(cma);
-+
-+              pr_debug("%s: Adding cma heap %s (start %pap, size %lu) for use by vcsm\n",
-+                       __func__, name, &phys_addr, cma_get_size(cma));
-+              *heap = cma;
-+      } else {
-+              pr_err("%s: Ignoring heap %s as already set\n",
-+                     __func__, name);
-+      }
-+
-+      return 0;
-+}
-+
-+int vc_sm_cma_add_heaps(struct cma **cma_heap)
-+{
-+      cma_for_each_area(__vc_sm_cma_add_heaps, cma_heap);
-+      return 0;
-+}
---- /dev/null
-+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.h
-@@ -0,0 +1,39 @@
-+/* SPDX-License-Identifier: GPL-2.0 */
-+
-+/*
-+ * VideoCore Shared Memory CMA allocator
-+ *
-+ * Copyright: 2018, Raspberry Pi (Trading) Ltd
-+ *
-+ * Based on the Android ION allocator
-+ * Copyright (C) Linaro 2012
-+ * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
-+ *
-+ * This software is licensed under the terms of the GNU General Public
-+ * License version 2, as published by the Free Software Foundation, and
-+ * may be copied, distributed, and modified under those terms.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ */
-+#ifndef VC_SM_CMA_H
-+#define VC_SM_CMA_H
-+
-+struct vc_sm_cma_alloc_data {
-+      struct cma *cma_heap;
-+      unsigned long num_pages;
-+      void *priv_virt;
-+      struct sg_table *sg_table;
-+};
-+
-+int vc_sm_cma_buffer_allocate(struct cma *cma_heap,
-+                            struct vc_sm_cma_alloc_data *buffer,
-+                            unsigned long len);
-+void vc_sm_cma_buffer_free(struct vc_sm_cma_alloc_data *buffer);
-+
-+int vc_sm_cma_add_heaps(struct cma **cma_heap);
-+
-+#endif
---- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
-+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
-@@ -500,3 +500,13 @@ int vc_sm_cma_vchi_client_version(struct
-                                  msg, sizeof(*msg), NULL, 0,
-                                  cur_trans_id, 0);
- }
-+
-+int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle,
-+                                     struct vc_sm_vc_mem_request_result *msg,
-+                                     uint32_t *cur_trans_id)
-+{
-+      return vc_sm_cma_vchi_send_msg(handle,
-+                                     VC_SM_MSG_TYPE_VC_MEM_REQUEST_REPLY,
-+                                     msg, sizeof(*msg), 0, 0, cur_trans_id,
-+                                     0);
-+}
---- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
-+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
-@@ -56,4 +56,8 @@ int vc_sm_cma_vchi_client_version(struct
-                                 struct vc_sm_result_t *result,
-                                 u32 *cur_trans_id);
-+int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle,
-+                                     struct vc_sm_vc_mem_request_result *msg,
-+                                     uint32_t *cur_trans_id);
-+
- #endif /* __VC_SM_CMA_VCHI_H__INCLUDED__ */
---- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
-+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
-@@ -264,6 +264,8 @@ struct vc_sm_vc_mem_request {
-       u32 align;
-       /* resource name (for easier tracking) */
-       char     name[VC_SM_RESOURCE_NAME];
-+      /* VPU handle for the resource */
-+      u32 vc_handle;
- };
- /* Response from the kernel to provide the VPU with some memory */