X-Git-Url: http://git.openwrt.org/?a=blobdiff_plain;f=target%2Flinux%2Fbrcm2708%2Fpatches-4.19%2F950-0576-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch;fp=target%2Flinux%2Fbrcm2708%2Fpatches-4.19%2F950-0576-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch;h=105309b4394124a3a2896d8841b1bc589ba63ce5;hb=67dcc43f3a22dc3a7ac07a7065971b426feeb043;hp=0000000000000000000000000000000000000000;hpb=47a93a810f78adce5a130d287f82b28e9b54313c;p=openwrt%2Fstaging%2Fchunkeey.git diff --git a/target/linux/brcm2708/patches-4.19/950-0576-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch b/target/linux/brcm2708/patches-4.19/950-0576-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch new file mode 100644 index 0000000000..105309b439 --- /dev/null +++ b/target/linux/brcm2708/patches-4.19/950-0576-staging-vc-sm-cma-Add-in-allocation-for-VPU-requests.patch @@ -0,0 +1,1206 @@ +From 275f4673d8c0601e5dbb16e743187d264e7dbed6 Mon Sep 17 00:00:00 2001 +From: Dave Stevenson +Date: Fri, 21 Dec 2018 16:50:53 +0000 +Subject: [PATCH] staging: vc-sm-cma: Add in allocation for VPU + requests. + +Module has to change from tristate to bool as all CMA functions +are boolean. + +Signed-off-by: Dave Stevenson +--- + .../staging/vc04_services/vc-sm-cma/Kconfig | 4 +- + .../staging/vc04_services/vc-sm-cma/Makefile | 2 +- + .../staging/vc04_services/vc-sm-cma/vc_sm.c | 642 +++++++++++++++--- + .../staging/vc04_services/vc-sm-cma/vc_sm.h | 30 +- + .../vc04_services/vc-sm-cma/vc_sm_cma.c | 99 +++ + .../vc04_services/vc-sm-cma/vc_sm_cma.h | 39 ++ + .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.c | 10 + + .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.h | 4 + + .../vc04_services/vc-sm-cma/vc_sm_defs.h | 2 + + 9 files changed, 723 insertions(+), 109 deletions(-) + create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.c + create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.h + +--- a/drivers/staging/vc04_services/vc-sm-cma/Kconfig ++++ b/drivers/staging/vc04_services/vc-sm-cma/Kconfig +@@ -1,6 +1,6 @@ + config BCM_VC_SM_CMA +- tristate "VideoCore Shared Memory (CMA) driver" +- depends on BCM2835_VCHIQ ++ bool "VideoCore Shared Memory (CMA) driver" ++ depends on BCM2835_VCHIQ && DMA_CMA + select RBTREE + select DMA_SHARED_BUFFER + help +--- a/drivers/staging/vc04_services/vc-sm-cma/Makefile ++++ b/drivers/staging/vc04_services/vc-sm-cma/Makefile +@@ -3,6 +3,6 @@ ccflags-y += -Idrivers/staging/vc04_serv + ccflags-y += -D__VCCOREVER__=0 + + vc-sm-cma-$(CONFIG_BCM_VC_SM_CMA) := \ +- vc_sm.o vc_sm_cma_vchi.o ++ vc_sm.o vc_sm_cma_vchi.o vc_sm_cma.o + + obj-$(CONFIG_BCM_VC_SM_CMA) += vc-sm-cma.o +--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c ++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c +@@ -9,10 +9,21 @@ + * and taking some code for CMA/dmabuf handling from the Android Ion + * driver (Google/Linaro). + * +- * This is cut down version to only support import of dma_bufs from +- * other kernel drivers. A more complete implementation of the old +- * vmcs_sm functionality can follow later. + * ++ * This driver has 3 main uses: ++ * 1) Allocating buffers for the kernel or userspace that can be shared with the ++ * VPU. ++ * 2) Importing dmabufs from elsewhere for sharing with the VPU. ++ * 3) Allocating buffers for use by the VPU. ++ * ++ * In the first and second cases the native handle is a dmabuf. Releasing the ++ * resource inherently comes from releasing the dmabuf, and this will trigger ++ * unmapping on the VPU. 
The underlying allocation and our buffer structure are ++ * retained until the VPU has confirmed that it has finished with it. ++ * ++ * For the VPU allocations the VPU is responsible for triggering the release, ++ * and therefore the released message decrements the dma_buf refcount (with the ++ * VPU mapping having already been marked as released). + */ + + /* ---- Include Files ----------------------------------------------------- */ +@@ -39,6 +50,7 @@ + #include "vc_sm_cma_vchi.h" + + #include "vc_sm.h" ++#include "vc_sm_cma.h" + #include "vc_sm_knl.h" + + /* ---- Private Constants and Types --------------------------------------- */ +@@ -72,6 +84,7 @@ struct sm_state_t { + struct platform_device *pdev; + + struct sm_instance *sm_handle; /* Handle for videocore service. */ ++ struct cma *cma_heap; + + spinlock_t kernelid_map_lock; /* Spinlock protecting kernelid_map */ + struct idr kernelid_map; +@@ -80,6 +93,7 @@ struct sm_state_t { + struct list_head buffer_list; /* List of buffer. */ + + struct vc_sm_privdata_t *data_knl; /* Kernel internal data tracking. */ ++ struct vc_sm_privdata_t *vpu_allocs; /* All allocations from the VPU */ + struct dentry *dir_root; /* Debug fs entries root. */ + struct sm_pde_t dir_state; /* Debug fs entries state sub-tree. */ + +@@ -89,6 +103,12 @@ struct sm_state_t { + u32 int_trans_id; /* Interrupted transaction. */ + }; + ++struct vc_sm_dma_buf_attachment { ++ struct device *dev; ++ struct sg_table *table; ++ struct list_head list; ++}; ++ + /* ---- Private Variables ----------------------------------------------- */ + + static struct sm_state_t *sm_state; +@@ -172,12 +192,14 @@ static int vc_sm_cma_global_state_show(s + resource->size); + seq_printf(s, " DMABUF %p\n", + resource->dma_buf); +- seq_printf(s, " ATTACH %p\n", +- resource->attach); ++ if (resource->imported) { ++ seq_printf(s, " ATTACH %p\n", ++ resource->import.attach); ++ seq_printf(s, " SGT %p\n", ++ resource->import.sgt); ++ } + seq_printf(s, " SG_TABLE %p\n", + resource->sg_table); +- seq_printf(s, " SGT %p\n", +- resource->sgt); + seq_printf(s, " DMA_ADDR %pad\n", + &resource->dma_addr); + seq_printf(s, " VC_HANDLE %08x\n", +@@ -209,17 +231,33 @@ static void vc_sm_add_resource(struct vc + } + + /* +- * Release an allocation. +- * All refcounting is done via the dma buf object. ++ * Cleans up imported dmabuf. + */ +-static void vc_sm_release_resource(struct vc_sm_buffer *buffer, int force) ++static void vc_sm_clean_up_dmabuf(struct vc_sm_buffer *buffer) + { +- mutex_lock(&sm_state->map_lock); +- mutex_lock(&buffer->lock); ++ if (!buffer->imported) ++ return; + +- pr_debug("[%s]: buffer %p (name %s, size %zu)\n", +- __func__, buffer, buffer->name, buffer->size); ++ /* Handle cleaning up imported dmabufs */ ++ mutex_lock(&buffer->lock); ++ if (buffer->import.sgt) { ++ dma_buf_unmap_attachment(buffer->import.attach, ++ buffer->import.sgt, ++ DMA_BIDIRECTIONAL); ++ buffer->import.sgt = NULL; ++ } ++ if (buffer->import.attach) { ++ dma_buf_detach(buffer->dma_buf, buffer->import.attach); ++ buffer->import.attach = NULL; ++ } ++ mutex_unlock(&buffer->lock); ++} + ++/* ++ * Instructs VPU to decrement the refcount on a buffer. 
++ */ ++static void vc_sm_vpu_free(struct vc_sm_buffer *buffer) ++{ + if (buffer->vc_handle && buffer->vpu_state == VPU_MAPPED) { + struct vc_sm_free_t free = { buffer->vc_handle, 0 }; + int status = vc_sm_cma_vchi_free(sm_state->sm_handle, &free, +@@ -230,17 +268,32 @@ static void vc_sm_release_resource(struc + } + + if (sm_state->require_released_callback) { +- /* Need to wait for the VPU to confirm the free */ ++ /* Need to wait for the VPU to confirm the free. */ + + /* Retain a reference on this until the VPU has + * released it + */ + buffer->vpu_state = VPU_UNMAPPING; +- goto defer; ++ } else { ++ buffer->vpu_state = VPU_NOT_MAPPED; ++ buffer->vc_handle = 0; + } +- buffer->vpu_state = VPU_NOT_MAPPED; +- buffer->vc_handle = 0; + } ++} ++ ++/* ++ * Release an allocation. ++ * All refcounting is done via the dma buf object. ++ * ++ * Must be called with the mutex held. The function will either release the ++ * mutex (if defering the release) or destroy it. The caller must therefore not ++ * reuse the buffer on return. ++ */ ++static void vc_sm_release_resource(struct vc_sm_buffer *buffer) ++{ ++ pr_debug("[%s]: buffer %p (name %s, size %zu)\n", ++ __func__, buffer, buffer->name, buffer->size); ++ + if (buffer->vc_handle) { + /* We've sent the unmap request but not had the response. */ + pr_err("[%s]: Waiting for VPU unmap response on %p\n", +@@ -248,45 +301,43 @@ static void vc_sm_release_resource(struc + goto defer; + } + if (buffer->in_use) { +- /* Don't release dmabuf here - we await the release */ ++ /* dmabuf still in use - we await the release */ + pr_err("[%s]: buffer %p is still in use\n", + __func__, buffer); + goto defer; + } + +- /* Handle cleaning up imported dmabufs */ +- if (buffer->sgt) { +- dma_buf_unmap_attachment(buffer->attach, buffer->sgt, +- DMA_BIDIRECTIONAL); +- buffer->sgt = NULL; +- } +- if (buffer->attach) { +- dma_buf_detach(buffer->dma_buf, buffer->attach); +- buffer->attach = NULL; +- } +- +- /* Release the dma_buf (whether ours or imported) */ +- if (buffer->import_dma_buf) { +- dma_buf_put(buffer->import_dma_buf); +- buffer->import_dma_buf = NULL; +- buffer->dma_buf = NULL; +- } else if (buffer->dma_buf) { +- dma_buf_put(buffer->dma_buf); +- buffer->dma_buf = NULL; ++ /* Release the allocation (whether imported dmabuf or CMA allocation) */ ++ if (buffer->imported) { ++ pr_debug("%s: Release imported dmabuf %p\n", __func__, ++ buffer->import.dma_buf); ++ if (buffer->import.dma_buf) ++ dma_buf_put(buffer->import.dma_buf); ++ else ++ pr_err("%s: Imported dmabuf already been put for buf %p\n", ++ __func__, buffer); ++ buffer->import.dma_buf = NULL; ++ } else { ++ if (buffer->sg_table) { ++ /* Our own allocation that we need to dma_unmap_sg */ ++ dma_unmap_sg(&sm_state->pdev->dev, ++ buffer->sg_table->sgl, ++ buffer->sg_table->nents, ++ DMA_BIDIRECTIONAL); ++ } ++ pr_debug("%s: Release our allocation\n", __func__); ++ vc_sm_cma_buffer_free(&buffer->alloc); ++ pr_debug("%s: Release our allocation - done\n", __func__); + } + +- if (buffer->sg_table && !buffer->import_dma_buf) { +- /* Our own allocation that we need to dma_unmap_sg */ +- dma_unmap_sg(&sm_state->pdev->dev, buffer->sg_table->sgl, +- buffer->sg_table->nents, DMA_BIDIRECTIONAL); +- } + +- /* Free the local resource. Start by removing it from the list */ +- buffer->private = NULL; ++ /* Free our buffer. 
Start by removing it from the list */ ++ mutex_lock(&sm_state->map_lock); + list_del(&buffer->global_buffer_list); ++ mutex_unlock(&sm_state->map_lock); + ++ pr_debug("%s: Release our allocation - done\n", __func__); + mutex_unlock(&buffer->lock); +- mutex_unlock(&sm_state->map_lock); + + mutex_destroy(&buffer->lock); + +@@ -295,7 +346,7 @@ static void vc_sm_release_resource(struc + + defer: + mutex_unlock(&buffer->lock); +- mutex_unlock(&sm_state->map_lock); ++ return; + } + + /* Create support for private data tracking. */ +@@ -317,16 +368,267 @@ static struct vc_sm_privdata_t *vc_sm_cm + return file_data; + } + ++static struct sg_table *dup_sg_table(struct sg_table *table) ++{ ++ struct sg_table *new_table; ++ int ret, i; ++ struct scatterlist *sg, *new_sg; ++ ++ new_table = kzalloc(sizeof(*new_table), GFP_KERNEL); ++ if (!new_table) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = sg_alloc_table(new_table, table->nents, GFP_KERNEL); ++ if (ret) { ++ kfree(new_table); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ new_sg = new_table->sgl; ++ for_each_sg(table->sgl, sg, table->nents, i) { ++ memcpy(new_sg, sg, sizeof(*sg)); ++ sg->dma_address = 0; ++ new_sg = sg_next(new_sg); ++ } ++ ++ return new_table; ++} ++ ++static void free_duped_table(struct sg_table *table) ++{ ++ sg_free_table(table); ++ kfree(table); ++} ++ ++/* Dma buf operations for use with our own allocations */ ++ ++static int vc_sm_dma_buf_attach(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) ++ ++{ ++ struct vc_sm_dma_buf_attachment *a; ++ struct sg_table *table; ++ struct vc_sm_buffer *buf = dmabuf->priv; ++ ++ a = kzalloc(sizeof(*a), GFP_KERNEL); ++ if (!a) ++ return -ENOMEM; ++ ++ table = dup_sg_table(buf->sg_table); ++ if (IS_ERR(table)) { ++ kfree(a); ++ return -ENOMEM; ++ } ++ ++ a->table = table; ++ INIT_LIST_HEAD(&a->list); ++ ++ attachment->priv = a; ++ ++ mutex_lock(&buf->lock); ++ list_add(&a->list, &buf->attachments); ++ mutex_unlock(&buf->lock); ++ pr_debug("%s dmabuf %p attachment %p\n", __func__, dmabuf, attachment); ++ ++ return 0; ++} ++ ++static void vc_sm_dma_buf_detatch(struct dma_buf *dmabuf, ++ struct dma_buf_attachment *attachment) ++{ ++ struct vc_sm_dma_buf_attachment *a = attachment->priv; ++ struct vc_sm_buffer *buf = dmabuf->priv; ++ ++ pr_debug("%s dmabuf %p attachment %p\n", __func__, dmabuf, attachment); ++ free_duped_table(a->table); ++ mutex_lock(&buf->lock); ++ list_del(&a->list); ++ mutex_unlock(&buf->lock); ++ ++ kfree(a); ++} ++ ++static struct sg_table *vc_sm_map_dma_buf(struct dma_buf_attachment *attachment, ++ enum dma_data_direction direction) ++{ ++ struct vc_sm_dma_buf_attachment *a = attachment->priv; ++ struct sg_table *table; ++ ++ table = a->table; ++ ++ if (!dma_map_sg(attachment->dev, table->sgl, table->nents, ++ direction)) ++ return ERR_PTR(-ENOMEM); ++ ++ pr_debug("%s attachment %p\n", __func__, attachment); ++ return table; ++} ++ ++static void vc_sm_unmap_dma_buf(struct dma_buf_attachment *attachment, ++ struct sg_table *table, ++ enum dma_data_direction direction) ++{ ++ pr_debug("%s attachment %p\n", __func__, attachment); ++ dma_unmap_sg(attachment->dev, table->sgl, table->nents, direction); ++} ++ ++static int vc_sm_dmabuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) ++{ ++ struct vc_sm_buffer *buf = dmabuf->priv; ++ struct sg_table *table = buf->sg_table; ++ unsigned long addr = vma->vm_start; ++ unsigned long offset = vma->vm_pgoff * PAGE_SIZE; ++ struct scatterlist *sg; ++ int i; ++ int ret = 0; ++ ++ pr_debug("%s dmabuf %p, buf %p, vm_start 
%08lX\n", __func__, dmabuf, ++ buf, addr); ++ ++ mutex_lock(&buf->lock); ++ ++ /* now map it to userspace */ ++ for_each_sg(table->sgl, sg, table->nents, i) { ++ struct page *page = sg_page(sg); ++ unsigned long remainder = vma->vm_end - addr; ++ unsigned long len = sg->length; ++ ++ if (offset >= sg->length) { ++ offset -= sg->length; ++ continue; ++ } else if (offset) { ++ page += offset / PAGE_SIZE; ++ len = sg->length - offset; ++ offset = 0; ++ } ++ len = min(len, remainder); ++ ret = remap_pfn_range(vma, addr, page_to_pfn(page), len, ++ vma->vm_page_prot); ++ if (ret) ++ break; ++ addr += len; ++ if (addr >= vma->vm_end) ++ break; ++ } ++ mutex_unlock(&buf->lock); ++ ++ if (ret) ++ pr_err("%s: failure mapping buffer to userspace\n", ++ __func__); ++ ++ return ret; ++} ++ ++static void vc_sm_dma_buf_release(struct dma_buf *dmabuf) ++{ ++ struct vc_sm_buffer *buffer; ++ ++ if (!dmabuf) ++ return; ++ ++ buffer = (struct vc_sm_buffer *)dmabuf->priv; ++ ++ mutex_lock(&buffer->lock); ++ ++ pr_debug("%s dmabuf %p, buffer %p\n", __func__, dmabuf, buffer); ++ ++ buffer->in_use = 0; ++ ++ /* Unmap on the VPU */ ++ vc_sm_vpu_free(buffer); ++ pr_debug("%s vpu_free done\n", __func__); ++ ++ /* Unmap our dma_buf object (the vc_sm_buffer remains until released ++ * on the VPU). ++ */ ++ vc_sm_clean_up_dmabuf(buffer); ++ pr_debug("%s clean_up dmabuf done\n", __func__); ++ ++ vc_sm_release_resource(buffer); ++ pr_debug("%s done\n", __func__); ++} ++ ++static int vc_sm_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction direction) ++{ ++ struct vc_sm_buffer *buf; ++ struct vc_sm_dma_buf_attachment *a; ++ ++ if (!dmabuf) ++ return -EFAULT; ++ ++ buf = dmabuf->priv; ++ if (!buf) ++ return -EFAULT; ++ ++ mutex_lock(&buf->lock); ++ ++ list_for_each_entry(a, &buf->attachments, list) { ++ dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents, ++ direction); ++ } ++ mutex_unlock(&buf->lock); ++ ++ return 0; ++} ++ ++static int vc_sm_dma_buf_end_cpu_access(struct dma_buf *dmabuf, ++ enum dma_data_direction direction) ++{ ++ struct vc_sm_buffer *buf; ++ struct vc_sm_dma_buf_attachment *a; ++ ++ if (!dmabuf) ++ return -EFAULT; ++ buf = dmabuf->priv; ++ if (!buf) ++ return -EFAULT; ++ ++ mutex_lock(&buf->lock); ++ ++ list_for_each_entry(a, &buf->attachments, list) { ++ dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents, ++ direction); ++ } ++ mutex_unlock(&buf->lock); ++ ++ return 0; ++} ++ ++static void *vc_sm_dma_buf_kmap(struct dma_buf *dmabuf, unsigned long offset) ++{ ++ /* FIXME */ ++ return NULL; ++} ++ ++static void vc_sm_dma_buf_kunmap(struct dma_buf *dmabuf, unsigned long offset, ++ void *ptr) ++{ ++ /* FIXME */ ++} ++ ++static const struct dma_buf_ops dma_buf_ops = { ++ .map_dma_buf = vc_sm_map_dma_buf, ++ .unmap_dma_buf = vc_sm_unmap_dma_buf, ++ .mmap = vc_sm_dmabuf_mmap, ++ .release = vc_sm_dma_buf_release, ++ .attach = vc_sm_dma_buf_attach, ++ .detach = vc_sm_dma_buf_detatch, ++ .begin_cpu_access = vc_sm_dma_buf_begin_cpu_access, ++ .end_cpu_access = vc_sm_dma_buf_end_cpu_access, ++ .map = vc_sm_dma_buf_kmap, ++ .unmap = vc_sm_dma_buf_kunmap, ++}; + /* Dma_buf operations for chaining through to an imported dma_buf */ + static + int vc_sm_import_dma_buf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment) + { +- struct vc_sm_buffer *res = dmabuf->priv; ++ struct vc_sm_buffer *buf = dmabuf->priv; + +- if (!res->import_dma_buf) ++ if (!buf->imported) + return -EINVAL; +- return res->import_dma_buf->ops->attach(res->import_dma_buf, ++ return 
buf->import.dma_buf->ops->attach(buf->import.dma_buf, + attachment); + } + +@@ -334,22 +636,23 @@ static + void vc_sm_import_dma_buf_detatch(struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment) + { +- struct vc_sm_buffer *res = dmabuf->priv; ++ struct vc_sm_buffer *buf = dmabuf->priv; + +- if (!res->import_dma_buf) ++ if (!buf->imported) + return; +- res->import_dma_buf->ops->detach(res->import_dma_buf, attachment); ++ buf->import.dma_buf->ops->detach(buf->import.dma_buf, attachment); + } + + static + struct sg_table *vc_sm_import_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction direction) + { +- struct vc_sm_buffer *res = attachment->dmabuf->priv; ++ struct vc_sm_buffer *buf = attachment->dmabuf->priv; + +- if (!res->import_dma_buf) ++ if (!buf->imported) + return NULL; +- return res->import_dma_buf->ops->map_dma_buf(attachment, direction); ++ return buf->import.dma_buf->ops->map_dma_buf(attachment, ++ direction); + } + + static +@@ -357,87 +660,88 @@ void vc_sm_import_unmap_dma_buf(struct d + struct sg_table *table, + enum dma_data_direction direction) + { +- struct vc_sm_buffer *res = attachment->dmabuf->priv; ++ struct vc_sm_buffer *buf = attachment->dmabuf->priv; + +- if (!res->import_dma_buf) ++ if (!buf->imported) + return; +- res->import_dma_buf->ops->unmap_dma_buf(attachment, table, direction); ++ buf->import.dma_buf->ops->unmap_dma_buf(attachment, table, direction); + } + + static + int vc_sm_import_dmabuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) + { +- struct vc_sm_buffer *res = dmabuf->priv; ++ struct vc_sm_buffer *buf = dmabuf->priv; + +- pr_debug("%s: mmap dma_buf %p, res %p, imported db %p\n", __func__, +- dmabuf, res, res->import_dma_buf); +- if (!res->import_dma_buf) { ++ pr_debug("%s: mmap dma_buf %p, buf %p, imported db %p\n", __func__, ++ dmabuf, buf, buf->import.dma_buf); ++ if (!buf->imported) { + pr_err("%s: mmap dma_buf %p- not an imported buffer\n", + __func__, dmabuf); + return -EINVAL; + } +- return res->import_dma_buf->ops->mmap(res->import_dma_buf, vma); ++ return buf->import.dma_buf->ops->mmap(buf->import.dma_buf, vma); + } + + static + void vc_sm_import_dma_buf_release(struct dma_buf *dmabuf) + { +- struct vc_sm_buffer *res = dmabuf->priv; ++ struct vc_sm_buffer *buf = dmabuf->priv; + + pr_debug("%s: Relasing dma_buf %p\n", __func__, dmabuf); +- if (!res->import_dma_buf) ++ mutex_lock(&buf->lock); ++ if (!buf->imported) + return; + +- res->in_use = 0; ++ buf->in_use = 0; + +- vc_sm_release_resource(res, 0); ++ vc_sm_vpu_free(buf); ++ ++ vc_sm_release_resource(buf); + } + + static + void *vc_sm_import_dma_buf_kmap(struct dma_buf *dmabuf, + unsigned long offset) + { +- struct vc_sm_buffer *res = dmabuf->priv; ++ struct vc_sm_buffer *buf = dmabuf->priv; + +- if (!res->import_dma_buf) ++ if (!buf->imported) + return NULL; +- return res->import_dma_buf->ops->map(res->import_dma_buf, +- offset); ++ return buf->import.dma_buf->ops->map(buf->import.dma_buf, offset); + } + + static + void vc_sm_import_dma_buf_kunmap(struct dma_buf *dmabuf, + unsigned long offset, void *ptr) + { +- struct vc_sm_buffer *res = dmabuf->priv; ++ struct vc_sm_buffer *buf = dmabuf->priv; + +- if (!res->import_dma_buf) ++ if (!buf->imported) + return; +- res->import_dma_buf->ops->unmap(res->import_dma_buf, +- offset, ptr); ++ buf->import.dma_buf->ops->unmap(buf->import.dma_buf, offset, ptr); + } + + static + int vc_sm_import_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, + enum dma_data_direction direction) + { +- struct vc_sm_buffer 
*res = dmabuf->priv; ++ struct vc_sm_buffer *buf = dmabuf->priv; + +- if (!res->import_dma_buf) ++ if (!buf->imported) + return -EINVAL; +- return res->import_dma_buf->ops->begin_cpu_access(res->import_dma_buf, +- direction); ++ return buf->import.dma_buf->ops->begin_cpu_access(buf->import.dma_buf, ++ direction); + } + + static + int vc_sm_import_dma_buf_end_cpu_access(struct dma_buf *dmabuf, + enum dma_data_direction direction) + { +- struct vc_sm_buffer *res = dmabuf->priv; ++ struct vc_sm_buffer *buf = dmabuf->priv; + +- if (!res->import_dma_buf) ++ if (!buf->imported) + return -EINVAL; +- return res->import_dma_buf->ops->end_cpu_access(res->import_dma_buf, ++ return buf->import.dma_buf->ops->end_cpu_access(buf->import.dma_buf, + direction); + } + +@@ -516,9 +820,8 @@ vc_sm_cma_import_dmabuf_internal(struct + memcpy(import.name, VC_SM_RESOURCE_NAME_DEFAULT, + sizeof(VC_SM_RESOURCE_NAME_DEFAULT)); + +- pr_debug("[%s]: attempt to import \"%s\" data - type %u, addr %pad, size %u\n", +- __func__, import.name, import.type, &dma_addr, +- import.size); ++ pr_debug("[%s]: attempt to import \"%s\" data - type %u, addr %pad, size %u.\n", ++ __func__, import.name, import.type, &dma_addr, import.size); + + /* Allocate the videocore buffer. */ + status = vc_sm_cma_vchi_import(sm_state->sm_handle, &import, &result, +@@ -548,12 +851,14 @@ vc_sm_cma_import_dmabuf_internal(struct + buffer->size = import.size; + buffer->vpu_state = VPU_MAPPED; + +- buffer->import_dma_buf = dma_buf; ++ buffer->imported = 1; ++ buffer->import.dma_buf = dma_buf; + +- buffer->attach = attach; +- buffer->sgt = sgt; ++ buffer->import.attach = attach; ++ buffer->import.sgt = sgt; + buffer->dma_addr = dma_addr; + buffer->in_use = 1; ++ buffer->kernel_id = import.kernel_id; + + /* + * We're done - we need to export a new dmabuf chaining through most +@@ -594,6 +899,91 @@ error: + return ret; + } + ++static int vc_sm_cma_vpu_alloc(u32 size, uint32_t align, const char *name, ++ u32 mem_handle, struct vc_sm_buffer **ret_buffer) ++{ ++ DEFINE_DMA_BUF_EXPORT_INFO(exp_info); ++ struct vc_sm_buffer *buffer = NULL; ++ int aligned_size; ++ int ret = 0; ++ ++ /* Align to the user requested align */ ++ aligned_size = ALIGN(size, align); ++ /* and then to a page boundary */ ++ aligned_size = PAGE_ALIGN(aligned_size); ++ ++ if (!aligned_size) ++ return -EINVAL; ++ ++ /* Allocate local buffer to track this allocation. 
*/ ++ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); ++ if (!buffer) ++ return -ENOMEM; ++ ++ mutex_init(&buffer->lock); ++ ++ if (vc_sm_cma_buffer_allocate(sm_state->cma_heap, &buffer->alloc, ++ aligned_size)) { ++ pr_err("[%s]: cma alloc of %d bytes failed\n", ++ __func__, aligned_size); ++ ret = -ENOMEM; ++ goto error; ++ } ++ buffer->sg_table = buffer->alloc.sg_table; ++ ++ pr_debug("[%s]: cma alloc of %d bytes success\n", ++ __func__, aligned_size); ++ ++ if (dma_map_sg(&sm_state->pdev->dev, buffer->sg_table->sgl, ++ buffer->sg_table->nents, DMA_BIDIRECTIONAL) <= 0) { ++ pr_err("[%s]: dma_map_sg failed\n", __func__); ++ goto error; ++ } ++ ++ INIT_LIST_HEAD(&buffer->attachments); ++ ++ memcpy(buffer->name, name, ++ min(sizeof(buffer->name), strlen(name))); ++ ++ exp_info.ops = &dma_buf_ops; ++ exp_info.size = aligned_size; ++ exp_info.flags = O_RDWR; ++ exp_info.priv = buffer; ++ ++ buffer->dma_buf = dma_buf_export(&exp_info); ++ if (IS_ERR(buffer->dma_buf)) { ++ ret = PTR_ERR(buffer->dma_buf); ++ goto error; ++ } ++ buffer->dma_addr = (uint32_t)sg_dma_address(buffer->sg_table->sgl); ++ if ((buffer->dma_addr & 0xC0000000) != 0xC0000000) { ++ pr_err("%s: Expecting an uncached alias for dma_addr %pad\n", ++ __func__, &buffer->dma_addr); ++ buffer->dma_addr |= 0xC0000000; ++ } ++ buffer->private = sm_state->vpu_allocs; ++ ++ buffer->vc_handle = mem_handle; ++ buffer->vpu_state = VPU_MAPPED; ++ buffer->vpu_allocated = 1; ++ buffer->size = size; ++ /* ++ * Create an ID that will be passed along with our message so ++ * that when we service the release reply, we can look up which ++ * resource is being released. ++ */ ++ buffer->kernel_id = get_kernel_id(buffer); ++ ++ vc_sm_add_resource(sm_state->vpu_allocs, buffer); ++ ++ *ret_buffer = buffer; ++ return 0; ++error: ++ if (buffer) ++ vc_sm_release_resource(buffer); ++ return ret; ++} ++ + static void + vc_sm_vpu_event(struct sm_instance *instance, struct vc_sm_result_t *reply, + int reply_len) +@@ -612,21 +1002,61 @@ vc_sm_vpu_event(struct sm_instance *inst + struct vc_sm_released *release = (struct vc_sm_released *)reply; + struct vc_sm_buffer *buffer = + lookup_kernel_id(release->kernel_id); ++ if (!buffer) { ++ pr_err("%s: VC released a buffer that is already released, kernel_id %d\n", ++ __func__, release->kernel_id); ++ break; ++ } ++ mutex_lock(&buffer->lock); + +- /* +- * FIXME: Need to check buffer is still valid and allocated +- * before continuing +- */ + pr_debug("%s: Released addr %08x, size %u, id %08x, mem_handle %08x\n", + __func__, release->addr, release->size, + release->kernel_id, release->vc_handle); +- mutex_lock(&buffer->lock); ++ + buffer->vc_handle = 0; + buffer->vpu_state = VPU_NOT_MAPPED; +- mutex_unlock(&buffer->lock); + free_kernel_id(release->kernel_id); + +- vc_sm_release_resource(buffer, 0); ++ if (buffer->vpu_allocated) { ++ /* VPU allocation, so release the dmabuf which will ++ * trigger the clean up. 
++ */ ++ mutex_unlock(&buffer->lock); ++ dma_buf_put(buffer->dma_buf); ++ } else { ++ vc_sm_release_resource(buffer); ++ } ++ } ++ break; ++ case VC_SM_MSG_TYPE_VC_MEM_REQUEST: ++ { ++ struct vc_sm_buffer *buffer = NULL; ++ struct vc_sm_vc_mem_request *req = ++ (struct vc_sm_vc_mem_request *)reply; ++ struct vc_sm_vc_mem_request_result reply; ++ int ret; ++ ++ pr_debug("%s: Request %u bytes of memory, align %d name %s, trans_id %08x\n", ++ __func__, req->size, req->align, req->name, ++ req->trans_id); ++ ret = vc_sm_cma_vpu_alloc(req->size, req->align, req->name, ++ req->vc_handle, &buffer); ++ ++ reply.trans_id = req->trans_id; ++ if (!ret) { ++ reply.addr = buffer->dma_addr; ++ reply.kernel_id = buffer->kernel_id; ++ pr_debug("%s: Allocated resource buffer %p, addr %pad\n", ++ __func__, buffer, &buffer->dma_addr); ++ } else { ++ pr_err("%s: Allocation failed size %u, name %s, vc_handle %u\n", ++ __func__, req->size, req->name, req->vc_handle); ++ reply.addr = 0; ++ reply.kernel_id = 0; ++ } ++ vc_sm_vchi_client_vc_mem_req_reply(sm_state->sm_handle, &reply, ++ &sm_state->int_trans_id); ++ break; + } + break; + default: +@@ -645,6 +1075,14 @@ static void vc_sm_connected_init(void) + + pr_info("[%s]: start\n", __func__); + ++ if (vc_sm_cma_add_heaps(&sm_state->cma_heap) || ++ !sm_state->cma_heap) { ++ pr_err("[%s]: failed to initialise CMA heaps\n", ++ __func__); ++ ret = -EIO; ++ goto err_free_mem; ++ } ++ + /* + * Initialize and create a VCHI connection for the shared memory service + * running on videocore. +@@ -696,7 +1134,7 @@ static void vc_sm_connected_init(void) + goto err_remove_shared_memory; + } + +- version.version = 1; ++ version.version = 2; + ret = vc_sm_cma_vchi_client_version(sm_state->sm_handle, &version, + &version_result, + &sm_state->int_trans_id); +@@ -768,7 +1206,7 @@ static int bcm2835_vc_sm_cma_remove(stru + int vc_sm_cma_int_handle(void *handle) + { + struct dma_buf *dma_buf = (struct dma_buf *)handle; +- struct vc_sm_buffer *res; ++ struct vc_sm_buffer *buf; + + /* Validate we can work with this device. */ + if (!sm_state || !handle) { +@@ -776,8 +1214,8 @@ int vc_sm_cma_int_handle(void *handle) + return 0; + } + +- res = (struct vc_sm_buffer *)dma_buf->priv; +- return res->vc_handle; ++ buf = (struct vc_sm_buffer *)dma_buf->priv; ++ return buf->vc_handle; + } + EXPORT_SYMBOL_GPL(vc_sm_cma_int_handle); + +@@ -804,7 +1242,7 @@ EXPORT_SYMBOL_GPL(vc_sm_cma_free); + int vc_sm_cma_import_dmabuf(struct dma_buf *src_dmabuf, void **handle) + { + struct dma_buf *new_dma_buf; +- struct vc_sm_buffer *res; ++ struct vc_sm_buffer *buf; + int ret; + + /* Validate we can work with this device. */ +@@ -818,7 +1256,7 @@ int vc_sm_cma_import_dmabuf(struct dma_b + + if (!ret) { + pr_debug("%s: imported to ptr %p\n", __func__, new_dma_buf); +- res = (struct vc_sm_buffer *)new_dma_buf->priv; ++ buf = (struct vc_sm_buffer *)new_dma_buf->priv; + + /* Assign valid handle at this time.*/ + *handle = new_dma_buf; +--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h ++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h +@@ -21,6 +21,8 @@ + #include + #include + ++#include "vc_sm_cma.h" ++ + #define VC_SM_MAX_NAME_LEN 32 + + enum vc_sm_vpu_mapping_state { +@@ -29,31 +31,51 @@ enum vc_sm_vpu_mapping_state { + VPU_UNMAPPING + }; + ++struct vc_sm_imported { ++ struct dma_buf *dma_buf; ++ struct dma_buf_attachment *attach; ++ struct sg_table *sgt; ++}; ++ + struct vc_sm_buffer { + struct list_head global_buffer_list; /* Global list of buffers. 
*/ + ++ /* Index in the kernel_id idr so that we can find the ++ * mmal_msg_context again when servicing the VCHI reply. ++ */ ++ int kernel_id; ++ + size_t size; + + /* Lock over all the following state for this buffer */ + struct mutex lock; +- struct sg_table *sg_table; + struct list_head attachments; + + char name[VC_SM_MAX_NAME_LEN]; + + int in_use:1; /* Kernel is still using this resource */ ++ int imported:1; /* Imported dmabuf */ ++ ++ struct sg_table *sg_table; + + enum vc_sm_vpu_mapping_state vpu_state; + u32 vc_handle; /* VideoCore handle for this buffer */ ++ int vpu_allocated; /* ++ * The VPU made this allocation. Release the ++ * local dma_buf when the VPU releases the ++ * resource. ++ */ + + /* DMABUF related fields */ +- struct dma_buf *import_dma_buf; + struct dma_buf *dma_buf; +- struct dma_buf_attachment *attach; +- struct sg_table *sgt; + dma_addr_t dma_addr; + + struct vc_sm_privdata_t *private; ++ ++ union { ++ struct vc_sm_cma_alloc_data alloc; ++ struct vc_sm_imported import; ++ }; + }; + + #endif +--- /dev/null ++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.c +@@ -0,0 +1,99 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * VideoCore Shared Memory CMA allocator ++ * ++ * Copyright: 2018, Raspberry Pi (Trading) Ltd ++ * ++ * Based on the Android ION allocator ++ * Copyright (C) Linaro 2012 ++ * Author: for ST-Ericsson. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "vc_sm_cma.h" ++ ++/* CMA heap operations functions */ ++int vc_sm_cma_buffer_allocate(struct cma *cma_heap, ++ struct vc_sm_cma_alloc_data *buffer, ++ unsigned long len) ++{ ++ /* len should already be page aligned */ ++ unsigned long num_pages = len / PAGE_SIZE; ++ struct sg_table *table; ++ struct page *pages; ++ int ret; ++ ++ pages = cma_alloc(cma_heap, num_pages, 0, GFP_KERNEL); ++ if (!pages) ++ return -ENOMEM; ++ ++ table = kmalloc(sizeof(*table), GFP_KERNEL); ++ if (!table) ++ goto err; ++ ++ ret = sg_alloc_table(table, 1, GFP_KERNEL); ++ if (ret) ++ goto free_mem; ++ ++ sg_set_page(table->sgl, pages, len, 0); ++ ++ buffer->priv_virt = pages; ++ buffer->sg_table = table; ++ buffer->cma_heap = cma_heap; ++ buffer->num_pages = num_pages; ++ return 0; ++ ++free_mem: ++ kfree(table); ++err: ++ cma_release(cma_heap, pages, num_pages); ++ return -ENOMEM; ++} ++ ++void vc_sm_cma_buffer_free(struct vc_sm_cma_alloc_data *buffer) ++{ ++ struct cma *cma_heap = buffer->cma_heap; ++ struct page *pages = buffer->priv_virt; ++ ++ /* release memory */ ++ if (cma_heap) ++ cma_release(cma_heap, pages, buffer->num_pages); ++ ++ /* release sg table */ ++ if (buffer->sg_table) { ++ sg_free_table(buffer->sg_table); ++ kfree(buffer->sg_table); ++ buffer->sg_table = NULL; ++ } ++} ++ ++int __vc_sm_cma_add_heaps(struct cma *cma, void *priv) ++{ ++ struct cma **heap = (struct cma **)priv; ++ const char *name = cma_get_name(cma); ++ ++ if (!(*heap)) { ++ phys_addr_t phys_addr = cma_get_base(cma); ++ ++ pr_debug("%s: Adding cma heap %s (start %pap, size %lu) for use by vcsm\n", ++ __func__, name, &phys_addr, cma_get_size(cma)); ++ *heap = cma; ++ } else { ++ pr_err("%s: Ignoring heap %s as already set\n", ++ __func__, name); ++ } ++ ++ return 0; ++} ++ ++int vc_sm_cma_add_heaps(struct cma **cma_heap) ++{ ++ cma_for_each_area(__vc_sm_cma_add_heaps, cma_heap); ++ return 0; ++} +--- /dev/null ++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma.h +@@ -0,0 +1,39 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++ ++/* ++ * VideoCore Shared Memory CMA allocator ++ * ++ * Copyright: 
2018, Raspberry Pi (Trading) Ltd ++ * ++ * Based on the Android ION allocator ++ * Copyright (C) Linaro 2012 ++ * Author: for ST-Ericsson. ++ * ++ * This software is licensed under the terms of the GNU General Public ++ * License version 2, as published by the Free Software Foundation, and ++ * may be copied, distributed, and modified under those terms. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ */ ++#ifndef VC_SM_CMA_H ++#define VC_SM_CMA_H ++ ++struct vc_sm_cma_alloc_data { ++ struct cma *cma_heap; ++ unsigned long num_pages; ++ void *priv_virt; ++ struct sg_table *sg_table; ++}; ++ ++int vc_sm_cma_buffer_allocate(struct cma *cma_heap, ++ struct vc_sm_cma_alloc_data *buffer, ++ unsigned long len); ++void vc_sm_cma_buffer_free(struct vc_sm_cma_alloc_data *buffer); ++ ++int vc_sm_cma_add_heaps(struct cma **cma_heap); ++ ++#endif +--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c ++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c +@@ -500,3 +500,13 @@ int vc_sm_cma_vchi_client_version(struct + msg, sizeof(*msg), NULL, 0, + cur_trans_id, 0); + } ++ ++int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle, ++ struct vc_sm_vc_mem_request_result *msg, ++ uint32_t *cur_trans_id) ++{ ++ return vc_sm_cma_vchi_send_msg(handle, ++ VC_SM_MSG_TYPE_VC_MEM_REQUEST_REPLY, ++ msg, sizeof(*msg), 0, 0, cur_trans_id, ++ 0); ++} +--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h ++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h +@@ -56,4 +56,8 @@ int vc_sm_cma_vchi_client_version(struct + struct vc_sm_result_t *result, + u32 *cur_trans_id); + ++int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle, ++ struct vc_sm_vc_mem_request_result *msg, ++ uint32_t *cur_trans_id); ++ + #endif /* __VC_SM_CMA_VCHI_H__INCLUDED__ */ +--- a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h ++++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h +@@ -264,6 +264,8 @@ struct vc_sm_vc_mem_request { + u32 align; + /* resource name (for easier tracking) */ + char name[VC_SM_RESOURCE_NAME]; ++ /* VPU handle for the resource */ ++ u32 vc_handle; + }; + + /* Response from the kernel to provide the VPU with some memory */
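
Usage note (illustrative sketch, not part of the patch): for cases 1 and 2 described at the top of vc_sm.c, the in-kernel interface this driver exposes is the vc_sm_knl.h trio visible in the diff — vc_sm_cma_import_dmabuf(), vc_sm_cma_int_handle() and vc_sm_cma_free(). The sketch below shows how another kernel driver might share an existing dma_buf with the VPU through it; the exact prototype of vc_sm_cma_free() is not shown in these hunks and is assumed here to take the same opaque handle that the import call returns.

/*
 * Illustrative only — not part of the patch. Assumes the vc_sm_knl.h
 * prototypes seen in the diff; vc_sm_cma_free() taking the opaque handle
 * is an assumption.
 */
#include <linux/dma-buf.h>
#include "vc_sm_knl.h"

static int share_dmabuf_with_vpu(struct dma_buf *dmabuf)
{
	void *vcsm_handle;
	u32 vc_handle;
	int ret;

	/* Wrap the dma_buf; the VPU maps it and we get an opaque handle back. */
	ret = vc_sm_cma_import_dmabuf(dmabuf, &vcsm_handle);
	if (ret)
		return ret;

	/* 32-bit VideoCore handle that can be passed in VCHI/MMAL messages. */
	vc_handle = vc_sm_cma_int_handle(vcsm_handle);
	if (!vc_handle) {
		vc_sm_cma_free(vcsm_handle);
		return -EINVAL;
	}

	/* ... hand vc_handle to the VPU and use the buffer ... */

	/* Drop our reference; releasing the handle triggers the VPU unmap. */
	vc_sm_cma_free(vcsm_handle);
	return 0;
}

Because the handle returned by the import is itself a dma_buf chaining through to the original one, its release is what ultimately unmaps the buffer on the VPU, matching the refcounting description added at the top of vc_sm.c.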