brcm2708: update to latest version
[openwrt/openwrt.git] / target / linux / brcm2708 / patches-4.4 / 0113-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch
diff --git a/target/linux/brcm2708/patches-4.4/0113-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch b/target/linux/brcm2708/patches-4.4/0113-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch
new file mode 100644 (file)
index 0000000..e71b82b
--- /dev/null
@@ -0,0 +1,333 @@
+From b9e5697fbec13e6203b63649ee5d7c6819a5fb6b Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Fri, 30 Oct 2015 10:09:02 -0700
+Subject: [PATCH 113/232] drm/vc4: Add an interface for capturing the GPU state
+ after a hang.
+
+This can be parsed with the vc4-gpu-tools utilities to help figure out
+what was going on.
+
+Signed-off-by: Eric Anholt <eric@anholt.net>
+---
+ drivers/gpu/drm/vc4/vc4_bo.c  |   4 +-
+ drivers/gpu/drm/vc4/vc4_drv.c |   1 +
+ drivers/gpu/drm/vc4/vc4_drv.h |   4 +
+ drivers/gpu/drm/vc4/vc4_gem.c | 185 ++++++++++++++++++++++++++++++++++++++++++
+ include/uapi/drm/vc4_drm.h    |  45 ++++++++++
+ 5 files changed, 237 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/vc4/vc4_bo.c
++++ b/drivers/gpu/drm/vc4/vc4_bo.c
+@@ -415,8 +415,8 @@ int vc4_mmap(struct file *filp, struct v
+       gem_obj = vma->vm_private_data;
+       bo = to_vc4_bo(gem_obj);
+-      if (bo->validated_shader) {
+-              DRM_ERROR("mmaping of shader BOs not allowed.\n");
++      if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
++              DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
+               return -EINVAL;
+       }
+--- a/drivers/gpu/drm/vc4/vc4_drv.c
++++ b/drivers/gpu/drm/vc4/vc4_drv.c
+@@ -81,6 +81,7 @@ static const struct drm_ioctl_desc vc4_d
+       DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
+       DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
+       DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
++      DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY),
+ };
+ static struct drm_driver vc4_drm_driver = {
+--- a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -20,6 +20,8 @@ struct vc4_dev {
+       struct drm_fbdev_cma *fbdev;
+       struct rpi_firmware *firmware;
++      struct vc4_hang_state *hang_state;
++
+       /* The kernel-space BO cache.  Tracks buffers that have been
+        * unreferenced by all other users (refcounts of 0!) but not
+        * yet freed, so we can do cheap allocations.
+@@ -366,6 +368,8 @@ int vc4_create_shader_bo_ioctl(struct dr
+                              struct drm_file *file_priv);
+ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
+                     struct drm_file *file_priv);
++int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
++                           struct drm_file *file_priv);
+ int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
+ int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+ void *vc4_prime_vmap(struct drm_gem_object *obj);
+--- a/drivers/gpu/drm/vc4/vc4_gem.c
++++ b/drivers/gpu/drm/vc4/vc4_gem.c
+@@ -40,6 +40,186 @@ vc4_queue_hangcheck(struct drm_device *d
+                 round_jiffies_up(jiffies + msecs_to_jiffies(100)));
+ }
++/* Kernel-side hang snapshot: the uapi state plus the BOs it refers to. */
++struct vc4_hang_state {
++      struct drm_vc4_get_hang_state user_state; /* returned to userspace */
++      u32 bo_count; /* NOTE(review): unused here; see user_state.bo_count */
++      struct drm_gem_object **bo; /* user_state.bo_count referenced BOs */
++};
++
++/* Drop the BO references held by a saved hang state, then free it. */
++static void
++vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
++{
++      unsigned int i;
++
++      mutex_lock(&dev->struct_mutex);
++      for (i = 0; i < state->user_state.bo_count; i++)
++              drm_gem_object_unreference(state->bo[i]);
++      mutex_unlock(&dev->struct_mutex);
++      kfree(state->bo); /* the pointer array is its own allocation */
++      kfree(state);
++}
++
++/* Hand the saved hang state to userspace: the snapshot is copied into
++ * @data, plus one drm_vc4_get_hang_state_bo with a new GEM handle per BO.
++ * Returns -ENOENT when nothing is saved, or 0 with only bo_count updated
++ * when the user's array is too small (the state then stays saved).
++ */
++int
++vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
++                       struct drm_file *file_priv)
++{
++      struct drm_vc4_get_hang_state *get_state = data;
++      struct drm_vc4_get_hang_state_bo *bo_state;
++      struct vc4_hang_state *kernel_state;
++      struct drm_vc4_get_hang_state *state;
++      struct vc4_dev *vc4 = to_vc4_dev(dev);
++      unsigned long irqflags;
++      u32 i, handles = 0;
++      int ret;
++
++      spin_lock_irqsave(&vc4->job_lock, irqflags);
++      kernel_state = vc4->hang_state;
++      if (!kernel_state) {
++              spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++              return -ENOENT;
++      }
++      state = &kernel_state->user_state;
++
++      /* If the user's array isn't big enough, just return the
++       * required array size.
++       */
++      if (get_state->bo_count < state->bo_count) {
++              get_state->bo_count = state->bo_count;
++              spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++              return 0;
++      }
++
++      /* Detach the state: this call now owns it and must free it. */
++      vc4->hang_state = NULL;
++      spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++
++      /* Save the user's BO pointer, so we don't stomp it with the memcpy. */
++      state->bo = get_state->bo;
++      memcpy(get_state, state, sizeof(*state));
++
++      bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
++      if (!bo_state) {
++              ret = -ENOMEM;
++              goto out;
++      }
++
++      for (i = 0; i < state->bo_count; i++, handles++) {
++              struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
++              ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
++                                          &bo_state[i].handle);
++              if (ret)
++                      goto out;
++              bo_state[i].paddr = vc4_bo->base.paddr;
++              bo_state[i].size = vc4_bo->base.base.size;
++      }
++
++      /* copy_to_user() returns the bytes left uncopied, not an errno. */
++      ret = copy_to_user((void __user *)(uintptr_t)get_state->bo, bo_state,
++                         state->bo_count * sizeof(*bo_state)) ? -EFAULT : 0;
++out:
++      /* On failure, delete the handles already created for this file. */
++      for (i = 0; ret && i < handles; i++)
++              drm_gem_handle_delete(file_priv, bo_state[i].handle);
++      kfree(bo_state);
++      vc4_free_hang_state(dev, kernel_state);
++      return ret;
++}
++
++/* Snapshot the BOs of the job returned by vc4_first_job() and the V3D
++ * registers into vc4->hang_state.  Best effort: saves nothing if an
++ * allocation fails, there is no job, or a snapshot is already pending.
++ */
++static void
++vc4_save_hang_state(struct drm_device *dev)
++{
++      struct vc4_dev *vc4 = to_vc4_dev(dev);
++      struct drm_vc4_get_hang_state *state;
++      struct vc4_hang_state *kernel_state;
++      struct vc4_exec_info *exec;
++      struct vc4_bo *bo;
++      unsigned long irqflags;
++      unsigned int i, unref_list_count;
++
++      /* Size by the wrapper struct, not just its embedded uapi part. */
++      kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
++      if (!kernel_state)
++              return;
++      state = &kernel_state->user_state;
++
++      spin_lock_irqsave(&vc4->job_lock, irqflags);
++      exec = vc4_first_job(vc4);
++      if (!exec) {
++              spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++              kfree(kernel_state);
++              return;
++      }
++
++      unref_list_count = 0;
++      list_for_each_entry(bo, &exec->unref_list, unref_head)
++              unref_list_count++;
++
++      state->bo_count = exec->bo_count + unref_list_count;
++      kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
++                                 GFP_ATOMIC);
++      if (!kernel_state->bo) {
++              spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++              kfree(kernel_state);
++              return;
++      }
++
++      for (i = 0; i < exec->bo_count; i++) {
++              drm_gem_object_reference(&exec->bo[i].bo->base);
++              kernel_state->bo[i] = &exec->bo[i].bo->base;
++      }
++
++      list_for_each_entry(bo, &exec->unref_list, unref_head) {
++              drm_gem_object_reference(&bo->base.base);
++              kernel_state->bo[i] = &bo->base.base;
++              i++;
++      }
++
++      state->start_bin = exec->ct0ca;
++      state->start_render = exec->ct1ca;
++      spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++
++      state->ct0ca = V3D_READ(V3D_CTNCA(0));
++      state->ct0ea = V3D_READ(V3D_CTNEA(0));
++      state->ct1ca = V3D_READ(V3D_CTNCA(1));
++      state->ct1ea = V3D_READ(V3D_CTNEA(1));
++      state->ct0cs = V3D_READ(V3D_CTNCS(0));
++      state->ct1cs = V3D_READ(V3D_CTNCS(1));
++      state->ct0ra0 = V3D_READ(V3D_CT00RA0);
++      state->ct1ra0 = V3D_READ(V3D_CT01RA0);
++
++      state->bpca = V3D_READ(V3D_BPCA);
++      state->bpcs = V3D_READ(V3D_BPCS);
++      state->bpoa = V3D_READ(V3D_BPOA);
++      state->bpos = V3D_READ(V3D_BPOS);
++      state->vpmbase = V3D_READ(V3D_VPMBASE);
++      state->dbge = V3D_READ(V3D_DBGE);
++      state->fdbgo = V3D_READ(V3D_FDBGO);
++      state->fdbgb = V3D_READ(V3D_FDBGB);
++      state->fdbgr = V3D_READ(V3D_FDBGR);
++      state->fdbgs = V3D_READ(V3D_FDBGS);
++      state->errstat = V3D_READ(V3D_ERRSTAT);
++
++      spin_lock_irqsave(&vc4->job_lock, irqflags);
++      if (vc4->hang_state) {
++              spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++              vc4_free_hang_state(dev, kernel_state);
++      } else {
++              vc4->hang_state = kernel_state;
++              spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++      }
++}
++
+ static void
+ vc4_reset(struct drm_device *dev)
+ {
+@@ -64,6 +244,8 @@ vc4_reset_work(struct work_struct *work)
+       struct vc4_dev *vc4 =
+               container_of(work, struct vc4_dev, hangcheck.reset_work);
++      vc4_save_hang_state(vc4->dev);
++
+       vc4_reset(vc4->dev);
+ }
+@@ -673,4 +855,7 @@ vc4_gem_destroy(struct drm_device *dev)
+       }
+       vc4_bo_cache_destroy(dev);
++
++      if (vc4->hang_state)
++              vc4_free_hang_state(dev, vc4->hang_state);
+ }
+--- a/include/uapi/drm/vc4_drm.h
++++ b/include/uapi/drm/vc4_drm.h
+@@ -32,6 +32,7 @@
+ #define DRM_VC4_CREATE_BO                         0x03
+ #define DRM_VC4_MMAP_BO                           0x04
+ #define DRM_VC4_CREATE_SHADER_BO                  0x05
++#define DRM_VC4_GET_HANG_STATE                    0x06
+ #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
+ #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
+@@ -39,6 +40,7 @@
+ #define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
+ #define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
+ #define DRM_IOCTL_VC4_CREATE_SHADER_BO    DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
++#define DRM_IOCTL_VC4_GET_HANG_STATE      DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
+ struct drm_vc4_submit_rcl_surface {
+       uint32_t hindex; /* Handle index, or ~0 if not present. */
+@@ -226,4 +228,47 @@ struct drm_vc4_mmap_bo {
+       uint64_t offset;
+ };
++struct drm_vc4_get_hang_state_bo { /* one array entry per captured BO */
++      uint32_t handle; /* GEM handle created for the calling file */
++      uint32_t paddr;  /* the BO's paddr field as kept by the driver */
++      uint32_t size;   /* size of the BO's GEM object */
++      uint32_t pad;    /* not set by the driver; array is zero-allocated */
++};
++
++/**
++ * struct drm_vc4_get_hang_state - ioctl argument for collecting state
++ * from a GPU hang for analysis.
++ */
++struct drm_vc4_get_hang_state {
++      /** Pointer to array of struct drm_vc4_get_hang_state_bo. */
++      uint64_t bo;
++      /**
++       * On input, the number of entries in the bo array.  On output,
++       * the number of BOs in the saved hang state.
++       */
++      uint32_t bo_count;
++      /* Copies of the captured job's ct0ca and ct1ca fields. */
++      uint32_t start_bin, start_render;
++      /* The registers below are read from V3D when the state is saved. */
++      uint32_t ct0ca, ct0ea;
++      uint32_t ct1ca, ct1ea;
++      uint32_t ct0cs, ct1cs;
++      uint32_t ct0ra0, ct1ra0;
++
++      uint32_t bpca, bpcs;
++      uint32_t bpoa, bpos;
++
++      uint32_t vpmbase;
++
++      uint32_t dbge;
++      uint32_t fdbgo;
++      uint32_t fdbgb;
++      uint32_t fdbgr;
++      uint32_t fdbgs;
++      uint32_t errstat;
++
++      /* Pad that we may save more registers into in the future. */
++      uint32_t pad[16];
++};
++
+ #endif /* _UAPI_VC4_DRM_H_ */