1 From eb445fa566bd604dd3c0cd5e08e43735ccc149f2 Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Fri, 30 Oct 2015 10:09:02 -0700
4 Subject: [PATCH 113/304] drm/vc4: Add an interface for capturing the GPU state after a hang.
7 This can be parsed with vc4-gpu-tools tools for trying to figure out what was going on.
10 Signed-off-by: Eric Anholt <eric@anholt.net>
12 drivers/gpu/drm/vc4/vc4_bo.c | 4 +-
13 drivers/gpu/drm/vc4/vc4_drv.c | 1 +
14 drivers/gpu/drm/vc4/vc4_drv.h | 4 +
15 drivers/gpu/drm/vc4/vc4_gem.c | 185 ++++++++++++++++++++++++++++++++++++++++++
16 include/uapi/drm/vc4_drm.h | 45 ++++++++++
17 5 files changed, 237 insertions(+), 2 deletions(-)
19 --- a/drivers/gpu/drm/vc4/vc4_bo.c
20 +++ b/drivers/gpu/drm/vc4/vc4_bo.c
21 @@ -415,8 +415,8 @@ int vc4_mmap(struct file *filp, struct v
22 gem_obj = vma->vm_private_data;
23 bo = to_vc4_bo(gem_obj);
25 - if (bo->validated_shader) {
26 - DRM_ERROR("mmaping of shader BOs not allowed.\n");
27 + if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
28 + DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
32 --- a/drivers/gpu/drm/vc4/vc4_drv.c
33 +++ b/drivers/gpu/drm/vc4/vc4_drv.c
34 @@ -81,6 +81,7 @@ static const struct drm_ioctl_desc vc4_d
35 DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
36 DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
37 DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
38 + DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY),
41 static struct drm_driver vc4_drm_driver = {
42 --- a/drivers/gpu/drm/vc4/vc4_drv.h
43 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
44 @@ -20,6 +20,8 @@ struct vc4_dev {
45 struct drm_fbdev_cma *fbdev;
46 struct rpi_firmware *firmware;
48 + struct vc4_hang_state *hang_state;
50 /* The kernel-space BO cache. Tracks buffers that have been
51 * unreferenced by all other users (refcounts of 0!) but not
52 * yet freed, so we can do cheap allocations.
53 @@ -366,6 +368,8 @@ int vc4_create_shader_bo_ioctl(struct dr
54 struct drm_file *file_priv);
55 int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
56 struct drm_file *file_priv);
57 +int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
58 + struct drm_file *file_priv);
59 int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
60 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
61 void *vc4_prime_vmap(struct drm_gem_object *obj);
62 --- a/drivers/gpu/drm/vc4/vc4_gem.c
63 +++ b/drivers/gpu/drm/vc4/vc4_gem.c
64 @@ -40,6 +40,186 @@ vc4_queue_hangcheck(struct drm_device *d
65 round_jiffies_up(jiffies + msecs_to_jiffies(100)));
68 +struct vc4_hang_state {
69 + struct drm_vc4_get_hang_state user_state;
72 + struct drm_gem_object **bo;
76 +vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
80 + mutex_lock(&dev->struct_mutex);
81 + for (i = 0; i < state->user_state.bo_count; i++) {
82 + drm_gem_object_unreference(state->bo[i]);
84 + mutex_unlock(&dev->struct_mutex);
90 +vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
91 + struct drm_file *file_priv)
93 + struct drm_vc4_get_hang_state *get_state = data;
94 + struct drm_vc4_get_hang_state_bo *bo_state;
95 + struct vc4_hang_state *kernel_state;
96 + struct drm_vc4_get_hang_state *state;
97 + struct vc4_dev *vc4 = to_vc4_dev(dev);
98 + unsigned long irqflags;
102 + spin_lock_irqsave(&vc4->job_lock, irqflags);
103 + kernel_state = vc4->hang_state;
104 + if (!kernel_state) {
105 + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
108 + state = &kernel_state->user_state;
110 + /* If the user's array isn't big enough, just return the
111 + * required array size.
113 + if (get_state->bo_count < state->bo_count) {
114 + get_state->bo_count = state->bo_count;
115 + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
119 + vc4->hang_state = NULL;
120 + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
122 + /* Save the user's BO pointer, so we don't stomp it with the memcpy. */
123 + state->bo = get_state->bo;
124 + memcpy(get_state, state, sizeof(*state));
126 + bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
132 + for (i = 0; i < state->bo_count; i++) {
133 + struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
135 + ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
139 + state->bo_count = i - 1;
142 + bo_state[i].handle = handle;
143 + bo_state[i].paddr = vc4_bo->base.paddr;
144 + bo_state[i].size = vc4_bo->base.base.size;
147 + ret = copy_to_user((void __user *)(uintptr_t)get_state->bo,
149 + state->bo_count * sizeof(*bo_state));
154 + vc4_free_hang_state(dev, kernel_state);
161 +vc4_save_hang_state(struct drm_device *dev)
163 + struct vc4_dev *vc4 = to_vc4_dev(dev);
164 + struct drm_vc4_get_hang_state *state;
165 + struct vc4_hang_state *kernel_state;
166 + struct vc4_exec_info *exec;
168 + unsigned long irqflags;
169 + unsigned int i, unref_list_count;
171 + kernel_state = kcalloc(1, sizeof(*state), GFP_KERNEL);
175 + state = &kernel_state->user_state;
177 + spin_lock_irqsave(&vc4->job_lock, irqflags);
178 + exec = vc4_first_job(vc4);
180 + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
184 + unref_list_count = 0;
185 + list_for_each_entry(bo, &exec->unref_list, unref_head)
186 + unref_list_count++;
188 + state->bo_count = exec->bo_count + unref_list_count;
189 + kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
191 + if (!kernel_state->bo) {
192 + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
196 + for (i = 0; i < exec->bo_count; i++) {
197 + drm_gem_object_reference(&exec->bo[i].bo->base);
198 + kernel_state->bo[i] = &exec->bo[i].bo->base;
201 + list_for_each_entry(bo, &exec->unref_list, unref_head) {
202 + drm_gem_object_reference(&bo->base.base);
203 + kernel_state->bo[i] = &bo->base.base;
207 + state->start_bin = exec->ct0ca;
208 + state->start_render = exec->ct1ca;
210 + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
212 + state->ct0ca = V3D_READ(V3D_CTNCA(0));
213 + state->ct0ea = V3D_READ(V3D_CTNEA(0));
215 + state->ct1ca = V3D_READ(V3D_CTNCA(1));
216 + state->ct1ea = V3D_READ(V3D_CTNEA(1));
218 + state->ct0cs = V3D_READ(V3D_CTNCS(0));
219 + state->ct1cs = V3D_READ(V3D_CTNCS(1));
221 + state->ct0ra0 = V3D_READ(V3D_CT00RA0);
222 + state->ct1ra0 = V3D_READ(V3D_CT01RA0);
224 + state->bpca = V3D_READ(V3D_BPCA);
225 + state->bpcs = V3D_READ(V3D_BPCS);
226 + state->bpoa = V3D_READ(V3D_BPOA);
227 + state->bpos = V3D_READ(V3D_BPOS);
229 + state->vpmbase = V3D_READ(V3D_VPMBASE);
231 + state->dbge = V3D_READ(V3D_DBGE);
232 + state->fdbgo = V3D_READ(V3D_FDBGO);
233 + state->fdbgb = V3D_READ(V3D_FDBGB);
234 + state->fdbgr = V3D_READ(V3D_FDBGR);
235 + state->fdbgs = V3D_READ(V3D_FDBGS);
236 + state->errstat = V3D_READ(V3D_ERRSTAT);
238 + spin_lock_irqsave(&vc4->job_lock, irqflags);
239 + if (vc4->hang_state) {
240 + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
241 + vc4_free_hang_state(dev, kernel_state);
243 + vc4->hang_state = kernel_state;
244 + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
249 vc4_reset(struct drm_device *dev)
251 @@ -64,6 +244,8 @@ vc4_reset_work(struct work_struct *work)
252 struct vc4_dev *vc4 =
253 container_of(work, struct vc4_dev, hangcheck.reset_work);
255 + vc4_save_hang_state(vc4->dev);
260 @@ -673,4 +855,7 @@ vc4_gem_destroy(struct drm_device *dev)
263 vc4_bo_cache_destroy(dev);
265 + if (vc4->hang_state)
266 + vc4_free_hang_state(dev, vc4->hang_state);
268 --- a/include/uapi/drm/vc4_drm.h
269 +++ b/include/uapi/drm/vc4_drm.h
271 #define DRM_VC4_CREATE_BO 0x03
272 #define DRM_VC4_MMAP_BO 0x04
273 #define DRM_VC4_CREATE_SHADER_BO 0x05
274 +#define DRM_VC4_GET_HANG_STATE 0x06
276 #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
277 #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
279 #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
280 #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
281 #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
282 +#define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
284 struct drm_vc4_submit_rcl_surface {
285 uint32_t hindex; /* Handle index, or ~0 if not present. */
286 @@ -226,4 +228,47 @@ struct drm_vc4_mmap_bo {
290 +struct drm_vc4_get_hang_state_bo {
298 + * struct drm_vc4_hang_state - ioctl argument for collecting state
299 + * from a GPU hang for analysis.
301 +struct drm_vc4_get_hang_state {
302 + /** Pointer to array of struct drm_vc4_get_hang_state_bo. */
305 + * On input, the size of the bo array. Output is the number
306 + * of bos to be returned.
310 + uint32_t start_bin, start_render;
312 + uint32_t ct0ca, ct0ea;
313 + uint32_t ct1ca, ct1ea;
314 + uint32_t ct0cs, ct1cs;
315 + uint32_t ct0ra0, ct1ra0;
317 + uint32_t bpca, bpcs;
318 + uint32_t bpoa, bpos;
329 + /* Pad that we may save more registers into in the future. */
333 #endif /* _UAPI_VC4_DRM_H_ */