target/linux/brcm2708/patches-4.19/950-0511-drm-v3d-Refactor-job-management.patch
1 From ccf319a0265bfdb4a622a52645f159461bc88079 Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Thu, 27 Dec 2018 12:11:52 -0800
4 Subject: [PATCH] drm/v3d: Refactor job management.
5
6 The CL submission had two jobs embedded in an exec struct. When I
7 added TFU support, I had to replicate some of the exec stuff and some
8 of the job stuff. As I went to add CSD, it became clear that actually
9 what was in exec should just be in the two CL jobs, and it would let
10 us share a lot more code between the 4 queues.
11
12 Signed-off-by: Eric Anholt <eric@anholt.net>
13 ---
14 drivers/gpu/drm/v3d/v3d_drv.h | 77 ++++----
15 drivers/gpu/drm/v3d/v3d_gem.c | 331 +++++++++++++++++---------------
16 drivers/gpu/drm/v3d/v3d_irq.c | 8 +-
17 drivers/gpu/drm/v3d/v3d_sched.c | 264 ++++++++++++++-----------
18 4 files changed, 373 insertions(+), 307 deletions(-)
19
20 --- a/drivers/gpu/drm/v3d/v3d_drv.h
21 +++ b/drivers/gpu/drm/v3d/v3d_drv.h
22 @@ -67,8 +67,8 @@ struct v3d_dev {
23
24 struct work_struct overflow_mem_work;
25
26 - struct v3d_exec_info *bin_job;
27 - struct v3d_exec_info *render_job;
28 + struct v3d_bin_job *bin_job;
29 + struct v3d_render_job *render_job;
30 struct v3d_tfu_job *tfu_job;
31
32 struct v3d_queue_state queue[V3D_MAX_QUEUES];
33 @@ -132,7 +132,7 @@ struct v3d_bo {
34 struct list_head vmas; /* list of v3d_vma */
35
36 /* List entry for the BO's position in
37 - * v3d_exec_info->unref_list
38 + * v3d_render_job->unref_list
39 */
40 struct list_head unref_head;
41
42 @@ -176,7 +176,15 @@ to_v3d_fence(struct dma_fence *fence)
43 struct v3d_job {
44 struct drm_sched_job base;
45
46 - struct v3d_exec_info *exec;
47 + struct kref refcount;
48 +
49 + struct v3d_dev *v3d;
50 +
51 + /* This is the array of BOs that were looked up at the start
52 + * of submission.
53 + */
54 + struct v3d_bo **bo;
55 + u32 bo_count;
56
57 /* An optional fence userspace can pass in for the job to depend on. */
58 struct dma_fence *in_fence;
59 @@ -184,59 +192,53 @@ struct v3d_job {
60 /* v3d fence to be signaled by IRQ handler when the job is complete. */
61 struct dma_fence *irq_fence;
62
63 + /* scheduler fence for when the job is considered complete and
64 + * the BO reservations can be released.
65 + */
66 + struct dma_fence *done_fence;
67 +
68 + /* Callback for the freeing of the job on refcount going to 0. */
69 + void (*free)(struct kref *ref);
70 +};
71 +
72 +struct v3d_bin_job {
73 + struct v3d_job base;
74 +
75 /* GPU virtual addresses of the start/end of the CL job. */
76 u32 start, end;
77
78 u32 timedout_ctca, timedout_ctra;
79 -};
80
81 -struct v3d_exec_info {
82 - struct v3d_dev *v3d;
83 + /* Corresponding render job, for attaching our overflow memory. */
84 + struct v3d_render_job *render;
85 +
86 + /* Submitted tile memory allocation start/size, tile state. */
87 + u32 qma, qms, qts;
88 +};
89
90 - struct v3d_job bin, render;
91 +struct v3d_render_job {
92 + struct v3d_job base;
93
94 - /* Fence for when the scheduler considers the binner to be
95 - * done, for render to depend on.
96 + /* Optional fence for the binner, to depend on before starting
97 + * our job.
98 */
99 struct dma_fence *bin_done_fence;
100
101 - /* Fence for when the scheduler considers the render to be
102 - * done, for when the BOs reservations should be complete.
103 - */
104 - struct dma_fence *render_done_fence;
105 -
106 - struct kref refcount;
107 + /* GPU virtual addresses of the start/end of the CL job. */
108 + u32 start, end;
109
110 - /* This is the array of BOs that were looked up at the start of exec. */
111 - struct v3d_bo **bo;
112 - u32 bo_count;
113 + u32 timedout_ctca, timedout_ctra;
114
115 /* List of overflow BOs used in the job that need to be
116 * released once the job is complete.
117 */
118 struct list_head unref_list;
119 -
120 - /* Submitted tile memory allocation start/size, tile state. */
121 - u32 qma, qms, qts;
122 };
123
124 struct v3d_tfu_job {
125 - struct drm_sched_job base;
126 + struct v3d_job base;
127
128 struct drm_v3d_submit_tfu args;
129 -
130 - /* An optional fence userspace can pass in for the job to depend on. */
131 - struct dma_fence *in_fence;
132 -
133 - /* v3d fence to be signaled by IRQ handler when the job is complete. */
134 - struct dma_fence *irq_fence;
135 -
136 - struct v3d_dev *v3d;
137 -
138 - struct kref refcount;
139 -
140 - /* This is the array of BOs that were looked up at the start of exec. */
141 - struct v3d_bo *bo[4];
142 };
143
144 /**
145 @@ -306,8 +308,7 @@ int v3d_submit_tfu_ioctl(struct drm_devi
146 struct drm_file *file_priv);
147 int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
148 struct drm_file *file_priv);
149 -void v3d_exec_put(struct v3d_exec_info *exec);
150 -void v3d_tfu_job_put(struct v3d_tfu_job *exec);
151 +void v3d_job_put(struct v3d_job *job);
152 void v3d_reset(struct v3d_dev *v3d);
153 void v3d_invalidate_caches(struct v3d_dev *v3d);
154
155 --- a/drivers/gpu/drm/v3d/v3d_gem.c
156 +++ b/drivers/gpu/drm/v3d/v3d_gem.c
157 @@ -293,11 +293,11 @@ retry:
158 }
159
160 /**
161 - * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
162 + * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
163 * referenced by the job.
164 * @dev: DRM device
165 * @file_priv: DRM file for this fd
166 - * @exec: V3D job being set up
167 + * @job: V3D job being set up
168 *
169 * The command validator needs to reference BOs by their index within
170 * the submitted job's BO list. This does the validation of the job's
171 @@ -307,18 +307,19 @@ retry:
172 * failure, because that will happen at v3d_exec_cleanup() time.
173 */
174 static int
175 -v3d_cl_lookup_bos(struct drm_device *dev,
176 - struct drm_file *file_priv,
177 - struct drm_v3d_submit_cl *args,
178 - struct v3d_exec_info *exec)
179 +v3d_lookup_bos(struct drm_device *dev,
180 + struct drm_file *file_priv,
181 + struct v3d_job *job,
182 + u64 bo_handles,
183 + u32 bo_count)
184 {
185 u32 *handles;
186 int ret = 0;
187 int i;
188
189 - exec->bo_count = args->bo_handle_count;
190 + job->bo_count = bo_count;
191
192 - if (!exec->bo_count) {
193 + if (!job->bo_count) {
194 /* See comment on bo_index for why we have to check
195 * this.
196 */
197 @@ -326,15 +327,15 @@ v3d_cl_lookup_bos(struct drm_device *dev
198 return -EINVAL;
199 }
200
201 - exec->bo = kvmalloc_array(exec->bo_count,
202 - sizeof(struct drm_gem_cma_object *),
203 - GFP_KERNEL | __GFP_ZERO);
204 - if (!exec->bo) {
205 + job->bo = kvmalloc_array(job->bo_count,
206 + sizeof(struct drm_gem_cma_object *),
207 + GFP_KERNEL | __GFP_ZERO);
208 + if (!job->bo) {
209 DRM_DEBUG("Failed to allocate validated BO pointers\n");
210 return -ENOMEM;
211 }
212
213 - handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL);
214 + handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
215 if (!handles) {
216 ret = -ENOMEM;
217 DRM_DEBUG("Failed to allocate incoming GEM handles\n");
218 @@ -342,15 +343,15 @@ v3d_cl_lookup_bos(struct drm_device *dev
219 }
220
221 if (copy_from_user(handles,
222 - (void __user *)(uintptr_t)args->bo_handles,
223 - exec->bo_count * sizeof(u32))) {
224 + (void __user *)(uintptr_t)bo_handles,
225 + job->bo_count * sizeof(u32))) {
226 ret = -EFAULT;
227 DRM_DEBUG("Failed to copy in GEM handles\n");
228 goto fail;
229 }
230
231 spin_lock(&file_priv->table_lock);
232 - for (i = 0; i < exec->bo_count; i++) {
233 + for (i = 0; i < job->bo_count; i++) {
234 struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
235 handles[i]);
236 if (!bo) {
237 @@ -361,7 +362,7 @@ v3d_cl_lookup_bos(struct drm_device *dev
238 goto fail;
239 }
240 drm_gem_object_get(bo);
241 - exec->bo[i] = to_v3d_bo(bo);
242 + job->bo[i] = to_v3d_bo(bo);
243 }
244 spin_unlock(&file_priv->table_lock);
245
246 @@ -371,59 +372,41 @@ fail:
247 }
248
249 static void
250 -v3d_exec_cleanup(struct kref *ref)
251 +v3d_job_free(struct kref *ref)
252 {
253 - struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info,
254 - refcount);
255 - unsigned int i;
256 - struct v3d_bo *bo, *save;
257 -
258 - dma_fence_put(exec->bin.in_fence);
259 - dma_fence_put(exec->render.in_fence);
260 -
261 - dma_fence_put(exec->bin.irq_fence);
262 - dma_fence_put(exec->render.irq_fence);
263 -
264 - dma_fence_put(exec->bin_done_fence);
265 - dma_fence_put(exec->render_done_fence);
266 -
267 - for (i = 0; i < exec->bo_count; i++)
268 - drm_gem_object_put_unlocked(&exec->bo[i]->base);
269 - kvfree(exec->bo);
270 + struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
271 + int i;
272
273 - list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) {
274 - drm_gem_object_put_unlocked(&bo->base);
275 + for (i = 0; i < job->bo_count; i++) {
276 + if (job->bo[i])
277 + drm_gem_object_put_unlocked(&job->bo[i]->base);
278 }
279 + kvfree(job->bo);
280
281 - kfree(exec);
282 -}
283 + dma_fence_put(job->in_fence);
284 + dma_fence_put(job->irq_fence);
285 + dma_fence_put(job->done_fence);
286
287 -void v3d_exec_put(struct v3d_exec_info *exec)
288 -{
289 - kref_put(&exec->refcount, v3d_exec_cleanup);
290 + kfree(job);
291 }
292
293 static void
294 -v3d_tfu_job_cleanup(struct kref *ref)
295 +v3d_render_job_free(struct kref *ref)
296 {
297 - struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
298 - refcount);
299 - unsigned int i;
300 -
301 - dma_fence_put(job->in_fence);
302 - dma_fence_put(job->irq_fence);
303 + struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
304 + base.refcount);
305 + struct v3d_bo *bo, *save;
306
307 - for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
308 - if (job->bo[i])
309 - drm_gem_object_put_unlocked(&job->bo[i]->base);
310 + list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
311 + drm_gem_object_put_unlocked(&bo->base);
312 }
313
314 - kfree(job);
315 + v3d_job_free(ref);
316 }
317
318 -void v3d_tfu_job_put(struct v3d_tfu_job *job)
319 +void v3d_job_put(struct v3d_job *job)
320 {
321 - kref_put(&job->refcount, v3d_tfu_job_cleanup);
322 + kref_put(&job->refcount, job->free);
323 }
324
325 int
326 @@ -476,6 +459,65 @@ v3d_wait_bo_ioctl(struct drm_device *dev
327 return ret;
328 }
329
330 +static int
331 +v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
332 + struct v3d_job *job, void (*free)(struct kref *ref),
333 + u32 in_sync)
334 +{
335 + int ret;
336 +
337 + job->v3d = v3d;
338 + job->free = free;
339 +
340 + ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence);
341 + if (ret == -EINVAL)
342 + return ret;
343 +
344 + kref_init(&job->refcount);
345 +
346 + return 0;
347 +}
348 +
349 +static int
350 +v3d_push_job(struct v3d_file_priv *v3d_priv,
351 + struct v3d_job *job, enum v3d_queue queue)
352 +{
353 + int ret;
354 +
355 + ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
356 + v3d_priv);
357 + if (ret)
358 + return ret;
359 +
360 + job->done_fence = dma_fence_get(&job->base.s_fence->finished);
361 +
362 + /* put by scheduler job completion */
363 + kref_get(&job->refcount);
364 +
365 + drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]);
366 +
367 + return 0;
368 +}
369 +
370 +static void
371 +v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
372 + struct v3d_job *job,
373 + struct ww_acquire_ctx *acquire_ctx,
374 + u32 out_sync)
375 +{
376 + struct drm_syncobj *sync_out;
377 +
378 + v3d_attach_object_fences(job->bo, job->bo_count, job->done_fence);
379 + v3d_unlock_bo_reservations(job->bo, job->bo_count, acquire_ctx);
380 +
381 + /* Update the return sync object for the job */
382 + sync_out = drm_syncobj_find(file_priv, out_sync);
383 + if (sync_out) {
384 + drm_syncobj_replace_fence(sync_out, job->done_fence);
385 + drm_syncobj_put(sync_out);
386 + }
387 +}
388 +
389 /**
390 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
391 * @dev: DRM device
392 @@ -495,9 +537,9 @@ v3d_submit_cl_ioctl(struct drm_device *d
393 struct v3d_dev *v3d = to_v3d_dev(dev);
394 struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
395 struct drm_v3d_submit_cl *args = data;
396 - struct v3d_exec_info *exec;
397 + struct v3d_bin_job *bin = NULL;
398 + struct v3d_render_job *render;
399 struct ww_acquire_ctx acquire_ctx;
400 - struct drm_syncobj *sync_out;
401 int ret = 0;
402
403 trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
404 @@ -507,95 +549,84 @@ v3d_submit_cl_ioctl(struct drm_device *d
405 return -EINVAL;
406 }
407
408 - exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
409 - if (!exec)
410 + render = kcalloc(1, sizeof(*render), GFP_KERNEL);
411 + if (!render)
412 return -ENOMEM;
413
414 - kref_init(&exec->refcount);
415 + render->start = args->rcl_start;
416 + render->end = args->rcl_end;
417 + INIT_LIST_HEAD(&render->unref_list);
418
419 - ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
420 - 0, &exec->bin.in_fence);
421 - if (ret == -EINVAL)
422 - goto fail;
423 + ret = v3d_job_init(v3d, file_priv, &render->base,
424 + v3d_render_job_free, args->in_sync_rcl);
425 + if (ret) {
426 + kfree(bin);
427 + kfree(render);
428 + return ret;
429 + }
430
431 - ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
432 - 0, &exec->render.in_fence);
433 - if (ret == -EINVAL)
434 - goto fail;
435 + if (args->bcl_start != args->bcl_end) {
436 + bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
437 + if (!bin)
438 + return -ENOMEM;
439 +
440 + ret = v3d_job_init(v3d, file_priv, &bin->base,
441 + v3d_job_free, args->in_sync_bcl);
442 + if (ret) {
443 + v3d_job_put(&render->base);
444 + return ret;
445 + }
446
447 - exec->qma = args->qma;
448 - exec->qms = args->qms;
449 - exec->qts = args->qts;
450 - exec->bin.exec = exec;
451 - exec->bin.start = args->bcl_start;
452 - exec->bin.end = args->bcl_end;
453 - exec->render.exec = exec;
454 - exec->render.start = args->rcl_start;
455 - exec->render.end = args->rcl_end;
456 - exec->v3d = v3d;
457 - INIT_LIST_HEAD(&exec->unref_list);
458 + bin->start = args->bcl_start;
459 + bin->end = args->bcl_end;
460 + bin->qma = args->qma;
461 + bin->qms = args->qms;
462 + bin->qts = args->qts;
463 + bin->render = render;
464 + }
465
466 - ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
467 + ret = v3d_lookup_bos(dev, file_priv, &render->base,
468 + args->bo_handles, args->bo_handle_count);
469 if (ret)
470 goto fail;
471
472 - ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count,
473 + ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count,
474 &acquire_ctx);
475 if (ret)
476 goto fail;
477
478 mutex_lock(&v3d->sched_lock);
479 - if (exec->bin.start != exec->bin.end) {
480 - ret = drm_sched_job_init(&exec->bin.base,
481 - &v3d_priv->sched_entity[V3D_BIN],
482 - v3d_priv);
483 + if (bin) {
484 + ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
485 if (ret)
486 goto fail_unreserve;
487
488 - exec->bin_done_fence =
489 - dma_fence_get(&exec->bin.base.s_fence->finished);
490 -
491 - kref_get(&exec->refcount); /* put by scheduler job completion */
492 - drm_sched_entity_push_job(&exec->bin.base,
493 - &v3d_priv->sched_entity[V3D_BIN]);
494 + render->bin_done_fence = dma_fence_get(bin->base.done_fence);
495 }
496
497 - ret = drm_sched_job_init(&exec->render.base,
498 - &v3d_priv->sched_entity[V3D_RENDER],
499 - v3d_priv);
500 + ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
501 if (ret)
502 goto fail_unreserve;
503 -
504 - exec->render_done_fence =
505 - dma_fence_get(&exec->render.base.s_fence->finished);
506 -
507 - kref_get(&exec->refcount); /* put by scheduler job completion */
508 - drm_sched_entity_push_job(&exec->render.base,
509 - &v3d_priv->sched_entity[V3D_RENDER]);
510 mutex_unlock(&v3d->sched_lock);
511
512 - v3d_attach_object_fences(exec->bo, exec->bo_count,
513 - exec->render_done_fence);
514 -
515 - v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
516 -
517 - /* Update the return sync object for the */
518 - sync_out = drm_syncobj_find(file_priv, args->out_sync);
519 - if (sync_out) {
520 - drm_syncobj_replace_fence(sync_out,
521 - exec->render_done_fence);
522 - drm_syncobj_put(sync_out);
523 - }
524 -
525 - v3d_exec_put(exec);
526 + v3d_attach_fences_and_unlock_reservation(file_priv,
527 + &render->base, &acquire_ctx,
528 + args->out_sync);
529 +
530 + if (bin)
531 + v3d_job_put(&bin->base);
532 + v3d_job_put(&render->base);
533
534 return 0;
535
536 fail_unreserve:
537 mutex_unlock(&v3d->sched_lock);
538 - v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
539 + v3d_unlock_bo_reservations(render->base.bo,
540 + render->base.bo_count, &acquire_ctx);
541 fail:
542 - v3d_exec_put(exec);
543 + if (bin)
544 + v3d_job_put(&bin->base);
545 + v3d_job_put(&render->base);
546
547 return ret;
548 }
549 @@ -618,10 +649,7 @@ v3d_submit_tfu_ioctl(struct drm_device *
550 struct drm_v3d_submit_tfu *args = data;
551 struct v3d_tfu_job *job;
552 struct ww_acquire_ctx acquire_ctx;
553 - struct drm_syncobj *sync_out;
554 - struct dma_fence *sched_done_fence;
555 int ret = 0;
556 - int bo_count;
557
558 trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
559
560 @@ -629,75 +657,66 @@ v3d_submit_tfu_ioctl(struct drm_device *
561 if (!job)
562 return -ENOMEM;
563
564 - kref_init(&job->refcount);
565 -
566 - ret = drm_syncobj_find_fence(file_priv, args->in_sync,
567 - 0, &job->in_fence);
568 - if (ret == -EINVAL)
569 - goto fail;
570 + ret = v3d_job_init(v3d, file_priv, &job->base,
571 + v3d_job_free, args->in_sync);
572 + if (ret) {
573 + kfree(job);
574 + return ret;
575 + }
576
577 + job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
578 + sizeof(*job->base.bo), GFP_KERNEL);
579 job->args = *args;
580 - job->v3d = v3d;
581
582 spin_lock(&file_priv->table_lock);
583 - for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
584 + for (job->base.bo_count = 0;
585 + job->base.bo_count < ARRAY_SIZE(args->bo_handles);
586 + job->base.bo_count++) {
587 struct drm_gem_object *bo;
588
589 - if (!args->bo_handles[bo_count])
590 + if (!args->bo_handles[job->base.bo_count])
591 break;
592
593 bo = idr_find(&file_priv->object_idr,
594 - args->bo_handles[bo_count]);
595 + args->bo_handles[job->base.bo_count]);
596 if (!bo) {
597 DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
598 - bo_count, args->bo_handles[bo_count]);
599 + job->base.bo_count,
600 + args->bo_handles[job->base.bo_count]);
601 ret = -ENOENT;
602 spin_unlock(&file_priv->table_lock);
603 goto fail;
604 }
605 drm_gem_object_get(bo);
606 - job->bo[bo_count] = to_v3d_bo(bo);
607 + job->base.bo[job->base.bo_count] = to_v3d_bo(bo);
608 }
609 spin_unlock(&file_priv->table_lock);
610
611 - ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx);
612 + ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count,
613 + &acquire_ctx);
614 if (ret)
615 goto fail;
616
617 mutex_lock(&v3d->sched_lock);
618 - ret = drm_sched_job_init(&job->base,
619 - &v3d_priv->sched_entity[V3D_TFU],
620 - v3d_priv);
621 + ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU);
622 if (ret)
623 goto fail_unreserve;
624 -
625 - sched_done_fence = dma_fence_get(&job->base.s_fence->finished);
626 -
627 - kref_get(&job->refcount); /* put by scheduler job completion */
628 - drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
629 mutex_unlock(&v3d->sched_lock);
630
631 - v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);
632 -
633 - v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
634 -
635 - /* Update the return sync object */
636 - sync_out = drm_syncobj_find(file_priv, args->out_sync);
637 - if (sync_out) {
638 - drm_syncobj_replace_fence(sync_out, sched_done_fence);
639 - drm_syncobj_put(sync_out);
640 - }
641 - dma_fence_put(sched_done_fence);
642 + v3d_attach_fences_and_unlock_reservation(file_priv,
643 + &job->base, &acquire_ctx,
644 + args->out_sync);
645
646 - v3d_tfu_job_put(job);
647 + v3d_job_put(&job->base);
648
649 return 0;
650
651 fail_unreserve:
652 mutex_unlock(&v3d->sched_lock);
653 - v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
654 + v3d_unlock_bo_reservations(job->base.bo, job->base.bo_count,
655 + &acquire_ctx);
656 fail:
657 - v3d_tfu_job_put(job);
658 + v3d_job_put(&job->base);
659
660 return ret;
661 }
662 @@ -755,7 +774,7 @@ v3d_gem_destroy(struct drm_device *dev)
663
664 v3d_sched_fini(v3d);
665
666 - /* Waiting for exec to finish would need to be done before
667 + /* Waiting for jobs to finish would need to be done before
668 * unregistering V3D.
669 */
670 WARN_ON(v3d->bin_job);
671 --- a/drivers/gpu/drm/v3d/v3d_irq.c
672 +++ b/drivers/gpu/drm/v3d/v3d_irq.c
673 @@ -60,7 +60,7 @@ v3d_overflow_mem_work(struct work_struct
674 }
675
676 drm_gem_object_get(&bo->base);
677 - list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list);
678 + list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list);
679 spin_unlock_irqrestore(&v3d->job_lock, irqflags);
680
681 V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT);
682 @@ -93,7 +93,7 @@ v3d_irq(int irq, void *arg)
683
684 if (intsts & V3D_INT_FLDONE) {
685 struct v3d_fence *fence =
686 - to_v3d_fence(v3d->bin_job->bin.irq_fence);
687 + to_v3d_fence(v3d->bin_job->base.irq_fence);
688
689 trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
690 dma_fence_signal(&fence->base);
691 @@ -102,7 +102,7 @@ v3d_irq(int irq, void *arg)
692
693 if (intsts & V3D_INT_FRDONE) {
694 struct v3d_fence *fence =
695 - to_v3d_fence(v3d->render_job->render.irq_fence);
696 + to_v3d_fence(v3d->render_job->base.irq_fence);
697
698 trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
699 dma_fence_signal(&fence->base);
700 @@ -138,7 +138,7 @@ v3d_hub_irq(int irq, void *arg)
701
702 if (intsts & V3D_HUB_INT_TFUC) {
703 struct v3d_fence *fence =
704 - to_v3d_fence(v3d->tfu_job->irq_fence);
705 + to_v3d_fence(v3d->tfu_job->base.irq_fence);
706
707 trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
708 dma_fence_signal(&fence->base);
709 --- a/drivers/gpu/drm/v3d/v3d_sched.c
710 +++ b/drivers/gpu/drm/v3d/v3d_sched.c
711 @@ -30,39 +30,43 @@ to_v3d_job(struct drm_sched_job *sched_j
712 return container_of(sched_job, struct v3d_job, base);
713 }
714
715 -static struct v3d_tfu_job *
716 -to_tfu_job(struct drm_sched_job *sched_job)
717 +static struct v3d_bin_job *
718 +to_bin_job(struct drm_sched_job *sched_job)
719 {
720 - return container_of(sched_job, struct v3d_tfu_job, base);
721 + return container_of(sched_job, struct v3d_bin_job, base.base);
722 }
723
724 -static void
725 -v3d_job_free(struct drm_sched_job *sched_job)
726 +static struct v3d_render_job *
727 +to_render_job(struct drm_sched_job *sched_job)
728 {
729 - struct v3d_job *job = to_v3d_job(sched_job);
730 + return container_of(sched_job, struct v3d_render_job, base.base);
731 +}
732
733 - v3d_exec_put(job->exec);
734 +static struct v3d_tfu_job *
735 +to_tfu_job(struct drm_sched_job *sched_job)
736 +{
737 + return container_of(sched_job, struct v3d_tfu_job, base.base);
738 }
739
740 static void
741 -v3d_tfu_job_free(struct drm_sched_job *sched_job)
742 +v3d_job_free(struct drm_sched_job *sched_job)
743 {
744 - struct v3d_tfu_job *job = to_tfu_job(sched_job);
745 + struct v3d_job *job = to_v3d_job(sched_job);
746
747 - v3d_tfu_job_put(job);
748 + v3d_job_put(job);
749 }
750
751 /**
752 - * Returns the fences that the bin or render job depends on, one by one.
753 - * v3d_job_run() won't be called until all of them have been signaled.
754 + * Returns the fences that the job depends on, one by one.
755 + *
756 + * If placed in the scheduler's .dependency method, the corresponding
757 + * .run_job won't be called until all of them have been signaled.
758 */
759 static struct dma_fence *
760 v3d_job_dependency(struct drm_sched_job *sched_job,
761 struct drm_sched_entity *s_entity)
762 {
763 struct v3d_job *job = to_v3d_job(sched_job);
764 - struct v3d_exec_info *exec = job->exec;
765 - enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
766 struct dma_fence *fence;
767
768 fence = job->in_fence;
769 @@ -71,113 +75,132 @@ v3d_job_dependency(struct drm_sched_job
770 return fence;
771 }
772
773 - if (q == V3D_RENDER) {
774 - /* If we had a bin job, the render job definitely depends on
775 - * it. We first have to wait for bin to be scheduled, so that
776 - * its done_fence is created.
777 - */
778 - fence = exec->bin_done_fence;
779 - if (fence) {
780 - exec->bin_done_fence = NULL;
781 - return fence;
782 - }
783 - }
784 -
785 - /* XXX: Wait on a fence for switching the GMP if necessary,
786 - * and then do so.
787 - */
788 -
789 - return fence;
790 + return NULL;
791 }
792
793 /**
794 - * Returns the fences that the TFU job depends on, one by one.
795 - * v3d_tfu_job_run() won't be called until all of them have been
796 - * signaled.
797 + * Returns the fences that the render job depends on, one by one.
798 + * v3d_job_run() won't be called until all of them have been signaled.
799 */
800 static struct dma_fence *
801 -v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
802 - struct drm_sched_entity *s_entity)
803 +v3d_render_job_dependency(struct drm_sched_job *sched_job,
804 + struct drm_sched_entity *s_entity)
805 {
806 - struct v3d_tfu_job *job = to_tfu_job(sched_job);
807 + struct v3d_render_job *job = to_render_job(sched_job);
808 struct dma_fence *fence;
809
810 - fence = job->in_fence;
811 + fence = v3d_job_dependency(sched_job, s_entity);
812 + if (fence)
813 + return fence;
814 +
815 + /* If we had a bin job, the render job definitely depends on
816 + * it. We first have to wait for bin to be scheduled, so that
817 + * its done_fence is created.
818 + */
819 + fence = job->bin_done_fence;
820 if (fence) {
821 - job->in_fence = NULL;
822 + job->bin_done_fence = NULL;
823 return fence;
824 }
825
826 - return NULL;
827 + /* XXX: Wait on a fence for switching the GMP if necessary,
828 + * and then do so.
829 + */
830 +
831 + return fence;
832 }
833
834 -static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
835 +static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
836 {
837 - struct v3d_job *job = to_v3d_job(sched_job);
838 - struct v3d_exec_info *exec = job->exec;
839 - enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
840 - struct v3d_dev *v3d = exec->v3d;
841 + struct v3d_bin_job *job = to_bin_job(sched_job);
842 + struct v3d_dev *v3d = job->base.v3d;
843 struct drm_device *dev = &v3d->drm;
844 struct dma_fence *fence;
845 unsigned long irqflags;
846
847 - if (unlikely(job->base.s_fence->finished.error))
848 + if (unlikely(job->base.base.s_fence->finished.error))
849 return NULL;
850
851 /* Lock required around bin_job update vs
852 * v3d_overflow_mem_work().
853 */
854 spin_lock_irqsave(&v3d->job_lock, irqflags);
855 - if (q == V3D_BIN) {
856 - v3d->bin_job = job->exec;
857 + v3d->bin_job = job;
858 + /* Clear out the overflow allocation, so we don't
859 + * reuse the overflow attached to a previous job.
860 + */
861 + V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
862 + spin_unlock_irqrestore(&v3d->job_lock, irqflags);
863 +
864 + v3d_invalidate_caches(v3d);
865
866 - /* Clear out the overflow allocation, so we don't
867 - * reuse the overflow attached to a previous job.
868 - */
869 - V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
870 - } else {
871 - v3d->render_job = job->exec;
872 + fence = v3d_fence_create(v3d, V3D_BIN);
873 + if (IS_ERR(fence))
874 + return NULL;
875 +
876 + if (job->base.irq_fence)
877 + dma_fence_put(job->base.irq_fence);
878 + job->base.irq_fence = dma_fence_get(fence);
879 +
880 + trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
881 + job->start, job->end);
882 +
883 + /* Set the current and end address of the control list.
884 + * Writing the end register is what starts the job.
885 + */
886 + if (job->qma) {
887 + V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
888 + V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
889 }
890 - spin_unlock_irqrestore(&v3d->job_lock, irqflags);
891 + if (job->qts) {
892 + V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
893 + V3D_CLE_CT0QTS_ENABLE |
894 + job->qts);
895 + }
896 + V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
897 + V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
898 +
899 + return fence;
900 +}
901 +
902 +static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
903 +{
904 + struct v3d_render_job *job = to_render_job(sched_job);
905 + struct v3d_dev *v3d = job->base.v3d;
906 + struct drm_device *dev = &v3d->drm;
907 + struct dma_fence *fence;
908 +
909 + if (unlikely(job->base.base.s_fence->finished.error))
910 + return NULL;
911
912 - /* Can we avoid this flush when q==RENDER? We need to be
913 - * careful of scheduling, though -- imagine job0 rendering to
914 - * texture and job1 reading, and them being executed as bin0,
915 - * bin1, render0, render1, so that render1's flush at bin time
916 + v3d->render_job = job;
917 +
918 + /* Can we avoid this flush? We need to be careful of
919 + * scheduling, though -- imagine job0 rendering to texture and
920 + * job1 reading, and them being executed as bin0, bin1,
921 + * render0, render1, so that render1's flush at bin time
922 * wasn't enough.
923 */
924 v3d_invalidate_caches(v3d);
925
926 - fence = v3d_fence_create(v3d, q);
927 + fence = v3d_fence_create(v3d, V3D_RENDER);
928 if (IS_ERR(fence))
929 return NULL;
930
931 - if (job->irq_fence)
932 - dma_fence_put(job->irq_fence);
933 - job->irq_fence = dma_fence_get(fence);
934 + if (job->base.irq_fence)
935 + dma_fence_put(job->base.irq_fence);
936 + job->base.irq_fence = dma_fence_get(fence);
937
938 - trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
939 + trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
940 job->start, job->end);
941
942 - if (q == V3D_BIN) {
943 - if (exec->qma) {
944 - V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
945 - V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
946 - }
947 - if (exec->qts) {
948 - V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
949 - V3D_CLE_CT0QTS_ENABLE |
950 - exec->qts);
951 - }
952 - } else {
953 - /* XXX: Set the QCFG */
954 - }
955 + /* XXX: Set the QCFG */
956
957 /* Set the current and end address of the control list.
958 * Writing the end register is what starts the job.
959 */
960 - V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
961 - V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
962 + V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
963 + V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
964
965 return fence;
966 }
967 @@ -186,7 +209,7 @@ static struct dma_fence *
968 v3d_tfu_job_run(struct drm_sched_job *sched_job)
969 {
970 struct v3d_tfu_job *job = to_tfu_job(sched_job);
971 - struct v3d_dev *v3d = job->v3d;
972 + struct v3d_dev *v3d = job->base.v3d;
973 struct drm_device *dev = &v3d->drm;
974 struct dma_fence *fence;
975
976 @@ -195,9 +218,9 @@ v3d_tfu_job_run(struct drm_sched_job *sc
977 return NULL;
978
979 v3d->tfu_job = job;
980 - if (job->irq_fence)
981 - dma_fence_put(job->irq_fence);
982 - job->irq_fence = dma_fence_get(fence);
983 + if (job->base.irq_fence)
984 + dma_fence_put(job->base.irq_fence);
985 + job->base.irq_fence = dma_fence_get(fence);
986
987 trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
988
989 @@ -247,25 +270,23 @@ v3d_gpu_reset_for_timeout(struct v3d_dev
990 mutex_unlock(&v3d->reset_lock);
991 }
992
993 +/* If the current address or return address have changed, then the GPU
994 + * has probably made progress and we should delay the reset. This
995 + * could fail if the GPU got in an infinite loop in the CL, but that
996 + * is pretty unlikely outside of an i-g-t testcase.
997 + */
998 static void
999 -v3d_job_timedout(struct drm_sched_job *sched_job)
1000 +v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
1001 + u32 *timedout_ctca, u32 *timedout_ctra)
1002 {
1003 struct v3d_job *job = to_v3d_job(sched_job);
1004 - struct v3d_exec_info *exec = job->exec;
1005 - struct v3d_dev *v3d = exec->v3d;
1006 - enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
1007 - u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
1008 - u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
1009 -
1010 - /* If the current address or return address have changed, then
1011 - * the GPU has probably made progress and we should delay the
1012 - * reset. This could fail if the GPU got in an infinite loop
1013 - * in the CL, but that is pretty unlikely outside of an i-g-t
1014 - * testcase.
1015 - */
1016 - if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
1017 - job->timedout_ctca = ctca;
1018 - job->timedout_ctra = ctra;
1019 + struct v3d_dev *v3d = job->v3d;
1020 + u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
1021 + u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
1022 +
1023 + if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
1024 + *timedout_ctca = ctca;
1025 + *timedout_ctra = ctra;
1026 schedule_delayed_work(&job->base.work_tdr,
1027 job->base.sched->timeout);
1028 return;
1029 @@ -275,25 +296,50 @@ v3d_job_timedout(struct drm_sched_job *s
1030 }
1031
1032 static void
1033 +v3d_bin_job_timedout(struct drm_sched_job *sched_job)
1034 +{
1035 + struct v3d_bin_job *job = to_bin_job(sched_job);
1036 +
1037 + v3d_cl_job_timedout(sched_job, V3D_BIN,
1038 + &job->timedout_ctca, &job->timedout_ctra);
1039 +}
1040 +
1041 +static void
1042 +v3d_render_job_timedout(struct drm_sched_job *sched_job)
1043 +{
1044 + struct v3d_render_job *job = to_render_job(sched_job);
1045 +
1046 + v3d_cl_job_timedout(sched_job, V3D_RENDER,
1047 + &job->timedout_ctca, &job->timedout_ctra);
1048 +}
1049 +
1050 +static void
1051 v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
1052 {
1053 - struct v3d_tfu_job *job = to_tfu_job(sched_job);
1054 + struct v3d_job *job = to_v3d_job(sched_job);
1055
1056 v3d_gpu_reset_for_timeout(job->v3d, sched_job);
1057 }
1058
1059 -static const struct drm_sched_backend_ops v3d_sched_ops = {
1060 +static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
1061 .dependency = v3d_job_dependency,
1062 - .run_job = v3d_job_run,
1063 - .timedout_job = v3d_job_timedout,
1064 - .free_job = v3d_job_free
1065 + .run_job = v3d_bin_job_run,
1066 + .timedout_job = v3d_bin_job_timedout,
1067 + .free_job = v3d_job_free,
1068 +};
1069 +
1070 +static const struct drm_sched_backend_ops v3d_render_sched_ops = {
1071 + .dependency = v3d_render_job_dependency,
1072 + .run_job = v3d_render_job_run,
1073 + .timedout_job = v3d_render_job_timedout,
1074 + .free_job = v3d_job_free,
1075 };
1076
1077 static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
1078 - .dependency = v3d_tfu_job_dependency,
1079 + .dependency = v3d_job_dependency,
1080 .run_job = v3d_tfu_job_run,
1081 .timedout_job = v3d_tfu_job_timedout,
1082 - .free_job = v3d_tfu_job_free
1083 + .free_job = v3d_job_free,
1084 };
1085
1086 int
1087 @@ -305,7 +351,7 @@ v3d_sched_init(struct v3d_dev *v3d)
1088 int ret;
1089
1090 ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
1091 - &v3d_sched_ops,
1092 + &v3d_bin_sched_ops,
1093 hw_jobs_limit, job_hang_limit,
1094 msecs_to_jiffies(hang_limit_ms),
1095 "v3d_bin");
1096 @@ -315,7 +361,7 @@ v3d_sched_init(struct v3d_dev *v3d)
1097 }
1098
1099 ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
1100 - &v3d_sched_ops,
1101 + &v3d_render_sched_ops,
1102 hw_jobs_limit, job_hang_limit,
1103 msecs_to_jiffies(hang_limit_ms),
1104 "v3d_render");