target/linux/brcm2708/patches-4.19/950-0511-drm-v3d-Refactor-job-management.patch
1 From ccf319a0265bfdb4a622a52645f159461bc88079 Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Thu, 27 Dec 2018 12:11:52 -0800
4 Subject: [PATCH] drm/v3d: Refactor job management.
5
6 The CL submission had two jobs embedded in an exec struct. When I
7 added TFU support, I had to replicate some of the exec stuff and some
8 of the job stuff. As I went to add CSD, it became clear that actually
9 what was in exec should just be in the two CL jobs, and it would let
10 us share a lot more code between the 4 queues.
11
12 Signed-off-by: Eric Anholt <eric@anholt.net>
13 ---
14 drivers/gpu/drm/v3d/v3d_drv.h | 77 ++++----
15 drivers/gpu/drm/v3d/v3d_gem.c | 331 +++++++++++++++++---------------
16 drivers/gpu/drm/v3d/v3d_irq.c | 8 +-
17 drivers/gpu/drm/v3d/v3d_sched.c | 264 ++++++++++++++-----------
18 4 files changed, 373 insertions(+), 307 deletions(-)
19
20 --- a/drivers/gpu/drm/v3d/v3d_drv.h
21 +++ b/drivers/gpu/drm/v3d/v3d_drv.h
22 @@ -67,8 +67,8 @@ struct v3d_dev {
23
24 struct work_struct overflow_mem_work;
25
26 - struct v3d_exec_info *bin_job;
27 - struct v3d_exec_info *render_job;
28 + struct v3d_bin_job *bin_job;
29 + struct v3d_render_job *render_job;
30 struct v3d_tfu_job *tfu_job;
31
32 struct v3d_queue_state queue[V3D_MAX_QUEUES];
33 @@ -132,7 +132,7 @@ struct v3d_bo {
34 struct list_head vmas; /* list of v3d_vma */
35
36 /* List entry for the BO's position in
37 - * v3d_exec_info->unref_list
38 + * v3d_render_job->unref_list
39 */
40 struct list_head unref_head;
41
42 @@ -176,7 +176,15 @@ to_v3d_fence(struct dma_fence *fence)
43 struct v3d_job {
44 struct drm_sched_job base;
45
46 - struct v3d_exec_info *exec;
47 + struct kref refcount;
48 +
49 + struct v3d_dev *v3d;
50 +
51 + /* This is the array of BOs that were looked up at the start
52 + * of submission.
53 + */
54 + struct v3d_bo **bo;
55 + u32 bo_count;
56
57 /* An optional fence userspace can pass in for the job to depend on. */
58 struct dma_fence *in_fence;
59 @@ -184,59 +192,53 @@ struct v3d_job {
60 /* v3d fence to be signaled by IRQ handler when the job is complete. */
61 struct dma_fence *irq_fence;
62
63 + /* scheduler fence for when the job is considered complete and
64 + * the BO reservations can be released.
65 + */
66 + struct dma_fence *done_fence;
67 +
68 + /* Callback for the freeing of the job on refcount going to 0. */
69 + void (*free)(struct kref *ref);
70 +};
71 +
72 +struct v3d_bin_job {
73 + struct v3d_job base;
74 +
75 /* GPU virtual addresses of the start/end of the CL job. */
76 u32 start, end;
77
78 u32 timedout_ctca, timedout_ctra;
79 -};
80
81 -struct v3d_exec_info {
82 - struct v3d_dev *v3d;
83 + /* Corresponding render job, for attaching our overflow memory. */
84 + struct v3d_render_job *render;
85 +
86 + /* Submitted tile memory allocation start/size, tile state. */
87 + u32 qma, qms, qts;
88 +};
89
90 - struct v3d_job bin, render;
91 +struct v3d_render_job {
92 + struct v3d_job base;
93
94 - /* Fence for when the scheduler considers the binner to be
95 - * done, for render to depend on.
96 + /* Optional fence for the binner, to depend on before starting
97 + * our job.
98 */
99 struct dma_fence *bin_done_fence;
100
101 - /* Fence for when the scheduler considers the render to be
102 - * done, for when the BOs reservations should be complete.
103 - */
104 - struct dma_fence *render_done_fence;
105 -
106 - struct kref refcount;
107 + /* GPU virtual addresses of the start/end of the CL job. */
108 + u32 start, end;
109
110 - /* This is the array of BOs that were looked up at the start of exec. */
111 - struct v3d_bo **bo;
112 - u32 bo_count;
113 + u32 timedout_ctca, timedout_ctra;
114
115 /* List of overflow BOs used in the job that need to be
116 * released once the job is complete.
117 */
118 struct list_head unref_list;
119 -
120 - /* Submitted tile memory allocation start/size, tile state. */
121 - u32 qma, qms, qts;
122 };
123
124 struct v3d_tfu_job {
125 - struct drm_sched_job base;
126 + struct v3d_job base;
127
128 struct drm_v3d_submit_tfu args;
129 -
130 - /* An optional fence userspace can pass in for the job to depend on. */
131 - struct dma_fence *in_fence;
132 -
133 - /* v3d fence to be signaled by IRQ handler when the job is complete. */
134 - struct dma_fence *irq_fence;
135 -
136 - struct v3d_dev *v3d;
137 -
138 - struct kref refcount;
139 -
140 - /* This is the array of BOs that were looked up at the start of exec. */
141 - struct v3d_bo *bo[4];
142 };
143
144 /**
145 @@ -306,8 +308,7 @@ int v3d_submit_tfu_ioctl(struct drm_devi
146 struct drm_file *file_priv);
147 int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
148 struct drm_file *file_priv);
149 -void v3d_exec_put(struct v3d_exec_info *exec);
150 -void v3d_tfu_job_put(struct v3d_tfu_job *exec);
151 +void v3d_job_put(struct v3d_job *job);
152 void v3d_reset(struct v3d_dev *v3d);
153 void v3d_invalidate_caches(struct v3d_dev *v3d);
154
155 --- a/drivers/gpu/drm/v3d/v3d_gem.c
156 +++ b/drivers/gpu/drm/v3d/v3d_gem.c
157 @@ -293,11 +293,11 @@ retry:
158 }
159
160 /**
161 - * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
162 + * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
163 * referenced by the job.
164 * @dev: DRM device
165 * @file_priv: DRM file for this fd
166 - * @exec: V3D job being set up
167 + * @job: V3D job being set up
168 *
169 * The command validator needs to reference BOs by their index within
170 * the submitted job's BO list. This does the validation of the job's
171 @@ -307,18 +307,19 @@ retry:
172 * failure, because that will happen at v3d_exec_cleanup() time.
173 */
174 static int
175 -v3d_cl_lookup_bos(struct drm_device *dev,
176 - struct drm_file *file_priv,
177 - struct drm_v3d_submit_cl *args,
178 - struct v3d_exec_info *exec)
179 +v3d_lookup_bos(struct drm_device *dev,
180 + struct drm_file *file_priv,
181 + struct v3d_job *job,
182 + u64 bo_handles,
183 + u32 bo_count)
184 {
185 u32 *handles;
186 int ret = 0;
187 int i;
188
189 - exec->bo_count = args->bo_handle_count;
190 + job->bo_count = bo_count;
191
192 - if (!exec->bo_count) {
193 + if (!job->bo_count) {
194 /* See comment on bo_index for why we have to check
195 * this.
196 */
197 @@ -326,15 +327,15 @@ v3d_cl_lookup_bos(struct drm_device *dev
198 return -EINVAL;
199 }
200
201 - exec->bo = kvmalloc_array(exec->bo_count,
202 - sizeof(struct drm_gem_cma_object *),
203 - GFP_KERNEL | __GFP_ZERO);
204 - if (!exec->bo) {
205 + job->bo = kvmalloc_array(job->bo_count,
206 + sizeof(struct drm_gem_cma_object *),
207 + GFP_KERNEL | __GFP_ZERO);
208 + if (!job->bo) {
209 DRM_DEBUG("Failed to allocate validated BO pointers\n");
210 return -ENOMEM;
211 }
212
213 - handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL);
214 + handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
215 if (!handles) {
216 ret = -ENOMEM;
217 DRM_DEBUG("Failed to allocate incoming GEM handles\n");
218 @@ -342,15 +343,15 @@ v3d_cl_lookup_bos(struct drm_device *dev
219 }
220
221 if (copy_from_user(handles,
222 - (void __user *)(uintptr_t)args->bo_handles,
223 - exec->bo_count * sizeof(u32))) {
224 + (void __user *)(uintptr_t)bo_handles,
225 + job->bo_count * sizeof(u32))) {
226 ret = -EFAULT;
227 DRM_DEBUG("Failed to copy in GEM handles\n");
228 goto fail;
229 }
230
231 spin_lock(&file_priv->table_lock);
232 - for (i = 0; i < exec->bo_count; i++) {
233 + for (i = 0; i < job->bo_count; i++) {
234 struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
235 handles[i]);
236 if (!bo) {
237 @@ -361,7 +362,7 @@ v3d_cl_lookup_bos(struct drm_device *dev
238 goto fail;
239 }
240 drm_gem_object_get(bo);
241 - exec->bo[i] = to_v3d_bo(bo);
242 + job->bo[i] = to_v3d_bo(bo);
243 }
244 spin_unlock(&file_priv->table_lock);
245
246 @@ -371,59 +372,41 @@ fail:
247 }
248
249 static void
250 -v3d_exec_cleanup(struct kref *ref)
251 +v3d_job_free(struct kref *ref)
252 {
253 - struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info,
254 - refcount);
255 - unsigned int i;
256 - struct v3d_bo *bo, *save;
257 -
258 - dma_fence_put(exec->bin.in_fence);
259 - dma_fence_put(exec->render.in_fence);
260 -
261 - dma_fence_put(exec->bin.irq_fence);
262 - dma_fence_put(exec->render.irq_fence);
263 -
264 - dma_fence_put(exec->bin_done_fence);
265 - dma_fence_put(exec->render_done_fence);
266 -
267 - for (i = 0; i < exec->bo_count; i++)
268 - drm_gem_object_put_unlocked(&exec->bo[i]->base);
269 - kvfree(exec->bo);
270 + struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
271 + int i;
272
273 - list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) {
274 - drm_gem_object_put_unlocked(&bo->base);
275 + for (i = 0; i < job->bo_count; i++) {
276 + if (job->bo[i])
277 + drm_gem_object_put_unlocked(&job->bo[i]->base);
278 }
279 + kvfree(job->bo);
280
281 - kfree(exec);
282 -}
283 + dma_fence_put(job->in_fence);
284 + dma_fence_put(job->irq_fence);
285 + dma_fence_put(job->done_fence);
286
287 -void v3d_exec_put(struct v3d_exec_info *exec)
288 -{
289 - kref_put(&exec->refcount, v3d_exec_cleanup);
290 + kfree(job);
291 }
292
293 static void
294 -v3d_tfu_job_cleanup(struct kref *ref)
295 +v3d_render_job_free(struct kref *ref)
296 {
297 - struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
298 - refcount);
299 - unsigned int i;
300 -
301 - dma_fence_put(job->in_fence);
302 - dma_fence_put(job->irq_fence);
303 + struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
304 + base.refcount);
305 + struct v3d_bo *bo, *save;
306
307 - for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
308 - if (job->bo[i])
309 - drm_gem_object_put_unlocked(&job->bo[i]->base);
310 + list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
311 + drm_gem_object_put_unlocked(&bo->base);
312 }
313
314 - kfree(job);
315 + v3d_job_free(ref);
316 }
317
318 -void v3d_tfu_job_put(struct v3d_tfu_job *job)
319 +void v3d_job_put(struct v3d_job *job)
320 {
321 - kref_put(&job->refcount, v3d_tfu_job_cleanup);
322 + kref_put(&job->refcount, job->free);
323 }
324
325 int
326 @@ -476,6 +459,65 @@ v3d_wait_bo_ioctl(struct drm_device *dev
327 return ret;
328 }
329
330 +static int
331 +v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
332 + struct v3d_job *job, void (*free)(struct kref *ref),
333 + u32 in_sync)
334 +{
335 + int ret;
336 +
337 + job->v3d = v3d;
338 + job->free = free;
339 +
340 + ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence);
341 + if (ret == -EINVAL)
342 + return ret;
343 +
344 + kref_init(&job->refcount);
345 +
346 + return 0;
347 +}
348 +
349 +static int
350 +v3d_push_job(struct v3d_file_priv *v3d_priv,
351 + struct v3d_job *job, enum v3d_queue queue)
352 +{
353 + int ret;
354 +
355 + ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
356 + v3d_priv);
357 + if (ret)
358 + return ret;
359 +
360 + job->done_fence = dma_fence_get(&job->base.s_fence->finished);
361 +
362 + /* put by scheduler job completion */
363 + kref_get(&job->refcount);
364 +
365 + drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]);
366 +
367 + return 0;
368 +}
369 +
370 +static void
371 +v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
372 + struct v3d_job *job,
373 + struct ww_acquire_ctx *acquire_ctx,
374 + u32 out_sync)
375 +{
376 + struct drm_syncobj *sync_out;
377 +
378 + v3d_attach_object_fences(job->bo, job->bo_count, job->done_fence);
379 + v3d_unlock_bo_reservations(job->bo, job->bo_count, acquire_ctx);
380 +
381 + /* Update the return sync object for the job */
382 + sync_out = drm_syncobj_find(file_priv, out_sync);
383 + if (sync_out) {
384 + drm_syncobj_replace_fence(sync_out, job->done_fence);
385 + drm_syncobj_put(sync_out);
386 + }
387 +}
388 +
389 /**
390 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
391 * @dev: DRM device
392 @@ -495,9 +537,9 @@ v3d_submit_cl_ioctl(struct drm_device *d
393 struct v3d_dev *v3d = to_v3d_dev(dev);
394 struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
395 struct drm_v3d_submit_cl *args = data;
396 - struct v3d_exec_info *exec;
397 + struct v3d_bin_job *bin = NULL;
398 + struct v3d_render_job *render;
399 struct ww_acquire_ctx acquire_ctx;
400 - struct drm_syncobj *sync_out;
401 int ret = 0;
402
403 trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
404 @@ -507,95 +549,84 @@ v3d_submit_cl_ioctl(struct drm_device *d
405 return -EINVAL;
406 }
407
408 - exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
409 - if (!exec)
410 + render = kcalloc(1, sizeof(*render), GFP_KERNEL);
411 + if (!render)
412 return -ENOMEM;
413
414 - kref_init(&exec->refcount);
415 + render->start = args->rcl_start;
416 + render->end = args->rcl_end;
417 + INIT_LIST_HEAD(&render->unref_list);
418
419 - ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
420 - 0, &exec->bin.in_fence);
421 - if (ret == -EINVAL)
422 - goto fail;
423 + ret = v3d_job_init(v3d, file_priv, &render->base,
424 + v3d_render_job_free, args->in_sync_rcl);
425 + if (ret) {
426 + kfree(bin);
427 + kfree(render);
428 + return ret;
429 + }
430
431 - ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
432 - 0, &exec->render.in_fence);
433 - if (ret == -EINVAL)
434 - goto fail;
435 + if (args->bcl_start != args->bcl_end) {
436 + bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
437 + if (!bin)
438 + return -ENOMEM;
439 +
440 + ret = v3d_job_init(v3d, file_priv, &bin->base,
441 + v3d_job_free, args->in_sync_bcl);
442 + if (ret) {
443 + v3d_job_put(&render->base);
444 + return ret;
445 + }
446
447 - exec->qma = args->qma;
448 - exec->qms = args->qms;
449 - exec->qts = args->qts;
450 - exec->bin.exec = exec;
451 - exec->bin.start = args->bcl_start;
452 - exec->bin.end = args->bcl_end;
453 - exec->render.exec = exec;
454 - exec->render.start = args->rcl_start;
455 - exec->render.end = args->rcl_end;
456 - exec->v3d = v3d;
457 - INIT_LIST_HEAD(&exec->unref_list);
458 + bin->start = args->bcl_start;
459 + bin->end = args->bcl_end;
460 + bin->qma = args->qma;
461 + bin->qms = args->qms;
462 + bin->qts = args->qts;
463 + bin->render = render;
464 + }
465
466 - ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
467 + ret = v3d_lookup_bos(dev, file_priv, &render->base,
468 + args->bo_handles, args->bo_handle_count);
469 if (ret)
470 goto fail;
471
472 - ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count,
473 + ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count,
474 &acquire_ctx);
475 if (ret)
476 goto fail;
477
478 mutex_lock(&v3d->sched_lock);
479 - if (exec->bin.start != exec->bin.end) {
480 - ret = drm_sched_job_init(&exec->bin.base,
481 - &v3d_priv->sched_entity[V3D_BIN],
482 - v3d_priv);
483 + if (bin) {
484 + ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
485 if (ret)
486 goto fail_unreserve;
487
488 - exec->bin_done_fence =
489 - dma_fence_get(&exec->bin.base.s_fence->finished);
490 -
491 - kref_get(&exec->refcount); /* put by scheduler job completion */
492 - drm_sched_entity_push_job(&exec->bin.base,
493 - &v3d_priv->sched_entity[V3D_BIN]);
494 + render->bin_done_fence = dma_fence_get(bin->base.done_fence);
495 }
496
497 - ret = drm_sched_job_init(&exec->render.base,
498 - &v3d_priv->sched_entity[V3D_RENDER],
499 - v3d_priv);
500 + ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
501 if (ret)
502 goto fail_unreserve;
503 -
504 - exec->render_done_fence =
505 - dma_fence_get(&exec->render.base.s_fence->finished);
506 -
507 - kref_get(&exec->refcount); /* put by scheduler job completion */
508 - drm_sched_entity_push_job(&exec->render.base,
509 - &v3d_priv->sched_entity[V3D_RENDER]);
510 mutex_unlock(&v3d->sched_lock);
511
512 - v3d_attach_object_fences(exec->bo, exec->bo_count,
513 - exec->render_done_fence);
514 -
515 - v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
516 -
517 - /* Update the return sync object for the */
518 - sync_out = drm_syncobj_find(file_priv, args->out_sync);
519 - if (sync_out) {
520 - drm_syncobj_replace_fence(sync_out,
521 - exec->render_done_fence);
522 - drm_syncobj_put(sync_out);
523 - }
524 -
525 - v3d_exec_put(exec);
526 + v3d_attach_fences_and_unlock_reservation(file_priv,
527 + &render->base, &acquire_ctx,
528 + args->out_sync);
529 +
530 + if (bin)
531 + v3d_job_put(&bin->base);
532 + v3d_job_put(&render->base);
533
534 return 0;
535
536 fail_unreserve:
537 mutex_unlock(&v3d->sched_lock);
538 - v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
539 + v3d_unlock_bo_reservations(render->base.bo,
540 + render->base.bo_count, &acquire_ctx);
541 fail:
542 - v3d_exec_put(exec);
543 + if (bin)
544 + v3d_job_put(&bin->base);
545 + v3d_job_put(&render->base);
546
547 return ret;
548 }
549 @@ -618,10 +649,7 @@ v3d_submit_tfu_ioctl(struct drm_device *
550 struct drm_v3d_submit_tfu *args = data;
551 struct v3d_tfu_job *job;
552 struct ww_acquire_ctx acquire_ctx;
553 - struct drm_syncobj *sync_out;
554 - struct dma_fence *sched_done_fence;
555 int ret = 0;
556 - int bo_count;
557
558 trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
559
560 @@ -629,75 +657,66 @@ v3d_submit_tfu_ioctl(struct drm_device *
561 if (!job)
562 return -ENOMEM;
563
564 - kref_init(&job->refcount);
565 -
566 - ret = drm_syncobj_find_fence(file_priv, args->in_sync,
567 - 0, &job->in_fence);
568 - if (ret == -EINVAL)
569 - goto fail;
570 + ret = v3d_job_init(v3d, file_priv, &job->base,
571 + v3d_job_free, args->in_sync);
572 + if (ret) {
573 + kfree(job);
574 + return ret;
575 + }
576
577 + job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
578 + sizeof(*job->base.bo), GFP_KERNEL);
579 job->args = *args;
580 - job->v3d = v3d;
581
582 spin_lock(&file_priv->table_lock);
583 - for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
584 + for (job->base.bo_count = 0;
585 + job->base.bo_count < ARRAY_SIZE(args->bo_handles);
586 + job->base.bo_count++) {
587 struct drm_gem_object *bo;
588
589 - if (!args->bo_handles[bo_count])
590 + if (!args->bo_handles[job->base.bo_count])
591 break;
592
593 bo = idr_find(&file_priv->object_idr,
594 - args->bo_handles[bo_count]);
595 + args->bo_handles[job->base.bo_count]);
596 if (!bo) {
597 DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
598 - bo_count, args->bo_handles[bo_count]);
599 + job->base.bo_count,
600 + args->bo_handles[job->base.bo_count]);
601 ret = -ENOENT;
602 spin_unlock(&file_priv->table_lock);
603 goto fail;
604 }
605 drm_gem_object_get(bo);
606 - job->bo[bo_count] = to_v3d_bo(bo);
607 + job->base.bo[job->base.bo_count] = to_v3d_bo(bo);
608 }
609 spin_unlock(&file_priv->table_lock);
610
611 - ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx);
612 + ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count,
613 + &acquire_ctx);
614 if (ret)
615 goto fail;
616
617 mutex_lock(&v3d->sched_lock);
618 - ret = drm_sched_job_init(&job->base,
619 - &v3d_priv->sched_entity[V3D_TFU],
620 - v3d_priv);
621 + ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU);
622 if (ret)
623 goto fail_unreserve;
624 -
625 - sched_done_fence = dma_fence_get(&job->base.s_fence->finished);
626 -
627 - kref_get(&job->refcount); /* put by scheduler job completion */
628 - drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
629 mutex_unlock(&v3d->sched_lock);
630
631 - v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);
632 -
633 - v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
634 -
635 - /* Update the return sync object */
636 - sync_out = drm_syncobj_find(file_priv, args->out_sync);
637 - if (sync_out) {
638 - drm_syncobj_replace_fence(sync_out, sched_done_fence);
639 - drm_syncobj_put(sync_out);
640 - }
641 - dma_fence_put(sched_done_fence);
642 + v3d_attach_fences_and_unlock_reservation(file_priv,
643 + &job->base, &acquire_ctx,
644 + args->out_sync);
645
646 - v3d_tfu_job_put(job);
647 + v3d_job_put(&job->base);
648
649 return 0;
650
651 fail_unreserve:
652 mutex_unlock(&v3d->sched_lock);
653 - v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
654 + v3d_unlock_bo_reservations(job->base.bo, job->base.bo_count,
655 + &acquire_ctx);
656 fail:
657 - v3d_tfu_job_put(job);
658 + v3d_job_put(&job->base);
659
660 return ret;
661 }
662 @@ -755,7 +774,7 @@ v3d_gem_destroy(struct drm_device *dev)
663
664 v3d_sched_fini(v3d);
665
666 - /* Waiting for exec to finish would need to be done before
667 + /* Waiting for jobs to finish would need to be done before
668 * unregistering V3D.
669 */
670 WARN_ON(v3d->bin_job);
671 --- a/drivers/gpu/drm/v3d/v3d_irq.c
672 +++ b/drivers/gpu/drm/v3d/v3d_irq.c
673 @@ -60,7 +60,7 @@ v3d_overflow_mem_work(struct work_struct
674 }
675
676 drm_gem_object_get(&bo->base);
677 - list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list);
678 + list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list);
679 spin_unlock_irqrestore(&v3d->job_lock, irqflags);
680
681 V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT);
682 @@ -93,7 +93,7 @@ v3d_irq(int irq, void *arg)
683
684 if (intsts & V3D_INT_FLDONE) {
685 struct v3d_fence *fence =
686 - to_v3d_fence(v3d->bin_job->bin.irq_fence);
687 + to_v3d_fence(v3d->bin_job->base.irq_fence);
688
689 trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
690 dma_fence_signal(&fence->base);
691 @@ -102,7 +102,7 @@ v3d_irq(int irq, void *arg)
692
693 if (intsts & V3D_INT_FRDONE) {
694 struct v3d_fence *fence =
695 - to_v3d_fence(v3d->render_job->render.irq_fence);
696 + to_v3d_fence(v3d->render_job->base.irq_fence);
697
698 trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
699 dma_fence_signal(&fence->base);
700 @@ -138,7 +138,7 @@ v3d_hub_irq(int irq, void *arg)
701
702 if (intsts & V3D_HUB_INT_TFUC) {
703 struct v3d_fence *fence =
704 - to_v3d_fence(v3d->tfu_job->irq_fence);
705 + to_v3d_fence(v3d->tfu_job->base.irq_fence);
706
707 trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
708 dma_fence_signal(&fence->base);
709 --- a/drivers/gpu/drm/v3d/v3d_sched.c
710 +++ b/drivers/gpu/drm/v3d/v3d_sched.c
711 @@ -30,39 +30,43 @@ to_v3d_job(struct drm_sched_job *sched_j
712 return container_of(sched_job, struct v3d_job, base);
713 }
714
715 -static struct v3d_tfu_job *
716 -to_tfu_job(struct drm_sched_job *sched_job)
717 +static struct v3d_bin_job *
718 +to_bin_job(struct drm_sched_job *sched_job)
719 {
720 - return container_of(sched_job, struct v3d_tfu_job, base);
721 + return container_of(sched_job, struct v3d_bin_job, base.base);
722 }
723
724 -static void
725 -v3d_job_free(struct drm_sched_job *sched_job)
726 +static struct v3d_render_job *
727 +to_render_job(struct drm_sched_job *sched_job)
728 {
729 - struct v3d_job *job = to_v3d_job(sched_job);
730 + return container_of(sched_job, struct v3d_render_job, base.base);
731 +}
732
733 - v3d_exec_put(job->exec);
734 +static struct v3d_tfu_job *
735 +to_tfu_job(struct drm_sched_job *sched_job)
736 +{
737 + return container_of(sched_job, struct v3d_tfu_job, base.base);
738 }
739
740 static void
741 -v3d_tfu_job_free(struct drm_sched_job *sched_job)
742 +v3d_job_free(struct drm_sched_job *sched_job)
743 {
744 - struct v3d_tfu_job *job = to_tfu_job(sched_job);
745 + struct v3d_job *job = to_v3d_job(sched_job);
746
747 - v3d_tfu_job_put(job);
748 + v3d_job_put(job);
749 }
750
751 /**
752 - * Returns the fences that the bin or render job depends on, one by one.
753 - * v3d_job_run() won't be called until all of them have been signaled.
754 + * Returns the fences that the job depends on, one by one.
755 + *
756 + * If placed in the scheduler's .dependency method, the corresponding
757 + * .run_job won't be called until all of them have been signaled.
758 */
759 static struct dma_fence *
760 v3d_job_dependency(struct drm_sched_job *sched_job,
761 struct drm_sched_entity *s_entity)
762 {
763 struct v3d_job *job = to_v3d_job(sched_job);
764 - struct v3d_exec_info *exec = job->exec;
765 - enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
766 struct dma_fence *fence;
767
768 fence = job->in_fence;
769 @@ -71,113 +75,132 @@ v3d_job_dependency(struct drm_sched_job
770 return fence;
771 }
772
773 - if (q == V3D_RENDER) {
774 - /* If we had a bin job, the render job definitely depends on
775 - * it. We first have to wait for bin to be scheduled, so that
776 - * its done_fence is created.
777 - */
778 - fence = exec->bin_done_fence;
779 - if (fence) {
780 - exec->bin_done_fence = NULL;
781 - return fence;
782 - }
783 - }
784 -
785 - /* XXX: Wait on a fence for switching the GMP if necessary,
786 - * and then do so.
787 - */
788 -
789 - return fence;
790 + return NULL;
791 }
792
793 /**
794 - * Returns the fences that the TFU job depends on, one by one.
795 - * v3d_tfu_job_run() won't be called until all of them have been
796 - * signaled.
797 + * Returns the fences that the render job depends on, one by one.
798 + * v3d_job_run() won't be called until all of them have been signaled.
799 */
800 static struct dma_fence *
801 -v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
802 - struct drm_sched_entity *s_entity)
803 +v3d_render_job_dependency(struct drm_sched_job *sched_job,
804 + struct drm_sched_entity *s_entity)
805 {
806 - struct v3d_tfu_job *job = to_tfu_job(sched_job);
807 + struct v3d_render_job *job = to_render_job(sched_job);
808 struct dma_fence *fence;
809
810 - fence = job->in_fence;
811 + fence = v3d_job_dependency(sched_job, s_entity);
812 + if (fence)
813 + return fence;
814 +
815 + /* If we had a bin job, the render job definitely depends on
816 + * it. We first have to wait for bin to be scheduled, so that
817 + * its done_fence is created.
818 + */
819 + fence = job->bin_done_fence;
820 if (fence) {
821 - job->in_fence = NULL;
822 + job->bin_done_fence = NULL;
823 return fence;
824 }
825
826 - return NULL;
827 + /* XXX: Wait on a fence for switching the GMP if necessary,
828 + * and then do so.
829 + */
830 +
831 + return fence;
832 }
833
834 -static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
835 +static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
836 {
837 - struct v3d_job *job = to_v3d_job(sched_job);
838 - struct v3d_exec_info *exec = job->exec;
839 - enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
840 - struct v3d_dev *v3d = exec->v3d;
841 + struct v3d_bin_job *job = to_bin_job(sched_job);
842 + struct v3d_dev *v3d = job->base.v3d;
843 struct drm_device *dev = &v3d->drm;
844 struct dma_fence *fence;
845 unsigned long irqflags;
846
847 - if (unlikely(job->base.s_fence->finished.error))
848 + if (unlikely(job->base.base.s_fence->finished.error))
849 return NULL;
850
851 /* Lock required around bin_job update vs
852 * v3d_overflow_mem_work().
853 */
854 spin_lock_irqsave(&v3d->job_lock, irqflags);
855 - if (q == V3D_BIN) {
856 - v3d->bin_job = job->exec;
857 + v3d->bin_job = job;
858 + /* Clear out the overflow allocation, so we don't
859 + * reuse the overflow attached to a previous job.
860 + */
861 + V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
862 + spin_unlock_irqrestore(&v3d->job_lock, irqflags);
863 +
864 + v3d_invalidate_caches(v3d);
865
866 - /* Clear out the overflow allocation, so we don't
867 - * reuse the overflow attached to a previous job.
868 - */
869 - V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
870 - } else {
871 - v3d->render_job = job->exec;
872 + fence = v3d_fence_create(v3d, V3D_BIN);
873 + if (IS_ERR(fence))
874 + return NULL;
875 +
876 + if (job->base.irq_fence)
877 + dma_fence_put(job->base.irq_fence);
878 + job->base.irq_fence = dma_fence_get(fence);
879 +
880 + trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
881 + job->start, job->end);
882 +
883 + /* Set the current and end address of the control list.
884 + * Writing the end register is what starts the job.
885 + */
886 + if (job->qma) {
887 + V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
888 + V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
889 }
890 - spin_unlock_irqrestore(&v3d->job_lock, irqflags);
891 + if (job->qts) {
892 + V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
893 + V3D_CLE_CT0QTS_ENABLE |
894 + job->qts);
895 + }
896 + V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
897 + V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
898 +
899 + return fence;
900 +}
901 +
902 +static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
903 +{
904 + struct v3d_render_job *job = to_render_job(sched_job);
905 + struct v3d_dev *v3d = job->base.v3d;
906 + struct drm_device *dev = &v3d->drm;
907 + struct dma_fence *fence;
908 +
909 + if (unlikely(job->base.base.s_fence->finished.error))
910 + return NULL;
911
912 - /* Can we avoid this flush when q==RENDER? We need to be
913 - * careful of scheduling, though -- imagine job0 rendering to
914 - * texture and job1 reading, and them being executed as bin0,
915 - * bin1, render0, render1, so that render1's flush at bin time
916 + v3d->render_job = job;
917 +
918 + /* Can we avoid this flush? We need to be careful of
919 + * scheduling, though -- imagine job0 rendering to texture and
920 + * job1 reading, and them being executed as bin0, bin1,
921 + * render0, render1, so that render1's flush at bin time
922 * wasn't enough.
923 */
924 v3d_invalidate_caches(v3d);
925
926 - fence = v3d_fence_create(v3d, q);
927 + fence = v3d_fence_create(v3d, V3D_RENDER);
928 if (IS_ERR(fence))
929 return NULL;
930
931 - if (job->irq_fence)
932 - dma_fence_put(job->irq_fence);
933 - job->irq_fence = dma_fence_get(fence);
934 + if (job->base.irq_fence)
935 + dma_fence_put(job->base.irq_fence);
936 + job->base.irq_fence = dma_fence_get(fence);
937
938 - trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
939 + trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
940 job->start, job->end);
941
942 - if (q == V3D_BIN) {
943 - if (exec->qma) {
944 - V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
945 - V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
946 - }
947 - if (exec->qts) {
948 - V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
949 - V3D_CLE_CT0QTS_ENABLE |
950 - exec->qts);
951 - }
952 - } else {
953 - /* XXX: Set the QCFG */
954 - }
955 + /* XXX: Set the QCFG */
956
957 /* Set the current and end address of the control list.
958 * Writing the end register is what starts the job.
959 */
960 - V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
961 - V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
962 + V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
963 + V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
964
965 return fence;
966 }
967 @@ -186,7 +209,7 @@ static struct dma_fence *
968 v3d_tfu_job_run(struct drm_sched_job *sched_job)
969 {
970 struct v3d_tfu_job *job = to_tfu_job(sched_job);
971 - struct v3d_dev *v3d = job->v3d;
972 + struct v3d_dev *v3d = job->base.v3d;
973 struct drm_device *dev = &v3d->drm;
974 struct dma_fence *fence;
975
976 @@ -195,9 +218,9 @@ v3d_tfu_job_run(struct drm_sched_job *sc
977 return NULL;
978
979 v3d->tfu_job = job;
980 - if (job->irq_fence)
981 - dma_fence_put(job->irq_fence);
982 - job->irq_fence = dma_fence_get(fence);
983 + if (job->base.irq_fence)
984 + dma_fence_put(job->base.irq_fence);
985 + job->base.irq_fence = dma_fence_get(fence);
986
987 trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
988
989 @@ -247,25 +270,23 @@ v3d_gpu_reset_for_timeout(struct v3d_dev
990 mutex_unlock(&v3d->reset_lock);
991 }
992
993 +/* If the current address or return address have changed, then the GPU
994 + * has probably made progress and we should delay the reset. This
995 + * could fail if the GPU got in an infinite loop in the CL, but that
996 + * is pretty unlikely outside of an i-g-t testcase.
997 + */
998 static void
999 -v3d_job_timedout(struct drm_sched_job *sched_job)
1000 +v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
1001 + u32 *timedout_ctca, u32 *timedout_ctra)
1002 {
1003 struct v3d_job *job = to_v3d_job(sched_job);
1004 - struct v3d_exec_info *exec = job->exec;
1005 - struct v3d_dev *v3d = exec->v3d;
1006 - enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
1007 - u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
1008 - u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
1009 -
1010 - /* If the current address or return address have changed, then
1011 - * the GPU has probably made progress and we should delay the
1012 - * reset. This could fail if the GPU got in an infinite loop
1013 - * in the CL, but that is pretty unlikely outside of an i-g-t
1014 - * testcase.
1015 - */
1016 - if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
1017 - job->timedout_ctca = ctca;
1018 - job->timedout_ctra = ctra;
1019 + struct v3d_dev *v3d = job->v3d;
1020 + u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
1021 + u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
1022 +
1023 + if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
1024 + *timedout_ctca = ctca;
1025 + *timedout_ctra = ctra;
1026 schedule_delayed_work(&job->base.work_tdr,
1027 job->base.sched->timeout);
1028 return;
1029 @@ -275,25 +296,50 @@ v3d_job_timedout(struct drm_sched_job *s
1030 }
1031
1032 static void
1033 +v3d_bin_job_timedout(struct drm_sched_job *sched_job)
1034 +{
1035 + struct v3d_bin_job *job = to_bin_job(sched_job);
1036 +
1037 + v3d_cl_job_timedout(sched_job, V3D_BIN,
1038 + &job->timedout_ctca, &job->timedout_ctra);
1039 +}
1040 +
1041 +static void
1042 +v3d_render_job_timedout(struct drm_sched_job *sched_job)
1043 +{
1044 + struct v3d_render_job *job = to_render_job(sched_job);
1045 +
1046 + v3d_cl_job_timedout(sched_job, V3D_RENDER,
1047 + &job->timedout_ctca, &job->timedout_ctra);
1048 +}
1049 +
1050 +static void
1051 v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
1052 {
1053 - struct v3d_tfu_job *job = to_tfu_job(sched_job);
1054 + struct v3d_job *job = to_v3d_job(sched_job);
1055
1056 v3d_gpu_reset_for_timeout(job->v3d, sched_job);
1057 }
1058
1059 -static const struct drm_sched_backend_ops v3d_sched_ops = {
1060 +static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
1061 .dependency = v3d_job_dependency,
1062 - .run_job = v3d_job_run,
1063 - .timedout_job = v3d_job_timedout,
1064 - .free_job = v3d_job_free
1065 + .run_job = v3d_bin_job_run,
1066 + .timedout_job = v3d_bin_job_timedout,
1067 + .free_job = v3d_job_free,
1068 +};
1069 +
1070 +static const struct drm_sched_backend_ops v3d_render_sched_ops = {
1071 + .dependency = v3d_render_job_dependency,
1072 + .run_job = v3d_render_job_run,
1073 + .timedout_job = v3d_render_job_timedout,
1074 + .free_job = v3d_job_free,
1075 };
1076
1077 static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
1078 - .dependency = v3d_tfu_job_dependency,
1079 + .dependency = v3d_job_dependency,
1080 .run_job = v3d_tfu_job_run,
1081 .timedout_job = v3d_tfu_job_timedout,
1082 - .free_job = v3d_tfu_job_free
1083 + .free_job = v3d_job_free,
1084 };
1085
1086 int
1087 @@ -305,7 +351,7 @@ v3d_sched_init(struct v3d_dev *v3d)
1088 int ret;
1089
1090 ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
1091 - &v3d_sched_ops,
1092 + &v3d_bin_sched_ops,
1093 hw_jobs_limit, job_hang_limit,
1094 msecs_to_jiffies(hang_limit_ms),
1095 "v3d_bin");
1096 @@ -315,7 +361,7 @@ v3d_sched_init(struct v3d_dev *v3d)
1097 }
1098
1099 ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
1100 - &v3d_sched_ops,
1101 + &v3d_render_sched_ops,
1102 hw_jobs_limit, job_hang_limit,
1103 msecs_to_jiffies(hang_limit_ms),
1104 "v3d_render");