1 From 837fa77c227eda3ebdab64b24acb3dc94599e76d Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Mon, 7 Dec 2015 12:35:01 -0800
4 Subject: [PATCH] drm/vc4: Synchronize validation code for v2 submission
7 Signed-off-by: Eric Anholt <eric@anholt.net>
9 drivers/gpu/drm/vc4/vc4_drv.h | 24 +--
10 drivers/gpu/drm/vc4/vc4_gem.c | 14 +-
11 drivers/gpu/drm/vc4/vc4_render_cl.c | 6 +-
12 drivers/gpu/drm/vc4/vc4_validate.c | 287 +++++++++++++++---------------------
13 4 files changed, 135 insertions(+), 196 deletions(-)
15 --- a/drivers/gpu/drm/vc4/vc4_drv.h
16 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
17 @@ -189,17 +189,6 @@ to_vc4_encoder(struct drm_encoder *encod
18 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
19 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
27 -struct vc4_bo_exec_state {
28 - struct drm_gem_cma_object *bo;
29 - enum vc4_bo_mode mode;
32 struct vc4_exec_info {
33 /* Sequence number for this bin/render job. */
35 @@ -210,7 +199,7 @@ struct vc4_exec_info {
36 /* This is the array of BOs that were looked up at the start of exec.
37 * Command validation will use indices into this array.
39 - struct vc4_bo_exec_state *bo;
40 + struct drm_gem_cma_object **bo;
43 /* Pointers for our position in vc4->job_list */
44 @@ -238,7 +227,6 @@ struct vc4_exec_info {
47 struct vc4_shader_state {
50 /* Maximum vertex index referenced by any primitive using this
52 @@ -254,6 +242,7 @@ struct vc4_exec_info {
53 bool found_tile_binning_mode_config_packet;
54 bool found_start_tile_binning_packet;
55 bool found_increment_semaphore_packet;
57 uint8_t bin_tiles_x, bin_tiles_y;
58 struct drm_gem_cma_object *tile_bo;
59 uint32_t tile_alloc_offset;
60 @@ -265,6 +254,9 @@ struct vc4_exec_info {
61 uint32_t ct0ca, ct0ea;
62 uint32_t ct1ca, ct1ea;
64 + /* Pointer to the unvalidated bin CL (if present). */
67 /* Pointers to the shader recs. These paddr gets incremented as CL
68 * packets are relocated in validate_gl_shader_state, and the vaddrs
69 * (u and v) get incremented and size decremented as the shader recs
70 @@ -455,10 +447,8 @@ vc4_validate_bin_cl(struct drm_device *d
72 vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
74 -bool vc4_use_bo(struct vc4_exec_info *exec,
76 - enum vc4_bo_mode mode,
77 - struct drm_gem_cma_object **obj);
78 +struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
81 int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
83 --- a/drivers/gpu/drm/vc4/vc4_gem.c
84 +++ b/drivers/gpu/drm/vc4/vc4_gem.c
85 @@ -169,8 +169,8 @@ vc4_save_hang_state(struct drm_device *d
88 for (i = 0; i < exec->bo_count; i++) {
89 - drm_gem_object_reference(&exec->bo[i].bo->base);
90 - kernel_state->bo[i] = &exec->bo[i].bo->base;
91 + drm_gem_object_reference(&exec->bo[i]->base);
92 + kernel_state->bo[i] = &exec->bo[i]->base;
95 list_for_each_entry(bo, &exec->unref_list, unref_head) {
96 @@ -397,7 +397,7 @@ vc4_update_bo_seqnos(struct vc4_exec_inf
99 for (i = 0; i < exec->bo_count; i++) {
100 - bo = to_vc4_bo(&exec->bo[i].bo->base);
101 + bo = to_vc4_bo(&exec->bo[i]->base);
105 @@ -467,7 +467,7 @@ vc4_cl_lookup_bos(struct drm_device *dev
109 - exec->bo = kcalloc(exec->bo_count, sizeof(struct vc4_bo_exec_state),
110 + exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
113 DRM_ERROR("Failed to allocate validated BO pointers\n");
114 @@ -500,7 +500,7 @@ vc4_cl_lookup_bos(struct drm_device *dev
117 drm_gem_object_reference(bo);
118 - exec->bo[i].bo = (struct drm_gem_cma_object *)bo;
119 + exec->bo[i] = (struct drm_gem_cma_object *)bo;
121 spin_unlock(&file_priv->table_lock);
123 @@ -591,6 +591,8 @@ vc4_get_bcl(struct drm_device *dev, stru
125 exec->ct0ca = exec->exec_bo->paddr + bin_offset;
129 exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
130 exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
131 exec->shader_rec_size = args->shader_rec_size;
132 @@ -622,7 +624,7 @@ vc4_complete_exec(struct drm_device *dev
133 mutex_lock(&dev->struct_mutex);
135 for (i = 0; i < exec->bo_count; i++)
136 - drm_gem_object_unreference(&exec->bo[i].bo->base);
137 + drm_gem_object_unreference(&exec->bo[i]->base);
141 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c
142 +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
143 @@ -436,7 +436,8 @@ static int vc4_rcl_surface_setup(struct
144 if (surf->hindex == ~0)
147 - if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
148 + *obj = vc4_use_bo(exec, surf->hindex);
152 if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
153 @@ -537,7 +538,8 @@ vc4_rcl_render_config_surface_setup(stru
154 if (surf->hindex == ~0)
157 - if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
158 + *obj = vc4_use_bo(exec, surf->hindex);
162 if (tiling > VC4_TILING_FORMAT_LT) {
163 --- a/drivers/gpu/drm/vc4/vc4_validate.c
164 +++ b/drivers/gpu/drm/vc4/vc4_validate.c
165 @@ -94,42 +94,42 @@ size_is_lt(uint32_t width, uint32_t heig
166 height <= 4 * utile_height(cpp));
170 -vc4_use_bo(struct vc4_exec_info *exec,
172 - enum vc4_bo_mode mode,
173 - struct drm_gem_cma_object **obj)
174 +struct drm_gem_cma_object *
175 +vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
178 + struct drm_gem_cma_object *obj;
181 if (hindex >= exec->bo_count) {
182 DRM_ERROR("BO index %d greater than BO count %d\n",
183 hindex, exec->bo_count);
187 + obj = exec->bo[hindex];
188 + bo = to_vc4_bo(&obj->base);
190 - if (exec->bo[hindex].mode != mode) {
191 - if (exec->bo[hindex].mode == VC4_MODE_UNDECIDED) {
192 - exec->bo[hindex].mode = mode;
194 - DRM_ERROR("BO index %d reused with mode %d vs %d\n",
195 - hindex, exec->bo[hindex].mode, mode);
198 + if (bo->validated_shader) {
199 + DRM_ERROR("Trying to use shader BO as something other than "
204 - *obj = exec->bo[hindex].bo;
209 +static struct drm_gem_cma_object *
210 +vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
212 + return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
216 -vc4_use_handle(struct vc4_exec_info *exec,
217 - uint32_t gem_handles_packet_index,
218 - enum vc4_bo_mode mode,
219 - struct drm_gem_cma_object **obj)
220 +validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
222 - return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index],
224 + /* Note that the untrusted pointer passed to these functions is
225 + * incremented past the packet byte.
227 + return (untrusted - 1 == exec->bin_u + pos);
231 @@ -202,13 +202,13 @@ vc4_check_tex_size(struct vc4_exec_info
235 -validate_flush_all(VALIDATE_ARGS)
236 +validate_flush(VALIDATE_ARGS)
238 - if (exec->found_increment_semaphore_packet) {
239 - DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
240 - "VC4_PACKET_INCREMENT_SEMAPHORE\n");
241 + if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
242 + DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
245 + exec->found_flush = true;
249 @@ -233,17 +233,13 @@ validate_start_tile_binning(VALIDATE_ARG
251 validate_increment_semaphore(VALIDATE_ARGS)
253 - if (exec->found_increment_semaphore_packet) {
254 - DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n");
255 + if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
256 + DRM_ERROR("Bin CL must end with "
257 + "VC4_PACKET_INCREMENT_SEMAPHORE\n");
260 exec->found_increment_semaphore_packet = true;
262 - /* Once we've found the semaphore increment, there should be one FLUSH
263 - * then the end of the command list. The FLUSH actually triggers the
264 - * increment, so we only need to make sure there
270 @@ -257,11 +253,6 @@ validate_indexed_prim_list(VALIDATE_ARGS
271 uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
272 struct vc4_shader_state *shader_state;
274 - if (exec->found_increment_semaphore_packet) {
275 - DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
279 /* Check overflow condition */
280 if (exec->shader_state_count == 0) {
281 DRM_ERROR("shader state must precede primitives\n");
282 @@ -272,7 +263,8 @@ validate_indexed_prim_list(VALIDATE_ARGS
283 if (max_index > shader_state->max_index)
284 shader_state->max_index = max_index;
286 - if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &ib))
287 + ib = vc4_use_handle(exec, 0);
291 if (offset > ib->base.size ||
292 @@ -295,11 +287,6 @@ validate_gl_array_primitive(VALIDATE_ARG
294 struct vc4_shader_state *shader_state;
296 - if (exec->found_increment_semaphore_packet) {
297 - DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
301 /* Check overflow condition */
302 if (exec->shader_state_count == 0) {
303 DRM_ERROR("shader state must precede primitives\n");
304 @@ -329,7 +316,6 @@ validate_gl_shader_state(VALIDATE_ARGS)
308 - exec->shader_state[i].packet = VC4_PACKET_GL_SHADER_STATE;
309 exec->shader_state[i].addr = *(uint32_t *)untrusted;
310 exec->shader_state[i].max_index = 0;
312 @@ -348,31 +334,6 @@ validate_gl_shader_state(VALIDATE_ARGS)
316 -validate_nv_shader_state(VALIDATE_ARGS)
318 - uint32_t i = exec->shader_state_count++;
320 - if (i >= exec->shader_state_size) {
321 - DRM_ERROR("More requests for shader states than declared\n");
325 - exec->shader_state[i].packet = VC4_PACKET_NV_SHADER_STATE;
326 - exec->shader_state[i].addr = *(uint32_t *)untrusted;
328 - if (exec->shader_state[i].addr & 15) {
329 - DRM_ERROR("NV shader state address 0x%08x misaligned\n",
330 - exec->shader_state[i].addr);
334 - *(uint32_t *)validated = (exec->shader_state[i].addr +
335 - exec->shader_rec_p);
341 validate_tile_binning_config(VALIDATE_ARGS)
343 struct drm_device *dev = exec->exec_bo->base.dev;
344 @@ -473,8 +434,8 @@ static const struct cmd_info {
346 VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
347 VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
348 - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, NULL),
349 - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, validate_flush_all),
350 + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
351 + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
352 VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
353 validate_start_tile_binning),
354 VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
355 @@ -488,7 +449,6 @@ static const struct cmd_info {
356 VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
358 VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
359 - VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, validate_nv_shader_state),
361 VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
362 VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
363 @@ -575,8 +535,16 @@ vc4_validate_bin_cl(struct drm_device *d
367 - if (!exec->found_increment_semaphore_packet) {
368 - DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE\n");
369 + /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The
370 + * semaphore is used to trigger the render CL to start up, and the
371 + * FLUSH is what caps the bin lists with
372 + * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
373 + * render CL when they get called to) and actually triggers the queued
374 + * semaphore increment.
376 + if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
377 + DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
378 + "VC4_PACKET_FLUSH\n");
382 @@ -607,7 +575,8 @@ reloc_tex(struct vc4_exec_info *exec,
383 uint32_t cube_map_stride = 0;
384 enum vc4_texture_data_type type;
386 - if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, &tex))
387 + tex = vc4_use_bo(exec, texture_handle_index);
391 if (sample->is_direct) {
392 @@ -755,51 +724,28 @@ reloc_tex(struct vc4_exec_info *exec,
396 -validate_shader_rec(struct drm_device *dev,
397 - struct vc4_exec_info *exec,
398 - struct vc4_shader_state *state)
399 +validate_gl_shader_rec(struct drm_device *dev,
400 + struct vc4_exec_info *exec,
401 + struct vc4_shader_state *state)
403 uint32_t *src_handles;
405 - enum shader_rec_reloc_type {
409 - struct shader_rec_reloc {
410 - enum shader_rec_reloc_type type;
413 - static const struct shader_rec_reloc gl_relocs[] = {
414 - { RELOC_CODE, 4 }, /* fs */
415 - { RELOC_CODE, 16 }, /* vs */
416 - { RELOC_CODE, 28 }, /* cs */
418 - static const struct shader_rec_reloc nv_relocs[] = {
419 - { RELOC_CODE, 4 }, /* fs */
421 + static const uint32_t shader_reloc_offsets[] = {
426 - const struct shader_rec_reloc *relocs;
427 - struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
428 - uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size;
429 + uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
430 + struct drm_gem_cma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8];
431 + uint32_t nr_attributes, nr_relocs, packet_size;
433 - struct vc4_validated_shader_info *shader;
435 - if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
436 - relocs = nv_relocs;
437 - nr_fixed_relocs = ARRAY_SIZE(nv_relocs);
441 - relocs = gl_relocs;
442 - nr_fixed_relocs = ARRAY_SIZE(gl_relocs);
444 - nr_attributes = state->addr & 0x7;
445 - if (nr_attributes == 0)
447 - packet_size = gl_shader_rec_size(state->addr);
449 - nr_relocs = nr_fixed_relocs + nr_attributes;
450 + nr_attributes = state->addr & 0x7;
451 + if (nr_attributes == 0)
453 + packet_size = gl_shader_rec_size(state->addr);
455 + nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
456 if (nr_relocs * 4 > exec->shader_rec_size) {
457 DRM_ERROR("overflowed shader recs reading %d handles "
458 "from %d bytes left\n",
459 @@ -829,21 +775,30 @@ validate_shader_rec(struct drm_device *d
460 exec->shader_rec_v += roundup(packet_size, 16);
461 exec->shader_rec_size -= packet_size;
463 - for (i = 0; i < nr_relocs; i++) {
464 - enum vc4_bo_mode mode;
465 + if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
466 + DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
470 - if (i < nr_fixed_relocs && relocs[i].type == RELOC_CODE)
471 - mode = VC4_MODE_SHADER;
473 - mode = VC4_MODE_RENDER;
474 + for (i = 0; i < shader_reloc_count; i++) {
475 + if (src_handles[i] >= exec->bo_count) {
476 + DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
480 - if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i]))
482 + bo[i] = exec->bo[src_handles[i]];
486 + for (i = shader_reloc_count; i < nr_relocs; i++) {
487 + bo[i] = vc4_use_bo(exec, src_handles[i]);
492 - for (i = 0; i < nr_fixed_relocs; i++) {
493 - struct vc4_bo *vc4_bo;
494 - uint32_t o = relocs[i].offset;
495 + for (i = 0; i < shader_reloc_count; i++) {
496 + struct vc4_validated_shader_info *validated_shader;
497 + uint32_t o = shader_reloc_offsets[i];
498 uint32_t src_offset = *(uint32_t *)(pkt_u + o);
499 uint32_t *texture_handles_u;
500 void *uniform_data_u;
501 @@ -851,57 +806,50 @@ validate_shader_rec(struct drm_device *d
503 *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
505 - switch (relocs[i].type) {
507 - if (src_offset != 0) {
508 - DRM_ERROR("Shaders must be at offset 0 "
512 + if (src_offset != 0) {
513 + DRM_ERROR("Shaders must be at offset 0 of "
518 - vc4_bo = to_vc4_bo(&bo[i]->base);
519 - shader = vc4_bo->validated_shader;
522 + validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
523 + if (!validated_shader)
526 - if (shader->uniforms_src_size > exec->uniforms_size) {
527 - DRM_ERROR("Uniforms src buffer overflow\n");
530 + if (validated_shader->uniforms_src_size >
531 + exec->uniforms_size) {
532 + DRM_ERROR("Uniforms src buffer overflow\n");
536 - texture_handles_u = exec->uniforms_u;
537 - uniform_data_u = (texture_handles_u +
538 - shader->num_texture_samples);
540 - memcpy(exec->uniforms_v, uniform_data_u,
541 - shader->uniforms_size);
544 - tex < shader->num_texture_samples;
546 - if (!reloc_tex(exec,
548 - &shader->texture_samples[tex],
549 - texture_handles_u[tex])) {
552 + texture_handles_u = exec->uniforms_u;
553 + uniform_data_u = (texture_handles_u +
554 + validated_shader->num_texture_samples);
556 + memcpy(exec->uniforms_v, uniform_data_u,
557 + validated_shader->uniforms_size);
560 + tex < validated_shader->num_texture_samples;
562 + if (!reloc_tex(exec,
564 + &validated_shader->texture_samples[tex],
565 + texture_handles_u[tex])) {
570 - *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
572 - exec->uniforms_u += shader->uniforms_src_size;
573 - exec->uniforms_v += shader->uniforms_size;
574 - exec->uniforms_p += shader->uniforms_size;
577 + *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
582 + exec->uniforms_u += validated_shader->uniforms_src_size;
583 + exec->uniforms_v += validated_shader->uniforms_size;
584 + exec->uniforms_p += validated_shader->uniforms_size;
587 for (i = 0; i < nr_attributes; i++) {
588 - struct drm_gem_cma_object *vbo = bo[nr_fixed_relocs + i];
589 + struct drm_gem_cma_object *vbo =
590 + bo[ARRAY_SIZE(shader_reloc_offsets) + i];
591 uint32_t o = 36 + i * 8;
592 uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
593 uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
594 @@ -933,9 +881,6 @@ validate_shader_rec(struct drm_device *d
604 @@ -946,7 +891,7 @@ vc4_validate_shader_recs(struct drm_devi
607 for (i = 0; i < exec->shader_state_count; i++) {
608 - ret = validate_shader_rec(dev, exec, &exec->shader_state[i]);
609 + ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);