1 From f9cd25fae900b1251e14ec894849846e0a2a7cb3 Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Sat, 2 Jul 2016 12:17:10 -0700
4 Subject: [PATCH] drm/vc4: Add support for branching in shader validation.
6 We're already checking that branch instructions are between the start
7 of the shader and the proper PROG_END sequence. The other thing we
8 need to make branching safe is to verify that the shader doesn't read
9 past the end of the uniforms stream.
11 To do that, we require that any basic block reading uniforms have
12 the following instructions:
14 load_imm temp, <next offset within uniform stream>
15 add unif_addr, temp, unif
17 The instructions are generated by userspace, and the kernel verifies
18 that the load_imm is of the expected offset, and that the add adds it
19 to a uniform. We track which uniform in the stream that is, and at
20 draw call time fix up the uniform stream to have the address of the
21 start of the shader's uniforms at that location.
23 Signed-off-by: Eric Anholt <eric@anholt.net>
24 (cherry picked from commit 6d45c81d229d71da54d374143e7d6abad4c0cf31)
26 drivers/gpu/drm/vc4/vc4_drv.h | 3 +
27 drivers/gpu/drm/vc4/vc4_qpu_defines.h | 3 +
28 drivers/gpu/drm/vc4/vc4_validate.c | 13 +-
29 drivers/gpu/drm/vc4/vc4_validate_shaders.c | 281 +++++++++++++++++++++++++++--
30 4 files changed, 283 insertions(+), 17 deletions(-)
32 --- a/drivers/gpu/drm/vc4/vc4_drv.h
33 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
34 @@ -363,6 +363,9 @@ struct vc4_validated_shader_info {
35 uint32_t uniforms_src_size;
36 uint32_t num_texture_samples;
37 struct vc4_texture_sample_info *texture_samples;
39 + uint32_t num_uniform_addr_offsets;
40 + uint32_t *uniform_addr_offsets;
44 --- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h
45 +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
46 @@ -270,6 +270,9 @@ enum qpu_unpack_r4 {
47 #define QPU_OP_ADD_SHIFT 24
48 #define QPU_OP_ADD_MASK QPU_MASK(28, 24)
50 +#define QPU_LOAD_IMM_SHIFT 0
51 +#define QPU_LOAD_IMM_MASK QPU_MASK(31, 0)
53 #define QPU_BRANCH_TARGET_SHIFT 0
54 #define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0)
56 --- a/drivers/gpu/drm/vc4/vc4_validate.c
57 +++ b/drivers/gpu/drm/vc4/vc4_validate.c
58 @@ -802,7 +802,7 @@ validate_gl_shader_rec(struct drm_device
59 uint32_t src_offset = *(uint32_t *)(pkt_u + o);
60 uint32_t *texture_handles_u;
65 *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
67 @@ -840,6 +840,17 @@ validate_gl_shader_rec(struct drm_device
71 + /* Fill in the uniform slots that need this shader's
72 + * start-of-uniforms address (used for resetting the uniform
73 + * stream in the presence of control flow).
76 + uni < validated_shader->num_uniform_addr_offsets;
78 + uint32_t o = validated_shader->uniform_addr_offsets[uni];
79 + ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
82 *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
84 exec->uniforms_u += validated_shader->uniforms_src_size;
85 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
86 +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
89 #include "vc4_qpu_defines.h"
91 +#define LIVE_REG_COUNT (32 + 32 + 4)
93 struct vc4_shader_validation_state {
94 /* Current IP being validated. */
96 @@ -57,8 +59,9 @@ struct vc4_shader_validation_state {
98 * This is used for the validation of direct address memory reads.
100 - uint32_t live_min_clamp_offsets[32 + 32 + 4];
101 - bool live_max_clamp_regs[32 + 32 + 4];
102 + uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
103 + bool live_max_clamp_regs[LIVE_REG_COUNT];
104 + uint32_t live_immediates[LIVE_REG_COUNT];
106 /* Bitfield of which IPs are used as branch targets.
108 @@ -66,6 +69,20 @@ struct vc4_shader_validation_state {
109 * points and clearing the texturing/clamping state.
111 unsigned long *branch_targets;
113 + /* Set when entering a basic block, and cleared when the uniform
114 + * address update is found. This is used to make sure that we don't
115 + * read uniforms when the address is undefined.
117 + bool needs_uniform_address_update;
119 + /* Set when we find a backwards branch. If the branch is backwards,
120 + * the target is probably doing an address reset to read uniforms,
121 + * and so we need to be sure that a uniforms address is present in the
122 + * stream, even if the shader didn't need to read uniforms in later
125 + bool needs_uniform_address_for_loop;
129 @@ -227,8 +244,14 @@ check_tmu_write(struct vc4_validated_sha
130 /* Since direct uses a RADDR uniform reference, it will get counted in
131 * check_instruction_reads()
135 + if (validation_state->needs_uniform_address_update) {
136 + DRM_ERROR("Texturing with undefined uniform address\n");
140 validated_shader->uniforms_size += 4;
144 if (!record_texture_sample(validated_shader,
145 @@ -242,6 +265,98 @@ check_tmu_write(struct vc4_validated_sha
149 +static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
151 + uint32_t o = validated_shader->num_uniform_addr_offsets;
152 + uint32_t num_uniforms = validated_shader->uniforms_size / 4;
154 + validated_shader->uniform_addr_offsets =
155 + krealloc(validated_shader->uniform_addr_offsets,
157 + sizeof(*validated_shader->uniform_addr_offsets),
159 + if (!validated_shader->uniform_addr_offsets)
162 + validated_shader->uniform_addr_offsets[o] = num_uniforms;
163 + validated_shader->num_uniform_addr_offsets++;
169 +validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
170 + struct vc4_shader_validation_state *validation_state,
173 + uint64_t inst = validation_state->shader[validation_state->ip];
174 + u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
175 + u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
176 + u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
177 + u32 add_lri = raddr_add_a_to_live_reg_index(inst);
178 + /* We want our reset to be pointing at whatever uniform follows the
179 + * uniforms base address.
181 + u32 expected_offset = validated_shader->uniforms_size + 4;
183 + /* We only support absolute uniform address changes, and we
184 + * require that they be in the current basic block before any
185 + * of its uniform reads.
187 + * One could potentially emit more efficient QPU code, by
188 + * noticing that (say) an if statement does uniform control
189 + * flow for all threads and that the if reads the same number
190 + * of uniforms on each side. However, this scheme is easy to
191 + * validate so it's all we allow for now.
194 + if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
195 + DRM_ERROR("uniforms address change must be "
200 + if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
201 + DRM_ERROR("Uniform address reset must be an ADD.\n");
205 + if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
206 + DRM_ERROR("Uniform address reset must be unconditional.\n");
210 + if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
211 + !(inst & QPU_PM)) {
212 + DRM_ERROR("No packing allowed on uniforms reset\n");
216 + if (add_lri == -1) {
217 + DRM_ERROR("First argument of uniform address write must be "
218 + "an immediate value.\n");
222 + if (validation_state->live_immediates[add_lri] != expected_offset) {
223 + DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
224 + validation_state->live_immediates[add_lri],
229 + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
230 + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
231 + DRM_ERROR("Second argument of uniform address write must be "
236 + validation_state->needs_uniform_address_update = false;
237 + validation_state->needs_uniform_address_for_loop = false;
238 + return require_uniform_address_uniform(validated_shader);
242 check_reg_write(struct vc4_validated_shader_info *validated_shader,
243 struct vc4_shader_validation_state *validation_state,
244 @@ -251,14 +366,37 @@ check_reg_write(struct vc4_validated_sha
245 uint32_t waddr = (is_mul ?
246 QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
247 QPU_GET_FIELD(inst, QPU_WADDR_ADD));
248 + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
249 + bool ws = inst & QPU_WS;
250 + bool is_b = is_mul ^ ws;
251 + u32 lri = waddr_to_live_reg_index(waddr, is_b);
254 + uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
255 + uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
257 + if (sig == QPU_SIG_LOAD_IMM &&
258 + QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
259 + ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
260 + (!is_mul && cond_add == QPU_COND_ALWAYS))) {
261 + validation_state->live_immediates[lri] =
262 + QPU_GET_FIELD(inst, QPU_LOAD_IMM);
264 + validation_state->live_immediates[lri] = ~0;
269 case QPU_W_UNIFORMS_ADDRESS:
270 - /* XXX: We'll probably need to support this for reladdr, but
271 - * it's definitely a security-related one.
273 - DRM_ERROR("uniforms address load unsupported\n");
276 + DRM_ERROR("relative uniforms address change "
281 + return validate_uniform_address_write(validated_shader,
285 case QPU_W_TLB_COLOR_MS:
286 case QPU_W_TLB_COLOR_ALL:
287 @@ -406,9 +544,35 @@ check_instruction_writes(struct vc4_vali
291 -check_instruction_reads(uint64_t inst,
292 - struct vc4_validated_shader_info *validated_shader)
293 +check_branch(uint64_t inst,
294 + struct vc4_validated_shader_info *validated_shader,
295 + struct vc4_shader_validation_state *validation_state,
298 + int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
299 + uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
300 + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
302 + if ((int)branch_imm < 0)
303 + validation_state->needs_uniform_address_for_loop = true;
305 + /* We don't want to have to worry about validation of this, and
306 + * there's no need for it.
308 + if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
309 + DRM_ERROR("branch instruction at %d wrote a register.\n",
310 + validation_state->ip);
318 +check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
319 + struct vc4_shader_validation_state *validation_state)
321 + uint64_t inst = validation_state->shader[validation_state->ip];
322 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
323 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
324 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
325 @@ -420,6 +584,12 @@ check_instruction_reads(uint64_t inst,
328 validated_shader->uniforms_size += 4;
330 + if (validation_state->needs_uniform_address_update) {
331 + DRM_ERROR("Uniform read with undefined uniform "
338 @@ -516,6 +686,65 @@ vc4_validate_branches(struct vc4_shader_
342 +/* Resets any known state for the shader, used when we may be branched to from
343 + * multiple locations in the program (or at shader start).
346 +reset_validation_state(struct vc4_shader_validation_state *validation_state)
350 + for (i = 0; i < 8; i++)
351 + validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
353 + for (i = 0; i < LIVE_REG_COUNT; i++) {
354 + validation_state->live_min_clamp_offsets[i] = ~0;
355 + validation_state->live_max_clamp_regs[i] = false;
356 + validation_state->live_immediates[i] = ~0;
361 +texturing_in_progress(struct vc4_shader_validation_state *validation_state)
363 + return (validation_state->tmu_write_count[0] != 0 ||
364 + validation_state->tmu_write_count[1] != 0);
368 +vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
370 + uint32_t ip = validation_state->ip;
372 + if (!test_bit(ip, validation_state->branch_targets))
375 + if (texturing_in_progress(validation_state)) {
376 + DRM_ERROR("Branch target landed during TMU setup\n");
380 + /* Reset our live values tracking, since this instruction may have
381 + * multiple predecessors.
383 + * One could potentially do analysis to determine that, for
384 + * example, all predecessors have a live max clamp in the same
385 + * register, but we don't bother with that.
387 + reset_validation_state(validation_state);
389 + /* Since we've entered a basic block from potentially multiple
390 + * predecessors, we need the uniforms address to be updated before any
391 + * uniforms are read. We require that after any branch point, the next
392 + * uniform to be loaded is a uniform address offset. That uniform's
393 + * offset will be marked by the uniform address register write
394 + * validation, or a one-off end-of-program check.
396 + validation_state->needs_uniform_address_update = true;
401 struct vc4_validated_shader_info *
402 vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
404 @@ -524,16 +753,12 @@ vc4_validate_shader(struct drm_gem_cma_o
406 struct vc4_validated_shader_info *validated_shader = NULL;
407 struct vc4_shader_validation_state validation_state;
410 memset(&validation_state, 0, sizeof(validation_state));
411 validation_state.shader = shader_obj->vaddr;
412 validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
414 - for (i = 0; i < 8; i++)
415 - validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
416 - for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
417 - validation_state.live_min_clamp_offsets[i] = ~0;
418 + reset_validation_state(&validation_state);
420 validation_state.branch_targets =
421 kcalloc(BITS_TO_LONGS(validation_state.max_ip),
422 @@ -554,6 +779,9 @@ vc4_validate_shader(struct drm_gem_cma_o
424 validation_state.ip = ip;
426 + if (!vc4_handle_branch_target(&validation_state))
431 case QPU_SIG_WAIT_FOR_SCOREBOARD:
432 @@ -569,7 +797,8 @@ vc4_validate_shader(struct drm_gem_cma_o
436 - if (!check_instruction_reads(inst, validated_shader))
437 + if (!check_instruction_reads(validated_shader,
438 + &validation_state))
441 if (sig == QPU_SIG_PROG_END) {
442 @@ -587,6 +816,11 @@ vc4_validate_shader(struct drm_gem_cma_o
446 + case QPU_SIG_BRANCH:
447 + if (!check_branch(inst, validated_shader,
448 + &validation_state, ip))
452 DRM_ERROR("Unsupported QPU signal %d at "
453 "instruction %d\n", sig, ip);
454 @@ -607,6 +841,21 @@ vc4_validate_shader(struct drm_gem_cma_o
458 + /* If we did a backwards branch and we haven't emitted a uniforms
459 + * reset since then, we still need the uniforms stream to have the
460 + * uniforms address available so that the backwards branch can do its
463 + * We could potentially prove that the backwards branch doesn't
464 + * contain any uses of uniforms until program exit, but that doesn't
465 + * seem to be worth the trouble.
467 + if (validation_state.needs_uniform_address_for_loop) {
468 + if (!require_uniform_address_uniform(validated_shader))
470 + validated_shader->uniforms_size += 4;
473 /* Again, no chance of integer overflow here because the worst case
474 * scenario is 8 bytes of uniforms plus handles per 8-byte