brcm2708: update linux 4.4 patches to latest version
target/linux/brcm2708/patches-4.4/0114-drm-vc4-Update-a-bunch-of-code-to-match-upstream-sub.patch
1 From dd5e9636e87ee08b38b28626fc862099e5a038cf Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Fri, 4 Dec 2015 11:35:34 -0800
4 Subject: [PATCH] drm/vc4: Update a bunch of code to match upstream submission.
5
6 This gets almost everything matching, except for the MSAA support and
7 using generic PM domains.
8
9 Signed-off-by: Eric Anholt <eric@anholt.net>
10 ---
11 drivers/gpu/drm/drm_gem_cma_helper.c | 13 +-
12 drivers/gpu/drm/vc4/vc4_bo.c | 322 +++++++++++++++++------------
13 drivers/gpu/drm/vc4/vc4_crtc.c | 7 +-
14 drivers/gpu/drm/vc4/vc4_drv.c | 6 +-
15 drivers/gpu/drm/vc4/vc4_drv.h | 20 +-
16 drivers/gpu/drm/vc4/vc4_gem.c | 24 ++-
17 drivers/gpu/drm/vc4/vc4_irq.c | 5 +-
18 drivers/gpu/drm/vc4/vc4_kms.c | 1 +
19 drivers/gpu/drm/vc4/vc4_packet.h | 210 +++++++++----------
20 drivers/gpu/drm/vc4/vc4_qpu_defines.h | 308 ++++++++++++++-------------
21 drivers/gpu/drm/vc4/vc4_render_cl.c | 4 +-
22 drivers/gpu/drm/vc4/vc4_v3d.c | 10 +-
23 drivers/gpu/drm/vc4/vc4_validate.c | 130 ++++++------
24 drivers/gpu/drm/vc4/vc4_validate_shaders.c | 66 +++---
25 include/drm/drmP.h | 8 +-
26 15 files changed, 598 insertions(+), 536 deletions(-)
27
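The first hunk below replaces the downstream driver->gem_obj_size field with the upstream driver->gem_create_object() hook: rather than telling the CMA helper how large the driver's BO struct is, the driver allocates the struct itself and the helper initializes it. A minimal sketch of such a hook, with hypothetical names ("my_bo", "my_create_object"); the real vc4 implementation, vc4_create_object(), appears in the vc4_bo.c hunks further down:

    #include <linux/slab.h>
    #include <drm/drmP.h>
    #include <drm/drm_gem_cma_helper.h>

    struct my_bo {
    	/* Must stay first so the CMA helpers can treat &my_bo.base
    	 * as the whole object via container_of(). */
    	struct drm_gem_cma_object base;
    	bool some_private_state;
    };

    static struct drm_gem_object *my_create_object(struct drm_device *dev,
    					       size_t size)
    {
    	struct my_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);

    	if (!bo)
    		return ERR_PTR(-ENOMEM);

    	/* The helper calls drm_gem_object_init() on the returned
    	 * object, so only driver-private setup belongs here. */
    	return &bo->base.base;
    }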
28 --- a/drivers/gpu/drm/drm_gem_cma_helper.c
29 +++ b/drivers/gpu/drm/drm_gem_cma_helper.c
30 @@ -58,15 +58,14 @@ __drm_gem_cma_create(struct drm_device *
31 struct drm_gem_cma_object *cma_obj;
32 struct drm_gem_object *gem_obj;
33 int ret;
34 - size_t obj_size = (drm->driver->gem_obj_size ?
35 - drm->driver->gem_obj_size :
36 - sizeof(*cma_obj));
37
38 - cma_obj = kzalloc(obj_size, GFP_KERNEL);
39 - if (!cma_obj)
40 + if (drm->driver->gem_create_object)
41 + gem_obj = drm->driver->gem_create_object(drm, size);
42 + else
43 + gem_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL);
44 + if (!gem_obj)
45 return ERR_PTR(-ENOMEM);
46 -
47 - gem_obj = &cma_obj->base;
48 + cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base);
49
50 ret = drm_gem_object_init(drm, gem_obj, size);
51 if (ret)
52 --- a/drivers/gpu/drm/vc4/vc4_bo.c
53 +++ b/drivers/gpu/drm/vc4/vc4_bo.c
54 @@ -12,6 +12,10 @@
55 * access to system memory with no MMU in between. To support it, we
56 * use the GEM CMA helper functions to allocate contiguous ranges of
57 * physical memory for our BOs.
58 + *
59 + * Since the CMA allocator is very slow, we keep a cache of recently
60 + * freed BOs around so that the kernel's allocation of objects for 3D
61 + * rendering can return quickly.
62 */
63
64 #include "vc4_drv.h"
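The cache described in the comment above is bucketed by BO size in whole pages; the hunks that follow add the real implementation (bo_page_index(), vc4_bo_get_from_cache()). A condensed sketch of the lookup with the locking, stats, and kref handling elided ("cache_lookup_sketch" is a made-up name):

    /* Sketch only: vc4 keeps one free list per size bucket, where
     * bucket n holds BOs of exactly (n + 1) pages -- the same math
     * as bo_page_index() below. */
    static struct vc4_bo *cache_lookup_sketch(struct vc4_dev *vc4, size_t size)
    {
    	uint32_t index = (size / PAGE_SIZE) - 1;

    	if (index >= vc4->bo_cache.size_list_size ||
    	    list_empty(&vc4->bo_cache.size_list[index]))
    		return NULL;	/* miss: fall back to a fresh CMA allocation */

    	return list_first_entry(&vc4->bo_cache.size_list[index],
    				struct vc4_bo, size_head);
    }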
65 @@ -34,6 +38,36 @@ static void vc4_bo_stats_dump(struct vc4
66 vc4->bo_stats.size_cached / 1024);
67 }
68
69 +#ifdef CONFIG_DEBUG_FS
70 +int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
71 +{
72 + struct drm_info_node *node = (struct drm_info_node *)m->private;
73 + struct drm_device *dev = node->minor->dev;
74 + struct vc4_dev *vc4 = to_vc4_dev(dev);
75 + struct vc4_bo_stats stats;
76 +
77 + /* Take a snapshot of the current stats with the lock held. */
78 + mutex_lock(&vc4->bo_lock);
79 + stats = vc4->bo_stats;
80 + mutex_unlock(&vc4->bo_lock);
81 +
82 + seq_printf(m, "num bos allocated: %d\n",
83 + stats.num_allocated);
84 + seq_printf(m, "size bos allocated: %dkb\n",
85 + stats.size_allocated / 1024);
86 + seq_printf(m, "num bos used: %d\n",
87 + stats.num_allocated - stats.num_cached);
88 + seq_printf(m, "size bos used: %dkb\n",
89 + (stats.size_allocated - stats.size_cached) / 1024);
90 + seq_printf(m, "num bos cached: %d\n",
91 + stats.num_cached);
92 + seq_printf(m, "size bos cached: %dkb\n",
93 + stats.size_cached / 1024);
94 +
95 + return 0;
96 +}
97 +#endif
98 +
99 static uint32_t bo_page_index(size_t size)
100 {
101 return (size / PAGE_SIZE) - 1;
102 @@ -81,8 +115,8 @@ static struct list_head *vc4_get_cache_l
103 struct list_head *new_list;
104 uint32_t i;
105
106 - new_list = kmalloc(new_size * sizeof(struct list_head),
107 - GFP_KERNEL);
108 + new_list = kmalloc_array(new_size, sizeof(struct list_head),
109 + GFP_KERNEL);
110 if (!new_list)
111 return NULL;
112
113 @@ -90,7 +124,9 @@ static struct list_head *vc4_get_cache_l
114 * head locations.
115 */
116 for (i = 0; i < vc4->bo_cache.size_list_size; i++) {
117 - struct list_head *old_list = &vc4->bo_cache.size_list[i];
118 + struct list_head *old_list =
119 + &vc4->bo_cache.size_list[i];
120 +
121 if (list_empty(old_list))
122 INIT_LIST_HEAD(&new_list[i]);
123 else
124 @@ -122,11 +158,60 @@ void vc4_bo_cache_purge(struct drm_devic
125 mutex_unlock(&vc4->bo_lock);
126 }
127
128 -struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size)
129 +static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
130 + uint32_t size)
131 {
132 struct vc4_dev *vc4 = to_vc4_dev(dev);
133 - uint32_t size = roundup(unaligned_size, PAGE_SIZE);
134 uint32_t page_index = bo_page_index(size);
135 + struct vc4_bo *bo = NULL;
136 +
137 + size = roundup(size, PAGE_SIZE);
138 +
139 + mutex_lock(&vc4->bo_lock);
140 + if (page_index >= vc4->bo_cache.size_list_size)
141 + goto out;
142 +
143 + if (list_empty(&vc4->bo_cache.size_list[page_index]))
144 + goto out;
145 +
146 + bo = list_first_entry(&vc4->bo_cache.size_list[page_index],
147 + struct vc4_bo, size_head);
148 + vc4_bo_remove_from_cache(bo);
149 + kref_init(&bo->base.base.refcount);
150 +
151 +out:
152 + mutex_unlock(&vc4->bo_lock);
153 + return bo;
154 +}
155 +
156 +/**
157 + * vc4_gem_create_object - Implementation of driver->gem_create_object.
158 + *
159 + * This lets the CMA helpers allocate object structs for us, and keep
160 + * our BO stats correct.
161 + */
162 +struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
163 +{
164 + struct vc4_dev *vc4 = to_vc4_dev(dev);
165 + struct vc4_bo *bo;
166 +
167 + bo = kzalloc(sizeof(*bo), GFP_KERNEL);
168 + if (!bo)
169 + return ERR_PTR(-ENOMEM);
170 +
171 + mutex_lock(&vc4->bo_lock);
172 + vc4->bo_stats.num_allocated++;
173 + vc4->bo_stats.size_allocated += size;
174 + mutex_unlock(&vc4->bo_lock);
175 +
176 + return &bo->base.base;
177 +}
178 +
179 +struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
180 + bool from_cache)
181 +{
182 + size_t size = roundup(unaligned_size, PAGE_SIZE);
183 + struct vc4_dev *vc4 = to_vc4_dev(dev);
184 struct drm_gem_cma_object *cma_obj;
185 int pass;
186
187 @@ -134,18 +219,12 @@ struct vc4_bo *vc4_bo_create(struct drm_
188 return NULL;
189
190 /* First, try to get a vc4_bo from the kernel BO cache. */
191 - mutex_lock(&vc4->bo_lock);
192 - if (page_index < vc4->bo_cache.size_list_size &&
193 - !list_empty(&vc4->bo_cache.size_list[page_index])) {
194 - struct vc4_bo *bo =
195 - list_first_entry(&vc4->bo_cache.size_list[page_index],
196 - struct vc4_bo, size_head);
197 - vc4_bo_remove_from_cache(bo);
198 - mutex_unlock(&vc4->bo_lock);
199 - kref_init(&bo->base.base.refcount);
200 - return bo;
201 + if (from_cache) {
202 + struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size);
203 +
204 + if (bo)
205 + return bo;
206 }
207 - mutex_unlock(&vc4->bo_lock);
208
209 /* Otherwise, make a new BO. */
210 for (pass = 0; ; pass++) {
211 @@ -179,9 +258,6 @@ struct vc4_bo *vc4_bo_create(struct drm_
212 }
213 }
214
215 - vc4->bo_stats.num_allocated++;
216 - vc4->bo_stats.size_allocated += size;
217 -
218 return to_vc4_bo(&cma_obj->base);
219 }
220
221 @@ -199,7 +275,7 @@ int vc4_dumb_create(struct drm_file *fil
222 if (args->size < args->pitch * args->height)
223 args->size = args->pitch * args->height;
224
225 - bo = vc4_bo_create(dev, args->size);
226 + bo = vc4_bo_create(dev, args->size, false);
227 if (!bo)
228 return -ENOMEM;
229
230 @@ -209,8 +285,8 @@ int vc4_dumb_create(struct drm_file *fil
231 return ret;
232 }
233
234 -static void
235 -vc4_bo_cache_free_old(struct drm_device *dev)
236 +/* Must be called with bo_lock held. */
237 +static void vc4_bo_cache_free_old(struct drm_device *dev)
238 {
239 struct vc4_dev *vc4 = to_vc4_dev(dev);
240 unsigned long expire_time = jiffies - msecs_to_jiffies(1000);
241 @@ -313,15 +389,77 @@ vc4_prime_export(struct drm_device *dev,
242 return drm_gem_prime_export(dev, obj, flags);
243 }
244
245 -int
246 -vc4_create_bo_ioctl(struct drm_device *dev, void *data,
247 - struct drm_file *file_priv)
248 +int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
249 +{
250 + struct drm_gem_object *gem_obj;
251 + struct vc4_bo *bo;
252 + int ret;
253 +
254 + ret = drm_gem_mmap(filp, vma);
255 + if (ret)
256 + return ret;
257 +
258 + gem_obj = vma->vm_private_data;
259 + bo = to_vc4_bo(gem_obj);
260 +
261 + if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
262 + DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
263 + return -EINVAL;
264 + }
265 +
266 + /*
267 + * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
268 + * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
269 + * the whole buffer.
270 + */
271 + vma->vm_flags &= ~VM_PFNMAP;
272 + vma->vm_pgoff = 0;
273 +
274 + ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
275 + bo->base.vaddr, bo->base.paddr,
276 + vma->vm_end - vma->vm_start);
277 + if (ret)
278 + drm_gem_vm_close(vma);
279 +
280 + return ret;
281 +}
282 +
283 +int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
284 +{
285 + struct vc4_bo *bo = to_vc4_bo(obj);
286 +
287 + if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
288 + DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
289 + return -EINVAL;
290 + }
291 +
292 + return drm_gem_cma_prime_mmap(obj, vma);
293 +}
294 +
295 +void *vc4_prime_vmap(struct drm_gem_object *obj)
296 +{
297 + struct vc4_bo *bo = to_vc4_bo(obj);
298 +
299 + if (bo->validated_shader) {
300 + DRM_ERROR("mmaping of shader BOs not allowed.\n");
301 + return ERR_PTR(-EINVAL);
302 + }
303 +
304 + return drm_gem_cma_prime_vmap(obj);
305 +}
306 +
307 +int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
308 + struct drm_file *file_priv)
309 {
310 struct drm_vc4_create_bo *args = data;
311 struct vc4_bo *bo = NULL;
312 int ret;
313
314 - bo = vc4_bo_create(dev, args->size);
315 + /*
316 + * We can't allocate from the BO cache, because the BOs don't
317 + * get zeroed, and that might leak data between users.
318 + */
319 + bo = vc4_bo_create(dev, args->size, false);
320 if (!bo)
321 return -ENOMEM;
322
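The relocated vc4_mmap() above pairs with the VC4_MMAP_BO ioctl (moved up from later in this file): userspace first asks the kernel for the fake mmap token, then maps the DRM fd at that offset. A sketch of the userspace side, assuming libdrm's drmIoctl() and the uapi struct drm_vc4_mmap_bo; "map_bo_sketch" is a made-up helper name:

    #include <stdint.h>
    #include <sys/mman.h>
    #include <xf86drm.h>	/* drmIoctl() */
    #include "vc4_drm.h"	/* uapi: struct drm_vc4_mmap_bo */

    static void *map_bo_sketch(int fd, uint32_t handle, size_t size)
    {
    	struct drm_vc4_mmap_bo map = { .handle = handle };
    	void *ptr;

    	/* Returns the fake buffer offset set up at BO allocation time. */
    	if (drmIoctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map))
    		return NULL;

    	/* vc4_mmap() clears VM_PFNMAP and resets vm_pgoff to 0, so
    	 * the whole buffer is mapped write-combined from offset 0. */
    	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
    		   fd, map.offset);
    	return ptr == MAP_FAILED ? NULL : ptr;
    }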
323 @@ -331,6 +469,25 @@ vc4_create_bo_ioctl(struct drm_device *d
324 return ret;
325 }
326
327 +int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
328 + struct drm_file *file_priv)
329 +{
330 + struct drm_vc4_mmap_bo *args = data;
331 + struct drm_gem_object *gem_obj;
332 +
333 + gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
334 + if (!gem_obj) {
335 + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
336 + return -EINVAL;
337 + }
338 +
339 + /* The mmap offset was set up at BO allocation time. */
340 + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
341 +
342 + drm_gem_object_unreference_unlocked(gem_obj);
343 + return 0;
344 +}
345 +
346 int
347 vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
348 struct drm_file *file_priv)
349 @@ -355,7 +512,7 @@ vc4_create_shader_bo_ioctl(struct drm_de
350 return -EINVAL;
351 }
352
353 - bo = vc4_bo_create(dev, args->size);
354 + bo = vc4_bo_create(dev, args->size, true);
355 if (!bo)
356 return -ENOMEM;
357
358 @@ -364,6 +521,11 @@ vc4_create_shader_bo_ioctl(struct drm_de
359 args->size);
360 if (ret != 0)
361 goto fail;
362 + /* Clear the rest of the memory from allocating from the BO
363 + * cache.
364 + */
365 + memset(bo->base.vaddr + args->size, 0,
366 + bo->base.base.size - args->size);
367
368 bo->validated_shader = vc4_validate_shader(&bo->base);
369 if (!bo->validated_shader) {
370 @@ -382,85 +544,6 @@ vc4_create_shader_bo_ioctl(struct drm_de
371 return ret;
372 }
373
374 -int
375 -vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
376 - struct drm_file *file_priv)
377 -{
378 - struct drm_vc4_mmap_bo *args = data;
379 - struct drm_gem_object *gem_obj;
380 -
381 - gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
382 - if (!gem_obj) {
383 - DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
384 - return -EINVAL;
385 - }
386 -
387 - /* The mmap offset was set up at BO allocation time. */
388 - args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
389 -
390 - drm_gem_object_unreference(gem_obj);
391 - return 0;
392 -}
393 -
394 -int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
395 -{
396 - struct drm_gem_object *gem_obj;
397 - struct vc4_bo *bo;
398 - int ret;
399 -
400 - ret = drm_gem_mmap(filp, vma);
401 - if (ret)
402 - return ret;
403 -
404 - gem_obj = vma->vm_private_data;
405 - bo = to_vc4_bo(gem_obj);
406 -
407 - if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
408 - DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
409 - return -EINVAL;
410 - }
411 -
412 - /*
413 - * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
414 - * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
415 - * the whole buffer.
416 - */
417 - vma->vm_flags &= ~VM_PFNMAP;
418 - vma->vm_pgoff = 0;
419 -
420 - ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
421 - bo->base.vaddr, bo->base.paddr,
422 - vma->vm_end - vma->vm_start);
423 - if (ret)
424 - drm_gem_vm_close(vma);
425 -
426 - return ret;
427 -}
428 -
429 -int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
430 -{
431 - struct vc4_bo *bo = to_vc4_bo(obj);
432 -
433 - if (bo->validated_shader) {
434 - DRM_ERROR("mmaping of shader BOs not allowed.\n");
435 - return -EINVAL;
436 - }
437 -
438 - return drm_gem_cma_prime_mmap(obj, vma);
439 -}
440 -
441 -void *vc4_prime_vmap(struct drm_gem_object *obj)
442 -{
443 - struct vc4_bo *bo = to_vc4_bo(obj);
444 -
445 - if (bo->validated_shader) {
446 - DRM_ERROR("mmaping of shader BOs not allowed.\n");
447 - return ERR_PTR(-EINVAL);
448 - }
449 -
450 - return drm_gem_cma_prime_vmap(obj);
451 -}
452 -
453 void vc4_bo_cache_init(struct drm_device *dev)
454 {
455 struct vc4_dev *vc4 = to_vc4_dev(dev);
456 @@ -472,7 +555,7 @@ void vc4_bo_cache_init(struct drm_device
457 INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work);
458 setup_timer(&vc4->bo_cache.time_timer,
459 vc4_bo_cache_time_timer,
460 - (unsigned long) dev);
461 + (unsigned long)dev);
462 }
463
464 void vc4_bo_cache_destroy(struct drm_device *dev)
465 @@ -489,28 +572,3 @@ void vc4_bo_cache_destroy(struct drm_dev
466 vc4_bo_stats_dump(vc4);
467 }
468 }
469 -
470 -#ifdef CONFIG_DEBUG_FS
471 -int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
472 -{
473 - struct drm_info_node *node = (struct drm_info_node *) m->private;
474 - struct drm_device *dev = node->minor->dev;
475 - struct vc4_dev *vc4 = to_vc4_dev(dev);
476 - struct vc4_bo_stats stats;
477 -
478 - mutex_lock(&vc4->bo_lock);
479 - stats = vc4->bo_stats;
480 - mutex_unlock(&vc4->bo_lock);
481 -
482 - seq_printf(m, "num bos allocated: %d\n", stats.num_allocated);
483 - seq_printf(m, "size bos allocated: %dkb\n", stats.size_allocated / 1024);
484 - seq_printf(m, "num bos used: %d\n", (stats.num_allocated -
485 - stats.num_cached));
486 - seq_printf(m, "size bos used: %dkb\n", (stats.size_allocated -
487 - stats.size_cached) / 1024);
488 - seq_printf(m, "num bos cached: %d\n", stats.num_cached);
489 - seq_printf(m, "size bos cached: %dkb\n", stats.size_cached / 1024);
490 -
491 - return 0;
492 -}
493 -#endif
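Taken together, the vc4_bo.c changes above implement two allocation policies: BOs handed to userspace are always fresh CMA memory (from_cache = false), since CMA pages arrive zeroed and recycled ones could leak another client's data, while kernel-internal and shader BOs may come from the cache as long as stale bytes are cleared by hand. A sketch of the shader-BO pattern, mirroring vc4_create_shader_bo_ioctl() above:

    /* Sketch: a cached BO may carry stale data, so every byte the
     * caller does not overwrite must be zeroed explicitly. */
    bo = vc4_bo_create(dev, args->size, true /* from_cache */);
    /* ... copy args->size bytes of shader code to bo->base.vaddr ... */
    memset(bo->base.vaddr + args->size, 0,
           bo->base.base.size - args->size);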
494 --- a/drivers/gpu/drm/vc4/vc4_crtc.c
495 +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
496 @@ -501,6 +501,7 @@ vc4_async_page_flip_complete(struct vc4_
497 vc4_plane_async_set_fb(plane, flip_state->fb);
498 if (flip_state->event) {
499 unsigned long flags;
500 +
501 spin_lock_irqsave(&dev->event_lock, flags);
502 drm_crtc_send_vblank_event(crtc, flip_state->event);
503 spin_unlock_irqrestore(&dev->event_lock, flags);
504 @@ -562,9 +563,9 @@ static int vc4_async_page_flip(struct dr
505 }
506
507 static int vc4_page_flip(struct drm_crtc *crtc,
508 - struct drm_framebuffer *fb,
509 - struct drm_pending_vblank_event *event,
510 - uint32_t flags)
511 + struct drm_framebuffer *fb,
512 + struct drm_pending_vblank_event *event,
513 + uint32_t flags)
514 {
515 if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
516 return vc4_async_page_flip(crtc, fb, event, flags);
517 --- a/drivers/gpu/drm/vc4/vc4_drv.c
518 +++ b/drivers/gpu/drm/vc4/vc4_drv.c
519 @@ -81,7 +81,8 @@ static const struct drm_ioctl_desc vc4_d
520 DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
521 DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
522 DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
523 - DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY),
524 + DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
525 + DRM_ROOT_ONLY),
526 };
527
528 static struct drm_driver vc4_drm_driver = {
529 @@ -107,6 +108,7 @@ static struct drm_driver vc4_drm_driver
530 .debugfs_cleanup = vc4_debugfs_cleanup,
531 #endif
532
533 + .gem_create_object = vc4_create_object,
534 .gem_free_object = vc4_free_object,
535 .gem_vm_ops = &drm_gem_cma_vm_ops,
536
537 @@ -128,8 +130,6 @@ static struct drm_driver vc4_drm_driver
538 .num_ioctls = ARRAY_SIZE(vc4_drm_ioctls),
539 .fops = &vc4_drm_fops,
540
541 - //.gem_obj_size = sizeof(struct vc4_bo),
542 -
543 .name = DRIVER_NAME,
544 .desc = DRIVER_DESC,
545 .date = DRIVER_DATE,
546 --- a/drivers/gpu/drm/vc4/vc4_drv.h
547 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
548 @@ -72,6 +72,9 @@ struct vc4_dev {
549 * job_done_work.
550 */
551 struct list_head job_done_list;
552 + /* Spinlock used to synchronize the job_list and seqno
553 + * accesses between the IRQ handler and GEM ioctls.
554 + */
555 spinlock_t job_lock;
556 wait_queue_head_t job_wait_queue;
557 struct work_struct job_done_work;
558 @@ -318,8 +321,7 @@ struct vc4_texture_sample_info {
559 * and validate the shader state record's uniforms that define the texture
560 * samples.
561 */
562 -struct vc4_validated_shader_info
563 -{
564 +struct vc4_validated_shader_info {
565 uint32_t uniforms_size;
566 uint32_t uniforms_src_size;
567 uint32_t num_texture_samples;
568 @@ -355,8 +357,10 @@ struct vc4_validated_shader_info
569 #define wait_for(COND, MS) _wait_for(COND, MS, 1)
570
571 /* vc4_bo.c */
572 +struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
573 void vc4_free_object(struct drm_gem_object *gem_obj);
574 -struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size);
575 +struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
576 + bool from_cache);
577 int vc4_dumb_create(struct drm_file *file_priv,
578 struct drm_device *dev,
579 struct drm_mode_create_dumb *args);
580 @@ -432,7 +436,8 @@ struct drm_plane *vc4_plane_init(struct
581 enum drm_plane_type type);
582 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
583 u32 vc4_plane_dlist_size(struct drm_plane_state *state);
584 -void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb);
585 +void vc4_plane_async_set_fb(struct drm_plane *plane,
586 + struct drm_framebuffer *fb);
587
588 /* vc4_v3d.c */
589 extern struct platform_driver vc4_v3d_driver;
590 @@ -450,9 +455,6 @@ vc4_validate_bin_cl(struct drm_device *d
591 int
592 vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
593
594 -struct vc4_validated_shader_info *
595 -vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
596 -
597 bool vc4_use_bo(struct vc4_exec_info *exec,
598 uint32_t hindex,
599 enum vc4_bo_mode mode,
600 @@ -464,3 +466,7 @@ bool vc4_check_tex_size(struct vc4_exec_
601 struct drm_gem_cma_object *fbo,
602 uint32_t offset, uint8_t tiling_format,
603 uint32_t width, uint32_t height, uint8_t cpp);
604 +
605 +/* vc4_validate_shader.c */
606 +struct vc4_validated_shader_info *
607 +vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
608 --- a/drivers/gpu/drm/vc4/vc4_gem.c
609 +++ b/drivers/gpu/drm/vc4/vc4_gem.c
610 @@ -53,9 +53,8 @@ vc4_free_hang_state(struct drm_device *d
611 unsigned int i;
612
613 mutex_lock(&dev->struct_mutex);
614 - for (i = 0; i < state->user_state.bo_count; i++) {
615 + for (i = 0; i < state->user_state.bo_count; i++)
616 drm_gem_object_unreference(state->bo[i]);
617 - }
618 mutex_unlock(&dev->struct_mutex);
619
620 kfree(state);
621 @@ -65,10 +64,10 @@ int
622 vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
623 struct drm_file *file_priv)
624 {
625 - struct drm_vc4_get_hang_state *get_state = data;
626 + struct drm_vc4_get_hang_state *get_state = data;
627 struct drm_vc4_get_hang_state_bo *bo_state;
628 struct vc4_hang_state *kernel_state;
629 - struct drm_vc4_get_hang_state *state;
630 + struct drm_vc4_get_hang_state *state;
631 struct vc4_dev *vc4 = to_vc4_dev(dev);
632 unsigned long irqflags;
633 u32 i;
634 @@ -107,6 +106,7 @@ vc4_get_hang_state_ioctl(struct drm_devi
635 for (i = 0; i < state->bo_count; i++) {
636 struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
637 u32 handle;
638 +
639 ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
640 &handle);
641
642 @@ -124,7 +124,7 @@ vc4_get_hang_state_ioctl(struct drm_devi
643 state->bo_count * sizeof(*bo_state));
644 kfree(bo_state);
645
646 - err_free:
647 +err_free:
648
649 vc4_free_hang_state(dev, kernel_state);
650
651 @@ -578,7 +578,7 @@ vc4_get_bcl(struct drm_device *dev, stru
652 goto fail;
653 }
654
655 - bo = vc4_bo_create(dev, exec_size);
656 + bo = vc4_bo_create(dev, exec_size, true);
657 if (!bo) {
658 DRM_ERROR("Couldn't allocate BO for binning\n");
659 ret = PTR_ERR(exec->exec_bo);
660 @@ -668,6 +668,7 @@ vc4_job_handle_completed(struct vc4_dev
661 static void vc4_seqno_cb_work(struct work_struct *work)
662 {
663 struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
664 +
665 cb->func(cb);
666 }
667
668 @@ -717,6 +718,7 @@ vc4_wait_for_seqno_ioctl_helper(struct d
669
670 if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
671 uint64_t delta = jiffies_to_nsecs(jiffies - start);
672 +
673 if (*timeout_ns >= delta)
674 *timeout_ns -= delta;
675 }
676 @@ -750,9 +752,10 @@ vc4_wait_bo_ioctl(struct drm_device *dev
677 }
678 bo = to_vc4_bo(gem_obj);
679
680 - ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, &args->timeout_ns);
681 + ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
682 + &args->timeout_ns);
683
684 - drm_gem_object_unreference(gem_obj);
685 + drm_gem_object_unreference_unlocked(gem_obj);
686 return ret;
687 }
688
689 @@ -793,7 +796,8 @@ vc4_submit_cl_ioctl(struct drm_device *d
690 if (ret)
691 goto fail;
692 } else {
693 - exec->ct0ca = exec->ct0ea = 0;
694 + exec->ct0ca = 0;
695 + exec->ct0ea = 0;
696 }
697
698 ret = vc4_get_rcl(dev, exec);
699 @@ -831,7 +835,7 @@ vc4_gem_init(struct drm_device *dev)
700 INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
701 setup_timer(&vc4->hangcheck.timer,
702 vc4_hangcheck_elapsed,
703 - (unsigned long) dev);
704 + (unsigned long)dev);
705
706 INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
707 }
708 --- a/drivers/gpu/drm/vc4/vc4_irq.c
709 +++ b/drivers/gpu/drm/vc4/vc4_irq.c
710 @@ -56,7 +56,7 @@ vc4_overflow_mem_work(struct work_struct
711 struct drm_device *dev = vc4->dev;
712 struct vc4_bo *bo;
713
714 - bo = vc4_bo_create(dev, 256 * 1024);
715 + bo = vc4_bo_create(dev, 256 * 1024, true);
716 if (!bo) {
717 DRM_ERROR("Couldn't allocate binner overflow mem\n");
718 return;
719 @@ -87,9 +87,8 @@ vc4_overflow_mem_work(struct work_struct
720 spin_unlock_irqrestore(&vc4->job_lock, irqflags);
721 }
722
723 - if (vc4->overflow_mem) {
724 + if (vc4->overflow_mem)
725 drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
726 - }
727 vc4->overflow_mem = bo;
728
729 V3D_WRITE(V3D_BPOA, bo->base.paddr);
730 --- a/drivers/gpu/drm/vc4/vc4_kms.c
731 +++ b/drivers/gpu/drm/vc4/vc4_kms.c
732 @@ -132,6 +132,7 @@ static int vc4_atomic_commit(struct drm_
733 struct drm_gem_cma_object *cma_bo =
734 drm_fb_cma_get_gem_obj(new_state->fb, 0);
735 struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
736 +
737 wait_seqno = max(bo->seqno, wait_seqno);
738 }
739 }
740 --- a/drivers/gpu/drm/vc4/vc4_packet.h
741 +++ b/drivers/gpu/drm/vc4/vc4_packet.h
742 @@ -27,60 +27,60 @@
743 #include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
744
745 enum vc4_packet {
746 - VC4_PACKET_HALT = 0,
747 - VC4_PACKET_NOP = 1,
748 + VC4_PACKET_HALT = 0,
749 + VC4_PACKET_NOP = 1,
750
751 - VC4_PACKET_FLUSH = 4,
752 - VC4_PACKET_FLUSH_ALL = 5,
753 - VC4_PACKET_START_TILE_BINNING = 6,
754 - VC4_PACKET_INCREMENT_SEMAPHORE = 7,
755 - VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
756 -
757 - VC4_PACKET_BRANCH = 16,
758 - VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
759 -
760 - VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
761 - VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
762 - VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
763 - VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
764 - VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
765 - VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
766 -
767 - VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
768 - VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
769 -
770 - VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
771 - VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
772 -
773 - VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
774 -
775 - VC4_PACKET_GL_SHADER_STATE = 64,
776 - VC4_PACKET_NV_SHADER_STATE = 65,
777 - VC4_PACKET_VG_SHADER_STATE = 66,
778 -
779 - VC4_PACKET_CONFIGURATION_BITS = 96,
780 - VC4_PACKET_FLAT_SHADE_FLAGS = 97,
781 - VC4_PACKET_POINT_SIZE = 98,
782 - VC4_PACKET_LINE_WIDTH = 99,
783 - VC4_PACKET_RHT_X_BOUNDARY = 100,
784 - VC4_PACKET_DEPTH_OFFSET = 101,
785 - VC4_PACKET_CLIP_WINDOW = 102,
786 - VC4_PACKET_VIEWPORT_OFFSET = 103,
787 - VC4_PACKET_Z_CLIPPING = 104,
788 - VC4_PACKET_CLIPPER_XY_SCALING = 105,
789 - VC4_PACKET_CLIPPER_Z_SCALING = 106,
790 -
791 - VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
792 - VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
793 - VC4_PACKET_CLEAR_COLORS = 114,
794 - VC4_PACKET_TILE_COORDINATES = 115,
795 -
796 - /* Not an actual hardware packet -- this is what we use to put
797 - * references to GEM bos in the command stream, since we need the u32
798 - * int the actual address packet in order to store the offset from the
799 - * start of the BO.
800 - */
801 - VC4_PACKET_GEM_HANDLES = 254,
802 + VC4_PACKET_FLUSH = 4,
803 + VC4_PACKET_FLUSH_ALL = 5,
804 + VC4_PACKET_START_TILE_BINNING = 6,
805 + VC4_PACKET_INCREMENT_SEMAPHORE = 7,
806 + VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
807 +
808 + VC4_PACKET_BRANCH = 16,
809 + VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
810 +
811 + VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
812 + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
813 + VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
814 + VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
815 + VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
816 + VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
817 +
818 + VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
819 + VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
820 +
821 + VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
822 + VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
823 +
824 + VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
825 +
826 + VC4_PACKET_GL_SHADER_STATE = 64,
827 + VC4_PACKET_NV_SHADER_STATE = 65,
828 + VC4_PACKET_VG_SHADER_STATE = 66,
829 +
830 + VC4_PACKET_CONFIGURATION_BITS = 96,
831 + VC4_PACKET_FLAT_SHADE_FLAGS = 97,
832 + VC4_PACKET_POINT_SIZE = 98,
833 + VC4_PACKET_LINE_WIDTH = 99,
834 + VC4_PACKET_RHT_X_BOUNDARY = 100,
835 + VC4_PACKET_DEPTH_OFFSET = 101,
836 + VC4_PACKET_CLIP_WINDOW = 102,
837 + VC4_PACKET_VIEWPORT_OFFSET = 103,
838 + VC4_PACKET_Z_CLIPPING = 104,
839 + VC4_PACKET_CLIPPER_XY_SCALING = 105,
840 + VC4_PACKET_CLIPPER_Z_SCALING = 106,
841 +
842 + VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
843 + VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
844 + VC4_PACKET_CLEAR_COLORS = 114,
845 + VC4_PACKET_TILE_COORDINATES = 115,
846 +
847 + /* Not an actual hardware packet -- this is what we use to put
848 + * references to GEM bos in the command stream, since we need the u32
849 + * int the actual address packet in order to store the offset from the
850 + * start of the BO.
851 + */
852 + VC4_PACKET_GEM_HANDLES = 254,
853 } __attribute__ ((__packed__));
854
855 #define VC4_PACKET_HALT_SIZE 1
856 @@ -148,10 +148,10 @@ enum vc4_packet {
857 * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
858 */
859
860 -#define VC4_LOADSTORE_TILE_BUFFER_EOF (1 << 3)
861 -#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK (1 << 2)
862 -#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS (1 << 1)
863 -#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR (1 << 0)
864 +#define VC4_LOADSTORE_TILE_BUFFER_EOF BIT(3)
865 +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2)
866 +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS BIT(1)
867 +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR BIT(0)
868
869 /** @} */
870
871 @@ -160,10 +160,10 @@ enum vc4_packet {
872 * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
873 * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
874 */
875 -#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15)
876 -#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 14)
877 -#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 13)
878 -#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 12)
879 +#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15)
880 +#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR BIT(14)
881 +#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR BIT(13)
882 +#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP BIT(12)
883
884 #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8)
885 #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8
886 @@ -201,28 +201,28 @@ enum vc4_packet {
887 #define VC4_INDEX_BUFFER_U16 (1 << 4)
888
889 /* This flag is only present in NV shader state. */
890 -#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS (1 << 3)
891 -#define VC4_SHADER_FLAG_ENABLE_CLIPPING (1 << 2)
892 -#define VC4_SHADER_FLAG_VS_POINT_SIZE (1 << 1)
893 -#define VC4_SHADER_FLAG_FS_SINGLE_THREAD (1 << 0)
894 +#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS BIT(3)
895 +#define VC4_SHADER_FLAG_ENABLE_CLIPPING BIT(2)
896 +#define VC4_SHADER_FLAG_VS_POINT_SIZE BIT(1)
897 +#define VC4_SHADER_FLAG_FS_SINGLE_THREAD BIT(0)
898
899 /** @{ byte 2 of config bits. */
900 -#define VC4_CONFIG_BITS_EARLY_Z_UPDATE (1 << 1)
901 -#define VC4_CONFIG_BITS_EARLY_Z (1 << 0)
902 +#define VC4_CONFIG_BITS_EARLY_Z_UPDATE BIT(1)
903 +#define VC4_CONFIG_BITS_EARLY_Z BIT(0)
904 /** @} */
905
906 /** @{ byte 1 of config bits. */
907 -#define VC4_CONFIG_BITS_Z_UPDATE (1 << 7)
908 +#define VC4_CONFIG_BITS_Z_UPDATE BIT(7)
909 /** same values in this 3-bit field as PIPE_FUNC_* */
910 #define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4
911 -#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE (1 << 3)
912 +#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE BIT(3)
913
914 #define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1)
915 #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1)
916 #define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1)
917 #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1)
918
919 -#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT (1 << 0)
920 +#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT BIT(0)
921 /** @} */
922
923 /** @{ byte 0 of config bits. */
924 @@ -230,15 +230,15 @@ enum vc4_packet {
925 #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6)
926 #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6)
927
928 -#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES (1 << 4)
929 -#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET (1 << 3)
930 -#define VC4_CONFIG_BITS_CW_PRIMITIVES (1 << 2)
931 -#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK (1 << 1)
932 -#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT (1 << 0)
933 +#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES BIT(4)
934 +#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET BIT(3)
935 +#define VC4_CONFIG_BITS_CW_PRIMITIVES BIT(2)
936 +#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK BIT(1)
937 +#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT BIT(0)
938 /** @} */
939
940 /** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
941 -#define VC4_BIN_CONFIG_DB_NON_MS (1 << 7)
942 +#define VC4_BIN_CONFIG_DB_NON_MS BIT(7)
943
944 #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5)
945 #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5
946 @@ -254,17 +254,17 @@ enum vc4_packet {
947 #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2
948 #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3
949
950 -#define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2)
951 -#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1)
952 -#define VC4_BIN_CONFIG_MS_MODE_4X (1 << 0)
953 +#define VC4_BIN_CONFIG_AUTO_INIT_TSDA BIT(2)
954 +#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT BIT(1)
955 +#define VC4_BIN_CONFIG_MS_MODE_4X BIT(0)
956 /** @} */
957
958 /** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
959 -#define VC4_RENDER_CONFIG_DB_NON_MS (1 << 12)
960 -#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11)
961 -#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G (1 << 10)
962 -#define VC4_RENDER_CONFIG_COVERAGE_MODE (1 << 9)
963 -#define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8)
964 +#define VC4_RENDER_CONFIG_DB_NON_MS BIT(12)
965 +#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11)
966 +#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G BIT(10)
967 +#define VC4_RENDER_CONFIG_COVERAGE_MODE BIT(9)
968 +#define VC4_RENDER_CONFIG_ENABLE_VG_MASK BIT(8)
969
970 /** The values of the field are VC4_TILING_FORMAT_* */
971 #define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6)
972 @@ -280,8 +280,8 @@ enum vc4_packet {
973 #define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1
974 #define VC4_RENDER_CONFIG_FORMAT_BGR565 2
975
976 -#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1)
977 -#define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0)
978 +#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT BIT(1)
979 +#define VC4_RENDER_CONFIG_MS_MODE_4X BIT(0)
980
981 #define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4)
982 #define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4)
983 @@ -291,24 +291,24 @@ enum vc4_packet {
984 #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0)
985
986 enum vc4_texture_data_type {
987 - VC4_TEXTURE_TYPE_RGBA8888 = 0,
988 - VC4_TEXTURE_TYPE_RGBX8888 = 1,
989 - VC4_TEXTURE_TYPE_RGBA4444 = 2,
990 - VC4_TEXTURE_TYPE_RGBA5551 = 3,
991 - VC4_TEXTURE_TYPE_RGB565 = 4,
992 - VC4_TEXTURE_TYPE_LUMINANCE = 5,
993 - VC4_TEXTURE_TYPE_ALPHA = 6,
994 - VC4_TEXTURE_TYPE_LUMALPHA = 7,
995 - VC4_TEXTURE_TYPE_ETC1 = 8,
996 - VC4_TEXTURE_TYPE_S16F = 9,
997 - VC4_TEXTURE_TYPE_S8 = 10,
998 - VC4_TEXTURE_TYPE_S16 = 11,
999 - VC4_TEXTURE_TYPE_BW1 = 12,
1000 - VC4_TEXTURE_TYPE_A4 = 13,
1001 - VC4_TEXTURE_TYPE_A1 = 14,
1002 - VC4_TEXTURE_TYPE_RGBA64 = 15,
1003 - VC4_TEXTURE_TYPE_RGBA32R = 16,
1004 - VC4_TEXTURE_TYPE_YUV422R = 17,
1005 + VC4_TEXTURE_TYPE_RGBA8888 = 0,
1006 + VC4_TEXTURE_TYPE_RGBX8888 = 1,
1007 + VC4_TEXTURE_TYPE_RGBA4444 = 2,
1008 + VC4_TEXTURE_TYPE_RGBA5551 = 3,
1009 + VC4_TEXTURE_TYPE_RGB565 = 4,
1010 + VC4_TEXTURE_TYPE_LUMINANCE = 5,
1011 + VC4_TEXTURE_TYPE_ALPHA = 6,
1012 + VC4_TEXTURE_TYPE_LUMALPHA = 7,
1013 + VC4_TEXTURE_TYPE_ETC1 = 8,
1014 + VC4_TEXTURE_TYPE_S16F = 9,
1015 + VC4_TEXTURE_TYPE_S8 = 10,
1016 + VC4_TEXTURE_TYPE_S16 = 11,
1017 + VC4_TEXTURE_TYPE_BW1 = 12,
1018 + VC4_TEXTURE_TYPE_A4 = 13,
1019 + VC4_TEXTURE_TYPE_A1 = 14,
1020 + VC4_TEXTURE_TYPE_RGBA64 = 15,
1021 + VC4_TEXTURE_TYPE_RGBA32R = 16,
1022 + VC4_TEXTURE_TYPE_YUV422R = 17,
1023 };
1024
1025 #define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12)
1026 --- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h
1027 +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
1028 @@ -25,194 +25,190 @@
1029 #define VC4_QPU_DEFINES_H
1030
1031 enum qpu_op_add {
1032 - QPU_A_NOP,
1033 - QPU_A_FADD,
1034 - QPU_A_FSUB,
1035 - QPU_A_FMIN,
1036 - QPU_A_FMAX,
1037 - QPU_A_FMINABS,
1038 - QPU_A_FMAXABS,
1039 - QPU_A_FTOI,
1040 - QPU_A_ITOF,
1041 - QPU_A_ADD = 12,
1042 - QPU_A_SUB,
1043 - QPU_A_SHR,
1044 - QPU_A_ASR,
1045 - QPU_A_ROR,
1046 - QPU_A_SHL,
1047 - QPU_A_MIN,
1048 - QPU_A_MAX,
1049 - QPU_A_AND,
1050 - QPU_A_OR,
1051 - QPU_A_XOR,
1052 - QPU_A_NOT,
1053 - QPU_A_CLZ,
1054 - QPU_A_V8ADDS = 30,
1055 - QPU_A_V8SUBS = 31,
1056 + QPU_A_NOP,
1057 + QPU_A_FADD,
1058 + QPU_A_FSUB,
1059 + QPU_A_FMIN,
1060 + QPU_A_FMAX,
1061 + QPU_A_FMINABS,
1062 + QPU_A_FMAXABS,
1063 + QPU_A_FTOI,
1064 + QPU_A_ITOF,
1065 + QPU_A_ADD = 12,
1066 + QPU_A_SUB,
1067 + QPU_A_SHR,
1068 + QPU_A_ASR,
1069 + QPU_A_ROR,
1070 + QPU_A_SHL,
1071 + QPU_A_MIN,
1072 + QPU_A_MAX,
1073 + QPU_A_AND,
1074 + QPU_A_OR,
1075 + QPU_A_XOR,
1076 + QPU_A_NOT,
1077 + QPU_A_CLZ,
1078 + QPU_A_V8ADDS = 30,
1079 + QPU_A_V8SUBS = 31,
1080 };
1081
1082 enum qpu_op_mul {
1083 - QPU_M_NOP,
1084 - QPU_M_FMUL,
1085 - QPU_M_MUL24,
1086 - QPU_M_V8MULD,
1087 - QPU_M_V8MIN,
1088 - QPU_M_V8MAX,
1089 - QPU_M_V8ADDS,
1090 - QPU_M_V8SUBS,
1091 + QPU_M_NOP,
1092 + QPU_M_FMUL,
1093 + QPU_M_MUL24,
1094 + QPU_M_V8MULD,
1095 + QPU_M_V8MIN,
1096 + QPU_M_V8MAX,
1097 + QPU_M_V8ADDS,
1098 + QPU_M_V8SUBS,
1099 };
1100
1101 enum qpu_raddr {
1102 - QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
1103 - /* 0-31 are the plain regfile a or b fields */
1104 - QPU_R_UNIF = 32,
1105 - QPU_R_VARY = 35,
1106 - QPU_R_ELEM_QPU = 38,
1107 - QPU_R_NOP,
1108 - QPU_R_XY_PIXEL_COORD = 41,
1109 - QPU_R_MS_REV_FLAGS = 41,
1110 - QPU_R_VPM = 48,
1111 - QPU_R_VPM_LD_BUSY,
1112 - QPU_R_VPM_LD_WAIT,
1113 - QPU_R_MUTEX_ACQUIRE,
1114 + QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
1115 + /* 0-31 are the plain regfile a or b fields */
1116 + QPU_R_UNIF = 32,
1117 + QPU_R_VARY = 35,
1118 + QPU_R_ELEM_QPU = 38,
1119 + QPU_R_NOP,
1120 + QPU_R_XY_PIXEL_COORD = 41,
1121 + QPU_R_MS_REV_FLAGS = 41,
1122 + QPU_R_VPM = 48,
1123 + QPU_R_VPM_LD_BUSY,
1124 + QPU_R_VPM_LD_WAIT,
1125 + QPU_R_MUTEX_ACQUIRE,
1126 };
1127
1128 enum qpu_waddr {
1129 - /* 0-31 are the plain regfile a or b fields */
1130 - QPU_W_ACC0 = 32, /* aka r0 */
1131 - QPU_W_ACC1,
1132 - QPU_W_ACC2,
1133 - QPU_W_ACC3,
1134 - QPU_W_TMU_NOSWAP,
1135 - QPU_W_ACC5,
1136 - QPU_W_HOST_INT,
1137 - QPU_W_NOP,
1138 - QPU_W_UNIFORMS_ADDRESS,
1139 - QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
1140 - QPU_W_MS_FLAGS = 42,
1141 - QPU_W_REV_FLAG = 42,
1142 - QPU_W_TLB_STENCIL_SETUP = 43,
1143 - QPU_W_TLB_Z,
1144 - QPU_W_TLB_COLOR_MS,
1145 - QPU_W_TLB_COLOR_ALL,
1146 - QPU_W_TLB_ALPHA_MASK,
1147 - QPU_W_VPM,
1148 - QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
1149 - QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
1150 - QPU_W_MUTEX_RELEASE,
1151 - QPU_W_SFU_RECIP,
1152 - QPU_W_SFU_RECIPSQRT,
1153 - QPU_W_SFU_EXP,
1154 - QPU_W_SFU_LOG,
1155 - QPU_W_TMU0_S,
1156 - QPU_W_TMU0_T,
1157 - QPU_W_TMU0_R,
1158 - QPU_W_TMU0_B,
1159 - QPU_W_TMU1_S,
1160 - QPU_W_TMU1_T,
1161 - QPU_W_TMU1_R,
1162 - QPU_W_TMU1_B,
1163 + /* 0-31 are the plain regfile a or b fields */
1164 + QPU_W_ACC0 = 32, /* aka r0 */
1165 + QPU_W_ACC1,
1166 + QPU_W_ACC2,
1167 + QPU_W_ACC3,
1168 + QPU_W_TMU_NOSWAP,
1169 + QPU_W_ACC5,
1170 + QPU_W_HOST_INT,
1171 + QPU_W_NOP,
1172 + QPU_W_UNIFORMS_ADDRESS,
1173 + QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
1174 + QPU_W_MS_FLAGS = 42,
1175 + QPU_W_REV_FLAG = 42,
1176 + QPU_W_TLB_STENCIL_SETUP = 43,
1177 + QPU_W_TLB_Z,
1178 + QPU_W_TLB_COLOR_MS,
1179 + QPU_W_TLB_COLOR_ALL,
1180 + QPU_W_TLB_ALPHA_MASK,
1181 + QPU_W_VPM,
1182 + QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
1183 + QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
1184 + QPU_W_MUTEX_RELEASE,
1185 + QPU_W_SFU_RECIP,
1186 + QPU_W_SFU_RECIPSQRT,
1187 + QPU_W_SFU_EXP,
1188 + QPU_W_SFU_LOG,
1189 + QPU_W_TMU0_S,
1190 + QPU_W_TMU0_T,
1191 + QPU_W_TMU0_R,
1192 + QPU_W_TMU0_B,
1193 + QPU_W_TMU1_S,
1194 + QPU_W_TMU1_T,
1195 + QPU_W_TMU1_R,
1196 + QPU_W_TMU1_B,
1197 };
1198
1199 enum qpu_sig_bits {
1200 - QPU_SIG_SW_BREAKPOINT,
1201 - QPU_SIG_NONE,
1202 - QPU_SIG_THREAD_SWITCH,
1203 - QPU_SIG_PROG_END,
1204 - QPU_SIG_WAIT_FOR_SCOREBOARD,
1205 - QPU_SIG_SCOREBOARD_UNLOCK,
1206 - QPU_SIG_LAST_THREAD_SWITCH,
1207 - QPU_SIG_COVERAGE_LOAD,
1208 - QPU_SIG_COLOR_LOAD,
1209 - QPU_SIG_COLOR_LOAD_END,
1210 - QPU_SIG_LOAD_TMU0,
1211 - QPU_SIG_LOAD_TMU1,
1212 - QPU_SIG_ALPHA_MASK_LOAD,
1213 - QPU_SIG_SMALL_IMM,
1214 - QPU_SIG_LOAD_IMM,
1215 - QPU_SIG_BRANCH
1216 + QPU_SIG_SW_BREAKPOINT,
1217 + QPU_SIG_NONE,
1218 + QPU_SIG_THREAD_SWITCH,
1219 + QPU_SIG_PROG_END,
1220 + QPU_SIG_WAIT_FOR_SCOREBOARD,
1221 + QPU_SIG_SCOREBOARD_UNLOCK,
1222 + QPU_SIG_LAST_THREAD_SWITCH,
1223 + QPU_SIG_COVERAGE_LOAD,
1224 + QPU_SIG_COLOR_LOAD,
1225 + QPU_SIG_COLOR_LOAD_END,
1226 + QPU_SIG_LOAD_TMU0,
1227 + QPU_SIG_LOAD_TMU1,
1228 + QPU_SIG_ALPHA_MASK_LOAD,
1229 + QPU_SIG_SMALL_IMM,
1230 + QPU_SIG_LOAD_IMM,
1231 + QPU_SIG_BRANCH
1232 };
1233
1234 enum qpu_mux {
1235 - /* hardware mux values */
1236 - QPU_MUX_R0,
1237 - QPU_MUX_R1,
1238 - QPU_MUX_R2,
1239 - QPU_MUX_R3,
1240 - QPU_MUX_R4,
1241 - QPU_MUX_R5,
1242 - QPU_MUX_A,
1243 - QPU_MUX_B,
1244 + /* hardware mux values */
1245 + QPU_MUX_R0,
1246 + QPU_MUX_R1,
1247 + QPU_MUX_R2,
1248 + QPU_MUX_R3,
1249 + QPU_MUX_R4,
1250 + QPU_MUX_R5,
1251 + QPU_MUX_A,
1252 + QPU_MUX_B,
1253
1254 - /* non-hardware mux values */
1255 - QPU_MUX_IMM,
1256 + /* non-hardware mux values */
1257 + QPU_MUX_IMM,
1258 };
1259
1260 enum qpu_cond {
1261 - QPU_COND_NEVER,
1262 - QPU_COND_ALWAYS,
1263 - QPU_COND_ZS,
1264 - QPU_COND_ZC,
1265 - QPU_COND_NS,
1266 - QPU_COND_NC,
1267 - QPU_COND_CS,
1268 - QPU_COND_CC,
1269 + QPU_COND_NEVER,
1270 + QPU_COND_ALWAYS,
1271 + QPU_COND_ZS,
1272 + QPU_COND_ZC,
1273 + QPU_COND_NS,
1274 + QPU_COND_NC,
1275 + QPU_COND_CS,
1276 + QPU_COND_CC,
1277 };
1278
1279 enum qpu_pack_mul {
1280 - QPU_PACK_MUL_NOP,
1281 - QPU_PACK_MUL_8888 = 3, /* replicated to each 8 bits of the 32-bit dst. */
1282 - QPU_PACK_MUL_8A,
1283 - QPU_PACK_MUL_8B,
1284 - QPU_PACK_MUL_8C,
1285 - QPU_PACK_MUL_8D,
1286 + QPU_PACK_MUL_NOP,
1287 + /* replicated to each 8 bits of the 32-bit dst. */
1288 + QPU_PACK_MUL_8888 = 3,
1289 + QPU_PACK_MUL_8A,
1290 + QPU_PACK_MUL_8B,
1291 + QPU_PACK_MUL_8C,
1292 + QPU_PACK_MUL_8D,
1293 };
1294
1295 enum qpu_pack_a {
1296 - QPU_PACK_A_NOP,
1297 - /* convert to 16 bit float if float input, or to int16. */
1298 - QPU_PACK_A_16A,
1299 - QPU_PACK_A_16B,
1300 - /* replicated to each 8 bits of the 32-bit dst. */
1301 - QPU_PACK_A_8888,
1302 - /* Convert to 8-bit unsigned int. */
1303 - QPU_PACK_A_8A,
1304 - QPU_PACK_A_8B,
1305 - QPU_PACK_A_8C,
1306 - QPU_PACK_A_8D,
1307 -
1308 - /* Saturating variants of the previous instructions. */
1309 - QPU_PACK_A_32_SAT, /* int-only */
1310 - QPU_PACK_A_16A_SAT, /* int or float */
1311 - QPU_PACK_A_16B_SAT,
1312 - QPU_PACK_A_8888_SAT,
1313 - QPU_PACK_A_8A_SAT,
1314 - QPU_PACK_A_8B_SAT,
1315 - QPU_PACK_A_8C_SAT,
1316 - QPU_PACK_A_8D_SAT,
1317 + QPU_PACK_A_NOP,
1318 + /* convert to 16 bit float if float input, or to int16. */
1319 + QPU_PACK_A_16A,
1320 + QPU_PACK_A_16B,
1321 + /* replicated to each 8 bits of the 32-bit dst. */
1322 + QPU_PACK_A_8888,
1323 + /* Convert to 8-bit unsigned int. */
1324 + QPU_PACK_A_8A,
1325 + QPU_PACK_A_8B,
1326 + QPU_PACK_A_8C,
1327 + QPU_PACK_A_8D,
1328 +
1329 + /* Saturating variants of the previous instructions. */
1330 + QPU_PACK_A_32_SAT, /* int-only */
1331 + QPU_PACK_A_16A_SAT, /* int or float */
1332 + QPU_PACK_A_16B_SAT,
1333 + QPU_PACK_A_8888_SAT,
1334 + QPU_PACK_A_8A_SAT,
1335 + QPU_PACK_A_8B_SAT,
1336 + QPU_PACK_A_8C_SAT,
1337 + QPU_PACK_A_8D_SAT,
1338 };
1339
1340 enum qpu_unpack_r4 {
1341 - QPU_UNPACK_R4_NOP,
1342 - QPU_UNPACK_R4_F16A_TO_F32,
1343 - QPU_UNPACK_R4_F16B_TO_F32,
1344 - QPU_UNPACK_R4_8D_REP,
1345 - QPU_UNPACK_R4_8A,
1346 - QPU_UNPACK_R4_8B,
1347 - QPU_UNPACK_R4_8C,
1348 - QPU_UNPACK_R4_8D,
1349 -};
1350 -
1351 -#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
1352 -/* Using the GNU statement expression extension */
1353 -#define QPU_SET_FIELD(value, field) \
1354 - ({ \
1355 - uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
1356 - assert((fieldval & ~ field ## _MASK) == 0); \
1357 - fieldval & field ## _MASK; \
1358 - })
1359 + QPU_UNPACK_R4_NOP,
1360 + QPU_UNPACK_R4_F16A_TO_F32,
1361 + QPU_UNPACK_R4_F16B_TO_F32,
1362 + QPU_UNPACK_R4_8D_REP,
1363 + QPU_UNPACK_R4_8A,
1364 + QPU_UNPACK_R4_8B,
1365 + QPU_UNPACK_R4_8C,
1366 + QPU_UNPACK_R4_8D,
1367 +};
1368 +
1369 +#define QPU_MASK(high, low) \
1370 + ((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))
1371
1372 -#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
1373 +#define QPU_GET_FIELD(word, field) \
1374 + ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
1375
1376 #define QPU_SIG_SHIFT 60
1377 #define QPU_SIG_MASK QPU_MASK(63, 60)
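The reflowed QPU_MASK()/QPU_GET_FIELD() macros above behave exactly as before; only the layout changed, and the userland-only QPU_SET_FIELD() (which depended on assert() and a GNU statement expression) was dropped. For reference, a sketch of decoding the signal bits of a 64-bit QPU instruction with these macros:

    /* Extract bits 63:60 (QPU_SIG_MASK/QPU_SIG_SHIFT, defined just above). */
    uint64_t inst = 0xe0000000deadbeefull;	/* hypothetical instruction word */
    uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);	/* == 0xe */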
1378 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c
1379 +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
1380 @@ -63,7 +63,6 @@ static inline void rcl_u32(struct vc4_rc
1381 setup->next_offset += 4;
1382 }
1383
1384 -
1385 /*
1386 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
1387 *
1388 @@ -217,7 +216,7 @@ static int vc4_create_rcl_bo(struct drm_
1389 }
1390 size += xtiles * ytiles * loop_body_size;
1391
1392 - setup->rcl = &vc4_bo_create(dev, size)->base;
1393 + setup->rcl = &vc4_bo_create(dev, size, true)->base;
1394 if (!setup->rcl)
1395 return -ENOMEM;
1396 list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
1397 @@ -256,6 +255,7 @@ static int vc4_create_rcl_bo(struct drm_
1398 for (x = min_x_tile; x <= max_x_tile; x++) {
1399 bool first = (x == min_x_tile && y == min_y_tile);
1400 bool last = (x == max_x_tile && y == max_y_tile);
1401 +
1402 emit_tile(exec, setup, x, y, first, last);
1403 }
1404 }
1405 --- a/drivers/gpu/drm/vc4/vc4_v3d.c
1406 +++ b/drivers/gpu/drm/vc4/vc4_v3d.c
1407 @@ -125,7 +125,7 @@ int vc4_v3d_debugfs_regs(struct seq_file
1408
1409 int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
1410 {
1411 - struct drm_info_node *node = (struct drm_info_node *) m->private;
1412 + struct drm_info_node *node = (struct drm_info_node *)m->private;
1413 struct drm_device *dev = node->minor->dev;
1414 struct vc4_dev *vc4 = to_vc4_dev(dev);
1415 uint32_t ident1 = V3D_READ(V3D_IDENT1);
1416 @@ -133,11 +133,13 @@ int vc4_v3d_debugfs_ident(struct seq_fil
1417 uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
1418 uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
1419
1420 - seq_printf(m, "Revision: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
1421 + seq_printf(m, "Revision: %d\n",
1422 + VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
1423 seq_printf(m, "Slices: %d\n", nslc);
1424 seq_printf(m, "TMUs: %d\n", nslc * tups);
1425 seq_printf(m, "QPUs: %d\n", nslc * qups);
1426 - seq_printf(m, "Semaphores: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
1427 + seq_printf(m, "Semaphores: %d\n",
1428 + VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
1429
1430 return 0;
1431 }
1432 @@ -218,7 +220,7 @@ static int vc4_v3d_bind(struct device *d
1433 }
1434
1435 static void vc4_v3d_unbind(struct device *dev, struct device *master,
1436 - void *data)
1437 + void *data)
1438 {
1439 struct drm_device *drm = dev_get_drvdata(master);
1440 struct vc4_dev *vc4 = to_vc4_dev(drm);
1441 --- a/drivers/gpu/drm/vc4/vc4_validate.c
1442 +++ b/drivers/gpu/drm/vc4/vc4_validate.c
1443 @@ -48,7 +48,6 @@
1444 void *validated, \
1445 void *untrusted
1446
1447 -
1448 /** Return the width in pixels of a 64-byte microtile. */
1449 static uint32_t
1450 utile_width(int cpp)
1451 @@ -192,7 +191,7 @@ vc4_check_tex_size(struct vc4_exec_info
1452
1453 if (size + offset < size ||
1454 size + offset > fbo->base.size) {
1455 - DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %d)\n",
1456 + DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
1457 width, height,
1458 aligned_width, aligned_height,
1459 size, offset, fbo->base.size);
1460 @@ -278,7 +277,7 @@ validate_indexed_prim_list(VALIDATE_ARGS
1461
1462 if (offset > ib->base.size ||
1463 (ib->base.size - offset) / index_size < length) {
1464 - DRM_ERROR("IB access overflow (%d + %d*%d > %d)\n",
1465 + DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
1466 offset, length, index_size, ib->base.size);
1467 return -EINVAL;
1468 }
1469 @@ -377,6 +376,7 @@ static int
1470 validate_tile_binning_config(VALIDATE_ARGS)
1471 {
1472 struct drm_device *dev = exec->exec_bo->base.dev;
1473 + struct vc4_bo *tile_bo;
1474 uint8_t flags;
1475 uint32_t tile_state_size, tile_alloc_size;
1476 uint32_t tile_count;
1477 @@ -438,12 +438,12 @@ validate_tile_binning_config(VALIDATE_AR
1478 */
1479 tile_alloc_size += 1024 * 1024;
1480
1481 - exec->tile_bo = &vc4_bo_create(dev, exec->tile_alloc_offset +
1482 - tile_alloc_size)->base;
1483 + tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
1484 + true);
1485 + exec->tile_bo = &tile_bo->base;
1486 if (!exec->tile_bo)
1487 return -ENOMEM;
1488 - list_add_tail(&to_vc4_bo(&exec->tile_bo->base)->unref_head,
1489 - &exec->unref_list);
1490 + list_add_tail(&tile_bo->unref_head, &exec->unref_list);
1491
1492 /* tile alloc address. */
1493 *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
1494 @@ -463,8 +463,8 @@ validate_gem_handles(VALIDATE_ARGS)
1495 return 0;
1496 }
1497
1498 -#define VC4_DEFINE_PACKET(packet, name, func) \
1499 - [packet] = { packet ## _SIZE, name, func }
1500 +#define VC4_DEFINE_PACKET(packet, func) \
1501 + [packet] = { packet ## _SIZE, #packet, func }
1502
1503 static const struct cmd_info {
1504 uint16_t len;
1505 @@ -472,42 +472,43 @@ static const struct cmd_info {
1506 int (*func)(struct vc4_exec_info *exec, void *validated,
1507 void *untrusted);
1508 } cmd_info[] = {
1509 - VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
1510 - VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
1511 - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL),
1512 - VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all),
1513 - VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
1514 - VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
1515 -
1516 - VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list),
1517 -
1518 - VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive),
1519 -
1520 - /* This is only used by clipped primitives (packets 48 and 49), which
1521 - * we don't support parsing yet.
1522 - */
1523 - VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
1524 -
1525 - VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
1526 - VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state),
1527 -
1528 - VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
1529 - VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
1530 - VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL),
1531 - VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL),
1532 - VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL),
1533 - VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL),
1534 - VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL),
1535 - VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL),
1536 - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL),
1537 + VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
1538 + VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
1539 + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, NULL),
1540 + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, validate_flush_all),
1541 + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
1542 + validate_start_tile_binning),
1543 + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
1544 + validate_increment_semaphore),
1545 +
1546 + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
1547 + validate_indexed_prim_list),
1548 + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
1549 + validate_gl_array_primitive),
1550 +
1551 + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
1552 +
1553 + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
1554 + VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, validate_nv_shader_state),
1555 +
1556 + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
1557 + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
1558 + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
1559 + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
1560 + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
1561 + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
1562 + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
1563 + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
1564 + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
1565 /* Note: The docs say this was also 105, but it was 106 in the
1566 * initial userland code drop.
1567 */
1568 - VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL),
1569 + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
1570
1571 - VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config),
1572 + VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
1573 + validate_tile_binning_config),
1574
- VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles),
+ VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
 };
 
 int
@@ -526,7 +527,7 @@ vc4_validate_bin_cl(struct drm_device *d
 u8 cmd = *(uint8_t *)src_pkt;
 const struct cmd_info *info;
 
- if (cmd > ARRAY_SIZE(cmd_info)) {
+ if (cmd >= ARRAY_SIZE(cmd_info)) {
 DRM_ERROR("0x%08x: packet %d out of bounds\n",
 src_offset, cmd);
 return -EINVAL;
@@ -539,11 +540,6 @@ vc4_validate_bin_cl(struct drm_device *d
 return -EINVAL;
 }
 
-#if 0
- DRM_INFO("0x%08x: packet %d (%s) size %d processing...\n",
- src_offset, cmd, info->name, info->len);
-#endif
-
 if (src_offset + info->len > len) {
 DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
 "exceeds bounds (0x%08x)\n",
@@ -558,8 +554,7 @@ vc4_validate_bin_cl(struct drm_device *d
 if (info->func && info->func(exec,
 dst_pkt + 1,
 src_pkt + 1)) {
- DRM_ERROR("0x%08x: packet %d (%s) failed to "
- "validate\n",
+ DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
 src_offset, cmd, info->name);
 return -EINVAL;
 }
@@ -618,12 +613,14 @@ reloc_tex(struct vc4_exec_info *exec,
 
 if (sample->is_direct) {
 uint32_t remaining_size = tex->base.size - p0;
+
 if (p0 > tex->base.size - 4) {
 DRM_ERROR("UBO offset greater than UBO size\n");
 goto fail;
 }
 if (p1 > remaining_size - 4) {
- DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
+ DRM_ERROR("UBO clamp would allow reads "
+ "outside of UBO\n");
 goto fail;
 }
 *validated_p0 = tex->paddr + p0;
@@ -786,7 +783,7 @@ validate_shader_rec(struct drm_device *d
 struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
 uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size;
 int i;
- struct vc4_validated_shader_info *validated_shader;
+ struct vc4_validated_shader_info *shader;
 
 if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
 relocs = nv_relocs;
@@ -841,12 +838,12 @@ validate_shader_rec(struct drm_device *d
 else
 mode = VC4_MODE_RENDER;
 
- if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) {
+ if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i]))
 return false;
- }
 }
 
 for (i = 0; i < nr_fixed_relocs; i++) {
+ struct vc4_bo *vc4_bo;
 uint32_t o = relocs[i].offset;
 uint32_t src_offset = *(uint32_t *)(pkt_u + o);
 uint32_t *texture_handles_u;
@@ -858,34 +855,34 @@ validate_shader_rec(struct drm_device *d
 switch (relocs[i].type) {
 case RELOC_CODE:
 if (src_offset != 0) {
- DRM_ERROR("Shaders must be at offset 0 of "
- "the BO.\n");
+ DRM_ERROR("Shaders must be at offset 0 "
+ "of the BO.\n");
 goto fail;
 }
 
- validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
- if (!validated_shader)
+ vc4_bo = to_vc4_bo(&bo[i]->base);
+ shader = vc4_bo->validated_shader;
+ if (!shader)
 goto fail;
 
- if (validated_shader->uniforms_src_size >
- exec->uniforms_size) {
+ if (shader->uniforms_src_size > exec->uniforms_size) {
 DRM_ERROR("Uniforms src buffer overflow\n");
 goto fail;
 }
 
 texture_handles_u = exec->uniforms_u;
 uniform_data_u = (texture_handles_u +
- validated_shader->num_texture_samples);
+ shader->num_texture_samples);
 
 memcpy(exec->uniforms_v, uniform_data_u,
- validated_shader->uniforms_size);
+ shader->uniforms_size);
 
 for (tex = 0;
- tex < validated_shader->num_texture_samples;
+ tex < shader->num_texture_samples;
 tex++) {
 if (!reloc_tex(exec,
 uniform_data_u,
- &validated_shader->texture_samples[tex],
+ &shader->texture_samples[tex],
 texture_handles_u[tex])) {
 goto fail;
 }
@@ -893,9 +890,9 @@ validate_shader_rec(struct drm_device *d
 
 *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
 
- exec->uniforms_u += validated_shader->uniforms_src_size;
- exec->uniforms_v += validated_shader->uniforms_size;
- exec->uniforms_p += validated_shader->uniforms_size;
+ exec->uniforms_u += shader->uniforms_src_size;
+ exec->uniforms_v += shader->uniforms_size;
+ exec->uniforms_p += shader->uniforms_size;
 
 break;
 
@@ -926,7 +923,8 @@ validate_shader_rec(struct drm_device *d
 max_index = ((vbo->base.size - offset - attr_size) /
 stride);
 if (state->max_index > max_index) {
- DRM_ERROR("primitives use index %d out of supplied %d\n",
+ DRM_ERROR("primitives use index %d out of "
+ "supplied %d\n",
 state->max_index, max_index);
 return -EINVAL;
 }
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -24,24 +24,16 @@
 /**
 * DOC: Shader validator for VC4.
 *
- * The VC4 has no IOMMU between it and system memory. So, a user with access
- * to execute shaders could escalate privilege by overwriting system memory
- * (using the VPM write address register in the general-purpose DMA mode) or
- * reading system memory it shouldn't (reading it as a texture, or uniform
- * data, or vertex data).
+ * The VC4 has no IOMMU between it and system memory, so a user with
+ * access to execute shaders could escalate privilege by overwriting
+ * system memory (using the VPM write address register in the
+ * general-purpose DMA mode) or reading system memory it shouldn't
+ * (reading it as a texture, or uniform data, or vertex data).
 *
- * This walks over a shader starting from some offset within a BO, ensuring
- * that its accesses are appropriately bounded, and recording how many texture
- * accesses are made and where so that we can do relocations for them in the
+ * This walks over a shader BO, ensuring that its accesses are
+ * appropriately bounded, and recording how many texture accesses are
+ * made and where so that we can do relocations for them in the
 * uniform stream.
- *
- * The kernel API has shaders stored in user-mapped BOs. The BOs will be
- * forcibly unmapped from the process before validation, and any cache of
- * validated state will be flushed if the mapping is faulted back in.
- *
- * Storing the shaders in BOs means that the validation process will be slow
- * due to uncached reads, but since shaders are long-lived and shader BOs are
- * never actually modified, this shouldn't be a problem.
 */
 
 #include "vc4_drv.h"
@@ -70,7 +62,6 @@ waddr_to_live_reg_index(uint32_t waddr,
 else
 return waddr;
 } else if (waddr <= QPU_W_ACC3) {
-
 return 64 + waddr - QPU_W_ACC0;
 } else {
 return ~0;
@@ -85,15 +76,14 @@ raddr_add_a_to_live_reg_index(uint64_t i
 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
 
- if (add_a == QPU_MUX_A) {
+ if (add_a == QPU_MUX_A)
 return raddr_a;
- } else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) {
+ else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
 return 32 + raddr_b;
- } else if (add_a <= QPU_MUX_R3) {
+ else if (add_a <= QPU_MUX_R3)
 return 64 + add_a;
- } else {
+ else
 return ~0;
- }
 }
 
 static bool
@@ -111,9 +101,9 @@ is_tmu_write(uint32_t waddr)
 }
 
 static bool
-record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
- struct vc4_shader_validation_state *validation_state,
- int tmu)
+record_texture_sample(struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state,
+ int tmu)
 {
 uint32_t s = validated_shader->num_texture_samples;
 int i;
@@ -226,8 +216,8 @@ check_tmu_write(uint64_t inst,
 validated_shader->uniforms_size += 4;
 
 if (submit) {
- if (!record_validated_texture_sample(validated_shader,
- validation_state, tmu)) {
+ if (!record_texture_sample(validated_shader,
+ validation_state, tmu)) {
 return false;
 }
 
@@ -238,10 +228,10 @@ check_tmu_write(uint64_t inst,
 }
 
 static bool
-check_register_write(uint64_t inst,
- struct vc4_validated_shader_info *validated_shader,
- struct vc4_shader_validation_state *validation_state,
- bool is_mul)
+check_reg_write(uint64_t inst,
+ struct vc4_validated_shader_info *validated_shader,
+ struct vc4_shader_validation_state *validation_state,
+ bool is_mul)
 {
 uint32_t waddr = (is_mul ?
 QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
@@ -297,7 +287,7 @@ check_register_write(uint64_t inst,
 return true;
 
 case QPU_W_TLB_STENCIL_SETUP:
- return true;
+ return true;
 }
 
 return true;
@@ -360,7 +350,7 @@ track_live_clamps(uint64_t inst,
 }
 
 validation_state->live_max_clamp_regs[lri_add] = true;
- } if (op_add == QPU_A_MIN) {
+ } else if (op_add == QPU_A_MIN) {
 /* Track live clamps of a value clamped to a minimum of 0 and
 * a maximum of some uniform's offset.
 */
@@ -392,8 +382,10 @@ check_instruction_writes(uint64_t inst,
 return false;
 }
 
- ok = (check_register_write(inst, validated_shader, validation_state, false) &&
- check_register_write(inst, validated_shader, validation_state, true));
+ ok = (check_reg_write(inst, validated_shader, validation_state,
+ false) &&
+ check_reg_write(inst, validated_shader, validation_state,
+ true));
 
 track_live_clamps(inst, validated_shader, validation_state);
 
@@ -441,7 +433,7 @@ vc4_validate_shader(struct drm_gem_cma_o
 shader = shader_obj->vaddr;
 max_ip = shader_obj->base.size / sizeof(uint64_t);
 
- validated_shader = kcalloc(sizeof(*validated_shader), 1, GFP_KERNEL);
+ validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
 if (!validated_shader)
 return NULL;
 
@@ -497,7 +489,7 @@ vc4_validate_shader(struct drm_gem_cma_o
 
 if (ip == max_ip) {
 DRM_ERROR("shader failed to terminate before "
- "shader BO end at %d\n",
+ "shader BO end at %zd\n",
 shader_obj->base.size);
 goto fail;
 }
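
The DOC comment rewritten above describes the validator as a walk over the
shader BO's 64-bit QPU instructions. As a hedged sketch of that walk (not
the kernel function itself: the texture-sample bookkeeping, uniform sizing,
and clamp tracking are elided, and it assumes the validated_shader info
struct and on-stack validation_state that vc4_validate_shader() sets up
earlier), the core loop looks roughly like this:

	/*
	 * Simplified sketch of the validation walk; the real code also
	 * records texture samples for uniform-stream relocation.
	 */
	uint64_t *shader = shader_obj->vaddr;
	uint32_t max_ip = shader_obj->base.size / sizeof(uint64_t);
	uint32_t ip;

	for (ip = 0; ip < max_ip; ip++) {
		uint64_t inst = shader[ip];

		/* Reject add/mul writes that could escape the sandbox
		 * (VPM DMA addresses, arbitrary TMU pointers, ...).
		 */
		if (!check_instruction_writes(inst, validated_shader,
					      &validation_state))
			goto fail;

		/* The shader must signal completion inside the BO. */
		if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_PROG_END)
			break;
	}

	if (ip == max_ip)
		goto fail;	/* the "failed to terminate" error above */
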
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -585,6 +585,13 @@ struct drm_driver {
 int (*gem_open_object) (struct drm_gem_object *, struct drm_file *);
 void (*gem_close_object) (struct drm_gem_object *, struct drm_file *);
 
+ /**
+ * Hook for allocating the GEM object struct, for use by core
+ * helpers.
+ */
+ struct drm_gem_object *(*gem_create_object)(struct drm_device *dev,
+ size_t size);
+
 /* prime: */
 /* export handle -> fd (see drm_gem_prime_handle_to_fd() helper) */
 int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv,
@@ -639,7 +646,6 @@ struct drm_driver {
 
 u32 driver_features;
 int dev_priv_size;
- size_t gem_obj_size;
 const struct drm_ioctl_desc *ioctls;
 int num_ioctls;
 const struct file_operations *fops;
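
The gem_create_object hook added above replaces the removed gem_obj_size
field: instead of telling the core helpers how large an allocation to make,
a driver now allocates its own wrapper object and hands back the embedded
GEM object. A minimal sketch, assuming a hypothetical struct foo_bo subclass
(vc4 does the equivalent with struct vc4_bo):

	/* Hypothetical driver subclass embedding the CMA GEM object. */
	struct foo_bo {
		struct drm_gem_cma_object base;
		/* driver-private state would live here */
	};

	/* Allocation hook the CMA helper calls instead of kzalloc(). */
	static struct drm_gem_object *
	foo_gem_create_object(struct drm_device *dev, size_t size)
	{
		struct foo_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);

		if (!bo)
			return NULL;

		return &bo->base.base;	/* embedded drm_gem_object */
	}

	static struct drm_driver foo_driver = {
		/* ... */
		.gem_create_object = foo_gem_create_object,
		/* ... */
	};

Returning NULL on allocation failure lets the calling helper report
-ENOMEM, matching the plain kzalloc() path it falls back to when a driver
does not provide the hook.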