brcm2708: add linux 4.19 support
[openwrt/openwrt.git] / target / linux / brcm2708 / patches-4.19 / 950-0593-drm-v3d-Add-missing-implicit-synchronization.patch
1 From 50482167989066e0fb9597fe37146a0ee5bc4067 Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Wed, 27 Mar 2019 17:44:40 -0700
4 Subject: [PATCH 593/703] drm/v3d: Add missing implicit synchronization.
5
6 It is the expectation of existing userspace (X11 + Mesa, in
7 particular) that jobs submitted to the kernel against a shared BO will
8 get implicitly synchronized by their submission order. If we want to
9 allow clever userspace to disable implicit synchronization, we should
10 do that under its own submit flag (as amdgpu and lima do).
11
12 Note that we currently only implicitly sync for the rendering pass,
13 not binning -- if you texture-from-pixmap in the binning vertex shader
14 (vertex coordinate generation), you'll miss out on synchronization.
15
16 Fixes flickering when multiple clients are running in parallel,
17 particularly GL apps and compositors.
18
19 Signed-off-by: Eric Anholt <eric@anholt.net>
20 ---
21 drivers/gpu/drm/v3d/v3d_drv.h | 10 +---
22 drivers/gpu/drm/v3d/v3d_gem.c | 98 ++++++++++++++++++++++++++++++---
23 drivers/gpu/drm/v3d/v3d_sched.c | 45 ++-------------
24 3 files changed, 96 insertions(+), 57 deletions(-)
25
26 --- a/drivers/gpu/drm/v3d/v3d_drv.h
27 +++ b/drivers/gpu/drm/v3d/v3d_drv.h
28 @@ -186,8 +186,9 @@ struct v3d_job {
29 struct v3d_bo **bo;
30 u32 bo_count;
31
32 - /* An optional fence userspace can pass in for the job to depend on. */
33 - struct dma_fence *in_fence;
34 + struct dma_fence **deps;
35 + int deps_count;
36 + int deps_size;
37
38 /* v3d fence to be signaled by IRQ handler when the job is complete. */
39 struct dma_fence *irq_fence;
40 @@ -219,11 +220,6 @@ struct v3d_bin_job {
41 struct v3d_render_job {
42 struct v3d_job base;
43
44 - /* Optional fence for the binner, to depend on before starting
45 - * our job.
46 - */
47 - struct dma_fence *bin_done_fence;
48 -
49 /* GPU virtual addresses of the start/end of the CL job. */
50 u32 start, end;
51
52 --- a/drivers/gpu/drm/v3d/v3d_gem.c
53 +++ b/drivers/gpu/drm/v3d/v3d_gem.c
54 @@ -218,6 +218,71 @@ v3d_unlock_bo_reservations(struct v3d_bo
55 ww_acquire_fini(acquire_ctx);
56 }
57
58 +static int
59 +v3d_add_dep(struct v3d_job *job, struct dma_fence *fence)
60 +{
61 + if (!fence) /* a NULL fence is accepted and simply not recorded */
62 + return 0;
63 + /* Append fence to job->deps, growing the array (x2, minimum 4) when full. */
64 + if (job->deps_size == job->deps_count) {
65 + int new_deps_size = max(job->deps_size * 2, 4);
66 + struct dma_fence **new_deps =
67 + krealloc(job->deps, new_deps_size * sizeof(*new_deps),
68 + GFP_KERNEL);
69 + if (!new_deps) {
70 + dma_fence_put(fence); /* the passed-in fence is released on failure too */
71 + return -ENOMEM;
72 + }
73 +
74 + job->deps = new_deps;
75 + job->deps_size = new_deps_size;
76 + }
77 +
78 + job->deps[job->deps_count++] = fence;
79 +
80 + return 0;
81 +}
82 +
83 +/**
84 + * Adds the required implicit fences before executing the job
85 + *
86 + * Userspace (X11 + Mesa) requires that a job submitted against a shared BO
87 + * from one fd will implicitly synchronize against previous jobs submitted
88 + * against that BO from other fds.
89 + *
90 + * Currently we don't bother trying to track the shared BOs, and instead just
91 + * sync everything. However, our synchronization is only for the render pass
92 + * -- the binning stage (VS coordinate calculations) ignores implicit sync,
93 + * since using shared buffers for texture coordinates seems unlikely, and
94 + * implicitly syncing them would break bin/render parallelism. If we want to
95 + * fix that, we should introduce a flag when VS texturing has been used in the
96 + * binning stage, or a set of flags for which BOs are sampled during binning.
97 + */
98 +static int
99 +v3d_add_implicit_fences(struct v3d_job *job, struct v3d_bo *bo)
100 +{
101 + int i, ret, nr_fences;
102 + struct dma_fence **fences;
103 +
104 + ret = reservation_object_get_fences_rcu(bo->resv, NULL,
105 + &nr_fences, &fences);
106 + if (ret || !nr_fences)
107 + return ret;
108 +
109 + for (i = 0; i < nr_fences; i++) {
110 + ret = v3d_add_dep(job, fences[i]);
111 + if (ret)
112 + break;
113 + }
114 +
115 + /* v3d_add_dep() put fences[i] itself on error; free only the rest. */
116 + for (i++; i < nr_fences; i++)
117 + dma_fence_put(fences[i]);
118 + kfree(fences);
119 +
120 + return ret;
121 +}
122 +
123 /* Takes the reservation lock on all the BOs being referenced, so that
124 * at queue submit time we can update the reservations.
125 *
126 @@ -226,10 +291,11 @@ v3d_unlock_bo_reservations(struct v3d_bo
127 * to v3d, so we don't attach dma-buf fences to them.
128 */
129 static int
130 -v3d_lock_bo_reservations(struct v3d_bo **bos,
131 - int bo_count,
132 +v3d_lock_bo_reservations(struct v3d_job *job,
133 struct ww_acquire_ctx *acquire_ctx)
134 {
135 + struct v3d_bo **bos = job->bo;
136 + int bo_count = job->bo_count;
137 int contended_lock = -1;
138 int i, ret;
139
140 @@ -281,6 +347,13 @@ retry:
141 * before we commit the CL to the hardware.
142 */
143 for (i = 0; i < bo_count; i++) {
144 + ret = v3d_add_implicit_fences(job, bos[i]);
145 + if (ret) {
146 + v3d_unlock_bo_reservations(bos, bo_count,
147 + acquire_ctx);
148 + return ret;
149 + }
150 +
151 ret = reservation_object_reserve_shared(bos[i]->resv);
152 if (ret) {
153 v3d_unlock_bo_reservations(bos, bo_count,
154 @@ -383,7 +456,10 @@ v3d_job_free(struct kref *ref)
155 }
156 kvfree(job->bo);
157
158 - dma_fence_put(job->in_fence);
159 + for (i = 0; i < job->deps_count; i++)
160 + dma_fence_put(job->deps[i]);
161 + kfree(job->deps);
162 +
163 dma_fence_put(job->irq_fence);
164 dma_fence_put(job->done_fence);
165
166 @@ -464,15 +540,20 @@ v3d_job_init(struct v3d_dev *v3d, struct
167 struct v3d_job *job, void (*free)(struct kref *ref),
168 u32 in_sync)
169 {
170 + struct dma_fence *in_fence = NULL;
171 int ret;
172
173 job->v3d = v3d;
174 job->free = free;
175
176 - ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence);
177 + ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &in_fence);
178 if (ret == -EINVAL)
179 return ret;
180
181 + ret = v3d_add_dep(job, in_fence);
182 + if (ret)
183 + return ret;
184 +
185 kref_init(&job->refcount);
186
187 return 0;
188 @@ -590,8 +671,7 @@ v3d_submit_cl_ioctl(struct drm_device *d
189 if (ret)
190 goto fail;
191
192 - ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count,
193 - &acquire_ctx);
194 + ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
195 if (ret)
196 goto fail;
197
198 @@ -601,7 +681,8 @@ v3d_submit_cl_ioctl(struct drm_device *d
199 if (ret)
200 goto fail_unreserve;
201
202 - render->bin_done_fence = dma_fence_get(bin->base.done_fence);
203 + ret = v3d_add_dep(&render->base,
204 + dma_fence_get(bin->base.done_fence));
205 }
206
207 ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
208 @@ -692,8 +773,7 @@ v3d_submit_tfu_ioctl(struct drm_device *
209 }
210 spin_unlock(&file_priv->table_lock);
211
212 - ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count,
213 - &acquire_ctx);
214 + ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
215 if (ret)
216 goto fail;
217
218 --- a/drivers/gpu/drm/v3d/v3d_sched.c
219 +++ b/drivers/gpu/drm/v3d/v3d_sched.c
220 @@ -67,47 +67,10 @@ v3d_job_dependency(struct drm_sched_job
221 struct drm_sched_entity *s_entity)
222 {
223 struct v3d_job *job = to_v3d_job(sched_job);
224 - struct dma_fence *fence;
225 -
226 - fence = job->in_fence;
227 - if (fence) {
228 - job->in_fence = NULL;
229 - return fence;
230 - }
231 -
232 - return NULL;
233 -}
234
235 -/**
236 - * Returns the fences that the render job depends on, one by one.
237 - * v3d_job_run() won't be called until all of them have been signaled.
238 - */
239 -static struct dma_fence *
240 -v3d_render_job_dependency(struct drm_sched_job *sched_job,
241 - struct drm_sched_entity *s_entity)
242 -{
243 - struct v3d_render_job *job = to_render_job(sched_job);
244 - struct dma_fence *fence;
245 -
246 - fence = v3d_job_dependency(sched_job, s_entity);
247 - if (fence)
248 - return fence;
249 -
250 - /* If we had a bin job, the render job definitely depends on
251 - * it. We first have to wait for bin to be scheduled, so that
252 - * its done_fence is created.
253 - */
254 - fence = job->bin_done_fence;
255 - if (fence) {
256 - job->bin_done_fence = NULL;
257 - return fence;
258 - }
259 -
260 - /* XXX: Wait on a fence for switching the GMP if necessary,
261 - * and then do so.
262 - */
263 -
264 - return fence;
265 + if (!job->deps_count)
266 + return NULL;
267 + return job->deps[--job->deps_count];
268 }
269
270 static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
271 @@ -329,7 +292,7 @@ static const struct drm_sched_backend_op
272 };
273
274 static const struct drm_sched_backend_ops v3d_render_sched_ops = {
275 - .dependency = v3d_render_job_dependency,
276 + .dependency = v3d_job_dependency,
277 .run_job = v3d_render_job_run,
278 .timedout_job = v3d_render_job_timedout,
279 .free_job = v3d_job_free,