brcm2708: add linux 4.19 support
[oweals/openwrt.git] target/linux/brcm2708/patches-4.19/950-0593-drm-v3d-Add-missing-implicit-synchronization.patch
From 50482167989066e0fb9597fe37146a0ee5bc4067 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 27 Mar 2019 17:44:40 -0700
Subject: [PATCH 593/703] drm/v3d: Add missing implicit synchronization.

It is the expectation of existing userspace (X11 + Mesa, in
particular) that jobs submitted to the kernel against a shared BO will
get implicitly synchronized by their submission order.  If we want to
allow clever userspace to disable implicit synchronization, we should
do that under its own submit flag (as amdgpu and lima do).

Note that we currently only implicitly sync for the rendering pass,
not binning -- if you texture-from-pixmap in the binning vertex shader
(vertex coordinate generation), you'll miss out on synchronization.

Fixes flickering when multiple clients are running in parallel,
particularly GL apps and compositors.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/v3d/v3d_drv.h   | 10 +---
 drivers/gpu/drm/v3d/v3d_gem.c   | 98 ++++++++++++++++++++++++++++++---
 drivers/gpu/drm/v3d/v3d_sched.c | 45 ++-------------
 3 files changed, 96 insertions(+), 57 deletions(-)

--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -186,8 +186,9 @@ struct v3d_job {
        struct v3d_bo **bo;
        u32 bo_count;
 
-       /* An optional fence userspace can pass in for the job to depend on. */
-       struct dma_fence *in_fence;
+       struct dma_fence **deps;
+       int deps_count;
+       int deps_size;
 
        /* v3d fence to be signaled by IRQ handler when the job is complete. */
        struct dma_fence *irq_fence;
@@ -219,11 +220,6 @@ struct v3d_bin_job {
 struct v3d_render_job {
        struct v3d_job base;
 
-       /* Optional fence for the binner, to depend on before starting
-        * our job.
-        */
-       struct dma_fence *bin_done_fence;
-
       /* GPU virtual addresses of the start/end of the CL job. */
        u32 start, end;
 
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -218,6 +218,71 @@ v3d_unlock_bo_reservations(struct v3d_bo
        ww_acquire_fini(acquire_ctx);
 }
 
+static int
+v3d_add_dep(struct v3d_job *job, struct dma_fence *fence)
+{
+       if (!fence)
+               return 0;
+
+       if (job->deps_size == job->deps_count) {
+               int new_deps_size = max(job->deps_size * 2, 4);
+               struct dma_fence **new_deps =
+                       krealloc(job->deps, new_deps_size * sizeof(*new_deps),
+                                GFP_KERNEL);
+               if (!new_deps) {
+                       dma_fence_put(fence);
+                       return -ENOMEM;
+               }
+
+               job->deps = new_deps;
+               job->deps_size = new_deps_size;
+       }
+
+       job->deps[job->deps_count++] = fence;
+
+       return 0;
+}
+
+/**
+ * Adds the required implicit fences before executing the job
+ *
+ * Userspace (X11 + Mesa) requires that a job submitted against a shared BO
+ * from one fd will implicitly synchronize against previous jobs submitted
+ * against that BO from other fds.
+ *
+ * Currently we don't bother trying to track the shared BOs, and instead just
+ * sync everything.  However, our synchronization is only for the render pass
+ * -- the binning stage (VS coordinate calculations) ignores implicit sync,
+ * since using shared buffers for texture coordinates seems unlikely, and
+ * implicitly syncing them would break bin/render parallelism.  If we want to
+ * fix that, we should introduce a flag when VS texturing has been used in the
+ * binning stage, or a set of flags for which BOs are sampled during binning.
+ */
+static int
+v3d_add_implicit_fences(struct v3d_job *job, struct v3d_bo *bo)
+{
+       int i, ret, nr_fences;
+       struct dma_fence **fences;
+
+       ret = reservation_object_get_fences_rcu(bo->resv, NULL,
+                                               &nr_fences, &fences);
+       if (ret || !nr_fences)
+               return ret;
+
+       for (i = 0; i < nr_fences; i++) {
+               ret = v3d_add_dep(job, fences[i]);
+               if (ret)
+                       break;
+       }
+
+       /* Free any remaining fences after error. */
+       for (; i < nr_fences; i++)
+               dma_fence_put(fences[i]);
+       kfree(fences);
+
+       return ret;
+}
+
 /* Takes the reservation lock on all the BOs being referenced, so that
  * at queue submit time we can update the reservations.
  *
@@ -226,10 +291,11 @@ v3d_unlock_bo_reservations(struct v3d_bo
  * to v3d, so we don't attach dma-buf fences to them.
  */
 static int
-v3d_lock_bo_reservations(struct v3d_bo **bos,
-                        int bo_count,
+v3d_lock_bo_reservations(struct v3d_job *job,
                         struct ww_acquire_ctx *acquire_ctx)
 {
+       struct v3d_bo **bos = job->bo;
+       int bo_count = job->bo_count;
        int contended_lock = -1;
        int i, ret;
 
@@ -281,6 +347,13 @@ retry:
         * before we commit the CL to the hardware.
         */
        for (i = 0; i < bo_count; i++) {
+               ret = v3d_add_implicit_fences(job, bos[i]);
+               if (ret) {
+                       v3d_unlock_bo_reservations(bos, bo_count,
+                                                  acquire_ctx);
+                       return ret;
+               }
+
                ret = reservation_object_reserve_shared(bos[i]->resv);
                if (ret) {
                        v3d_unlock_bo_reservations(bos, bo_count,
@@ -383,7 +456,10 @@ v3d_job_free(struct kref *ref)
        }
        kvfree(job->bo);
 
-       dma_fence_put(job->in_fence);
+       for (i = 0; i < job->deps_count; i++)
+               dma_fence_put(job->deps[i]);
+       kfree(job->deps);
+
        dma_fence_put(job->irq_fence);
        dma_fence_put(job->done_fence);
 
@@ -464,15 +540,20 @@ v3d_job_init(struct v3d_dev *v3d, struct
             struct v3d_job *job, void (*free)(struct kref *ref),
             u32 in_sync)
 {
+       struct dma_fence *in_fence = NULL;
        int ret;
 
        job->v3d = v3d;
        job->free = free;
 
-       ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence);
+       ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &in_fence);
        if (ret == -EINVAL)
                return ret;
 
+       ret = v3d_add_dep(job, in_fence);
+       if (ret)
+               return ret;
+
        kref_init(&job->refcount);
 
        return 0;
@@ -590,8 +671,7 @@ v3d_submit_cl_ioctl(struct drm_device *d
        if (ret)
                goto fail;
 
-       ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count,
-                                      &acquire_ctx);
+       ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
        if (ret)
                goto fail;
 
@@ -601,7 +681,8 @@ v3d_submit_cl_ioctl(struct drm_device *d
                if (ret)
                        goto fail_unreserve;
 
-               render->bin_done_fence = dma_fence_get(bin->base.done_fence);
+               ret = v3d_add_dep(&render->base,
+                                 dma_fence_get(bin->base.done_fence));
        }
 
        ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
@@ -692,8 +773,7 @@ v3d_submit_tfu_ioctl(struct drm_device *
        }
        spin_unlock(&file_priv->table_lock);
 
-       ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count,
-                                      &acquire_ctx);
+       ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
        if (ret)
                goto fail;
 
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -67,47 +67,10 @@ v3d_job_dependency(struct drm_sched_job
                   struct drm_sched_entity *s_entity)
 {
        struct v3d_job *job = to_v3d_job(sched_job);
-       struct dma_fence *fence;
-
-       fence = job->in_fence;
-       if (fence) {
-               job->in_fence = NULL;
-               return fence;
-       }
-
-       return NULL;
-}
 
-/**
- * Returns the fences that the render job depends on, one by one.
- * v3d_job_run() won't be called until all of them have been signaled.
- */
-static struct dma_fence *
-v3d_render_job_dependency(struct drm_sched_job *sched_job,
-                         struct drm_sched_entity *s_entity)
-{
-       struct v3d_render_job *job = to_render_job(sched_job);
-       struct dma_fence *fence;
-
-       fence = v3d_job_dependency(sched_job, s_entity);
-       if (fence)
-               return fence;
-
-       /* If we had a bin job, the render job definitely depends on
-        * it. We first have to wait for bin to be scheduled, so that
-        * its done_fence is created.
-        */
-       fence = job->bin_done_fence;
-       if (fence) {
-               job->bin_done_fence = NULL;
-               return fence;
-       }
-
-       /* XXX: Wait on a fence for switching the GMP if necessary,
-        * and then do so.
-        */
-
-       return fence;
+       if (!job->deps_count)
+               return NULL;
+       return job->deps[--job->deps_count];
 }
 
 static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
@@ -329,7 +292,7 @@ static const struct drm_sched_backend_op
 };
 
 static const struct drm_sched_backend_ops v3d_render_sched_ops = {
-       .dependency = v3d_render_job_dependency,
+       .dependency = v3d_job_dependency,
        .run_job = v3d_render_job_run,
        .timedout_job = v3d_render_job_timedout,
        .free_job = v3d_job_free,
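
The patch boils down to two pieces: v3d_add_dep() grows a per-job array of dma_fence pointers (doubling from a minimum of 4 via krealloc), and the scheduler's .dependency callback hands those fences back one per call until the array is empty. The standalone C sketch below only mirrors that pattern under those assumptions; every name in it is illustrative, plain realloc() stands in for krealloc() and a stub type stands in for struct dma_fence, and none of it is v3d driver code.

/* Sketch of the dependency-array pattern used by the patch: dependencies
 * accumulate in a doubling array (cf. v3d_add_dep) and are handed back one
 * at a time when the scheduler asks for the next dependency
 * (cf. v3d_job_dependency).
 */
#include <stdio.h>
#include <stdlib.h>

struct fence { int seqno; };            /* stand-in for struct dma_fence */

struct job {
        struct fence **deps;
        int deps_count;
        int deps_size;
};

static int job_add_dep(struct job *job, struct fence *fence)
{
        if (!fence)
                return 0;

        if (job->deps_size == job->deps_count) {
                int new_size = job->deps_size ? job->deps_size * 2 : 4;
                struct fence **new_deps =
                        realloc(job->deps, new_size * sizeof(*new_deps));

                if (!new_deps)
                        return -1;
                job->deps = new_deps;
                job->deps_size = new_size;
        }

        job->deps[job->deps_count++] = fence;
        return 0;
}

/* One fence per call, NULL once every dependency has been consumed. */
static struct fence *job_pop_dep(struct job *job)
{
        if (!job->deps_count)
                return NULL;
        return job->deps[--job->deps_count];
}

int main(void)
{
        struct job job = { 0 };
        struct fence in_sync = { 1 };   /* explicit fence from the ioctl   */
        struct fence shared  = { 2 };   /* implicit fence from a shared BO */
        struct fence *f;

        job_add_dep(&job, &in_sync);
        job_add_dep(&job, &shared);

        while ((f = job_pop_dep(&job)))
                printf("wait on fence %d before running the job\n", f->seqno);

        free(job.deps);
        return 0;
}

The order in which the fences come back does not matter: as in the driver, the job only runs once every dependency returned this way has signaled.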