ff7c346f5f6934d89817a479c1c4549fc4742f42
[oweals/openwrt.git] /
1 From bc4661703d132ae1fb91d66641c64851eae44959 Mon Sep 17 00:00:00 2001
2 From: Iago Toral Quiroga <itoral@igalia.com>
3 Date: Tue, 3 Sep 2019 08:45:24 +0200
4 Subject: [PATCH] drm/v3d: clean caches at the end of render jobs on
5  request from user space
6
7 Extends the user space ioctl for CL submissions so it can include a request
8 to flush the cache once the CL execution has completed. Fixes memory
9 write violation messages reported by the kernel in workloads involving
10 shader memory writes (SSBOs, shader images, scratch, etc.) which sometimes
11 also lead to GPU resets during Piglit and CTS workloads.
12
13 v2: if v3d_job_init() fails, we need to kfree() the job instead of
14     calling v3d_job_put() on it (Eric Anholt).
15
16 v3 (Eric Anholt):
17   - Drop _FLAG suffix from the new flag name.
18   - Add a new param so userspace can tell whether cache flushing is
19     implemented in the kernel.
20
21 Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
22 ---
23  drivers/gpu/drm/v3d/v3d_drv.c |  3 ++
24  drivers/gpu/drm/v3d/v3d_gem.c | 54 +++++++++++++++++++++++++++++------
25  include/uapi/drm/v3d_drm.h    |  6 ++--
26  3 files changed, 53 insertions(+), 10 deletions(-)
27
28 --- a/drivers/gpu/drm/v3d/v3d_drv.c
29 +++ b/drivers/gpu/drm/v3d/v3d_drv.c
30 @@ -117,6 +117,9 @@ static int v3d_get_param_ioctl(struct dr
31         case DRM_V3D_PARAM_SUPPORTS_CSD:
32                 args->value = v3d_has_csd(v3d);
33                 return 0;
34 +       case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
35 +               args->value = 1;
36 +               return 0;
37         default:
38                 DRM_DEBUG("Unknown parameter %d\n", args->param);
39                 return -EINVAL;
40 --- a/drivers/gpu/drm/v3d/v3d_gem.c
41 +++ b/drivers/gpu/drm/v3d/v3d_gem.c
42 @@ -709,13 +709,16 @@ v3d_submit_cl_ioctl(struct drm_device *d
43         struct drm_v3d_submit_cl *args = data;
44         struct v3d_bin_job *bin = NULL;
45         struct v3d_render_job *render;
46 +       struct v3d_job *clean_job = NULL;
47 +       struct v3d_job *last_job;
48         struct ww_acquire_ctx acquire_ctx;
49         int ret = 0;
50  
51         trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
52  
53 -       if (args->pad != 0) {
54 -               DRM_INFO("pad must be zero: %d\n", args->pad);
55 +       if (args->flags != 0 &&
56 +           args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
57 +               DRM_INFO("invalid flags: %d\n", args->flags);
58                 return -EINVAL;
59         }
60  
61 @@ -755,12 +758,31 @@ v3d_submit_cl_ioctl(struct drm_device *d
62                 bin->render = render;
63         }
64  
65 -       ret = v3d_lookup_bos(dev, file_priv, &render->base,
66 +       if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
67 +               clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
68 +               if (!clean_job) {
69 +                       ret = -ENOMEM;
70 +                       goto fail;
71 +               }
72 +
73 +               ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
74 +               if (ret) {
75 +                       kfree(clean_job);
76 +                       clean_job = NULL;
77 +                       goto fail;
78 +               }
79 +
80 +               last_job = clean_job;
81 +       } else {
82 +               last_job = &render->base;
83 +       }
84 +
85 +       ret = v3d_lookup_bos(dev, file_priv, last_job,
86                              args->bo_handles, args->bo_handle_count);
87         if (ret)
88                 goto fail;
89  
90 -       ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
91 +       ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
92         if (ret)
93                 goto fail;
94  
95 @@ -772,33 +794,49 @@ v3d_submit_cl_ioctl(struct drm_device *d
96  
97                 ret = v3d_add_dep(&render->base,
98                                   dma_fence_get(bin->base.done_fence));
99 +               if (ret)
100 +                       goto fail_unreserve;
101         }
102  
103         ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
104         if (ret)
105                 goto fail_unreserve;
106 +
107 +       if (clean_job) {
108 +               ret = v3d_add_dep(clean_job,
109 +                                 dma_fence_get(render->base.done_fence));
110 +               if (ret)
111 +                       goto fail_unreserve;
112 +               ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
113 +               if (ret)
114 +                       goto fail_unreserve;
115 +       }
116         mutex_unlock(&v3d->sched_lock);
117  
118         v3d_attach_fences_and_unlock_reservation(file_priv,
119 -                                                &render->base,
120 +                                                last_job,
121                                                  &acquire_ctx,
122                                                  args->out_sync,
123 -                                                render->base.done_fence);
124 +                                                last_job->done_fence);
125  
126         if (bin)
127                 v3d_job_put(&bin->base);
128         v3d_job_put(&render->base);
129 +       if (clean_job)
130 +               v3d_job_put(clean_job);
131  
132         return 0;
133  
134  fail_unreserve:
135         mutex_unlock(&v3d->sched_lock);
136 -       v3d_unlock_bo_reservations(render->base.bo,
137 -                                  render->base.bo_count, &acquire_ctx);
138 +       v3d_unlock_bo_reservations(last_job->bo,
139 +                                  last_job->bo_count, &acquire_ctx);
140  fail:
141         if (bin)
142                 v3d_job_put(&bin->base);
143         v3d_job_put(&render->base);
144 +       if (clean_job)
145 +               v3d_job_put(clean_job);
146  
147         return ret;
148  }
149 --- a/include/uapi/drm/v3d_drm.h
150 +++ b/include/uapi/drm/v3d_drm.h
151 @@ -48,6 +48,8 @@ extern "C" {
152  #define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
153  #define DRM_IOCTL_V3D_SUBMIT_CSD          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
154  
155 +#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE             0x01
156 +
157  /**
158   * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
159   * engine.
160 @@ -106,8 +108,7 @@ struct drm_v3d_submit_cl {
161         /* Number of BO handles passed in (size is that times 4). */
162         __u32 bo_handle_count;
163  
164 -       /* Pad, must be zero-filled. */
165 -       __u32 pad;
166 +       __u32 flags;
167  };
168  
169  /**
170 @@ -175,6 +176,7 @@ enum drm_v3d_param {
171         DRM_V3D_PARAM_V3D_CORE0_IDENT2,
172         DRM_V3D_PARAM_SUPPORTS_TFU,
173         DRM_V3D_PARAM_SUPPORTS_CSD,
174 +       DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH,
175  };
176  
177  struct drm_v3d_get_param {