Linux-libre 5.3.12-gnu
[librecmc/linux-libre.git] / drivers / gpu / drm / i915 / gem / selftests / i915_gem_coherency.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017 Intel Corporation
5  */
6
7 #include <linux/prime_numbers.h>
8
9 #include "i915_selftest.h"
10 #include "selftests/i915_random.h"
11
12 static int cpu_set(struct drm_i915_gem_object *obj,
13                    unsigned long offset,
14                    u32 v)
15 {
16         unsigned int needs_clflush;
17         struct page *page;
18         void *map;
19         u32 *cpu;
20         int err;
21
22         err = i915_gem_object_prepare_write(obj, &needs_clflush);
23         if (err)
24                 return err;
25
26         page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
27         map = kmap_atomic(page);
28         cpu = map + offset_in_page(offset);
29
30         if (needs_clflush & CLFLUSH_BEFORE)
31                 drm_clflush_virt_range(cpu, sizeof(*cpu));
32
33         *cpu = v;
34
35         if (needs_clflush & CLFLUSH_AFTER)
36                 drm_clflush_virt_range(cpu, sizeof(*cpu));
37
38         kunmap_atomic(map);
39         i915_gem_object_finish_access(obj);
40
41         return 0;
42 }
43
44 static int cpu_get(struct drm_i915_gem_object *obj,
45                    unsigned long offset,
46                    u32 *v)
47 {
48         unsigned int needs_clflush;
49         struct page *page;
50         void *map;
51         u32 *cpu;
52         int err;
53
54         err = i915_gem_object_prepare_read(obj, &needs_clflush);
55         if (err)
56                 return err;
57
58         page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
59         map = kmap_atomic(page);
60         cpu = map + offset_in_page(offset);
61
62         if (needs_clflush & CLFLUSH_BEFORE)
63                 drm_clflush_virt_range(cpu, sizeof(*cpu));
64
65         *v = *cpu;
66
67         kunmap_atomic(map);
68         i915_gem_object_finish_access(obj);
69
70         return 0;
71 }
72
73 static int gtt_set(struct drm_i915_gem_object *obj,
74                    unsigned long offset,
75                    u32 v)
76 {
77         struct i915_vma *vma;
78         u32 __iomem *map;
79         int err;
80
81         i915_gem_object_lock(obj);
82         err = i915_gem_object_set_to_gtt_domain(obj, true);
83         i915_gem_object_unlock(obj);
84         if (err)
85                 return err;
86
87         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
88         if (IS_ERR(vma))
89                 return PTR_ERR(vma);
90
91         map = i915_vma_pin_iomap(vma);
92         i915_vma_unpin(vma);
93         if (IS_ERR(map))
94                 return PTR_ERR(map);
95
96         iowrite32(v, &map[offset / sizeof(*map)]);
97         i915_vma_unpin_iomap(vma);
98
99         return 0;
100 }
101
102 static int gtt_get(struct drm_i915_gem_object *obj,
103                    unsigned long offset,
104                    u32 *v)
105 {
106         struct i915_vma *vma;
107         u32 __iomem *map;
108         int err;
109
110         i915_gem_object_lock(obj);
111         err = i915_gem_object_set_to_gtt_domain(obj, false);
112         i915_gem_object_unlock(obj);
113         if (err)
114                 return err;
115
116         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
117         if (IS_ERR(vma))
118                 return PTR_ERR(vma);
119
120         map = i915_vma_pin_iomap(vma);
121         i915_vma_unpin(vma);
122         if (IS_ERR(map))
123                 return PTR_ERR(map);
124
125         *v = ioread32(&map[offset / sizeof(*map)]);
126         i915_vma_unpin_iomap(vma);
127
128         return 0;
129 }
130
131 static int wc_set(struct drm_i915_gem_object *obj,
132                   unsigned long offset,
133                   u32 v)
134 {
135         u32 *map;
136         int err;
137
138         i915_gem_object_lock(obj);
139         err = i915_gem_object_set_to_wc_domain(obj, true);
140         i915_gem_object_unlock(obj);
141         if (err)
142                 return err;
143
144         map = i915_gem_object_pin_map(obj, I915_MAP_WC);
145         if (IS_ERR(map))
146                 return PTR_ERR(map);
147
148         map[offset / sizeof(*map)] = v;
149         i915_gem_object_unpin_map(obj);
150
151         return 0;
152 }
153
154 static int wc_get(struct drm_i915_gem_object *obj,
155                   unsigned long offset,
156                   u32 *v)
157 {
158         u32 *map;
159         int err;
160
161         i915_gem_object_lock(obj);
162         err = i915_gem_object_set_to_wc_domain(obj, false);
163         i915_gem_object_unlock(obj);
164         if (err)
165                 return err;
166
167         map = i915_gem_object_pin_map(obj, I915_MAP_WC);
168         if (IS_ERR(map))
169                 return PTR_ERR(map);
170
171         *v = map[offset / sizeof(*map)];
172         i915_gem_object_unpin_map(obj);
173
174         return 0;
175 }
176
177 static int gpu_set(struct drm_i915_gem_object *obj,
178                    unsigned long offset,
179                    u32 v)
180 {
181         struct drm_i915_private *i915 = to_i915(obj->base.dev);
182         struct i915_request *rq;
183         struct i915_vma *vma;
184         u32 *cs;
185         int err;
186
187         i915_gem_object_lock(obj);
188         err = i915_gem_object_set_to_gtt_domain(obj, true);
189         i915_gem_object_unlock(obj);
190         if (err)
191                 return err;
192
193         vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
194         if (IS_ERR(vma))
195                 return PTR_ERR(vma);
196
197         rq = i915_request_create(i915->engine[RCS0]->kernel_context);
198         if (IS_ERR(rq)) {
199                 i915_vma_unpin(vma);
200                 return PTR_ERR(rq);
201         }
202
203         cs = intel_ring_begin(rq, 4);
204         if (IS_ERR(cs)) {
205                 i915_request_add(rq);
206                 i915_vma_unpin(vma);
207                 return PTR_ERR(cs);
208         }
209
210         if (INTEL_GEN(i915) >= 8) {
211                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
212                 *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
213                 *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
214                 *cs++ = v;
215         } else if (INTEL_GEN(i915) >= 4) {
216                 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
217                 *cs++ = 0;
218                 *cs++ = i915_ggtt_offset(vma) + offset;
219                 *cs++ = v;
220         } else {
221                 *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
222                 *cs++ = i915_ggtt_offset(vma) + offset;
223                 *cs++ = v;
224                 *cs++ = MI_NOOP;
225         }
226         intel_ring_advance(rq, cs);
227
228         i915_vma_lock(vma);
229         err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
230         i915_vma_unlock(vma);
231         i915_vma_unpin(vma);
232
233         i915_request_add(rq);
234
235         return err;
236 }
237
238 static bool always_valid(struct drm_i915_private *i915)
239 {
240         return true;
241 }
242
243 static bool needs_fence_registers(struct drm_i915_private *i915)
244 {
245         return !i915_terminally_wedged(i915);
246 }
247
248 static bool needs_mi_store_dword(struct drm_i915_private *i915)
249 {
250         if (i915_terminally_wedged(i915))
251                 return false;
252
253         return intel_engine_can_store_dword(i915->engine[RCS0]);
254 }
255
256 static const struct igt_coherency_mode {
257         const char *name;
258         int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v);
259         int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v);
260         bool (*valid)(struct drm_i915_private *i915);
261 } igt_coherency_mode[] = {
262         { "cpu", cpu_set, cpu_get, always_valid },
263         { "gtt", gtt_set, gtt_get, needs_fence_registers },
264         { "wc", wc_set, wc_get, always_valid },
265         { "gpu", gpu_set, NULL, needs_mi_store_dword },
266         { },
267 };
268
269 static int igt_gem_coherency(void *arg)
270 {
271         const unsigned int ncachelines = PAGE_SIZE/64;
272         I915_RND_STATE(prng);
273         struct drm_i915_private *i915 = arg;
274         const struct igt_coherency_mode *read, *write, *over;
275         struct drm_i915_gem_object *obj;
276         intel_wakeref_t wakeref;
277         unsigned long count, n;
278         u32 *offsets, *values;
279         int err = 0;
280
281         /* We repeatedly write, overwrite and read from a sequence of
282          * cachelines in order to try and detect incoherency (unflushed writes
283          * from either the CPU or GPU). Each setter/getter uses our cache
284          * domain API which should prevent incoherency.
285          */
286
287         offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
288         if (!offsets)
289                 return -ENOMEM;
290         for (count = 0; count < ncachelines; count++)
291                 offsets[count] = count * 64 + 4 * (count % 16);
292
293         values = offsets + ncachelines;
294
295         mutex_lock(&i915->drm.struct_mutex);
296         wakeref = intel_runtime_pm_get(&i915->runtime_pm);
297         for (over = igt_coherency_mode; over->name; over++) {
298                 if (!over->set)
299                         continue;
300
301                 if (!over->valid(i915))
302                         continue;
303
304                 for (write = igt_coherency_mode; write->name; write++) {
305                         if (!write->set)
306                                 continue;
307
308                         if (!write->valid(i915))
309                                 continue;
310
311                         for (read = igt_coherency_mode; read->name; read++) {
312                                 if (!read->get)
313                                         continue;
314
315                                 if (!read->valid(i915))
316                                         continue;
317
318                                 for_each_prime_number_from(count, 1, ncachelines) {
319                                         obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
320                                         if (IS_ERR(obj)) {
321                                                 err = PTR_ERR(obj);
322                                                 goto unlock;
323                                         }
324
325                                         i915_random_reorder(offsets, ncachelines, &prng);
326                                         for (n = 0; n < count; n++)
327                                                 values[n] = prandom_u32_state(&prng);
328
329                                         for (n = 0; n < count; n++) {
330                                                 err = over->set(obj, offsets[n], ~values[n]);
331                                                 if (err) {
332                                                         pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
333                                                                n, count, over->name, err);
334                                                         goto put_object;
335                                                 }
336                                         }
337
338                                         for (n = 0; n < count; n++) {
339                                                 err = write->set(obj, offsets[n], values[n]);
340                                                 if (err) {
341                                                         pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
342                                                                n, count, write->name, err);
343                                                         goto put_object;
344                                                 }
345                                         }
346
347                                         for (n = 0; n < count; n++) {
348                                                 u32 found;
349
350                                                 err = read->get(obj, offsets[n], &found);
351                                                 if (err) {
352                                                         pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
353                                                                n, count, read->name, err);
354                                                         goto put_object;
355                                                 }
356
357                                                 if (found != values[n]) {
358                                                         pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
359                                                                n, count, over->name,
360                                                                write->name, values[n],
361                                                                read->name, found,
362                                                                ~values[n], offsets[n]);
363                                                         err = -EINVAL;
364                                                         goto put_object;
365                                                 }
366                                         }
367
368                                         i915_gem_object_put(obj);
369                                 }
370                         }
371                 }
372         }
373 unlock:
374         intel_runtime_pm_put(&i915->runtime_pm, wakeref);
375         mutex_unlock(&i915->drm.struct_mutex);
376         kfree(offsets);
377         return err;
378
379 put_object:
380         i915_gem_object_put(obj);
381         goto unlock;
382 }
383
384 int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
385 {
386         static const struct i915_subtest tests[] = {
387                 SUBTEST(igt_gem_coherency),
388         };
389
390         return i915_subtests(tests, i915);
391 }