Linux-libre 5.4-rc7-gnu (librecmc/linux-libre.git): drivers/gpu/drm/i915/gem/i915_gem_domain.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2014-2016 Intel Corporation
5  */
6
7 #include "display/intel_frontbuffer.h"
8
9 #include "i915_drv.h"
10 #include "i915_gem_clflush.h"
11 #include "i915_gem_gtt.h"
12 #include "i915_gem_ioctls.h"
13 #include "i915_gem_object.h"
14 #include "i915_vma.h"
15
16 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
17 {
18         /*
19          * We manually flush the CPU domain so that we can override and
20          * force the flush for the display, and perform it asynchronously.
21          */
22         i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
23         if (obj->cache_dirty)
24                 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
25         obj->write_domain = 0;
26 }
27
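/*
 * Flush the object for scanout if it is currently pinned for display use
 * (obj->pin_global); otherwise this is a no-op. Takes the object lock
 * around the display flush.
 */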
28 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
29 {
30         if (!READ_ONCE(obj->pin_global))
31                 return;
32
33         i915_gem_object_lock(obj);
34         __i915_gem_object_flush_for_display(obj);
35         i915_gem_object_unlock(obj);
36 }
37
38 /**
39  * Moves a single object to the WC read, and possibly write domain.
40  * @obj: object to act on
41  * @write: ask for write access or read only
42  *
43  * This function returns when the move is complete, including waiting on
44  * flushes to occur.
45  */
46 int
47 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
48 {
49         int ret;
50
51         assert_object_held(obj);
52
53         ret = i915_gem_object_wait(obj,
54                                    I915_WAIT_INTERRUPTIBLE |
55                                    (write ? I915_WAIT_ALL : 0),
56                                    MAX_SCHEDULE_TIMEOUT);
57         if (ret)
58                 return ret;
59
60         if (obj->write_domain == I915_GEM_DOMAIN_WC)
61                 return 0;
62
63         /* Flush and acquire obj->pages so that we are coherent through
64          * direct access in memory with previous cached writes through
65          * shmemfs and that our cache domain tracking remains valid.
66          * For example, if the obj->filp was moved to swap without us
67          * being notified and releasing the pages, we would mistakenly
68          * continue to assume that the obj remained out of the CPU cached
69          * domain.
70          */
71         ret = i915_gem_object_pin_pages(obj);
72         if (ret)
73                 return ret;
74
75         i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
76
77         /* Serialise direct access to this object with the barriers for
78          * coherent writes from the GPU, by effectively invalidating the
79          * WC domain upon first access.
80          */
81         if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
82                 mb();
83
84         /* It should now be out of any other write domains, and we can update
85          * the domain values for our changes.
86          */
87         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
88         obj->read_domains |= I915_GEM_DOMAIN_WC;
89         if (write) {
90                 obj->read_domains = I915_GEM_DOMAIN_WC;
91                 obj->write_domain = I915_GEM_DOMAIN_WC;
92                 obj->mm.dirty = true;
93         }
94
95         i915_gem_object_unpin_pages(obj);
96         return 0;
97 }
98
99 /**
100  * Moves a single object to the GTT read, and possibly write domain.
101  * @obj: object to act on
102  * @write: ask for write access or read only
103  *
104  * This function returns when the move is complete, including waiting on
105  * flushes to occur.
106  */
107 int
108 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
109 {
110         int ret;
111
112         assert_object_held(obj);
113
114         ret = i915_gem_object_wait(obj,
115                                    I915_WAIT_INTERRUPTIBLE |
116                                    (write ? I915_WAIT_ALL : 0),
117                                    MAX_SCHEDULE_TIMEOUT);
118         if (ret)
119                 return ret;
120
121         if (obj->write_domain == I915_GEM_DOMAIN_GTT)
122                 return 0;
123
124         /* Flush and acquire obj->pages so that we are coherent through
125          * direct access in memory with previous cached writes through
126          * shmemfs and that our cache domain tracking remains valid.
127          * For example, if the obj->filp was moved to swap without us
128          * being notified and releasing the pages, we would mistakenly
129          * continue to assume that the obj remained out of the CPU cached
130          * domain.
131          */
132         ret = i915_gem_object_pin_pages(obj);
133         if (ret)
134                 return ret;
135
136         i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
137
138         /* Serialise direct access to this object with the barriers for
139          * coherent writes from the GPU, by effectively invalidating the
140          * GTT domain upon first access.
141          */
142         if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
143                 mb();
144
145         /* It should now be out of any other write domains, and we can update
146          * the domain values for our changes.
147          */
148         GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
149         obj->read_domains |= I915_GEM_DOMAIN_GTT;
150         if (write) {
151                 obj->read_domains = I915_GEM_DOMAIN_GTT;
152                 obj->write_domain = I915_GEM_DOMAIN_GTT;
153                 obj->mm.dirty = true;
154         }
155
156         i915_gem_object_unpin_pages(obj);
157         return 0;
158 }
159
160 /**
161  * Changes the cache-level of an object across all VMA.
162  * @obj: object to act on
163  * @cache_level: new cache level to set for the object
164  *
165  * After this function returns, the object will be in the new cache-level
166  * across all GTT and the contents of the backing storage will be coherent,
167  * with respect to the new cache-level. In order to keep the backing storage
168  * coherent for all users, we only allow a single cache level to be set
169  * globally on the object and prevent it from being changed whilst the
170  * hardware is reading from the object. That is, if the object is currently
171  * on the scanout, it will be set to uncached (or equivalent display
172  * cache coherency) and all non-MOCS GPU access will also be uncached so
173  * that all direct access to the scanout remains coherent.
174  */
175 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
176                                     enum i915_cache_level cache_level)
177 {
178         struct i915_vma *vma;
179         int ret;
180
181         assert_object_held(obj);
182
183         if (obj->cache_level == cache_level)
184                 return 0;
185
186         /* Inspect the list of currently bound VMA and unbind any that would
187          * be invalid given the new cache-level. This is principally to
188          * catch the issue of the CS prefetch crossing page boundaries and
189          * reading an invalid PTE on older architectures.
190          */
191 restart:
192         list_for_each_entry(vma, &obj->vma.list, obj_link) {
193                 if (!drm_mm_node_allocated(&vma->node))
194                         continue;
195
196                 if (i915_vma_is_pinned(vma)) {
197                         DRM_DEBUG("cannot change the cache level of pinned objects\n");
198                         return -EBUSY;
199                 }
200
201                 if (!i915_vma_is_closed(vma) &&
202                     i915_gem_valid_gtt_space(vma, cache_level))
203                         continue;
204
205                 ret = i915_vma_unbind(vma);
206                 if (ret)
207                         return ret;
208
209                 /* As unbinding may affect other elements in the
210                  * obj->vma.list (due to side-effects from retiring
211                  * an active vma), play safe and restart the iterator.
212                  */
213                 goto restart;
214         }
215
216         /* We can reuse the existing drm_mm nodes but need to change the
217          * cache-level on the PTE. We could simply unbind them all and
218          * rebind with the correct cache-level on next use. However, since
219          * we already have a valid slot, dma mapping, pages etc, we may as well
220          * rewrite the PTE in the belief that doing so tramples upon less
221          * state and so involves less work.
222          */
223         if (atomic_read(&obj->bind_count)) {
224                 struct drm_i915_private *i915 = to_i915(obj->base.dev);
225
226                 /* Before we change the PTE, the GPU must not be accessing it.
227                  * If we wait upon the object, we know that all the bound
228                  * VMA are no longer active.
229                  */
230                 ret = i915_gem_object_wait(obj,
231                                            I915_WAIT_INTERRUPTIBLE |
232                                            I915_WAIT_ALL,
233                                            MAX_SCHEDULE_TIMEOUT);
234                 if (ret)
235                         return ret;
236
237                 if (!HAS_LLC(i915) && cache_level != I915_CACHE_NONE) {
238                         intel_wakeref_t wakeref =
239                                 intel_runtime_pm_get(&i915->runtime_pm);
240
241                         /*
242                          * Access to snoopable pages through the GTT is
243                          * incoherent and on some machines causes a hard
244                          * lockup. Relinquish the CPU mmapping to force
245                          * userspace to refault in the pages and we can
246                          * then double check if the GTT mapping is still
247                          * valid for that pointer access.
248                          */
249                         ret = mutex_lock_interruptible(&i915->ggtt.vm.mutex);
250                         if (ret) {
251                                 intel_runtime_pm_put(&i915->runtime_pm,
252                                                      wakeref);
253                                 return ret;
254                         }
255
256                         if (obj->userfault_count)
257                                 __i915_gem_object_release_mmap(obj);
258
259                         /*
260                          * As we no longer need a fence for GTT access,
261                          * we can relinquish it now (and so prevent having
262                          * to steal a fence from someone else on the next
263                          * fence request). Note GPU activity would have
264                          * dropped the fence as all snoopable access is
265                          * supposed to be linear.
266                          */
267                         for_each_ggtt_vma(vma, obj) {
268                                 ret = i915_vma_revoke_fence(vma);
269                                 if (ret)
270                                         break;
271                         }
272                         mutex_unlock(&i915->ggtt.vm.mutex);
273                         intel_runtime_pm_put(&i915->runtime_pm, wakeref);
274                         if (ret)
275                                 return ret;
276                 } else {
277                         /*
278                          * We either have incoherent backing store and
279                          * so no GTT access or the architecture is fully
280                          * coherent. In such cases, existing GTT mmaps
281                          * ignore the cache bit in the PTE and we can
282                          * rewrite it without confusing the GPU or having
283                          * to force userspace to fault back in its mmaps.
284                          */
285                 }
286
287                 list_for_each_entry(vma, &obj->vma.list, obj_link) {
288                         if (!drm_mm_node_allocated(&vma->node))
289                                 continue;
290
291                         ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
292                         if (ret)
293                                 return ret;
294                 }
295         }
296
297         list_for_each_entry(vma, &obj->vma.list, obj_link)
298                 vma->node.color = cache_level;
299         i915_gem_object_set_cache_coherency(obj, cache_level);
300         obj->cache_dirty = true; /* Always invalidate stale cachelines */
301
302         return 0;
303 }
304
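/*
 * Report the current caching mode of the object named by args->handle back
 * to userspace as one of the I915_CACHING_* values.
 */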
305 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
306                                struct drm_file *file)
307 {
308         struct drm_i915_gem_caching *args = data;
309         struct drm_i915_gem_object *obj;
310         int err = 0;
311
312         rcu_read_lock();
313         obj = i915_gem_object_lookup_rcu(file, args->handle);
314         if (!obj) {
315                 err = -ENOENT;
316                 goto out;
317         }
318
319         switch (obj->cache_level) {
320         case I915_CACHE_LLC:
321         case I915_CACHE_L3_LLC:
322                 args->caching = I915_CACHING_CACHED;
323                 break;
324
325         case I915_CACHE_WT:
326                 args->caching = I915_CACHING_DISPLAY;
327                 break;
328
329         default:
330                 args->caching = I915_CACHING_NONE;
331                 break;
332         }
333 out:
334         rcu_read_unlock();
335         return err;
336 }
337
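/*
 * Change the cache level of the object named by args->handle on behalf of
 * userspace, translating the requested I915_CACHING_* mode into an
 * i915_cache_level supported by this platform.
 */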
338 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
339                                struct drm_file *file)
340 {
341         struct drm_i915_private *i915 = to_i915(dev);
342         struct drm_i915_gem_caching *args = data;
343         struct drm_i915_gem_object *obj;
344         enum i915_cache_level level;
345         int ret = 0;
346
347         switch (args->caching) {
348         case I915_CACHING_NONE:
349                 level = I915_CACHE_NONE;
350                 break;
351         case I915_CACHING_CACHED:
352                 /*
353                  * Due to a HW issue on BXT A stepping, GPU stores via a
354                  * snooped mapping may leave stale data in a corresponding CPU
355                  * cacheline, whereas normally such cachelines would get
356                  * invalidated.
357                  */
358                 if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
359                         return -ENODEV;
360
361                 level = I915_CACHE_LLC;
362                 break;
363         case I915_CACHING_DISPLAY:
364                 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
365                 break;
366         default:
367                 return -EINVAL;
368         }
369
370         obj = i915_gem_object_lookup(file, args->handle);
371         if (!obj)
372                 return -ENOENT;
373
374         /*
375          * The caching mode of a proxy object is handled by its generator, and
376          * not allowed to be changed by userspace.
377          */
378         if (i915_gem_object_is_proxy(obj)) {
379                 ret = -ENXIO;
380                 goto out;
381         }
382
383         if (obj->cache_level == level)
384                 goto out;
385
386         ret = i915_gem_object_wait(obj,
387                                    I915_WAIT_INTERRUPTIBLE,
388                                    MAX_SCHEDULE_TIMEOUT);
389         if (ret)
390                 goto out;
391
392         ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
393         if (ret)
394                 goto out;
395
396         ret = i915_gem_object_lock_interruptible(obj);
397         if (ret == 0) {
398                 ret = i915_gem_object_set_cache_level(obj, level);
399                 i915_gem_object_unlock(obj);
400         }
401         mutex_unlock(&i915->drm.struct_mutex);
402
403 out:
404         i915_gem_object_put(obj);
405         return ret;
406 }
407
408 /*
409  * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
410  * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
411  * (for pageflips). We only flush the caches while preparing the buffer for
412  * display; the callers are responsible for the frontbuffer flush.
413  */
414 struct i915_vma *
415 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
416                                      u32 alignment,
417                                      const struct i915_ggtt_view *view,
418                                      unsigned int flags)
419 {
420         struct i915_vma *vma;
421         int ret;
422
423         assert_object_held(obj);
424
425         /* Mark the global pin early so that we account for the
426          * display coherency whilst setting up the cache domains.
427          */
428         obj->pin_global++;
429
430         /* The display engine is not coherent with the LLC cache on gen6.  As
431          * a result, we make sure that the pinning that is about to occur is
432          * done with uncached PTEs. This is lowest common denominator for all
433          * chipsets.
434          *
435          * However for gen6+, we could do better by using the GFDT bit instead
436          * of uncaching, which would allow us to flush all the LLC-cached data
437          * with that bit in the PTE to main memory with just one PIPE_CONTROL.
438          */
439         ret = i915_gem_object_set_cache_level(obj,
440                                               HAS_WT(to_i915(obj->base.dev)) ?
441                                               I915_CACHE_WT : I915_CACHE_NONE);
442         if (ret) {
443                 vma = ERR_PTR(ret);
444                 goto err_unpin_global;
445         }
446
447         /* As the user may map the buffer once pinned in the display plane
448          * (e.g. libkms for the bootup splash), we have to ensure that we
449          * always use map_and_fenceable for all scanout buffers. However,
450          * it may simply be too big to fit into mappable, in which case
451          * put it anyway and hope that userspace can cope (but always first
452          * try to preserve the existing ABI).
453          */
454         vma = ERR_PTR(-ENOSPC);
455         if ((flags & PIN_MAPPABLE) == 0 &&
456             (!view || view->type == I915_GGTT_VIEW_NORMAL))
457                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
458                                                flags |
459                                                PIN_MAPPABLE |
460                                                PIN_NONBLOCK);
461         if (IS_ERR(vma))
462                 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
463         if (IS_ERR(vma))
464                 goto err_unpin_global;
465
466         vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
467
468         __i915_gem_object_flush_for_display(obj);
469
470         /* It should now be out of any other write domains, and we can update
471          * the domain values for our changes.
472          */
473         obj->read_domains |= I915_GEM_DOMAIN_GTT;
474
475         return vma;
476
477 err_unpin_global:
478         obj->pin_global--;
479         return vma;
480 }
481
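/*
 * Move the object's bound GGTT VMA to the tail of the VM's bound list and,
 * if the object is shrinkable and still WILLNEED, move the object to the
 * tail of the shrink list, marking it as recently used to reduce the chance
 * of premature eviction or shrinking.
 */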
482 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
483 {
484         struct drm_i915_private *i915 = to_i915(obj->base.dev);
485         struct i915_vma *vma;
486
487         GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
488
489         mutex_lock(&i915->ggtt.vm.mutex);
490         for_each_ggtt_vma(vma, obj) {
491                 if (!drm_mm_node_allocated(&vma->node))
492                         continue;
493
494                 list_move_tail(&vma->vm_link, &vma->vm->bound_list);
495         }
496         mutex_unlock(&i915->ggtt.vm.mutex);
497
498         if (i915_gem_object_is_shrinkable(obj)) {
499                 unsigned long flags;
500
501                 spin_lock_irqsave(&i915->mm.obj_lock, flags);
502
503                 if (obj->mm.madv == I915_MADV_WILLNEED)
504                         list_move_tail(&obj->mm.link, &i915->mm.shrink_list);
505
506                 spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
507         }
508 }
509
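/*
 * Release a pin taken by i915_gem_object_pin_to_display_plane(): drop the
 * global display pin count, reset the display alignment once the last pin
 * is gone, bump the LRU and finally unpin the vma itself.
 */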
510 void
511 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
512 {
513         struct drm_i915_gem_object *obj = vma->obj;
514
515         assert_object_held(obj);
516
517         if (WARN_ON(obj->pin_global == 0))
518                 return;
519
520         if (--obj->pin_global == 0)
521                 vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
522
523         /* Bump the LRU to try to avoid premature eviction whilst flipping */
524         i915_gem_object_bump_inactive_ggtt(obj);
525
526         i915_vma_unpin(vma);
527 }
528
529 /**
530  * Moves a single object to the CPU read, and possibly write domain.
531  * @obj: object to act on
532  * @write: requesting write or read-only access
533  *
534  * This function returns when the move is complete, including waiting on
535  * flushes to occur.
536  */
537 int
538 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
539 {
540         int ret;
541
542         assert_object_held(obj);
543
544         ret = i915_gem_object_wait(obj,
545                                    I915_WAIT_INTERRUPTIBLE |
546                                    (write ? I915_WAIT_ALL : 0),
547                                    MAX_SCHEDULE_TIMEOUT);
548         if (ret)
549                 return ret;
550
551         i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
552
553         /* Flush the CPU cache if it's still invalid. */
554         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
555                 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
556                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
557         }
558
559         /* It should now be out of any other write domains, and we can update
560          * the domain values for our changes.
561          */
562         GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
563
564         /* If we're writing through the CPU, then the GPU read domains will
565          * need to be invalidated at next use.
566          */
567         if (write)
568                 __start_cpu_write(obj);
569
570         return 0;
571 }
572
573 /**
574  * Called when user space prepares to use an object with the CPU, either
575  * through the mmap ioctl's mapping or a GTT mapping.
576  * @dev: drm device
577  * @data: ioctl data blob
578  * @file: drm file
579  */
580 int
581 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
582                           struct drm_file *file)
583 {
584         struct drm_i915_gem_set_domain *args = data;
585         struct drm_i915_gem_object *obj;
586         u32 read_domains = args->read_domains;
587         u32 write_domain = args->write_domain;
588         int err;
589
590         /* Only handle setting domains to types used by the CPU. */
591         if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
592                 return -EINVAL;
593
594         /*
595          * Having something in the write domain implies it's in the read
596          * domain, and only that read domain.  Enforce that in the request.
597          */
598         if (write_domain && read_domains != write_domain)
599                 return -EINVAL;
600
601         if (!read_domains)
602                 return 0;
603
604         obj = i915_gem_object_lookup(file, args->handle);
605         if (!obj)
606                 return -ENOENT;
607
608         /*
609          * Already in the desired write domain? Nothing for us to do!
610          *
611          * We apply a little bit of cunning here to catch a broader set of
612          * no-ops. If obj->write_domain is set, we must be in the same
613          * obj->read_domains, and only that domain. Therefore, if that
614          * obj->write_domain matches the request read_domains, we are
615          * already in the same read/write domain and can skip the operation,
616          * without having to further check the requested write_domain.
617          */
618         if (READ_ONCE(obj->write_domain) == read_domains) {
619                 err = 0;
620                 goto out;
621         }
622
623         /*
624          * Try to flush the object off the GPU without holding the lock.
625          * We will repeat the flush holding the lock in the normal manner
626          * to catch cases where we are gazumped.
627          */
628         err = i915_gem_object_wait(obj,
629                                    I915_WAIT_INTERRUPTIBLE |
630                                    I915_WAIT_PRIORITY |
631                                    (write_domain ? I915_WAIT_ALL : 0),
632                                    MAX_SCHEDULE_TIMEOUT);
633         if (err)
634                 goto out;
635
636         /*
637          * Proxy objects do not control access to the backing storage, ergo
638          * they cannot be used as a means to manipulate the cache domain
639          * tracking for that backing storage. The proxy object is always
640          * considered to be outside of any cache domain.
641          */
642         if (i915_gem_object_is_proxy(obj)) {
643                 err = -ENXIO;
644                 goto out;
645         }
646
647         /*
648          * Flush and acquire obj->pages so that we are coherent through
649          * direct access in memory with previous cached writes through
650          * shmemfs and that our cache domain tracking remains valid.
651          * For example, if the obj->filp was moved to swap without us
652          * being notified and releasing the pages, we would mistakenly
653          * continue to assume that the obj remained out of the CPU cached
654          * domain.
655          */
656         err = i915_gem_object_pin_pages(obj);
657         if (err)
658                 goto out;
659
660         err = i915_gem_object_lock_interruptible(obj);
661         if (err)
662                 goto out_unpin;
663
664         if (read_domains & I915_GEM_DOMAIN_WC)
665                 err = i915_gem_object_set_to_wc_domain(obj, write_domain);
666         else if (read_domains & I915_GEM_DOMAIN_GTT)
667                 err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
668         else
669                 err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
670
671         /* And bump the LRU for this access */
672         i915_gem_object_bump_inactive_ggtt(obj);
673
674         i915_gem_object_unlock(obj);
675
676         if (write_domain)
677                 intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
678
679 out_unpin:
680         i915_gem_object_unpin_pages(obj);
681 out:
682         i915_gem_object_put(obj);
683         return err;
684 }
685
686 /*
687  * Pins the specified object's pages and synchronizes the object with
688  * GPU accesses. Sets needs_clflush to non-zero if the caller should
689  * flush the object from the CPU cache.
690  */
691 int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
692                                  unsigned int *needs_clflush)
693 {
694         int ret;
695
696         *needs_clflush = 0;
697         if (!i915_gem_object_has_struct_page(obj))
698                 return -ENODEV;
699
700         ret = i915_gem_object_lock_interruptible(obj);
701         if (ret)
702                 return ret;
703
704         ret = i915_gem_object_wait(obj,
705                                    I915_WAIT_INTERRUPTIBLE,
706                                    MAX_SCHEDULE_TIMEOUT);
707         if (ret)
708                 goto err_unlock;
709
710         ret = i915_gem_object_pin_pages(obj);
711         if (ret)
712                 goto err_unlock;
713
714         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
715             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
716                 ret = i915_gem_object_set_to_cpu_domain(obj, false);
717                 if (ret)
718                         goto err_unpin;
719                 else
720                         goto out;
721         }
722
723         i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
724
725         /* If we're not in the cpu read domain, set ourself into the gtt
726          * read domain and manually flush cachelines (if required). This
727          * optimizes for the case when the gpu will dirty the data
728          * anyway again before the next pread happens.
729          */
730         if (!obj->cache_dirty &&
731             !(obj->read_domains & I915_GEM_DOMAIN_CPU))
732                 *needs_clflush = CLFLUSH_BEFORE;
733
734 out:
735         /* return with the pages pinned */
736         return 0;
737
738 err_unpin:
739         i915_gem_object_unpin_pages(obj);
740 err_unlock:
741         i915_gem_object_unlock(obj);
742         return ret;
743 }
744
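/*
 * As i915_gem_object_prepare_read(), but synchronizes against all GPU access
 * ready for a CPU write: pins the pages and sets the CLFLUSH_BEFORE /
 * CLFLUSH_AFTER bits in *needs_clflush as required. It also marks the object
 * dirty and invalidates the frontbuffer for CPU writes.
 */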
745 int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
746                                   unsigned int *needs_clflush)
747 {
748         int ret;
749
750         *needs_clflush = 0;
751         if (!i915_gem_object_has_struct_page(obj))
752                 return -ENODEV;
753
754         ret = i915_gem_object_lock_interruptible(obj);
755         if (ret)
756                 return ret;
757
758         ret = i915_gem_object_wait(obj,
759                                    I915_WAIT_INTERRUPTIBLE |
760                                    I915_WAIT_ALL,
761                                    MAX_SCHEDULE_TIMEOUT);
762         if (ret)
763                 goto err_unlock;
764
765         ret = i915_gem_object_pin_pages(obj);
766         if (ret)
767                 goto err_unlock;
768
769         if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
770             !static_cpu_has(X86_FEATURE_CLFLUSH)) {
771                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
772                 if (ret)
773                         goto err_unpin;
774                 else
775                         goto out;
776         }
777
778         i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
779
780         /* If we're not in the cpu write domain, set ourself into the
781          * gtt write domain and manually flush cachelines (as required).
782          * This optimizes for the case when the gpu will use the data
783          * right away and we therefore have to clflush anyway.
784          */
785         if (!obj->cache_dirty) {
786                 *needs_clflush |= CLFLUSH_AFTER;
787
788                 /*
789                  * Same trick applies to invalidate partially written
790                  * cachelines read before writing.
791                  */
792                 if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
793                         *needs_clflush |= CLFLUSH_BEFORE;
794         }
795
796 out:
797         intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
798         obj->mm.dirty = true;
799         /* return with the pages pinned */
800         return 0;
801
802 err_unpin:
803         i915_gem_object_unpin_pages(obj);
804 err_unlock:
805         i915_gem_object_unlock(obj);
806         return ret;
807 }