/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016-2018 Intel Corporation
 */

#include "gt/intel_gt_types.h"

#include "i915_drv.h"

#include "i915_active.h"
#include "i915_syncmap.h"
#include "gt/intel_timeline.h"

#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

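/*
 * A HWSP (HW status page) is a single GGTT-mapped page of seqno storage
 * shared between timelines: it is carved into 64-byte cachelines, with
 * free_bitmap holding one bit per cacheline (set while the slot is
 * available, cleared while a timeline owns it).
 */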
struct intel_timeline_hwsp {
        struct intel_gt *gt;
        struct intel_gt_timelines *gt_timelines;
        struct list_head free_link;
        struct i915_vma *vma;
        u64 free_bitmap;
};

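/*
 * A cacheline is one 64-byte slot within a HWSP page. The slot index is
 * packed into the low CACHELINE_BITS of vaddr, and the CACHELINE_FREE
 * bit marks a cacheline to be released as soon as its i915_active
 * tracker idles (see cacheline_free() and __cacheline_retire()).
 */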
struct intel_timeline_cacheline {
        struct i915_active active;
        struct intel_timeline_hwsp *hwsp;
        void *vaddr;
#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS
};

static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
        if (IS_ERR(vma))
                i915_gem_object_put(obj);

        return vma;
}

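/*
 * Find a HWSP with a spare cacheline, allocating and publishing a fresh
 * page if none is available. The chosen cacheline index is written to
 * *cacheline and claimed from free_bitmap; a fully used HWSP drops off
 * the free list until __idle_hwsp_free() returns a slot to it.
 */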
static struct i915_vma *
hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
{
        struct intel_gt_timelines *gt = &timeline->gt->timelines;
        struct intel_timeline_hwsp *hwsp;

        BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);

        spin_lock_irq(&gt->hwsp_lock);

        /* hwsp_free_list only contains HWSP that have available cachelines */
        hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
                                        typeof(*hwsp), free_link);
        if (!hwsp) {
                struct i915_vma *vma;

                spin_unlock_irq(&gt->hwsp_lock);

                hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
                if (!hwsp)
                        return ERR_PTR(-ENOMEM);

                vma = __hwsp_alloc(timeline->gt);
                if (IS_ERR(vma)) {
                        kfree(hwsp);
                        return vma;
                }

                vma->private = hwsp;
                hwsp->gt = timeline->gt;
                hwsp->vma = vma;
                hwsp->free_bitmap = ~0ull;
                hwsp->gt_timelines = gt;

                spin_lock_irq(&gt->hwsp_lock);
                list_add(&hwsp->free_link, &gt->hwsp_free_list);
        }

        GEM_BUG_ON(!hwsp->free_bitmap);
        *cacheline = __ffs64(hwsp->free_bitmap);
        hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
        if (!hwsp->free_bitmap)
                list_del(&hwsp->free_link);

        spin_unlock_irq(&gt->hwsp_lock);

        GEM_BUG_ON(hwsp->vma->private != hwsp);
        return hwsp->vma;
}

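/*
 * Return a cacheline to its HWSP page: the first slot freed republishes
 * the page on the freelist, and once every cacheline is free again the
 * whole page is handed back to the system.
 */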
static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
{
        struct intel_gt_timelines *gt = hwsp->gt_timelines;
        unsigned long flags;

        spin_lock_irqsave(&gt->hwsp_lock, flags);

        /* As a cacheline becomes available, publish the HWSP on the freelist */
        if (!hwsp->free_bitmap)
                list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);

        GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
        hwsp->free_bitmap |= BIT_ULL(cacheline);

        /* And if no one is left using it, give the page back to the system */
        if (hwsp->free_bitmap == ~0ull) {
                i915_vma_put(hwsp->vma);
                list_del(&hwsp->free_link);
                kfree(hwsp);
        }

        spin_unlock_irqrestore(&gt->hwsp_lock, flags);
}

static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
        GEM_BUG_ON(!i915_active_is_idle(&cl->active));

        i915_gem_object_unpin_map(cl->hwsp->vma->obj);
        i915_vma_put(cl->hwsp->vma);
        __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

        i915_active_fini(&cl->active);
        kfree(cl);
}

static void __cacheline_retire(struct i915_active *active)
{
        struct intel_timeline_cacheline *cl =
                container_of(active, typeof(*cl), active);

        i915_vma_unpin(cl->hwsp->vma);
        if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
                __idle_cacheline_free(cl);
}

static int __cacheline_active(struct i915_active *active)
{
        struct intel_timeline_cacheline *cl =
                container_of(active, typeof(*cl), active);

        __i915_vma_pin(cl->hwsp->vma);
        return 0;
}

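/*
 * Claim a cacheline within the given HWSP: pin the page's kernel
 * mapping, take a reference on its vma and pack the cacheline index
 * into the low bits of vaddr. The i915_active callbacks keep the vma
 * pinned in the GGTT while any request still references the cacheline.
 */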
static struct intel_timeline_cacheline *
cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
{
        struct intel_timeline_cacheline *cl;
        void *vaddr;

        GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));

        cl = kmalloc(sizeof(*cl), GFP_KERNEL);
        if (!cl)
                return ERR_PTR(-ENOMEM);

        vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
        if (IS_ERR(vaddr)) {
                kfree(cl);
                return ERR_CAST(vaddr);
        }

        i915_vma_get(hwsp->vma);
        cl->hwsp = hwsp;
        cl->vaddr = page_pack_bits(vaddr, cacheline);

        i915_active_init(hwsp->gt->i915, &cl->active,
                         __cacheline_active, __cacheline_retire);

        return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
{
        if (cl)
                i915_active_acquire(&cl->active);
}

static void cacheline_release(struct intel_timeline_cacheline *cl)
{
        if (cl)
                i915_active_release(&cl->active);
}

static void cacheline_free(struct intel_timeline_cacheline *cl)
{
        GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
        cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);

        if (i915_active_is_idle(&cl->active))
                __idle_cacheline_free(cl);
}

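/*
 * A timeline is initialised in one of two modes: with a caller-supplied
 * global HWSP (the seqno lives at the fixed I915_GEM_HWS_SEQNO_ADDR
 * offset within that page), or, when hwsp is NULL, with a private
 * cacheline suballocated from a shared HWSP page. Only the latter emits
 * an initial breadcrumb and can be swapped out on seqno wraparound.
 */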
int intel_timeline_init(struct intel_timeline *timeline,
                        struct intel_gt *gt,
                        struct i915_vma *hwsp)
{
        void *vaddr;

        kref_init(&timeline->kref);
        atomic_set(&timeline->pin_count, 0);

        timeline->gt = gt;

        timeline->has_initial_breadcrumb = !hwsp;
        timeline->hwsp_cacheline = NULL;

        if (!hwsp) {
                struct intel_timeline_cacheline *cl;
                unsigned int cacheline;

                hwsp = hwsp_alloc(timeline, &cacheline);
                if (IS_ERR(hwsp))
                        return PTR_ERR(hwsp);

                cl = cacheline_alloc(hwsp->private, cacheline);
                if (IS_ERR(cl)) {
                        __idle_hwsp_free(hwsp->private, cacheline);
                        return PTR_ERR(cl);
                }

                timeline->hwsp_cacheline = cl;
                timeline->hwsp_offset = cacheline * CACHELINE_BYTES;

                vaddr = page_mask_bits(cl->vaddr);
        } else {
                timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;

                vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
                if (IS_ERR(vaddr))
                        return PTR_ERR(vaddr);
        }

        timeline->hwsp_seqno =
                memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);

        timeline->hwsp_ggtt = i915_vma_get(hwsp);
        GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);

        timeline->fence_context = dma_fence_context_alloc(1);

        mutex_init(&timeline->mutex);

        INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
        INIT_LIST_HEAD(&timeline->requests);

        i915_syncmap_init(&timeline->sync);

        return 0;
}

static void timelines_init(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        spin_lock_init(&timelines->lock);
        INIT_LIST_HEAD(&timelines->active_list);

        spin_lock_init(&timelines->hwsp_lock);
        INIT_LIST_HEAD(&timelines->hwsp_free_list);
}

void intel_timelines_init(struct drm_i915_private *i915)
{
        timelines_init(&i915->gt);
}

void intel_timeline_fini(struct intel_timeline *timeline)
{
        GEM_BUG_ON(atomic_read(&timeline->pin_count));
        GEM_BUG_ON(!list_empty(&timeline->requests));

        if (timeline->hwsp_cacheline)
                cacheline_free(timeline->hwsp_cacheline);
        else
                i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);

        i915_vma_put(timeline->hwsp_ggtt);
}

struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
{
        struct intel_timeline *timeline;
        int err;

        timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
        if (!timeline)
                return ERR_PTR(-ENOMEM);

        err = intel_timeline_init(timeline, gt, global_hwsp);
        if (err) {
                kfree(timeline);
                return ERR_PTR(err);
        }

        return timeline;
}

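/*
 * Illustrative lifecycle; a sketch, not code taken from this file.
 * (intel_timeline_put() is assumed to be the kref_put() wrapper around
 * __intel_timeline_free(), declared in gt/intel_timeline.h.)
 *
 *      tl = intel_timeline_create(gt, NULL);   (private HWSP cacheline)
 *      err = intel_timeline_pin(tl);           (bind the HWSP into the GGTT)
 *      ...
 *      err = intel_timeline_get_seqno(tl, rq, &seqno);
 *      ...
 *      intel_timeline_unpin(tl);
 *      intel_timeline_put(tl);
 */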
int intel_timeline_pin(struct intel_timeline *tl)
{
        int err;

        if (atomic_add_unless(&tl->pin_count, 1, 0))
                return 0;

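        /*
         * Slow path: pin_count was zero. Bind the HWSP into the GGTT and
         * convert hwsp_offset into an absolute GGTT address before
         * publishing the pin. If another thread raced us to the first pin
         * (atomic_fetch_inc() below returns non-zero), back out our
         * redundant acquire and vma pin; theirs already holds the HWSP.
         */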
        err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
        if (err)
                return err;

        tl->hwsp_offset =
                i915_ggtt_offset(tl->hwsp_ggtt) +
                offset_in_page(tl->hwsp_offset);

        cacheline_acquire(tl->hwsp_cacheline);
        if (atomic_fetch_inc(&tl->pin_count)) {
                cacheline_release(tl->hwsp_cacheline);
                __i915_vma_unpin(tl->hwsp_ggtt);
        }

        return 0;
}

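/*
 * intel_timeline_enter/intel_timeline_exit bracket request submission:
 * the first enter puts the timeline on the per-gt active_list (walked
 * when retiring requests), and the final exit removes it and discards
 * the syncmap, which is safe once the timeline is idle.
 */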
void intel_timeline_enter(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;
        unsigned long flags;

        lockdep_assert_held(&tl->mutex);

        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        if (tl->active_count++)
                return;
        GEM_BUG_ON(!tl->active_count); /* overflow? */

        spin_lock_irqsave(&timelines->lock, flags);
        list_add(&tl->link, &timelines->active_list);
        spin_unlock_irqrestore(&timelines->lock, flags);
}

void intel_timeline_exit(struct intel_timeline *tl)
{
        struct intel_gt_timelines *timelines = &tl->gt->timelines;
        unsigned long flags;

        lockdep_assert_held(&tl->mutex);

        GEM_BUG_ON(!tl->active_count);
        if (--tl->active_count)
                return;

        spin_lock_irqsave(&timelines->lock, flags);
        list_del(&tl->link);
        spin_unlock_irqrestore(&timelines->lock, flags);

        /*
         * Since this timeline is idle, all barriers upon which we were
         * waiting must also be complete and so we can discard the last
         * used barriers without loss of information.
         */
        i915_syncmap_free(&tl->sync);
}

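/*
 * Each request consumes one seqno, or two when the timeline emits an
 * initial breadcrumb: the extra increment keeps the completion seqno
 * even, which the GEM_BUG_ON in timeline_advance() asserts.
 * timeline_rollback() undoes a speculative advance on error paths.
 */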
static u32 timeline_advance(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);

        return tl->seqno += 1 + tl->has_initial_breadcrumb;
}

static void timeline_rollback(struct intel_timeline *tl)
{
        tl->seqno -= 1 + tl->has_initial_breadcrumb;
}

static noinline int
__intel_timeline_get_seqno(struct intel_timeline *tl,
                           struct i915_request *rq,
                           u32 *seqno)
{
        struct intel_timeline_cacheline *cl;
        unsigned int cacheline;
        struct i915_vma *vma;
        void *vaddr;
        int err;

        /*
         * If there is an outstanding GPU reference to this cacheline,
         * such as it being sampled by a HW semaphore on another timeline,
         * we cannot wraparound our seqno value (the HW semaphore does
         * a strict greater-than-or-equals compare, not i915_seqno_passed).
         * So if the cacheline is still busy, we must detach ourselves
         * from it and leave it inflight alongside its users.
         *
         * However, if nobody is watching and we can guarantee that nobody
         * will, we could simply reuse the same cacheline.
         *
         * if (i915_active_request_is_signaled(&tl->last_request) &&
         *     i915_active_is_signaled(&tl->hwsp_cacheline->active))
         *      return 0;
         *
         * That seems unlikely for a busy timeline that needed to wrap in
         * the first place, so just replace the cacheline.
         */

        vma = hwsp_alloc(tl, &cacheline);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_rollback;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
        if (err) {
                __idle_hwsp_free(vma->private, cacheline);
                goto err_rollback;
        }

        cl = cacheline_alloc(vma->private, cacheline);
        if (IS_ERR(cl)) {
                err = PTR_ERR(cl);
                __idle_hwsp_free(vma->private, cacheline);
                goto err_unpin;
        }
        GEM_BUG_ON(cl->hwsp->vma != vma);

        /*
         * Attach the old cacheline to the current request, so that we only
         * free it after the current request is retired, which ensures that
         * all writes into the cacheline from previous requests are complete.
         */
        err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
        if (err)
                goto err_cacheline;

        cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
        cacheline_free(tl->hwsp_cacheline);

        i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
        i915_vma_put(tl->hwsp_ggtt);

        tl->hwsp_ggtt = i915_vma_get(vma);

        vaddr = page_mask_bits(cl->vaddr);
        tl->hwsp_offset = cacheline * CACHELINE_BYTES;
        tl->hwsp_seqno =
                memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);

        tl->hwsp_offset += i915_ggtt_offset(vma);

        cacheline_acquire(cl);
        tl->hwsp_cacheline = cl;

        *seqno = timeline_advance(tl);
        GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
        return 0;

err_cacheline:
        cacheline_free(cl);
err_unpin:
        i915_vma_unpin(vma);
err_rollback:
        timeline_rollback(tl);
        return err;
}

int intel_timeline_get_seqno(struct intel_timeline *tl,
                             struct i915_request *rq,
                             u32 *seqno)
{
        *seqno = timeline_advance(tl);

        /* Replace the HWSP on wraparound for HW semaphores */
        if (unlikely(!*seqno && tl->hwsp_cacheline))
                return __intel_timeline_get_seqno(tl, rq, seqno);

        return 0;
}

static int cacheline_ref(struct intel_timeline_cacheline *cl,
                         struct i915_request *rq)
{
        return i915_active_ref(&cl->active, rq->timeline, rq);
}

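/*
 * Report the GGTT address at which 'to' may sample 'from's seqno with a
 * HW semaphore. An active reference from 'to' keeps the cacheline alive,
 * and if 'from's timeline has since wrapped onto a fresh cacheline, the
 * original offset is recovered from the packed vaddr.
 */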
int intel_timeline_read_hwsp(struct i915_request *from,
                             struct i915_request *to,
                             u32 *hwsp)
{
        struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
        struct intel_timeline *tl = from->timeline;
        int err;

        GEM_BUG_ON(to->timeline == tl);

        mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
        err = i915_request_completed(from);
        if (!err)
                err = cacheline_ref(cl, to);
        if (!err) {
                if (likely(cl == tl->hwsp_cacheline)) {
                        *hwsp = tl->hwsp_offset;
                } else { /* across a seqno wrap, recover the original offset */
                        *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
                                ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
                                CACHELINE_BYTES;
                }
        }
        mutex_unlock(&tl->mutex);

        return err;
}

void intel_timeline_unpin(struct intel_timeline *tl)
{
        GEM_BUG_ON(!atomic_read(&tl->pin_count));
        if (!atomic_dec_and_test(&tl->pin_count))
                return;

        cacheline_release(tl->hwsp_cacheline);

        __i915_vma_unpin(tl->hwsp_ggtt);
}

void __intel_timeline_free(struct kref *kref)
{
        struct intel_timeline *timeline =
                container_of(kref, typeof(*timeline), kref);

        intel_timeline_fini(timeline);
        kfree(timeline);
}

static void timelines_fini(struct intel_gt *gt)
{
        struct intel_gt_timelines *timelines = &gt->timelines;

        GEM_BUG_ON(!list_empty(&timelines->active_list));
        GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
}

void intel_timelines_fini(struct drm_i915_private *i915)
{
        timelines_fini(&i915->gt);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c"
#endif