/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_request *request;
        int err = -ENOMEM;

        /* Basic preliminary test to create a request and let it loose! */

        mutex_lock(&i915->drm.struct_mutex);
        request = mock_request(i915->engine[RCS0],
                               i915->kernel_context,
                               HZ / 10);
        if (!request)
                goto out_unlock;

        i915_request_add(request);

        err = 0;
out_unlock:
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

static int igt_wait_request(void *arg)
{
        const long T = HZ / 4;
        struct drm_i915_private *i915 = arg;
        struct i915_request *request;
        int err = -EINVAL;

        /* Submit a request, then wait upon it */

        mutex_lock(&i915->drm.struct_mutex);
        request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
        if (!request) {
                err = -ENOMEM;
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, 0) != -ETIME) {
                pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, T) != -ETIME) {
                pr_err("request wait succeeded (expected timeout before submit!)\n");
                goto out_unlock;
        }

        if (i915_request_completed(request)) {
                pr_err("request completed before submit!!\n");
                goto out_unlock;
        }

        i915_request_add(request);

        if (i915_request_wait(request, 0, 0) != -ETIME) {
                pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
                goto out_unlock;
        }

        if (i915_request_completed(request)) {
                pr_err("request completed immediately!\n");
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, T / 2) != -ETIME) {
                pr_err("request wait succeeded (expected timeout!)\n");
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, T) == -ETIME) {
                pr_err("request wait timed out!\n");
                goto out_unlock;
        }

        if (!i915_request_completed(request)) {
                pr_err("request not complete after waiting!\n");
                goto out_unlock;
        }

        if (i915_request_wait(request, 0, T) == -ETIME) {
                pr_err("request wait timed out when already complete!\n");
                goto out_unlock;
        }

        err = 0;
out_unlock:
        mock_device_flush(i915);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}
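
/*
 * Note the convention exercised above: i915_request_wait() returns
 * -ETIME if the timeout expires before the request completes, and
 * otherwise the remaining jiffies (possibly zero). A busy query with
 * a zero timeout therefore reports -ETIME until the request is done,
 * which is what lets the checks distinguish "still busy" from
 * "completed".
 */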

static int igt_fence_wait(void *arg)
{
        const long T = HZ / 4;
        struct drm_i915_private *i915 = arg;
        struct i915_request *request;
        int err = -EINVAL;

        /* Submit a request, treat it as a fence and wait upon it */

        mutex_lock(&i915->drm.struct_mutex);
        request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
        if (!request) {
                err = -ENOMEM;
                goto out_locked;
        }

        if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
                pr_err("fence wait success before submit (expected timeout)!\n");
                goto out_locked;
        }

        i915_request_add(request);
        mutex_unlock(&i915->drm.struct_mutex);

        if (dma_fence_is_signaled(&request->fence)) {
                pr_err("fence signaled immediately!\n");
                goto out_device;
        }

        if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
                pr_err("fence wait success after submit (expected timeout)!\n");
                goto out_device;
        }

        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
                pr_err("fence wait timed out (expected success)!\n");
                goto out_device;
        }

        if (!dma_fence_is_signaled(&request->fence)) {
                pr_err("fence unsignaled after waiting!\n");
                goto out_device;
        }

        if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
                pr_err("fence wait timed out when complete (expected success)!\n");
                goto out_device;
        }

        err = 0;
out_device:
        mutex_lock(&i915->drm.struct_mutex);
out_locked:
        mock_device_flush(i915);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}
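
/*
 * dma_fence_wait_timeout() reports success as the remaining timeout in
 * jiffies, which is why the checks above treat any value <= 0 as a
 * failure; the -ETIME comparisons rely on i915's fence ops forwarding
 * the error from i915_request_wait() rather than the generic 0 that
 * other fence implementations return on timeout.
 */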

static int igt_request_rewind(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_request *request, *vip;
        struct i915_gem_context *ctx[2];
        int err = -EINVAL;

        mutex_lock(&i915->drm.struct_mutex);
        ctx[0] = mock_context(i915, "A");
        request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ);
        if (!request) {
                err = -ENOMEM;
                goto err_context_0;
        }

        i915_request_get(request);
        i915_request_add(request);

        ctx[1] = mock_context(i915, "B");
        vip = mock_request(i915->engine[RCS0], ctx[1], 0);
        if (!vip) {
                err = -ENOMEM;
                goto err_context_1;
        }

        /* Simulate preemption by manual reordering */
        if (!mock_cancel_request(request)) {
                pr_err("failed to cancel request (already executed)!\n");
                i915_request_add(vip);
                goto err_context_1;
        }
        i915_request_get(vip);
        i915_request_add(vip);
        rcu_read_lock();
        request->engine->submit_request(request);
        rcu_read_unlock();

        mutex_unlock(&i915->drm.struct_mutex);

        if (i915_request_wait(vip, 0, HZ) == -ETIME) {
                pr_err("timed out waiting for high priority request\n");
                goto err;
        }

        if (i915_request_completed(request)) {
                pr_err("low priority request already completed\n");
                goto err;
        }

        err = 0;
err:
        i915_request_put(vip);
        mutex_lock(&i915->drm.struct_mutex);
err_context_1:
        mock_context_close(ctx[1]);
        i915_request_put(request);
err_context_0:
        mock_context_close(ctx[0]);
        mock_device_flush(i915);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}
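
/*
 * The "rewind" above works by pulling the long-running request back off
 * the (mock) hardware with mock_cancel_request(), submitting the vip
 * request, and only then resubmitting the original via submit_request():
 * vip therefore executes first even though it was constructed later,
 * mimicking preemption without a real scheduler.
 */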

struct smoketest {
        struct intel_engine_cs *engine;
        struct i915_gem_context **contexts;
        atomic_long_t num_waits, num_fences;
        int ncontexts, max_batch;
        struct i915_request *(*request_alloc)(struct i915_gem_context *,
                                              struct intel_engine_cs *);
};

static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
                     struct intel_engine_cs *engine)
{
        return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
                     struct intel_engine_cs *engine)
{
        return igt_request_alloc(ctx, engine);
}
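
/*
 * The two allocators above let the same smoketest body drive either the
 * mock engine (no hardware) or real requests on live hardware; the
 * harness differs only in the request_alloc callback it is given.
 */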

static int __igt_breadcrumbs_smoketest(void *arg)
{
        struct smoketest *t = arg;
        struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
        const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
        const unsigned int total = 4 * t->ncontexts + 1;
        unsigned int num_waits = 0, num_fences = 0;
        struct i915_request **requests;
        I915_RND_STATE(prng);
        unsigned int *order;
        int err = 0;

        /*
         * A very simple test to catch the most egregious of list handling bugs.
         *
         * At its heart, we simply create oodles of requests running across
         * multiple kthreads and enable signaling on them, for the sole purpose
         * of stressing our breadcrumb handling. The only inspection we do is
         * that the fences were marked as signaled.
         */

        requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
        if (!requests)
                return -ENOMEM;

        order = i915_random_order(total, &prng);
        if (!order) {
                err = -ENOMEM;
                goto out_requests;
        }

        while (!kthread_should_stop()) {
                struct i915_sw_fence *submit, *wait;
                unsigned int n, count;

                submit = heap_fence_create(GFP_KERNEL);
                if (!submit) {
                        err = -ENOMEM;
                        break;
                }

                wait = heap_fence_create(GFP_KERNEL);
                if (!wait) {
                        i915_sw_fence_commit(submit);
                        heap_fence_put(submit);
                        err = -ENOMEM;
                        break;
                }

                i915_random_reorder(order, total, &prng);
                count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

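                /*
                 * Every request in this batch is gated on the 'submit'
                 * fence, so nothing reaches the hardware until the whole
                 * batch has been constructed, while 'wait' collects each
                 * request's dma-fence and completes only once all of
                 * them have signaled.
                 */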
                for (n = 0; n < count; n++) {
                        struct i915_gem_context *ctx =
                                t->contexts[order[n] % t->ncontexts];
                        struct i915_request *rq;

                        mutex_lock(BKL);

                        rq = t->request_alloc(ctx, t->engine);
                        if (IS_ERR(rq)) {
                                mutex_unlock(BKL);
                                err = PTR_ERR(rq);
                                count = n;
                                break;
                        }

                        err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
                                                               submit,
                                                               GFP_KERNEL);

                        requests[n] = i915_request_get(rq);
                        i915_request_add(rq);

                        mutex_unlock(BKL);

                        if (err >= 0)
                                err = i915_sw_fence_await_dma_fence(wait,
                                                                    &rq->fence,
                                                                    0,
                                                                    GFP_KERNEL);

                        if (err < 0) {
                                i915_request_put(rq);
                                count = n;
                                break;
                        }
                }

                i915_sw_fence_commit(submit);
                i915_sw_fence_commit(wait);

                if (!wait_event_timeout(wait->wait,
                                        i915_sw_fence_done(wait),
                                        HZ / 2)) {
                        struct i915_request *rq = requests[count - 1];

                        pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
                               count,
                               rq->fence.context, rq->fence.seqno,
                               t->engine->name);
                        i915_gem_set_wedged(t->engine->i915);
                        GEM_BUG_ON(!i915_request_completed(rq));
                        i915_sw_fence_wait(wait);
                        err = -EIO;
                }

                for (n = 0; n < count; n++) {
                        struct i915_request *rq = requests[n];

                        if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
                                      &rq->fence.flags)) {
                                pr_err("%llu:%llu was not signaled!\n",
                                       rq->fence.context, rq->fence.seqno);
                                err = -EINVAL;
                        }

                        i915_request_put(rq);
                }

                heap_fence_put(wait);
                heap_fence_put(submit);

                if (err < 0)
                        break;

                num_fences += count;
                num_waits++;

                cond_resched();
        }

        atomic_long_add(num_fences, &t->num_fences);
        atomic_long_add(num_waits, &t->num_waits);

        kfree(order);
out_requests:
        kfree(requests);
        return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct smoketest t = {
                .engine = i915->engine[RCS0],
                .ncontexts = 1024,
                .max_batch = 1024,
                .request_alloc = __mock_request_alloc
        };
        unsigned int ncpus = num_online_cpus();
        struct task_struct **threads;
        unsigned int n;
        int ret = 0;

        /*
         * Smoketest our breadcrumb/signal handling for requests across multiple
         * threads. A very simple test to only catch the most egregious of bugs.
         * See __igt_breadcrumbs_smoketest();
         */

        threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
        if (!threads)
                return -ENOMEM;

        t.contexts =
                kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
        if (!t.contexts) {
                ret = -ENOMEM;
                goto out_threads;
        }

        mutex_lock(&t.engine->i915->drm.struct_mutex);
        for (n = 0; n < t.ncontexts; n++) {
                t.contexts[n] = mock_context(t.engine->i915, "mock");
                if (!t.contexts[n]) {
                        ret = -ENOMEM;
                        goto out_contexts;
                }
        }
        mutex_unlock(&t.engine->i915->drm.struct_mutex);

        for (n = 0; n < ncpus; n++) {
                threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
                                         &t, "igt/%d", n);
                if (IS_ERR(threads[n])) {
                        ret = PTR_ERR(threads[n]);
                        ncpus = n;
                        break;
                }

                get_task_struct(threads[n]);
        }

        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

        for (n = 0; n < ncpus; n++) {
                int err;

                err = kthread_stop(threads[n]);
                if (err < 0 && !ret)
                        ret = err;

                put_task_struct(threads[n]);
        }
        pr_info("Completed %lu waits for %lu fences across %d cpus\n",
                atomic_long_read(&t.num_waits),
                atomic_long_read(&t.num_fences),
                ncpus);

        mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
        for (n = 0; n < t.ncontexts; n++) {
                if (!t.contexts[n])
                        break;
                mock_context_close(t.contexts[n]);
        }
        mutex_unlock(&t.engine->i915->drm.struct_mutex);
        kfree(t.contexts);
out_threads:
        kfree(threads);

        return ret;
}

int i915_request_mock_selftests(void)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_add_request),
                SUBTEST(igt_wait_request),
                SUBTEST(igt_fence_wait),
                SUBTEST(igt_request_rewind),
                SUBTEST(mock_breadcrumbs_smoketest),
        };
        struct drm_i915_private *i915;
        intel_wakeref_t wakeref;
        int err = 0;

        i915 = mock_gem_device();
        if (!i915)
                return -ENOMEM;

        with_intel_runtime_pm(&i915->runtime_pm, wakeref)
                err = i915_subtests(tests, i915);

        drm_dev_put(&i915->drm);

        return err;
}

static int live_nop_request(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        intel_wakeref_t wakeref;
        struct igt_live_test t;
        unsigned int id;
        int err = -ENODEV;

        /* Submit various sized batches of empty requests, to each engine
         * (individually), and wait for the batch to complete. We can check
         * the overhead of submitting requests to the hardware.
         */

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        for_each_engine(engine, i915, id) {
                struct i915_request *request = NULL;
                unsigned long n, prime;
                IGT_TIMEOUT(end_time);
                ktime_t times[2] = {};

                err = igt_live_test_begin(&t, i915, __func__, engine->name);
                if (err)
                        goto out_unlock;

                for_each_prime_number_from(prime, 1, 8192) {
                        times[1] = ktime_get_raw();

                        for (n = 0; n < prime; n++) {
                                request = i915_request_create(engine->kernel_context);
                                if (IS_ERR(request)) {
                                        err = PTR_ERR(request);
                                        goto out_unlock;
                                }

                                /* This space is left intentionally blank.
                                 *
                                 * We do not actually want to perform any
                                 * action with this request, we just want
                                 * to measure the latency in allocation
                                 * and submission of our breadcrumbs -
                                 * ensuring that the bare request is sufficient
                                 * for the system to work (i.e. proper HEAD
                                 * tracking of the rings, interrupt handling,
                                 * etc). It also gives us the lowest bounds
                                 * for latency.
                                 */

                                i915_request_add(request);
                        }
                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
                        if (prime == 1)
                                times[0] = times[1];

                        if (__igt_timeout(end_time, NULL))
                                break;
                }

                err = igt_live_test_end(&t);
                if (err)
                        goto out_unlock;

                pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
                        engine->name,
                        ktime_to_ns(times[0]),
                        prime, div64_u64(ktime_to_ns(times[1]), prime));
        }

out_unlock:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 *cmd;
        int err;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto err;
        }

        *cmd = MI_BATCH_BUFFER_END;

        __i915_gem_object_flush_map(obj, 0, 64);
        i915_gem_object_unpin_map(obj);

        i915_gem_chipset_flush(i915);

        vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
        if (err)
                goto err;

        return vma;

err:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}
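
/*
 * The "empty" batch is a single MI_BATCH_BUFFER_END, pinned with
 * PIN_GLOBAL so it has a GGTT address; empty_request() below dispatches
 * it with I915_DISPATCH_SECURE, which executes the batch from the GGTT.
 */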

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
              struct i915_vma *batch)
{
        struct i915_request *request;
        int err;

        request = i915_request_create(engine->kernel_context);
        if (IS_ERR(request))
                return request;

        err = engine->emit_bb_start(request,
                                    batch->node.start,
                                    batch->node.size,
                                    I915_DISPATCH_SECURE);
        if (err)
                goto out_request;

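        /*
         * The request is added even on error: once created it occupies
         * ring space and must be submitted (and later retired) rather
         * than leaked, hence the shared out_request path below.
         */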
out_request:
        i915_request_add(request);
        return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        intel_wakeref_t wakeref;
        struct igt_live_test t;
        struct i915_vma *batch;
        unsigned int id;
        int err = 0;

        /* Submit various sized batches of empty requests, to each engine
         * (individually), and wait for the batch to complete. We can check
         * the overhead of submitting requests to the hardware.
         */

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        batch = empty_batch(i915);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto out_unlock;
        }

        for_each_engine(engine, i915, id) {
                IGT_TIMEOUT(end_time);
                struct i915_request *request;
                unsigned long n, prime;
                ktime_t times[2] = {};

                err = igt_live_test_begin(&t, i915, __func__, engine->name);
                if (err)
                        goto out_batch;

                /* Warmup / preload */
                request = empty_request(engine, batch);
                if (IS_ERR(request)) {
                        err = PTR_ERR(request);
                        goto out_batch;
                }
                i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

                for_each_prime_number_from(prime, 1, 8192) {
                        times[1] = ktime_get_raw();

                        for (n = 0; n < prime; n++) {
                                request = empty_request(engine, batch);
                                if (IS_ERR(request)) {
                                        err = PTR_ERR(request);
                                        goto out_batch;
                                }
                        }
                        i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

                        times[1] = ktime_sub(ktime_get_raw(), times[1]);
                        if (prime == 1)
                                times[0] = times[1];

                        if (__igt_timeout(end_time, NULL))
                                break;
                }

                err = igt_live_test_end(&t);
                if (err)
                        goto out_batch;

                pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
                        engine->name,
                        ktime_to_ns(times[0]),
                        prime, div64_u64(ktime_to_ns(times[1]), prime));
        }

out_batch:
        i915_vma_unpin(batch);
        i915_vma_put(batch);
out_unlock:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
        struct i915_gem_context *ctx = i915->kernel_context;
        struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
        struct drm_i915_gem_object *obj;
        const int gen = INTEL_GEN(i915);
        struct i915_vma *vma;
        u32 *cmd;
        int err;

        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err;
        }

        err = i915_vma_pin(vma, 0, 0, PIN_USER);
        if (err)
                goto err;

        cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(cmd)) {
                err = PTR_ERR(cmd);
                goto err;
        }

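        /*
         * The MI_BATCH_BUFFER_START encoding depends on the hardware
         * generation: gen8+ takes a 64-bit address split across two
         * dwords (with bit 8 selecting PPGTT addressing), gen6/7 a
         * single 32-bit address, and older generations need the
         * MI_BATCH_GTT flag to mark the address as a GTT offset.
         */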
        if (gen >= 8) {
                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
                *cmd++ = lower_32_bits(vma->node.start);
                *cmd++ = upper_32_bits(vma->node.start);
        } else if (gen >= 6) {
                *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
                *cmd++ = lower_32_bits(vma->node.start);
        } else {
                *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
                *cmd++ = lower_32_bits(vma->node.start);
        }
        *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

        __i915_gem_object_flush_map(obj, 0, 64);
        i915_gem_object_unpin_map(obj);

        i915_gem_chipset_flush(i915);

        return vma;

err:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

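/*
 * recursive_batch() emits a batch whose first instruction jumps back to
 * its own start, so once dispatched it spins on the GPU indefinitely.
 * recursive_batch_resolve() breaks the loop by overwriting that jump
 * with MI_BATCH_BUFFER_END, letting the batch (and its request) retire.
 */
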
static int recursive_batch_resolve(struct i915_vma *batch)
{
        u32 *cmd;

        cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
        if (IS_ERR(cmd))
                return PTR_ERR(cmd);

        *cmd = MI_BATCH_BUFFER_END;
        i915_gem_chipset_flush(batch->vm->i915);

        i915_gem_object_unpin_map(batch->obj);

        return 0;
}

static int live_all_engines(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct intel_engine_cs *engine;
        struct i915_request *request[I915_NUM_ENGINES];
        intel_wakeref_t wakeref;
        struct igt_live_test t;
        struct i915_vma *batch;
        unsigned int id;
        int err;

        /* Check we can submit requests to all engines simultaneously. We
         * send a recursive batch to each engine - checking that we don't
         * block doing so, and that they don't complete too soon.
         */

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        err = igt_live_test_begin(&t, i915, __func__, "");
        if (err)
                goto out_unlock;

        batch = recursive_batch(i915);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
                goto out_unlock;
        }

        for_each_engine(engine, i915, id) {
                request[id] = i915_request_create(engine->kernel_context);
                if (IS_ERR(request[id])) {
                        err = PTR_ERR(request[id]);
                        pr_err("%s: Request allocation failed with err=%d\n",
                               __func__, err);
                        goto out_request;
                }

                err = engine->emit_bb_start(request[id],
                                            batch->node.start,
                                            batch->node.size,
                                            0);
                GEM_BUG_ON(err);
                request[id]->batch = batch;

                i915_vma_lock(batch);
                err = i915_vma_move_to_active(batch, request[id], 0);
                i915_vma_unlock(batch);
                GEM_BUG_ON(err);

                i915_request_get(request[id]);
                i915_request_add(request[id]);
        }

        for_each_engine(engine, i915, id) {
                if (i915_request_completed(request[id])) {
                        pr_err("%s(%s): request completed too early!\n",
                               __func__, engine->name);
                        err = -EINVAL;
                        goto out_request;
                }
        }

        err = recursive_batch_resolve(batch);
        if (err) {
                pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
                goto out_request;
        }

        for_each_engine(engine, i915, id) {
                long timeout;

                timeout = i915_request_wait(request[id], 0,
                                            MAX_SCHEDULE_TIMEOUT);
                if (timeout < 0) {
                        err = timeout;
                        pr_err("%s: error waiting for request on %s, err=%d\n",
                               __func__, engine->name, err);
                        goto out_request;
                }

                GEM_BUG_ON(!i915_request_completed(request[id]));
                i915_request_put(request[id]);
                request[id] = NULL;
        }

        err = igt_live_test_end(&t);

out_request:
        for_each_engine(engine, i915, id)
                if (request[id])
                        i915_request_put(request[id]);
        i915_vma_unpin(batch);
        i915_vma_put(batch);
out_unlock:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

static int live_sequential_engines(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_request *request[I915_NUM_ENGINES] = {};
        struct i915_request *prev = NULL;
        struct intel_engine_cs *engine;
        intel_wakeref_t wakeref;
        struct igt_live_test t;
        unsigned int id;
        int err;

        /* Check we can submit requests to all engines sequentially, such
         * that each successive request waits for the earlier ones. This
         * tests that we don't execute requests out of order, even though
         * they are running on independent engines.
         */

        mutex_lock(&i915->drm.struct_mutex);
        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        err = igt_live_test_begin(&t, i915, __func__, "");
        if (err)
                goto out_unlock;

        for_each_engine(engine, i915, id) {
                struct i915_vma *batch;

                batch = recursive_batch(i915);
                if (IS_ERR(batch)) {
                        err = PTR_ERR(batch);
                        pr_err("%s: Unable to create batch for %s, err=%d\n",
                               __func__, engine->name, err);
                        goto out_unlock;
                }

                request[id] = i915_request_create(engine->kernel_context);
                if (IS_ERR(request[id])) {
                        err = PTR_ERR(request[id]);
                        pr_err("%s: Request allocation failed for %s with err=%d\n",
                               __func__, engine->name, err);
                        goto out_request;
                }

                if (prev) {
                        err = i915_request_await_dma_fence(request[id],
                                                           &prev->fence);
                        if (err) {
                                i915_request_add(request[id]);
                                pr_err("%s: Request await failed for %s with err=%d\n",
                                       __func__, engine->name, err);
                                goto out_request;
                        }
                }

                err = engine->emit_bb_start(request[id],
                                            batch->node.start,
                                            batch->node.size,
                                            0);
                GEM_BUG_ON(err);
                request[id]->batch = batch;

                i915_vma_lock(batch);
                err = i915_vma_move_to_active(batch, request[id], 0);
                i915_vma_unlock(batch);
                GEM_BUG_ON(err);

                i915_request_get(request[id]);
                i915_request_add(request[id]);

                prev = request[id];
        }

        for_each_engine(engine, i915, id) {
                long timeout;

                if (i915_request_completed(request[id])) {
                        pr_err("%s(%s): request completed too early!\n",
                               __func__, engine->name);
                        err = -EINVAL;
                        goto out_request;
                }

                err = recursive_batch_resolve(request[id]->batch);
                if (err) {
                        pr_err("%s: failed to resolve batch, err=%d\n",
                               __func__, err);
                        goto out_request;
                }

                timeout = i915_request_wait(request[id], 0,
                                            MAX_SCHEDULE_TIMEOUT);
                if (timeout < 0) {
                        err = timeout;
                        pr_err("%s: error waiting for request on %s, err=%d\n",
                               __func__, engine->name, err);
                        goto out_request;
                }

                GEM_BUG_ON(!i915_request_completed(request[id]));
        }

        err = igt_live_test_end(&t);

out_request:
        for_each_engine(engine, i915, id) {
                u32 *cmd;

                if (!request[id])
                        break;

                cmd = i915_gem_object_pin_map(request[id]->batch->obj,
                                              I915_MAP_WC);
                if (!IS_ERR(cmd)) {
                        *cmd = MI_BATCH_BUFFER_END;
                        i915_gem_chipset_flush(i915);

                        i915_gem_object_unpin_map(request[id]->batch->obj);
                }

                i915_vma_put(request[id]->batch);
                i915_request_put(request[id]);
        }
out_unlock:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);
        return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
        struct i915_request *rq;
        int ret;

        /*
         * Before execlists, all contexts share the same ringbuffer. With
         * execlists, each context/engine has a separate ringbuffer and
         * for the purposes of this test, inexhaustible.
         *
         * For the global ringbuffer though, we have to be very careful
         * that we do not wrap while preventing the execution of requests
         * with an unsignaled fence.
         */
        if (HAS_EXECLISTS(ctx->i915))
                return INT_MAX;

        rq = igt_request_alloc(ctx, engine);
        if (IS_ERR(rq)) {
                ret = PTR_ERR(rq);
        } else {
                int sz;

                ret = rq->ring->size - rq->reserved_space;
                i915_request_add(rq);

                sz = rq->ring->emit - rq->head;
                if (sz < 0)
                        sz += rq->ring->size;
                ret /= sz;
                ret /= 2; /* leave half spare, in case of emergency! */
        }

        return ret;
}
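
/*
 * A rough worked example, with purely illustrative numbers: if the
 * legacy ring is 16KiB with 160 bytes of reserved space, and the probe
 * request emitted above consumed 256 bytes (rq->ring->emit - rq->head),
 * then ret = (16384 - 160) / 256 = 63 requests, halved to 31 to leave
 * headroom.
 */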

static int live_breadcrumbs_smoketest(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct smoketest t[I915_NUM_ENGINES];
        unsigned int ncpus = num_online_cpus();
        unsigned long num_waits, num_fences;
        struct intel_engine_cs *engine;
        struct task_struct **threads;
        struct igt_live_test live;
        enum intel_engine_id id;
        intel_wakeref_t wakeref;
        struct drm_file *file;
        unsigned int n;
        int ret = 0;

        /*
         * Smoketest our breadcrumb/signal handling for requests across multiple
         * threads. A very simple test to only catch the most egregious of bugs.
         * See __igt_breadcrumbs_smoketest();
         *
         * On real hardware this time.
         */

        wakeref = intel_runtime_pm_get(&i915->runtime_pm);

        file = mock_file(i915);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
                goto out_rpm;
        }

        threads = kcalloc(ncpus * I915_NUM_ENGINES,
                          sizeof(*threads),
                          GFP_KERNEL);
        if (!threads) {
                ret = -ENOMEM;
                goto out_file;
        }

        memset(&t[0], 0, sizeof(t[0]));
        t[0].request_alloc = __live_request_alloc;
        t[0].ncontexts = 64;
        t[0].contexts = kmalloc_array(t[0].ncontexts,
                                      sizeof(*t[0].contexts),
                                      GFP_KERNEL);
        if (!t[0].contexts) {
                ret = -ENOMEM;
                goto out_threads;
        }

        mutex_lock(&i915->drm.struct_mutex);
        for (n = 0; n < t[0].ncontexts; n++) {
                t[0].contexts[n] = live_context(i915, file);
                if (!t[0].contexts[n]) {
                        ret = -ENOMEM;
                        goto out_contexts;
                }
        }

        ret = igt_live_test_begin(&live, i915, __func__, "");
        if (ret)
                goto out_contexts;

        for_each_engine(engine, i915, id) {
                t[id] = t[0];
                t[id].engine = engine;
                t[id].max_batch = max_batches(t[0].contexts[0], engine);
                if (t[id].max_batch < 0) {
                        ret = t[id].max_batch;
                        mutex_unlock(&i915->drm.struct_mutex);
                        goto out_flush;
                }
                /* One ring interleaved between requests from all cpus */
                t[id].max_batch /= num_online_cpus() + 1;
                pr_debug("Limiting batches to %d requests on %s\n",
                         t[id].max_batch, engine->name);

                for (n = 0; n < ncpus; n++) {
                        struct task_struct *tsk;

                        tsk = kthread_run(__igt_breadcrumbs_smoketest,
                                          &t[id], "igt/%d.%d", id, n);
                        if (IS_ERR(tsk)) {
                                ret = PTR_ERR(tsk);
                                mutex_unlock(&i915->drm.struct_mutex);
                                goto out_flush;
                        }

                        get_task_struct(tsk);
                        threads[id * ncpus + n] = tsk;
                }
        }
        mutex_unlock(&i915->drm.struct_mutex);

        msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
        num_waits = 0;
        num_fences = 0;
        for_each_engine(engine, i915, id) {
                for (n = 0; n < ncpus; n++) {
                        struct task_struct *tsk = threads[id * ncpus + n];
                        int err;

                        if (!tsk)
                                continue;

                        err = kthread_stop(tsk);
                        if (err < 0 && !ret)
                                ret = err;

                        put_task_struct(tsk);
                }

                num_waits += atomic_long_read(&t[id].num_waits);
                num_fences += atomic_long_read(&t[id].num_fences);
        }
        pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
                num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

        mutex_lock(&i915->drm.struct_mutex);
        ret = igt_live_test_end(&live) ?: ret;
out_contexts:
        mutex_unlock(&i915->drm.struct_mutex);
        kfree(t[0].contexts);
out_threads:
        kfree(threads);
out_file:
        mock_file_free(i915, file);
out_rpm:
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);

        return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(live_nop_request),
                SUBTEST(live_all_engines),
                SUBTEST(live_sequential_engines),
                SUBTEST(live_empty_request),
                SUBTEST(live_breadcrumbs_smoketest),
        };

        if (i915_terminally_wedged(i915))
                return 0;

        return i915_subtests(tests, i915);
}