// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */
#include <drm/drm_device.h>
#include <drm/drm_exec.h>
#include <drm/drm_file.h>
#include <drm/xe_drm.h>
#include <linux/delay.h>
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_macros.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_vm.h"
/**
 * DOC: Execbuf (User GPU command submission)
 *
 * Execs have historically been rather complicated in DRM drivers (at least in
 * the i915) because of a few things:
 *
 * - Passing in a list of BOs which are read / written to, creating implicit
 *   syncs
 * - Binding at exec time
 * - Flow controlling the ring at exec time
 *
 * In XE we avoid all of this complication by not allowing a BO list to be
 * passed into an exec, using the dma-buf implicit sync uAPI, having binds as
 * separate operations, and using the DRM scheduler to flow control the ring.
 * Let's dive deeper into each of these.
 *
 * We can get away from a BO list by forcing the user to use in / out fences on
 * every exec rather than the kernel tracking dependencies of BOs (e.g. if the
 * user knows an exec writes to a BO and reads from the BO in the next exec, it
 * is the user's responsibility to pass in / out fences between the two execs).
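 *
 * A minimal sketch of this model from the userspace side (illustrative only;
 * the &struct drm_xe_sync type and flag names used here are assumptions about
 * the uAPI, not a definitive reference)::
 *
 *	struct drm_xe_sync signal = { .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
 *				      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
 *				      .handle = syncobj };
 *	struct drm_xe_sync wait = { .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
 *				    .handle = syncobj };
 *
 *	exec_write.syncs = (uintptr_t)&signal;	// writes BO, signals syncobj
 *	exec_write.num_syncs = 1;
 *	ioctl(fd, DRM_IOCTL_XE_EXEC, &exec_write);
 *
 *	exec_read.syncs = (uintptr_t)&wait;	// reads BO, waits on syncobj
 *	exec_read.num_syncs = 1;
 *	ioctl(fd, DRM_IOCTL_XE_EXEC, &exec_read);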
 *
 * Implicit dependencies for external BOs are handled by using the dma-buf
 * implicit dependency uAPI (TODO: add link). To make this work each exec must
 * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external
 * BO mapped in the VM.
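 *
 * Conceptually that amounts to the sketch below (illustrative only; in this
 * file the actual installation is done via drm_gpuvm_resv_add_fence(), and
 * bo_is_external() is a made-up helper standing in for the external-BO
 * check)::
 *
 *	drm_exec_for_each_locked_object(exec, index, obj)
 *		dma_resv_add_fence(obj->resv, &job->drm.s_fence->finished,
 *				   bo_is_external(obj) ?
 *				   DMA_RESV_USAGE_WRITE :
 *				   DMA_RESV_USAGE_BOOKKEEP);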
 *
 * We do not allow a user to trigger a bind at exec time; rather, we have a VM
 * bind IOCTL which uses the same in / out fence interface as exec. In that
 * sense, a VM bind is basically the same operation as an exec from the user
 * perspective. e.g. If an exec depends on a VM bind, use the in / out fence
 * interface (struct drm_xe_sync) to synchronize, just like syncing between two
 * dependent execs.
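 *
 * For example, to order an exec after an async VM bind, the bind can signal a
 * syncobj which the exec then waits on (sketch only; the exact uAPI field
 * names here are assumptions)::
 *
 *	bind.syncs = (uintptr_t)&signal;	// VM bind signals the syncobj
 *	bind.num_syncs = 1;
 *	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 *
 *	exec.syncs = (uintptr_t)&wait;		// exec waits on the syncobj
 *	exec.num_syncs = 1;
 *	ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);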
 *
 * Although a user cannot trigger a bind, we still have to rebind userptrs in
 * the VM that have been invalidated since the last exec; likewise we also have
 * to rebind BOs that have been evicted by the kernel. We schedule these
 * rebinds behind any pending kernel operations on any external BOs in the VM
 * or any BOs private to the VM. This is accomplished by the rebinds waiting on
 * the BOs' DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on
 * all BO slots (in-flight execs are in the DMA_RESV_USAGE_BOOKKEEP slot for
 * private BOs and in DMA_RESV_USAGE_WRITE for external BOs).
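 *
 * As a rough sketch of how that ordering is expressed (not the actual rebind
 * code, which lives elsewhere; this only shows the dma-resv usage that means
 * "wait behind pending kernel ops")::
 *
 *	err = drm_sched_job_add_resv_dependencies(&job->drm, xe_vm_resv(vm),
 *						  DMA_RESV_USAGE_KERNEL);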
 *
 * Rebinds / dma-resv usage applies only to non-compute-mode VMs; for
 * compute-mode VMs we use preempt fences and a rebind worker (TODO: add link).
 *
 * There is no need to flow control the ring in the exec as we write the ring
 * at submission time and set the DRM scheduler max job limit to SIZE_OF_RING /
 * MAX_JOB_SIZE. The DRM scheduler will then hold all jobs until space in the
 * ring is available.
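 *
 * In other words, the scheduler's hardware submission limit is sized so that
 * the ring can never be overrun (sketch; SIZE_OF_RING and MAX_JOB_SIZE stand
 * in for the real per-queue values)::
 *
 *	u32 hw_submission_limit = SIZE_OF_RING / MAX_JOB_SIZE;
 *
 * and that limit is handed to the DRM scheduler when the exec queue's
 * scheduler instance is created.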
 *
 * All of this results in a rather simple exec implementation::
 *
 *	Parse input arguments
 *	Wait for any async VM bind passed as in-fences to start
 *	<----------------------------------------------------------------------|
 *	Lock global VM lock in read mode                                       |
 *	Pin userptrs (also finds userptrs invalidated since last exec)         |
 *	Lock exec (VM dma-resv lock, external BOs dma-resv locks)              |
 *	Validate BOs that have been evicted                                    |
 *	Rebind invalidated userptrs + evicted BOs (non-compute-mode)           |
 *	Add rebind fence dependency to job                                     |
 *	Add job VM dma-resv bookkeeping slot (non-compute mode)                |
 *	Add job to external BOs dma-resv write slots (non-compute mode)        |
 *	Check if any userptrs invalidated since pin ------ Drop locks ---------|
 *	Install in / out fences for job
 *	Submit job
 *	Unlock all
 */

static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{
	struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
	struct drm_gem_object *obj;
	unsigned long index;
	int num_fences;
	int ret;

	ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
	if (ret)
		return ret;

	/*
	 * 1 fence slot for the final submit, and 1 more per tile for the GPU
	 * bind and 1 extra for the CPU bind. Note that there are potentially
	 * many VMAs per object/dma-resv, however the fence slot will just be
	 * re-used, since they are largely the same timeline and the seqnos
	 * should be in order. In the case of CPU bind there is a dummy fence
	 * used for all CPU binds, so no need to have a per-tile slot for that.
	 */
	num_fences = 1 + 1 + vm->xe->info.tile_count;

	/*
	 * We don't know upfront exactly how many fence slots we will need at
	 * the start of the exec, since the TTM bo_validate above can consume
	 * numerous fence slots. Also, due to how dma_resv_reserve_fences()
	 * works, it only ensures that at least that many fence slots are
	 * available, i.e. if there are already 10 slots available and we
	 * reserve two more, it can just no-op without reserving anything. With
	 * this it is quite possible that TTM steals some of the fence slots
	 * and then when it comes time to do the vma binding and final exec
	 * stage we are lacking enough fence slots, leading to some nasty
	 * BUG_ON() when adding the fences. Hence just add our own fences here,
	 * after the validate stage.
	 */
	drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) {
		ret = dma_resv_reserve_fences(obj->resv, num_fences);
		if (ret)
			return ret;
	}

	return 0;
}

int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec *args = data;
	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
	u64 __user *addresses_user = u64_to_user_ptr(args->address);
	struct xe_exec_queue *q;
	struct xe_sync_entry *syncs = NULL;
	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
	struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
	struct drm_exec *exec = &vm_exec.exec;
	u32 i, num_syncs = 0, num_ufence = 0;
	struct xe_sched_job *job;
	struct dma_fence *rebind_fence;
	struct xe_vm *vm;
	bool write_locked, skip_retry = false;
	ktime_t end = 0;
	int err = 0;

	if (XE_IOCTL_DBG(xe, args->extensions) ||
	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->num_batch_buffer &&
			 q->width != args->num_batch_buffer))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
		err = -ECANCELED;
		goto err_exec_queue;
	}

	if (args->num_syncs) {
		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
		if (!syncs) {
			err = -ENOMEM;
			goto err_exec_queue;
		}
	}

	vm = q->vm;

	for (i = 0; i < args->num_syncs; i++) {
		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
					  &syncs_user[i], SYNC_PARSE_FLAG_EXEC |
					  (xe_vm_in_lr_mode(vm) ?
					   SYNC_PARSE_FLAG_LR_MODE : 0));
		if (err)
			goto err_syncs;

		if (xe_sync_is_ufence(&syncs[i]))
			num_ufence++;
	}

	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
		err = -EINVAL;
		goto err_syncs;
	}

	if (xe_exec_queue_is_parallel(q)) {
		err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
				       q->width);
		if (err) {
			err = -EFAULT;
			goto err_syncs;
		}
	}

retry:
	if (!xe_vm_in_lr_mode(vm) && xe_vm_userptr_check_repin(vm)) {
		err = down_write_killable(&vm->lock);
		write_locked = true;
	} else {
		/* We don't allow execs while the VM is in error state */
		err = down_read_interruptible(&vm->lock);
		write_locked = false;
	}
	if (err)
		goto err_syncs;

	if (write_locked) {
		err = xe_vm_userptr_pin(vm);
		downgrade_write(&vm->lock);
		write_locked = false;
		if (err)
			goto err_unlock_list;
	}

	if (!args->num_batch_buffer) {
		err = xe_vm_lock(vm, true);
		if (err)
			goto err_unlock_list;

		if (!xe_vm_in_lr_mode(vm)) {
			struct dma_fence *fence;

			fence = xe_sync_in_fence_get(syncs, num_syncs, q, vm);
			if (IS_ERR(fence)) {
				err = PTR_ERR(fence);
				goto err_unlock_list;
			}
			for (i = 0; i < num_syncs; i++)
				xe_sync_entry_signal(&syncs[i], NULL, fence);
			xe_exec_queue_last_fence_set(q, vm, fence);
			dma_fence_put(fence);
		}

		xe_vm_unlock(vm);
		goto err_unlock_list;
	}

	vm_exec.vm = &vm->gpuvm;
	vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
	if (xe_vm_in_lr_mode(vm)) {
		drm_exec_init(exec, vm_exec.flags, 0);
	} else {
		err = drm_gpuvm_exec_lock(&vm_exec);
		if (err) {
			if (xe_vm_validate_should_retry(exec, err, &end))
				err = -EAGAIN;
			goto err_unlock_list;
		}
	}

	if (xe_vm_is_closed_or_banned(q->vm)) {
		drm_warn(&xe->drm, "Trying to schedule after vm is closed or banned\n");
		err = -ECANCELED;
		goto err_exec;
	}

	if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
		err = -EWOULDBLOCK;	/* Aliased to -EAGAIN */
		skip_retry = true;
		goto err_exec;
	}

	job = xe_sched_job_create(q, xe_exec_queue_is_parallel(q) ?
				  addresses : &args->address);
	if (IS_ERR(job)) {
		err = PTR_ERR(job);
		goto err_exec;
	}

	/*
	 * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
	 * VM mode only.
	 */
	rebind_fence = xe_vm_rebind(vm, false);
	if (IS_ERR(rebind_fence)) {
		err = PTR_ERR(rebind_fence);
		goto err_put_job;
	}

	/*
	 * We store the rebind_fence in the VM so subsequent execs don't get
	 * scheduled before the rebinds of userptrs / evicted BOs are complete.
	 */
	if (rebind_fence) {
		dma_fence_put(vm->rebind_fence);
		vm->rebind_fence = rebind_fence;
	}
	if (vm->rebind_fence) {
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
			     &vm->rebind_fence->flags)) {
			dma_fence_put(vm->rebind_fence);
			vm->rebind_fence = NULL;
		} else {
			dma_fence_get(vm->rebind_fence);
			err = drm_sched_job_add_dependency(&job->drm,
							   vm->rebind_fence);
			if (err)
				goto err_put_job;
		}
	}

	/* Wait behind munmap style rebinds */
	if (!xe_vm_in_lr_mode(vm)) {
		err = drm_sched_job_add_resv_dependencies(&job->drm,
							  xe_vm_resv(vm),
							  DMA_RESV_USAGE_KERNEL);
		if (err)
			goto err_put_job;
	}

	for (i = 0; i < num_syncs && !err; i++)
		err = xe_sync_entry_add_deps(&syncs[i], job);
	if (err)
		goto err_put_job;

	if (!xe_vm_in_lr_mode(vm)) {
		err = xe_sched_job_last_fence_add_dep(job, vm);
		if (err)
			goto err_put_job;

		err = down_read_interruptible(&vm->userptr.notifier_lock);
		if (err)
			goto err_put_job;

		err = __xe_vm_userptr_needs_repin(vm);
		if (err)
			goto err_repin;
	}

	/*
	 * Point of no return: if we error after this point, just set an error
	 * on the job and let the DRM scheduler / backend clean up the job.
	 */
	xe_sched_job_arm(job);
	if (!xe_vm_in_lr_mode(vm))
		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished,
					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);

	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_signal(&syncs[i], job,
				     &job->drm.s_fence->finished);

	if (xe_exec_queue_is_lr(q))
		q->ring_ops->emit_job(job);
	if (!xe_vm_in_lr_mode(vm))
		xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
	xe_sched_job_push(job);
	xe_vm_reactivate_rebind(vm);

	if (!err && !xe_vm_in_lr_mode(vm)) {
		spin_lock(&xe->ttm.lru_lock);
		ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
		spin_unlock(&xe->ttm.lru_lock);
	}

err_repin:
	if (!xe_vm_in_lr_mode(vm))
		up_read(&vm->userptr.notifier_lock);
err_put_job:
	if (err)
		xe_sched_job_put(job);
err_exec:
	drm_exec_fini(exec);
err_unlock_list:
	if (write_locked)
		up_write(&vm->lock);
	else
		up_read(&vm->lock);
	if (err == -EAGAIN && !skip_retry)
		goto retry;
err_syncs:
	for (i = 0; i < num_syncs; i++)
		xe_sync_entry_cleanup(&syncs[i]);
	kfree(syncs);
err_exec_queue:
	xe_exec_queue_put(q);

	return err;
}