drivers/gpu/drm/xe/xe_vm.c
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5
6 #include "xe_vm.h"
7
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10
11 #include <drm/drm_exec.h>
12 #include <drm/drm_print.h>
13 #include <drm/ttm/ttm_execbuf_util.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21
22 #include <generated/xe_wa_oob.h>
23
24 #include "xe_assert.h"
25 #include "xe_bo.h"
26 #include "xe_device.h"
27 #include "xe_drm_client.h"
28 #include "xe_exec_queue.h"
29 #include "xe_gt.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_gt_tlb_invalidation.h"
32 #include "xe_migrate.h"
33 #include "xe_pat.h"
34 #include "xe_pm.h"
35 #include "xe_preempt_fence.h"
36 #include "xe_pt.h"
37 #include "xe_res_cursor.h"
38 #include "xe_sync.h"
39 #include "xe_trace.h"
40 #include "xe_wa.h"
41
42 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
43 {
44         return vm->gpuvm.r_obj;
45 }
46
47 /**
48  * xe_vma_userptr_check_repin() - Advisory check for repin needed
49  * @uvma: The userptr vma
50  *
51  * Check if the userptr vma has been invalidated since last successful
52  * repin. The check is advisory only and the function can be called
53  * without the vm->userptr.notifier_lock held. There is no guarantee that the
54  * vma userptr will remain valid after a lockless check, so typically
55  * the call needs to be followed by a proper check under the notifier_lock.
56  *
57  * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
58  */
59 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
60 {
61         return mmu_interval_check_retry(&uvma->userptr.notifier,
62                                         uvma->userptr.notifier_seq) ?
63                 -EAGAIN : 0;
64 }
65
66 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
67 {
68         struct xe_userptr *userptr = &uvma->userptr;
69         struct xe_vma *vma = &uvma->vma;
70         struct xe_vm *vm = xe_vma_vm(vma);
71         struct xe_device *xe = vm->xe;
72         const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
73         struct page **pages;
74         bool in_kthread = !current->mm;
75         unsigned long notifier_seq;
76         int pinned, ret, i;
77         bool read_only = xe_vma_read_only(vma);
78
79         lockdep_assert_held(&vm->lock);
80         xe_assert(xe, xe_vma_is_userptr(vma));
81 retry:
82         if (vma->gpuva.flags & XE_VMA_DESTROYED)
83                 return 0;
84
85         notifier_seq = mmu_interval_read_begin(&userptr->notifier);
86         if (notifier_seq == userptr->notifier_seq)
87                 return 0;
88
89         pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
90         if (!pages)
91                 return -ENOMEM;
92
93         if (userptr->sg) {
94                 dma_unmap_sgtable(xe->drm.dev,
95                                   userptr->sg,
96                                   read_only ? DMA_TO_DEVICE :
97                                   DMA_BIDIRECTIONAL, 0);
98                 sg_free_table(userptr->sg);
99                 userptr->sg = NULL;
100         }
101
102         pinned = ret = 0;
103         if (in_kthread) {
104                 if (!mmget_not_zero(userptr->notifier.mm)) {
105                         ret = -EFAULT;
106                         goto mm_closed;
107                 }
108                 kthread_use_mm(userptr->notifier.mm);
109         }
110
111         while (pinned < num_pages) {
112                 ret = get_user_pages_fast(xe_vma_userptr(vma) +
113                                           pinned * PAGE_SIZE,
114                                           num_pages - pinned,
115                                           read_only ? 0 : FOLL_WRITE,
116                                           &pages[pinned]);
117                 if (ret < 0)
118                         break;
119
120                 pinned += ret;
121                 ret = 0;
122         }
123
124         if (in_kthread) {
125                 kthread_unuse_mm(userptr->notifier.mm);
126                 mmput(userptr->notifier.mm);
127         }
128 mm_closed:
129         if (ret)
130                 goto out;
131
132         ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
133                                                 pinned, 0,
134                                                 (u64)pinned << PAGE_SHIFT,
135                                                 xe_sg_segment_size(xe->drm.dev),
136                                                 GFP_KERNEL);
137         if (ret) {
138                 userptr->sg = NULL;
139                 goto out;
140         }
141         userptr->sg = &userptr->sgt;
142
143         ret = dma_map_sgtable(xe->drm.dev, userptr->sg,
144                               read_only ? DMA_TO_DEVICE :
145                               DMA_BIDIRECTIONAL,
146                               DMA_ATTR_SKIP_CPU_SYNC |
147                               DMA_ATTR_NO_KERNEL_MAPPING);
148         if (ret) {
149                 sg_free_table(userptr->sg);
150                 userptr->sg = NULL;
151                 goto out;
152         }
153
154         for (i = 0; i < pinned; ++i) {
155                 if (!read_only) {
156                         lock_page(pages[i]);
157                         set_page_dirty(pages[i]);
158                         unlock_page(pages[i]);
159                 }
160
161                 mark_page_accessed(pages[i]);
162         }
163
164 out:
165         release_pages(pages, pinned);
166         kvfree(pages);
167
168         if (!(ret < 0)) {
169                 userptr->notifier_seq = notifier_seq;
170                 if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
171                         goto retry;
172         }
173
174         return ret < 0 ? ret : 0;
175 }
176
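/*
 * Returns true if any compute exec queue on the VM is missing its preempt
 * fence or has a preempt fence whose software signaling has already been
 * enabled, i.e. a preemption is pending.
 */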
177 static bool preempt_fences_waiting(struct xe_vm *vm)
178 {
179         struct xe_exec_queue *q;
180
181         lockdep_assert_held(&vm->lock);
182         xe_vm_assert_held(vm);
183
184         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
185                 if (!q->compute.pfence ||
186                     (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
187                                                    &q->compute.pfence->flags))) {
188                         return true;
189                 }
190         }
191
192         return false;
193 }
194
195 static void free_preempt_fences(struct list_head *list)
196 {
197         struct list_head *link, *next;
198
199         list_for_each_safe(link, next, list)
200                 xe_preempt_fence_free(to_preempt_fence_from_link(link));
201 }
202
203 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
204                                 unsigned int *count)
205 {
206         lockdep_assert_held(&vm->lock);
207         xe_vm_assert_held(vm);
208
209         if (*count >= vm->preempt.num_exec_queues)
210                 return 0;
211
212         for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
213                 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
214
215                 if (IS_ERR(pfence))
216                         return PTR_ERR(pfence);
217
218                 list_move_tail(xe_preempt_fence_link(pfence), list);
219         }
220
221         return 0;
222 }
223
224 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
225 {
226         struct xe_exec_queue *q;
227
228         xe_vm_assert_held(vm);
229
230         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
231                 if (q->compute.pfence) {
232                         long timeout = dma_fence_wait(q->compute.pfence, false);
233
234                         if (timeout < 0)
235                                 return -ETIME;
236                         dma_fence_put(q->compute.pfence);
237                         q->compute.pfence = NULL;
238                 }
239         }
240
241         return 0;
242 }
243
244 static bool xe_vm_is_idle(struct xe_vm *vm)
245 {
246         struct xe_exec_queue *q;
247
248         xe_vm_assert_held(vm);
249         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
250                 if (!xe_exec_queue_is_idle(q))
251                         return false;
252         }
253
254         return true;
255 }
256
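/*
 * Consume one pre-allocated fence from @list per exec queue, arm it with the
 * queue's preemption context and next seqno, and install it as the queue's
 * new preempt fence, dropping the reference to the old one.
 */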
257 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
258 {
259         struct list_head *link;
260         struct xe_exec_queue *q;
261
262         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
263                 struct dma_fence *fence;
264
265                 link = list->next;
266                 xe_assert(vm->xe, link != list);
267
268                 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
269                                              q, q->compute.context,
270                                              ++q->compute.seqno);
271                 dma_fence_put(q->compute.pfence);
272                 q->compute.pfence = fence;
273         }
274 }
275
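/*
 * Reserve fence slots on @bo's reservation object and add each compute exec
 * queue's current preempt fence to it with BOOKKEEP usage.
 */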
276 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
277 {
278         struct xe_exec_queue *q;
279         int err;
280
281         if (!vm->preempt.num_exec_queues)
282                 return 0;
283
284         err = xe_bo_lock(bo, true);
285         if (err)
286                 return err;
287
288         err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
289         if (err)
290                 goto out_unlock;
291
292         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
293                 if (q->compute.pfence) {
294                         dma_resv_add_fence(bo->ttm.base.resv,
295                                            q->compute.pfence,
296                                            DMA_RESV_USAGE_BOOKKEEP);
297                 }
298
299 out_unlock:
300         xe_bo_unlock(bo);
301         return err;
302 }
303
304 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
305                                                 struct drm_exec *exec)
306 {
307         struct xe_exec_queue *q;
308
309         lockdep_assert_held(&vm->lock);
310         xe_vm_assert_held(vm);
311
312         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
313                 q->ops->resume(q);
314
315                 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence,
316                                          DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
317         }
318 }
319
320 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
321 {
322         struct drm_gpuvm_exec vm_exec = {
323                 .vm = &vm->gpuvm,
324                 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
325                 .num_fences = 1,
326         };
327         struct drm_exec *exec = &vm_exec.exec;
328         struct dma_fence *pfence;
329         int err;
330         bool wait;
331
332         xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
333
334         down_write(&vm->lock);
335         err = drm_gpuvm_exec_lock(&vm_exec);
336         if (err)
337                 goto out_up_write;
338
339         pfence = xe_preempt_fence_create(q, q->compute.context,
340                                          ++q->compute.seqno);
341         if (!pfence) {
342                 err = -ENOMEM;
343                 goto out_fini;
344         }
345
346         list_add(&q->compute.link, &vm->preempt.exec_queues);
347         ++vm->preempt.num_exec_queues;
348         q->compute.pfence = pfence;
349
350         down_read(&vm->userptr.notifier_lock);
351
352         drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
353                                  DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
354
355         /*
356          * Check to see if a preemption on the VM or a userptr invalidation is
357          * in flight; if so, trigger this preempt fence to sync state with the
358          * other preempt fences on the VM.
359          */
360         wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
361         if (wait)
362                 dma_fence_enable_sw_signaling(pfence);
363
364         up_read(&vm->userptr.notifier_lock);
365
366 out_fini:
367         drm_exec_fini(exec);
368 out_up_write:
369         up_write(&vm->lock);
370
371         return err;
372 }
373
374 /**
375  * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
376  * @vm: The VM.
377  * @q: The exec_queue
378  */
379 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
380 {
381         if (!xe_vm_in_preempt_fence_mode(vm))
382                 return;
383
384         down_write(&vm->lock);
385         list_del(&q->compute.link);
386         --vm->preempt.num_exec_queues;
387         if (q->compute.pfence) {
388                 dma_fence_enable_sw_signaling(q->compute.pfence);
389                 dma_fence_put(q->compute.pfence);
390                 q->compute.pfence = NULL;
391         }
392         up_write(&vm->lock);
393 }
394
395 /**
396  * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
397  * that need repinning.
398  * @vm: The VM.
399  *
400  * This function checks for whether the VM has userptrs that need repinning,
401  * and provides a release-type barrier on the userptr.notifier_lock after
402  * checking.
403  *
404  * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
405  */
406 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
407 {
408         lockdep_assert_held_read(&vm->userptr.notifier_lock);
409
410         return (list_empty(&vm->userptr.repin_list) &&
411                 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
412 }
413
414 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
415
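/*
 * Ban the VM and kill all compute exec queues attached to it; the rebind
 * worker uses this when it hits an unrecoverable error.
 */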
416 static void xe_vm_kill(struct xe_vm *vm)
417 {
418         struct xe_exec_queue *q;
419
420         lockdep_assert_held(&vm->lock);
421
422         xe_vm_lock(vm, false);
423         vm->flags |= XE_VM_FLAG_BANNED;
424         trace_xe_vm_kill(vm);
425
426         list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
427                 q->ops->kill(q);
428         xe_vm_unlock(vm);
429
430         /* TODO: Inform user the VM is banned */
431 }
432
433 /**
434  * xe_vm_validate_should_retry() - Whether to retry after a validate error.
435  * @exec: The drm_exec object used for locking before validation.
436  * @err: The error returned from ttm_bo_validate().
437  * @end: A ktime_t cookie that should be set to 0 before first use and
438  * that should be reused on subsequent calls.
439  *
440  * With multiple active VMs, under memory pressure, it is possible that
441  * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
442  * Until ttm properly handles locking in such scenarios, the best thing the
443  * driver can do is retry with a timeout. Check if that is necessary, and
444  * if so unlock the drm_exec's objects while keeping the ticket to prepare
445  * for a rerun.
446  *
447  * Return: true if a retry after drm_exec_init() is recommended;
448  * false otherwise.
449  */
450 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
451 {
452         ktime_t cur;
453
454         if (err != -ENOMEM)
455                 return false;
456
457         cur = ktime_get();
458         *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
459         if (!ktime_before(cur, *end))
460                 return false;
461
462         msleep(20);
463         return true;
464 }
465
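/*
 * drm_gpuvm validate callback: queue every VMA bound to this BO for rebind
 * and revalidate the BO itself.
 */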
466 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
467 {
468         struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
469         struct drm_gpuva *gpuva;
470         int ret;
471
472         lockdep_assert_held(&vm->lock);
473         drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
474                 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
475                                &vm->rebind_list);
476
477         ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
478         if (ret)
479                 return ret;
480
481         vm_bo->evicted = false;
482         return 0;
483 }
484
485 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
486                                  bool *done)
487 {
488         int err;
489
490         /*
491          * 1 fence for each preempt fence plus a fence for each tile from a
492          * possible rebind
493          */
494         err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
495                                    vm->xe->info.tile_count);
496         if (err)
497                 return err;
498
499         if (xe_vm_is_idle(vm)) {
500                 vm->preempt.rebind_deactivated = true;
501                 *done = true;
502                 return 0;
503         }
504
505         if (!preempt_fences_waiting(vm)) {
506                 *done = true;
507                 return 0;
508         }
509
510         err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
511         if (err)
512                 return err;
513
514         err = wait_for_existing_preempt_fences(vm);
515         if (err)
516                 return err;
517
518         return drm_gpuvm_validate(&vm->gpuvm, exec);
519 }
520
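/*
 * Rebind worker for preempt-fence mode VMs: repin invalidated userptrs, lock
 * the VM and its external BOs, wait for existing preempt fences, rebind
 * evicted/invalidated VMAs, then arm and reinstall fresh preempt fences.
 * Retries on -EAGAIN; any other error bans the VM.
 */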
521 static void preempt_rebind_work_func(struct work_struct *w)
522 {
523         struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
524         struct drm_exec exec;
525         struct dma_fence *rebind_fence;
526         unsigned int fence_count = 0;
527         LIST_HEAD(preempt_fences);
528         ktime_t end = 0;
529         int err = 0;
530         long wait;
531         int __maybe_unused tries = 0;
532
533         xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
534         trace_xe_vm_rebind_worker_enter(vm);
535
536         down_write(&vm->lock);
537
538         if (xe_vm_is_closed_or_banned(vm)) {
539                 up_write(&vm->lock);
540                 trace_xe_vm_rebind_worker_exit(vm);
541                 return;
542         }
543
544 retry:
545         if (xe_vm_userptr_check_repin(vm)) {
546                 err = xe_vm_userptr_pin(vm);
547                 if (err)
548                         goto out_unlock_outer;
549         }
550
551         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
552
553         drm_exec_until_all_locked(&exec) {
554                 bool done = false;
555
556                 err = xe_preempt_work_begin(&exec, vm, &done);
557                 drm_exec_retry_on_contention(&exec);
558                 if (err || done) {
559                         drm_exec_fini(&exec);
560                         if (err && xe_vm_validate_should_retry(&exec, err, &end))
561                                 err = -EAGAIN;
562
563                         goto out_unlock_outer;
564                 }
565         }
566
567         err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
568         if (err)
569                 goto out_unlock;
570
571         rebind_fence = xe_vm_rebind(vm, true);
572         if (IS_ERR(rebind_fence)) {
573                 err = PTR_ERR(rebind_fence);
574                 goto out_unlock;
575         }
576
577         if (rebind_fence) {
578                 dma_fence_wait(rebind_fence, false);
579                 dma_fence_put(rebind_fence);
580         }
581
582         /* Wait on munmap style VM unbinds */
583         wait = dma_resv_wait_timeout(xe_vm_resv(vm),
584                                      DMA_RESV_USAGE_KERNEL,
585                                      false, MAX_SCHEDULE_TIMEOUT);
586         if (wait <= 0) {
587                 err = -ETIME;
588                 goto out_unlock;
589         }
590
591 #define retry_required(__tries, __vm) \
592         (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
593         (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
594         __xe_vm_userptr_needs_repin(__vm))
595
596         down_read(&vm->userptr.notifier_lock);
597         if (retry_required(tries, vm)) {
598                 up_read(&vm->userptr.notifier_lock);
599                 err = -EAGAIN;
600                 goto out_unlock;
601         }
602
603 #undef retry_required
604
605         spin_lock(&vm->xe->ttm.lru_lock);
606         ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
607         spin_unlock(&vm->xe->ttm.lru_lock);
608
609         /* Point of no return. */
610         arm_preempt_fences(vm, &preempt_fences);
611         resume_and_reinstall_preempt_fences(vm, &exec);
612         up_read(&vm->userptr.notifier_lock);
613
614 out_unlock:
615         drm_exec_fini(&exec);
616 out_unlock_outer:
617         if (err == -EAGAIN) {
618                 trace_xe_vm_rebind_worker_retry(vm);
619                 goto retry;
620         }
621
622         if (err) {
623                 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
624                 xe_vm_kill(vm);
625         }
626         up_write(&vm->lock);
627
628         free_preempt_fences(&preempt_fences);
629
630         trace_xe_vm_rebind_worker_exit(vm);
631 }
632
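/*
 * MMU interval notifier callback: bump the notifier sequence, queue the
 * userptr VMA for repin where needed, wait for in-flight GPU work on the VM
 * and, in fault mode, invalidate the GPU mapping immediately.
 */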
633 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
634                                    const struct mmu_notifier_range *range,
635                                    unsigned long cur_seq)
636 {
637         struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
638         struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
639         struct xe_vma *vma = &uvma->vma;
640         struct xe_vm *vm = xe_vma_vm(vma);
641         struct dma_resv_iter cursor;
642         struct dma_fence *fence;
643         long err;
644
645         xe_assert(vm->xe, xe_vma_is_userptr(vma));
646         trace_xe_vma_userptr_invalidate(vma);
647
648         if (!mmu_notifier_range_blockable(range))
649                 return false;
650
651         down_write(&vm->userptr.notifier_lock);
652         mmu_interval_set_seq(mni, cur_seq);
653
654         /* No need to stop gpu access if the userptr is not yet bound. */
655         if (!userptr->initial_bind) {
656                 up_write(&vm->userptr.notifier_lock);
657                 return true;
658         }
659
660         /*
661          * Tell exec and rebind worker they need to repin and rebind this
662          * userptr.
663          */
664         if (!xe_vm_in_fault_mode(vm) &&
665             !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
666                 spin_lock(&vm->userptr.invalidated_lock);
667                 list_move_tail(&userptr->invalidate_link,
668                                &vm->userptr.invalidated);
669                 spin_unlock(&vm->userptr.invalidated_lock);
670         }
671
672         up_write(&vm->userptr.notifier_lock);
673
674         /*
675          * Preempt fences turn into schedule disables, pipeline these.
676          * Note that even in fault mode, we need to wait for binds and
677          * unbinds to complete, and those are attached as BOOKKEEP fences
678          * to the vm.
679          */
680         dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
681                             DMA_RESV_USAGE_BOOKKEEP);
682         dma_resv_for_each_fence_unlocked(&cursor, fence)
683                 dma_fence_enable_sw_signaling(fence);
684         dma_resv_iter_end(&cursor);
685
686         err = dma_resv_wait_timeout(xe_vm_resv(vm),
687                                     DMA_RESV_USAGE_BOOKKEEP,
688                                     false, MAX_SCHEDULE_TIMEOUT);
689         XE_WARN_ON(err <= 0);
690
691         if (xe_vm_in_fault_mode(vm)) {
692                 err = xe_vm_invalidate_vma(vma);
693                 XE_WARN_ON(err);
694         }
695
696         trace_xe_vma_userptr_invalidate_complete(vma);
697
698         return true;
699 }
700
701 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
702         .invalidate = vma_userptr_invalidate,
703 };
704
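/*
 * Move all invalidated userptr VMAs onto the repin list, pin their pages and
 * queue them on the VM's rebind list.
 */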
705 int xe_vm_userptr_pin(struct xe_vm *vm)
706 {
707         struct xe_userptr_vma *uvma, *next;
708         int err = 0;
709         LIST_HEAD(tmp_evict);
710
711         lockdep_assert_held_write(&vm->lock);
712
713         /* Collect invalidated userptrs */
714         spin_lock(&vm->userptr.invalidated_lock);
715         list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
716                                  userptr.invalidate_link) {
717                 list_del_init(&uvma->userptr.invalidate_link);
718                 list_move_tail(&uvma->userptr.repin_link,
719                                &vm->userptr.repin_list);
720         }
721         spin_unlock(&vm->userptr.invalidated_lock);
722
723         /* Pin and move to temporary list */
724         list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
725                                  userptr.repin_link) {
726                 err = xe_vma_userptr_pin_pages(uvma);
727                 if (err < 0)
728                         return err;
729
730                 list_del_init(&uvma->userptr.repin_link);
731                 list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list);
732         }
733
734         return 0;
735 }
736
737 /**
738  * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
739  * that need repinning.
740  * @vm: The VM.
741  *
742  * This function does an advisory check for whether the VM has userptrs that
743  * need repinning.
744  *
745  * Return: 0 if there are no indications of userptrs needing repinning,
746  * -EAGAIN if there are.
747  */
748 int xe_vm_userptr_check_repin(struct xe_vm *vm)
749 {
750         return (list_empty_careful(&vm->userptr.repin_list) &&
751                 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
752 }
753
754 static struct dma_fence *
755 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
756                struct xe_sync_entry *syncs, u32 num_syncs,
757                bool first_op, bool last_op);
758
759 struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
760 {
761         struct dma_fence *fence = NULL;
762         struct xe_vma *vma, *next;
763
764         lockdep_assert_held(&vm->lock);
765         if (xe_vm_in_lr_mode(vm) && !rebind_worker)
766                 return NULL;
767
768         xe_vm_assert_held(vm);
769         list_for_each_entry_safe(vma, next, &vm->rebind_list,
770                                  combined_links.rebind) {
771                 xe_assert(vm->xe, vma->tile_present);
772
773                 list_del_init(&vma->combined_links.rebind);
774                 dma_fence_put(fence);
775                 if (rebind_worker)
776                         trace_xe_vma_rebind_worker(vma);
777                 else
778                         trace_xe_vma_rebind_exec(vma);
779                 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
780                 if (IS_ERR(fence))
781                         return fence;
782         }
783
784         return fence;
785 }
786
787 static void xe_vma_free(struct xe_vma *vma)
788 {
789         if (xe_vma_is_userptr(vma))
790                 kfree(to_userptr_vma(vma));
791         else
792                 kfree(vma);
793 }
794
795 #define VMA_CREATE_FLAG_READ_ONLY       BIT(0)
796 #define VMA_CREATE_FLAG_IS_NULL         BIT(1)
797 #define VMA_CREATE_FLAG_DUMPABLE        BIT(2)
798
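/*
 * Allocate and initialize a VMA. BO-backed VMAs are linked to the BO's
 * gpuvm_bo; VMAs without a BO are either NULL/sparse mappings or userptrs,
 * for which an MMU interval notifier is registered.
 */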
799 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
800                                     struct xe_bo *bo,
801                                     u64 bo_offset_or_userptr,
802                                     u64 start, u64 end,
803                                     u16 pat_index, unsigned int flags)
804 {
805         struct xe_vma *vma;
806         struct xe_tile *tile;
807         u8 id;
808         bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
809         bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
810         bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
811
812         xe_assert(vm->xe, start < end);
813         xe_assert(vm->xe, end < vm->size);
814
815         /*
816          * Allocate and ensure that the xe_vma_is_userptr() return
817          * matches what was allocated.
818          */
819         if (!bo && !is_null) {
820                 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
821
822                 if (!uvma)
823                         return ERR_PTR(-ENOMEM);
824
825                 vma = &uvma->vma;
826         } else {
827                 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
828                 if (!vma)
829                         return ERR_PTR(-ENOMEM);
830
831                 if (is_null)
832                         vma->gpuva.flags |= DRM_GPUVA_SPARSE;
833                 if (bo)
834                         vma->gpuva.gem.obj = &bo->ttm.base;
835         }
836
837         INIT_LIST_HEAD(&vma->combined_links.rebind);
838
839         INIT_LIST_HEAD(&vma->gpuva.gem.entry);
840         vma->gpuva.vm = &vm->gpuvm;
841         vma->gpuva.va.addr = start;
842         vma->gpuva.va.range = end - start + 1;
843         if (read_only)
844                 vma->gpuva.flags |= XE_VMA_READ_ONLY;
845         if (dumpable)
846                 vma->gpuva.flags |= XE_VMA_DUMPABLE;
847
848         for_each_tile(tile, vm->xe, id)
849                 vma->tile_mask |= 0x1 << id;
850
851         if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
852                 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
853
854         vma->pat_index = pat_index;
855
856         if (bo) {
857                 struct drm_gpuvm_bo *vm_bo;
858
859                 xe_bo_assert_held(bo);
860
861                 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
862                 if (IS_ERR(vm_bo)) {
863                         xe_vma_free(vma);
864                         return ERR_CAST(vm_bo);
865                 }
866
867                 drm_gpuvm_bo_extobj_add(vm_bo);
868                 drm_gem_object_get(&bo->ttm.base);
869                 vma->gpuva.gem.offset = bo_offset_or_userptr;
870                 drm_gpuva_link(&vma->gpuva, vm_bo);
871                 drm_gpuvm_bo_put(vm_bo);
872         } else /* userptr or null */ {
873                 if (!is_null) {
874                         struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
875                         u64 size = end - start + 1;
876                         int err;
877
878                         INIT_LIST_HEAD(&userptr->invalidate_link);
879                         INIT_LIST_HEAD(&userptr->repin_link);
880                         vma->gpuva.gem.offset = bo_offset_or_userptr;
881
882                         err = mmu_interval_notifier_insert(&userptr->notifier,
883                                                            current->mm,
884                                                            xe_vma_userptr(vma), size,
885                                                            &vma_userptr_notifier_ops);
886                         if (err) {
887                                 xe_vma_free(vma);
888                                 return ERR_PTR(err);
889                         }
890
891                         userptr->notifier_seq = LONG_MAX;
892                 }
893
894                 xe_vm_get(vm);
895         }
896
897         return vma;
898 }
899
900 static void xe_vma_destroy_late(struct xe_vma *vma)
901 {
902         struct xe_vm *vm = xe_vma_vm(vma);
903         struct xe_device *xe = vm->xe;
904         bool read_only = xe_vma_read_only(vma);
905
906         if (vma->ufence) {
907                 xe_sync_ufence_put(vma->ufence);
908                 vma->ufence = NULL;
909         }
910
911         if (xe_vma_is_userptr(vma)) {
912                 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
913
914                 if (userptr->sg) {
915                         dma_unmap_sgtable(xe->drm.dev,
916                                           userptr->sg,
917                                           read_only ? DMA_TO_DEVICE :
918                                           DMA_BIDIRECTIONAL, 0);
919                         sg_free_table(userptr->sg);
920                         userptr->sg = NULL;
921                 }
922
923                 /*
924                  * Since userptr pages are not pinned, we can't remove
925                  * the notifier until we're sure the GPU is not accessing
926                  * them anymore
927                  */
928                 mmu_interval_notifier_remove(&userptr->notifier);
929                 xe_vm_put(vm);
930         } else if (xe_vma_is_null(vma)) {
931                 xe_vm_put(vm);
932         } else {
933                 xe_bo_put(xe_vma_bo(vma));
934         }
935
936         xe_vma_free(vma);
937 }
938
939 static void vma_destroy_work_func(struct work_struct *w)
940 {
941         struct xe_vma *vma =
942                 container_of(w, struct xe_vma, destroy_work);
943
944         xe_vma_destroy_late(vma);
945 }
946
947 static void vma_destroy_cb(struct dma_fence *fence,
948                            struct dma_fence_cb *cb)
949 {
950         struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
951
952         INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
953         queue_work(system_unbound_wq, &vma->destroy_work);
954 }
955
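/*
 * Unlink the VMA from its VM (userptr invalidation list or BO gpuva list) and
 * defer the final teardown until @fence signals, or do it immediately if no
 * fence is given.
 */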
956 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
957 {
958         struct xe_vm *vm = xe_vma_vm(vma);
959
960         lockdep_assert_held_write(&vm->lock);
961         xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
962
963         if (xe_vma_is_userptr(vma)) {
964                 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
965
966                 spin_lock(&vm->userptr.invalidated_lock);
967                 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
968                 spin_unlock(&vm->userptr.invalidated_lock);
969         } else if (!xe_vma_is_null(vma)) {
970                 xe_bo_assert_held(xe_vma_bo(vma));
971
972                 drm_gpuva_unlink(&vma->gpuva);
973         }
974
975         xe_vm_assert_held(vm);
976         if (fence) {
977                 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
978                                                  vma_destroy_cb);
979
980                 if (ret) {
981                         XE_WARN_ON(ret != -ENOENT);
982                         xe_vma_destroy_late(vma);
983                 }
984         } else {
985                 xe_vma_destroy_late(vma);
986         }
987 }
988
989 /**
990  * xe_vm_prepare_vma() - drm_exec utility to lock a vma
991  * @exec: The drm_exec object we're currently locking for.
992  * @vma: The vma for which we want to lock the vm resv and any attached
993  * object's resv.
994  * @num_shared: The number of dma-fence slots to pre-allocate in the
995  * objects' reservation objects.
996  *
997  * Return: 0 on success, negative error code on error. In particular
998  * may return -EDEADLK on WW transaction contention and -EINTR if
999  * an interruptible wait is terminated by a signal.
1000  */
1001 int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
1002                       unsigned int num_shared)
1003 {
1004         struct xe_vm *vm = xe_vma_vm(vma);
1005         struct xe_bo *bo = xe_vma_bo(vma);
1006         int err;
1007
1008         XE_WARN_ON(!vm);
1009         if (num_shared)
1010                 err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
1011         else
1012                 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1013         if (!err && bo && !bo->vm) {
1014                 if (num_shared)
1015                         err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
1016                 else
1017                         err = drm_exec_lock_obj(exec, &bo->ttm.base);
1018         }
1019
1020         return err;
1021 }
1022
1023 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1024 {
1025         struct drm_exec exec;
1026         int err;
1027
1028         drm_exec_init(&exec, 0, 0);
1029         drm_exec_until_all_locked(&exec) {
1030                 err = xe_vm_prepare_vma(&exec, vma, 0);
1031                 drm_exec_retry_on_contention(&exec);
1032                 if (XE_WARN_ON(err))
1033                         break;
1034         }
1035
1036         xe_vma_destroy(vma, NULL);
1037
1038         drm_exec_fini(&exec);
1039 }
1040
1041 struct xe_vma *
1042 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1043 {
1044         struct drm_gpuva *gpuva;
1045
1046         lockdep_assert_held(&vm->lock);
1047
1048         if (xe_vm_is_closed_or_banned(vm))
1049                 return NULL;
1050
1051         xe_assert(vm->xe, start + range <= vm->size);
1052
1053         gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1054
1055         return gpuva ? gpuva_to_vma(gpuva) : NULL;
1056 }
1057
1058 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1059 {
1060         int err;
1061
1062         xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1063         lockdep_assert_held(&vm->lock);
1064
1065         mutex_lock(&vm->snap_mutex);
1066         err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1067         mutex_unlock(&vm->snap_mutex);
1068         XE_WARN_ON(err);        /* Shouldn't be possible */
1069
1070         return err;
1071 }
1072
1073 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1074 {
1075         xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1076         lockdep_assert_held(&vm->lock);
1077
1078         mutex_lock(&vm->snap_mutex);
1079         drm_gpuva_remove(&vma->gpuva);
1080         mutex_unlock(&vm->snap_mutex);
1081         if (vm->usm.last_fault_vma == vma)
1082                 vm->usm.last_fault_vma = NULL;
1083 }
1084
1085 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1086 {
1087         struct xe_vma_op *op;
1088
1089         op = kzalloc(sizeof(*op), GFP_KERNEL);
1090
1091         if (unlikely(!op))
1092                 return NULL;
1093
1094         return &op->base;
1095 }
1096
1097 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1098
1099 static const struct drm_gpuvm_ops gpuvm_ops = {
1100         .op_alloc = xe_vm_op_alloc,
1101         .vm_bo_validate = xe_gpuvm_validate,
1102         .vm_free = xe_vm_free,
1103 };
1104
1105 static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
1106 {
1107         u64 pte = 0;
1108
1109         if (pat_index & BIT(0))
1110                 pte |= XE_PPGTT_PTE_PAT0;
1111
1112         if (pat_index & BIT(1))
1113                 pte |= XE_PPGTT_PTE_PAT1;
1114
1115         return pte;
1116 }
1117
1118 static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
1119                                 u32 pt_level)
1120 {
1121         u64 pte = 0;
1122
1123         if (pat_index & BIT(0))
1124                 pte |= XE_PPGTT_PTE_PAT0;
1125
1126         if (pat_index & BIT(1))
1127                 pte |= XE_PPGTT_PTE_PAT1;
1128
1129         if (pat_index & BIT(2)) {
1130                 if (pt_level)
1131                         pte |= XE_PPGTT_PDE_PDPE_PAT2;
1132                 else
1133                         pte |= XE_PPGTT_PTE_PAT2;
1134         }
1135
1136         if (pat_index & BIT(3))
1137                 pte |= XELPG_PPGTT_PTE_PAT3;
1138
1139         if (pat_index & (BIT(4)))
1140                 pte |= XE2_PPGTT_PTE_PAT4;
1141
1142         return pte;
1143 }
1144
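/* Encode the page-size bits for a huge PTE: 2M at level 1, 1G at level 2. */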
1145 static u64 pte_encode_ps(u32 pt_level)
1146 {
1147         XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1148
1149         if (pt_level == 1)
1150                 return XE_PDE_PS_2M;
1151         else if (pt_level == 2)
1152                 return XE_PDPE_PS_1G;
1153
1154         return 0;
1155 }
1156
1157 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1158                               const u16 pat_index)
1159 {
1160         struct xe_device *xe = xe_bo_device(bo);
1161         u64 pde;
1162
1163         pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1164         pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1165         pde |= pde_encode_pat_index(xe, pat_index);
1166
1167         return pde;
1168 }
1169
1170 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1171                               u16 pat_index, u32 pt_level)
1172 {
1173         struct xe_device *xe = xe_bo_device(bo);
1174         u64 pte;
1175
1176         pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1177         pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1178         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1179         pte |= pte_encode_ps(pt_level);
1180
1181         if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1182                 pte |= XE_PPGTT_PTE_DM;
1183
1184         return pte;
1185 }
1186
1187 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1188                                u16 pat_index, u32 pt_level)
1189 {
1190         struct xe_device *xe = xe_vma_vm(vma)->xe;
1191
1192         pte |= XE_PAGE_PRESENT;
1193
1194         if (likely(!xe_vma_read_only(vma)))
1195                 pte |= XE_PAGE_RW;
1196
1197         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1198         pte |= pte_encode_ps(pt_level);
1199
1200         if (unlikely(xe_vma_is_null(vma)))
1201                 pte |= XE_PTE_NULL;
1202
1203         return pte;
1204 }
1205
1206 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1207                                 u16 pat_index,
1208                                 u32 pt_level, bool devmem, u64 flags)
1209 {
1210         u64 pte;
1211
1212         /* Avoid passing random bits directly as flags */
1213         xe_assert(xe, !(flags & ~XE_PTE_PS64));
1214
1215         pte = addr;
1216         pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1217         pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1218         pte |= pte_encode_ps(pt_level);
1219
1220         if (devmem)
1221                 pte |= XE_PPGTT_PTE_DM;
1222
1223         pte |= flags;
1224
1225         return pte;
1226 }
1227
1228 static const struct xe_pt_ops xelp_pt_ops = {
1229         .pte_encode_bo = xelp_pte_encode_bo,
1230         .pte_encode_vma = xelp_pte_encode_vma,
1231         .pte_encode_addr = xelp_pte_encode_addr,
1232         .pde_encode_bo = xelp_pde_encode_bo,
1233 };
1234
1235 static void vm_destroy_work_func(struct work_struct *w);
1236
1237 /**
1238  * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1239  * given tile and vm.
1240  * @xe: xe device.
1241  * @tile: tile to set up for.
1242  * @vm: vm to set up for.
1243  *
1244  * Sets up a pagetable tree with one page-table per level and a single
1245  * leaf PTE. All pagetable entries point to the single page-table or,
1246  * for MAX_HUGEPTE_LEVEL, to a NULL huge PTE that returns 0 on reads and
1247  * turns writes into NOPs.
1248  *
1249  * Return: 0 on success, negative error code on error.
1250  */
1251 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1252                                 struct xe_vm *vm)
1253 {
1254         u8 id = tile->id;
1255         int i;
1256
1257         for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1258                 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1259                 if (IS_ERR(vm->scratch_pt[id][i]))
1260                         return PTR_ERR(vm->scratch_pt[id][i]);
1261
1262                 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1263         }
1264
1265         return 0;
1266 }
1267
1268 static void xe_vm_free_scratch(struct xe_vm *vm)
1269 {
1270         struct xe_tile *tile;
1271         u8 id;
1272
1273         if (!xe_vm_has_scratch(vm))
1274                 return;
1275
1276         for_each_tile(tile, vm->xe, id) {
1277                 u32 i;
1278
1279                 if (!vm->pt_root[id])
1280                         continue;
1281
1282                 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1283                         if (vm->scratch_pt[id][i])
1284                                 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1285         }
1286 }
1287
1288 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1289 {
1290         struct drm_gem_object *vm_resv_obj;
1291         struct xe_vm *vm;
1292         int err, number_tiles = 0;
1293         struct xe_tile *tile;
1294         u8 id;
1295
1296         vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1297         if (!vm)
1298                 return ERR_PTR(-ENOMEM);
1299
1300         vm->xe = xe;
1301
1302         vm->size = 1ull << xe->info.va_bits;
1303
1304         vm->flags = flags;
1305
1306         init_rwsem(&vm->lock);
1307         mutex_init(&vm->snap_mutex);
1308
1309         INIT_LIST_HEAD(&vm->rebind_list);
1310
1311         INIT_LIST_HEAD(&vm->userptr.repin_list);
1312         INIT_LIST_HEAD(&vm->userptr.invalidated);
1313         init_rwsem(&vm->userptr.notifier_lock);
1314         spin_lock_init(&vm->userptr.invalidated_lock);
1315
1316         INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1317
1318         INIT_LIST_HEAD(&vm->preempt.exec_queues);
1319         vm->preempt.min_run_period_ms = 10;     /* FIXME: Wire up to uAPI */
1320
1321         for_each_tile(tile, xe, id)
1322                 xe_range_fence_tree_init(&vm->rftree[id]);
1323
1324         vm->pt_ops = &xelp_pt_ops;
1325
1326         if (!(flags & XE_VM_FLAG_MIGRATION))
1327                 xe_device_mem_access_get(xe);
1328
1329         vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1330         if (!vm_resv_obj) {
1331                 err = -ENOMEM;
1332                 goto err_no_resv;
1333         }
1334
1335         drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1336                        vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1337
1338         drm_gem_object_put(vm_resv_obj);
1339
1340         err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
1341         if (err)
1342                 goto err_close;
1343
1344         if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1345                 vm->flags |= XE_VM_FLAG_64K;
1346
1347         for_each_tile(tile, xe, id) {
1348                 if (flags & XE_VM_FLAG_MIGRATION &&
1349                     tile->id != XE_VM_FLAG_TILE_ID(flags))
1350                         continue;
1351
1352                 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1353                 if (IS_ERR(vm->pt_root[id])) {
1354                         err = PTR_ERR(vm->pt_root[id]);
1355                         vm->pt_root[id] = NULL;
1356                         goto err_unlock_close;
1357                 }
1358         }
1359
1360         if (xe_vm_has_scratch(vm)) {
1361                 for_each_tile(tile, xe, id) {
1362                         if (!vm->pt_root[id])
1363                                 continue;
1364
1365                         err = xe_vm_create_scratch(xe, tile, vm);
1366                         if (err)
1367                                 goto err_unlock_close;
1368                 }
1369                 vm->batch_invalidate_tlb = true;
1370         }
1371
1372         if (flags & XE_VM_FLAG_LR_MODE) {
1373                 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1374                 vm->flags |= XE_VM_FLAG_LR_MODE;
1375                 vm->batch_invalidate_tlb = false;
1376         }
1377
1378         /* Fill pt_root after allocating scratch tables */
1379         for_each_tile(tile, xe, id) {
1380                 if (!vm->pt_root[id])
1381                         continue;
1382
1383                 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1384         }
1385         dma_resv_unlock(xe_vm_resv(vm));
1386
1387         /* Kernel migration VM shouldn't have a circular loop.. */
1388         if (!(flags & XE_VM_FLAG_MIGRATION)) {
1389                 for_each_tile(tile, xe, id) {
1390                         struct xe_gt *gt = tile->primary_gt;
1391                         struct xe_vm *migrate_vm;
1392                         struct xe_exec_queue *q;
1393                         u32 create_flags = EXEC_QUEUE_FLAG_VM;
1394
1395                         if (!vm->pt_root[id])
1396                                 continue;
1397
1398                         migrate_vm = xe_migrate_get_vm(tile->migrate);
1399                         q = xe_exec_queue_create_class(xe, gt, migrate_vm,
1400                                                        XE_ENGINE_CLASS_COPY,
1401                                                        create_flags);
1402                         xe_vm_put(migrate_vm);
1403                         if (IS_ERR(q)) {
1404                                 err = PTR_ERR(q);
1405                                 goto err_close;
1406                         }
1407                         vm->q[id] = q;
1408                         number_tiles++;
1409                 }
1410         }
1411
1412         if (number_tiles > 1)
1413                 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1414
1415         mutex_lock(&xe->usm.lock);
1416         if (flags & XE_VM_FLAG_FAULT_MODE)
1417                 xe->usm.num_vm_in_fault_mode++;
1418         else if (!(flags & XE_VM_FLAG_MIGRATION))
1419                 xe->usm.num_vm_in_non_fault_mode++;
1420         mutex_unlock(&xe->usm.lock);
1421
1422         trace_xe_vm_create(vm);
1423
1424         return vm;
1425
1426 err_unlock_close:
1427         dma_resv_unlock(xe_vm_resv(vm));
1428 err_close:
1429         xe_vm_close_and_put(vm);
1430         return ERR_PTR(err);
1431
1432 err_no_resv:
1433         mutex_destroy(&vm->snap_mutex);
1434         for_each_tile(tile, xe, id)
1435                 xe_range_fence_tree_fini(&vm->rftree[id]);
1436         kfree(vm);
1437         if (!(flags & XE_VM_FLAG_MIGRATION))
1438                 xe_device_mem_access_put(xe);
1439         return ERR_PTR(err);
1440 }
1441
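/* Zeroing the size marks the VM as closed; the destroy worker asserts this. */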
1442 static void xe_vm_close(struct xe_vm *vm)
1443 {
1444         down_write(&vm->lock);
1445         vm->size = 0;
1446         up_write(&vm->lock);
1447 }
1448
1449 void xe_vm_close_and_put(struct xe_vm *vm)
1450 {
1451         LIST_HEAD(contested);
1452         struct xe_device *xe = vm->xe;
1453         struct xe_tile *tile;
1454         struct xe_vma *vma, *next_vma;
1455         struct drm_gpuva *gpuva, *next;
1456         u8 id;
1457
1458         xe_assert(xe, !vm->preempt.num_exec_queues);
1459
1460         xe_vm_close(vm);
1461         if (xe_vm_in_preempt_fence_mode(vm))
1462                 flush_work(&vm->preempt.rebind_work);
1463
1464         down_write(&vm->lock);
1465         for_each_tile(tile, xe, id) {
1466                 if (vm->q[id])
1467                         xe_exec_queue_last_fence_put(vm->q[id], vm);
1468         }
1469         up_write(&vm->lock);
1470
1471         for_each_tile(tile, xe, id) {
1472                 if (vm->q[id]) {
1473                         xe_exec_queue_kill(vm->q[id]);
1474                         xe_exec_queue_put(vm->q[id]);
1475                         vm->q[id] = NULL;
1476                 }
1477         }
1478
1479         down_write(&vm->lock);
1480         xe_vm_lock(vm, false);
1481         drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1482                 vma = gpuva_to_vma(gpuva);
1483
1484                 if (xe_vma_has_no_bo(vma)) {
1485                         down_read(&vm->userptr.notifier_lock);
1486                         vma->gpuva.flags |= XE_VMA_DESTROYED;
1487                         up_read(&vm->userptr.notifier_lock);
1488                 }
1489
1490                 xe_vm_remove_vma(vm, vma);
1491
1492                 /* easy case, remove from VMA? */
1493                 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1494                         list_del_init(&vma->combined_links.rebind);
1495                         xe_vma_destroy(vma, NULL);
1496                         continue;
1497                 }
1498
1499                 list_move_tail(&vma->combined_links.destroy, &contested);
1500                 vma->gpuva.flags |= XE_VMA_DESTROYED;
1501         }
1502
1503         /*
1504          * All vm operations will add shared fences to resv.
1505          * The only exception is eviction for a shared object,
1506          * but even so, the unbind when evicted would still
1507          * install a fence to resv. Hence it's safe to
1508          * destroy the pagetables immediately.
1509          */
1510         xe_vm_free_scratch(vm);
1511
1512         for_each_tile(tile, xe, id) {
1513                 if (vm->pt_root[id]) {
1514                         xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1515                         vm->pt_root[id] = NULL;
1516                 }
1517         }
1518         xe_vm_unlock(vm);
1519
1520         /*
1521          * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL.
1522          * Since we hold a refcount to the bo, we can remove and free
1523          * the members safely without locking.
1524          */
1525         list_for_each_entry_safe(vma, next_vma, &contested,
1526                                  combined_links.destroy) {
1527                 list_del_init(&vma->combined_links.destroy);
1528                 xe_vma_destroy_unlocked(vma);
1529         }
1530
1531         up_write(&vm->lock);
1532
1533         mutex_lock(&xe->usm.lock);
1534         if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1535                 xe->usm.num_vm_in_fault_mode--;
1536         else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1537                 xe->usm.num_vm_in_non_fault_mode--;
1538         mutex_unlock(&xe->usm.lock);
1539
1540         for_each_tile(tile, xe, id)
1541                 xe_range_fence_tree_fini(&vm->rftree[id]);
1542
1543         xe_vm_put(vm);
1544 }
1545
1546 static void vm_destroy_work_func(struct work_struct *w)
1547 {
1548         struct xe_vm *vm =
1549                 container_of(w, struct xe_vm, destroy_work);
1550         struct xe_device *xe = vm->xe;
1551         struct xe_tile *tile;
1552         u8 id;
1553         void *lookup;
1554
1555         /* xe_vm_close_and_put was not called? */
1556         xe_assert(xe, !vm->size);
1557
1558         mutex_destroy(&vm->snap_mutex);
1559
1560         if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1561                 xe_device_mem_access_put(xe);
1562
1563                 if (xe->info.has_asid && vm->usm.asid) {
1564                         mutex_lock(&xe->usm.lock);
1565                         lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1566                         xe_assert(xe, lookup == vm);
1567                         mutex_unlock(&xe->usm.lock);
1568                 }
1569         }
1570
1571         for_each_tile(tile, xe, id)
1572                 XE_WARN_ON(vm->pt_root[id]);
1573
1574         trace_xe_vm_free(vm);
1575         dma_fence_put(vm->rebind_fence);
1576         kfree(vm);
1577 }
1578
1579 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1580 {
1581         struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1582
1583         /* To destroy the VM we need to be able to sleep */
1584         queue_work(system_unbound_wq, &vm->destroy_work);
1585 }
1586
1587 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1588 {
1589         struct xe_vm *vm;
1590
1591         mutex_lock(&xef->vm.lock);
1592         vm = xa_load(&xef->vm.xa, id);
1593         if (vm)
1594                 xe_vm_get(vm);
1595         mutex_unlock(&xef->vm.lock);
1596
1597         return vm;
1598 }
1599
1600 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1601 {
1602         return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1603                                          tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1604 }
1605
1606 static struct xe_exec_queue *
1607 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1608 {
1609         return q ? q : vm->q[0];
1610 }
1611
1612 static struct dma_fence *
1613 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1614                  struct xe_sync_entry *syncs, u32 num_syncs,
1615                  bool first_op, bool last_op)
1616 {
1617         struct xe_vm *vm = xe_vma_vm(vma);
1618         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1619         struct xe_tile *tile;
1620         struct dma_fence *fence = NULL;
1621         struct dma_fence **fences = NULL;
1622         struct dma_fence_array *cf = NULL;
1623         int cur_fence = 0, i;
1624         int number_tiles = hweight8(vma->tile_present);
1625         int err;
1626         u8 id;
1627
1628         trace_xe_vma_unbind(vma);
1629
1630         if (vma->ufence) {
1631                 struct xe_user_fence * const f = vma->ufence;
1632
1633                 if (!xe_sync_ufence_get_status(f))
1634                         return ERR_PTR(-EBUSY);
1635
1636                 vma->ufence = NULL;
1637                 xe_sync_ufence_put(f);
1638         }
1639
1640         if (number_tiles > 1) {
1641                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1642                                        GFP_KERNEL);
1643                 if (!fences)
1644                         return ERR_PTR(-ENOMEM);
1645         }
1646
1647         for_each_tile(tile, vm->xe, id) {
1648                 if (!(vma->tile_present & BIT(id)))
1649                         goto next;
1650
1651                 fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
1652                                            first_op ? syncs : NULL,
1653                                            first_op ? num_syncs : 0);
1654                 if (IS_ERR(fence)) {
1655                         err = PTR_ERR(fence);
1656                         goto err_fences;
1657                 }
1658
1659                 if (fences)
1660                         fences[cur_fence++] = fence;
1661
1662 next:
1663                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1664                         q = list_next_entry(q, multi_gt_list);
1665         }
1666
1667         if (fences) {
1668                 cf = dma_fence_array_create(number_tiles, fences,
1669                                             vm->composite_fence_ctx,
1670                                             vm->composite_fence_seqno++,
1671                                             false);
1672                 if (!cf) {
1673                         --vm->composite_fence_seqno;
1674                         err = -ENOMEM;
1675                         goto err_fences;
1676                 }
1677         }
1678
1679         fence = cf ? &cf->base : !fence ?
1680                 xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
1681         if (last_op) {
1682                 for (i = 0; i < num_syncs; i++)
1683                         xe_sync_entry_signal(&syncs[i], NULL, fence);
1684         }
1685
1686         return fence;
1687
1688 err_fences:
1689         if (fences) {
1690                 while (cur_fence)
1691                         dma_fence_put(fences[--cur_fence]);
1692                 kfree(fences);
1693         }
1694
1695         return ERR_PTR(err);
1696 }
1697
1698 static struct dma_fence *
1699 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1700                struct xe_sync_entry *syncs, u32 num_syncs,
1701                bool first_op, bool last_op)
1702 {
1703         struct xe_tile *tile;
1704         struct dma_fence *fence;
1705         struct dma_fence **fences = NULL;
1706         struct dma_fence_array *cf = NULL;
1707         struct xe_vm *vm = xe_vma_vm(vma);
1708         int cur_fence = 0, i;
1709         int number_tiles = hweight8(vma->tile_mask);
1710         int err;
1711         u8 id;
1712
1713         trace_xe_vma_bind(vma);
1714
1715         if (number_tiles > 1) {
1716                 fences = kmalloc_array(number_tiles, sizeof(*fences),
1717                                        GFP_KERNEL);
1718                 if (!fences)
1719                         return ERR_PTR(-ENOMEM);
1720         }
1721
1722         for_each_tile(tile, vm->xe, id) {
1723                 if (!(vma->tile_mask & BIT(id)))
1724                         goto next;
1725
1726                 fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
1727                                          first_op ? syncs : NULL,
1728                                          first_op ? num_syncs : 0,
1729                                          vma->tile_present & BIT(id));
1730                 if (IS_ERR(fence)) {
1731                         err = PTR_ERR(fence);
1732                         goto err_fences;
1733                 }
1734
1735                 if (fences)
1736                         fences[cur_fence++] = fence;
1737
1738 next:
1739                 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1740                         q = list_next_entry(q, multi_gt_list);
1741         }
1742
1743         if (fences) {
1744                 cf = dma_fence_array_create(number_tiles, fences,
1745                                             vm->composite_fence_ctx,
1746                                             vm->composite_fence_seqno++,
1747                                             false);
1748                 if (!cf) {
1749                         --vm->composite_fence_seqno;
1750                         err = -ENOMEM;
1751                         goto err_fences;
1752                 }
1753         }
1754
1755         if (last_op) {
1756                 for (i = 0; i < num_syncs; i++)
1757                         xe_sync_entry_signal(&syncs[i], NULL,
1758                                              cf ? &cf->base : fence);
1759         }
1760
1761         return cf ? &cf->base : fence;
1762
1763 err_fences:
1764         if (fences) {
1765                 while (cur_fence)
1766                         dma_fence_put(fences[--cur_fence]);
1767                 kfree(fences);
1768         }
1769
1770         return ERR_PTR(err);
1771 }
1772
1773 static struct xe_user_fence *
1774 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1775 {
1776         unsigned int i;
1777
1778         for (i = 0; i < num_syncs; i++) {
1779                 struct xe_sync_entry *e = &syncs[i];
1780
1781                 if (xe_sync_is_ufence(e))
1782                         return xe_sync_ufence_get(e);
1783         }
1784
1785         return NULL;
1786 }
1787
1788 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1789                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1790                         u32 num_syncs, bool immediate, bool first_op,
1791                         bool last_op)
1792 {
1793         struct dma_fence *fence;
1794         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1795         struct xe_user_fence *ufence;
1796
1797         xe_vm_assert_held(vm);
1798
1799         ufence = find_ufence_get(syncs, num_syncs);
1800         if (vma->ufence && ufence)
1801                 xe_sync_ufence_put(vma->ufence);
1802
1803         vma->ufence = ufence ?: vma->ufence;
1804
1805         if (immediate) {
1806                 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
1807                                        last_op);
1808                 if (IS_ERR(fence))
1809                         return PTR_ERR(fence);
1810         } else {
1811                 int i;
1812
1813                 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1814
1815                 fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
1816                 if (last_op) {
1817                         for (i = 0; i < num_syncs; i++)
1818                                 xe_sync_entry_signal(&syncs[i], NULL, fence);
1819                 }
1820         }
1821
1822         if (last_op)
1823                 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1824         dma_fence_put(fence);
1825
1826         return 0;
1827 }
1828
1829 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
1830                       struct xe_bo *bo, struct xe_sync_entry *syncs,
1831                       u32 num_syncs, bool immediate, bool first_op,
1832                       bool last_op)
1833 {
1834         int err;
1835
1836         xe_vm_assert_held(vm);
1837         xe_bo_assert_held(bo);
1838
1839         if (bo && immediate) {
1840                 err = xe_bo_validate(bo, vm, true);
1841                 if (err)
1842                         return err;
1843         }
1844
1845         return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
1846                             last_op);
1847 }
1848
1849 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1850                         struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1851                         u32 num_syncs, bool first_op, bool last_op)
1852 {
1853         struct dma_fence *fence;
1854         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1855
1856         xe_vm_assert_held(vm);
1857         xe_bo_assert_held(xe_vma_bo(vma));
1858
1859         fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
1860         if (IS_ERR(fence))
1861                 return PTR_ERR(fence);
1862
1863         xe_vma_destroy(vma, fence);
1864         if (last_op)
1865                 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1866         dma_fence_put(fence);
1867
1868         return 0;
1869 }
1870
1871 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1872                                     DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1873                                     DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1874
1875 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1876                        struct drm_file *file)
1877 {
1878         struct xe_device *xe = to_xe_device(dev);
1879         struct xe_file *xef = to_xe_file(file);
1880         struct drm_xe_vm_create *args = data;
1881         struct xe_tile *tile;
1882         struct xe_vm *vm;
1883         u32 id, asid;
1884         int err;
1885         u32 flags = 0;
1886
1887         if (XE_IOCTL_DBG(xe, args->extensions))
1888                 return -EINVAL;
1889
1890         if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1891                 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1892
1893         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1894                          !xe->info.has_usm))
1895                 return -EINVAL;
1896
1897         if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1898                 return -EINVAL;
1899
1900         if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1901                 return -EINVAL;
1902
1903         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1904                          args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1905                 return -EINVAL;
1906
1907         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1908                          args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1909                 return -EINVAL;
1910
1911         if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1912                          xe_device_in_non_fault_mode(xe)))
1913                 return -EINVAL;
1914
1915         if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
1916                          xe_device_in_fault_mode(xe)))
1917                 return -EINVAL;
1918
1922         if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1923                 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1924         if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1925                 flags |= XE_VM_FLAG_LR_MODE;
1926         if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1927                 flags |= XE_VM_FLAG_FAULT_MODE;
1928
1929         vm = xe_vm_create(xe, flags);
1930         if (IS_ERR(vm))
1931                 return PTR_ERR(vm);
1932
1933         mutex_lock(&xef->vm.lock);
1934         err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1935         mutex_unlock(&xef->vm.lock);
1936         if (err)
1937                 goto err_close_and_put;
1938
1939         if (xe->info.has_asid) {
1940                 mutex_lock(&xe->usm.lock);
1941                 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1942                                       XA_LIMIT(1, XE_MAX_ASID - 1),
1943                                       &xe->usm.next_asid, GFP_KERNEL);
1944                 mutex_unlock(&xe->usm.lock);
1945                 if (err < 0)
1946                         goto err_free_id;
1947
1948                 vm->usm.asid = asid;
1949         }
1950
1951         args->vm_id = id;
1952         vm->xef = xef;
1953
1954         /* Record BO memory for VM pagetables created against the client */
1955         for_each_tile(tile, xe, id)
1956                 if (vm->pt_root[id])
1957                         xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1958
1959 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1960         /* Warning: Security issue - never enable by default */
1961         args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1962 #endif
1963
1964         return 0;
1965
1966 err_free_id:
1967         mutex_lock(&xef->vm.lock);
1968         xa_erase(&xef->vm.xa, id);
1969         mutex_unlock(&xef->vm.lock);
1970 err_close_and_put:
1971         xe_vm_close_and_put(vm);
1972
1973         return err;
1974 }
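
/*
 * Userspace sketch (illustrative only; uapi names assumed from xe_drm.h and
 * fd assumed to be an open Xe DRM file descriptor). A minimal VM create:
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_XE_VM_CREATE, &create))
 *		return -errno;
 *	vm_id = create.vm_id;
 *
 * Note that the checks above reject SCRATCH_PAGE together with FAULT_MODE.
 */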
1975
1976 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1977                         struct drm_file *file)
1978 {
1979         struct xe_device *xe = to_xe_device(dev);
1980         struct xe_file *xef = to_xe_file(file);
1981         struct drm_xe_vm_destroy *args = data;
1982         struct xe_vm *vm;
1983         int err = 0;
1984
1985         if (XE_IOCTL_DBG(xe, args->pad) ||
1986             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1987                 return -EINVAL;
1988
1989         mutex_lock(&xef->vm.lock);
1990         vm = xa_load(&xef->vm.xa, args->vm_id);
1991         if (XE_IOCTL_DBG(xe, !vm))
1992                 err = -ENOENT;
1993         else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1994                 err = -EBUSY;
1995         else
1996                 xa_erase(&xef->vm.xa, args->vm_id);
1997         mutex_unlock(&xef->vm.lock);
1998
1999         if (!err)
2000                 xe_vm_close_and_put(vm);
2001
2002         return err;
2003 }
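
/*
 * Userspace teardown sketch (illustrative only; uapi names assumed from
 * xe_drm.h). The VM is destroyed by handing the same id back; the ioctl
 * fails with -EBUSY while exec queues still reference the VM:
 *
 *	struct drm_xe_vm_destroy destroy = { .vm_id = vm_id };
 *
 *	drmIoctl(fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
 */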
2004
2005 static const u32 region_to_mem_type[] = {
2006         XE_PL_TT,
2007         XE_PL_VRAM0,
2008         XE_PL_VRAM1,
2009 };
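
/*
 * Illustrative use of the table (bind_op stands in for a struct
 * drm_xe_vm_bind_op whose prefetch region has already been validated):
 *
 *	u32 mem_type = region_to_mem_type[bind_op->prefetch_mem_region_instance];
 *
 * i.e. region 0 selects system memory (XE_PL_TT) and regions 1/2 select the
 * two VRAM instances.
 */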
2010
2011 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
2012                           struct xe_exec_queue *q, u32 region,
2013                           struct xe_sync_entry *syncs, u32 num_syncs,
2014                           bool first_op, bool last_op)
2015 {
2016         struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
2017         int err;
2018
2019         xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2020
2021         if (!xe_vma_has_no_bo(vma)) {
2022                 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
2023                 if (err)
2024                         return err;
2025         }
2026
2027         if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
2028                 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
2029                                   true, first_op, last_op);
2030         } else {
2031                 int i;
2032
2033                 /* Nothing to do, signal fences now */
2034                 if (last_op) {
2035                         for (i = 0; i < num_syncs; i++) {
2036                                 struct dma_fence *fence =
2037                                         xe_exec_queue_last_fence_get(wait_exec_queue, vm);
2038
2039                                 xe_sync_entry_signal(&syncs[i], NULL, fence);
2040                                 dma_fence_put(fence);
2041                         }
2042                 }
2043
2044                 return 0;
2045         }
2046 }
2047
2048 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2049                              bool post_commit)
2050 {
2051         down_read(&vm->userptr.notifier_lock);
2052         vma->gpuva.flags |= XE_VMA_DESTROYED;
2053         up_read(&vm->userptr.notifier_lock);
2054         if (post_commit)
2055                 xe_vm_remove_vma(vm, vma);
2056 }
2057
2058 #undef ULL
2059 #define ULL     unsigned long long
2060
2061 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2062 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2063 {
2064         struct xe_vma *vma;
2065
2066         switch (op->op) {
2067         case DRM_GPUVA_OP_MAP:
2068                 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2069                        (ULL)op->map.va.addr, (ULL)op->map.va.range);
2070                 break;
2071         case DRM_GPUVA_OP_REMAP:
2072                 vma = gpuva_to_vma(op->remap.unmap->va);
2073                 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2074                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2075                        op->remap.unmap->keep ? 1 : 0);
2076                 if (op->remap.prev)
2077                         vm_dbg(&xe->drm,
2078                                "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2079                                (ULL)op->remap.prev->va.addr,
2080                                (ULL)op->remap.prev->va.range);
2081                 if (op->remap.next)
2082                         vm_dbg(&xe->drm,
2083                                "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2084                                (ULL)op->remap.next->va.addr,
2085                                (ULL)op->remap.next->va.range);
2086                 break;
2087         case DRM_GPUVA_OP_UNMAP:
2088                 vma = gpuva_to_vma(op->unmap.va);
2089                 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2090                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2091                        op->unmap.keep ? 1 : 0);
2092                 break;
2093         case DRM_GPUVA_OP_PREFETCH:
2094                 vma = gpuva_to_vma(op->prefetch.va);
2095                 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2096                        (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2097                 break;
2098         default:
2099                 drm_warn(&xe->drm, "NOT POSSIBLE");
2100         }
2101 }
2102 #else
2103 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2104 {
2105 }
2106 #endif
2107
2108 /*
2109  * Create the operations list from the IOCTL arguments and set up operation
2110  * fields so the parse and commit steps are decoupled from them. This step can fail.
2111  */
2112 static struct drm_gpuva_ops *
2113 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2114                          u64 bo_offset_or_userptr, u64 addr, u64 range,
2115                          u32 operation, u32 flags,
2116                          u32 prefetch_region, u16 pat_index)
2117 {
2118         struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2119         struct drm_gpuva_ops *ops;
2120         struct drm_gpuva_op *__op;
2121         struct drm_gpuvm_bo *vm_bo;
2122         int err;
2123
2124         lockdep_assert_held_write(&vm->lock);
2125
2126         vm_dbg(&vm->xe->drm,
2127                "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2128                operation, (ULL)addr, (ULL)range,
2129                (ULL)bo_offset_or_userptr);
2130
2131         switch (operation) {
2132         case DRM_XE_VM_BIND_OP_MAP:
2133         case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2134                 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2135                                                   obj, bo_offset_or_userptr);
2136                 break;
2137         case DRM_XE_VM_BIND_OP_UNMAP:
2138                 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2139                 break;
2140         case DRM_XE_VM_BIND_OP_PREFETCH:
2141                 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2142                 break;
2143         case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2144                 xe_assert(vm->xe, bo);
2145
2146                 err = xe_bo_lock(bo, true);
2147                 if (err)
2148                         return ERR_PTR(err);
2149
2150                 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2151                 if (IS_ERR(vm_bo)) {
2152                         xe_bo_unlock(bo);
2153                         return ERR_CAST(vm_bo);
2154                 }
2155
2156                 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2157                 drm_gpuvm_bo_put(vm_bo);
2158                 xe_bo_unlock(bo);
2159                 break;
2160         default:
2161                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2162                 ops = ERR_PTR(-EINVAL);
2163         }
2164         if (IS_ERR(ops))
2165                 return ops;
2166
2167         drm_gpuva_for_each_op(__op, ops) {
2168                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2169
2170                 if (__op->op == DRM_GPUVA_OP_MAP) {
2171                         op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2172                         op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2173                         op->map.pat_index = pat_index;
2174                 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2175                         op->prefetch.region = prefetch_region;
2176                 }
2177
2178                 print_op(vm->xe, __op);
2179         }
2180
2181         return ops;
2182 }
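
/*
 * Pipeline sketch (illustrative; locking and error handling elided). The bind
 * ioctl below drives the create/parse/execute stages roughly as:
 *
 *	ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, addr, range,
 *					  op, flags, prefetch_region, pat_index);
 *	err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
 *				      &ops_list, i == args->num_binds - 1);
 *	...
 *	err = vm_bind_ioctl_ops_execute(vm, &ops_list);
 *
 * with vm_bind_ioctl_ops_unwind() rolling back any committed VMAs if a
 * failure occurs before execution.
 */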
2183
2184 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2185                               u16 pat_index, unsigned int flags)
2186 {
2187         struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2188         struct drm_exec exec;
2189         struct xe_vma *vma;
2190         int err;
2191
2192         lockdep_assert_held_write(&vm->lock);
2193
2194         if (bo) {
2195                 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2196                 drm_exec_until_all_locked(&exec) {
2197                         err = 0;
2198                         if (!bo->vm) {
2199                                 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2200                                 drm_exec_retry_on_contention(&exec);
2201                         }
2202                         if (!err) {
2203                                 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2204                                 drm_exec_retry_on_contention(&exec);
2205                         }
2206                         if (err) {
2207                                 drm_exec_fini(&exec);
2208                                 return ERR_PTR(err);
2209                         }
2210                 }
2211         }
2212         vma = xe_vma_create(vm, bo, op->gem.offset,
2213                             op->va.addr, op->va.addr +
2214                             op->va.range - 1, pat_index, flags);
2215         if (bo)
2216                 drm_exec_fini(&exec);
2217
2218         if (xe_vma_is_userptr(vma)) {
2219                 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2220                 if (err) {
2221                         prep_vma_destroy(vm, vma, false);
2222                         xe_vma_destroy_unlocked(vma);
2223                         return ERR_PTR(err);
2224                 }
2225         } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
2226                 err = add_preempt_fences(vm, bo);
2227                 if (err) {
2228                         prep_vma_destroy(vm, vma, false);
2229                         xe_vma_destroy_unlocked(vma);
2230                         return ERR_PTR(err);
2231                 }
2232         }
2233
2234         return vma;
2235 }
2236
2237 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2238 {
2239         if (vma->gpuva.flags & XE_VMA_PTE_1G)
2240                 return SZ_1G;
2241         else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2242                 return SZ_2M;
2243         else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2244                 return SZ_64K;
2245         else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2246                 return SZ_4K;
2247
2248         return SZ_1G;   /* Uninitialized, use max size */
2249 }
2250
2251 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2252 {
2253         switch (size) {
2254         case SZ_1G:
2255                 vma->gpuva.flags |= XE_VMA_PTE_1G;
2256                 break;
2257         case SZ_2M:
2258                 vma->gpuva.flags |= XE_VMA_PTE_2M;
2259                 break;
2260         case SZ_64K:
2261                 vma->gpuva.flags |= XE_VMA_PTE_64K;
2262                 break;
2263         case SZ_4K:
2264                 vma->gpuva.flags |= XE_VMA_PTE_4K;
2265                 break;
2266         }
2267 }
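
/*
 * Illustrative round trip: these helpers let a REMAP preserve the large-page
 * size of the VMA being split. The parse step below only skips rebinding a
 * surviving piece when the old VMA is not a userptr and the new boundary
 * stays aligned to the old VMA's maximum PTE size, in which case it carries
 * that size over:
 *
 *	xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
 */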
2268
2269 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2270 {
2271         int err = 0;
2272
2273         lockdep_assert_held_write(&vm->lock);
2274
2275         switch (op->base.op) {
2276         case DRM_GPUVA_OP_MAP:
2277                 err |= xe_vm_insert_vma(vm, op->map.vma);
2278                 if (!err)
2279                         op->flags |= XE_VMA_OP_COMMITTED;
2280                 break;
2281         case DRM_GPUVA_OP_REMAP:
2282         {
2283                 u8 tile_present =
2284                         gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2285
2286                 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2287                                  true);
2288                 op->flags |= XE_VMA_OP_COMMITTED;
2289
2290                 if (op->remap.prev) {
2291                         err |= xe_vm_insert_vma(vm, op->remap.prev);
2292                         if (!err)
2293                                 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2294                         if (!err && op->remap.skip_prev) {
2295                                 op->remap.prev->tile_present =
2296                                         tile_present;
2297                                 op->remap.prev = NULL;
2298                         }
2299                 }
2300                 if (op->remap.next) {
2301                         err |= xe_vm_insert_vma(vm, op->remap.next);
2302                         if (!err)
2303                                 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2304                         if (!err && op->remap.skip_next) {
2305                                 op->remap.next->tile_present =
2306                                         tile_present;
2307                                 op->remap.next = NULL;
2308                         }
2309                 }
2310
2311                 /* Adjust for partial unbind after removing the VMA from the VM */
2312                 if (!err) {
2313                         op->base.remap.unmap->va->va.addr = op->remap.start;
2314                         op->base.remap.unmap->va->va.range = op->remap.range;
2315                 }
2316                 break;
2317         }
2318         case DRM_GPUVA_OP_UNMAP:
2319                 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2320                 op->flags |= XE_VMA_OP_COMMITTED;
2321                 break;
2322         case DRM_GPUVA_OP_PREFETCH:
2323                 op->flags |= XE_VMA_OP_COMMITTED;
2324                 break;
2325         default:
2326                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2327         }
2328
2329         return err;
2330 }
2331
2333 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
2334                                    struct drm_gpuva_ops *ops,
2335                                    struct xe_sync_entry *syncs, u32 num_syncs,
2336                                    struct list_head *ops_list, bool last)
2337 {
2338         struct xe_device *xe = vm->xe;
2339         struct xe_vma_op *last_op = NULL;
2340         struct drm_gpuva_op *__op;
2341         int err = 0;
2342
2343         lockdep_assert_held_write(&vm->lock);
2344
2345         drm_gpuva_for_each_op(__op, ops) {
2346                 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2347                 struct xe_vma *vma;
2348                 bool first = list_empty(ops_list);
2349                 unsigned int flags = 0;
2350
2351                 INIT_LIST_HEAD(&op->link);
2352                 list_add_tail(&op->link, ops_list);
2353
2354                 if (first) {
2355                         op->flags |= XE_VMA_OP_FIRST;
2356                         op->num_syncs = num_syncs;
2357                         op->syncs = syncs;
2358                 }
2359
2360                 op->q = q;
2361
2362                 switch (op->base.op) {
2363                 case DRM_GPUVA_OP_MAP:
2364                 {
2365                         flags |= op->map.is_null ?
2366                                 VMA_CREATE_FLAG_IS_NULL : 0;
2367                         flags |= op->map.dumpable ?
2368                                 VMA_CREATE_FLAG_DUMPABLE : 0;
2369
2370                         vma = new_vma(vm, &op->base.map, op->map.pat_index,
2371                                       flags);
2372                         if (IS_ERR(vma))
2373                                 return PTR_ERR(vma);
2374
2375                         op->map.vma = vma;
2376                         break;
2377                 }
2378                 case DRM_GPUVA_OP_REMAP:
2379                 {
2380                         struct xe_vma *old =
2381                                 gpuva_to_vma(op->base.remap.unmap->va);
2382
2383                         op->remap.start = xe_vma_start(old);
2384                         op->remap.range = xe_vma_size(old);
2385
2386                         if (op->base.remap.prev) {
2387                                 flags |= op->base.remap.unmap->va->flags &
2388                                         XE_VMA_READ_ONLY ?
2389                                         VMA_CREATE_FLAG_READ_ONLY : 0;
2390                                 flags |= op->base.remap.unmap->va->flags &
2391                                         DRM_GPUVA_SPARSE ?
2392                                         VMA_CREATE_FLAG_IS_NULL : 0;
2393                                 flags |= op->base.remap.unmap->va->flags &
2394                                         XE_VMA_DUMPABLE ?
2395                                         VMA_CREATE_FLAG_DUMPABLE : 0;
2396
2397                                 vma = new_vma(vm, op->base.remap.prev,
2398                                               old->pat_index, flags);
2399                                 if (IS_ERR(vma))
2400                                         return PTR_ERR(vma);
2401
2402                                 op->remap.prev = vma;
2403
2404                                 /*
2405                                  * Userptr creates a new SG mapping so
2406                                  * we must also rebind.
2407                                  */
2408                                 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2409                                         IS_ALIGNED(xe_vma_end(vma),
2410                                                    xe_vma_max_pte_size(old));
2411                                 if (op->remap.skip_prev) {
2412                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2413                                         op->remap.range -=
2414                                                 xe_vma_end(vma) -
2415                                                 xe_vma_start(old);
2416                                         op->remap.start = xe_vma_end(vma);
2417                                         vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2418                                                (ULL)op->remap.start,
2419                                                (ULL)op->remap.range);
2420                                 }
2421                         }
2422
2423                         if (op->base.remap.next) {
2424                                 flags |= op->base.remap.unmap->va->flags &
2425                                         XE_VMA_READ_ONLY ?
2426                                         VMA_CREATE_FLAG_READ_ONLY : 0;
2427                                 flags |= op->base.remap.unmap->va->flags &
2428                                         DRM_GPUVA_SPARSE ?
2429                                         VMA_CREATE_FLAG_IS_NULL : 0;
2430                                 flags |= op->base.remap.unmap->va->flags &
2431                                         XE_VMA_DUMPABLE ?
2432                                         VMA_CREATE_FLAG_DUMPABLE : 0;
2433
2434                                 vma = new_vma(vm, op->base.remap.next,
2435                                               old->pat_index, flags);
2436                                 if (IS_ERR(vma))
2437                                         return PTR_ERR(vma);
2438
2439                                 op->remap.next = vma;
2440
2441                                 /*
2442                                  * Userptr creates a new SG mapping so
2443                                  * we must also rebind.
2444                                  */
2445                                 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2446                                         IS_ALIGNED(xe_vma_start(vma),
2447                                                    xe_vma_max_pte_size(old));
2448                                 if (op->remap.skip_next) {
2449                                         xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2450                                         op->remap.range -=
2451                                                 xe_vma_end(old) -
2452                                                 xe_vma_start(vma);
2453                                         vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2454                                                (ULL)op->remap.start,
2455                                                (ULL)op->remap.range);
2456                                 }
2457                         }
2458                         break;
2459                 }
2460                 case DRM_GPUVA_OP_UNMAP:
2461                 case DRM_GPUVA_OP_PREFETCH:
2462                         /* Nothing to do */
2463                         break;
2464                 default:
2465                         drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2466                 }
2467
2468                 last_op = op;
2469
2470                 err = xe_vma_op_commit(vm, op);
2471                 if (err)
2472                         return err;
2473         }
2474
2475         /* FIXME: Unhandled corner case */
2476         XE_WARN_ON(!last_op && last && !list_empty(ops_list));
2477
2478         if (!last_op)
2479                 return 0;
2480
2481         last_op->ops = ops;
2482         if (last) {
2483                 last_op->flags |= XE_VMA_OP_LAST;
2484                 last_op->num_syncs = num_syncs;
2485                 last_op->syncs = syncs;
2486         }
2487
2488         return 0;
2489 }
2490
2491 static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
2492                       struct xe_vma *vma, struct xe_vma_op *op)
2493 {
2494         int err;
2495
2496         lockdep_assert_held_write(&vm->lock);
2497
2498         err = xe_vm_prepare_vma(exec, vma, 1);
2499         if (err)
2500                 return err;
2501
2502         xe_vm_assert_held(vm);
2503         xe_bo_assert_held(xe_vma_bo(vma));
2504
2505         switch (op->base.op) {
2506         case DRM_GPUVA_OP_MAP:
2507                 err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
2508                                  op->syncs, op->num_syncs,
2509                                  !xe_vm_in_fault_mode(vm),
2510                                  op->flags & XE_VMA_OP_FIRST,
2511                                  op->flags & XE_VMA_OP_LAST);
2512                 break;
2513         case DRM_GPUVA_OP_REMAP:
2514         {
2515                 bool prev = !!op->remap.prev;
2516                 bool next = !!op->remap.next;
2517
2518                 if (!op->remap.unmap_done) {
2519                         if (prev || next)
2520                                 vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
2521                         err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2522                                            op->num_syncs,
2523                                            op->flags & XE_VMA_OP_FIRST,
2524                                            op->flags & XE_VMA_OP_LAST &&
2525                                            !prev && !next);
2526                         if (err)
2527                                 break;
2528                         op->remap.unmap_done = true;
2529                 }
2530
2531                 if (prev) {
2532                         op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
2533                         err = xe_vm_bind(vm, op->remap.prev, op->q,
2534                                          xe_vma_bo(op->remap.prev), op->syncs,
2535                                          op->num_syncs, true, false,
2536                                          op->flags & XE_VMA_OP_LAST && !next);
2537                         op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2538                         if (err)
2539                                 break;
2540                         op->remap.prev = NULL;
2541                 }
2542
2543                 if (next) {
2544                         op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
2545                         err = xe_vm_bind(vm, op->remap.next, op->q,
2546                                          xe_vma_bo(op->remap.next),
2547                                          op->syncs, op->num_syncs,
2548                                          true, false,
2549                                          op->flags & XE_VMA_OP_LAST);
2550                         op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2551                         if (err)
2552                                 break;
2553                         op->remap.next = NULL;
2554                 }
2555
2556                 break;
2557         }
2558         case DRM_GPUVA_OP_UNMAP:
2559                 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2560                                    op->num_syncs, op->flags & XE_VMA_OP_FIRST,
2561                                    op->flags & XE_VMA_OP_LAST);
2562                 break;
2563         case DRM_GPUVA_OP_PREFETCH:
2564                 err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
2565                                      op->syncs, op->num_syncs,
2566                                      op->flags & XE_VMA_OP_FIRST,
2567                                      op->flags & XE_VMA_OP_LAST);
2568                 break;
2569         default:
2570                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2571         }
2572
2573         if (err)
2574                 trace_xe_vma_fail(vma);
2575
2576         return err;
2577 }
2578
2579 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
2580                                struct xe_vma_op *op)
2581 {
2582         struct drm_exec exec;
2583         int err;
2584
2585 retry_userptr:
2586         drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2587         drm_exec_until_all_locked(&exec) {
2588                 err = op_execute(&exec, vm, vma, op);
2589                 drm_exec_retry_on_contention(&exec);
2590                 if (err)
2591                         break;
2592         }
2593         drm_exec_fini(&exec);
2594
2595         if (err == -EAGAIN) {
2596                 lockdep_assert_held_write(&vm->lock);
2597
2598                 if (op->base.op == DRM_GPUVA_OP_REMAP) {
2599                         if (!op->remap.unmap_done)
2600                                 vma = gpuva_to_vma(op->base.remap.unmap->va);
2601                         else if (op->remap.prev)
2602                                 vma = op->remap.prev;
2603                         else
2604                                 vma = op->remap.next;
2605                 }
2606
2607                 if (xe_vma_is_userptr(vma)) {
2608                         err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2609                         if (!err)
2610                                 goto retry_userptr;
2611
2612                         trace_xe_vma_fail(vma);
2613                 }
2614         }
2615
2616         return err;
2617 }
2618
2619 static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
2620 {
2621         int ret = 0;
2622
2623         lockdep_assert_held_write(&vm->lock);
2624
2625         switch (op->base.op) {
2626         case DRM_GPUVA_OP_MAP:
2627                 ret = __xe_vma_op_execute(vm, op->map.vma, op);
2628                 break;
2629         case DRM_GPUVA_OP_REMAP:
2630         {
2631                 struct xe_vma *vma;
2632
2633                 if (!op->remap.unmap_done)
2634                         vma = gpuva_to_vma(op->base.remap.unmap->va);
2635                 else if (op->remap.prev)
2636                         vma = op->remap.prev;
2637                 else
2638                         vma = op->remap.next;
2639
2640                 ret = __xe_vma_op_execute(vm, vma, op);
2641                 break;
2642         }
2643         case DRM_GPUVA_OP_UNMAP:
2644                 ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
2645                                           op);
2646                 break;
2647         case DRM_GPUVA_OP_PREFETCH:
2648                 ret = __xe_vma_op_execute(vm,
2649                                           gpuva_to_vma(op->base.prefetch.va),
2650                                           op);
2651                 break;
2652         default:
2653                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2654         }
2655
2656         return ret;
2657 }
2658
2659 static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
2660 {
2661         bool last = op->flags & XE_VMA_OP_LAST;
2662
2663         if (last) {
2664                 while (op->num_syncs--)
2665                         xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2666                 kfree(op->syncs);
2667                 if (op->q)
2668                         xe_exec_queue_put(op->q);
2669         }
2670         if (!list_empty(&op->link))
2671                 list_del(&op->link);
2672         if (op->ops)
2673                 drm_gpuva_ops_free(&vm->gpuvm, op->ops);
2674         if (last)
2675                 xe_vm_put(vm);
2676 }
2677
2678 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2679                              bool post_commit, bool prev_post_commit,
2680                              bool next_post_commit)
2681 {
2682         lockdep_assert_held_write(&vm->lock);
2683
2684         switch (op->base.op) {
2685         case DRM_GPUVA_OP_MAP:
2686                 if (op->map.vma) {
2687                         prep_vma_destroy(vm, op->map.vma, post_commit);
2688                         xe_vma_destroy_unlocked(op->map.vma);
2689                 }
2690                 break;
2691         case DRM_GPUVA_OP_UNMAP:
2692         {
2693                 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2694
2695                 if (vma) {
2696                         down_read(&vm->userptr.notifier_lock);
2697                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2698                         up_read(&vm->userptr.notifier_lock);
2699                         if (post_commit)
2700                                 xe_vm_insert_vma(vm, vma);
2701                 }
2702                 break;
2703         }
2704         case DRM_GPUVA_OP_REMAP:
2705         {
2706                 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2707
2708                 if (op->remap.prev) {
2709                         prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2710                         xe_vma_destroy_unlocked(op->remap.prev);
2711                 }
2712                 if (op->remap.next) {
2713                         prep_vma_destroy(vm, op->remap.next, next_post_commit);
2714                         xe_vma_destroy_unlocked(op->remap.next);
2715                 }
2716                 if (vma) {
2717                         down_read(&vm->userptr.notifier_lock);
2718                         vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2719                         up_read(&vm->userptr.notifier_lock);
2720                         if (post_commit)
2721                                 xe_vm_insert_vma(vm, vma);
2722                 }
2723                 break;
2724         }
2725         case DRM_GPUVA_OP_PREFETCH:
2726                 /* Nothing to do */
2727                 break;
2728         default:
2729                 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2730         }
2731 }
2732
2733 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2734                                      struct drm_gpuva_ops **ops,
2735                                      int num_ops_list)
2736 {
2737         int i;
2738
2739         for (i = num_ops_list - 1; i >= 0; --i) {
2740                 struct drm_gpuva_ops *__ops = ops[i];
2741                 struct drm_gpuva_op *__op;
2742
2743                 if (!__ops)
2744                         continue;
2745
2746                 drm_gpuva_for_each_op_reverse(__op, __ops) {
2747                         struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2748
2749                         xe_vma_op_unwind(vm, op,
2750                                          op->flags & XE_VMA_OP_COMMITTED,
2751                                          op->flags & XE_VMA_OP_PREV_COMMITTED,
2752                                          op->flags & XE_VMA_OP_NEXT_COMMITTED);
2753                 }
2754
2755                 drm_gpuva_ops_free(&vm->gpuvm, __ops);
2756         }
2757 }
2758
2759 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2760                                      struct list_head *ops_list)
2761 {
2762         struct xe_vma_op *op, *next;
2763         int err;
2764
2765         lockdep_assert_held_write(&vm->lock);
2766
2767         list_for_each_entry_safe(op, next, ops_list, link) {
2768                 err = xe_vma_op_execute(vm, op);
2769                 if (err) {
2770                         drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
2771                                  op->base.op, err);
2772                         /*
2773                          * FIXME: Killing VM rather than proper error handling
2774                          */
2775                         xe_vm_kill(vm);
2776                         return -ENOSPC;
2777                 }
2778                 xe_vma_op_cleanup(vm, op);
2779         }
2780
2781         return 0;
2782 }
2783
2784 #define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \
2785          DRM_XE_VM_BIND_FLAG_DUMPABLE)
2786 #define XE_64K_PAGE_MASK 0xffffull
2787 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
2788
2789 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2790                                     struct drm_xe_vm_bind *args,
2791                                     struct drm_xe_vm_bind_op **bind_ops)
2792 {
2793         int err;
2794         int i;
2795
2796         if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2797             XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2798                 return -EINVAL;
2799
2800         if (XE_IOCTL_DBG(xe, args->extensions))
2801                 return -EINVAL;
2802
2803         if (args->num_binds > 1) {
2804                 u64 __user *bind_user =
2805                         u64_to_user_ptr(args->vector_of_binds);
2806
2807                 *bind_ops = kvmalloc_array(args->num_binds,
2808                                            sizeof(struct drm_xe_vm_bind_op),
2809                                            GFP_KERNEL | __GFP_ACCOUNT);
2810                 if (!*bind_ops)
2811                         return -ENOMEM;
2812
2813                 err = __copy_from_user(*bind_ops, bind_user,
2814                                        sizeof(struct drm_xe_vm_bind_op) *
2815                                        args->num_binds);
2816                 if (XE_IOCTL_DBG(xe, err)) {
2817                         err = -EFAULT;
2818                         goto free_bind_ops;
2819                 }
2820         } else {
2821                 *bind_ops = &args->bind;
2822         }
2823
2824         for (i = 0; i < args->num_binds; ++i) {
2825                 u64 range = (*bind_ops)[i].range;
2826                 u64 addr = (*bind_ops)[i].addr;
2827                 u32 op = (*bind_ops)[i].op;
2828                 u32 flags = (*bind_ops)[i].flags;
2829                 u32 obj = (*bind_ops)[i].obj;
2830                 u64 obj_offset = (*bind_ops)[i].obj_offset;
2831                 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2832                 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2833                 u16 pat_index = (*bind_ops)[i].pat_index;
2834                 u16 coh_mode;
2835
2836                 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2837                         err = -EINVAL;
2838                         goto free_bind_ops;
2839                 }
2840
2841                 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2842                 (*bind_ops)[i].pat_index = pat_index;
2843                 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2844                 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2845                         err = -EINVAL;
2846                         goto free_bind_ops;
2847                 }
2848
2849                 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2850                         err = -EINVAL;
2851                         goto free_bind_ops;
2852                 }
2853
2854                 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2855                     XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2856                     XE_IOCTL_DBG(xe, obj && is_null) ||
2857                     XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2858                     XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2859                                  is_null) ||
2860                     XE_IOCTL_DBG(xe, !obj &&
2861                                  op == DRM_XE_VM_BIND_OP_MAP &&
2862                                  !is_null) ||
2863                     XE_IOCTL_DBG(xe, !obj &&
2864                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2865                     XE_IOCTL_DBG(xe, addr &&
2866                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2867                     XE_IOCTL_DBG(xe, range &&
2868                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2869                     XE_IOCTL_DBG(xe, obj &&
2870                                  op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2871                     XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2872                                  op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2873                     XE_IOCTL_DBG(xe, obj &&
2874                                  op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2875                     XE_IOCTL_DBG(xe, prefetch_region &&
2876                                  op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2877                     XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2878                                        xe->info.mem_region_mask)) ||
2879                     XE_IOCTL_DBG(xe, obj &&
2880                                  op == DRM_XE_VM_BIND_OP_UNMAP)) {
2881                         err = -EINVAL;
2882                         goto free_bind_ops;
2883                 }
2884
2885                 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2886                     XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2887                     XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2888                     XE_IOCTL_DBG(xe, !range &&
2889                                  op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2890                         err = -EINVAL;
2891                         goto free_bind_ops;
2892                 }
2893         }
2894
2895         return 0;
2896
2897 free_bind_ops:
2898         if (args->num_binds > 1)
2899                 kvfree(*bind_ops);
2900         return err;
2901 }
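
/*
 * Userspace sketch (illustrative only; uapi names assumed from xe_drm.h, with
 * vm_id, bo_handle, gpu_addr, size and pat_index supplied by the caller). A
 * single page-aligned MAP bind that would pass the checks above:
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.range = size,
 *			.addr = gpu_addr,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 */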
2902
2903 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2904                                        struct xe_exec_queue *q,
2905                                        struct xe_sync_entry *syncs,
2906                                        int num_syncs)
2907 {
2908         struct dma_fence *fence;
2909         int i, err = 0;
2910
2911         fence = xe_sync_in_fence_get(syncs, num_syncs,
2912                                      to_wait_exec_queue(vm, q), vm);
2913         if (IS_ERR(fence))
2914                 return PTR_ERR(fence);
2915
2916         for (i = 0; i < num_syncs; i++)
2917                 xe_sync_entry_signal(&syncs[i], NULL, fence);
2918
2919         xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2920                                      fence);
2921         dma_fence_put(fence);
2922
2923         return err;
2924 }
2925
2926 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2927 {
2928         struct xe_device *xe = to_xe_device(dev);
2929         struct xe_file *xef = to_xe_file(file);
2930         struct drm_xe_vm_bind *args = data;
2931         struct drm_xe_sync __user *syncs_user;
2932         struct xe_bo **bos = NULL;
2933         struct drm_gpuva_ops **ops = NULL;
2934         struct xe_vm *vm;
2935         struct xe_exec_queue *q = NULL;
2936         u32 num_syncs, num_ufence = 0;
2937         struct xe_sync_entry *syncs = NULL;
2938         struct drm_xe_vm_bind_op *bind_ops;
2939         LIST_HEAD(ops_list);
2940         int err;
2941         int i;
2942
2943         err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
2944         if (err)
2945                 return err;
2946
2947         if (args->exec_queue_id) {
2948                 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
2949                 if (XE_IOCTL_DBG(xe, !q)) {
2950                         err = -ENOENT;
2951                         goto free_objs;
2952                 }
2953
2954                 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
2955                         err = -EINVAL;
2956                         goto put_exec_queue;
2957                 }
2958         }
2959
2960         vm = xe_vm_lookup(xef, args->vm_id);
2961         if (XE_IOCTL_DBG(xe, !vm)) {
2962                 err = -EINVAL;
2963                 goto put_exec_queue;
2964         }
2965
2966         err = down_write_killable(&vm->lock);
2967         if (err)
2968                 goto put_vm;
2969
2970         if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
2971                 err = -ENOENT;
2972                 goto release_vm_lock;
2973         }
2974
2975         for (i = 0; i < args->num_binds; ++i) {
2976                 u64 range = bind_ops[i].range;
2977                 u64 addr = bind_ops[i].addr;
2978
2979                 if (XE_IOCTL_DBG(xe, range > vm->size) ||
2980                     XE_IOCTL_DBG(xe, addr > vm->size - range)) {
2981                         err = -EINVAL;
2982                         goto release_vm_lock;
2983                 }
2984         }
2985
2986         if (args->num_binds) {
2987                 bos = kvcalloc(args->num_binds, sizeof(*bos),
2988                                GFP_KERNEL | __GFP_ACCOUNT);
2989                 if (!bos) {
2990                         err = -ENOMEM;
2991                         goto release_vm_lock;
2992                 }
2993
2994                 ops = kvcalloc(args->num_binds, sizeof(*ops),
2995                                GFP_KERNEL | __GFP_ACCOUNT);
2996                 if (!ops) {
2997                         err = -ENOMEM;
2998                         goto release_vm_lock;
2999                 }
3000         }
3001
3002         for (i = 0; i < args->num_binds; ++i) {
3003                 struct drm_gem_object *gem_obj;
3004                 u64 range = bind_ops[i].range;
3005                 u64 addr = bind_ops[i].addr;
3006                 u32 obj = bind_ops[i].obj;
3007                 u64 obj_offset = bind_ops[i].obj_offset;
3008                 u16 pat_index = bind_ops[i].pat_index;
3009                 u16 coh_mode;
3010
3011                 if (!obj)
3012                         continue;
3013
3014                 gem_obj = drm_gem_object_lookup(file, obj);
3015                 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3016                         err = -ENOENT;
3017                         goto put_obj;
3018                 }
3019                 bos[i] = gem_to_xe_bo(gem_obj);
3020
3021                 if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
3022                     XE_IOCTL_DBG(xe, obj_offset >
3023                                  bos[i]->size - range)) {
3024                         err = -EINVAL;
3025                         goto put_obj;
3026                 }
3027
3028                 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
3029                         if (XE_IOCTL_DBG(xe, obj_offset &
3030                                          XE_64K_PAGE_MASK) ||
3031                             XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3032                             XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3033                                 err = -EINVAL;
3034                                 goto put_obj;
3035                         }
3036                 }
3037
3038                 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3039                 if (bos[i]->cpu_caching) {
3040                         if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3041                                          bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3042                                 err = -EINVAL;
3043                                 goto put_obj;
3044                         }
3045                 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3046                         /*
3047                          * An imported dma-buf from a different device should
3048                          * require 1-way or 2-way coherency since we don't know
3049                          * how it was mapped on the CPU. Just assume it is
3050                          * potentially cached on the CPU side.
3051                          */
3052                         err = -EINVAL;
3053                         goto put_obj;
3054                 }
3055         }
3056
3057         if (args->num_syncs) {
3058                 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3059                 if (!syncs) {
3060                         err = -ENOMEM;
3061                         goto put_obj;
3062                 }
3063         }
3064
3065         syncs_user = u64_to_user_ptr(args->syncs);
3066         for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3067                 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3068                                           &syncs_user[num_syncs],
3069                                           (xe_vm_in_lr_mode(vm) ?
3070                                            SYNC_PARSE_FLAG_LR_MODE : 0) |
3071                                           (!args->num_binds ?
3072                                            SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3073                 if (err)
3074                         goto free_syncs;
3075
3076                 if (xe_sync_is_ufence(&syncs[num_syncs]))
3077                         num_ufence++;
3078         }
3079
3080         if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3081                 err = -EINVAL;
3082                 goto free_syncs;
3083         }
3084
3085         if (!args->num_binds) {
3086                 err = -ENODATA;
3087                 goto free_syncs;
3088         }
3089
3090         for (i = 0; i < args->num_binds; ++i) {
3091                 u64 range = bind_ops[i].range;
3092                 u64 addr = bind_ops[i].addr;
3093                 u32 op = bind_ops[i].op;
3094                 u32 flags = bind_ops[i].flags;
3095                 u64 obj_offset = bind_ops[i].obj_offset;
3096                 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3097                 u16 pat_index = bind_ops[i].pat_index;
3098
3099                 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3100                                                   addr, range, op, flags,
3101                                                   prefetch_region, pat_index);
3102                 if (IS_ERR(ops[i])) {
3103                         err = PTR_ERR(ops[i]);
3104                         ops[i] = NULL;
3105                         goto unwind_ops;
3106                 }
3107
3108                 err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
3109                                               &ops_list,
3110                                               i == args->num_binds - 1);
3111                 if (err)
3112                         goto unwind_ops;
3113         }
3114
3115         /* Nothing to do */
3116         if (list_empty(&ops_list)) {
3117                 err = -ENODATA;
3118                 goto unwind_ops;
3119         }
3120
3121         xe_vm_get(vm);
3122         if (q)
3123                 xe_exec_queue_get(q);
3124
3125         err = vm_bind_ioctl_ops_execute(vm, &ops_list);
3126
3127         up_write(&vm->lock);
3128
3129         if (q)
3130                 xe_exec_queue_put(q);
3131         xe_vm_put(vm);
3132
3133         for (i = 0; bos && i < args->num_binds; ++i)
3134                 xe_bo_put(bos[i]);
3135
3136         kvfree(bos);
3137         kvfree(ops);
3138         if (args->num_binds > 1)
3139                 kvfree(bind_ops);
3140
3141         return err;
3142
3143 unwind_ops:
3144         vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3145 free_syncs:
3146         if (err == -ENODATA)
3147                 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3148         while (num_syncs--)
3149                 xe_sync_entry_cleanup(&syncs[num_syncs]);
3150
3151         kfree(syncs);
3152 put_obj:
3153         for (i = 0; i < args->num_binds; ++i)
3154                 xe_bo_put(bos[i]);
3155 release_vm_lock:
3156         up_write(&vm->lock);
3157 put_vm:
3158         xe_vm_put(vm);
3159 put_exec_queue:
3160         if (q)
3161                 xe_exec_queue_put(q);
3162 free_objs:
3163         kvfree(bos);
3164         kvfree(ops);
3165         if (args->num_binds > 1)
3166                 kvfree(bind_ops);
3167         return err;
3168 }
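/*
 * Hedged userspace sketch (not part of this driver): a minimal
 * DRM_IOCTL_XE_VM_BIND call mapping a single BO into a VM. The field
 * names below follow the author's reading of the drm/xe_drm.h uAPI;
 * treat the exact layout and a valid pat_index for the target device
 * as assumptions and check the installed header.
 */
#if 0	/* illustrative only, never compiled into the kernel */
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static int bind_one_bo(int fd, uint32_t vm_id, uint32_t bo_handle,
		       uint64_t gpu_addr, uint64_t size)
{
	struct drm_xe_vm_bind args;

	memset(&args, 0, sizeof(args));
	args.vm_id = vm_id;
	args.num_binds = 1;		/* single op, passed inline in args.bind */
	args.bind.obj = bo_handle;
	args.bind.obj_offset = 0;
	args.bind.addr = gpu_addr;	/* must be page aligned and inside the VM */
	args.bind.range = size;		/* must be page aligned */
	args.bind.op = DRM_XE_VM_BIND_OP_MAP;
	args.bind.pat_index = 0;	/* assumption: index 0 is valid for this device */

	return ioctl(fd, DRM_IOCTL_XE_VM_BIND, &args) ? -errno : 0;
}
#endif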
3169
3170 /**
3171  * xe_vm_lock() - Lock the vm's dma_resv object
3172  * @vm: The struct xe_vm whose dma_resv object is to be locked
3173  * @intr: Whether any wait for a contended lock should be interruptible
3174  *
3175  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3176  * contended lock was interrupted. If @intr is false, the function
3177  * always returns 0.
3178  */
3179 int xe_vm_lock(struct xe_vm *vm, bool intr)
3180 {
3181         if (intr)
3182                 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3183
3184         return dma_resv_lock(xe_vm_resv(vm), NULL);
3185 }
3186
3187 /**
3188  * xe_vm_unlock() - Unlock the vm's dma_resv object
3189  * @vm: The struct xe_vm whose lock is to be released.
3190  *
3191  * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3192  */
3193 void xe_vm_unlock(struct xe_vm *vm)
3194 {
3195         dma_resv_unlock(xe_vm_resv(vm));
3196 }
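/*
 * Minimal usage sketch (assumption: the caller may sleep). xe_vm_lock()
 * and xe_vm_unlock() are thin wrappers around the VM's dma_resv lock, so
 * the usual lock / error-check / unlock pairing applies.
 */
#if 0	/* illustrative only, not compiled */
	int err = xe_vm_lock(vm, true);

	if (err)
		return err;	/* -EINTR: interrupted while waiting for a contended lock */

	/* ... touch state protected by the VM's reservation object ... */

	xe_vm_unlock(vm);
#endif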
3197
3198 /**
3199  * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3200  * @vma: VMA to invalidate
3201  *
3202  * Walks the page-table leaves, zeroing (memsetting) the entries owned by this
3203  * VMA, invalidates the TLBs, and blocks until the TLB invalidation is
3204  * complete.
3205  *
3206  * Return: 0 on success, negative error code otherwise.
3207  */
3208 int xe_vm_invalidate_vma(struct xe_vma *vma)
3209 {
3210         struct xe_device *xe = xe_vma_vm(vma)->xe;
3211         struct xe_tile *tile;
3212         u32 tile_needs_invalidate = 0;
3213         int seqno[XE_MAX_TILES_PER_DEVICE];
3214         u8 id;
3215         int ret;
3216
3217         xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma)));
3218         xe_assert(xe, !xe_vma_is_null(vma));
3219         trace_xe_vma_usm_invalidate(vma);
3220
3221         /* Check that we don't race with page-table updates */
3222         if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3223                 if (xe_vma_is_userptr(vma)) {
3224                         WARN_ON_ONCE(!mmu_interval_check_retry
3225                                      (&to_userptr_vma(vma)->userptr.notifier,
3226                                       to_userptr_vma(vma)->userptr.notifier_seq));
3227                         WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3228                                                              DMA_RESV_USAGE_BOOKKEEP));
3229
3230                 } else {
3231                         xe_bo_assert_held(xe_vma_bo(vma));
3232                 }
3233         }
3234
3235         for_each_tile(tile, xe, id) {
3236                 if (xe_pt_zap_ptes(tile, vma)) {
3237                         tile_needs_invalidate |= BIT(id);
3238                         xe_device_wmb(xe);
3239                         /*
3240                          * FIXME: We potentially need to invalidate multiple
3241                          * GTs within the tile
3242                          */
3243                         seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3244                         if (seqno[id] < 0)
3245                                 return seqno[id];
3246                 }
3247         }
3248
3249         for_each_tile(tile, xe, id) {
3250                 if (tile_needs_invalidate & BIT(id)) {
3251                         ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3252                         if (ret < 0)
3253                                 return ret;
3254                 }
3255         }
3256
3257         vma->usm.tile_invalidated = vma->tile_mask;
3258
3259         return 0;
3260 }
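/*
 * Hedged caller sketch (hypothetical helper, not from this file): in a
 * fault-mode VM, a userptr range whose notifier sequence went stale can
 * have its GPU mappings zapped by calling xe_vm_invalidate_vma() and
 * letting the next page fault repin and rebind it.
 */
#if 0	/* illustrative only, not compiled */
static int zap_stale_userptr(struct xe_userptr_vma *uvma)
{
	if (xe_vma_userptr_check_repin(uvma))
		return xe_vm_invalidate_vma(&uvma->vma);

	return 0;
}
#endif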
3261
3262 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3263 {
3264         struct drm_gpuva *gpuva;
3265         bool is_vram;
3266         uint64_t addr;
3267
3268         if (!down_read_trylock(&vm->lock)) {
3269                 drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3270                 return 0;
3271         }
3272         if (vm->pt_root[gt_id]) {
3273                 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
3274                 is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
3275                 drm_printf(p, " VM root: A:0x%llx %s\n", addr,
3276                            is_vram ? "VRAM" : "SYS");
3277         }
3278
3279         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3280                 struct xe_vma *vma = gpuva_to_vma(gpuva);
3281                 bool is_userptr = xe_vma_is_userptr(vma);
3282                 bool is_null = xe_vma_is_null(vma);
3283
3284                 if (is_null) {
3285                         addr = 0;
3286                 } else if (is_userptr) {
3287                         struct sg_table *sg = to_userptr_vma(vma)->userptr.sg;
3288                         struct xe_res_cursor cur;
3289
3290                         if (sg) {
3291                                 xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur);
3292                                 addr = xe_res_dma(&cur);
3293                         } else {
3294                                 addr = 0;
3295                         }
3296                 } else {
3297                         addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
3298                         is_vram = xe_bo_is_vram(xe_vma_bo(vma));
3299                 }
3300                 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3301                            xe_vma_start(vma), xe_vma_end(vma) - 1,
3302                            xe_vma_size(vma),
3303                            addr, is_null ? "NULL" : is_userptr ? "USR" :
3304                            is_vram ? "VRAM" : "SYS");
3305         }
3306         up_read(&vm->lock);
3307
3308         return 0;
3309 }
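/*
 * Illustrative output only (addresses below are made up): xe_analyze_vm()
 * prints the VM root followed by one line per VMA, e.g.
 *
 *  VM root: A:0x80001000 VRAM
 *  [0000000000100000-00000000001fffff] S:0x0000000000100000 A:0000000080002000 SYS
 */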
3310
3311 struct xe_vm_snapshot {
3312         unsigned long num_snaps;
3313         struct {
3314                 u64 ofs, bo_ofs;
3315                 unsigned long len;
3316                 struct xe_bo *bo;
3317                 void *data;
3318                 struct mm_struct *mm;
3319         } snap[];
3320 };
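/*
 * Hedged lifecycle sketch (assumption: @p is a valid &struct drm_printer
 * and the delayed step runs from a context that may sleep, take dma_resv
 * locks and fault in user pages). The capture is split in two so the
 * first half can run with GFP_NOWAIT under vm->snap_mutex while the heavy
 * copies happen later in xe_vm_snapshot_capture_delayed().
 */
#if 0	/* illustrative only, not compiled */
	struct xe_vm_snapshot *snap = xe_vm_snapshot_capture(vm);

	if (!snap)
		return;

	/* later, from sleepable context */
	xe_vm_snapshot_capture_delayed(snap);
	xe_vm_snapshot_print(snap, p);
	xe_vm_snapshot_free(snap);
#endif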
3321
3322 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3323 {
3324         unsigned long num_snaps = 0, i;
3325         struct xe_vm_snapshot *snap = NULL;
3326         struct drm_gpuva *gpuva;
3327
3328         if (!vm)
3329                 return NULL;
3330
3331         mutex_lock(&vm->snap_mutex);
3332         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3333                 if (gpuva->flags & XE_VMA_DUMPABLE)
3334                         num_snaps++;
3335         }
3336
3337         if (num_snaps)
3338                 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3339         if (!snap)
3340                 goto out_unlock;
3341
3342         snap->num_snaps = num_snaps;
3343         i = 0;
3344         drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3345                 struct xe_vma *vma = gpuva_to_vma(gpuva);
3346                 struct xe_bo *bo = vma->gpuva.gem.obj ?
3347                         gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3348
3349                 if (!(gpuva->flags & XE_VMA_DUMPABLE))
3350                         continue;
3351
3352                 snap->snap[i].ofs = xe_vma_start(vma);
3353                 snap->snap[i].len = xe_vma_size(vma);
3354                 if (bo) {
3355                         snap->snap[i].bo = xe_bo_get(bo);
3356                         snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3357                 } else if (xe_vma_is_userptr(vma)) {
3358                         struct mm_struct *mm =
3359                                 to_userptr_vma(vma)->userptr.notifier.mm;
3360
3361                         if (mmget_not_zero(mm))
3362                                 snap->snap[i].mm = mm;
3363                         else
3364                                 snap->snap[i].data = ERR_PTR(-EFAULT);
3365
3366                         snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3367                 } else {
3368                         snap->snap[i].data = ERR_PTR(-ENOENT);
3369                 }
3370                 i++;
3371         }
3372
3373 out_unlock:
3374         mutex_unlock(&vm->snap_mutex);
3375         return snap;
3376 }
3377
3378 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3379 {
3380         for (int i = 0; i < snap->num_snaps; i++) {
3381                 struct xe_bo *bo = snap->snap[i].bo;
3382                 struct iosys_map src;
3383                 int err;
3384
3385                 if (IS_ERR(snap->snap[i].data))
3386                         continue;
3387
3388                 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3389                 if (!snap->snap[i].data) {
3390                         snap->snap[i].data = ERR_PTR(-ENOMEM);
3391                         goto cleanup_bo;
3392                 }
3393
3394                 if (bo) {
3395                         dma_resv_lock(bo->ttm.base.resv, NULL);
3396                         err = ttm_bo_vmap(&bo->ttm, &src);
3397                         if (!err) {
3398                                 xe_map_memcpy_from(xe_bo_device(bo),
3399                                                    snap->snap[i].data,
3400                                                    &src, snap->snap[i].bo_ofs,
3401                                                    snap->snap[i].len);
3402                                 ttm_bo_vunmap(&bo->ttm, &src);
3403                         }
3404                         dma_resv_unlock(bo->ttm.base.resv);
3405                 } else {
3406                         void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3407
3408                         kthread_use_mm(snap->snap[i].mm);
3409                         if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3410                                 err = 0;
3411                         else
3412                                 err = -EFAULT;
3413                         kthread_unuse_mm(snap->snap[i].mm);
3414
3415                         mmput(snap->snap[i].mm);
3416                         snap->snap[i].mm = NULL;
3417                 }
3418
3419                 if (err) {
3420                         kvfree(snap->snap[i].data);
3421                         snap->snap[i].data = ERR_PTR(err);
3422                 }
3423
3424 cleanup_bo:
3425                 xe_bo_put(bo);
3426                 snap->snap[i].bo = NULL;
3427         }
3428 }
3429
3430 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3431 {
3432         unsigned long i, j;
3433
3434         for (i = 0; i < snap->num_snaps; i++) {
3435                 if (IS_ERR(snap->snap[i].data))
3436                         goto uncaptured;
3437
3438                 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3439                 drm_printf(p, "[%llx].data: ",
3440                            snap->snap[i].ofs);
3441
3442                 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3443                         u32 *val = snap->snap[i].data + j;
3444                         char dumped[ASCII85_BUFSZ];
3445
3446                         drm_puts(p, ascii85_encode(*val, dumped));
3447                 }
3448
3449                 drm_puts(p, "\n");
3450                 continue;
3451
3452 uncaptured:
3453                 drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n",
3454                            snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1,
3455                            PTR_ERR(snap->snap[i].data));
3456         }
3457 }
3458
3459 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3460 {
3461         unsigned long i;
3462
3463         if (!snap)
3464                 return;
3465
3466         for (i = 0; i < snap->num_snaps; i++) {
3467                 if (!IS_ERR(snap->snap[i].data))
3468                         kvfree(snap->snap[i].data);
3469                 xe_bo_put(snap->snap[i].bo);
3470                 if (snap->snap[i].mm)
3471                         mmput(snap->snap[i].mm);
3472         }
3473         kvfree(snap);
3474 }