1 // SPDX-License-Identifier: MIT
3 * Copyright © 2021 Intel Corporation
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
11 #include <drm/drm_exec.h>
12 #include <drm/drm_print.h>
13 #include <drm/ttm/ttm_execbuf_util.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
20 #include <linux/swap.h>
22 #include <generated/xe_wa_oob.h>
24 #include "xe_assert.h"
26 #include "xe_device.h"
27 #include "xe_drm_client.h"
28 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_gt_tlb_invalidation.h"
32 #include "xe_migrate.h"
35 #include "xe_preempt_fence.h"
37 #include "xe_res_cursor.h"
42 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
44 return vm->gpuvm.r_obj;
48 * xe_vma_userptr_check_repin() - Advisory check for repin needed
49 * @uvma: The userptr vma
51 * Check if the userptr vma has been invalidated since last successful
52 * repin. The check is advisory only and the function can be called
53 * without the vm->userptr.notifier_lock held. There is no guarantee that the
54 * vma userptr will remain valid after a lockless check, so typically
55 * the call needs to be followed by a proper check under the notifier_lock.
57 * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
59 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
61 return mmu_interval_check_retry(&uvma->userptr.notifier,
62 uvma->userptr.notifier_seq) ?
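/*
 * Illustrative sketch (not part of the driver) of the advisory-check
 * pattern described above: do the lockless check first, repin if it
 * indicates so, and only trust the result once it is re-checked under the
 * notifier_lock. The rebind worker later in this file follows the same
 * shape:
 *
 *	if (xe_vm_userptr_check_repin(vm))
 *		err = xe_vm_userptr_pin(vm);
 *	...
 *	down_read(&vm->userptr.notifier_lock);
 *	err = __xe_vm_userptr_needs_repin(vm);
 *	up_read(&vm->userptr.notifier_lock);
 */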
66 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
68 struct xe_userptr *userptr = &uvma->userptr;
69 struct xe_vma *vma = &uvma->vma;
70 struct xe_vm *vm = xe_vma_vm(vma);
71 struct xe_device *xe = vm->xe;
72 const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
74 bool in_kthread = !current->mm;
75 unsigned long notifier_seq;
77 bool read_only = xe_vma_read_only(vma);
79 lockdep_assert_held(&vm->lock);
80 xe_assert(xe, xe_vma_is_userptr(vma));
82 if (vma->gpuva.flags & XE_VMA_DESTROYED)
85 notifier_seq = mmu_interval_read_begin(&userptr->notifier);
86 if (notifier_seq == userptr->notifier_seq)
89 pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
94 dma_unmap_sgtable(xe->drm.dev,
96 read_only ? DMA_TO_DEVICE :
97 DMA_BIDIRECTIONAL, 0);
98 sg_free_table(userptr->sg);
104 if (!mmget_not_zero(userptr->notifier.mm)) {
108 kthread_use_mm(userptr->notifier.mm);
111 while (pinned < num_pages) {
112 ret = get_user_pages_fast(xe_vma_userptr(vma) +
115 read_only ? 0 : FOLL_WRITE,
125 kthread_unuse_mm(userptr->notifier.mm);
126 mmput(userptr->notifier.mm);
132 ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
134 (u64)pinned << PAGE_SHIFT,
135 xe_sg_segment_size(xe->drm.dev),
141 userptr->sg = &userptr->sgt;
143 ret = dma_map_sgtable(xe->drm.dev, userptr->sg,
144 read_only ? DMA_TO_DEVICE :
146 DMA_ATTR_SKIP_CPU_SYNC |
147 DMA_ATTR_NO_KERNEL_MAPPING);
149 sg_free_table(userptr->sg);
154 for (i = 0; i < pinned; ++i) {
157 set_page_dirty(pages[i]);
158 unlock_page(pages[i]);
161 mark_page_accessed(pages[i]);
165 release_pages(pages, pinned);
169 userptr->notifier_seq = notifier_seq;
170 if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
174 return ret < 0 ? ret : 0;
177 static bool preempt_fences_waiting(struct xe_vm *vm)
179 struct xe_exec_queue *q;
181 lockdep_assert_held(&vm->lock);
182 xe_vm_assert_held(vm);
184 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
185 if (!q->compute.pfence ||
186 (q->compute.pfence && test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
187 &q->compute.pfence->flags))) {
195 static void free_preempt_fences(struct list_head *list)
197 struct list_head *link, *next;
199 list_for_each_safe(link, next, list)
200 xe_preempt_fence_free(to_preempt_fence_from_link(link));
203 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
206 lockdep_assert_held(&vm->lock);
207 xe_vm_assert_held(vm);
209 if (*count >= vm->preempt.num_exec_queues)
212 for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
213 struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
216 return PTR_ERR(pfence);
218 list_move_tail(xe_preempt_fence_link(pfence), list);
224 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
226 struct xe_exec_queue *q;
228 xe_vm_assert_held(vm);
230 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
231 if (q->compute.pfence) {
232 long timeout = dma_fence_wait(q->compute.pfence, false);
236 dma_fence_put(q->compute.pfence);
237 q->compute.pfence = NULL;
244 static bool xe_vm_is_idle(struct xe_vm *vm)
246 struct xe_exec_queue *q;
248 xe_vm_assert_held(vm);
249 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
250 if (!xe_exec_queue_is_idle(q))
257 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
259 struct list_head *link;
260 struct xe_exec_queue *q;
262 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
263 struct dma_fence *fence;
266 xe_assert(vm->xe, link != list);
268 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
269 q, q->compute.context,
271 dma_fence_put(q->compute.pfence);
272 q->compute.pfence = fence;
276 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
278 struct xe_exec_queue *q;
281 if (!vm->preempt.num_exec_queues)
284 err = xe_bo_lock(bo, true);
288 err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
292 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
293 if (q->compute.pfence) {
294 dma_resv_add_fence(bo->ttm.base.resv,
296 DMA_RESV_USAGE_BOOKKEEP);
304 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
305 struct drm_exec *exec)
307 struct xe_exec_queue *q;
309 lockdep_assert_held(&vm->lock);
310 xe_vm_assert_held(vm);
312 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) {
315 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence,
316 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
320 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
322 struct drm_gpuvm_exec vm_exec = {
324 .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
327 struct drm_exec *exec = &vm_exec.exec;
328 struct dma_fence *pfence;
332 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
334 down_write(&vm->lock);
335 err = drm_gpuvm_exec_lock(&vm_exec);
339 pfence = xe_preempt_fence_create(q, q->compute.context,
346 list_add(&q->compute.link, &vm->preempt.exec_queues);
347 ++vm->preempt.num_exec_queues;
348 q->compute.pfence = pfence;
350 down_read(&vm->userptr.notifier_lock);
352 drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
353 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
356 * Check whether a preemption on the VM or a userptr invalidation is in
357 * flight; if so, trigger this preempt fence to sync state with the
358 * other preempt fences on the VM.
360 wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
362 dma_fence_enable_sw_signaling(pfence);
364 up_read(&vm->userptr.notifier_lock);
375 * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
379 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
381 if (!xe_vm_in_preempt_fence_mode(vm))
384 down_write(&vm->lock);
385 list_del(&q->compute.link);
386 --vm->preempt.num_exec_queues;
387 if (q->compute.pfence) {
388 dma_fence_enable_sw_signaling(q->compute.pfence);
389 dma_fence_put(q->compute.pfence);
390 q->compute.pfence = NULL;
396 * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
397 * that need repinning.
400 * This function checks whether the VM has userptrs that need repinning,
401 * and provides a release-type barrier on the userptr.notifier_lock after
404 * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
406 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
408 lockdep_assert_held_read(&vm->userptr.notifier_lock);
410 return (list_empty(&vm->userptr.repin_list) &&
411 list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
414 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
416 static void xe_vm_kill(struct xe_vm *vm)
418 struct xe_exec_queue *q;
420 lockdep_assert_held(&vm->lock);
422 xe_vm_lock(vm, false);
423 vm->flags |= XE_VM_FLAG_BANNED;
424 trace_xe_vm_kill(vm);
426 list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
430 /* TODO: Inform user the VM is banned */
434 * xe_vm_validate_should_retry() - Whether to retry after a validate error.
435 * @exec: The drm_exec object used for locking before validation.
436 * @err: The error returned from ttm_bo_validate().
437 * @end: A ktime_t cookie that should be set to 0 before first use and
438 * that should be reused on subsequent calls.
440 * With multiple active VMs, under memory pressure, it is possible that
441 * ttm_bo_validate() runs into -EDEADLK, in which case it returns -ENOMEM.
442 * Until ttm properly handles locking in such scenarios, the best thing the
443 * driver can do is retry with a timeout. Check if that is necessary, and
444 * if so unlock the drm_exec's objects while keeping the ticket to prepare
447 * Return: true if a retry after drm_exec_init() is recommended;
450 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
458 *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
459 if (!ktime_before(cur, *end))
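/*
 * Illustrative retry loop (a sketch mirroring preempt_rebind_work_func()
 * below) for callers of xe_vm_validate_should_retry(); lock_and_validate()
 * is a placeholder for the caller's own locking plus validation step
 * (e.g. xe_preempt_work_begin()):
 *
 *	ktime_t end = 0;
 *	int err;
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = lock_and_validate(&exec, vm);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */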
466 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
468 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
469 struct drm_gpuva *gpuva;
472 lockdep_assert_held(&vm->lock);
473 drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
474 list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
477 ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
481 vm_bo->evicted = false;
485 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
491 * 1 fence for each preempt fence plus a fence for each tile from a
494 err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
495 vm->xe->info.tile_count);
499 if (xe_vm_is_idle(vm)) {
500 vm->preempt.rebind_deactivated = true;
505 if (!preempt_fences_waiting(vm)) {
510 err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
514 err = wait_for_existing_preempt_fences(vm);
518 return drm_gpuvm_validate(&vm->gpuvm, exec);
521 static void preempt_rebind_work_func(struct work_struct *w)
523 struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
524 struct drm_exec exec;
525 struct dma_fence *rebind_fence;
526 unsigned int fence_count = 0;
527 LIST_HEAD(preempt_fences);
531 int __maybe_unused tries = 0;
533 xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
534 trace_xe_vm_rebind_worker_enter(vm);
536 down_write(&vm->lock);
538 if (xe_vm_is_closed_or_banned(vm)) {
540 trace_xe_vm_rebind_worker_exit(vm);
545 if (xe_vm_userptr_check_repin(vm)) {
546 err = xe_vm_userptr_pin(vm);
548 goto out_unlock_outer;
551 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
553 drm_exec_until_all_locked(&exec) {
556 err = xe_preempt_work_begin(&exec, vm, &done);
557 drm_exec_retry_on_contention(&exec);
559 drm_exec_fini(&exec);
560 if (err && xe_vm_validate_should_retry(&exec, err, &end))
563 goto out_unlock_outer;
567 err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
571 rebind_fence = xe_vm_rebind(vm, true);
572 if (IS_ERR(rebind_fence)) {
573 err = PTR_ERR(rebind_fence);
578 dma_fence_wait(rebind_fence, false);
579 dma_fence_put(rebind_fence);
582 /* Wait on munmap style VM unbinds */
583 wait = dma_resv_wait_timeout(xe_vm_resv(vm),
584 DMA_RESV_USAGE_KERNEL,
585 false, MAX_SCHEDULE_TIMEOUT);
591 #define retry_required(__tries, __vm) \
592 (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
593 (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
594 __xe_vm_userptr_needs_repin(__vm))
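/*
 * With CONFIG_DRM_XE_USERPTR_INVAL_INJECT, the first pass through this
 * check always reports a retry (tries starts at 0), exercising the
 * -EAGAIN path even without a real userptr invalidation.
 */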
596 down_read(&vm->userptr.notifier_lock);
597 if (retry_required(tries, vm)) {
598 up_read(&vm->userptr.notifier_lock);
603 #undef retry_required
605 spin_lock(&vm->xe->ttm.lru_lock);
606 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
607 spin_unlock(&vm->xe->ttm.lru_lock);
609 /* Point of no return. */
610 arm_preempt_fences(vm, &preempt_fences);
611 resume_and_reinstall_preempt_fences(vm, &exec);
612 up_read(&vm->userptr.notifier_lock);
615 drm_exec_fini(&exec);
617 if (err == -EAGAIN) {
618 trace_xe_vm_rebind_worker_retry(vm);
623 drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
628 free_preempt_fences(&preempt_fences);
630 trace_xe_vm_rebind_worker_exit(vm);
633 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
634 const struct mmu_notifier_range *range,
635 unsigned long cur_seq)
637 struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier);
638 struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr);
639 struct xe_vma *vma = &uvma->vma;
640 struct xe_vm *vm = xe_vma_vm(vma);
641 struct dma_resv_iter cursor;
642 struct dma_fence *fence;
645 xe_assert(vm->xe, xe_vma_is_userptr(vma));
646 trace_xe_vma_userptr_invalidate(vma);
648 if (!mmu_notifier_range_blockable(range))
651 down_write(&vm->userptr.notifier_lock);
652 mmu_interval_set_seq(mni, cur_seq);
654 /* No need to stop gpu access if the userptr is not yet bound. */
655 if (!userptr->initial_bind) {
656 up_write(&vm->userptr.notifier_lock);
661 * Tell exec and rebind worker they need to repin and rebind this
664 if (!xe_vm_in_fault_mode(vm) &&
665 !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) {
666 spin_lock(&vm->userptr.invalidated_lock);
667 list_move_tail(&userptr->invalidate_link,
668 &vm->userptr.invalidated);
669 spin_unlock(&vm->userptr.invalidated_lock);
672 up_write(&vm->userptr.notifier_lock);
675 * Preempt fences turn into schedule disables, pipeline these.
676 * Note that even in fault mode, we need to wait for binds and
677 * unbinds to complete, and those are attached as BOOKKEEP fences
680 dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
681 DMA_RESV_USAGE_BOOKKEEP);
682 dma_resv_for_each_fence_unlocked(&cursor, fence)
683 dma_fence_enable_sw_signaling(fence);
684 dma_resv_iter_end(&cursor);
686 err = dma_resv_wait_timeout(xe_vm_resv(vm),
687 DMA_RESV_USAGE_BOOKKEEP,
688 false, MAX_SCHEDULE_TIMEOUT);
689 XE_WARN_ON(err <= 0);
691 if (xe_vm_in_fault_mode(vm)) {
692 err = xe_vm_invalidate_vma(vma);
696 trace_xe_vma_userptr_invalidate_complete(vma);
701 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
702 .invalidate = vma_userptr_invalidate,
705 int xe_vm_userptr_pin(struct xe_vm *vm)
707 struct xe_userptr_vma *uvma, *next;
709 LIST_HEAD(tmp_evict);
711 lockdep_assert_held_write(&vm->lock);
713 /* Collect invalidated userptrs */
714 spin_lock(&vm->userptr.invalidated_lock);
715 list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
716 userptr.invalidate_link) {
717 list_del_init(&uvma->userptr.invalidate_link);
718 list_move_tail(&uvma->userptr.repin_link,
719 &vm->userptr.repin_list);
721 spin_unlock(&vm->userptr.invalidated_lock);
723 /* Pin and move to temporary list */
724 list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
725 userptr.repin_link) {
726 err = xe_vma_userptr_pin_pages(uvma);
730 list_del_init(&uvma->userptr.repin_link);
731 list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list);
738 * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
739 * that need repinning.
742 * This function does an advisory check for whether the VM has userptrs that
745 * Return: 0 if there are no indications of userptrs needing repinning,
746 * -EAGAIN if there are.
748 int xe_vm_userptr_check_repin(struct xe_vm *vm)
750 return (list_empty_careful(&vm->userptr.repin_list) &&
751 list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
754 static struct dma_fence *
755 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
756 struct xe_sync_entry *syncs, u32 num_syncs,
757 bool first_op, bool last_op);
759 struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
761 struct dma_fence *fence = NULL;
762 struct xe_vma *vma, *next;
764 lockdep_assert_held(&vm->lock);
765 if (xe_vm_in_lr_mode(vm) && !rebind_worker)
768 xe_vm_assert_held(vm);
769 list_for_each_entry_safe(vma, next, &vm->rebind_list,
770 combined_links.rebind) {
771 xe_assert(vm->xe, vma->tile_present);
773 list_del_init(&vma->combined_links.rebind);
774 dma_fence_put(fence);
776 trace_xe_vma_rebind_worker(vma);
778 trace_xe_vma_rebind_exec(vma);
779 fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
787 static void xe_vma_free(struct xe_vma *vma)
789 if (xe_vma_is_userptr(vma))
790 kfree(to_userptr_vma(vma));
795 #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
796 #define VMA_CREATE_FLAG_IS_NULL BIT(1)
797 #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
799 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
801 u64 bo_offset_or_userptr,
803 u16 pat_index, unsigned int flags)
806 struct xe_tile *tile;
808 bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
809 bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
810 bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
812 xe_assert(vm->xe, start < end);
813 xe_assert(vm->xe, end < vm->size);
816 * Allocate and ensure that the xe_vma_is_userptr() return
817 * matches what was allocated.
819 if (!bo && !is_null) {
820 struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
823 return ERR_PTR(-ENOMEM);
827 vma = kzalloc(sizeof(*vma), GFP_KERNEL);
829 return ERR_PTR(-ENOMEM);
832 vma->gpuva.flags |= DRM_GPUVA_SPARSE;
834 vma->gpuva.gem.obj = &bo->ttm.base;
837 INIT_LIST_HEAD(&vma->combined_links.rebind);
839 INIT_LIST_HEAD(&vma->gpuva.gem.entry);
840 vma->gpuva.vm = &vm->gpuvm;
841 vma->gpuva.va.addr = start;
842 vma->gpuva.va.range = end - start + 1;
844 vma->gpuva.flags |= XE_VMA_READ_ONLY;
846 vma->gpuva.flags |= XE_VMA_DUMPABLE;
848 for_each_tile(tile, vm->xe, id)
849 vma->tile_mask |= 0x1 << id;
851 if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
852 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
854 vma->pat_index = pat_index;
857 struct drm_gpuvm_bo *vm_bo;
859 xe_bo_assert_held(bo);
861 vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
864 return ERR_CAST(vm_bo);
867 drm_gpuvm_bo_extobj_add(vm_bo);
868 drm_gem_object_get(&bo->ttm.base);
869 vma->gpuva.gem.offset = bo_offset_or_userptr;
870 drm_gpuva_link(&vma->gpuva, vm_bo);
871 drm_gpuvm_bo_put(vm_bo);
872 } else /* userptr or null */ {
874 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
875 u64 size = end - start + 1;
878 INIT_LIST_HEAD(&userptr->invalidate_link);
879 INIT_LIST_HEAD(&userptr->repin_link);
880 vma->gpuva.gem.offset = bo_offset_or_userptr;
882 err = mmu_interval_notifier_insert(&userptr->notifier,
884 xe_vma_userptr(vma), size,
885 &vma_userptr_notifier_ops);
891 userptr->notifier_seq = LONG_MAX;
900 static void xe_vma_destroy_late(struct xe_vma *vma)
902 struct xe_vm *vm = xe_vma_vm(vma);
903 struct xe_device *xe = vm->xe;
904 bool read_only = xe_vma_read_only(vma);
907 xe_sync_ufence_put(vma->ufence);
911 if (xe_vma_is_userptr(vma)) {
912 struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
915 dma_unmap_sgtable(xe->drm.dev,
917 read_only ? DMA_TO_DEVICE :
918 DMA_BIDIRECTIONAL, 0);
919 sg_free_table(userptr->sg);
924 * Since userptr pages are not pinned, we can't remove
925 * the notifier until we're sure the GPU is not accessing
928 mmu_interval_notifier_remove(&userptr->notifier);
930 } else if (xe_vma_is_null(vma)) {
933 xe_bo_put(xe_vma_bo(vma));
939 static void vma_destroy_work_func(struct work_struct *w)
942 container_of(w, struct xe_vma, destroy_work);
944 xe_vma_destroy_late(vma);
947 static void vma_destroy_cb(struct dma_fence *fence,
948 struct dma_fence_cb *cb)
950 struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
952 INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
953 queue_work(system_unbound_wq, &vma->destroy_work);
956 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
958 struct xe_vm *vm = xe_vma_vm(vma);
960 lockdep_assert_held_write(&vm->lock);
961 xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
963 if (xe_vma_is_userptr(vma)) {
964 xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
966 spin_lock(&vm->userptr.invalidated_lock);
967 list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
968 spin_unlock(&vm->userptr.invalidated_lock);
969 } else if (!xe_vma_is_null(vma)) {
970 xe_bo_assert_held(xe_vma_bo(vma));
972 drm_gpuva_unlink(&vma->gpuva);
975 xe_vm_assert_held(vm);
977 int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
981 XE_WARN_ON(ret != -ENOENT);
982 xe_vma_destroy_late(vma);
985 xe_vma_destroy_late(vma);
990 * xe_vm_prepare_vma() - drm_exec utility to lock a vma
991 * @exec: The drm_exec object we're currently locking for.
992 * @vma: The vma for which we want to lock the vm resv and any attached
994 * @num_shared: The number of dma-fence slots to pre-allocate in the
995 * objects' reservation objects.
997 * Return: 0 on success, negative error code on error. In particular
998 * may return -EDEADLK on WW transaction contention and -EINTR if
999 * an interruptible wait is terminated by a signal.
1001 int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
1002 unsigned int num_shared)
1004 struct xe_vm *vm = xe_vma_vm(vma);
1005 struct xe_bo *bo = xe_vma_bo(vma);
1010 err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
1012 err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1013 if (!err && bo && !bo->vm) {
1015 err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
1017 err = drm_exec_lock_obj(exec, &bo->ttm.base);
1023 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1025 struct drm_exec exec;
1028 drm_exec_init(&exec, 0, 0);
1029 drm_exec_until_all_locked(&exec) {
1030 err = xe_vm_prepare_vma(&exec, vma, 0);
1031 drm_exec_retry_on_contention(&exec);
1032 if (XE_WARN_ON(err))
1036 xe_vma_destroy(vma, NULL);
1038 drm_exec_fini(&exec);
1042 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1044 struct drm_gpuva *gpuva;
1046 lockdep_assert_held(&vm->lock);
1048 if (xe_vm_is_closed_or_banned(vm))
1051 xe_assert(vm->xe, start + range <= vm->size);
1053 gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1055 return gpuva ? gpuva_to_vma(gpuva) : NULL;
1058 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1062 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1063 lockdep_assert_held(&vm->lock);
1065 mutex_lock(&vm->snap_mutex);
1066 err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1067 mutex_unlock(&vm->snap_mutex);
1068 XE_WARN_ON(err); /* Shouldn't be possible */
1073 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1075 xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1076 lockdep_assert_held(&vm->lock);
1078 mutex_lock(&vm->snap_mutex);
1079 drm_gpuva_remove(&vma->gpuva);
1080 mutex_unlock(&vm->snap_mutex);
1081 if (vm->usm.last_fault_vma == vma)
1082 vm->usm.last_fault_vma = NULL;
1085 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1087 struct xe_vma_op *op;
1089 op = kzalloc(sizeof(*op), GFP_KERNEL);
1097 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1099 static const struct drm_gpuvm_ops gpuvm_ops = {
1100 .op_alloc = xe_vm_op_alloc,
1101 .vm_bo_validate = xe_gpuvm_validate,
1102 .vm_free = xe_vm_free,
1105 static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index)
1109 if (pat_index & BIT(0))
1110 pte |= XE_PPGTT_PTE_PAT0;
1112 if (pat_index & BIT(1))
1113 pte |= XE_PPGTT_PTE_PAT1;
1118 static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index,
1123 if (pat_index & BIT(0))
1124 pte |= XE_PPGTT_PTE_PAT0;
1126 if (pat_index & BIT(1))
1127 pte |= XE_PPGTT_PTE_PAT1;
1129 if (pat_index & BIT(2)) {
1131 pte |= XE_PPGTT_PDE_PDPE_PAT2;
1133 pte |= XE_PPGTT_PTE_PAT2;
1136 if (pat_index & BIT(3))
1137 pte |= XELPG_PPGTT_PTE_PAT3;
1139 if (pat_index & (BIT(4)))
1140 pte |= XE2_PPGTT_PTE_PAT4;
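/*
 * Worked example (illustrative): pat_index = 0b0110 (6) sets PAT1 and,
 * depending on the page-table level, either the PDE/PDPE PAT2 bit or
 * XE_PPGTT_PTE_PAT2 - the index is simply decomposed bit by bit into
 * the per-entry PAT field.
 */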
1145 static u64 pte_encode_ps(u32 pt_level)
1147 XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1150 return XE_PDE_PS_2M;
1151 else if (pt_level == 2)
1152 return XE_PDPE_PS_1G;
1157 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1158 const u16 pat_index)
1160 struct xe_device *xe = xe_bo_device(bo);
1163 pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1164 pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1165 pde |= pde_encode_pat_index(xe, pat_index);
1170 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1171 u16 pat_index, u32 pt_level)
1173 struct xe_device *xe = xe_bo_device(bo);
1176 pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1177 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1178 pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1179 pte |= pte_encode_ps(pt_level);
1181 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1182 pte |= XE_PPGTT_PTE_DM;
1187 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1188 u16 pat_index, u32 pt_level)
1190 struct xe_device *xe = xe_vma_vm(vma)->xe;
1192 pte |= XE_PAGE_PRESENT;
1194 if (likely(!xe_vma_read_only(vma)))
1197 pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1198 pte |= pte_encode_ps(pt_level);
1200 if (unlikely(xe_vma_is_null(vma)))
1206 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1208 u32 pt_level, bool devmem, u64 flags)
1212 /* Avoid passing random bits directly as flags */
1213 xe_assert(xe, !(flags & ~XE_PTE_PS64));
1216 pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1217 pte |= pte_encode_pat_index(xe, pat_index, pt_level);
1218 pte |= pte_encode_ps(pt_level);
1221 pte |= XE_PPGTT_PTE_DM;
1228 static const struct xe_pt_ops xelp_pt_ops = {
1229 .pte_encode_bo = xelp_pte_encode_bo,
1230 .pte_encode_vma = xelp_pte_encode_vma,
1231 .pte_encode_addr = xelp_pte_encode_addr,
1232 .pde_encode_bo = xelp_pde_encode_bo,
1235 static void vm_destroy_work_func(struct work_struct *w);
1238 * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1239 * given tile and vm.
1241 * @tile: tile to set up for.
1242 * @vm: vm to set up for.
1244 * Sets up a pagetable tree with one page-table per level and a single
1245 * leaf PTE. All pagetable entries point to the single page-table or,
1246 * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads and
1247 * turns writes into NOPs.
1249 * Return: 0 on success, negative error code on error.
1251 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1257 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1258 vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1259 if (IS_ERR(vm->scratch_pt[id][i]))
1260 return PTR_ERR(vm->scratch_pt[id][i]);
1262 xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1268 static void xe_vm_free_scratch(struct xe_vm *vm)
1270 struct xe_tile *tile;
1273 if (!xe_vm_has_scratch(vm))
1276 for_each_tile(tile, vm->xe, id) {
1279 if (!vm->pt_root[id])
1282 for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1283 if (vm->scratch_pt[id][i])
1284 xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1288 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
1290 struct drm_gem_object *vm_resv_obj;
1292 int err, number_tiles = 0;
1293 struct xe_tile *tile;
1296 vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1298 return ERR_PTR(-ENOMEM);
1302 vm->size = 1ull << xe->info.va_bits;
1306 init_rwsem(&vm->lock);
1307 mutex_init(&vm->snap_mutex);
1309 INIT_LIST_HEAD(&vm->rebind_list);
1311 INIT_LIST_HEAD(&vm->userptr.repin_list);
1312 INIT_LIST_HEAD(&vm->userptr.invalidated);
1313 init_rwsem(&vm->userptr.notifier_lock);
1314 spin_lock_init(&vm->userptr.invalidated_lock);
1316 INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1318 INIT_LIST_HEAD(&vm->preempt.exec_queues);
1319 vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
1321 for_each_tile(tile, xe, id)
1322 xe_range_fence_tree_init(&vm->rftree[id]);
1324 vm->pt_ops = &xelp_pt_ops;
1326 if (!(flags & XE_VM_FLAG_MIGRATION))
1327 xe_device_mem_access_get(xe);
1329 vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1335 drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1336 vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1338 drm_gem_object_put(vm_resv_obj);
1340 err = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
1344 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1345 vm->flags |= XE_VM_FLAG_64K;
1347 for_each_tile(tile, xe, id) {
1348 if (flags & XE_VM_FLAG_MIGRATION &&
1349 tile->id != XE_VM_FLAG_TILE_ID(flags))
1352 vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1353 if (IS_ERR(vm->pt_root[id])) {
1354 err = PTR_ERR(vm->pt_root[id]);
1355 vm->pt_root[id] = NULL;
1356 goto err_unlock_close;
1360 if (xe_vm_has_scratch(vm)) {
1361 for_each_tile(tile, xe, id) {
1362 if (!vm->pt_root[id])
1365 err = xe_vm_create_scratch(xe, tile, vm);
1367 goto err_unlock_close;
1369 vm->batch_invalidate_tlb = true;
1372 if (flags & XE_VM_FLAG_LR_MODE) {
1373 INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1374 vm->flags |= XE_VM_FLAG_LR_MODE;
1375 vm->batch_invalidate_tlb = false;
1378 /* Fill pt_root after allocating scratch tables */
1379 for_each_tile(tile, xe, id) {
1380 if (!vm->pt_root[id])
1383 xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1385 dma_resv_unlock(xe_vm_resv(vm));
1387 /* Kernel migration VM shouldn't have a circular loop. */
1388 if (!(flags & XE_VM_FLAG_MIGRATION)) {
1389 for_each_tile(tile, xe, id) {
1390 struct xe_gt *gt = tile->primary_gt;
1391 struct xe_vm *migrate_vm;
1392 struct xe_exec_queue *q;
1393 u32 create_flags = EXEC_QUEUE_FLAG_VM;
1395 if (!vm->pt_root[id])
1398 migrate_vm = xe_migrate_get_vm(tile->migrate);
1399 q = xe_exec_queue_create_class(xe, gt, migrate_vm,
1400 XE_ENGINE_CLASS_COPY,
1402 xe_vm_put(migrate_vm);
1412 if (number_tiles > 1)
1413 vm->composite_fence_ctx = dma_fence_context_alloc(1);
1415 mutex_lock(&xe->usm.lock);
1416 if (flags & XE_VM_FLAG_FAULT_MODE)
1417 xe->usm.num_vm_in_fault_mode++;
1418 else if (!(flags & XE_VM_FLAG_MIGRATION))
1419 xe->usm.num_vm_in_non_fault_mode++;
1420 mutex_unlock(&xe->usm.lock);
1422 trace_xe_vm_create(vm);
1427 dma_resv_unlock(xe_vm_resv(vm));
1429 xe_vm_close_and_put(vm);
1430 return ERR_PTR(err);
1433 mutex_destroy(&vm->snap_mutex);
1434 for_each_tile(tile, xe, id)
1435 xe_range_fence_tree_fini(&vm->rftree[id]);
1437 if (!(flags & XE_VM_FLAG_MIGRATION))
1438 xe_device_mem_access_put(xe);
1439 return ERR_PTR(err);
1442 static void xe_vm_close(struct xe_vm *vm)
1444 down_write(&vm->lock);
1446 up_write(&vm->lock);
1449 void xe_vm_close_and_put(struct xe_vm *vm)
1451 LIST_HEAD(contested);
1452 struct xe_device *xe = vm->xe;
1453 struct xe_tile *tile;
1454 struct xe_vma *vma, *next_vma;
1455 struct drm_gpuva *gpuva, *next;
1458 xe_assert(xe, !vm->preempt.num_exec_queues);
1461 if (xe_vm_in_preempt_fence_mode(vm))
1462 flush_work(&vm->preempt.rebind_work);
1464 down_write(&vm->lock);
1465 for_each_tile(tile, xe, id) {
1467 xe_exec_queue_last_fence_put(vm->q[id], vm);
1469 up_write(&vm->lock);
1471 for_each_tile(tile, xe, id) {
1473 xe_exec_queue_kill(vm->q[id]);
1474 xe_exec_queue_put(vm->q[id]);
1479 down_write(&vm->lock);
1480 xe_vm_lock(vm, false);
1481 drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1482 vma = gpuva_to_vma(gpuva);
1484 if (xe_vma_has_no_bo(vma)) {
1485 down_read(&vm->userptr.notifier_lock);
1486 vma->gpuva.flags |= XE_VMA_DESTROYED;
1487 up_read(&vm->userptr.notifier_lock);
1490 xe_vm_remove_vma(vm, vma);
1492 /* easy case, remove from VMA? */
1493 if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1494 list_del_init(&vma->combined_links.rebind);
1495 xe_vma_destroy(vma, NULL);
1499 list_move_tail(&vma->combined_links.destroy, &contested);
1500 vma->gpuva.flags |= XE_VMA_DESTROYED;
1504 * All vm operations will add shared fences to resv.
1505 * The only exception is eviction for a shared object,
1506 * but even so, the unbind when evicted would still
1507 * install a fence to resv. Hence it's safe to
1508 * destroy the pagetables immediately.
1510 xe_vm_free_scratch(vm);
1512 for_each_tile(tile, xe, id) {
1513 if (vm->pt_root[id]) {
1514 xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1515 vm->pt_root[id] = NULL;
1521 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1522 * Since we hold a refcount to the bo, we can remove and free
1523 * the members safely without locking.
1525 list_for_each_entry_safe(vma, next_vma, &contested,
1526 combined_links.destroy) {
1527 list_del_init(&vma->combined_links.destroy);
1528 xe_vma_destroy_unlocked(vma);
1531 up_write(&vm->lock);
1533 mutex_lock(&xe->usm.lock);
1534 if (vm->flags & XE_VM_FLAG_FAULT_MODE)
1535 xe->usm.num_vm_in_fault_mode--;
1536 else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
1537 xe->usm.num_vm_in_non_fault_mode--;
1538 mutex_unlock(&xe->usm.lock);
1540 for_each_tile(tile, xe, id)
1541 xe_range_fence_tree_fini(&vm->rftree[id]);
1546 static void vm_destroy_work_func(struct work_struct *w)
1549 container_of(w, struct xe_vm, destroy_work);
1550 struct xe_device *xe = vm->xe;
1551 struct xe_tile *tile;
1555 /* xe_vm_close_and_put was not called? */
1556 xe_assert(xe, !vm->size);
1558 mutex_destroy(&vm->snap_mutex);
1560 if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1561 xe_device_mem_access_put(xe);
1563 if (xe->info.has_asid && vm->usm.asid) {
1564 mutex_lock(&xe->usm.lock);
1565 lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1566 xe_assert(xe, lookup == vm);
1567 mutex_unlock(&xe->usm.lock);
1571 for_each_tile(tile, xe, id)
1572 XE_WARN_ON(vm->pt_root[id]);
1574 trace_xe_vm_free(vm);
1575 dma_fence_put(vm->rebind_fence);
1579 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1581 struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1583 /* To destroy the VM we need to be able to sleep */
1584 queue_work(system_unbound_wq, &vm->destroy_work);
1587 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1591 mutex_lock(&xef->vm.lock);
1592 vm = xa_load(&xef->vm.xa, id);
1595 mutex_unlock(&xef->vm.lock);
1600 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1602 return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
1603 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
1606 static struct xe_exec_queue *
1607 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1609 return q ? q : vm->q[0];
1612 static struct dma_fence *
1613 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1614 struct xe_sync_entry *syncs, u32 num_syncs,
1615 bool first_op, bool last_op)
1617 struct xe_vm *vm = xe_vma_vm(vma);
1618 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1619 struct xe_tile *tile;
1620 struct dma_fence *fence = NULL;
1621 struct dma_fence **fences = NULL;
1622 struct dma_fence_array *cf = NULL;
1623 int cur_fence = 0, i;
1624 int number_tiles = hweight8(vma->tile_present);
1628 trace_xe_vma_unbind(vma);
1631 struct xe_user_fence * const f = vma->ufence;
1633 if (!xe_sync_ufence_get_status(f))
1634 return ERR_PTR(-EBUSY);
1637 xe_sync_ufence_put(f);
1640 if (number_tiles > 1) {
1641 fences = kmalloc_array(number_tiles, sizeof(*fences),
1644 return ERR_PTR(-ENOMEM);
1647 for_each_tile(tile, vm->xe, id) {
1648 if (!(vma->tile_present & BIT(id)))
1651 fence = __xe_pt_unbind_vma(tile, vma, q ? q : vm->q[id],
1652 first_op ? syncs : NULL,
1653 first_op ? num_syncs : 0);
1654 if (IS_ERR(fence)) {
1655 err = PTR_ERR(fence);
1660 fences[cur_fence++] = fence;
1663 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1664 q = list_next_entry(q, multi_gt_list);
1668 cf = dma_fence_array_create(number_tiles, fences,
1669 vm->composite_fence_ctx,
1670 vm->composite_fence_seqno++,
1673 --vm->composite_fence_seqno;
1679 fence = cf ? &cf->base : !fence ?
1680 xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
1682 for (i = 0; i < num_syncs; i++)
1683 xe_sync_entry_signal(&syncs[i], NULL, fence);
1691 dma_fence_put(fences[--cur_fence]);
1695 return ERR_PTR(err);
1698 static struct dma_fence *
1699 xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
1700 struct xe_sync_entry *syncs, u32 num_syncs,
1701 bool first_op, bool last_op)
1703 struct xe_tile *tile;
1704 struct dma_fence *fence;
1705 struct dma_fence **fences = NULL;
1706 struct dma_fence_array *cf = NULL;
1707 struct xe_vm *vm = xe_vma_vm(vma);
1708 int cur_fence = 0, i;
1709 int number_tiles = hweight8(vma->tile_mask);
1713 trace_xe_vma_bind(vma);
1715 if (number_tiles > 1) {
1716 fences = kmalloc_array(number_tiles, sizeof(*fences),
1719 return ERR_PTR(-ENOMEM);
1722 for_each_tile(tile, vm->xe, id) {
1723 if (!(vma->tile_mask & BIT(id)))
1726 fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
1727 first_op ? syncs : NULL,
1728 first_op ? num_syncs : 0,
1729 vma->tile_present & BIT(id));
1730 if (IS_ERR(fence)) {
1731 err = PTR_ERR(fence);
1736 fences[cur_fence++] = fence;
1739 if (q && vm->pt_root[id] && !list_empty(&q->multi_gt_list))
1740 q = list_next_entry(q, multi_gt_list);
1744 cf = dma_fence_array_create(number_tiles, fences,
1745 vm->composite_fence_ctx,
1746 vm->composite_fence_seqno++,
1749 --vm->composite_fence_seqno;
1756 for (i = 0; i < num_syncs; i++)
1757 xe_sync_entry_signal(&syncs[i], NULL,
1758 cf ? &cf->base : fence);
1761 return cf ? &cf->base : fence;
1766 dma_fence_put(fences[--cur_fence]);
1770 return ERR_PTR(err);
1773 static struct xe_user_fence *
1774 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1778 for (i = 0; i < num_syncs; i++) {
1779 struct xe_sync_entry *e = &syncs[i];
1781 if (xe_sync_is_ufence(e))
1782 return xe_sync_ufence_get(e);
1788 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
1789 struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1790 u32 num_syncs, bool immediate, bool first_op,
1793 struct dma_fence *fence;
1794 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1795 struct xe_user_fence *ufence;
1797 xe_vm_assert_held(vm);
1799 ufence = find_ufence_get(syncs, num_syncs);
1800 if (vma->ufence && ufence)
1801 xe_sync_ufence_put(vma->ufence);
1803 vma->ufence = ufence ?: vma->ufence;
1806 fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
1809 return PTR_ERR(fence);
1813 xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1815 fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
1817 for (i = 0; i < num_syncs; i++)
1818 xe_sync_entry_signal(&syncs[i], NULL, fence);
1823 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1824 dma_fence_put(fence);
1829 static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q,
1830 struct xe_bo *bo, struct xe_sync_entry *syncs,
1831 u32 num_syncs, bool immediate, bool first_op,
1836 xe_vm_assert_held(vm);
1837 xe_bo_assert_held(bo);
1839 if (bo && immediate) {
1840 err = xe_bo_validate(bo, vm, true);
1845 return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
1849 static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
1850 struct xe_exec_queue *q, struct xe_sync_entry *syncs,
1851 u32 num_syncs, bool first_op, bool last_op)
1853 struct dma_fence *fence;
1854 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
1856 xe_vm_assert_held(vm);
1857 xe_bo_assert_held(xe_vma_bo(vma));
1859 fence = xe_vm_unbind_vma(vma, q, syncs, num_syncs, first_op, last_op);
1861 return PTR_ERR(fence);
1863 xe_vma_destroy(vma, fence);
1865 xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
1866 dma_fence_put(fence);
1871 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1872 DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1873 DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1875 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1876 struct drm_file *file)
1878 struct xe_device *xe = to_xe_device(dev);
1879 struct xe_file *xef = to_xe_file(file);
1880 struct drm_xe_vm_create *args = data;
1881 struct xe_tile *tile;
1887 if (XE_IOCTL_DBG(xe, args->extensions))
1890 if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
1891 args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1893 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1897 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1900 if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1903 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1904 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1907 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1908 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1911 if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1912 xe_device_in_non_fault_mode(xe)))
1915 if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
1916 xe_device_in_fault_mode(xe)))
1919 if (XE_IOCTL_DBG(xe, args->extensions))
1922 if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1923 flags |= XE_VM_FLAG_SCRATCH_PAGE;
1924 if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1925 flags |= XE_VM_FLAG_LR_MODE;
1926 if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1927 flags |= XE_VM_FLAG_FAULT_MODE;
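/*
 * Example (illustrative): a long-running, page-fault capable VM is
 * requested with DRM_XE_VM_CREATE_FLAG_LR_MODE |
 * DRM_XE_VM_CREATE_FLAG_FAULT_MODE (FAULT_MODE without LR_MODE is
 * rejected above) and reaches xe_vm_create() below as
 * XE_VM_FLAG_LR_MODE | XE_VM_FLAG_FAULT_MODE.
 */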
1929 vm = xe_vm_create(xe, flags);
1933 mutex_lock(&xef->vm.lock);
1934 err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1935 mutex_unlock(&xef->vm.lock);
1937 goto err_close_and_put;
1939 if (xe->info.has_asid) {
1940 mutex_lock(&xe->usm.lock);
1941 err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1942 XA_LIMIT(1, XE_MAX_ASID - 1),
1943 &xe->usm.next_asid, GFP_KERNEL);
1944 mutex_unlock(&xe->usm.lock);
1948 vm->usm.asid = asid;
1954 /* Record BO memory for VM pagetable created against client */
1955 for_each_tile(tile, xe, id)
1956 if (vm->pt_root[id])
1957 xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
1959 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1960 /* Warning: Security issue - never enable by default */
1961 args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1967 mutex_lock(&xef->vm.lock);
1968 xa_erase(&xef->vm.xa, id);
1969 mutex_unlock(&xef->vm.lock);
1971 xe_vm_close_and_put(vm);
1976 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1977 struct drm_file *file)
1979 struct xe_device *xe = to_xe_device(dev);
1980 struct xe_file *xef = to_xe_file(file);
1981 struct drm_xe_vm_destroy *args = data;
1985 if (XE_IOCTL_DBG(xe, args->pad) ||
1986 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1989 mutex_lock(&xef->vm.lock);
1990 vm = xa_load(&xef->vm.xa, args->vm_id);
1991 if (XE_IOCTL_DBG(xe, !vm))
1993 else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1996 xa_erase(&xef->vm.xa, args->vm_id);
1997 mutex_unlock(&xef->vm.lock);
2000 xe_vm_close_and_put(vm);
2005 static const u32 region_to_mem_type[] = {
2011 static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
2012 struct xe_exec_queue *q, u32 region,
2013 struct xe_sync_entry *syncs, u32 num_syncs,
2014 bool first_op, bool last_op)
2016 struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
2019 xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
2021 if (!xe_vma_has_no_bo(vma)) {
2022 err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
2027 if (vma->tile_mask != (vma->tile_present & ~vma->usm.tile_invalidated)) {
2028 return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs, num_syncs,
2029 true, first_op, last_op);
2033 /* Nothing to do, signal fences now */
2035 for (i = 0; i < num_syncs; i++) {
2036 struct dma_fence *fence =
2037 xe_exec_queue_last_fence_get(wait_exec_queue, vm);
2039 xe_sync_entry_signal(&syncs[i], NULL, fence);
2040 dma_fence_put(fence);
2048 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2051 down_read(&vm->userptr.notifier_lock);
2052 vma->gpuva.flags |= XE_VMA_DESTROYED;
2053 up_read(&vm->userptr.notifier_lock);
2055 xe_vm_remove_vma(vm, vma);
2059 #define ULL unsigned long long
2061 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2062 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2067 case DRM_GPUVA_OP_MAP:
2068 vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2069 (ULL)op->map.va.addr, (ULL)op->map.va.range);
2071 case DRM_GPUVA_OP_REMAP:
2072 vma = gpuva_to_vma(op->remap.unmap->va);
2073 vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2074 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2075 op->remap.unmap->keep ? 1 : 0);
2078 "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2079 (ULL)op->remap.prev->va.addr,
2080 (ULL)op->remap.prev->va.range);
2083 "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2084 (ULL)op->remap.next->va.addr,
2085 (ULL)op->remap.next->va.range);
2087 case DRM_GPUVA_OP_UNMAP:
2088 vma = gpuva_to_vma(op->unmap.va);
2089 vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2090 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2091 op->unmap.keep ? 1 : 0);
2093 case DRM_GPUVA_OP_PREFETCH:
2094 vma = gpuva_to_vma(op->prefetch.va);
2095 vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2096 (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2099 drm_warn(&xe->drm, "NOT POSSIBLE");
2103 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2109 * Create the operations list from IOCTL arguments and set up operation fields
2110 * so that parse and commit steps are decoupled from IOCTL arguments. This step can fail.
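 *
 * The overall bind flow visible in this file: vm_bind_ioctl_ops_create()
 * builds the GPUVM ops, vm_bind_ioctl_ops_parse() fills in the driver-side
 * xe_vma_op state and commits the ops to the VM, vm_bind_ioctl_ops_execute()
 * runs them, and vm_bind_ioctl_ops_unwind() rolls back committed ops on
 * failure.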
2112 static struct drm_gpuva_ops *
2113 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
2114 u64 bo_offset_or_userptr, u64 addr, u64 range,
2115 u32 operation, u32 flags,
2116 u32 prefetch_region, u16 pat_index)
2118 struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2119 struct drm_gpuva_ops *ops;
2120 struct drm_gpuva_op *__op;
2121 struct drm_gpuvm_bo *vm_bo;
2124 lockdep_assert_held_write(&vm->lock);
2126 vm_dbg(&vm->xe->drm,
2127 "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2128 operation, (ULL)addr, (ULL)range,
2129 (ULL)bo_offset_or_userptr);
2131 switch (operation) {
2132 case DRM_XE_VM_BIND_OP_MAP:
2133 case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2134 ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2135 obj, bo_offset_or_userptr);
2137 case DRM_XE_VM_BIND_OP_UNMAP:
2138 ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2140 case DRM_XE_VM_BIND_OP_PREFETCH:
2141 ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2143 case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2144 xe_assert(vm->xe, bo);
2146 err = xe_bo_lock(bo, true);
2148 return ERR_PTR(err);
2150 vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2151 if (IS_ERR(vm_bo)) {
2153 return ERR_CAST(vm_bo);
2156 ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2157 drm_gpuvm_bo_put(vm_bo);
2161 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2162 ops = ERR_PTR(-EINVAL);
2167 drm_gpuva_for_each_op(__op, ops) {
2168 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2170 if (__op->op == DRM_GPUVA_OP_MAP) {
2171 op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2172 op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2173 op->map.pat_index = pat_index;
2174 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2175 op->prefetch.region = prefetch_region;
2178 print_op(vm->xe, __op);
2184 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2185 u16 pat_index, unsigned int flags)
2187 struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2188 struct drm_exec exec;
2192 lockdep_assert_held_write(&vm->lock);
2195 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2196 drm_exec_until_all_locked(&exec) {
2199 err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2200 drm_exec_retry_on_contention(&exec);
2203 err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2204 drm_exec_retry_on_contention(&exec);
2207 drm_exec_fini(&exec);
2208 return ERR_PTR(err);
2212 vma = xe_vma_create(vm, bo, op->gem.offset,
2213 op->va.addr, op->va.addr +
2214 op->va.range - 1, pat_index, flags);
2216 drm_exec_fini(&exec);
2218 if (xe_vma_is_userptr(vma)) {
2219 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2221 prep_vma_destroy(vm, vma, false);
2222 xe_vma_destroy_unlocked(vma);
2223 return ERR_PTR(err);
2225 } else if (!xe_vma_has_no_bo(vma) && !bo->vm) {
2226 err = add_preempt_fences(vm, bo);
2228 prep_vma_destroy(vm, vma, false);
2229 xe_vma_destroy_unlocked(vma);
2230 return ERR_PTR(err);
2237 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2239 if (vma->gpuva.flags & XE_VMA_PTE_1G)
2241 else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2243 else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2245 else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2248 return SZ_1G; /* Uninitialized, use max size */
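/*
 * Example (illustrative): a VMA bound with 2M PTEs reports SZ_2M here;
 * when a remap splits it on a 2M-aligned boundary, the surviving halves
 * can skip the rebind (see skip_prev/skip_next in
 * vm_bind_ioctl_ops_parse()) and keep their huge PTEs.
 */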
2251 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2255 vma->gpuva.flags |= XE_VMA_PTE_1G;
2258 vma->gpuva.flags |= XE_VMA_PTE_2M;
2261 vma->gpuva.flags |= XE_VMA_PTE_64K;
2264 vma->gpuva.flags |= XE_VMA_PTE_4K;
2269 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2273 lockdep_assert_held_write(&vm->lock);
2275 switch (op->base.op) {
2276 case DRM_GPUVA_OP_MAP:
2277 err |= xe_vm_insert_vma(vm, op->map.vma);
2279 op->flags |= XE_VMA_OP_COMMITTED;
2281 case DRM_GPUVA_OP_REMAP:
2284 gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2286 prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2288 op->flags |= XE_VMA_OP_COMMITTED;
2290 if (op->remap.prev) {
2291 err |= xe_vm_insert_vma(vm, op->remap.prev);
2293 op->flags |= XE_VMA_OP_PREV_COMMITTED;
2294 if (!err && op->remap.skip_prev) {
2295 op->remap.prev->tile_present =
2297 op->remap.prev = NULL;
2300 if (op->remap.next) {
2301 err |= xe_vm_insert_vma(vm, op->remap.next);
2303 op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2304 if (!err && op->remap.skip_next) {
2305 op->remap.next->tile_present =
2307 op->remap.next = NULL;
2311 /* Adjust for partial unbind after removing VMA from VM */
2313 op->base.remap.unmap->va->va.addr = op->remap.start;
2314 op->base.remap.unmap->va->va.range = op->remap.range;
2318 case DRM_GPUVA_OP_UNMAP:
2319 prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2320 op->flags |= XE_VMA_OP_COMMITTED;
2322 case DRM_GPUVA_OP_PREFETCH:
2323 op->flags |= XE_VMA_OP_COMMITTED;
2326 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2333 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
2334 struct drm_gpuva_ops *ops,
2335 struct xe_sync_entry *syncs, u32 num_syncs,
2336 struct list_head *ops_list, bool last)
2338 struct xe_device *xe = vm->xe;
2339 struct xe_vma_op *last_op = NULL;
2340 struct drm_gpuva_op *__op;
2343 lockdep_assert_held_write(&vm->lock);
2345 drm_gpuva_for_each_op(__op, ops) {
2346 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2348 bool first = list_empty(ops_list);
2349 unsigned int flags = 0;
2351 INIT_LIST_HEAD(&op->link);
2352 list_add_tail(&op->link, ops_list);
2355 op->flags |= XE_VMA_OP_FIRST;
2356 op->num_syncs = num_syncs;
2362 switch (op->base.op) {
2363 case DRM_GPUVA_OP_MAP:
2365 flags |= op->map.is_null ?
2366 VMA_CREATE_FLAG_IS_NULL : 0;
2367 flags |= op->map.dumpable ?
2368 VMA_CREATE_FLAG_DUMPABLE : 0;
2370 vma = new_vma(vm, &op->base.map, op->map.pat_index,
2373 return PTR_ERR(vma);
2378 case DRM_GPUVA_OP_REMAP:
2380 struct xe_vma *old =
2381 gpuva_to_vma(op->base.remap.unmap->va);
2383 op->remap.start = xe_vma_start(old);
2384 op->remap.range = xe_vma_size(old);
2386 if (op->base.remap.prev) {
2387 flags |= op->base.remap.unmap->va->flags &
2389 VMA_CREATE_FLAG_READ_ONLY : 0;
2390 flags |= op->base.remap.unmap->va->flags &
2392 VMA_CREATE_FLAG_IS_NULL : 0;
2393 flags |= op->base.remap.unmap->va->flags &
2395 VMA_CREATE_FLAG_DUMPABLE : 0;
2397 vma = new_vma(vm, op->base.remap.prev,
2398 old->pat_index, flags);
2400 return PTR_ERR(vma);
2402 op->remap.prev = vma;
2405 * Userptr creates a new SG mapping so
2406 * we must also rebind.
2408 op->remap.skip_prev = !xe_vma_is_userptr(old) &&
2409 IS_ALIGNED(xe_vma_end(vma),
2410 xe_vma_max_pte_size(old));
2411 if (op->remap.skip_prev) {
2412 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2416 op->remap.start = xe_vma_end(vma);
2417 vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2418 (ULL)op->remap.start,
2419 (ULL)op->remap.range);
2423 if (op->base.remap.next) {
2424 flags |= op->base.remap.unmap->va->flags &
2426 VMA_CREATE_FLAG_READ_ONLY : 0;
2427 flags |= op->base.remap.unmap->va->flags &
2429 VMA_CREATE_FLAG_IS_NULL : 0;
2430 flags |= op->base.remap.unmap->va->flags &
2432 VMA_CREATE_FLAG_DUMPABLE : 0;
2434 vma = new_vma(vm, op->base.remap.next,
2435 old->pat_index, flags);
2437 return PTR_ERR(vma);
2439 op->remap.next = vma;
2442 * Userptr creates a new SG mapping so
2443 * we must also rebind.
2445 op->remap.skip_next = !xe_vma_is_userptr(old) &&
2446 IS_ALIGNED(xe_vma_start(vma),
2447 xe_vma_max_pte_size(old));
2448 if (op->remap.skip_next) {
2449 xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2453 vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2454 (ULL)op->remap.start,
2455 (ULL)op->remap.range);
2460 case DRM_GPUVA_OP_UNMAP:
2461 case DRM_GPUVA_OP_PREFETCH:
2465 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2470 err = xe_vma_op_commit(vm, op);
2475 /* FIXME: Unhandled corner case */
2476 XE_WARN_ON(!last_op && last && !list_empty(ops_list));
2483 last_op->flags |= XE_VMA_OP_LAST;
2484 last_op->num_syncs = num_syncs;
2485 last_op->syncs = syncs;
2491 static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
2492 struct xe_vma *vma, struct xe_vma_op *op)
2496 lockdep_assert_held_write(&vm->lock);
2498 err = xe_vm_prepare_vma(exec, vma, 1);
2502 xe_vm_assert_held(vm);
2503 xe_bo_assert_held(xe_vma_bo(vma));
2505 switch (op->base.op) {
2506 case DRM_GPUVA_OP_MAP:
2507 err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
2508 op->syncs, op->num_syncs,
2509 !xe_vm_in_fault_mode(vm),
2510 op->flags & XE_VMA_OP_FIRST,
2511 op->flags & XE_VMA_OP_LAST);
2513 case DRM_GPUVA_OP_REMAP:
2515 bool prev = !!op->remap.prev;
2516 bool next = !!op->remap.next;
2518 if (!op->remap.unmap_done) {
2520 vma->gpuva.flags |= XE_VMA_FIRST_REBIND;
2521 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2523 op->flags & XE_VMA_OP_FIRST,
2524 op->flags & XE_VMA_OP_LAST &&
2528 op->remap.unmap_done = true;
2532 op->remap.prev->gpuva.flags |= XE_VMA_LAST_REBIND;
2533 err = xe_vm_bind(vm, op->remap.prev, op->q,
2534 xe_vma_bo(op->remap.prev), op->syncs,
2535 op->num_syncs, true, false,
2536 op->flags & XE_VMA_OP_LAST && !next);
2537 op->remap.prev->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2540 op->remap.prev = NULL;
2544 op->remap.next->gpuva.flags |= XE_VMA_LAST_REBIND;
2545 err = xe_vm_bind(vm, op->remap.next, op->q,
2546 xe_vma_bo(op->remap.next),
2547 op->syncs, op->num_syncs,
2549 op->flags & XE_VMA_OP_LAST);
2550 op->remap.next->gpuva.flags &= ~XE_VMA_LAST_REBIND;
2553 op->remap.next = NULL;
2558 case DRM_GPUVA_OP_UNMAP:
2559 err = xe_vm_unbind(vm, vma, op->q, op->syncs,
2560 op->num_syncs, op->flags & XE_VMA_OP_FIRST,
2561 op->flags & XE_VMA_OP_LAST);
2563 case DRM_GPUVA_OP_PREFETCH:
2564 err = xe_vm_prefetch(vm, vma, op->q, op->prefetch.region,
2565 op->syncs, op->num_syncs,
2566 op->flags & XE_VMA_OP_FIRST,
2567 op->flags & XE_VMA_OP_LAST);
2570 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2574 trace_xe_vma_fail(vma);
2579 static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
2580 struct xe_vma_op *op)
2582 struct drm_exec exec;
2586 drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2587 drm_exec_until_all_locked(&exec) {
2588 err = op_execute(&exec, vm, vma, op);
2589 drm_exec_retry_on_contention(&exec);
2593 drm_exec_fini(&exec);
2595 if (err == -EAGAIN) {
2596 lockdep_assert_held_write(&vm->lock);
2598 if (op->base.op == DRM_GPUVA_OP_REMAP) {
2599 if (!op->remap.unmap_done)
2600 vma = gpuva_to_vma(op->base.remap.unmap->va);
2601 else if (op->remap.prev)
2602 vma = op->remap.prev;
2604 vma = op->remap.next;
2607 if (xe_vma_is_userptr(vma)) {
2608 err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2612 trace_xe_vma_fail(vma);
2619 static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
2623 lockdep_assert_held_write(&vm->lock);
2625 switch (op->base.op) {
2626 case DRM_GPUVA_OP_MAP:
2627 ret = __xe_vma_op_execute(vm, op->map.vma, op);
2629 case DRM_GPUVA_OP_REMAP:
2633 if (!op->remap.unmap_done)
2634 vma = gpuva_to_vma(op->base.remap.unmap->va);
2635 else if (op->remap.prev)
2636 vma = op->remap.prev;
2638 vma = op->remap.next;
2640 ret = __xe_vma_op_execute(vm, vma, op);
2643 case DRM_GPUVA_OP_UNMAP:
2644 ret = __xe_vma_op_execute(vm, gpuva_to_vma(op->base.unmap.va),
2647 case DRM_GPUVA_OP_PREFETCH:
2648 ret = __xe_vma_op_execute(vm,
2649 gpuva_to_vma(op->base.prefetch.va),
2653 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2659 static void xe_vma_op_cleanup(struct xe_vm *vm, struct xe_vma_op *op)
2661 bool last = op->flags & XE_VMA_OP_LAST;
2664 while (op->num_syncs--)
2665 xe_sync_entry_cleanup(&op->syncs[op->num_syncs]);
2668 xe_exec_queue_put(op->q);
2670 if (!list_empty(&op->link))
2671 list_del(&op->link);
2673 drm_gpuva_ops_free(&vm->gpuvm, op->ops);
2678 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2679 bool post_commit, bool prev_post_commit,
2680 bool next_post_commit)
2682 lockdep_assert_held_write(&vm->lock);
2684 switch (op->base.op) {
2685 case DRM_GPUVA_OP_MAP:
2687 prep_vma_destroy(vm, op->map.vma, post_commit);
2688 xe_vma_destroy_unlocked(op->map.vma);
2691 case DRM_GPUVA_OP_UNMAP:
2693 struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2696 down_read(&vm->userptr.notifier_lock);
2697 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2698 up_read(&vm->userptr.notifier_lock);
2700 xe_vm_insert_vma(vm, vma);
2704 case DRM_GPUVA_OP_REMAP:
2706 struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2708 if (op->remap.prev) {
2709 prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2710 xe_vma_destroy_unlocked(op->remap.prev);
2712 if (op->remap.next) {
2713 prep_vma_destroy(vm, op->remap.next, next_post_commit);
2714 xe_vma_destroy_unlocked(op->remap.next);
2717 down_read(&vm->userptr.notifier_lock);
2718 vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2719 up_read(&vm->userptr.notifier_lock);
2721 xe_vm_insert_vma(vm, vma);
2725 case DRM_GPUVA_OP_PREFETCH:
2729 drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2733 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2734 struct drm_gpuva_ops **ops,
2739 for (i = num_ops_list - 1; i >= 0; --i) {
2740 struct drm_gpuva_ops *__ops = ops[i];
2741 struct drm_gpuva_op *__op;
2746 drm_gpuva_for_each_op_reverse(__op, __ops) {
2747 struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2749 xe_vma_op_unwind(vm, op,
2750 op->flags & XE_VMA_OP_COMMITTED,
2751 op->flags & XE_VMA_OP_PREV_COMMITTED,
2752 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2755 drm_gpuva_ops_free(&vm->gpuvm, __ops);
2759 static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
2760 struct list_head *ops_list)
2762 struct xe_vma_op *op, *next;
2765 lockdep_assert_held_write(&vm->lock);
2767 list_for_each_entry_safe(op, next, ops_list, link) {
2768 err = xe_vma_op_execute(vm, op);
2770 drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
2773 * FIXME: Killing VM rather than proper error handling
2778 xe_vma_op_cleanup(vm, op);
2784 #define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \
2785 DRM_XE_VM_BIND_FLAG_DUMPABLE)
2786 #define XE_64K_PAGE_MASK 0xffffull
2787 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
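/*
 * Editorial note (illustrative, not part of the original source):
 * XE_64K_PAGE_MASK covers the low 16 address bits, so a value is 64K-aligned
 * iff (value & XE_64K_PAGE_MASK) == 0; e.g. 0x10000 passes while 0x10800 does
 * not. The bind-op checks in xe_vm_bind_ioctl() below apply this test to
 * addr, range and obj_offset for BOs backed by 64K device pages.
 */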
2789 static int vm_bind_ioctl_check_args(struct xe_device *xe,
2790 struct drm_xe_vm_bind *args,
2791 struct drm_xe_vm_bind_op **bind_ops)
2796 if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
2797 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2800 if (XE_IOCTL_DBG(xe, args->extensions))
2803 if (args->num_binds > 1) {
2804 u64 __user *bind_user =
2805 u64_to_user_ptr(args->vector_of_binds);
2807 *bind_ops = kvmalloc_array(args->num_binds,
2808 sizeof(struct drm_xe_vm_bind_op),
2809 GFP_KERNEL | __GFP_ACCOUNT);
2813 err = __copy_from_user(*bind_ops, bind_user,
2814 sizeof(struct drm_xe_vm_bind_op) *
2816 if (XE_IOCTL_DBG(xe, err)) {
2821 *bind_ops = &args->bind;
2824 for (i = 0; i < args->num_binds; ++i) {
2825 u64 range = (*bind_ops)[i].range;
2826 u64 addr = (*bind_ops)[i].addr;
2827 u32 op = (*bind_ops)[i].op;
2828 u32 flags = (*bind_ops)[i].flags;
2829 u32 obj = (*bind_ops)[i].obj;
2830 u64 obj_offset = (*bind_ops)[i].obj_offset;
2831 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
2832 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2833 u16 pat_index = (*bind_ops)[i].pat_index;
2836 if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
2841 pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
2842 (*bind_ops)[i].pat_index = pat_index;
2843 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
2844 if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
2849 if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
2854 if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
2855 XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
2856 XE_IOCTL_DBG(xe, obj && is_null) ||
2857 XE_IOCTL_DBG(xe, obj_offset && is_null) ||
2858 XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
2860 XE_IOCTL_DBG(xe, !obj &&
2861 op == DRM_XE_VM_BIND_OP_MAP &&
2863 XE_IOCTL_DBG(xe, !obj &&
2864 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2865 XE_IOCTL_DBG(xe, addr &&
2866 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2867 XE_IOCTL_DBG(xe, range &&
2868 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
2869 XE_IOCTL_DBG(xe, obj &&
2870 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2871 XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
2872 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
2873 XE_IOCTL_DBG(xe, obj &&
2874 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
2875 XE_IOCTL_DBG(xe, prefetch_region &&
2876 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
2877 XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
2878 xe->info.mem_region_mask)) ||
2879 XE_IOCTL_DBG(xe, obj &&
2880 op == DRM_XE_VM_BIND_OP_UNMAP)) {
2885 if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
2886 XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
2887 XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
2888 XE_IOCTL_DBG(xe, !range &&
2889 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
2898 if (args->num_binds > 1)
2903 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
2904 struct xe_exec_queue *q,
2905 struct xe_sync_entry *syncs,
2908 struct dma_fence *fence;
2911 fence = xe_sync_in_fence_get(syncs, num_syncs,
2912 to_wait_exec_queue(vm, q), vm);
2914 return PTR_ERR(fence);
2916 for (i = 0; i < num_syncs; i++)
2917 xe_sync_entry_signal(&syncs[i], NULL, fence);
2919 xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
2921 dma_fence_put(fence);
2926 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2928 struct xe_device *xe = to_xe_device(dev);
2929 struct xe_file *xef = to_xe_file(file);
2930 struct drm_xe_vm_bind *args = data;
2931 struct drm_xe_sync __user *syncs_user;
2932 struct xe_bo **bos = NULL;
2933 struct drm_gpuva_ops **ops = NULL;
2935 struct xe_exec_queue *q = NULL;
2936 u32 num_syncs, num_ufence = 0;
2937 struct xe_sync_entry *syncs = NULL;
2938 struct drm_xe_vm_bind_op *bind_ops;
2939 LIST_HEAD(ops_list);
2943 err = vm_bind_ioctl_check_args(xe, args, &bind_ops);
2947 if (args->exec_queue_id) {
2948 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
2949 if (XE_IOCTL_DBG(xe, !q)) {
2954 if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
2956 goto put_exec_queue;
2960 vm = xe_vm_lookup(xef, args->vm_id);
2961 if (XE_IOCTL_DBG(xe, !vm)) {
2963 goto put_exec_queue;
2966 err = down_write_killable(&vm->lock);
2970 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
2972 goto release_vm_lock;
2975 for (i = 0; i < args->num_binds; ++i) {
2976 u64 range = bind_ops[i].range;
2977 u64 addr = bind_ops[i].addr;
2979 if (XE_IOCTL_DBG(xe, range > vm->size) ||
2980 XE_IOCTL_DBG(xe, addr > vm->size - range)) {
2982 goto release_vm_lock;
2986 if (args->num_binds) {
2987 bos = kvcalloc(args->num_binds, sizeof(*bos),
2988 GFP_KERNEL | __GFP_ACCOUNT);
2991 goto release_vm_lock;
2994 ops = kvcalloc(args->num_binds, sizeof(*ops),
2995 GFP_KERNEL | __GFP_ACCOUNT);
2998 goto release_vm_lock;
3002 for (i = 0; i < args->num_binds; ++i) {
3003 struct drm_gem_object *gem_obj;
3004 u64 range = bind_ops[i].range;
3005 u64 addr = bind_ops[i].addr;
3006 u32 obj = bind_ops[i].obj;
3007 u64 obj_offset = bind_ops[i].obj_offset;
3008 u16 pat_index = bind_ops[i].pat_index;
3014 gem_obj = drm_gem_object_lookup(file, obj);
3015 if (XE_IOCTL_DBG(xe, !gem_obj)) {
3019 bos[i] = gem_to_xe_bo(gem_obj);
3021 if (XE_IOCTL_DBG(xe, range > bos[i]->size) ||
3022 XE_IOCTL_DBG(xe, obj_offset >
3023 bos[i]->size - range)) {
3028 if (bos[i]->flags & XE_BO_INTERNAL_64K) {
3029 if (XE_IOCTL_DBG(xe, obj_offset &
3030 XE_64K_PAGE_MASK) ||
3031 XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3032 XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3038 coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3039 if (bos[i]->cpu_caching) {
3040 if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3041 bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3045 } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3047 * Imported dma-buf from a different device should
3048 * require 1way or 2way coherency since we don't know
3049 * how it was mapped on the CPU. Just assume it is
3050 * potentially cached on the CPU side.
3057 if (args->num_syncs) {
3058 syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3065 syncs_user = u64_to_user_ptr(args->syncs);
3066 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3067 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3068 &syncs_user[num_syncs],
3069 (xe_vm_in_lr_mode(vm) ?
3070 SYNC_PARSE_FLAG_LR_MODE : 0) |
3072 SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3076 if (xe_sync_is_ufence(&syncs[num_syncs]))
3080 if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3085 if (!args->num_binds) {
3090 for (i = 0; i < args->num_binds; ++i) {
3091 u64 range = bind_ops[i].range;
3092 u64 addr = bind_ops[i].addr;
3093 u32 op = bind_ops[i].op;
3094 u32 flags = bind_ops[i].flags;
3095 u64 obj_offset = bind_ops[i].obj_offset;
3096 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3097 u16 pat_index = bind_ops[i].pat_index;
3099 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
3100 addr, range, op, flags,
3101 prefetch_region, pat_index);
3102 if (IS_ERR(ops[i])) {
3103 err = PTR_ERR(ops[i]);
3108 err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
3110 i == args->num_binds - 1);
3116 if (list_empty(&ops_list)) {
3123 xe_exec_queue_get(q);
3125 err = vm_bind_ioctl_ops_execute(vm, &ops_list);
3127 up_write(&vm->lock);
3130 xe_exec_queue_put(q);
3133 for (i = 0; bos && i < args->num_binds; ++i)
3138 if (args->num_binds > 1)
3144 vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3146 if (err == -ENODATA)
3147 err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3149 xe_sync_entry_cleanup(&syncs[num_syncs]);
3153 for (i = 0; i < args->num_binds; ++i)
3156 up_write(&vm->lock);
3161 xe_exec_queue_put(q);
3165 if (args->num_binds > 1)
3171 * xe_vm_lock() - Lock the vm's dma_resv object
3172 * @vm: The struct xe_vm whose lock is to be locked
3173 * @intr: Whether to perform any waits interruptibly
3175 * Return: 0 on success, -EINTR if @intr is true and the wait for a
3176 * contended lock was interrupted. If @intr is false, the function always succeeds.
3179 int xe_vm_lock(struct xe_vm *vm, bool intr)
3182 return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3184 return dma_resv_lock(xe_vm_resv(vm), NULL);
3188 * xe_vm_unlock() - Unlock the vm's dma_resv object
3189 * @vm: The struct xe_vm whose lock is to be released.
3191 * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3193 void xe_vm_unlock(struct xe_vm *vm)
3195 dma_resv_unlock(xe_vm_resv(vm));
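/*
 * Editorial usage sketch (illustrative only, not part of the original source),
 * pairing the two helpers above:
 *
 *	err = xe_vm_lock(vm, true);	interruptible; may return -EINTR
 *	if (err)
 *		return err;
 *	...operate on reservation-protected VM state...
 *	xe_vm_unlock(vm);
 *
 * With intr == false the lock call always succeeds, so the error check can be
 * dropped.
 */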
3199 * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3200 * @vma: VMA to invalidate
3202 * Walks the page-table leaves and zeroes (memsets) the entries owned by this
3203 * VMA, invalidates the TLBs, and blocks until the TLB invalidation is complete.
3206 * Returns 0 for success, negative error code otherwise.
3208 int xe_vm_invalidate_vma(struct xe_vma *vma)
3210 struct xe_device *xe = xe_vma_vm(vma)->xe;
3211 struct xe_tile *tile;
3212 u32 tile_needs_invalidate = 0;
3213 int seqno[XE_MAX_TILES_PER_DEVICE];
3217 xe_assert(xe, xe_vm_in_fault_mode(xe_vma_vm(vma)));
3218 xe_assert(xe, !xe_vma_is_null(vma));
3219 trace_xe_vma_usm_invalidate(vma);
3221 /* Check that we don't race with page-table updates */
3222 if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3223 if (xe_vma_is_userptr(vma)) {
3224 WARN_ON_ONCE(!mmu_interval_check_retry
3225 (&to_userptr_vma(vma)->userptr.notifier,
3226 to_userptr_vma(vma)->userptr.notifier_seq));
3227 WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
3228 DMA_RESV_USAGE_BOOKKEEP));
3231 xe_bo_assert_held(xe_vma_bo(vma));
3235 for_each_tile(tile, xe, id) {
3236 if (xe_pt_zap_ptes(tile, vma)) {
3237 tile_needs_invalidate |= BIT(id);
3240 * FIXME: We potentially need to invalidate multiple
3241 * GTs within the tile
3243 seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma);
3249 for_each_tile(tile, xe, id) {
3250 if (tile_needs_invalidate & BIT(id)) {
3251 ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]);
3257 vma->usm.tile_invalidated = vma->tile_mask;
3262 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
3264 struct drm_gpuva *gpuva;
3268 if (!down_read_trylock(&vm->lock)) {
3269 drm_printf(p, " Failed to acquire VM lock to dump capture\n");
3272 if (vm->pt_root[gt_id]) {
3273 addr = xe_bo_addr(vm->pt_root[gt_id]->bo, 0, XE_PAGE_SIZE);
3274 is_vram = xe_bo_is_vram(vm->pt_root[gt_id]->bo);
3275 drm_printf(p, " VM root: A:0x%llx %s\n", addr,
3276 is_vram ? "VRAM" : "SYS");
3279 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3280 struct xe_vma *vma = gpuva_to_vma(gpuva);
3281 bool is_userptr = xe_vma_is_userptr(vma);
3282 bool is_null = xe_vma_is_null(vma);
3286 } else if (is_userptr) {
3287 struct sg_table *sg = to_userptr_vma(vma)->userptr.sg;
3288 struct xe_res_cursor cur;
3291 xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur);
3292 addr = xe_res_dma(&cur);
3297 addr = __xe_bo_addr(xe_vma_bo(vma), 0, XE_PAGE_SIZE);
3298 is_vram = xe_bo_is_vram(xe_vma_bo(vma));
3300 drm_printf(p, " [%016llx-%016llx] S:0x%016llx A:%016llx %s\n",
3301 xe_vma_start(vma), xe_vma_end(vma) - 1,
3303 addr, is_null ? "NULL" : is_userptr ? "USR" :
3304 is_vram ? "VRAM" : "SYS");
3311 struct xe_vm_snapshot {
3312 unsigned long num_snaps;
3318 struct mm_struct *mm;
3322 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3324 unsigned long num_snaps = 0, i;
3325 struct xe_vm_snapshot *snap = NULL;
3326 struct drm_gpuva *gpuva;
3331 mutex_lock(&vm->snap_mutex);
3332 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3333 if (gpuva->flags & XE_VMA_DUMPABLE)
3338 snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
3342 snap->num_snaps = num_snaps;
3344 drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3345 struct xe_vma *vma = gpuva_to_vma(gpuva);
3346 struct xe_bo *bo = vma->gpuva.gem.obj ?
3347 gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3349 if (!(gpuva->flags & XE_VMA_DUMPABLE))
3352 snap->snap[i].ofs = xe_vma_start(vma);
3353 snap->snap[i].len = xe_vma_size(vma);
3355 snap->snap[i].bo = xe_bo_get(bo);
3356 snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
3357 } else if (xe_vma_is_userptr(vma)) {
3358 struct mm_struct *mm =
3359 to_userptr_vma(vma)->userptr.notifier.mm;
3361 if (mmget_not_zero(mm))
3362 snap->snap[i].mm = mm;
3364 snap->snap[i].data = ERR_PTR(-EFAULT);
3366 snap->snap[i].bo_ofs = xe_vma_userptr(vma);
3368 snap->snap[i].data = ERR_PTR(-ENOENT);
3374 mutex_unlock(&vm->snap_mutex);
3378 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
3380 for (int i = 0; i < snap->num_snaps; i++) {
3381 struct xe_bo *bo = snap->snap[i].bo;
3382 struct iosys_map src;
3385 if (IS_ERR(snap->snap[i].data))
3388 snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
3389 if (!snap->snap[i].data) {
3390 snap->snap[i].data = ERR_PTR(-ENOMEM);
3395 dma_resv_lock(bo->ttm.base.resv, NULL);
3396 err = ttm_bo_vmap(&bo->ttm, &src);
3398 xe_map_memcpy_from(xe_bo_device(bo),
3400 &src, snap->snap[i].bo_ofs,
3402 ttm_bo_vunmap(&bo->ttm, &src);
3404 dma_resv_unlock(bo->ttm.base.resv);
3406 void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
3408 kthread_use_mm(snap->snap[i].mm);
3409 if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
3413 kthread_unuse_mm(snap->snap[i].mm);
3415 mmput(snap->snap[i].mm);
3416 snap->snap[i].mm = NULL;
3420 kvfree(snap->snap[i].data);
3421 snap->snap[i].data = ERR_PTR(err);
3426 snap->snap[i].bo = NULL;
3430 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
3434 for (i = 0; i < snap->num_snaps; i++) {
3435 if (IS_ERR(snap->snap[i].data))
3438 drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
3439 drm_printf(p, "[%llx].data: ",
3442 for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
3443 u32 *val = snap->snap[i].data + j;
3444 char dumped[ASCII85_BUFSZ];
3446 drm_puts(p, ascii85_encode(*val, dumped));
3453 drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n",
3454 snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1,
3455 PTR_ERR(snap->snap[i].data));
3459 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
3466 for (i = 0; i < snap->num_snaps; i++) {
3467 if (!IS_ERR(snap->snap[i].data))
3468 kvfree(snap->snap[i].data);
3469 xe_bo_put(snap->snap[i].bo);
3470 if (snap->snap[i].mm)
3471 mmput(snap->snap[i].mm);