drm/amdkfd: make sure VM is ready for updating operations
authorLang Yu <Lang.Yu@amd.com>
Sun, 7 Apr 2024 04:36:00 +0000 (12:36 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 24 Apr 2024 03:22:23 +0000 (23:22 -0400)
When page table BOs were evicted but not validated before
updating page tables, VM is still in evicting state,
amdgpu_vm_update_range returns -EBUSY and
restore_process_worker runs into a dead loop.

v2: Split the BO validation and page table update into two
separate loops in amdgpu_amdkfd_restore_process_bos. (Felix)
  1.Validate BOs
  2.Validate VM (and DMABuf attachments)
  3.Update page tables for the BOs validated above

Fixes: 50661eb1a2c8 ("drm/amdgpu: Auto-validate DMABuf imports in compute VMs")
Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index 7c23ba19af33a181065b3a241cbdb9d4588bb203..2131de36e3dac00fee06ccbcc1aebe80d69f0fbc 100644 (file)
@@ -2901,13 +2901,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
 
        amdgpu_sync_create(&sync_obj);
 
-       /* Validate BOs and map them to GPUVM (update VM page tables). */
+       /* Validate BOs managed by KFD */
        list_for_each_entry(mem, &process_info->kfd_bo_list,
                            validate_list) {
 
                struct amdgpu_bo *bo = mem->bo;
                uint32_t domain = mem->domain;
-               struct kfd_mem_attachment *attachment;
                struct dma_resv_iter cursor;
                struct dma_fence *fence;
 
@@ -2932,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
                                goto validate_map_fail;
                        }
                }
+       }
+
+       if (failed_size)
+               pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+
+       /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
+        * validations above would invalidate DMABuf imports again.
+        */
+       ret = process_validate_vms(process_info, &exec.ticket);
+       if (ret) {
+               pr_debug("Validating VMs failed, ret: %d\n", ret);
+               goto validate_map_fail;
+       }
+
+       /* Update mappings managed by KFD. */
+       list_for_each_entry(mem, &process_info->kfd_bo_list,
+                           validate_list) {
+               struct kfd_mem_attachment *attachment;
+
                list_for_each_entry(attachment, &mem->attachments, list) {
                        if (!attachment->is_mapped)
                                continue;
@@ -2948,18 +2966,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
                }
        }
 
-       if (failed_size)
-               pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
-
-       /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
-        * validations above would invalidate DMABuf imports again.
-        */
-       ret = process_validate_vms(process_info, &exec.ticket);
-       if (ret) {
-               pr_debug("Validating VMs failed, ret: %d\n", ret);
-               goto validate_map_fail;
-       }
-
        /* Update mappings not managed by KFD */
        list_for_each_entry(peer_vm, &process_info->vm_list_head,
                        vm_list_node) {