www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
drm/amdgpu: reset vm state machine after gpu reset(vram lost)
author: ZhenGuo Yin <zhenguo.yin@amd.com>
Fri, 19 Jul 2024 08:10:40 +0000 (16:10 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 24 Jul 2024 21:30:49 +0000 (17:30 -0400)
[Why]
The page tables of a compute VM in VRAM will be lost after a GPU reset.
VRAM won't be restored since a compute VM has no shadows.

[How]
Use the upper 32 bits of vm->generation to record the vram_lost_counter.
Reset the VM state machine when vm->generation is not equal to
the new generation token.

v2: Check vm->generation instead of calling drm_sched_entity_error
in amdgpu_vm_validate.
v3: Use new generation token instead of vram_lost_counter for check.

Signed-off-by: ZhenGuo Yin <zhenguo.yin@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
(cherry picked from commit 47c0388b0589cb481c294dcb857d25a214c46eb3)

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 3abfa66d72a272cfd701d5043e1f0409124ecf35..a060c28f0877cd3b27547e58cd04354babead9ad 100644 (file)
@@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        if (!vm)
                return result;
 
-       result += vm->generation;
+       result += lower_32_bits(vm->generation);
        /* Add one if the page tables will be re-generated on next CS */
        if (drm_sched_entity_error(&vm->delayed))
                ++result;
@@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                       int (*validate)(void *p, struct amdgpu_bo *bo),
                       void *param)
 {
+       uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
        struct amdgpu_vm_bo_base *bo_base;
        struct amdgpu_bo *shadow;
        struct amdgpu_bo *bo;
        int r;
 
-       if (drm_sched_entity_error(&vm->delayed)) {
-               ++vm->generation;
+       if (vm->generation != new_vm_generation) {
+               vm->generation = new_vm_generation;
                amdgpu_vm_bo_reset_state_machine(vm);
                amdgpu_vm_fini_entities(vm);
                r = amdgpu_vm_init_entities(adev, vm);
@@ -2439,7 +2440,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        vm->last_update = dma_fence_get_stub();
        vm->last_unlocked = dma_fence_get_stub();
        vm->last_tlb_flush = dma_fence_get_stub();
-       vm->generation = 0;
+       vm->generation = amdgpu_vm_generation(adev, NULL);
 
        mutex_init(&vm->eviction_lock);
        vm->evicting = false;