struct nvkm_vm {
        struct nvkm_mmu *mmu;
+
+       struct mutex mutex;
        struct nvkm_mm mm;
        struct kref refcount;
 
        u8  lpg_shift;
 
        int  (*create)(struct nvkm_mmu *, u64 offset, u64 length,
-                      u64 mm_offset, struct nvkm_vm **);
+                      u64 mm_offset, struct lock_class_key *,
+                      struct nvkm_vm **);
 
        void (*map_pgt)(struct nvkm_gpuobj *pgd, u32 pde,
                        struct nvkm_gpuobj *pgt[2]);
 extern struct nvkm_oclass nv50_mmu_oclass;
 extern struct nvkm_oclass gf100_mmu_oclass;
 
-int  nv04_vm_create(struct nvkm_mmu *, u64, u64, u64,
+int  nv04_vm_create(struct nvkm_mmu *, u64, u64, u64, struct lock_class_key *,
                    struct nvkm_vm **);
 void nv04_mmu_dtor(struct nvkm_object *);
 
 int  nvkm_vm_create(struct nvkm_mmu *, u64 offset, u64 length, u64 mm_offset,
-                   u32 block, struct nvkm_vm **);
+                   u32 block, struct lock_class_key *, struct nvkm_vm **);
 int  nvkm_vm_new(struct nvkm_device *, u64 offset, u64 length, u64 mm_offset,
-                struct nvkm_vm **);
+                struct lock_class_key *, struct nvkm_vm **);
 int  nvkm_vm_ref(struct nvkm_vm *, struct nvkm_vm **, struct nvkm_gpuobj *pgd);
 int  nvkm_vm_get(struct nvkm_vm *, u64 size, u32 page_shift, u32 access,
                 struct nvkm_vma *);
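The header changes above give each nvkm_vm its own mutex and thread an
optional struct lock_class_key * through every VM-creation path. The key
only matters to lockdep: all mutexes initialised from the same call site
share one lock class, so two VM mutexes that legitimately nest would
otherwise be reported as recursive locking. A minimal sketch of the idiom,
with illustrative names that are not part of the nouveau API:

	#include <linux/mutex.h>

	static struct lock_class_key demo_key;	/* keys must live in static storage */
	static struct mutex outer, inner;

	static void demo_nesting(void)
	{
		mutex_init(&outer);                       /* class: this call site */
		__mutex_init(&inner, "inner", &demo_key); /* class: demo_key */

		mutex_lock(&outer);
		mutex_lock(&inner);	/* distinct classes: no false recursion */
		mutex_unlock(&inner);
		mutex_unlock(&outer);
	}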
 
 
        if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
                ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
-                                 0x1000, &drm->client.vm);
+                                 0x1000, NULL, &drm->client.vm);
                if (ret)
                        goto fail_device;
 
 
        if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
                ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
-                                 0x1000, &cli->vm);
+                                 0x1000, NULL, &cli->vm);
                if (ret) {
                        nouveau_cli_destroy(cli);
                        goto out_suspend;
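Both DRM client paths pass NULL for the new key argument: ordinary client
VMs all share one default lock class. Only the BAR VMs below, whose mutexes
can end up being taken while another VM's mutex is already held, get
dedicated keys. Inside nvkm_vm_create() (see the hunk further down) a NULL
key simply falls back to a file-static default:

	__mutex_init(&vm->mutex, "&vm->mutex", key ? key : &_key);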
 
        nvkm_vm_put(vma);
 }
 
+
 static int
 gf100_bar_ctor_vm(struct gf100_bar *bar, struct gf100_bar_vm *bar_vm,
-                 int bar_nr)
+                 struct lock_class_key *key, int bar_nr)
 {
        struct nvkm_device *device = nv_device(&bar->base);
        struct nvkm_vm *vm;
 
        bar_len = nv_device_resource_len(device, bar_nr);
 
-       ret = nvkm_vm_new(device, 0, bar_len, 0, &vm);
+       ret = nvkm_vm_new(device, 0, bar_len, 0, key, &vm);
        if (ret)
                return ret;
 
               struct nvkm_oclass *oclass, void *data, u32 size,
               struct nvkm_object **pobject)
 {
+       static struct lock_class_key bar1_lock;
+       static struct lock_class_key bar3_lock;
        struct nvkm_device *device = nv_device(parent);
        struct gf100_bar *bar;
        bool has_bar3 = nv_device_resource_len(device, 3) != 0;
 
        /* BAR3 */
        if (has_bar3) {
-               ret = gf100_bar_ctor_vm(bar, &bar->bar[0], 3);
+               ret = gf100_bar_ctor_vm(bar, &bar->bar[0], &bar3_lock, 3);
                if (ret)
                        return ret;
        }
 
        /* BAR1 */
-       ret = gf100_bar_ctor_vm(bar, &bar->bar[1], 1);
+       ret = gf100_bar_ctor_vm(bar, &bar->bar[1], &bar1_lock, 1);
        if (ret)
                return ret;
 
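The two static keys are what actually split the lock classes: both BAR VMs
are initialised through the same nvkm_vm_create() call site, so without
explicit keys their mutexes would share a class. The nesting this guards
against is, as I read the change, roughly:

	/* Hypothetical chain that a single shared class would flag as
	 * recursive locking even though the mutexes are distinct:
	 *
	 *	mutex_lock(&client_vm->mutex);	class: default _key
	 *	  ... mapping through BAR ...
	 *	  mutex_lock(&bar1_vm->mutex);	class: bar1_lock
	 */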
 
              struct nvkm_oclass *oclass, void *data, u32 size,
              struct nvkm_object **pobject)
 {
+       static struct lock_class_key bar1_lock;
+       static struct lock_class_key bar3_lock;
        struct nvkm_device *device = nv_device(parent);
        struct nvkm_object *heap;
        struct nvkm_vm *vm;
        start = 0x0100000000ULL;
        limit = start + nv_device_resource_len(device, 3);
 
-       ret = nvkm_vm_new(device, start, limit, start, &vm);
+       ret = nvkm_vm_new(device, start, limit, start, &bar3_lock, &vm);
        if (ret)
                return ret;
 
        start = 0x0000000000ULL;
        limit = start + nv_device_resource_len(device, 1);
 
-       ret = nvkm_vm_new(device, start, limit--, start, &vm);
+       ret = nvkm_vm_new(device, start, limit--, start, &bar1_lock, &vm);
        if (ret)
                return ret;
 
 
                        mmu->map_pgt(vpgd->obj, pde, vpgt->obj);
                }
 
-               mutex_unlock(&nv_subdev(mmu)->mutex);
                nvkm_gpuobj_ref(NULL, &pgt);
-               mutex_lock(&nv_subdev(mmu)->mutex);
        }
 }
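The unlock/relock dance removed above was a self-deadlock guard, presumably
because dropping the last reference on a page table could re-enter code
that takes the shared subdev mutex. The new per-VM mutex is not taken on
the gpuobj teardown path, so the reference can be dropped without releasing
the lock:

	/* Old pattern around any call that might recurse into the
	 * subdev mutex:
	 *
	 *	mutex_unlock(&nv_subdev(mmu)->mutex);
	 *	nvkm_gpuobj_ref(NULL, &pgt);
	 *	mutex_lock(&nv_subdev(mmu)->mutex);
	 *
	 * New pattern: call nvkm_gpuobj_ref() directly under vm->mutex.
	 */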
 
        struct nvkm_mmu *mmu = vm->mmu;
        struct nvkm_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde];
        struct nvkm_vm_pgd *vpgd;
-       struct nvkm_gpuobj *pgt;
        int big = (type != mmu->spg_shift);
        u32 pgt_size;
        int ret;
        pgt_size  = (1 << (mmu->pgt_bits + 12)) >> type;
        pgt_size *= 8;
 
-       mutex_unlock(&nv_subdev(mmu)->mutex);
        ret = nvkm_gpuobj_new(nv_object(vm->mmu), NULL, pgt_size, 0x1000,
-                             NVOBJ_FLAG_ZERO_ALLOC, &pgt);
-       mutex_lock(&nv_subdev(mmu)->mutex);
+                             NVOBJ_FLAG_ZERO_ALLOC, &vpgt->obj[big]);
        if (unlikely(ret))
                return ret;
 
-       /* someone beat us to filling the PDE while we didn't have the lock */
-       if (unlikely(vpgt->refcount[big]++)) {
-               mutex_unlock(&nv_subdev(mmu)->mutex);
-               nvkm_gpuobj_ref(NULL, &pgt);
-               mutex_lock(&nv_subdev(mmu)->mutex);
-               return 0;
-       }
-
-       vpgt->obj[big] = pgt;
        list_for_each_entry(vpgd, &vm->pgd_list, head) {
                mmu->map_pgt(vpgd->obj, pde, vpgt->obj);
        }
 
+       vpgt->refcount[big]++;
        return 0;
 }
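Dropping the lock around nvkm_gpuobj_new() is also what made the removed
"someone beat us to filling the PDE" re-check necessary: another thread
could populate the same PDE during the window. nvkm_vm_get() (next hunk)
now holds vm->mutex across the whole operation, so the allocation can land
directly in vpgt->obj[big] and the refcount bump moves to the success path:

	/* Caller-side shape after this change, from nvkm_vm_get():
	 *
	 *	mutex_lock(&vm->mutex);
	 *	nvkm_mm_head(...);	allocate address space
	 *	nvkm_vm_map_pgt(...);	page tables, now inside the lock
	 *	mutex_unlock(&vm->mutex);
	 */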
 
        u32 fpde, lpde, pde;
        int ret;
 
-       mutex_lock(&nv_subdev(mmu)->mutex);
+       mutex_lock(&vm->mutex);
        ret = nvkm_mm_head(&vm->mm, 0, page_shift, msize, msize, align,
                           &vma->node);
        if (unlikely(ret != 0)) {
-               mutex_unlock(&nv_subdev(mmu)->mutex);
+               mutex_unlock(&vm->mutex);
                return ret;
        }
 
                        if (pde != fpde)
                                nvkm_vm_unmap_pgt(vm, big, fpde, pde - 1);
                        nvkm_mm_free(&vm->mm, &vma->node);
-                       mutex_unlock(&nv_subdev(mmu)->mutex);
+                       mutex_unlock(&vm->mutex);
                        return ret;
                }
        }
-       mutex_unlock(&nv_subdev(mmu)->mutex);
+       mutex_unlock(&vm->mutex);
 
        vma->vm = NULL;
        nvkm_vm_ref(vm, &vma->vm, NULL);
        fpde = (vma->node->offset >> mmu->pgt_bits);
        lpde = (vma->node->offset + vma->node->length - 1) >> mmu->pgt_bits;
 
-       mutex_lock(&nv_subdev(mmu)->mutex);
+       mutex_lock(&vm->mutex);
        nvkm_vm_unmap_pgt(vm, vma->node->type != mmu->spg_shift, fpde, lpde);
        nvkm_mm_free(&vm->mm, &vma->node);
-       mutex_unlock(&nv_subdev(mmu)->mutex);
+       mutex_unlock(&vm->mutex);
 
        nvkm_vm_ref(NULL, &vma->vm, NULL);
 }
 
 int
 nvkm_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset,
-              u32 block, struct nvkm_vm **pvm)
+              u32 block, struct lock_class_key *key, struct nvkm_vm **pvm)
 {
+       static struct lock_class_key _key;
        struct nvkm_vm *vm;
        u64 mm_length = (offset + length) - mm_offset;
        int ret;
        if (!vm)
                return -ENOMEM;
 
+       __mutex_init(&vm->mutex, "&vm->mutex", key ? key : &_key);
        INIT_LIST_HEAD(&vm->pgd_list);
        vm->mmu = mmu;
        kref_init(&vm->refcount);
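A plain mutex_init() would defeat the point here: it expands to a static
key scoped to this single call site, putting every VM's mutex back in one
class. For reference, the stock macro is roughly:

	#define mutex_init(mutex)					\
	do {								\
		static struct lock_class_key __key;			\
									\
		__mutex_init((mutex), #mutex, &__key);			\
	} while (0)

Calling __mutex_init() with the caller's key, and the static _key as the
NULL fallback, keeps one default class for ordinary VMs while letting the
BAR constructors override it.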
 
 int
 nvkm_vm_new(struct nvkm_device *device, u64 offset, u64 length, u64 mm_offset,
-           struct nvkm_vm **pvm)
+           struct lock_class_key *key, struct nvkm_vm **pvm)
 {
        struct nvkm_mmu *mmu = nvkm_mmu(device);
-       return mmu->create(mmu, offset, length, mm_offset, pvm);
+       return mmu->create(mmu, offset, length, mm_offset, key, pvm);
 }
 
 static int
 
        nvkm_gpuobj_ref(pgd, &vpgd->obj);
 
-       mutex_lock(&nv_subdev(mmu)->mutex);
+       mutex_lock(&vm->mutex);
        for (i = vm->fpde; i <= vm->lpde; i++)
                mmu->map_pgt(pgd, i, vm->pgt[i - vm->fpde].obj);
        list_add(&vpgd->head, &vm->pgd_list);
-       mutex_unlock(&nv_subdev(mmu)->mutex);
+       mutex_unlock(&vm->mutex);
        return 0;
 }
 
 static void
 nvkm_vm_unlink(struct nvkm_vm *vm, struct nvkm_gpuobj *mpgd)
 {
-       struct nvkm_mmu *mmu = vm->mmu;
        struct nvkm_vm_pgd *vpgd, *tmp;
        struct nvkm_gpuobj *pgd = NULL;
 
        if (!mpgd)
                return;
 
-       mutex_lock(&nv_subdev(mmu)->mutex);
+       mutex_lock(&vm->mutex);
        list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) {
                if (vpgd->obj == mpgd) {
                        pgd = vpgd->obj;
                        break;
                }
        }
-       mutex_unlock(&nv_subdev(mmu)->mutex);
+       mutex_unlock(&vm->mutex);
 
        nvkm_gpuobj_ref(NULL, &pgd);
 }
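nvkm_vm_link()/nvkm_vm_unlink() now serialise on the VM whose pgd list they
touch instead of on the whole subdev; the mmu local in unlink is dropped
simply because nothing needs it once the subdev mutex reference is gone.
Both functions end up with the same shape:

	mutex_lock(&vm->mutex);
	/* walk or modify vm->pgd_list */
	mutex_unlock(&vm->mutex);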
 
 
 static int
 gf100_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset,
-               struct nvkm_vm **pvm)
+               struct lock_class_key *key, struct nvkm_vm **pvm)
 {
-       return nvkm_vm_create(mmu, offset, length, mm_offset, 4096, pvm);
+       return nvkm_vm_create(mmu, offset, length, mm_offset, 4096, key, pvm);
 }
 
 static int
 
 
 int
 nv04_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mmstart,
-              struct nvkm_vm **pvm)
+              struct lock_class_key *key, struct nvkm_vm **pvm)
 {
        return -EINVAL;
 }
        mmu->base.unmap = nv04_vm_unmap;
        mmu->base.flush = nv04_vm_flush;
 
-       ret = nvkm_vm_create(&mmu->base, 0, NV04_PDMA_SIZE, 0, 4096,
+       ret = nvkm_vm_create(&mmu->base, 0, NV04_PDMA_SIZE, 0, 4096, NULL,
                             &mmu->vm);
        if (ret)
                return ret;
 
        mmu->base.unmap = nv41_vm_unmap;
        mmu->base.flush = nv41_vm_flush;
 
-       ret = nvkm_vm_create(&mmu->base, 0, NV41_GART_SIZE, 0, 4096,
+       ret = nvkm_vm_create(&mmu->base, 0, NV41_GART_SIZE, 0, 4096, NULL,
                             &mmu->vm);
        if (ret)
                return ret;
 
                mmu->null = 0;
        }
 
-       ret = nvkm_vm_create(&mmu->base, 0, NV44_GART_SIZE, 0, 4096,
+       ret = nvkm_vm_create(&mmu->base, 0, NV44_GART_SIZE, 0, 4096, NULL,
                             &mmu->vm);
        if (ret)
                return ret;
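The nv04/nv41/nv44 GART VMs above all pass NULL and land in the default
class; nothing in these paths appears to nest one VM mutex inside another,
so no dedicated keys are needed. Spelled out:

	/* NULL key -> shared default class from nvkm_vm_create():
	 *
	 *	nvkm_vm_create(&mmu->base, 0, size, 0, 4096, NULL, &mmu->vm);
	 *	  -> __mutex_init(&vm->mutex, "&vm->mutex", &_key);
	 */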
 
 }
 
 static int
-nv50_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length,
-              u64 mm_offset, struct nvkm_vm **pvm)
+nv50_vm_create(struct nvkm_mmu *mmu, u64 offset, u64 length, u64 mm_offset,
+              struct lock_class_key *key, struct nvkm_vm **pvm)
 {
        u32 block = (1 << (mmu->pgt_bits + 12));
        if (block > length)
                block = length;
 
-       return nvkm_vm_create(mmu, offset, length, mm_offset, block, pvm);
+       return nvkm_vm_create(mmu, offset, length, mm_offset, block, key, pvm);
 }
 
 static int