if (ret)
                goto bail_free_cntrs;
 
+       init_completion(&dd->user_comp);
+
+       /* The user refcount starts with one to indicate an active device */
+       atomic_set(&dd->user_refcount, 1);
+
        goto bail;
 
 bail_free_rcverr:
 
                                               struct hfi1_devdata,
                                               user_cdev);
 
+       if (!atomic_inc_not_zero(&dd->user_refcount))
+               return -ENXIO;
+
        /* Just take a ref now. Not all opens result in a context assign */
        kobject_get(&dd->kobj);
 
                fd->rec_cpu_num = -1; /* no cpu affinity by default */
                fd->mm = current->mm;
                atomic_inc(&fd->mm->mm_count);
-       }
+               fp->private_data = fd;
+       } else {
+               fp->private_data = NULL;
+
+               if (atomic_dec_and_test(&dd->user_refcount))
+                       complete(&dd->user_comp);
 
-       fp->private_data = fd;
+               return -ENOMEM;
+       }
 
-       return fd ? 0 : -ENOMEM;
+       return 0;
 }
 
 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 done:
        mmdrop(fdata->mm);
        kobject_put(&dd->kobj);
+
+       if (atomic_dec_and_test(&dd->user_refcount))
+               complete(&dd->user_comp);
+
        kfree(fdata);
        return 0;
 }
 
        spinlock_t aspm_lock;
        /* Number of verbs contexts which have disabled ASPM */
        atomic_t aspm_disabled_cnt;
+       /* Keeps track of user space clients */
+       atomic_t user_refcount;
+       /* Used to wait for outstanding user space clients before dev removal */
+       struct completion user_comp;
 
        struct hfi1_affinity *affinity;
        struct rhashtable sdma_rht;
 
        return ret;
 }
 
+static void wait_for_clients(struct hfi1_devdata *dd)
+{
+       struct completion *done = &dd->user_comp;
+
+       /*
+        * Drop the reference the device has held since init. If that was
+        * the last one, there are no active user space clients and the
+        * completion is signalled right here; otherwise it is signalled
+        * by the last client when it drops its reference.
+        */
+       if (atomic_dec_and_test(&dd->user_refcount))
+               complete(done);
+
+       /* Block until the user reference count has reached zero. */
+       wait_for_completion(done);
+}
+
 static void remove_one(struct pci_dev *pdev)
 {
        struct hfi1_devdata *dd = pci_get_drvdata(pdev);
 
        /* close debugfs files before ib unregister */
        hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+
+       /*
+        * Remove the /dev hfi1 interface. This now happens ahead of the
+        * IB unregister and the workqueue flush instead of just before
+        * postinit_cleanup().
+        */
+       hfi1_device_remove(dd);
+
+       /*
+        * Wait for existing user space clients to finish: drops the
+        * device's own user_refcount reference and blocks on user_comp
+        * until the count has reached zero.
+        */
+       wait_for_clients(dd);
+
        /* unregister from IB core */
        hfi1_unregister_ib_device(dd);
 
        /* wait until all of our (qsfp) queue_work() calls complete */
        flush_workqueue(ib_wq);
 
-       hfi1_device_remove(dd);
-
        postinit_cleanup(dd);
 }