bool msm_use_mmu(struct drm_device *dev);
 
-void msm_gem_submit_free(struct msm_gem_submit *submit);
 int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
                struct drm_file *file);
 
 
  * lasts for the duration of the submit-ioctl.
  */
 struct msm_gem_submit {
+       struct kref ref;
        struct drm_device *dev;
        struct msm_gpu *gpu;
        struct msm_gem_address_space *aspace;
        } bos[];
 };
 
+void __msm_gem_submit_destroy(struct kref *kref);
+
+/* Take an additional reference on @submit by incrementing its kref. */
+static inline void msm_gem_submit_get(struct msm_gem_submit *submit)
+{
+       kref_get(&submit->ref);
+}
+
+/*
+ * Drop a reference on @submit.  __msm_gem_submit_destroy() is handed to
+ * kref_put() as the release callback for the submit's kref.
+ */
+static inline void msm_gem_submit_put(struct msm_gem_submit *submit)
+{
+       kref_put(&submit->ref, __msm_gem_submit_destroy);
+}
+
 /* helper to determine if a buffer in submit should be dumped, used for both
  * devcoredump and debugfs cmdstream dumping:
  */
 
        if (!submit)
                return NULL;
 
+       kref_init(&submit->ref);
        submit->dev = dev;
        submit->aspace = queue->ctx->aspace;
        submit->gpu = gpu;
        return submit;
 }
 
-void msm_gem_submit_free(struct msm_gem_submit *submit)
+void __msm_gem_submit_destroy(struct kref *kref)
 {
+       struct msm_gem_submit *submit =
+                       container_of(kref, struct msm_gem_submit, ref);
        unsigned i;
 
        dma_fence_put(submit->fence);
-       spin_lock(&submit->ring->submit_lock);
-       list_del(&submit->node);
-       spin_unlock(&submit->ring->submit_lock);
        put_pid(submit->pid);
        msm_submitqueue_put(submit->queue);
 
        submit_cleanup(submit);
        if (has_ww_ticket)
                ww_acquire_fini(&submit->ticket);
-       if (ret)
-               msm_gem_submit_free(submit);
+       msm_gem_submit_put(submit);
 out_unlock:
        if (ret && (out_fence_fd >= 0))
                put_unused_fd(out_fence_fd);
 
 
        pm_runtime_mark_last_busy(&gpu->pdev->dev);
        pm_runtime_put_autosuspend(&gpu->pdev->dev);
-       msm_gem_submit_free(submit);
+
+       spin_lock(&ring->submit_lock);
+       list_del(&submit->node);
+       spin_unlock(&ring->submit_lock);
+
+       msm_gem_submit_put(submit);
 }
 
 static void retire_submits(struct msm_gpu *gpu)
 
        submit->seqno = ++ring->seqno;
 
-       spin_lock(&ring->submit_lock);
-       list_add_tail(&submit->node, &ring->submits);
-       spin_unlock(&ring->submit_lock);
-
        msm_rd_dump_submit(priv->rd, submit, NULL);
 
        update_sw_cntrs(gpu);
                msm_gem_active_get(drm_obj, gpu);
        }
 
+       /*
+        * ring->submits holds a ref to the submit, to deal with the case
+        * that a submit completes before msm_ioctl_gem_submit() returns.
+        */
+       msm_gem_submit_get(submit);
+
+       spin_lock(&ring->submit_lock);
+       list_add_tail(&submit->node, &ring->submits);
+       spin_unlock(&ring->submit_lock);
+
        gpu->funcs->submit(gpu, submit);
        priv->lastctx = submit->queue->ctx;