return 0;
 }
 
+static void sob_reset_work(struct work_struct *work)
+{
+       struct hl_cs_compl *hl_cs_cmpl =
+               container_of(work, struct hl_cs_compl, sob_reset_work);
+       struct hl_device *hdev = hl_cs_cmpl->hdev;
+
+       /*
+        * A signal CS can get completion while the corresponding wait
+        * for signal CS is on its way to the PQ. The wait for signal CS
+        * will get stuck if the signal CS incremented the SOB to its
+        * max value and there are no pending (submitted) waits on this
+        * SOB.
+        * We do the following to avoid this situation:
+        * 1. The wait for signal CS must get a ref for the signal CS as
+        *    soon as possible in cs_ioctl_signal_wait() and put it
+        *    before being submitted to the PQ but after it incremented
+        *    the SOB refcnt in init_signal_wait_cs().
+        * 2. Signal/Wait for signal CS will decrement the SOB refcnt
+        *    here.
+        * These two measures guarantee that the wait for signal CS will
+        * reset the SOB upon completion rather than the signal CS and
+        * hence the above scenario is avoided.
+        */
+       kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+
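+       /* a collective wait CS also uses a SOB group which must be reset */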
+       if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
+               hdev->asic_funcs->reset_sob_group(hdev,
+                               hl_cs_cmpl->sob_group);
+
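+       /* the completion object is freed here instead of in hl_fence_release() */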
+       kfree(hl_cs_cmpl);
+}
+
 static void hl_fence_release(struct kref *kref)
 {
        struct hl_fence *fence =
                        hl_cs_cmpl->hw_sob->sob_id,
                        hl_cs_cmpl->sob_val);
 
-               /*
-                * A signal CS can get completion while the corresponding wait
-                * for signal CS is on its way to the PQ. The wait for signal CS
-                * will get stuck if the signal CS incremented the SOB to its
-                * max value and there are no pending (submitted) waits on this
-                * SOB.
-                * We do the following to void this situation:
-                * 1. The wait for signal CS must get a ref for the signal CS as
-                *    soon as possible in cs_ioctl_signal_wait() and put it
-                *    before being submitted to the PQ but after it incremented
-                *    the SOB refcnt in init_signal_wait_cs().
-                * 2. Signal/Wait for signal CS will decrement the SOB refcnt
-                *    here.
-                * These two measures guarantee that the wait for signal CS will
-                * reset the SOB upon completion rather than the signal CS and
-                * hence the above scenario is avoided.
-                */
-               kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+               queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
 
-               if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
-                       hdev->asic_funcs->reset_sob_group(hdev,
-                                       hl_cs_cmpl->sob_group);
+               return;
        }
 
 free:
                goto free_cs;
        }
 
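+       /* try a non-blocking allocation first, fall back to GFP_KERNEL on failure */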
+       cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+                       sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+       if (!cs->jobs_in_queue_cnt)
+               cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+                               sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
+
+       if (!cs->jobs_in_queue_cnt) {
+               atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+               atomic64_inc(&cntr->out_of_mem_drop_cnt);
+               rc = -ENOMEM;
+               goto free_cs_cmpl;
+       }
+
        cs_cmpl->hdev = hdev;
        cs_cmpl->type = cs->type;
        spin_lock_init(&cs_cmpl->lock);
+       INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
        cs->fence = &cs_cmpl->base_fence;
 
        spin_lock(&ctx->cs_lock);
                goto free_fence;
        }
 
-       cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
-                       sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
-       if (!cs->jobs_in_queue_cnt)
-               cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
-                               sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
-
-       if (!cs->jobs_in_queue_cnt) {
-               atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
-               atomic64_inc(&cntr->out_of_mem_drop_cnt);
-               rc = -ENOMEM;
-               goto free_fence;
-       }
-
        /* init hl_fence */
        hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
 
 
 free_fence:
        spin_unlock(&ctx->cs_lock);
+       kfree(cs->jobs_in_queue_cnt);
+free_cs_cmpl:
        kfree(cs_cmpl);
 free_cs:
        kfree(cs);
        int i;
        struct hl_cs *cs, *tmp;
 
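+       /* wait for any queued SOB reset works to finish */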
+       flush_workqueue(hdev->sob_reset_wq);
+
        /* flush all completions before iterating over the CS mirror list in
         * order to avoid a race with the release functions
         */
 
                goto free_cq_wq;
        }
 
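+       /* unbound workqueue: SOB reset works are not tied to a specific CPU */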
+       hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
+       if (!hdev->sob_reset_wq) {
+               dev_err(hdev->dev,
+                       "Failed to allocate SOB reset workqueue\n");
+               rc = -ENOMEM;
+               goto free_eq_wq;
+       }
+
        hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
                                        GFP_KERNEL);
        if (!hdev->hl_chip_info) {
                rc = -ENOMEM;
-               goto free_eq_wq;
+               goto free_sob_reset_wq;
        }
 
        hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
        kfree(hdev->idle_busy_ts_arr);
 free_chip_info:
        kfree(hdev->hl_chip_info);
+free_sob_reset_wq:
+       destroy_workqueue(hdev->sob_reset_wq);
 free_eq_wq:
        destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
        kfree(hdev->idle_busy_ts_arr);
        kfree(hdev->hl_chip_info);
 
+       destroy_workqueue(hdev->sob_reset_wq);
        destroy_workqueue(hdev->eq_wq);
        destroy_workqueue(hdev->device_reset_work.wq);
 
 
 
 /**
  * struct hl_cs_compl - command submission completion object.
+ * @sob_reset_work: work object to run the SOB reset flow.
  * @base_fence: hl fence object.
  * @lock: spinlock to protect fence.
  * @hdev: habanalabs device structure.
  * @sob_group: the SOB group that is used in this collective wait CS.
  */
 struct hl_cs_compl {
+       struct work_struct      sob_reset_work;
        struct hl_fence         base_fence;
        spinlock_t              lock;
        struct hl_device        *hdev;
  * @cq_wq: work queues of completion queues for executing work in process
  *         context.
  * @eq_wq: work queue of event queue for executing work in process context.
+ * @sob_reset_wq: work queue for SOB reset executions.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
  * @cs_mirror_list: CS mirror list for TDR.
        struct hl_user_interrupt        common_user_interrupt;
        struct workqueue_struct         **cq_wq;
        struct workqueue_struct         *eq_wq;
+       struct workqueue_struct         *sob_reset_wq;
        struct hl_ctx                   *kernel_ctx;
        struct hl_hw_queue              *kernel_queues;
        struct list_head                cs_mirror_list;