* flow by calling 'hl_hw_queue_update_ci'.
         */
        if (cs_needs_completion(cs) &&
-               (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW))
+                       (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) {
+
+               /* In CS-based completion mode, cs_finish() has already stored the
+                * timestamp in the CS, so copy it from the job only in job mode.
+                */
+               if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB)
+                       cs->completion_timestamp = job->timestamp;
+
                cs_put(cs);
+       }
 
        hl_cs_job_put(job);
 }
        }
 
        if (cs->timestamp) {
-               cs->fence->timestamp = ktime_get();
+               cs->fence->timestamp = cs->completion_timestamp;
                hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
                                   cs->fence->timestamp, cs->fence->error);
        }
 
  * @type: CS_TYPE_*.
  * @jobs_cnt: counter of submitted jobs on all queues.
  * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
+ * @completion_timestamp: timestamp of the last completed cs job.
  * @sob_addr_offset: sob offset from the configuration base address.
  * @initial_sob_count: count of completed signals in SOB before current submission of signal or
  *                     cs with encaps signals.
        struct list_head        staged_cs_node;
        struct list_head        debugfs_list;
        struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
+       ktime_t                 completion_timestamp;
        u64                     sequence;
        u64                     staged_sequence;
        u64                     timeout_jiffies;
  * @debugfs_list: node in debugfs list of command submission jobs.
  * @refcount: reference counter for usage of the CS job.
  * @queue_type: the type of the H/W queue this job is submitted to.
+ * @timestamp: interrupt timestamp recorded upon job completion.
  * @id: the id of this job inside a CS.
  * @hw_queue_id: the id of the H/W queue this job is submitted to.
  * @user_cb_size: the actual size of the CB we got from the user.
        struct list_head        debugfs_list;
        struct kref             refcount;
        enum hl_queue_type      queue_type;
+       ktime_t                 timestamp;
        u32                     id;
        u32                     hw_queue_id;
        u32                     user_cb_size;
 
  * @hdev: pointer to device structure
  * @cs_seq: command submission sequence
  * @cq: completion queue
+ * @timestamp: interrupt timestamp
  *
  */
-static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq)
+static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq, ktime_t timestamp)
 {
        struct hl_hw_queue *queue;
        struct hl_cs_job *job;
 
        queue = &hdev->kernel_queues[cq->hw_queue_id];
        job = queue->shadow_queue[hl_pi_2_offset(cs_seq)];
+       job->timestamp = timestamp;
        queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
 
        atomic_inc(&queue->ci);
  *
  * @hdev: pointer to device structure
  * @cs_seq: command submission sequence
+ * @timestamp: interrupt timestamp
  *
  */
-static void cs_finish(struct hl_device *hdev, u16 cs_seq)
+static void cs_finish(struct hl_device *hdev, u16 cs_seq, ktime_t timestamp)
 {
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_hw_queue *queue;
                atomic_inc(&queue->ci);
        }
 
+       cs->completion_timestamp = timestamp;
        queue_work(hdev->cs_cmplt_wq, &cs->finish_work);
 }
 
        bool shadow_index_valid, entry_ready;
        u16 shadow_index;
        struct hl_cq_entry *cq_entry, *cq_base;
+       ktime_t timestamp = ktime_get();
 
        if (hdev->disabled) {
                dev_dbg(hdev->dev,
                if (shadow_index_valid && !hdev->disabled) {
                        if (hdev->asic_prop.completion_mode ==
                                        HL_COMPLETION_MODE_CS)
-                               cs_finish(hdev, shadow_index);
+                               cs_finish(hdev, shadow_index, timestamp);
                        else
-                               job_finish(hdev, shadow_index, cq);
+                               job_finish(hdev, shadow_index, cq, timestamp);
                }
 
                /* Clear CQ entry ready bit */