u32 hwq_index;
 
        u8 cmd_tmf:1;
+       struct list_head list;  /* Pending commands link */
 
        /* As per the SISLITE spec the IOARCB EA has to be 16-byte aligned.
         * However for performance reasons the IOARCB/IOASA should be
        struct sisl_ctrl_map __iomem *ctrl_map;         /* MC control map */
        ctx_hndl_t ctx_hndl;    /* master's context handle */
        u32 index;              /* Index of this hwq */
+       struct list_head pending_cmds;  /* Commands pending completion */
 
        atomic_t hsq_credits;
        spinlock_t hsq_slock;   /* Hardware send queue lock */
 
        struct afu *afu = cmd->parent;
        struct cxlflash_cfg *cfg = afu->parent;
        struct device *dev = &cfg->dev->dev;
+       struct hwq *hwq = get_hwq(afu, cmd->hwq_index);
        bool cmd_is_tmf;
 
+       spin_lock_irqsave(&hwq->hsq_slock, lock_flags);
+       list_del(&cmd->list);
+       spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags);
+
        if (cmd->scp) {
                scp = cmd->scp;
                if (unlikely(cmd->sa.ioasc))
                hwq->room = room - 1;
        }
 
+       list_add(&cmd->list, &hwq->pending_cmds);
        writeq_be((u64)&cmd->rcb, &hwq->host_map->ioarrin);
 out:
        spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags);
                hwq->hsq_curr++;
        else
                hwq->hsq_curr = hwq->hsq_start;
+
+       list_add(&cmd->list, &hwq->pending_cmds);
        writeq_be((u64)hwq->hsq_curr, &hwq->host_map->sq_tail);
 
        spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags);
 
        hwq->afu = cfg->afu;
        hwq->index = index;
+       INIT_LIST_HEAD(&hwq->pending_cmds);
 
        if (index == PRIMARY_HWQ)
                ctx = cxl_get_context(cfg->dev);