{
        struct m_can_classdev *cdev = netdev_priv(net);
 
-       if (cdev->tx_skb) {
-               u32 putidx = 0;
+       if (cdev->tx_ops) {
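+               /* Drop any skbs still queued for the TX workqueue and count
+                * them as TX errors.
+                */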
+               for (int i = 0; i != cdev->tx_fifo_size; ++i) {
+                       if (!cdev->tx_ops[i].skb)
+                               continue;
 
-               net->stats.tx_errors++;
-               if (cdev->version > 30)
-                       putidx = FIELD_GET(TXFQS_TFQPI_MASK,
-                                          m_can_read(cdev, M_CAN_TXFQS));
-
-               can_free_echo_skb(cdev->net, putidx, NULL);
-               cdev->tx_skb = NULL;
+                       net->stats.tx_errors++;
+                       cdev->tx_ops[i].skb = NULL;
+               }
        }
+
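+       /* Free all echo skbs; can_free_echo_skb() ignores empty slots. */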
+       for (int i = 0; i != cdev->can.echo_skb_max; ++i)
+               can_free_echo_skb(cdev->net, i, NULL);
 }
 
 /* For peripherals, pass skb to rx-offload, which will push skb from
        m_can_clk_stop(cdev);
        free_irq(dev->irq, dev);
 
+       m_can_clean(dev);
+
        if (cdev->is_peripheral) {
-               cdev->tx_skb = NULL;
                destroy_workqueue(cdev->tx_wq);
                cdev->tx_wq = NULL;
                can_rx_offload_disable(&cdev->offload);
        return !!cdev->can.echo_skb[next_idx];
 }
 
-static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
+static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev,
+                                   struct sk_buff *skb)
 {
-       struct canfd_frame *cf = (struct canfd_frame *)cdev->tx_skb->data;
+       struct canfd_frame *cf = (struct canfd_frame *)skb->data;
        u8 len_padded = DIV_ROUND_UP(cf->len, 4);
        struct m_can_fifo_element fifo_element;
        struct net_device *dev = cdev->net;
-       struct sk_buff *skb = cdev->tx_skb;
        u32 cccr, fdflags;
        u32 txfqs;
        int err;
        u32 putidx;
 
-       cdev->tx_skb = NULL;
-
        /* Generate ID field for TX buffer Element */
        /* Common to all supported M_CAN versions */
        if (cf->can_id & CAN_EFF_FLAG) {
 
 static void m_can_tx_work_queue(struct work_struct *ws)
 {
-       struct m_can_classdev *cdev = container_of(ws, struct m_can_classdev,
-                                                  tx_work);
+       struct m_can_tx_op *op = container_of(ws, struct m_can_tx_op, work);
+       struct m_can_classdev *cdev = op->cdev;
+       struct sk_buff *skb = op->skb;
 
-       m_can_tx_handler(cdev);
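+       /* Drop the tx_op's reference before transmitting; the skb belongs to
+        * m_can_tx_handler() from here on.
+        */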
+       op->skb = NULL;
+       m_can_tx_handler(cdev, skb);
+}
+
+static void m_can_tx_queue_skb(struct m_can_classdev *cdev, struct sk_buff *skb)
+{
+       cdev->tx_ops[cdev->next_tx_op].skb = skb;
+       queue_work(cdev->tx_wq, &cdev->tx_ops[cdev->next_tx_op].work);
+
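+       /* Advance to the next tx_op slot, wrapping around at tx_fifo_size. */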
+       ++cdev->next_tx_op;
+       if (cdev->next_tx_op >= cdev->tx_fifo_size)
+               cdev->next_tx_op = 0;
+}
+
+static netdev_tx_t m_can_start_peripheral_xmit(struct m_can_classdev *cdev,
+                                              struct sk_buff *skb)
+{
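+       /* Stop the queue while the frame is handed to the workqueue to avoid
+        * piling up requests, as the previous single-skb path did; it is
+        * woken again from TX completion handling once the FIFO has room.
+        */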
+       netif_stop_queue(cdev->net);
+       m_can_tx_queue_skb(cdev, skb);
+
+       return NETDEV_TX_OK;
 }
 
 static netdev_tx_t m_can_start_xmit(struct sk_buff *skb,
        if (can_dev_dropped_skb(dev, skb))
                return NETDEV_TX_OK;
 
-       if (cdev->is_peripheral) {
-               if (cdev->tx_skb) {
-                       netdev_err(dev, "hard_xmit called while tx busy\n");
-                       return NETDEV_TX_BUSY;
-               }
-
-               if (cdev->can.state == CAN_STATE_BUS_OFF) {
-                       m_can_clean(dev);
-               } else {
-                       /* Need to stop the queue to avoid numerous requests
-                        * from being sent.  Suggested improvement is to create
-                        * a queueing mechanism that will queue the skbs and
-                        * process them in order.
-                        */
-                       cdev->tx_skb = skb;
-                       netif_stop_queue(cdev->net);
-                       queue_work(cdev->tx_wq, &cdev->tx_work);
-               }
-       } else {
-               cdev->tx_skb = skb;
-               return m_can_tx_handler(cdev);
+       if (cdev->can.state == CAN_STATE_BUS_OFF) {
+               m_can_clean(cdev->net);
+               return NETDEV_TX_OK;
        }
 
-       return NETDEV_TX_OK;
+       if (cdev->is_peripheral)
+               return m_can_start_peripheral_xmit(cdev, skb);
+       else
+               return m_can_tx_handler(cdev, skb);
 }
 
 static enum hrtimer_restart hrtimer_callback(struct hrtimer *timer)
 
        /* register interrupt handler */
        if (cdev->is_peripheral) {
-               cdev->tx_skb = NULL;
-               cdev->tx_wq = alloc_workqueue("mcan_wq",
-                                             WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
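+               /* An ordered workqueue processes the queued tx_ops one at a
+                * time, in submission order.
+                */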
+               cdev->tx_wq = alloc_ordered_workqueue("mcan_wq",
+                                                     WQ_FREEZABLE | WQ_MEM_RECLAIM);
                if (!cdev->tx_wq) {
                        err = -ENOMEM;
                        goto out_wq_fail;
                }
 
-               INIT_WORK(&cdev->tx_work, m_can_tx_work_queue);
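+               /* Initialize one work item per TX FIFO element, each linked
+                * back to this classdev.
+                */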
+               for (int i = 0; i != cdev->tx_fifo_size; ++i) {
+                       cdev->tx_ops[i].cdev = cdev;
+                       INIT_WORK(&cdev->tx_ops[i].work, m_can_tx_work_queue);
+               }
 
                err = request_threaded_irq(dev->irq, NULL, m_can_isr,
                                           IRQF_ONESHOT,
 {
        int ret;
 
+       /* One tx_op per TX FIFO element, limited by the smaller of the TX
+        * buffer and TX event FIFO sizes configured in message RAM.
+        */
+       cdev->tx_fifo_size = max(1, min(cdev->mcfg[MRAM_TXB].num,
+                                       cdev->mcfg[MRAM_TXE].num));
+       if (cdev->is_peripheral) {
+               cdev->tx_ops = devm_kcalloc(cdev->dev, cdev->tx_fifo_size,
+                                           sizeof(*cdev->tx_ops), GFP_KERNEL);
+               if (!cdev->tx_ops) {
+                       dev_err(cdev->dev, "Failed to allocate tx_ops for workqueue\n");
+                       return -ENOMEM;
+               }
+       }
+
        if (cdev->pm_clock_support) {
                ret = m_can_clk_start(cdev);
                if (ret)