vmxnet3: Fix tx queue race condition with XDP
author		Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
		Fri, 31 Jan 2025 04:23:41 +0000 (09:53 +0530)
committer	Jakub Kicinski <kuba@kernel.org>
		Sun, 2 Feb 2025 01:17:32 +0000 (17:17 -0800)
If XDP traffic runs on a CPU whose id is greater than or equal to
the number of Tx queues of the NIC, then vmxnet3_xdp_get_tq()
always picks queue 0 for transmission, because it uses reciprocal
scale instead of a simple modulo operation.
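
For illustration, this is why reciprocal scaling degenerates to queue 0
for typical CPU ids (a minimal sketch, assuming the kernel's
reciprocal_scale() helper keeps its usual definition):

    static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
    {
            return (u32)(((u64) val * ep_ro) >> 32);
    }

    /* e.g. cpu = 9, tq_number = 8:
     *   reciprocal_scale(9, 8) = (9ULL * 8) >> 32 = 0  -> always tx_queue[0]
     *   9 % 8                  = 1                     -> spreads across queues
     */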

vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() then use the returned
queue without any locking, which can lead to race conditions when
multiple XDP transmits run in parallel on different CPUs.
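
One possible interleaving when two CPUs pick the same tq (the slot
number N is hypothetical, shown only to illustrate the race):

    CPU A                                   CPU B
    reads tq->tx_ring.next2fill == N
                                            reads tq->tx_ring.next2fill == N
    fills descriptor in slot N
                                            fills descriptor in slot N (overwrites A's)
    advances next2fill                      advances next2fill

    -> one frame is lost and the ring bookkeeping no longer matches
       what the device will consume.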

This patch uses a simple modulo scheme when the current CPU id equals or
exceeds the number of Tx queues on the NIC. It also adds locking in the
vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions.

Fixes: 54f00cce1178 ("vmxnet3: Add XDP support.")
Signed-off-by: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
Signed-off-by: Ronak Doshi <ronak.doshi@broadcom.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250131042340.156547-1-sankararaman.jayaraman@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/vmxnet3/vmxnet3_xdp.c

index 1341374a4588a0f9a6c5c1ebb8951b5531c473c3..616ecc38d1726c7424750e9dedb6013f718dc25d 100644
@@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
        if (likely(cpu < tq_number))
                tq = &adapter->tx_queue[cpu];
        else
-               tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];
+               tq = &adapter->tx_queue[cpu % tq_number];
 
        return tq;
 }
@@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
        u32 buf_size;
        u32 dw2;
 
+       spin_lock_irq(&tq->tx_lock);
        dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
        dw2 |= xdpf->len;
        ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
@@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
 
        if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
                tq->stats.tx_ring_full++;
+               spin_unlock_irq(&tq->tx_lock);
                return -ENOSPC;
        }
 
@@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
                tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
                                               xdpf->data, buf_size,
                                               DMA_TO_DEVICE);
-               if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
+               if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) {
+                       spin_unlock_irq(&tq->tx_lock);
                        return -EFAULT;
+               }
                tbi->map_type |= VMXNET3_MAP_SINGLE;
        } else { /* XDP buffer from page pool */
                page = virt_to_page(xdpf->data);
@@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
        dma_wmb();
        gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
                                                  VMXNET3_TXD_GEN);
+       spin_unlock_irq(&tq->tx_lock);
 
        /* No need to handle the case when tx_num_deferred doesn't reach
         * threshold. Backend driver at hypervisor side will poll and reset
@@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
 {
        struct vmxnet3_adapter *adapter = netdev_priv(dev);
        struct vmxnet3_tx_queue *tq;
+       struct netdev_queue *nq;
        int i;
 
        if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
@@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev,
        if (tq->stopped)
                return -ENETDOWN;
 
+       nq = netdev_get_tx_queue(adapter->netdev, tq->qid);
+
+       __netif_tx_lock(nq, smp_processor_id());
        for (i = 0; i < n; i++) {
                if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
                        tq->stats.xdp_xmit_err++;
@@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
                }
        }
        tq->stats.xdp_xmit += i;
+       __netif_tx_unlock(nq);
 
        return i;
 }