return ret;
 }
 
+/* mlx4_en_fold_software_stats - fold per-ring SW counters into netdev stats.
+ *
+ * Sums the per-RX-ring and per-TX-ring packet/byte counters into
+ * dev->stats.{rx,tx}_{packets,bytes}.  The ring counters are updated
+ * locklessly from the datapath, so they are read with READ_ONCE(); the
+ * sums are therefore a best-effort snapshot, not an atomic one.
+ */
+void mlx4_en_fold_software_stats(struct net_device *dev)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       unsigned long packets, bytes;
+       int i;
+
+       /* Bail out when the port is down: the rings are freed then, and
+        * dereferencing priv->rx_ring[]/priv->tx_ring[][] would touch
+        * stale or NULL pointers.  The master (PF) reads its stats from
+        * firmware rather than from the software rings.
+        */
+       if (!priv->port_up || mlx4_is_master(mdev->dev))
+               return;
+
+       packets = 0;
+       bytes = 0;
+       for (i = 0; i < priv->rx_ring_num; i++) {
+               const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
+
+               packets += READ_ONCE(ring->packets);
+               bytes   += READ_ONCE(ring->bytes);
+       }
+       dev->stats.rx_packets = packets;
+       dev->stats.rx_bytes = bytes;
+
+       packets = 0;
+       bytes = 0;
+       for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+               const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
+
+               packets += READ_ONCE(ring->packets);
+               bytes   += READ_ONCE(ring->bytes);
+       }
+       dev->stats.tx_packets = packets;
+       dev->stats.tx_bytes = bytes;
+}
+
 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 {
        struct mlx4_counter tmp_counter_stats;
        u64 in_mod = reset << 8 | port;
        int err;
        int i, counter_index;
+       unsigned long sw_tx_dropped = 0;
        unsigned long sw_rx_dropped = 0;
 
        mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
 
        spin_lock_bh(&priv->stats_lock);
 
-       stats->rx_packets = 0;
-       stats->rx_bytes = 0;
+       mlx4_en_fold_software_stats(dev);
+
        priv->port_stats.rx_chksum_good = 0;
        priv->port_stats.rx_chksum_none = 0;
        priv->port_stats.rx_chksum_complete = 0;
        priv->xdp_stats.rx_xdp_tx      = 0;
        priv->xdp_stats.rx_xdp_tx_full = 0;
        for (i = 0; i < priv->rx_ring_num; i++) {
-               stats->rx_packets += priv->rx_ring[i]->packets;
-               stats->rx_bytes += priv->rx_ring[i]->bytes;
-               sw_rx_dropped += priv->rx_ring[i]->dropped;
-               priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok;
-               priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none;
-               priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete;
-               priv->xdp_stats.rx_xdp_drop    += priv->rx_ring[i]->xdp_drop;
-               priv->xdp_stats.rx_xdp_tx      += priv->rx_ring[i]->xdp_tx;
-               priv->xdp_stats.rx_xdp_tx_full += priv->rx_ring[i]->xdp_tx_full;
+               const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
+
+               sw_rx_dropped                   += READ_ONCE(ring->dropped);
+               priv->port_stats.rx_chksum_good += READ_ONCE(ring->csum_ok);
+               priv->port_stats.rx_chksum_none += READ_ONCE(ring->csum_none);
+               priv->port_stats.rx_chksum_complete += READ_ONCE(ring->csum_complete);
+               priv->xdp_stats.rx_xdp_drop     += READ_ONCE(ring->xdp_drop);
+               priv->xdp_stats.rx_xdp_tx       += READ_ONCE(ring->xdp_tx);
+               priv->xdp_stats.rx_xdp_tx_full  += READ_ONCE(ring->xdp_tx_full);
        }
-       stats->tx_packets = 0;
-       stats->tx_bytes = 0;
-       stats->tx_dropped = 0;
        priv->port_stats.tx_chksum_offload = 0;
        priv->port_stats.queue_stopped = 0;
        priv->port_stats.wake_queue = 0;
        for (i = 0; i < priv->tx_ring_num[TX]; i++) {
                const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
 
-               stats->tx_packets += ring->packets;
-               stats->tx_bytes += ring->bytes;
-               stats->tx_dropped += ring->tx_dropped;
-               priv->port_stats.tx_chksum_offload += ring->tx_csum;
-               priv->port_stats.queue_stopped     += ring->queue_stopped;
-               priv->port_stats.wake_queue        += ring->wake_queue;
-               priv->port_stats.tso_packets       += ring->tso_packets;
-               priv->port_stats.xmit_more         += ring->xmit_more;
+               sw_tx_dropped                      += READ_ONCE(ring->tx_dropped);
+               priv->port_stats.tx_chksum_offload += READ_ONCE(ring->tx_csum);
+               priv->port_stats.queue_stopped     += READ_ONCE(ring->queue_stopped);
+               priv->port_stats.wake_queue        += READ_ONCE(ring->wake_queue);
+               priv->port_stats.tso_packets       += READ_ONCE(ring->tso_packets);
+               priv->port_stats.xmit_more         += READ_ONCE(ring->xmit_more);
        }
+
        if (mlx4_is_master(mdev->dev)) {
                stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
                                                   &mlx4_en_stats->RTOT_prio_1,
        stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
        stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
        stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
-       stats->tx_dropped += be32_to_cpu(mlx4_en_stats->TDROP);
+       stats->tx_dropped = be32_to_cpu(mlx4_en_stats->TDROP) +
+                           sw_tx_dropped;
 
        /* RX stats */
        priv->pkstats.rx_multicast_packets = stats->multicast;