desc_idx, *post_ptr);
                drop_it_no_recycle:
                        /* Other statistics kept track of by card. */
-                       tp->rx_dropped++;
+                       tnapi->rx_dropped++;
                        goto next_pkt;
                }
 
 drop:
        dev_kfree_skb_any(skb);
 drop_nofree:
-       tp->tx_dropped++;
+       tnapi->tx_dropped++;
        return NETDEV_TX_OK;
 }
 
 /* tp->lock is held. */
 static int tg3_halt(struct tg3 *tp, int kind, bool silent)
 {
-       int err;
+       int err, i;
 
        tg3_stop_fw(tp);
 
 
                /* And make sure the next sample is new data */
                memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
+
+               for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) {
+                       struct tg3_napi *tnapi = &tp->napi[i];
+
+                       tnapi->rx_dropped = 0;
+                       tnapi->tx_dropped = 0;
+               }
        }
 
        return err;
 {
        struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev;
        struct tg3_hw_stats *hw_stats = tp->hw_stats;
+       unsigned long rx_dropped;
+       unsigned long tx_dropped;
+       int i;
 
        stats->rx_packets = old_stats->rx_packets +
                get_stat64(&hw_stats->rx_ucast_packets) +
        stats->rx_missed_errors = old_stats->rx_missed_errors +
                get_stat64(&hw_stats->rx_discards);
 
-       stats->rx_dropped = tp->rx_dropped;
-       stats->tx_dropped = tp->tx_dropped;
+       /* Aggregate per-queue counters. Each per-queue counter is updated
+        * by a single writer, so the updates themselves are race-free. The
+        * sum computed by this loop may not be 100% accurate (a counter can
+        * be updated in the middle of the loop), but the next
+        * tg3_get_nstats() call recomputes the current value, so this is
+        * acceptable.
+        * Note that these counters wrap around at 4G on 32-bit machines.
+        */
+       rx_dropped = (unsigned long)(old_stats->rx_dropped);
+       tx_dropped = (unsigned long)(old_stats->tx_dropped);
+
+       for (i = 0; i < tp->irq_cnt; i++) {
+               struct tg3_napi *tnapi = &tp->napi[i];
+
+               rx_dropped += tnapi->rx_dropped;
+               tx_dropped += tnapi->tx_dropped;
+       }
+
+       stats->rx_dropped = rx_dropped;
+       stats->tx_dropped = tx_dropped;
 }
 
 static int tg3_get_regs_len(struct net_device *dev)
 
        u16                             *rx_rcb_prod_idx;
        struct tg3_rx_prodring_set      prodring;
        struct tg3_rx_buffer_desc       *rx_rcb;
+       unsigned long                   rx_dropped;
 
        u32                             tx_prod ____cacheline_aligned;
        u32                             tx_cons;
        u32                             prodmbox;
        struct tg3_tx_buffer_desc       *tx_ring;
        struct tg3_tx_ring_info         *tx_buffers;
+       unsigned long                   tx_dropped;
 
        dma_addr_t                      status_mapping;
        dma_addr_t                      rx_rcb_mapping;
 
 
        /* begin "everything else" cacheline(s) section */
-       unsigned long                   rx_dropped;
-       unsigned long                   tx_dropped;
        struct rtnl_link_stats64        net_stats_prev;
        struct tg3_ethtool_stats        estats_prev;