unsigned long reinit_state;
        u64 rsc_count;                  /* stat for coalesced packets */
        u64 rsc_flush;                  /* stats for flushed packets */
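+       /* per-queue counters, summed into the adapter-wide totals
+        * when the driver gathers its statistics
+        */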
+       u32 restart_queue;              /* track tx queue restarts */
+       u32 non_eop_descs;              /* track hardware descriptor chaining */
 
        unsigned int size;              /* length in bytes */
        dma_addr_t dma;                 /* phys. address of descriptor ring */
-};
+} ____cacheline_internodealigned_in_smp;
 
 enum ixgbe_ring_f_enum {
        RING_F_NONE = 0,
 struct ixgbe_ring_feature {
        int indices;
        int mask;
-};
+} ____cacheline_internodealigned_in_smp;
 
 #define MAX_RX_QUEUES 128
 #define MAX_TX_QUEUES 128
        u16 eitr_high;
 
        /* TX */
-       struct ixgbe_ring *tx_ring;     /* One per active queue */
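+       /* start the TX fields on their own cache line to avoid false
+        * sharing with the fields above */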
+       struct ixgbe_ring *tx_ring ____cacheline_aligned_in_smp; /* One per active queue */
        int num_tx_queues;
-       u64 restart_queue;
-       u64 hw_csum_tx_good;
-       u64 lsc_int;
-       u64 hw_tso_ctxt;
-       u64 hw_tso6_ctxt;
        u32 tx_timeout_count;
        bool detect_tx_hung;
 
+       u64 restart_queue;
+       u64 lsc_int;
+
        /* RX */
-       struct ixgbe_ring *rx_ring;     /* One per active queue */
+       struct ixgbe_ring *rx_ring ____cacheline_aligned_in_smp; /* One per active queue */
        int num_rx_queues;
        u64 hw_csum_rx_error;
        u64 hw_rx_no_dma_resources;
-       u64 hw_csum_rx_good;
        u64 non_eop_descs;
        int num_msix_vectors;
        int max_msix_q_vectors;         /* true count of q_vectors for device */
        struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE];
        struct msix_entry *msix_entries;
 
-       u64 rx_hdr_split;
        u32 alloc_rx_page_failed;
        u32 alloc_rx_buff_failed;
 
 
        {"tx_restart_queue", IXGBE_STAT(restart_queue)},
        {"rx_long_length_errors", IXGBE_STAT(stats.roc)},
        {"rx_short_length_errors", IXGBE_STAT(stats.ruc)},
-       {"tx_tcp4_seg_ctxt", IXGBE_STAT(hw_tso_ctxt)},
-       {"tx_tcp6_seg_ctxt", IXGBE_STAT(hw_tso6_ctxt)},
        {"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)},
        {"rx_flow_control_xon", IXGBE_STAT(stats.lxonrxc)},
        {"tx_flow_control_xoff", IXGBE_STAT(stats.lxofftxc)},
        {"rx_flow_control_xoff", IXGBE_STAT(stats.lxoffrxc)},
-       {"rx_csum_offload_good", IXGBE_STAT(hw_csum_rx_good)},
        {"rx_csum_offload_errors", IXGBE_STAT(hw_csum_rx_error)},
-       {"tx_csum_offload_ctxt", IXGBE_STAT(hw_csum_tx_good)},
-       {"rx_header_split", IXGBE_STAT(rx_hdr_split)},
        {"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)},
        {"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)},
        {"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)},
 
                if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
                    !test_bit(__IXGBE_DOWN, &adapter->state)) {
                        netif_wake_subqueue(netdev, tx_ring->queue_index);
-                       ++adapter->restart_queue;
+                       ++tx_ring->restart_queue;
                }
        }
 
 
        /* It must be a TCP or UDP packet with a valid checksum */
        skb->ip_summed = CHECKSUM_UNNECESSARY;
-       adapter->hw_csum_rx_good++;
 }
 
 static inline void ixgbe_release_rx_desc(struct ixgbe_hw *hw,
 
                if (!bi->skb) {
                        struct sk_buff *skb;
-                       skb = netdev_alloc_skb_ip_align(adapter->netdev,
-                                                       rx_ring->rx_buf_len);
+                       /* netdev_alloc_skb reserves 32 bytes up front!! */
+                       uint bufsz = rx_ring->rx_buf_len + SMP_CACHE_BYTES;
+                       skb = netdev_alloc_skb(adapter->netdev, bufsz);
 
                        if (!skb) {
                                adapter->alloc_rx_buff_failed++;
                                goto no_buffers;
                        }
 
+                       /* advance the data pointer to the next cache line */
+                       skb_reserve(skb, (PTR_ALIGN(skb->data, SMP_CACHE_BYTES)
+                                         - skb->data));
+
                        bi->skb = skb;
                        bi->dma = pci_map_single(pdev, skb->data,
                                                 rx_ring->rx_buf_len,
                        hdr_info = le16_to_cpu(ixgbe_get_hdr_info(rx_desc));
                        len = (hdr_info & IXGBE_RXDADV_HDRBUFLEN_MASK) >>
                               IXGBE_RXDADV_HDRBUFLEN_SHIFT;
-                       if (hdr_info & IXGBE_RXDADV_SPH)
-                               adapter->rx_hdr_split++;
                        if (len > IXGBE_RX_HDR_SIZE)
                                len = IXGBE_RX_HDR_SIZE;
                        upper_len = le16_to_cpu(rx_desc->wb.upper.length);
 
                cleaned = true;
                skb = rx_buffer_info->skb;
-               prefetch(skb->data - NET_IP_ALIGN);
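+               /* skb->data was aligned to a cache line when the buffer
+                * was allocated, so prefetch it directly */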
+               prefetch(skb->data);
                rx_buffer_info->skb = NULL;
 
                if (rx_buffer_info->dma) {
                                skb->next = next_buffer->skb;
                                skb->next->prev = skb;
                        }
-                       adapter->non_eop_descs++;
+                       rx_ring->non_eop_descs++;
                        goto next_desc;
                }
 
                adapter->rsc_total_flush = rsc_flush;
        }
 
+       /* gather the per-queue stats into the adapter-wide counters; the
+        * ring counters are cumulative, so rebuild the totals each time
+        */
+       adapter->restart_queue = 0;
+       for (i = 0; i < adapter->num_tx_queues; i++)
+               adapter->restart_queue += adapter->tx_ring[i].restart_queue;
+
+       adapter->non_eop_descs = 0;
+       for (i = 0; i < adapter->num_rx_queues; i++)
+               adapter->non_eop_descs += adapter->rx_ring[i].non_eop_descs;
+
        adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
        for (i = 0; i < 8; i++) {
                /* for packet buffers not used, the register should read 0 */
                                                                 iph->daddr, 0,
                                                                 IPPROTO_TCP,
                                                                 0);
-                       adapter->hw_tso_ctxt++;
                } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
                        ipv6_hdr(skb)->payload_len = 0;
                        tcp_hdr(skb)->check =
                            ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
                                             &ipv6_hdr(skb)->daddr,
                                             0, IPPROTO_TCP, 0);
-                       adapter->hw_tso6_ctxt++;
                }
 
                i = tx_ring->next_to_use;
                tx_buffer_info->time_stamp = jiffies;
                tx_buffer_info->next_to_watch = i;
 
-               adapter->hw_csum_tx_good++;
                i++;
                if (i == tx_ring->count)
                        i = 0;
 static int __ixgbe_maybe_stop_tx(struct net_device *netdev,
                                  struct ixgbe_ring *tx_ring, int size)
 {
-       struct ixgbe_adapter *adapter = netdev_priv(netdev);
-
        netif_stop_subqueue(netdev, tx_ring->queue_index);
        /* Herbert's original patch had:
         *  smp_mb__after_netif_stop_queue();
 
        /* A reprieve! - use start_queue because it doesn't call schedule */
        netif_start_subqueue(netdev, tx_ring->queue_index);
-       ++adapter->restart_queue;
+       ++tx_ring->restart_queue;
        return 0;
 }