if (xmit_suc)
                return NETDEV_TX_OK;
 
-       atomic_long_inc(&bond_dev->tx_dropped);
+       dev_core_stats_tx_dropped_inc(bond_dev);
        return NET_XMIT_DROP;
 }
 
 
        i = skb_get_queue_mapping(skb);
        if (unlikely(i >= bp->tx_nr_rings)) {
                dev_kfree_skb_any(skb);
-               atomic_long_inc(&dev->tx_dropped);
+               dev_core_stats_tx_dropped_inc(dev);
                return NETDEV_TX_OK;
        }
 
        if (txr->kick_pending)
                bnxt_txr_db_kick(bp, txr, txr->tx_prod);
        txr->tx_buf_ring[txr->tx_prod].skb = NULL;
-       atomic_long_inc(&dev->tx_dropped);
+       dev_core_stats_tx_dropped_inc(dev);
        return NETDEV_TX_OK;
 }
 
 
        p[21] = net_stats->rx_compressed;
        p[22] = net_stats->tx_compressed;
 
-       p[23] = netdev->rx_dropped.counter;
-       p[24] = netdev->tx_dropped.counter;
+       p[23] = 0; /* was netdev->rx_dropped.counter */
+       p[24] = 0; /* was netdev->tx_dropped.counter */
 
        p[25] = priv->tx_timeout_count;
 
 
        dev = skb->dev;
        port = rmnet_get_port_rcu(dev);
        if (unlikely(!port)) {
-               atomic_long_inc(&skb->dev->rx_nohandler);
+               dev_core_stats_rx_nohandler_inc(skb->dev);
                kfree_skb(skb);
                goto done;
        }
 
                schedule_work(&port->wq);
        } else {
                spin_unlock(&port->backlog.lock);
-               atomic_long_inc(&skb->dev->rx_dropped);
+               dev_core_stats_rx_dropped_inc(skb->dev);
                kfree_skb(skb);
        }
 }
 
 free_nskb:
        kfree_skb(nskb);
 err:
-       atomic_long_inc(&skb->dev->rx_dropped);
+       dev_core_stats_rx_dropped_inc(skb->dev);
 }
 
 static void macvlan_flush_sources(struct macvlan_port *port,
 
 static netdev_tx_t net_failover_drop_xmit(struct sk_buff *skb,
                                          struct net_device *dev)
 {
-       atomic_long_inc(&dev->tx_dropped);
+       dev_core_stats_tx_dropped_inc(dev);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
 }
 
        return NETDEV_TX_OK;
 
 drop:
-       atomic_long_inc(&dev->tx_dropped);
+       dev_core_stats_tx_dropped_inc(dev);
        skb_tx_error(skb);
        kfree_skb_reason(skb, drop_reason);
        rcu_read_unlock();
                void *frame = tun_xdp_to_ptr(xdp);
 
                if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
-                       atomic_long_inc(&dev->tx_dropped);
+                       dev_core_stats_tx_dropped_inc(dev);
                        break;
                }
                nxmit++;
                trace_xdp_exception(tun->dev, xdp_prog, act);
                fallthrough;
        case XDP_DROP:
-               atomic_long_inc(&tun->dev->rx_dropped);
+               dev_core_stats_rx_dropped_inc(tun->dev);
                break;
        }
 
                 */
                skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
                if (IS_ERR(skb)) {
-                       atomic_long_inc(&tun->dev->rx_dropped);
+                       dev_core_stats_rx_dropped_inc(tun->dev);
                        return PTR_ERR(skb);
                }
                if (!skb)
 
                if (IS_ERR(skb)) {
                        if (PTR_ERR(skb) != -EAGAIN)
-                               atomic_long_inc(&tun->dev->rx_dropped);
+                               dev_core_stats_rx_dropped_inc(tun->dev);
                        if (frags)
                                mutex_unlock(&tfile->napi_mutex);
                        return PTR_ERR(skb);
                        err = -EFAULT;
                        drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
 drop:
-                       atomic_long_inc(&tun->dev->rx_dropped);
+                       dev_core_stats_rx_dropped_inc(tun->dev);
                        kfree_skb_reason(skb, drop_reason);
                        if (frags) {
                                tfile->napi.skb = NULL;
                                pi.proto = htons(ETH_P_IPV6);
                                break;
                        default:
-                               atomic_long_inc(&tun->dev->rx_dropped);
+                               dev_core_stats_rx_dropped_inc(tun->dev);
                                kfree_skb(skb);
                                return -EINVAL;
                        }
                                          skb_headlen(skb));
 
                if (unlikely(headlen > skb_headlen(skb))) {
-                       atomic_long_inc(&tun->dev->rx_dropped);
+                       dev_core_stats_rx_dropped_inc(tun->dev);
                        napi_free_frags(&tfile->napi);
                        rcu_read_unlock();
                        mutex_unlock(&tfile->napi_mutex);
 
 
        if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
                rcu_read_unlock();
-               atomic_long_inc(&vxlan->dev->rx_dropped);
+               dev_core_stats_rx_dropped_inc(vxlan->dev);
                vxlan_vnifilter_count(vxlan, vni, vninode,
                                      VXLAN_VNI_STATS_RX_DROPS, 0);
                goto drop;
 
 #include <linux/prefetch.h>
 #include <asm/cache.h>
 #include <asm/byteorder.h>
+#include <asm/local.h>
 
 #include <linux/percpu.h>
 #include <linux/rculist.h>
        unsigned long   tx_compressed;
 };
 
+/* per-cpu stats, allocated on demand.
+ * Try to fit them in a single cache line, for the sake of dev_get_stats().
+ */
+struct net_device_core_stats {
+       local_t         rx_dropped;
+       local_t         tx_dropped;
+       local_t         rx_nohandler;
+} __aligned(4 * sizeof(local_t));
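
An aside on the __aligned() choice: with local_t wrapping a long, the three counters take 24 bytes on 64-bit, and rounding the alignment (and hence the size) up to 4 * sizeof(local_t) = 32 bytes means two instances fit per 64-byte cache line and none ever straddles one. This is easy to sanity-check in user space; the sketch below approximates local_t with a plain long, which is an assumption about its storage, not the kernel type itself:

#include <assert.h>
#include <stdio.h>

/* User-space stand-in for struct net_device_core_stats. */
struct core_stats_sketch {
	long rx_dropped;
	long tx_dropped;
	long rx_nohandler;
} __attribute__((aligned(4 * sizeof(long))));

int main(void)
{
	/* sizeof() is rounded up to the alignment: 32 bytes on LP64 */
	printf("size=%zu align=%zu\n",
	       sizeof(struct core_stats_sketch),
	       _Alignof(struct core_stats_sketch));
	assert(sizeof(struct core_stats_sketch) == 4 * sizeof(long));
	return 0;
}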
 
 #include <linux/cache.h>
 #include <linux/skbuff.h>
  *     @stats:         Statistics struct, which was left as a legacy, use
  *                     rtnl_link_stats64 instead
  *
- *     @rx_dropped:    Dropped packets by core network,
- *                     do not use this in drivers
- *     @tx_dropped:    Dropped packets by core network,
+ *     @core_stats:    core networking counters,
  *                     do not use this in drivers
- *     @rx_nohandler:  nohandler dropped packets by core network on
- *                     inactive devices, do not use this in drivers
  *     @carrier_up_count:      Number of times the carrier has been up
  *     @carrier_down_count:    Number of times the carrier has been down
  *
 
        struct net_device_stats stats; /* not used by modern drivers */
 
-       atomic_long_t           rx_dropped;
-       atomic_long_t           tx_dropped;
-       atomic_long_t           rx_nohandler;
+       struct net_device_core_stats __percpu *core_stats;
 
        /* Stats to monitor link on/off, flapping */
        atomic_t                carrier_up_count;
        return false;
 }
 
+struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev);
+
+static inline struct net_device_core_stats *dev_core_stats(struct net_device *dev)
+{
+       /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
+       struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
+
+       if (likely(p))
+               return this_cpu_ptr(p);
+
+       return netdev_core_stats_alloc(dev);
+}
+
+#define DEV_CORE_STATS_INC(FIELD)                                              \
+static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev)                \
+{                                                                              \
+       struct net_device_core_stats *p = dev_core_stats(dev);                  \
+                                                                               \
+       if (p)                                                                  \
+               local_inc(&p->FIELD);                                           \
+}
+DEV_CORE_STATS_INC(rx_dropped)
+DEV_CORE_STATS_INC(tx_dropped)
+DEV_CORE_STATS_INC(rx_nohandler)
+
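These generated helpers are what every converted call site in this patch uses. A hypothetical driver drop path (the driver and function names below are illustrative, not part of the patch) shows the intended shape: the common case costs a READ_ONCE(), a per-cpu pointer computation and a local_inc(); only the very first drop on a device pays for the GFP_ATOMIC allocation, and if that allocation fails the drop simply goes uncounted:

static netdev_tx_t foo_start_xmit(struct sk_buff *skb,
				  struct net_device *dev)
{
	if (unlikely(!netif_carrier_ok(dev))) {
		/* per-cpu increment, no atomic RMW on a shared cacheline */
		dev_core_stats_tx_dropped_inc(dev);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}
	/* ... normal transmit path ... */
	return NETDEV_TX_OK;
}
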
 static __always_inline int ____dev_forward_skb(struct net_device *dev,
                                               struct sk_buff *skb,
                                               const bool check_mtu)
 {
        if (skb_orphan_frags(skb, GFP_ATOMIC) ||
            unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) {
-               atomic_long_inc(&dev->rx_dropped);
+               dev_core_stats_rx_dropped_inc(dev);
                kfree_skb(skb);
                return NET_RX_DROP;
        }
 
 
 static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
 {
-       atomic_long_inc(&dev->tx_dropped);
+       dev_core_stats_tx_dropped_inc(dev);
        dev_kfree_skb_any(skb);
        return NET_XMIT_DROP;
 }
 
 out_kfree_skb:
        kfree_skb(skb);
 out_null:
-       atomic_long_inc(&dev->tx_dropped);
+       dev_core_stats_tx_dropped_inc(dev);
        return NULL;
 }
 
        rc = -ENETDOWN;
        rcu_read_unlock_bh();
 
-       atomic_long_inc(&dev->tx_dropped);
+       dev_core_stats_tx_dropped_inc(dev);
        kfree_skb_list(skb);
        return rc;
 out:
        local_bh_enable();
        return ret;
 drop:
-       atomic_long_inc(&dev->tx_dropped);
+       dev_core_stats_tx_dropped_inc(dev);
        kfree_skb_list(skb);
        return NET_XMIT_DROP;
 }
        sd->dropped++;
        rps_unlock_irq_restore(sd, &flags);
 
-       atomic_long_inc(&skb->dev->rx_dropped);
+       dev_core_stats_rx_dropped_inc(skb->dev);
        kfree_skb_reason(skb, reason);
        return NET_RX_DROP;
 }
        } else {
 drop:
                if (!deliver_exact) {
-                       atomic_long_inc(&skb->dev->rx_dropped);
+                       dev_core_stats_rx_dropped_inc(skb->dev);
                        kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT);
                } else {
-                       atomic_long_inc(&skb->dev->rx_nohandler);
+                       dev_core_stats_rx_nohandler_inc(skb->dev);
                        kfree_skb(skb);
                }
                /* Jamal, now you will not able to escape explaining
 }
 EXPORT_SYMBOL(netdev_stats_to_stats64);
 
+struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
+{
+       struct net_device_core_stats __percpu *p;
+
+       p = alloc_percpu_gfp(struct net_device_core_stats,
+                            GFP_ATOMIC | __GFP_NOWARN);
+
+       if (p && cmpxchg(&dev->core_stats, NULL, p))
+               free_percpu(p);
+
+       /* This READ_ONCE() pairs with the cmpxchg() above */
+       p = READ_ONCE(dev->core_stats);
+       if (!p)
+               return NULL;
+
+       return this_cpu_ptr(p);
+}
+EXPORT_SYMBOL(netdev_core_stats_alloc);
+
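The cmpxchg() above is a lock-free allocate-once idiom: two CPUs can race past the NULL check in dev_core_stats() and both allocate, but only the first cmpxchg() publishes its buffer; the loser frees its copy, and both sides then reload whatever pointer won. The same pattern in a self-contained user-space sketch, with C11 atomics standing in for the kernel primitives:

#include <stdatomic.h>
#include <stdlib.h>

static _Atomic(long *) shared;

static long *get_shared(void)
{
	long *p = atomic_load_explicit(&shared, memory_order_acquire);

	if (p)
		return p;

	p = calloc(1, sizeof(*p));
	if (p) {
		long *expected = NULL;

		/* Publish our buffer only if nobody beat us to it. */
		if (!atomic_compare_exchange_strong(&shared, &expected, p))
			free(p);	/* lost the race, drop our copy */
	}
	/* Reload: either our buffer or the racing winner's, or NULL
	 * if allocation failed everywhere.
	 */
	return atomic_load_explicit(&shared, memory_order_acquire);
}
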
 /**
  *     dev_get_stats   - get network device statistics
  *     @dev: device to get statistics from
                                        struct rtnl_link_stats64 *storage)
 {
        const struct net_device_ops *ops = dev->netdev_ops;
+       const struct net_device_core_stats __percpu *p;
 
        if (ops->ndo_get_stats64) {
                memset(storage, 0, sizeof(*storage));
        } else {
                netdev_stats_to_stats64(storage, &dev->stats);
        }
-       storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
-       storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
-       storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
+
+       /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
+       p = READ_ONCE(dev->core_stats);
+       if (p) {
+               const struct net_device_core_stats *core_stats;
+               int i;
+
+               for_each_possible_cpu(i) {
+                       core_stats = per_cpu_ptr(p, i);
+                       storage->rx_dropped += local_read(&core_stats->rx_dropped);
+                       storage->tx_dropped += local_read(&core_stats->tx_dropped);
+                       storage->rx_nohandler += local_read(&core_stats->rx_nohandler);
+               }
+       }
        return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
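
Two details of the folding loop are worth noting. It walks every possible CPU rather than only the online ones, because a hot-unplugged CPU keeps its per-cpu counts and skipping it would make the totals jump backwards. And the sum is taken without locking, so an increment racing with dev_get_stats() may or may not land in the snapshot; successive snapshots remain monotonic since the counters only ever increase. A minimal (hypothetical) consumer looks like:

struct rtnl_link_stats64 stats;

dev_get_stats(dev, &stats);
pr_info("%s: rx_dropped=%llu tx_dropped=%llu rx_nohandler=%llu\n",
	dev->name, stats.rx_dropped, stats.tx_dropped,
	stats.rx_nohandler);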
        free_percpu(dev->pcpu_refcnt);
        dev->pcpu_refcnt = NULL;
 #endif
+       free_percpu(dev->core_stats);
+       dev->core_stats = NULL;
        free_percpu(dev->xdp_bulkq);
        dev->xdp_bulkq = NULL;
 
 
 
        if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
 drop:
-               atomic_long_inc(&dev->rx_dropped);
+               dev_core_stats_rx_dropped_inc(dev);
                kfree_skb(skb);
                res = NET_RX_DROP;
                goto unlock;
 
                skb_reset_mac_len(skb);
                hsr_forward_skb(skb, master);
        } else {
-               atomic_long_inc(&dev->tx_dropped);
+               dev_core_stats_tx_dropped_inc(dev);
                dev_kfree_skb_any(skb);
        }
        return NETDEV_TX_OK;
 
                segs = skb_gso_segment(skb, esp_features);
                if (IS_ERR(segs)) {
                        kfree_skb(skb);
-                       atomic_long_inc(&dev->tx_dropped);
+                       dev_core_stats_tx_dropped_inc(dev);
                        return NULL;
                } else {
                        consume_skb(skb);