nesibdev = nesvnic->nesibdev;
 
        nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
-                       atomic_read(&nesvnic->netdev->refcnt));
+                       netdev_refcnt_read(nesvnic->netdev));
 
        if (nesqp->active_conn) {
 
        atomic_inc(&cm_accepts);
 
        nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
-                       atomic_read(&nesvnic->netdev->refcnt));
+                       netdev_refcnt_read(nesvnic->netdev));
 
        /* allocate the ietf frame and space for private data */
        nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
 
 
        nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n",
                        nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context,
-                       atomic_read(&nesvnic->netdev->refcnt));
+                       netdev_refcnt_read(nesvnic->netdev));
 
        err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
                        nesadapter->max_pd, &pd_num, &nesadapter->next_pd);
        /* update the QP table */
        nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
        nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
-                       atomic_read(&nesvnic->netdev->refcnt));
+                       netdev_refcnt_read(nesvnic->netdev));
 
        return &nesqp->ibqp;
 }
 
        struct timer_list       watchdog_timer;
 
        /* Number of references to this device */
-       atomic_t                refcnt ____cacheline_aligned_in_smp;
+       int __percpu            *pcpu_refcnt;
 
        /* delayed register/unregister */
        struct list_head        todo_list;
        unregister_netdevice_queue(dev, NULL);
 }
 
+extern int             netdev_refcnt_read(const struct net_device *dev);
 extern void            free_netdev(struct net_device *dev);
 extern void            synchronize_net(void);
 extern int             register_netdevice_notifier(struct notifier_block *nb);
  */
 static inline void dev_put(struct net_device *dev)
 {
-       atomic_dec(&dev->refcnt);
+       irqsafe_cpu_dec(*dev->pcpu_refcnt);
 }
 
 /**
  */
 static inline void dev_hold(struct net_device *dev)
 {
-       atomic_inc(&dev->refcnt);
+       irqsafe_cpu_inc(*dev->pcpu_refcnt);
 }
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
 
         */
        dev->reg_state = NETREG_DUMMY;
 
-       /* initialize the ref count */
-       atomic_set(&dev->refcnt, 1);
-
        /* NAPI wants this */
        INIT_LIST_HEAD(&dev->napi_list);
 
        set_bit(__LINK_STATE_PRESENT, &dev->state);
        set_bit(__LINK_STATE_START, &dev->state);
 
+       /* Note : We dont allocate pcpu_refcnt for dummy devices,
+        * because users of this 'device' dont need to change
+        * its refcount.
+        */
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(init_dummy_netdev);
 }
 EXPORT_SYMBOL(register_netdev);
 
+int netdev_refcnt_read(const struct net_device *dev)
+{
+       int i, refcnt = 0;
+
+       for_each_possible_cpu(i)
+               refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
+       return refcnt;
+}
+EXPORT_SYMBOL(netdev_refcnt_read);
+
 /*
  * netdev_wait_allrefs - wait until all references are gone.
  *
 static void netdev_wait_allrefs(struct net_device *dev)
 {
        unsigned long rebroadcast_time, warning_time;
+       int refcnt;
 
        linkwatch_forget_dev(dev);
 
        rebroadcast_time = warning_time = jiffies;
-       while (atomic_read(&dev->refcnt) != 0) {
+       refcnt = netdev_refcnt_read(dev);
+
+       while (refcnt != 0) {
                if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
                        rtnl_lock();
 
 
                msleep(250);
 
+               refcnt = netdev_refcnt_read(dev);
+
                if (time_after(jiffies, warning_time + 10 * HZ)) {
                        printk(KERN_EMERG "unregister_netdevice: "
                               "waiting for %s to become free. Usage "
                               "count = %d\n",
-                              dev->name, atomic_read(&dev->refcnt));
+                              dev->name, refcnt);
                        warning_time = jiffies;
                }
        }
                netdev_wait_allrefs(dev);
 
                /* paranoia */
-               BUG_ON(atomic_read(&dev->refcnt));
+               BUG_ON(netdev_refcnt_read(dev));
                WARN_ON(rcu_dereference_raw(dev->ip_ptr));
                WARN_ON(dev->ip6_ptr);
                WARN_ON(dev->dn_ptr);
        dev = PTR_ALIGN(p, NETDEV_ALIGN);
        dev->padded = (char *)dev - (char *)p;
 
-       if (dev_addr_init(dev))
+       dev->pcpu_refcnt = alloc_percpu(int);
+       if (!dev->pcpu_refcnt)
                goto free_tx;
 
+       if (dev_addr_init(dev))
+               goto free_pcpu;
+
        dev_mc_init(dev);
        dev_uc_init(dev);
 
 
 free_tx:
        kfree(tx);
+free_pcpu:
+       free_percpu(dev->pcpu_refcnt);
 free_p:
        kfree(p);
        return NULL;
        list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
                netif_napi_del(p);
 
+       free_percpu(dev->pcpu_refcnt);
+       dev->pcpu_refcnt = NULL;
+
        /*  Compatibility with error handling in drivers */
        if (dev->reg_state == NETREG_UNINITIALIZED) {
                kfree((char *)dev - dev->padded);