/* constants */
 
+enum ipoib_flush_level {       /* ordered lightest to heaviest; callers compare with >= */
+       IPOIB_FLUSH_LIGHT,      /* mark paths invalid and flush multicast; no dev down/up */
+       IPOIB_FLUSH_NORMAL,     /* IB device down, then up again if admin-up */
+       IPOIB_FLUSH_HEAVY       /* NORMAL plus P_Key index lookup and dev stop/open */
+};
+
 enum {
        IPOIB_ENCAP_LEN           = 4,
 
 
        struct delayed_work pkey_poll_task;
        struct delayed_work mcast_task;
-       struct work_struct flush_task;
+       struct work_struct flush_light;
+       struct work_struct flush_normal;
+       struct work_struct flush_heavy;
        struct work_struct restart_task;
        struct delayed_work ah_reap_task;
-       struct work_struct pkey_event_task;
 
        struct ib_device *ca;
        u8                port;
 
        struct rb_node        rb_node;
        struct list_head      list;
+       int                   valid;
 };
 
 struct ipoib_neigh {
                struct ipoib_ah *address, u32 qpn);
 void ipoib_reap_ah(struct work_struct *work);
 
+void ipoib_mark_paths_invalid(struct net_device *dev);
 void ipoib_flush_paths(struct net_device *dev);
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
-void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_ib_dev_flush_light(struct work_struct *work);
+void ipoib_ib_dev_flush_normal(struct work_struct *work);
+void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
 
        return 0;
 }
 
-static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
+                               enum ipoib_flush_level level)
 {
        struct ipoib_dev_priv *cpriv;
        struct net_device *dev = priv->dev;
         * the parent is down.
         */
        list_for_each_entry(cpriv, &priv->child_intfs, list)
-               __ipoib_ib_dev_flush(cpriv, pkey_event);
+               __ipoib_ib_dev_flush(cpriv, level);
 
        mutex_unlock(&priv->vlan_mutex);
 
                return;
        }
 
-       if (pkey_event) {
+       if (level == IPOIB_FLUSH_HEAVY) {
                if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
                        clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
                        ipoib_ib_dev_down(dev, 0);
                priv->pkey_index = new_index;
        }
 
-       ipoib_dbg(priv, "flushing\n");
+       if (level == IPOIB_FLUSH_LIGHT) {
+               ipoib_mark_paths_invalid(dev);
+               ipoib_mcast_dev_flush(dev);
+       }
 
-       ipoib_ib_dev_down(dev, 0);
+       if (level >= IPOIB_FLUSH_NORMAL)
+               ipoib_ib_dev_down(dev, 0);
 
-       if (pkey_event) {
+       if (level == IPOIB_FLUSH_HEAVY) {
                ipoib_ib_dev_stop(dev, 0);
                ipoib_ib_dev_open(dev);
        }
         * we get here, don't bring it back up if it's not configured up
         */
        if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
-               ipoib_ib_dev_up(dev);
+               if (level >= IPOIB_FLUSH_NORMAL)
+                       ipoib_ib_dev_up(dev);
                ipoib_mcast_restart_task(&priv->restart_task);
        }
 }
 
-void ipoib_ib_dev_flush(struct work_struct *work)
+void ipoib_ib_dev_flush_light(struct work_struct *work)
+{
+       struct ipoib_dev_priv *priv =
+               container_of(work, struct ipoib_dev_priv, flush_light); /* work is the flush_light member of priv */
+
+       __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT); /* queued on SM change / client reregister events */
+}
+
+void ipoib_ib_dev_flush_normal(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv =
-               container_of(work, struct ipoib_dev_priv, flush_task);
+               container_of(work, struct ipoib_dev_priv, flush_normal); /* work is the flush_normal member of priv */
 
-       ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
-       __ipoib_ib_dev_flush(priv, 0);
+       __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL); /* queued on port error / port active / LID change events */
 }
 
-void ipoib_pkey_event(struct work_struct *work)
+void ipoib_ib_dev_flush_heavy(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv =
-               container_of(work, struct ipoib_dev_priv, pkey_event_task);
+               container_of(work, struct ipoib_dev_priv, flush_heavy); /* work is the flush_heavy member of priv */
 
-       ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
-       __ipoib_ib_dev_flush(priv, 1);
+       __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY); /* queued on P_Key change events */
 }
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
 
 
 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
 
+void ipoib_mark_paths_invalid(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_path *path, *tp;
+
+       spin_lock_irq(&priv->lock); /* path_list is walked with priv->lock held */
+
+       list_for_each_entry_safe(path, tp, &priv->path_list, list) { /* entries are only flagged, none is unlinked */
+               ipoib_dbg(priv, "mark path LID 0x%04x GID " IPOIB_GID_FMT " invalid\n",
+                       be16_to_cpu(path->pathrec.dlid),
+                       IPOIB_GID_ARG(path->pathrec.dgid));
+               path->valid =  0; /* lookups testing !path->valid take the missing-path branch but keep this entry */
+       }
+
+       spin_unlock_irq(&priv->lock);
+}
+
 void ipoib_flush_paths(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct net_device *dev = path->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_ah *ah = NULL;
+       struct ipoib_ah *old_ah;
        struct ipoib_neigh *neigh, *tn;
        struct sk_buff_head skqueue;
        struct sk_buff *skb;
 
        spin_lock_irqsave(&priv->lock, flags);
 
+       old_ah   = path->ah;
        path->ah = ah;
 
        if (ah) {
                        __skb_queue_tail(&skqueue, skb);
 
                list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
+                       if (neigh->ah) {
+                               WARN_ON(neigh->ah != old_ah);
+                               /*
+                                * Dropping the ah reference inside
+                                * priv->lock is safe here, because we
+                                * will hold one more reference from
+                                * the original value of path->ah (ie
+                                * old_ah).
+                                */
+                               ipoib_put_ah(neigh->ah);
+                       }
                        kref_get(&path->ah->ref);
                        neigh->ah = path->ah;
                        memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
                        while ((skb = __skb_dequeue(&neigh->queue)))
                                __skb_queue_tail(&skqueue, skb);
                }
+               path->valid = 1;
        }
 
        path->query = NULL;
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
+       if (old_ah)
+               ipoib_put_ah(old_ah);
+
        while ((skb = __skb_dequeue(&skqueue))) {
                skb->dev = dev;
                if (dev_queue_xmit(skb))
        spin_lock(&priv->lock);
 
        path = __path_find(dev, phdr->hwaddr + 4);
-       if (!path) {
-               path = path_rec_create(dev, phdr->hwaddr + 4);
+       if (!path || !path->valid) {
+               if (!path)
+                       path = path_rec_create(dev, phdr->hwaddr + 4);
                if (path) {
                        /* put pseudoheader back on for next time */
                        skb_push(skb, sizeof *phdr);
        INIT_LIST_HEAD(&priv->multicast_list);
 
        INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
-       INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
        INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
-       INIT_WORK(&priv->flush_task,   ipoib_ib_dev_flush);
+       INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
+       INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
+       INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
        INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
        INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
 }
 
        if (record->element.port_num != priv->port)
                return;
 
-       if (record->event == IB_EVENT_PORT_ERR    ||
-           record->event == IB_EVENT_PORT_ACTIVE ||
-           record->event == IB_EVENT_LID_CHANGE  ||
-           record->event == IB_EVENT_SM_CHANGE   ||
+       ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
+                 record->device->name, record->element.port_num);
+
+       if (record->event == IB_EVENT_SM_CHANGE ||
            record->event == IB_EVENT_CLIENT_REREGISTER) {
-               ipoib_dbg(priv, "Port state change event\n");
-               queue_work(ipoib_workqueue, &priv->flush_task);
+               queue_work(ipoib_workqueue, &priv->flush_light);
+       } else if (record->event == IB_EVENT_PORT_ERR ||
+                  record->event == IB_EVENT_PORT_ACTIVE ||
+                  record->event == IB_EVENT_LID_CHANGE) {
+               queue_work(ipoib_workqueue, &priv->flush_normal);
        } else if (record->event == IB_EVENT_PKEY_CHANGE) {
-               ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
-               queue_work(ipoib_workqueue, &priv->pkey_event_task);
+               queue_work(ipoib_workqueue, &priv->flush_heavy);
        }
 }