}
 
 static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
-                           u16 queue_index)
+                           struct tun_file *tfile)
 {
        struct hlist_head *head;
        struct tun_flow_entry *e;
        unsigned long delay = tun->ageing_time;
+       u16 queue_index = tfile->queue_index;
 
        if (!rxhash)
                return;
 
        rcu_read_lock();
 
-       if (tun->numqueues == 1)
+       /* We may get a very small possibility of OOO during switching, not
+        * worth to optimize.*/
+       if (tun->numqueues == 1 || tfile->detached)
                goto unlock;
 
        e = tun_flow_find(head, rxhash);
 
        tun = rtnl_dereference(tfile->tun);
 
-       if (tun) {
+       if (tun && !tfile->detached) {
                u16 index = tfile->queue_index;
                BUG_ON(index >= tun->numqueues);
                dev = tun->dev;
 
                rcu_assign_pointer(tun->tfiles[index],
                                   tun->tfiles[tun->numqueues - 1]);
-               rcu_assign_pointer(tfile->tun, NULL);
                ntfile = rtnl_dereference(tun->tfiles[index]);
                ntfile->queue_index = index;
 
                --tun->numqueues;
-               if (clean)
+               if (clean) {
+                       rcu_assign_pointer(tfile->tun, NULL);
                        sock_put(&tfile->sk);
-               else
+               } else
                        tun_disable_queue(tun, tfile);
 
                synchronize_net();
                rcu_assign_pointer(tfile->tun, NULL);
                --tun->numqueues;
        }
+       list_for_each_entry(tfile, &tun->disabled, next) {
+               wake_up_all(&tfile->wq.wait);
+               rcu_assign_pointer(tfile->tun, NULL);
+       }
        BUG_ON(tun->numqueues != 0);
 
        synchronize_net();
                goto out;
 
        err = -EINVAL;
-       if (rtnl_dereference(tfile->tun))
+       if (rtnl_dereference(tfile->tun) && !tfile->detached)
                goto out;
 
        err = -EBUSY;
        tun->dev->stats.rx_packets++;
        tun->dev->stats.rx_bytes += len;
 
-       tun_flow_update(tun, rxhash, tfile->queue_index);
+       tun_flow_update(tun, rxhash, tfile);
        return total_len;
 }
 
                ret = tun_attach(tun, file);
        } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
                tun = rtnl_dereference(tfile->tun);
-               if (!tun || !(tun->flags & TUN_TAP_MQ))
+               if (!tun || !(tun->flags & TUN_TAP_MQ) || tfile->detached)
                        ret = -EINVAL;
                else
                        __tun_detach(tfile, false);