        /*
         * For performance sake on SMP, we put highly modified fields at the end
         */
-       struct Qdisc            *next_sched ____cacheline_aligned_in_smp;
-       struct sk_buff          *gso_skb;
-       unsigned long           state;
+       struct sk_buff          *gso_skb ____cacheline_aligned_in_smp;
        struct sk_buff_head     q;
        struct gnet_stats_basic_packed bstats;
        seqcount_t              running;
        struct gnet_stats_queue qstats;
+       unsigned long           state;
+       struct Qdisc            *next_sched;
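+       /* skb parked by bulk dequeue because its txq mapping differed;
+        * retried later by dequeue_skb().
+        */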
+       struct sk_buff          *skb_bad_txq;
        struct rcu_head         rcu_head;
        int                     padded;
        atomic_t                refcnt;
 
        skb->next = NULL;
 }
 
+/* This variant of try_bulk_dequeue_skb() makes sure
+ * all skbs in the chain are for the same txq
+ */
+static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
+                                     struct sk_buff *skb,
+                                     int *packets)
+{
+       int mapping = skb_get_queue_mapping(skb);
+       struct sk_buff *nskb;
+       int cnt = 0;
+
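+       /* Dequeue up to eight more skbs and chain them behind @skb as
+        * long as they share its queue mapping; a mismatching skb is
+        * parked in q->skb_bad_txq for a later attempt.
+        */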
+       do {
+               nskb = q->dequeue(q);
+               if (!nskb)
+                       break;
+               if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
+                       q->skb_bad_txq = nskb;
+                       qdisc_qstats_backlog_inc(q, nskb);
+                       q->q.qlen++;
+                       break;
+               }
+               skb->next = nskb;
+               skb = nskb;
+       } while (++cnt < 8);
+       (*packets) += cnt;
+       skb->next = NULL;
+}
+
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
        const struct netdev_queue *txq = q->dev_queue;
 
        *packets = 1;
-       *validate = true;
        if (unlikely(skb)) {
+               /* skbs in gso_skb were already validated */
+               *validate = false;
                /* check the reason of requeuing without tx lock first */
                txq = skb_get_tx_queue(txq->dev, skb);
                if (!netif_xmit_frozen_or_stopped(txq)) {
                        q->q.qlen--;
                } else
                        skb = NULL;
-               /* skb in gso_skb were already validated */
-               *validate = false;
-       } else {
-               if (!(q->flags & TCQ_F_ONETXQUEUE) ||
-                   !netif_xmit_frozen_or_stopped(txq)) {
-                       skb = q->dequeue(q);
-                       if (skb && qdisc_may_bulk(q))
-                               try_bulk_dequeue_skb(q, skb, txq, packets);
+               return skb;
+       }
+       *validate = true;
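+       /* Retry an skb parked earlier because its txq mapping did not
+        * match the rest of its bulk.
+        */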
+       skb = q->skb_bad_txq;
+       if (unlikely(skb)) {
+               /* check the reason of requeuing without tx lock first */
+               txq = skb_get_tx_queue(txq->dev, skb);
+               if (!netif_xmit_frozen_or_stopped(txq)) {
+                       q->skb_bad_txq = NULL;
+                       qdisc_qstats_backlog_dec(q, skb);
+                       q->q.qlen--;
+                       goto bulk;
                }
+               return NULL;
+       }
+       if (!(q->flags & TCQ_F_ONETXQUEUE) ||
+           !netif_xmit_frozen_or_stopped(txq))
+               skb = q->dequeue(q);
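+       /* Bulk-dequeue follow-up packets; the slow variant re-checks
+        * the txq mapping of every skb it chains.
+        */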
+       if (skb) {
+bulk:
+               if (qdisc_may_bulk(q))
+                       try_bulk_dequeue_skb(q, skb, txq, packets);
+               else
+                       try_bulk_dequeue_skb_slow(q, skb, packets);
        }
        return skb;
 }
        if (ops->reset)
                ops->reset(qdisc);
 
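+       /* Free any skb parked because of a txq mismatch */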
+       kfree_skb(qdisc->skb_bad_txq);
+       qdisc->skb_bad_txq = NULL;
+
        if (qdisc->gso_skb) {
                kfree_skb_list(qdisc->gso_skb);
                qdisc->gso_skb = NULL;
-               qdisc->q.qlen = 0;
        }
+       qdisc->q.qlen = 0;
 }
 EXPORT_SYMBOL(qdisc_reset);
 
        dev_put(qdisc_dev(qdisc));
 
        kfree_skb_list(qdisc->gso_skb);
+       kfree_skb(qdisc->skb_bad_txq);
        /*
         * gen_estimator est_timer() might access qdisc->q.lock,
         * wait a RCU grace period before freeing qdisc.