        tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
        tx->sta->tx_stats.msdu[tid]++;
 
-       if (!tx->sta->sta.txq[0])
-               hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
+       hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
 
        return TX_CONTINUE;
 }
        fq_flow_init(&txqi->def_flow);
        codel_vars_init(&txqi->def_cvars);
        codel_stats_init(&txqi->cstats);
+       __skb_queue_head_init(&txqi->frags);
 
        txqi->txq.vif = &sdata->vif;
 
        struct fq_tin *tin = &txqi->tin;
 
        fq_tin_reset(fq, tin, fq_skb_free_func);
+       ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
 }
 
 int ieee80211_txq_setup_flows(struct ieee80211_local *local)
        spin_unlock_bh(&fq->lock);
 }
 
+static bool ieee80211_queue_skb(struct ieee80211_local *local,
+                               struct ieee80211_sub_if_data *sdata,
+                               struct sta_info *sta,
+                               struct sk_buff *skb)
+{
+       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+       struct fq *fq = &local->fq;
+       struct ieee80211_vif *vif;
+       struct txq_info *txqi;
+       struct ieee80211_sta *pubsta;
+
+       if (!local->ops->wake_tx_queue ||
+           sdata->vif.type == NL80211_IFTYPE_MONITOR)
+               return false;
+
+       if (sta && sta->uploaded)
+               pubsta = &sta->sta;
+       else
+               pubsta = NULL;
+
+       if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+               sdata = container_of(sdata->bss,
+                                    struct ieee80211_sub_if_data, u.ap);
+
+       vif = &sdata->vif;
+       txqi = ieee80211_get_txq(local, vif, pubsta, skb);
+
+       if (!txqi)
+               return false;
+
+       info->control.vif = vif;
+
+       spin_lock_bh(&fq->lock);
+       ieee80211_txq_enqueue(local, txqi, skb);
+       spin_unlock_bh(&fq->lock);
+
+       drv_wake_tx_queue(local, txqi);
+
+       return true;
+}
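
For context, here is a minimal sketch of the driver side this hand-off
targets: a driver implementing the wake_tx_queue op is expected to pull
frames back out of the TXQ with ieee80211_tx_dequeue() and push them to
hardware. The driver name and the mydrv_tx_one() helper are hypothetical
placeholders, not part of this patch.

#include <net/mac80211.h>

/* Hypothetical hardware submit routine provided by the driver. */
static void mydrv_tx_one(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                         struct ieee80211_sta *sta, struct sk_buff *skb);

static void mydrv_wake_tx_queue(struct ieee80211_hw *hw,
                                struct ieee80211_txq *txq)
{
        struct sk_buff *skb;

        /* Drain the software queue; with this patch applied,
         * ieee80211_tx_dequeue() runs the deferred (late) TX handlers
         * before handing back each frame.
         */
        while ((skb = ieee80211_tx_dequeue(hw, txq)))
                mydrv_tx_one(hw, txq->vif, txq->sta, skb);
}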
+
 static bool ieee80211_tx_frags(struct ieee80211_local *local,
                               struct ieee80211_vif *vif,
                               struct ieee80211_sta *sta,
                               bool txpending)
 {
        struct ieee80211_tx_control control = {};
-       struct fq *fq = &local->fq;
        struct sk_buff *skb, *tmp;
-       struct txq_info *txqi;
        unsigned long flags;
 
        skb_queue_walk_safe(skbs, skb, tmp) {
                }
 #endif
 
-               txqi = ieee80211_get_txq(local, vif, sta, skb);
-               if (txqi) {
-                       info->control.vif = vif;
-
-                       __skb_unlink(skb, skbs);
-
-                       spin_lock_bh(&fq->lock);
-                       ieee80211_txq_enqueue(local, txqi, skb);
-                       spin_unlock_bh(&fq->lock);
-
-                       drv_wake_tx_queue(local, txqi);
-
-                       continue;
-               }
-
                spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
                if (local->queue_stop_reasons[q] ||
                    (!txpending && !skb_queue_empty(&local->pending[q]))) {
 /*
  * Invoke TX handlers, return 0 on success and non-zero if the
  * frame was dropped or queued.
+ *
+ * The handlers are split into an early and late part. The latter is everything
+ * that can be sensitive to reordering, and will be deferred to after packets
+ * are dequeued from the intermediate queues (when they are enabled).
  */
-static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx)
 {
-       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
        ieee80211_tx_result res = TX_DROP;
 
 #define CALL_TXH(txh) \
        if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
                CALL_TXH(ieee80211_tx_h_rate_ctrl);
 
+ txh_done:
+       if (unlikely(res == TX_DROP)) {
+               I802_DEBUG_INC(tx->local->tx_handlers_drop);
+               if (tx->skb)
+                       ieee80211_free_txskb(&tx->local->hw, tx->skb);
+               else
+                       ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs);
+               return -1;
+       } else if (unlikely(res == TX_QUEUED)) {
+               I802_DEBUG_INC(tx->local->tx_handlers_queued);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Late handlers can be called while the sta lock is held. Handlers that can
+ * cause packets to be generated will cause deadlock!
+ */
+static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx)
+{
+       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+       ieee80211_tx_result res = TX_CONTINUE;
+
        if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) {
                __skb_queue_tail(&tx->skbs, tx->skb);
                tx->skb = NULL;
        return 0;
 }
 
+static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+{
+       int r = invoke_tx_handlers_early(tx);
+
+       if (r)
+               return r;
+       return invoke_tx_handlers_late(tx);
+}
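
To keep the two resulting orderings straight, a brief summary of the control
flow established above (a reading aid derived from the code in this patch):

/*
 * Without wake_tx_queue (or on monitor interfaces):
 *   invoke_tx_handlers_early() -> invoke_tx_handlers_late() -> __ieee80211_tx()
 *
 * With wake_tx_queue:
 *   xmit path:    invoke_tx_handlers_early() -> ieee80211_queue_skb()
 *   dequeue path: ieee80211_tx_h_select_key(), then either
 *                 ieee80211_xmit_fast_finish() or invoke_tx_handlers_late()
 */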
+
 bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
                              struct ieee80211_vif *vif, struct sk_buff *skb,
                              int band, struct ieee80211_sta **sta)
                info->hw_queue =
                        sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
 
-       if (!invoke_tx_handlers(&tx))
+       if (invoke_tx_handlers_early(&tx))
+               return false;
+
+       if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb))
+               return true;
+
+       if (!invoke_tx_handlers_late(&tx))
                result = __ieee80211_tx(local, &tx.skbs, led_len,
                                        tx.sta, txpending);
 
        return ret;
 }
 
+/*
+ * Can be called while the sta lock is held. Anything that can cause packets to
+ * be generated will cause deadlock!
+ */
+static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
+                                      struct sta_info *sta, u8 pn_offs,
+                                      struct ieee80211_key *key,
+                                      struct sk_buff *skb)
+{
+       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+       struct ieee80211_hdr *hdr = (void *)skb->data;
+       u8 tid = IEEE80211_NUM_TIDS;
+
+       if (key)
+               info->control.hw_key = &key->conf;
+
+       ieee80211_tx_stats(skb->dev, skb->len);
+
+       if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+               tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+               *ieee80211_get_qos_ctl(hdr) = tid;
+               hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
+       } else {
+               info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+               hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
+               sdata->sequence_number += 0x10;
+       }
+
+       if (skb_shinfo(skb)->gso_size)
+               sta->tx_stats.msdu[tid] +=
+                       DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
+       else
+               sta->tx_stats.msdu[tid]++;
+
+       info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+
+       /* statistics normally done by ieee80211_tx_h_stats (but that
+        * has to consider fragmentation, so is more complex)
+        */
+       sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
+       sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+
+       if (pn_offs) {
+               u64 pn;
+               u8 *crypto_hdr = skb->data + pn_offs;
+
+               switch (key->conf.cipher) {
+               case WLAN_CIPHER_SUITE_CCMP:
+               case WLAN_CIPHER_SUITE_CCMP_256:
+               case WLAN_CIPHER_SUITE_GCMP:
+               case WLAN_CIPHER_SUITE_GCMP_256:
+                       pn = atomic64_inc_return(&key->conf.tx_pn);
+                       crypto_hdr[0] = pn;
+                       crypto_hdr[1] = pn >> 8;
+                       crypto_hdr[4] = pn >> 16;
+                       crypto_hdr[5] = pn >> 24;
+                       crypto_hdr[6] = pn >> 32;
+                       crypto_hdr[7] = pn >> 40;
+                       break;
+               }
+       }
+}
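
One detail worth noting in the PN write above: bytes 2 and 3 of the crypto
header are skipped because, in the standard CCMP/GCMP header layout, they
carry the reserved octet and the ExtIV/Key ID octet rather than PN bytes.
Sketched for reference (this layout predates the patch):

/*
 * 8-byte CCMP/GCMP header starting at pn_offs:
 *
 *   crypto_hdr[0] = PN0             crypto_hdr[4] = PN2
 *   crypto_hdr[1] = PN1             crypto_hdr[5] = PN3
 *   crypto_hdr[2] = reserved        crypto_hdr[6] = PN4
 *   crypto_hdr[3] = ExtIV / Key ID  crypto_hdr[7] = PN5
 */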
+
 static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
-                               struct net_device *dev, struct sta_info *sta,
+                               struct sta_info *sta,
                                struct ieee80211_fast_tx *fast_tx,
                                struct sk_buff *skb)
 {
                        return true;
        }
 
-       ieee80211_tx_stats(dev, skb->len + extra_head);
-
        if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
            ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
                return true;
        info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
                      IEEE80211_TX_CTL_DONTFRAG |
                      (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
-
-       if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
-               *ieee80211_get_qos_ctl(hdr) = tid;
-               if (!sta->sta.txq[0])
-                       hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
-       } else {
-               info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
-               hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
-               sdata->sequence_number += 0x10;
-       }
-
-       if (skb_shinfo(skb)->gso_size)
-               sta->tx_stats.msdu[tid] +=
-                       DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
-       else
-               sta->tx_stats.msdu[tid]++;
-
-       info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+       info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT;
 
        __skb_queue_head_init(&tx.skbs);
 
        tx.sta = sta;
        tx.key = fast_tx->key;
 
-       if (fast_tx->key)
-               info->control.hw_key = &fast_tx->key->conf;
-
        if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
                tx.skb = skb;
                r = ieee80211_tx_h_rate_ctrl(&tx);
                }
        }
 
-       /* statistics normally done by ieee80211_tx_h_stats (but that
-        * has to consider fragmentation, so is more complex)
-        */
-       sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
-       sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+       if (ieee80211_queue_skb(local, sdata, sta, skb))
+               return true;
 
-       if (fast_tx->pn_offs) {
-               u64 pn;
-               u8 *crypto_hdr = skb->data + fast_tx->pn_offs;
-
-               switch (fast_tx->key->conf.cipher) {
-               case WLAN_CIPHER_SUITE_CCMP:
-               case WLAN_CIPHER_SUITE_CCMP_256:
-               case WLAN_CIPHER_SUITE_GCMP:
-               case WLAN_CIPHER_SUITE_GCMP_256:
-                       pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn);
-                       crypto_hdr[0] = pn;
-                       crypto_hdr[1] = pn >> 8;
-                       crypto_hdr[4] = pn >> 16;
-                       crypto_hdr[5] = pn >> 24;
-                       crypto_hdr[6] = pn >> 32;
-                       crypto_hdr[7] = pn >> 40;
-                       break;
-               }
-       }
+       ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
+                                  fast_tx->key, skb);
 
        if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
                sdata = container_of(sdata->bss,
        struct sk_buff *skb = NULL;
        struct fq *fq = &local->fq;
        struct fq_tin *tin = &txqi->tin;
+       struct ieee80211_tx_info *info;
+       struct ieee80211_tx_data tx;
+       ieee80211_tx_result r;
 
        spin_lock_bh(&fq->lock);
 
        if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
                goto out;
 
+       /* Make sure fragments stay together. */
+       skb = __skb_dequeue(&txqi->frags);
+       if (skb)
+               goto out;
+
+begin:
        skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
        if (!skb)
                goto out;
        ieee80211_set_skb_vif(skb, txqi);
 
        hdr = (struct ieee80211_hdr *)skb->data;
-       if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
+       info = IEEE80211_SKB_CB(skb);
+
+       memset(&tx, 0, sizeof(tx));
+       __skb_queue_head_init(&tx.skbs);
+       tx.local = local;
+       tx.skb = skb;
+       tx.sdata = vif_to_sdata(info->control.vif);
+
+       if (txq->sta)
+               tx.sta = container_of(txq->sta, struct sta_info, sta);
+
+       /*
+        * The key can be removed while the packet is queued, so we need to
+        * call this here to get the current key.
+        */
+       r = ieee80211_tx_h_select_key(&tx);
+       if (r != TX_CONTINUE) {
+               ieee80211_free_txskb(&local->hw, skb);
+               goto begin;
+       }
+
+       if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
                struct sta_info *sta = container_of(txq->sta, struct sta_info,
                                                    sta);
-               struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+               u8 pn_offs = 0;
 
-               hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid);
-               if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
-                       info->flags |= IEEE80211_TX_CTL_AMPDU;
-               else
-                       info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+               if (tx.key &&
+                   (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
+                       pn_offs = ieee80211_hdrlen(hdr->frame_control);
+
+               ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
+                                          tx.key, skb);
+       } else {
+               if (invoke_tx_handlers_late(&tx))
+                       goto begin;
+
+               skb = __skb_dequeue(&tx.skbs);
+
+               if (!skb_queue_empty(&tx.skbs))
+                       skb_queue_splice_tail(&tx.skbs, &txqi->frags);
        }
 
 out:
                fast_tx = rcu_dereference(sta->fast_tx);
 
                if (fast_tx &&
-                   ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb))
+                   ieee80211_xmit_fast(sdata, sta, fast_tx, skb))
                        goto out;
        }