skb_queue_purge(&htt->tx_compl_q);
        skb_queue_purge(&htt->rx_compl_q);
        skb_queue_purge(&htt->rx_in_ord_compl_q);
+       skb_queue_purge(&htt->tx_fetch_ind_q);
 
        ath10k_htt_rx_ring_free(htt);
 
        skb_queue_head_init(&htt->tx_compl_q);
        skb_queue_head_init(&htt->rx_compl_q);
        skb_queue_head_init(&htt->rx_in_ord_compl_q);
+       skb_queue_head_init(&htt->tx_fetch_ind_q);
 
        tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task,
                     (unsigned long)htt);
 
 static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
 {
+       struct ieee80211_hw *hw = ar->hw;
+       struct ieee80211_txq *txq;
        struct htt_resp *resp = (struct htt_resp *)skb->data;
        struct htt_tx_fetch_record *record;
        size_t len;
        size_t max_num_bytes;
        size_t max_num_msdus;
+       size_t num_bytes;
+       size_t num_msdus;
        const __le32 *resp_ids;
        u16 num_records;
        u16 num_resp_ids;
        u16 peer_id;
        u8 tid;
+       int ret;
        int i;
 
        ath10k_dbg(ar, ATH10K_DBG_HTT, "htt rx tx fetch ind\n");
                   num_records, num_resp_ids,
                   le16_to_cpu(resp->tx_fetch_ind.fetch_seq_num));
 
-       /* TODO: runtime sanity checks */
+       if (!ar->htt.tx_q_state.enabled) {
+               ath10k_warn(ar, "received unexpected tx_fetch_ind event: not enabled\n");
+               return;
+       }
+
+       if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) {
+               ath10k_warn(ar, "received unexpected tx_fetch_ind event: in push mode\n");
+               return;
+       }
+
+       rcu_read_lock();
 
        for (i = 0; i < num_records; i++) {
                record = &resp->tx_fetch_ind.records[i];
                        continue;
                }
 
-               /* TODO: dequeue and submit tx to device */
+               spin_lock_bh(&ar->data_lock);
+               txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
+               spin_unlock_bh(&ar->data_lock);
+
+               /* It is okay to release the lock and use txq because RCU read
+                * lock is held.
+                */
+
+               if (unlikely(!txq)) {
+                       ath10k_warn(ar, "failed to lookup txq for peer_id %hu tid %hhu\n",
+                                   peer_id, tid);
+                       continue;
+               }
+
+               num_msdus = 0;
+               num_bytes = 0;
+
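+               /* Push frames for this (peer, tid) until the record's msdu and
+                * byte budget is exhausted or the queue runs dry, then report
+                * back how much was actually delivered.
+                */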
+               while (num_msdus < max_num_msdus &&
+                      num_bytes < max_num_bytes) {
+                       ret = ath10k_mac_tx_push_txq(hw, txq);
+                       if (ret < 0)
+                               break;
+
+                       num_msdus++;
+                       num_bytes += ret;
+               }
+
+               record->num_msdus = cpu_to_le16(num_msdus);
+               record->num_bytes = cpu_to_le32(num_bytes);
+
+               ath10k_htt_tx_txq_recalc(hw, txq);
        }
 
+       rcu_read_unlock();
+
        resp_ids = ath10k_htt_get_tx_fetch_ind_resp_ids(&resp->tx_fetch_ind);
        ath10k_htt_rx_tx_fetch_resp_id_confirm(ar, resp_ids, num_resp_ids);
 
-       /* TODO: generate and send fetch response to device */
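+       /* Echo the token and sequence number back along with the updated
+        * records so firmware knows how much was actually delivered.
+        */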
+       ret = ath10k_htt_tx_fetch_resp(ar,
+                                      resp->tx_fetch_ind.token,
+                                      resp->tx_fetch_ind.fetch_seq_num,
+                                      resp->tx_fetch_ind.records,
+                                      num_records);
+       if (unlikely(ret)) {
+               ath10k_warn(ar, "failed to submit tx fetch resp for token 0x%08x: %d\n",
+                           le32_to_cpu(resp->tx_fetch_ind.token), ret);
+               /* FIXME: request fw restart */
+       }
+
+       ath10k_htt_tx_txq_sync(ar);
 }
 
 static void ath10k_htt_rx_tx_fetch_confirm(struct ath10k *ar,
 {
        const struct htt_resp *resp = (void *)skb->data;
        const struct htt_tx_mode_switch_record *record;
+       struct ieee80211_txq *txq;
+       struct ath10k_txq *artxq;
        size_t len;
        size_t num_records;
        enum htt_tx_mode_switch_mode mode;
        if (!enable)
                return;
 
-       /* TODO: apply configuration */
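+       /* Latch the queueing mode and push threshold advertised by the mode
+        * switch indication.
+        */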
+       ar->htt.tx_q_state.enabled = enable;
+       ar->htt.tx_q_state.mode = mode;
+       ar->htt.tx_q_state.num_push_allowed = threshold;
+
+       rcu_read_lock();
 
        for (i = 0; i < num_records; i++) {
                record = &resp->tx_mode_switch_ind.records[i];
                        continue;
                }
 
-               /* TODO: apply configuration */
+               spin_lock_bh(&ar->data_lock);
+               txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
+               spin_unlock_bh(&ar->data_lock);
+
+               /* It is okay to release the lock and use txq because RCU read
+                * lock is held.
+                */
+
+               if (unlikely(!txq)) {
+                       ath10k_warn(ar, "failed to lookup txq for peer_id %hu tid %hhu\n",
+                                   peer_id, tid);
+                       continue;
+               }
+
+               spin_lock_bh(&ar->htt.tx_lock);
+               artxq = (void *)txq->drv_priv;
+               artxq->num_push_allowed = le16_to_cpu(record->num_max_msdus);
+               spin_unlock_bh(&ar->htt.tx_lock);
        }
 
-       /* TODO: apply configuration */
+       rcu_read_unlock();
+
+       ath10k_mac_tx_push_pending(ar);
 }
 
 void ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
        case HTT_T2H_MSG_TYPE_AGGR_CONF:
                break;
        case HTT_T2H_MSG_TYPE_TX_FETCH_IND:
-               ath10k_htt_rx_tx_fetch_ind(ar, skb);
-               break;
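+               /* Defer processing to the txrx tasklet. The skb now belongs to
+                * tx_fetch_ind_q and is freed there, so return without falling
+                * through to the common cleanup.
+                */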
+               skb_queue_tail(&htt->tx_fetch_ind_q, skb);
+               tasklet_schedule(&htt->txrx_compl_task);
+               return;
        case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM:
                ath10k_htt_rx_tx_fetch_confirm(ar, skb);
                break;
        struct sk_buff_head tx_q;
        struct sk_buff_head rx_q;
        struct sk_buff_head rx_ind_q;
+       struct sk_buff_head tx_ind_q;
        struct htt_resp *resp;
        struct sk_buff *skb;
        unsigned long flags;
        __skb_queue_head_init(&tx_q);
        __skb_queue_head_init(&rx_q);
        __skb_queue_head_init(&rx_ind_q);
+       __skb_queue_head_init(&tx_ind_q);
 
        spin_lock_irqsave(&htt->tx_compl_q.lock, flags);
        skb_queue_splice_init(&htt->tx_compl_q, &tx_q);
        skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q);
        spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags);
 
+       spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
+       skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
+       spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);
+
        while ((skb = __skb_dequeue(&tx_q))) {
                ath10k_htt_rx_frm_tx_compl(htt->ar, skb);
                dev_kfree_skb_any(skb);
        }
 
+       while ((skb = __skb_dequeue(&tx_ind_q))) {
+               ath10k_htt_rx_tx_fetch_ind(ar, skb);
+               dev_kfree_skb_any(skb);
+       }
+
        ath10k_mac_tx_push_pending(ar);
 
        while ((skb = __skb_dequeue(&rx_q))) {
 
        if (!ar->htt.tx_q_state.enabled)
                return;
 
+       /* The shared tx queue state is only read by firmware when it pulls
+        * frames (push-pull mode), so skip the bookkeeping otherwise.
+        */
+       if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL)
+               return;
+
        if (txq->sta)
                peer_id = arsta->peer_id;
        else
        if (!ar->htt.tx_q_state.enabled)
                return;
 
+       if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL)
+               return;
+
        seq = le32_to_cpu(ar->htt.tx_q_state.vaddr->seq);
        seq++;
        ar->htt.tx_q_state.vaddr->seq = cpu_to_le32(seq);
                                   DMA_TO_DEVICE);
 }
 
+void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
+                             struct ieee80211_txq *txq)
+{
+       struct ath10k *ar = hw->priv;
+
+       spin_lock_bh(&ar->htt.tx_lock);
+       __ath10k_htt_tx_txq_recalc(hw, txq);
+       spin_unlock_bh(&ar->htt.tx_lock);
+}
+
+void ath10k_htt_tx_txq_sync(struct ath10k *ar)
+{
+       spin_lock_bh(&ar->htt.tx_lock);
+       __ath10k_htt_tx_txq_sync(ar);
+       spin_unlock_bh(&ar->htt.tx_lock);
+}
+
 void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
                              struct ieee80211_txq *txq)
 {
 {
        struct sk_buff *skb;
        struct htt_cmd *cmd;
-       u16 resp_id;
+       const u16 resp_id = 0;
        int len = 0;
        int ret;
 
+       /* Response IDs are echoed back only for host driver convenience
+        * purposes. They aren't used for anything in the driver yet, so use 0.
+        */
+
        len += sizeof(cmd->hdr);
        len += sizeof(cmd->tx_fetch_resp);
        len += sizeof(cmd->tx_fetch_resp.records[0]) * num_records;
        if (!skb)
                return -ENOMEM;
 
-       resp_id = 0; /* TODO: allocate resp_id */
-       ret = 0;
-       if (ret)
-               goto err_free_skb;
-
        skb_put(skb, len);
        cmd = (struct htt_cmd *)skb->data;
        cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FETCH_RESP;
        ret = ath10k_htc_send(&ar->htc, ar->htt.eid, skb);
        if (ret) {
                ath10k_warn(ar, "failed to submit htc command: %d\n", ret);
-               goto err_free_resp_id;
+               goto err_free_skb;
        }
 
        return 0;
 
-err_free_resp_id:
-       (void)resp_id; /* TODO: free resp_id */
-
 err_free_skb:
        dev_kfree_skb_any(skb);
 
 
        spin_unlock_bh(&ar->htt.tx_lock);
 }
 
+/* Resolve a firmware (peer_id, tid) pair to its mac80211 txq. Station peers
+ * use the per-tid queue; otherwise the vif's txq is used. Caller must hold
+ * ar->data_lock.
+ */
+struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar,
+                                           u16 peer_id,
+                                           u8 tid)
+{
+       struct ath10k_peer *peer;
+
+       lockdep_assert_held(&ar->data_lock);
+
+       peer = ar->peer_map[peer_id];
+       if (!peer)
+               return NULL;
+
+       if (peer->sta)
+               return peer->sta->txq[tid];
+       else if (peer->vif)
+               return peer->vif->txq;
+       else
+               return NULL;
+}
+
 static bool ath10k_mac_tx_can_push(struct ieee80211_hw *hw,
                                   struct ieee80211_txq *txq)
 {
-       return 1; /* TBD */
+       struct ath10k *ar = hw->priv;
+       struct ath10k_txq *artxq = (void *)txq->drv_priv;
+
+       /* No locking needed; racy reads of these counters are harmless here. */
+
+       if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH)
+               return true;
+
+       if (ar->htt.num_pending_tx < ar->htt.tx_q_state.num_push_allowed)
+               return true;
+
+       if (artxq->num_fw_queued < artxq->num_push_allowed)
+               return true;
+
+       return false;
 }
 
-static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
-                                 struct ieee80211_txq *txq)
+int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
+                          struct ieee80211_txq *txq)
 {
        const bool is_mgmt = false;
        const bool is_presp = false;
        enum ath10k_hw_txrx_mode txmode;
        enum ath10k_mac_tx_path txpath;
        struct sk_buff *skb;
+       size_t skb_len;
        int ret;
 
        spin_lock_bh(&ar->htt.tx_lock);
 
        ath10k_mac_tx_h_fill_cb(ar, vif, txq, skb);
 
+       /* Record the length now; the skb is consumed by the tx path below and
+        * the caller uses the return value for byte accounting.
+        */
+       skb_len = skb->len;
        txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb);
        txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode);
 
        artxq->num_fw_queued++;
        spin_unlock_bh(&ar->htt.tx_lock);
 
-       return 0;
+       return skb_len;
 }
 
 void ath10k_mac_tx_push_pending(struct ath10k *ar)