struct mt7921_sta *msta;
        u32 tx_time[IEEE80211_NUM_ACS], rx_time[IEEE80211_NUM_ACS];
        LIST_HEAD(sta_poll_list);
+       struct rate_info *rate;
        int i;
 
        spin_lock_bh(&dev->sta_poll_lock);
 
        while (true) {
                bool clear = false;
-               u32 addr;
+               u32 addr, val;
                u16 idx;
+               u8 bw;
 
                spin_lock_bh(&dev->sta_poll_lock);
                if (list_empty(&sta_poll_list)) {
                spin_unlock_bh(&dev->sta_poll_lock);
 
                idx = msta->wcid.idx;
-               addr = MT_WTBL_LMAC_OFFS(idx, 0) + 20 * 4;
+               addr = mt7921_mac_wtbl_lmac_addr(idx, MT_WTBL_AC0_CTT_OFFSET);
 
                for (i = 0; i < IEEE80211_NUM_ACS; i++) {
                        u32 tx_last = msta->airtime_ac[i];
                        ieee80211_sta_register_airtime(sta, tid, tx_cur,
                                                       rx_cur);
                }
+
+               /* We don't support reading GI info from txs packets.
+                * For accurate tx status reporting and AQL improvement,
+                * we need to make sure that the flags match, so poll the GI
+                * directly from the per-sta counters.
+                */
+               rate = &msta->wcid.rate;
+               addr = mt7921_mac_wtbl_lmac_addr(idx,
+                                                MT_WTBL_TXRX_CAP_RATE_OFFSET);
+               val = mt76_rr(dev, addr);
+
+               switch (rate->bw) {
+               case RATE_INFO_BW_160:
+                       bw = IEEE80211_STA_RX_BW_160;
+                       break;
+               case RATE_INFO_BW_80:
+                       bw = IEEE80211_STA_RX_BW_80;
+                       break;
+               case RATE_INFO_BW_40:
+                       bw = IEEE80211_STA_RX_BW_40;
+                       break;
+               default:
+                       bw = IEEE80211_STA_RX_BW_20;
+                       break;
+               }
+
+               if (rate->flags & RATE_INFO_FLAGS_HE_MCS) {
+                       u8 offs = MT_WTBL_TXRX_RATE_G2_HE + 2 * bw;
+
+                       rate->he_gi = (val & (0x3 << offs)) >> offs;
+               } else if (rate->flags &
+                          (RATE_INFO_FLAGS_VHT_MCS | RATE_INFO_FLAGS_MCS)) {
+                       if (val & BIT(MT_WTBL_TXRX_RATE_G2 + bw))
+                               rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
+                       else
+                               rate->flags &= ~RATE_INFO_FLAGS_SHORT_GI;
+               }
        }
 
        rcu_read_unlock();
        txwi[7] |= cpu_to_le32(val);
 }
 
-static void mt7921_update_txs(struct mt76_wcid *wcid, __le32 *txwi)
-{
-       struct mt7921_sta *msta = container_of(wcid, struct mt7921_sta, wcid);
-       u32 pid, frame_type;
-
-       frame_type = FIELD_GET(MT_TXD2_FRAME_TYPE, le32_to_cpu(txwi[2]));
-       if (!(frame_type & (IEEE80211_FTYPE_DATA >> 2)))
-               return;
-
-       if (time_is_after_eq_jiffies(msta->next_txs_ts))
-               return;
-
-       msta->next_txs_ts = jiffies + msecs_to_jiffies(250);
-       pid = mt76_get_next_pkt_id(wcid);
-       txwi[5] |= cpu_to_le32(MT_TXD5_TX_STATUS_MCU |
-                              FIELD_PREP(MT_TXD5_PID, pid));
-}
-
 static void
 mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
                      struct sk_buff *skb, struct mt76_wcid *wcid,
                txwi[6] |= cpu_to_le32(val);
                txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);
        }
-
-       mt7921_update_txs(wcid, txwi);
 }
 
 static void
        struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
        struct ieee80211_key_conf *key = info->control.hw_key;
-       struct mt76_tx_cb *cb = mt76_tx_skb_cb(tx_info->skb);
        struct mt76_txwi_cache *t;
        struct mt7921_txp_common *txp;
-       int id;
+       int id, pid;
        u8 *txwi = (u8 *)txwi_ptr;
 
        if (unlikely(tx_info->skb->len <= ETH_HLEN))
        if (!wcid)
                wcid = &dev->mt76.global_wcid;
 
-       cb->wcid = wcid->idx;
-
        t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
        t->skb = tx_info->skb;
 
        if (id < 0)
                return id;
 
+       if (sta) {
+               struct mt7921_sta *msta = (struct mt7921_sta *)sta->drv_priv;
+
+               if (time_after(jiffies, msta->stats.jiffies + HZ / 4)) {
+                       info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+                       msta->stats.jiffies = jiffies;
+               }
+       }
+
+       pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb);
        mt7921_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, key,
-                             MT_PACKET_ID_NO_SKB, false);
+                             pid, false);
 
        txp = (struct mt7921_txp_common *)(txwi + MT_TXD_SIZE);
        memset(txp, 0, sizeof(struct mt7921_txp_common));
                ieee80211_start_tx_ba_session(sta, tid, 0);
 }
 
-static void
-mt7921_tx_complete_status(struct mt76_dev *mdev, struct sk_buff *skb,
-                         struct ieee80211_sta *sta, bool clear_status,
-                         struct list_head *free_list)
-{
-       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-       struct ieee80211_tx_status status = {
-               .sta = sta,
-               .info = info,
-               .skb = skb,
-               .free_list = free_list,
-       };
-       struct ieee80211_hw *hw;
-
-       if (sta) {
-               struct mt7921_sta *msta;
-
-               msta = (struct mt7921_sta *)sta->drv_priv;
-               status.rate = &msta->stats.tx_rate;
-       }
-
-       hw = mt76_tx_status_get_hw(mdev, skb);
-
-       if (info->flags & IEEE80211_TX_CTL_AMPDU)
-               info->flags |= IEEE80211_TX_STAT_AMPDU;
-
-       if (clear_status)
-               ieee80211_tx_info_clear_status(info);
-
-       if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
-               info->flags |= IEEE80211_TX_STAT_ACK;
-
-       info->status.tx_time = 0;
-       ieee80211_tx_status_ext(hw, &status);
-}
-
 static void
 mt7921_txp_skb_unmap(struct mt76_dev *dev, struct mt76_txwi_cache *t)
 {
                 struct list_head *free_list)
 {
        struct mt76_dev *mdev = &dev->mt76;
-       struct ieee80211_tx_info *info;
        __le32 *txwi;
+       u16 wcid_idx;
 
        mt7921_txp_skb_unmap(mdev, t);
        if (!t->skb)
                goto out;
 
-       if (!sta)
-               goto out;
-
        txwi = (__le32 *)mt76_get_txwi_ptr(mdev, t);
-       if (likely(t->skb->protocol != cpu_to_be16(ETH_P_PAE)))
-               mt7921_tx_check_aggr(sta, txwi);
-
-       info = IEEE80211_SKB_CB(t->skb);
-       if (!info->tx_time_est) {
+       if (sta) {
                struct mt76_wcid *wcid = (struct mt76_wcid *)sta->drv_priv;
-               int pending;
 
-               pending = atomic_dec_return(&wcid->non_aql_packets);
-               if (pending < 0)
-                       atomic_cmpxchg(&wcid->non_aql_packets, pending, 0);
+               if (likely(t->skb->protocol != cpu_to_be16(ETH_P_PAE)))
+                       mt7921_tx_check_aggr(sta, txwi);
+
+               wcid_idx = wcid->idx;
+       } else {
+               wcid_idx = FIELD_GET(MT_TXD1_WLAN_IDX, le32_to_cpu(txwi[1]));
        }
 
-       mt7921_tx_complete_status(mdev, t->skb, sta, clear_status, free_list);
+       __mt76_tx_complete_skb(mdev, wcid_idx, t->skb, free_list);
+
 out:
        t->skb = NULL;
        mt76_put_txwi(mdev, t);
 mt7921_mac_add_txs_skb(struct mt7921_dev *dev, struct mt76_wcid *wcid, int pid,
                       __le32 *txs_data)
 {
+       struct ieee80211_supported_band *sband;
        struct mt76_dev *mdev = &dev->mt76;
        struct ieee80211_tx_info *info;
+       struct rate_info rate = {};
        struct sk_buff_head list;
        struct sk_buff *skb;
+       bool cck = false;
+       u32 txrate, txs;
 
        mt76_tx_status_lock(mdev, &list);
        skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
                goto out;
 
        info = IEEE80211_SKB_CB(skb);
-       if (!(txs_data[0] & le32_to_cpu(MT_TXS0_ACK_ERROR_MASK)))
+       txs = le32_to_cpu(txs_data[0]);
+       if (!(txs & MT_TXS0_ACK_ERROR_MASK))
                info->flags |= IEEE80211_TX_STAT_ACK;
 
        info->status.ampdu_len = 1;
                                        IEEE80211_TX_STAT_ACK);
 
        info->status.rates[0].idx = -1;
-       mt76_tx_status_skb_done(mdev, skb, &list);
+
+       if (!wcid->sta)
+               goto out;
+
+       txrate = FIELD_GET(MT_TXS0_TX_RATE, txs);
+
+       rate.mcs = FIELD_GET(MT_TX_RATE_IDX, txrate);
+       rate.nss = FIELD_GET(MT_TX_RATE_NSS, txrate) + 1;
+
+       switch (FIELD_GET(MT_TX_RATE_MODE, txrate)) {
+       case MT_PHY_TYPE_CCK:
+               cck = true;
+               fallthrough;
+       case MT_PHY_TYPE_OFDM:
+               if (dev->mphy.chandef.chan->band == NL80211_BAND_5GHZ)
+                       sband = &dev->mphy.sband_5g.sband;
+               else
+                       sband = &dev->mphy.sband_2g.sband;
+
+               rate.mcs = mt76_get_rate(dev->mphy.dev, sband, rate.mcs, cck);
+               rate.legacy = sband->bitrates[rate.mcs].bitrate;
+               break;
+       case MT_PHY_TYPE_HT:
+       case MT_PHY_TYPE_HT_GF:
+               rate.mcs += (rate.nss - 1) * 8;
+               if (rate.mcs > 31)
+                       goto out;
+
+               rate.flags = RATE_INFO_FLAGS_MCS;
+               if (wcid->rate.flags & RATE_INFO_FLAGS_SHORT_GI)
+                       rate.flags |= RATE_INFO_FLAGS_SHORT_GI;
+               break;
+       case MT_PHY_TYPE_VHT:
+               if (rate.mcs > 9)
+                       goto out;
+
+               rate.flags = RATE_INFO_FLAGS_VHT_MCS;
+               break;
+       case MT_PHY_TYPE_HE_SU:
+       case MT_PHY_TYPE_HE_EXT_SU:
+       case MT_PHY_TYPE_HE_TB:
+       case MT_PHY_TYPE_HE_MU:
+               if (rate.mcs > 11)
+                       goto out;
+
+               rate.he_gi = wcid->rate.he_gi;
+               rate.he_dcm = FIELD_GET(MT_TX_RATE_DCM, txrate);
+               rate.flags = RATE_INFO_FLAGS_HE_MCS;
+               break;
+       default:
+               goto out;
+       }
+
+       switch (FIELD_GET(MT_TXS0_BW, txs)) {
+       case IEEE80211_STA_RX_BW_160:
+               rate.bw = RATE_INFO_BW_160;
+               break;
+       case IEEE80211_STA_RX_BW_80:
+               rate.bw = RATE_INFO_BW_80;
+               break;
+       case IEEE80211_STA_RX_BW_40:
+               rate.bw = RATE_INFO_BW_40;
+               break;
+       default:
+               rate.bw = RATE_INFO_BW_20;
+               break;
+       }
+       wcid->rate = rate;
 
 out:
+       if (skb)
+               mt76_tx_status_skb_done(mdev, skb, &list);
        mt76_tx_status_unlock(mdev, &list);
 
        return !!skb;
                e->skb = t ? t->skb : NULL;
        }
 
-       if (e->skb) {
-               struct mt76_tx_cb *cb = mt76_tx_skb_cb(e->skb);
-               struct mt76_wcid *wcid;
-
-               wcid = rcu_dereference(dev->mt76.wcid[cb->wcid]);
-
-               mt7921_tx_complete_status(mdev, e->skb, wcid_to_sta(wcid), 0,
-                                         NULL);
-       }
+       if (e->skb)
+               mt76_tx_complete_skb(mdev, e->wcid, e->skb);
 }
 
 void mt7921_mac_reset_counters(struct mt7921_phy *phy)