struct ieee80211_tx_rate *probe_rate,
                           struct ieee80211_tx_rate *rates)
 {
+       struct ieee80211_tx_rate *ref;
        int wcid = sta->wcid.idx;
        u32 addr = mt7603_wtbl2_addr(wcid);
        bool stbc = false;
        u16 val[4];
        u16 probe_val;
        u32 w9 = mt76_rr(dev, addr + 9 * 4);
-       int i;
+       bool rateset;
+       int i, k;
 
        if (!mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000))
                return;
        for (i = n_rates; i < 4; i++)
                rates[i] = rates[n_rates - 1];
 
+       rateset = !(sta->rate_set_tsf & BIT(0));
+       memcpy(sta->rateset[rateset].rates, rates,
+              sizeof(sta->rateset[rateset].rates));
+       if (probe_rate) {
+               sta->rateset[rateset].probe_rate = *probe_rate;
+               ref = &sta->rateset[rateset].probe_rate;
+       } else {
+               sta->rateset[rateset].probe_rate.idx = -1;
+               ref = &sta->rateset[rateset].rates[0];
+       }
+
+       rates = sta->rateset[rateset].rates;
+       for (i = 0; i < ARRAY_SIZE(sta->rateset[rateset].rates); i++) {
+               /*
+                * We don't support switching between short and long GI
+                * within the rate set. For accurate tx status reporting, we
+                * need to make sure that flags match.
+                * For improved performance, avoid duplicate entries by
+                * decrementing the MCS index if necessary.
+                */
+               if ((ref->flags ^ rates[i].flags) & IEEE80211_TX_RC_SHORT_GI)
+                       rates[i].flags ^= IEEE80211_TX_RC_SHORT_GI;
+
+               for (k = 0; k < i; k++) {
+                       if (rates[i].idx != rates[k].idx)
+                               continue;
+                       if ((rates[i].flags ^ rates[k].flags) &
+                           IEEE80211_TX_RC_40_MHZ_WIDTH)
+                               continue;
+
+                       rates[i].idx--;
+               }
+
+       }
+
        w9 &= MT_WTBL2_W9_SHORT_GI_20 | MT_WTBL2_W9_SHORT_GI_40 |
              MT_WTBL2_W9_SHORT_GI_80;
 
        mt76_wr(dev, MT_WTBL_RIUCR1,
                FIELD_PREP(MT_WTBL_RIUCR1_RATE0, probe_val) |
                FIELD_PREP(MT_WTBL_RIUCR1_RATE1, val[0]) |
-               FIELD_PREP(MT_WTBL_RIUCR1_RATE2_LO, val[0]));
+               FIELD_PREP(MT_WTBL_RIUCR1_RATE2_LO, val[1]));
 
        mt76_wr(dev, MT_WTBL_RIUCR2,
-               FIELD_PREP(MT_WTBL_RIUCR2_RATE2_HI, val[0] >> 8) |
+               FIELD_PREP(MT_WTBL_RIUCR2_RATE2_HI, val[1] >> 8) |
                FIELD_PREP(MT_WTBL_RIUCR2_RATE3, val[1]) |
-               FIELD_PREP(MT_WTBL_RIUCR2_RATE4, val[1]) |
+               FIELD_PREP(MT_WTBL_RIUCR2_RATE4, val[2]) |
                FIELD_PREP(MT_WTBL_RIUCR2_RATE5_LO, val[2]));
 
        mt76_wr(dev, MT_WTBL_RIUCR3,
                FIELD_PREP(MT_WTBL_RIUCR3_RATE5_HI, val[2] >> 4) |
-               FIELD_PREP(MT_WTBL_RIUCR3_RATE6, val[2]) |
+               FIELD_PREP(MT_WTBL_RIUCR3_RATE6, val[3]) |
                FIELD_PREP(MT_WTBL_RIUCR3_RATE7, val[3]));
 
+       mt76_set(dev, MT_LPON_T0CR, MT_LPON_T0CR_MODE); /* TSF read */
+       sta->rate_set_tsf = (mt76_rr(dev, MT_LPON_UTTR0) & ~BIT(0)) | rateset;
+
        mt76_wr(dev, MT_WTBL_UPDATE,
                FIELD_PREP(MT_WTBL_UPDATE_WLAN_IDX, wcid) |
                MT_WTBL_UPDATE_RATE_UPDATE |
 
        if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) {
                spin_lock_bh(&dev->mt76.lock);
-               msta->rate_probe = true;
                mt7603_wtbl_set_rates(dev, msta, &info->control.rates[0],
                                      msta->rates);
+               msta->rate_probe = true;
                spin_unlock_bh(&dev->mt76.lock);
        }
 
                struct ieee80211_tx_info *info, __le32 *txs_data)
 {
        struct ieee80211_supported_band *sband;
-       int final_idx = 0;
+       struct mt7603_rate_set *rs;
+       int first_idx = 0, last_idx;
+       u32 rate_set_tsf;
        u32 final_rate;
        u32 final_rate_flags;
+       bool rs_idx;
        bool ack_timeout;
        bool fixed_rate;
        bool probe;
        txs = le32_to_cpu(txs_data[4]);
        ampdu = !fixed_rate && (txs & MT_TXS4_AMPDU);
        count = FIELD_GET(MT_TXS4_TX_COUNT, txs);
+       last_idx = FIELD_GET(MT_TXS4_LAST_TX_RATE, txs);
 
        txs = le32_to_cpu(txs_data[0]);
        final_rate = FIELD_GET(MT_TXS0_TX_RATE, txs);
        if (ampdu || (info->flags & IEEE80211_TX_CTL_AMPDU))
                info->flags |= IEEE80211_TX_STAT_AMPDU | IEEE80211_TX_CTL_AMPDU;
 
+       first_idx = max_t(int, 0, last_idx - (count + 1) / MT7603_RATE_RETRY);
+
        if (fixed_rate && !probe) {
                info->status.rates[0].count = count;
+               i = 0;
                goto out;
        }
 
-       for (i = 0, idx = 0; i < ARRAY_SIZE(info->status.rates); i++) {
-               int cur_count = min_t(int, count, 2 * MT7603_RATE_RETRY);
+       rate_set_tsf = READ_ONCE(sta->rate_set_tsf);
+       rs_idx = !((u32)(FIELD_GET(MT_TXS1_F0_TIMESTAMP, le32_to_cpu(txs_data[1])) -
+                        rate_set_tsf) < 1000000);
+       rs_idx ^= rate_set_tsf & BIT(0);
+       rs = &sta->rateset[rs_idx];
 
-               if (!i && probe) {
-                       cur_count = 1;
-               } else {
-                       info->status.rates[i] = sta->rates[idx];
-                       idx++;
-               }
+       if (!first_idx && rs->probe_rate.idx >= 0) {
+               info->status.rates[0] = rs->probe_rate;
 
-               if (i && info->status.rates[i].idx < 0) {
-                       info->status.rates[i - 1].count += count;
-                       break;
+               spin_lock_bh(&dev->mt76.lock);
+               if (sta->rate_probe) {
+                       mt7603_wtbl_set_rates(dev, sta, NULL,
+                                             sta->rates);
+                       sta->rate_probe = false;
                }
+               spin_unlock_bh(&dev->mt76.lock);
+       } else
+               info->status.rates[0] = rs->rates[first_idx / 2];
+       info->status.rates[0].count = 0;
 
-               if (!count) {
-                       info->status.rates[i].idx = -1;
-                       break;
-               }
+       for (i = 0, idx = first_idx; count && idx <= last_idx; idx++) {
+               struct ieee80211_tx_rate *cur_rate;
+               int cur_count;
 
-               info->status.rates[i].count = cur_count;
-               final_idx = i;
+               cur_rate = &rs->rates[idx / 2];
+               cur_count = min_t(int, MT7603_RATE_RETRY, count);
                count -= cur_count;
+
+               if (idx && (cur_rate->idx != info->status.rates[i].idx ||
+                           cur_rate->flags != info->status.rates[i].flags)) {
+                       i++;
+                       if (i == ARRAY_SIZE(info->status.rates))
+                               break;
+
+                       info->status.rates[i] = *cur_rate;
+                       info->status.rates[i].count = 0;
+               }
+
+               info->status.rates[i].count += cur_count;
        }
 
 out:
-       final_rate_flags = info->status.rates[final_idx].flags;
+       final_rate_flags = info->status.rates[i].flags;
 
        switch (FIELD_GET(MT_TX_RATE_MODE, final_rate)) {
        case MT_PHY_TYPE_CCK:
                return false;
        }
 
-       info->status.rates[final_idx].idx = final_rate;
-       info->status.rates[final_idx].flags = final_rate_flags;
+       info->status.rates[i].idx = final_rate;
+       info->status.rates[i].flags = final_rate_flags;
 
        return true;
 }
        if (skb) {
                struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
-               if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) {
-                       spin_lock_bh(&dev->mt76.lock);
-                       if (sta->rate_probe) {
-                               mt7603_wtbl_set_rates(dev, sta, NULL,
-                                                     sta->rates);
-                               sta->rate_probe = false;
-                       }
-                       spin_unlock_bh(&dev->mt76.lock);
-               }
-
                if (!mt7603_fill_txs(dev, sta, info, txs_data)) {
                        ieee80211_tx_info_clear_status(info);
                        info->status.rates[0].idx = -1;