 static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
                                        struct sk_buff *skb,
                                       struct iwl_tfh_tfd *tfd,
                                       dma_addr_t phys, void *virt,
-                                      u16 len, struct iwl_cmd_meta *meta)
+                                      u16 len, struct iwl_cmd_meta *meta,
+                                      bool unmap)
 {
        dma_addr_t oldphys = phys;
        struct page *page;
        int ret;
 
        page = get_workaround_page(trans, skb);
        if (!page)
                return -ENOMEM;
 
        memcpy(page_address(page), virt, len);
 
-       phys = dma_map_single(trans->dev, page_address(page), len,
-                             DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(trans->dev, phys)))
-               return -ENOMEM;
+       /*
+        * This is a bit odd, but performance does not matter here, what
+        * matters are the expectations of the calling code and TB cleanup
+        * function.
+        *
+        * As such, if unmap is set, then create another mapping for the TB
+        * entry as it will be unmapped later. On the other hand, if it is not
+        * set, then the TB entry will not be unmapped and instead we simply
+        * reference and sync the mapping that get_workaround_page() created.
+        */
+       if (unmap) {
+               phys = dma_map_single(trans->dev, page_address(page), len,
+                                     DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(trans->dev, phys)))
+                       return -ENOMEM;
+       } else {
+               phys = iwl_pcie_get_tso_page_phys(page_address(page));
+               dma_sync_single_for_device(trans->dev, phys, len,
+                                          DMA_TO_DEVICE);
+       }
+
        ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);
        if (ret < 0) {
                /* unmap the new allocation as single */
                oldphys = phys;
                meta = NULL;
                goto unmap;
        }
+
        IWL_DEBUG_TX(trans,
                     "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
                     len, (unsigned long long)oldphys,
                     (unsigned long long)phys);
 
        ret = 0;
 unmap:
+       if (!unmap)
+               goto trace;
+
        if (meta)
                dma_unmap_page(trans->dev, oldphys, len, DMA_TO_DEVICE);
        else
                dma_unmap_single(trans->dev, oldphys, len, DMA_TO_DEVICE);
 trace:
        trace_iwlwifi_dev_tx_tb(trans->dev, skb, virt, phys, len);
 
        return ret;
 }
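
The !unmap branch above relies on iwl_pcie_get_tso_page_phys() to translate a CPU pointer into the premapped TSO/workaround page back into the device address of the long-lived mapping, so no fresh mapping is created. That helper is not part of this excerpt; a minimal sketch of one way it can work, assuming the page's DMA address is stashed in a small trailer at the end of the page when the page is first mapped (struct iwl_tso_page_info and IWL_TSO_PAGE_DATA_SIZE are names assumed here, not taken from the hunks above):

        struct iwl_tso_page_info {
                dma_addr_t dma_addr;    /* assumed: set when the page was mapped */
        };

        /* usable payload bytes; the trailer occupies the tail of the page */
        #define IWL_TSO_PAGE_DATA_SIZE  (PAGE_SIZE - sizeof(struct iwl_tso_page_info))

        static inline dma_addr_t iwl_pcie_get_tso_page_phys(void *addr)
        {
                struct iwl_tso_page_info *info;

                /* round down to the page start and index the trailer */
                info = (void *)(((unsigned long)addr & PAGE_MASK) +
                                IWL_TSO_PAGE_DATA_SIZE);

                /* page's stored mapping plus @addr's offset within the page */
                return info->dma_addr + ((unsigned long)addr & ~PAGE_MASK);
        }

This is also why the !unmap case only needs dma_sync_single_for_device(): the mapping already exists, the CPU merely wrote into it. The hunks that follow apply the same scheme to the gen2 A-MSDU builder.
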
        struct ieee80211_hdr *hdr = (void *)skb->data;
        unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
        unsigned int mss = skb_shinfo(skb)->gso_size;
+       dma_addr_t start_hdr_phys;
        u16 length, amsdu_pad;
        u8 *start_hdr, *pos_hdr;
        struct sg_table *sgt;
        if (!sgt)
                return -ENOMEM;
 
+       start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);
+
        /*
         * Pull the ieee80211 header to be able to use TSO core,
         * we will restore it for the tx_status flow.
                pos_hdr += snap_ip_tcp_hdrlen;
 
                tb_len = pos_hdr - start_hdr;
-               tb_phys = dma_map_single(trans->dev, start_hdr,
-                                        tb_len, DMA_TO_DEVICE);
-               if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
-                       goto out_err;
+               tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);
+
                /*
                 * No need for _with_wa, this is from the TSO page and
                 * we leave some space at the end of it so can't hit
                 * the buggy scenario.
                 */
                iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb_len);
 
                /* put the payload */
                while (data_left) {
                        int ret;
 
                        tb_len = min_t(unsigned int, tso.size, data_left);
-                       tb_phys = dma_map_single(trans->dev, tso.data,
-                                                tb_len, DMA_TO_DEVICE);
+                       tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data);
+                       /* Not a real mapping error, use direct comparison */
+                       if (unlikely(tb_phys == DMA_MAPPING_ERROR))
+                               goto out_err;
+
                        ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd,
                                                          tb_phys, tso.data,
-                                                         tb_len, NULL);
+                                                         tb_len, NULL, false);
                        if (ret)
                                goto out_err;
 
                }
        }
 
+       dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
+                                  DMA_TO_DEVICE);
+
+       /* re-add the WiFi header */
        skb_push(skb, hdr_len);
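
Payload that stays in the skb is resolved through iwl_pcie_get_sgt_tb_phys() against the sg_table that the (elided) setup code mapped once for the whole skb; a lookup miss is reported as DMA_MAPPING_ERROR, which is why the callers compare against it directly rather than calling dma_mapping_error() (see the "Not a real mapping error" comments). This helper is outside the excerpt too; a hedged sketch of the lookup it has to perform:

        static dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr)
        {
                struct scatterlist *sg;
                int i;

                /* find the mapped entry whose CPU range contains @addr */
                for_each_sgtable_dma_sg(sgt, sg, i) {
                        void *start = sg_virt(sg);

                        if (addr >= start && addr < start + sg_dma_len(sg))
                                return sg_dma_address(sg) + (addr - start);
                }

                /* not a mapping failure, just "not found" */
                return DMA_MAPPING_ERROR;
        }

The remaining call sites below keep real per-TB mappings of their own and therefore pass unmap=true.
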
 
                tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
                                           fragsz, DMA_TO_DEVICE);
                ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
                                                  skb_frag_address(frag),
-                                                 fragsz, out_meta);
+                                                 fragsz, out_meta, true);
                if (ret)
                        return ret;
        }
                tb_phys = dma_map_single(trans->dev, skb->data + hdr_len,
                                         tb2_len, DMA_TO_DEVICE);
                ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
                                                  skb->data + hdr_len, tb2_len,
-                                                 NULL);
+                                                 NULL, true);
                if (ret)
                        goto out_err;
        }
                tb_phys = dma_map_single(trans->dev, frag->data,
                                         skb_headlen(frag), DMA_TO_DEVICE);
                ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
                                                  frag->data,
-                                                 skb_headlen(frag), NULL);
+                                                 skb_headlen(frag), NULL,
+                                                 true);
                if (ret)
                        goto out_err;
                if (iwl_txq_gen2_tx_add_frags(trans, frag, tfd, out_meta))
                        goto out_err;
                return;
        }
 
+       /* TB1 is mapped directly, the rest is the TSO page and SG list. */
+       if (meta->sg_offset)
+               num_tbs = 2;
+
        /* first TB is never freed - it's the bidirectional DMA data */
        for (i = 1; i < num_tbs; i++) {
                if (meta->tbs & BIT(i))
 
                return;
        }
 
+       /* TB1 is mapped directly, the rest is the TSO page and SG list. */
+       if (meta->sg_offset)
+               num_tbs = 2;
+
        /* first TB is never freed - it's the bidirectional DMA data */
 
        for (i = 1; i < num_tbs; i++) {
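
Both TFD-unmap paths (gen2 above, gen1 here) now apply the same ownership rule: when meta->sg_offset is set, only TB1 carries a private mapping, while every later TB points into the long-lived TSO page or the per-skb SG table and must be left mapped. The rule could equally be factored into a helper; a sketch (hypothetical function, mirroring the two in-line hunks):

        static int iwl_pcie_unmap_tb_count(const struct iwl_cmd_meta *meta,
                                           int num_tbs)
        {
                /*
                 * TB1 is mapped directly; TBs >= 2 alias the TSO page or
                 * the SG-table mappings, which outlive this TFD and must
                 * not be unmapped here.
                 */
                if (meta->sg_offset)
                        return 2;

                return num_tbs;
        }

The final hunks convert the gen1 A-MSDU builder to the same TSO-page scheme.
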
        unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
        unsigned int mss = skb_shinfo(skb)->gso_size;
        u16 length, iv_len, amsdu_pad;
+       dma_addr_t start_hdr_phys;
        u8 *start_hdr, *pos_hdr;
        struct sg_table *sgt;
        struct tso_t tso;
        if (!sgt)
                return -ENOMEM;
 
+       start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);
        pos_hdr = start_hdr;
        memcpy(pos_hdr, skb->data + hdr_len, iv_len);
        pos_hdr += iv_len;
                pos_hdr += snap_ip_tcp_hdrlen;
 
                hdr_tb_len = pos_hdr - start_hdr;
-               hdr_tb_phys = dma_map_single(trans->dev, start_hdr,
-                                            hdr_tb_len, DMA_TO_DEVICE);
-               if (unlikely(dma_mapping_error(trans->dev, hdr_tb_phys)))
-                       return -EINVAL;
+               hdr_tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);
+
                iwl_pcie_txq_build_tfd(trans, txq, hdr_tb_phys,
                                       hdr_tb_len, false);
                trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
                                        hdr_tb_phys, hdr_tb_len);
 
                /* put the payload */
                while (data_left) {
                        unsigned int size = min_t(unsigned int, tso.size,
                                                  data_left);
                        dma_addr_t tb_phys;
 
-                       tb_phys = dma_map_single(trans->dev, tso.data,
-                                                size, DMA_TO_DEVICE);
-                       if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
+                       tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data);
+                       /* Not a real mapping error, use direct comparison */
+                       if (unlikely(tb_phys == DMA_MAPPING_ERROR))
                                return -EINVAL;
 
                        iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
                                               size, false);
                }
        }
 
+       dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
+                                  DMA_TO_DEVICE);
+
+       /* re-add the WiFi header and IV */
        skb_push(skb, hdr_len + iv_len);
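
The dma_sync_single_for_device() calls added at the end of both A-MSDU builders are what make the reused header page safe under the streaming DMA API: the page is mapped once for its whole lifetime, the CPU rewrites the subframe headers into it for each skb, and ownership is handed back to the device before the descriptors are posted. Stripped of driver detail, the pattern looks like this (generic sketch, not iwlwifi code):

        static void *hdr_buf;
        static dma_addr_t hdr_buf_dma;

        static int hdr_buf_setup(struct device *dev)
        {
                hdr_buf = (void *)__get_free_page(GFP_KERNEL);
                if (!hdr_buf)
                        return -ENOMEM;

                /* one mapping for the buffer's whole lifetime */
                hdr_buf_dma = dma_map_single(dev, hdr_buf, PAGE_SIZE,
                                             DMA_TO_DEVICE);
                if (dma_mapping_error(dev, hdr_buf_dma)) {
                        free_page((unsigned long)hdr_buf);
                        return -ENOMEM;
                }
                return 0;
        }

        static void hdr_buf_fill(struct device *dev, const void *hdrs, size_t len)
        {
                memcpy(hdr_buf, hdrs, len);     /* CPU owns the buffer here */
                /* hand it back to the device before DMA is triggered */
                dma_sync_single_for_device(dev, hdr_buf_dma, len, DMA_TO_DEVICE);
        }

This avoids a dma_map_single()/dma_unmap_single() round trip per packet on the header page, which is the point of the whole change.
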