.debugfs_init           = wl12xx_debugfs_add_files,
        .get_spare_blocks       = wl12xx_get_spare_blocks,
        .set_key                = wl12xx_set_key,
+       .pre_pkt_send           = NULL,
 };
 
 static struct ieee80211_sta_ht_cap wl12xx_ht_cap = {
 
                wl->plt_fw_name = WL18XX_FW_NAME;
                wl->quirks |= WLCORE_QUIRK_NO_ELP |
                              WLCORE_QUIRK_FWLOG_NOT_IMPLEMENTED |
-                             WLCORE_QUIRK_RX_BLOCKSIZE_ALIGN;
+                             WLCORE_QUIRK_RX_BLOCKSIZE_ALIGN |
+                             WLCORE_QUIRK_TX_PAD_LAST_FRAME;
 
                break;
        case CHIP_ID_185x_PG10:
                          u32 blks, u32 spare_blks)
 {
        desc->wl18xx_mem.total_mem_blocks = blks;
-       desc->wl18xx_mem.reserved = 0;
 }
 
 static void
 {
        desc->length = cpu_to_le16(skb->len);
 
+       /*
+        * if only the last frame is to be padded, flag every frame as not
+        * padded here; the flag is cleared on the last frame just before Tx
+        */
+       if (wl->quirks & WLCORE_QUIRK_TX_PAD_LAST_FRAME)
+               desc->wl18xx_mem.ctrl = WL18XX_TX_CTRL_NOT_PADDED;
+       else
+               desc->wl18xx_mem.ctrl = 0;
+
        wl1271_debug(DEBUG_TX, "tx_fill_hdr: hlid: %d "
                     "len: %d life: %d mem: %d", desc->hlid,
                     le16_to_cpu(desc->length),
        return ret;
 }
 
+/*
+ * Called right before the aggregation buffer is written to the chip.
+ * When only the last frame of the aggregate is padded, clear the
+ * NOT_PADDED control bit in the last TX descriptor and return the buffer
+ * length rounded up to the bus block size; otherwise return buf_offset
+ * untouched.
+ */
+static u32 wl18xx_pre_pkt_send(struct wl1271 *wl,
+                              u32 buf_offset, u32 last_len)
+{
+       struct wl1271_tx_hw_descr *tail_desc;
+
+       /* nothing to adjust unless only the last frame gets padded */
+       if (!(wl->quirks & WLCORE_QUIRK_TX_PAD_LAST_FRAME))
+               return buf_offset;
+
+       /* the last frame begins last_len bytes before the end of the data */
+       tail_desc = (struct wl1271_tx_hw_descr *)(wl->aggr_buf +
+                                                 buf_offset - last_len);
+
+       /* mark that frame as padded and extend the transfer accordingly */
+       tail_desc->wl18xx_mem.ctrl &= ~WL18XX_TX_CTRL_NOT_PADDED;
+
+       return ALIGN(buf_offset, WL12XX_BUS_BLOCK_SIZE);
+}
+
 static struct wlcore_ops wl18xx_ops = {
        .identify_chip  = wl18xx_identify_chip,
        .boot           = wl18xx_boot,
        .handle_static_data     = wl18xx_handle_static_data,
        .get_spare_blocks = wl18xx_get_spare_blocks,
        .set_key        = wl18xx_set_key,
+       .pre_pkt_send   = wl18xx_pre_pkt_send,
 };
 
 /* HT cap appropriate for wide channels */
 
 #define WL18XX_TX_STATUS_DESC_ID_MASK    0x7F
 #define WL18XX_TX_STATUS_STAT_BIT_IDX    7
 
+/* Indicates this TX HW frame is not padded to SDIO block size */
+#define WL18XX_TX_CTRL_NOT_PADDED      BIT(7)
+
 /*
  * The FW uses a special bit to indicate a wide channel should be used in
  * the rate policy.
 
        return wl->ops->set_key(wl, cmd, vif, sta, key_conf);
 }
 
+/*
+ * Let the chip-specific driver adjust the aggregation buffer before it is
+ * sent. Returns the (possibly modified) length to write; when the driver
+ * provides no pre_pkt_send op, buf_offset is returned unchanged.
+ */
+static inline u32
+wlcore_hw_pre_pkt_send(struct wl1271 *wl, u32 buf_offset, u32 last_len)
+{
+       /* the op is optional */
+       if (!wl->ops->pre_pkt_send)
+               return buf_offset;
+
+       return wl->ops->pre_pkt_send(wl, buf_offset, last_len);
+}
+
 #endif
 
/*
 * Round a TX packet length up to the alignment used for this chip.
 *
 * After this change, the length is aligned to WL1271_TX_ALIGN_TO when
 * WLCORE_QUIRK_TX_PAD_LAST_FRAME is set or WLCORE_QUIRK_TX_BLOCKSIZE_ALIGN
 * is clear; only chips with TX_BLOCKSIZE_ALIGN set and TX_PAD_LAST_FRAME
 * clear align every packet to WL12XX_BUS_BLOCK_SIZE. Before the change,
 * TX_BLOCKSIZE_ALIGN alone selected the block-size alignment.
 */
unsigned int wlcore_calc_packet_alignment(struct wl1271 *wl,
                                          unsigned int packet_length)
 {
-       if (wl->quirks & WLCORE_QUIRK_TX_BLOCKSIZE_ALIGN)
-               return ALIGN(packet_length, WL12XX_BUS_BLOCK_SIZE);
-       else
+       if ((wl->quirks & WLCORE_QUIRK_TX_PAD_LAST_FRAME) ||
+           !(wl->quirks & WLCORE_QUIRK_TX_BLOCKSIZE_ALIGN))
                return ALIGN(packet_length, WL1271_TX_ALIGN_TO);
+       else
+               return ALIGN(packet_length, WL12XX_BUS_BLOCK_SIZE);
 }
 EXPORT_SYMBOL(wlcore_calc_packet_alignment);
 
        struct wl12xx_vif *wlvif;
        struct sk_buff *skb;
        struct wl1271_tx_hw_descr *desc;
-       u32 buf_offset = 0;
+       u32 buf_offset = 0, last_len = 0;
        bool sent_packets = false;
        unsigned long active_hlids[BITS_TO_LONGS(WL12XX_MAX_LINKS)] = {0};
        int ret;
                         * Flush buffer and try again.
                         */
                        wl1271_skb_queue_head(wl, wlvif, skb);
+
+                       buf_offset = wlcore_hw_pre_pkt_send(wl, buf_offset,
+                                                           last_len);
                        wlcore_write_data(wl, REG_SLV_MEM_DATA, wl->aggr_buf,
                                          buf_offset, true);
                        sent_packets = true;
                                ieee80211_free_txskb(wl->hw, skb);
                        goto out_ack;
                }
-               buf_offset += ret;
+               last_len = ret;
+               buf_offset += last_len;
                wl->tx_packets_count++;
                if (has_data) {
                        desc = (struct wl1271_tx_hw_descr *) skb->data;
 
 out_ack:
        if (buf_offset) {
+               buf_offset = wlcore_hw_pre_pkt_send(wl, buf_offset, last_len);
                wlcore_write_data(wl, REG_SLV_MEM_DATA, wl->aggr_buf,
                                  buf_offset, true);
                sent_packets = true;
 
        u8 total_mem_blocks;
 
        /*
-        * always zero
+        * control bits
         */
-       u8 reserved;
+       u8 ctrl;
 } __packed;
 
 /*
 
                       struct ieee80211_vif *vif,
                       struct ieee80211_sta *sta,
                       struct ieee80211_key_conf *key_conf);
+       u32 (*pre_pkt_send)(struct wl1271 *wl, u32 buf_offset, u32 last_len);
 };
 
 enum wlcore_partitions {
 /* Some firmwares may not support ELP */
 #define WLCORE_QUIRK_NO_ELP                    BIT(6)
 
+/* pad only the last frame in the aggregate buffer */
+#define WLCORE_QUIRK_TX_PAD_LAST_FRAME         BIT(7)
+
 /* extra header space is required for TKIP */
 #define WLCORE_QUIRK_TKIP_HEADER_SPACE         BIT(8)