#define DRIVER_STATE_PRINT_HEX(x)  DRIVER_STATE_PRINT(x, "0x%x")
 
        DRIVER_STATE_PRINT_INT(tx_blocks_available);
-       DRIVER_STATE_PRINT_INT(tx_allocated_blocks);
+       DRIVER_STATE_PRINT_INT(tx_allocated_blocks[0]);
+       DRIVER_STATE_PRINT_INT(tx_allocated_blocks[1]);
+       DRIVER_STATE_PRINT_INT(tx_allocated_blocks[2]);
+       DRIVER_STATE_PRINT_INT(tx_allocated_blocks[3]);
        DRIVER_STATE_PRINT_INT(tx_frames_cnt);
        DRIVER_STATE_PRINT_LHEX(tx_frames_map[0]);
        DRIVER_STATE_PRINT_INT(tx_queue_count);
 
        }
 }
 
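+/* Sum the TX blocks currently allocated across all ACs */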
+static u32 wl1271_tx_allocated_blocks(struct wl1271 *wl)
+{
+       int i;
+       u32 total_alloc_blocks = 0;
+
+       for (i = 0; i < NUM_TX_QUEUES; i++)
+               total_alloc_blocks += wl->tx_allocated_blocks[i];
+
+       return total_alloc_blocks;
+}
+
 static void wl1271_fw_status(struct wl1271 *wl,
                             struct wl1271_fw_full_status *full_status)
 {
        struct wl1271_fw_common_status *status = &full_status->common;
        struct timespec ts;
        u32 old_tx_blk_count = wl->tx_blocks_available;
-       u32 freed_blocks = 0;
+       u32 freed_blocks = 0, ac_freed_blocks;
        int i;
 
        if (wl->bss_type == BSS_TYPE_AP_BSS) {
 
        /* update number of available TX blocks */
        for (i = 0; i < NUM_TX_QUEUES; i++) {
-               freed_blocks += le32_to_cpu(status->tx_released_blks[i]) -
-                               wl->tx_blocks_freed[i];
+               ac_freed_blocks = le32_to_cpu(status->tx_released_blks[i]) -
+                                 wl->tx_blocks_freed[i];
+               freed_blocks += ac_freed_blocks;
+
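+               /* the blocks released by the FW for this AC are no longer allocated */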
+               wl->tx_allocated_blocks[i] -= ac_freed_blocks;
 
                wl->tx_blocks_freed[i] =
                        le32_to_cpu(status->tx_released_blks[i]);
        }
 
-       wl->tx_allocated_blocks -= freed_blocks;
-
        if (wl->bss_type == BSS_TYPE_AP_BSS) {
                /* Update num of allocated TX blocks per link and ps status */
                wl1271_irq_update_links_status(wl, &full_status->ap);
                wl->tx_blocks_available += freed_blocks;
        } else {
-               int avail = full_status->sta.tx_total - wl->tx_allocated_blocks;
+               int avail = full_status->sta.tx_total -
+                           wl1271_tx_allocated_blocks(wl);
 
                /*
                 * The FW might change the total number of TX memblocks before
        wl->psm_entry_retry = 0;
        wl->power_level = WL1271_DEFAULT_POWER_LEVEL;
        wl->tx_blocks_available = 0;
-       wl->tx_allocated_blocks = 0;
        wl->tx_results_count = 0;
        wl->tx_packets_count = 0;
        wl->time_offset = 0;
         */
        wl->flags = 0;
 
-       for (i = 0; i < NUM_TX_QUEUES; i++)
+       for (i = 0; i < NUM_TX_QUEUES; i++) {
                wl->tx_blocks_freed[i] = 0;
+               wl->tx_allocated_blocks[i] = 0;
+       }
 
        wl1271_debugfs_reset(wl);
 
 
        u32 total_len = skb->len + sizeof(struct wl1271_tx_hw_descr) + extra;
        u32 len;
        u32 total_blocks;
-       int id, ret = -EBUSY;
+       int id, ret = -EBUSY, ac;
        u32 spare_blocks;
 
        if (unlikely(wl->quirks & WL12XX_QUIRK_USE_2_SPARE_BLOCKS))
                desc->id = id;
 
                wl->tx_blocks_available -= total_blocks;
-               wl->tx_allocated_blocks += total_blocks;
+
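+               /* charge the allocated blocks to the AC of this skb */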
+               ac = wl1271_tx_get_queue(skb_get_queue_mapping(skb));
+               wl->tx_allocated_blocks[ac] += total_blocks;
 
                if (wl->bss_type == BSS_TYPE_AP_BSS)
                        wl->links[hlid].allocated_blks += total_blocks;
        }
 }
 
+static struct sk_buff_head *wl1271_select_queue(struct wl1271 *wl,
+                                               struct sk_buff_head *queues)
+{
+       int i, q = -1;
+       u32 min_blks = 0xffffffff;
+
+       /*
+        * Find an AC that has frames queued and for which the FW
+        * currently holds the fewest allocated TX blocks.
+        */
+       for (i = 0; i < NUM_TX_QUEUES; i++)
+               if (!skb_queue_empty(&queues[i]) &&
+                   (wl->tx_allocated_blocks[i] < min_blks)) {
+                       q = i;
+                       min_blks = wl->tx_allocated_blocks[q];
+               }
+
+       if (q == -1)
+               return NULL;
+
+       return &queues[q];
+}
+
 static struct sk_buff *wl1271_sta_skb_dequeue(struct wl1271 *wl)
 {
        struct sk_buff *skb = NULL;
        unsigned long flags;
+       struct sk_buff_head *queue;
 
-       skb = skb_dequeue(&wl->tx_queue[CONF_TX_AC_VO]);
-       if (skb)
-               goto out;
-       skb = skb_dequeue(&wl->tx_queue[CONF_TX_AC_VI]);
-       if (skb)
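+       /* dequeue from the non-empty AC with the fewest allocated TX blocks */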
+       queue = wl1271_select_queue(wl, wl->tx_queue);
+       if (!queue)
                goto out;
-       skb = skb_dequeue(&wl->tx_queue[CONF_TX_AC_BE]);
-       if (skb)
-               goto out;
-       skb = skb_dequeue(&wl->tx_queue[CONF_TX_AC_BK]);
+
+       skb = skb_dequeue(queue);
 
 out:
        if (skb) {
        struct sk_buff *skb = NULL;
        unsigned long flags;
        int i, h, start_hlid;
+       struct sk_buff_head *queue;
 
        /* start from the link after the last one */
        start_hlid = (wl->last_tx_hlid + 1) % AP_MAX_LINKS;
        for (i = 0; i < AP_MAX_LINKS; i++) {
                h = (start_hlid + i) % AP_MAX_LINKS;
 
-               skb = skb_dequeue(&wl->links[h].tx_queue[CONF_TX_AC_VO]);
-               if (skb)
-                       goto out;
-               skb = skb_dequeue(&wl->links[h].tx_queue[CONF_TX_AC_VI]);
-               if (skb)
-                       goto out;
-               skb = skb_dequeue(&wl->links[h].tx_queue[CONF_TX_AC_BE]);
-               if (skb)
-                       goto out;
-               skb = skb_dequeue(&wl->links[h].tx_queue[CONF_TX_AC_BK]);
+               /* only consider connected stations */
+               if (h >= WL1271_AP_STA_HLID_START &&
+                   !test_bit(h - WL1271_AP_STA_HLID_START, wl->ap_hlid_map))
+                       continue;
+
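+               /* pick the AC with frames queued for this link and fewest allocated blocks */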
+               queue = wl1271_select_queue(wl, wl->links[h].tx_queue);
+               if (!queue)
+                       continue;
+
+               skb = skb_dequeue(queue);
                if (skb)
-                       goto out;
+                       break;
        }
 
-out:
        if (skb) {
                wl->last_tx_hlid = h;
                spin_lock_irqsave(&wl->wl_lock, flags);
 
        /* Accounting for allocated / available TX blocks on HW */
        u32 tx_blocks_freed[NUM_TX_QUEUES];
        u32 tx_blocks_available;
-       u32 tx_allocated_blocks;
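+       /* number of TX blocks currently allocated in the FW, per AC */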
+       u32 tx_allocated_blocks[NUM_TX_QUEUES];
        u32 tx_results_count;
 
        /* Transmitted TX packets counter for chipset interface */