struct cake_host {
        u32 srchost_tag;
        u32 dsthost_tag;
-       u16 srchost_refcnt;
-       u16 dsthost_refcnt;
+       u16 srchost_bulk_flow_count;
+       u16 dsthost_bulk_flow_count;
 };
 
 struct cake_heap_entry {
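
The struct change above is the heart of the patch: the per-host fields no longer count every flow that hashes to a host, only the flows currently in the bulk rotation, because that is the number the fairness logic actually divides the quantum by. Every hunk below open-codes a guarded update along the lines of this sketch; host_bulk_count_add() is a hypothetical name, and the flow-mode bits are stand-ins for the kernel's CAKE_FLOW_DUAL_SRC/CAKE_FLOW_DUAL_DST tests:

    #include <stdbool.h>
    #include <stdint.h>

    struct cake_host {
            uint32_t srchost_tag;
            uint32_t dsthost_tag;
            uint16_t srchost_bulk_flow_count;
            uint16_t dsthost_bulk_flow_count;
    };

    /* stand-ins for the kernel's cake_dsrc()/cake_ddst() flow-mode tests */
    static bool cake_dsrc(int flow_mode) { return flow_mode & 1; }
    static bool cake_ddst(int flow_mode) { return flow_mode & 2; }

    /* Hypothetical helper, not in sch_cake: the patch open-codes this
     * guarded update at every transition into or out of CAKE_SET_BULK.
     */
    static void host_bulk_count_add(struct cake_host *src,
                                    struct cake_host *dst,
                                    int flow_mode, int delta)
    {
            if (cake_dsrc(flow_mode))       /* source-host isolation enabled */
                    src->srchost_bulk_flow_count += delta;

            if (cake_ddst(flow_mode))       /* destination-host isolation enabled */
                    dst->dsthost_bulk_flow_count += delta;
    }
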
                 * queue, accept the collision, update the host tags.
                 */
                q->way_collisions++;
-               q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
-               q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+               if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
+                       q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+                       q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+               }
                allocate_src = cake_dsrc(flow_mode);
                allocate_dst = cake_ddst(flow_mode);
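
This hunk is in cake_hash(). When all CAKE_SET_WAYS ways of the set are occupied, the failed search loop has cycled k back to its starting value, so q->flows[outer_hash + k] is the same queue as q->flows[reduced_hash]: the new test inspects the very flow about to be displaced, and its hosts' counters are rewound only if that flow was actually bulk. Under the old unconditional refcnt--, displacing a sparse flow would have unbalanced the books. A quick userspace check of the wrap-around identity (CAKE_SET_WAYS is 8 upstream; the hash values here are made up):

    #include <assert.h>

    #define CAKE_SET_WAYS 8

    int main(void)
    {
            unsigned int outer_hash = 4096;         /* first slot of the set */
            unsigned int reduced_hash = 4096 + 5;   /* slot the flow hashed to */
            unsigned int k = reduced_hash - outer_hash;
            int i;

            /* same shape as the search loop in cake_hash() when no way is free */
            for (i = 0; i < CAKE_SET_WAYS; i++)
                    k = (k + 1) % CAKE_SET_WAYS;

            assert(outer_hash + k == reduced_hash);
            return 0;
    }
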
 found:
                        }
                        for (i = 0; i < CAKE_SET_WAYS;
                                i++, k = (k + 1) % CAKE_SET_WAYS) {
-                               if (!q->hosts[outer_hash + k].srchost_refcnt)
+                               if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
                                        break;
                        }
                        q->hosts[outer_hash + k].srchost_tag = srchost_hash;
 found_src:
                        srchost_idx = outer_hash + k;
-                       q->hosts[srchost_idx].srchost_refcnt++;
+                       if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+                               q->hosts[srchost_idx].srchost_bulk_flow_count++;
                        q->flows[reduced_hash].srchost = srchost_idx;
                }
 
                        }
                        for (i = 0; i < CAKE_SET_WAYS;
                             i++, k = (k + 1) % CAKE_SET_WAYS) {
-                               if (!q->hosts[outer_hash + k].dsthost_refcnt)
+                               if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
                                        break;
                        }
                        q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
 found_dst:
                        dsthost_idx = outer_hash + k;
-                       q->hosts[dsthost_idx].dsthost_refcnt++;
+                       if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+                               q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
                        q->flows[reduced_hash].dsthost = dsthost_idx;
                }
        }
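
Still in cake_hash(): the host-lookup loops now treat a slot as free when no bulk flow references it, and the found_src/found_dst paths increment the counter only when the queue this flow landed in is already in the bulk set. A brand-new flow therefore starts out uncounted and is only added once it is promoted to bulk by the enqueue/dequeue hunks below. A minimal userspace model of that bookkeeping, with illustrative names rather than the kernel's:

    #include <assert.h>

    enum { SET_NONE, SET_SPARSE, SET_SPARSE_WAIT, SET_BULK, SET_DECAYING };

    struct host { unsigned short bulk_flow_count; };
    struct flow { int set; struct host *src; };

    /* the counter changes exactly on entry to and exit from the bulk set */
    static void set_flow_state(struct flow *f, int next)
    {
            if (f->set == SET_BULK && next != SET_BULK)
                    f->src->bulk_flow_count--;
            if (f->set != SET_BULK && next == SET_BULK)
                    f->src->bulk_flow_count++;
            f->set = next;
    }

    int main(void)
    {
            struct host h = { 0 };
            struct flow f = { SET_NONE, &h };

            set_flow_state(&f, SET_SPARSE);         /* new flow: not counted */
            set_flow_state(&f, SET_BULK);           /* promoted: counted */
            assert(h.bulk_flow_count == 1);
            set_flow_state(&f, SET_DECAYING);       /* queue drained: uncounted */
            set_flow_state(&f, SET_NONE);           /* torn down */
            assert(h.bulk_flow_count == 0);
            return 0;
    }
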
                b->sparse_flow_count++;
 
                if (cake_dsrc(q->flow_mode))
-                       host_load = max(host_load, srchost->srchost_refcnt);
+                       host_load = max(host_load, srchost->srchost_bulk_flow_count);
 
                if (cake_ddst(q->flow_mode))
-                       host_load = max(host_load, dsthost->dsthost_refcnt);
+                       host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
 
                flow->deficit = (b->flow_quantum *
                                 quantum_div[host_load]) >> 16;
        } else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+               struct cake_host *srchost = &b->hosts[flow->srchost];
+               struct cake_host *dsthost = &b->hosts[flow->dsthost];
+
                /* this flow was empty, accounted as a sparse flow, but actually
                 * in the bulk rotation.
                 */
                flow->set = CAKE_SET_BULK;
                b->sparse_flow_count--;
                b->bulk_flow_count++;
+
+               if (cake_dsrc(q->flow_mode))
+                       srchost->srchost_bulk_flow_count++;
+
+               if (cake_ddst(q->flow_mode))
+                       dsthost->dsthost_bulk_flow_count++;
+
        }
 
        if (q->buffer_used > q->buffer_max_used)
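
This hunk is in cake_enqueue(). A flow entering the sparse rotation still has its first deficit scaled by host_load, but the load now counts only the host's bulk flows, so hosts with many idle or sparse flows are no longer penalized; the new CAKE_SET_SPARSE_WAIT branch performs the matching increment for an empty flow that was already moved into the bulk rotation and has just received a packet. quantum_div[] is the kernel's reciprocal table (quantum_div[i] = 65535 / i), which makes the deficit expression an integer approximation of flow_quantum / host_load. A userspace sketch of the arithmetic, using CAKE's default 1514-byte flow quantum:

    #include <stdio.h>

    #define CAKE_QUEUES 1024

    static unsigned short quantum_div[CAKE_QUEUES + 1];

    int main(void)
    {
            unsigned int flow_quantum = 1514, host_load, deficit;

            /* same table the kernel builds: quantum_div[i] = 65535 / i */
            for (host_load = 1; host_load <= CAKE_QUEUES; host_load++)
                    quantum_div[host_load] = 65535 / host_load;

            for (host_load = 1; host_load <= 4; host_load++) {
                    deficit = (flow_quantum * quantum_div[host_load]) >> 16;
                    printf("host_load=%u -> deficit=%u (vs %u/%u = %u)\n",
                           host_load, deficit, flow_quantum, host_load,
                           flow_quantum / host_load);
            }
            return 0;
    }

Note that host_load == 1 yields 1513 rather than 1514: the truncation is systematic, which is what the dithering in the dequeue path compensates for.
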
        dsthost = &b->hosts[flow->dsthost];
        host_load = 1;
 
-       if (cake_dsrc(q->flow_mode))
-               host_load = max(host_load, srchost->srchost_refcnt);
-
-       if (cake_ddst(q->flow_mode))
-               host_load = max(host_load, dsthost->dsthost_refcnt);
-
-       WARN_ON(host_load > CAKE_QUEUES);
-
        /* flow isolation (DRR++) */
        if (flow->deficit <= 0) {
-               /* The shifted prandom_u32() is a way to apply dithering to
-                * avoid accumulating roundoff errors
-                */
-               flow->deficit += (b->flow_quantum * quantum_div[host_load] +
-                                 (prandom_u32() >> 16)) >> 16;
-               list_move_tail(&flow->flowchain, &b->old_flows);
-
                /* Keep all flows with deficits out of the sparse and decaying
                 * rotations.  No non-empty flow can go into the decaying
                 * rotation, so they can't get deficits
                        if (flow->head) {
                                b->sparse_flow_count--;
                                b->bulk_flow_count++;
+
+                               if (cake_dsrc(q->flow_mode))
+                                       srchost->srchost_bulk_flow_count++;
+
+                               if (cake_ddst(q->flow_mode))
+                                       dsthost->dsthost_bulk_flow_count++;
+
                                flow->set = CAKE_SET_BULK;
                        } else {
                                /* we've moved it to the bulk rotation for
                                flow->set = CAKE_SET_SPARSE_WAIT;
                        }
                }
+
+               if (cake_dsrc(q->flow_mode))
+                       host_load = max(host_load, srchost->srchost_bulk_flow_count);
+
+               if (cake_ddst(q->flow_mode))
+                       host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
+
+               WARN_ON(host_load > CAKE_QUEUES);
+
+               /* The shifted prandom_u32() is a way to apply dithering to
+                * avoid accumulating roundoff errors
+                */
+               flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+                                 (prandom_u32() >> 16)) >> 16;
+               list_move_tail(&flow->flowchain, &b->old_flows);
+
                goto retry;
        }
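
In cake_dequeue(), the deficit refill is moved below the sparse-to-bulk promotion so that host_load is sampled after the flow has joined the bulk set and after the new increments have run; otherwise a freshly promoted flow would receive a quantum computed against a count that does not yet include it. The relocated dithering comment still applies: with quantum_div[1] = 65535 the product truncates low almost every round, and adding 16 random bits before the shift restores the long-run average. A userspace sketch, with rand() & 0xffff standing in for the kernel's prandom_u32() >> 16:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            unsigned int quantum = 1514;
            unsigned int div = 65535;       /* quantum_div[1], host_load == 1 */
            unsigned long long sum = 0;
            int i, rounds = 1 << 20;

            srand(1);
            for (i = 0; i < rounds; i++)
                    sum += (quantum * div + (rand() & 0xffffu)) >> 16;

            /* without dithering every increment would be 1513 */
            printf("mean increment = %.3f (ideal %u)\n",
                   (double)sum / rounds, quantum);
            return 0;
    }
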
 
                                               &b->decaying_flows);
                                if (flow->set == CAKE_SET_BULK) {
                                        b->bulk_flow_count--;
+
+                                       if (cake_dsrc(q->flow_mode))
+                                               srchost->srchost_bulk_flow_count--;
+
+                                       if (cake_ddst(q->flow_mode))
+                                               dsthost->dsthost_bulk_flow_count--;
+
                                        b->decaying_flow_count++;
                                } else if (flow->set == CAKE_SET_SPARSE ||
                                           flow->set == CAKE_SET_SPARSE_WAIT) {
                                if (flow->set == CAKE_SET_SPARSE ||
                                    flow->set == CAKE_SET_SPARSE_WAIT)
                                        b->sparse_flow_count--;
-                               else if (flow->set == CAKE_SET_BULK)
+                               else if (flow->set == CAKE_SET_BULK) {
                                        b->bulk_flow_count--;
-                               else
+
+                                       if (cake_dsrc(q->flow_mode))
+                                               srchost->srchost_bulk_flow_count--;
+
+                                       if (cake_ddst(q->flow_mode))
+                                               dsthost->dsthost_bulk_flow_count--;
+
+                               } else
                                        b->decaying_flow_count--;
 
                                flow->set = CAKE_SET_NONE;
-                               srchost->srchost_refcnt--;
-                               dsthost->dsthost_refcnt--;
                        }
                        goto begin;
                }
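
The final two hunks, also in cake_dequeue(), complete the lifecycle: the counters are decremented when a bulk flow drains into the decaying rotation and when a flow is dismantled entirely, and the old unconditional srchost_refcnt--/dsthost_refcnt-- at teardown disappears because non-bulk flows were never counted to begin with. A hypothetical consistency check, not part of the patch, restating the invariant for the source side with pared-down structures (it assumes source-host isolation is active, so every bulk flow was counted):

    #include <assert.h>
    #include <stdint.h>

    #define CAKE_QUEUES 1024

    enum { CAKE_SET_NONE, CAKE_SET_SPARSE, CAKE_SET_SPARSE_WAIT,
           CAKE_SET_BULK, CAKE_SET_DECAYING };

    struct flow { int set; uint16_t srchost; };
    struct host { uint16_t srchost_bulk_flow_count; };

    /* recompute the source-side counts from the flow table and compare
     * them with the running counters
     */
    static void check_src_counts(const struct flow *flows,
                                 const struct host *hosts)
    {
            uint16_t expect[CAKE_QUEUES] = {0};
            int i;

            for (i = 0; i < CAKE_QUEUES; i++)
                    if (flows[i].set == CAKE_SET_BULK)
                            expect[flows[i].srchost]++;

            for (i = 0; i < CAKE_QUEUES; i++)
                    assert(hosts[i].srchost_bulk_flow_count == expect[i]);
    }

    int main(void)
    {
            static struct flow flows[CAKE_QUEUES];
            static struct host hosts[CAKE_QUEUES];

            flows[0].set = CAKE_SET_BULK;   flows[0].srchost = 7;
            flows[1].set = CAKE_SET_SPARSE; flows[1].srchost = 7; /* uncounted */
            hosts[7].srchost_bulk_flow_count = 1;

            check_src_counts(flows, hosts);
            return 0;
    }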