{
        unsigned int users;
 
+       /*
+        * calling test_bit() prior to test_and_set_bit() is intentional,
+        * it avoids dirtying the cacheline if the queue is already active.
+        */
        if (blk_mq_is_shared_tags(hctx->flags)) {
                struct request_queue *q = hctx->queue;
 
-               if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
+               if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+                   test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
                        return;
-               set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags);
        } else {
-               if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+               if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+                   test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                        return;
-               set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state);
        }
 
        users = atomic_inc_return(&hctx->tags->active_queues);