*/
        q->nr_requests = set->queue_depth;
 
+       /*
+        * Default to classic polling
+        */
+       q->poll_nsec = -1;
+
        if (set->ops->complete)
                blk_queue_softirq_done(q, set->ops->complete);
 
 }
 EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 
+static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
+                                      struct blk_mq_hw_ctx *hctx,
+                                      struct request *rq)
+{
+       struct blk_rq_stat stat[2];
+       unsigned long ret = 0;
+
+       /*
+        * Stats aren't collected yet: don't sleep for this IO (0 disables
+        * hybrid sleep), but enable collection for future requests.
+        */
+       if (!blk_stat_enable(q))
+               return 0;
+
+       /*
+        * TODO: snapshotting the stats on every IO is wasteful; cache the
+        * current stats window and only refresh it when it rolls over.
+        */
+       memset(&stat, 0, sizeof(stat));
+       blk_hctx_stat_get(hctx, stat);
+
+       /*
+        * As an optimistic guess, sleep for half of the mean completion
+        * time (in nsecs) for this type of request. We can (and should)
+        * make this smarter: if the completion latencies are tight, we
+        * can get closer than just half the mean. This is especially
+        * important on devices where the completion latencies are longer
+        * than ~10 usec.
+        */
+       if (req_op(rq) == REQ_OP_READ && stat[BLK_STAT_READ].nr_samples)
+               ret = (stat[BLK_STAT_READ].mean + 1) / 2;
+       else if (req_op(rq) == REQ_OP_WRITE && stat[BLK_STAT_WRITE].nr_samples)
+               ret = (stat[BLK_STAT_WRITE].mean + 1) / 2;
+
+       return ret;
+}
+
 static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
+                                    struct blk_mq_hw_ctx *hctx,
                                     struct request *rq)
 {
        struct hrtimer_sleeper hs;
        enum hrtimer_mode mode;
+       unsigned int nsecs;
        ktime_t kt;
 
-       if (!q->poll_nsec || test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+       if (test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags))
+               return false;
+
+       /*
+        * q->poll_nsec selects the hybrid sleep policy:
+        *
+        * -1:  never hybrid sleep (classic polling only)
+        *  0:  sleep for half of the previous mean completion time
+        * >0:  sleep for exactly this many nanoseconds
+        */
+       if (q->poll_nsec == -1)
+               return false;
+       else if (q->poll_nsec > 0)
+               nsecs = q->poll_nsec;
+       else
+               nsecs = blk_mq_poll_nsecs(q, hctx, rq);
+
+       if (!nsecs)
                return false;
 
        set_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
         * This will be replaced with the stats tracking code, using
         * 'avg_completion_time / 2' as the pre-sleep target.
         */
-       kt = ktime_set(0, q->poll_nsec);
+       kt = ktime_set(0, nsecs);
 
        mode = HRTIMER_MODE_REL;
        hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
         * the IO isn't complete, we'll get called again and will go
         * straight to the busy poll loop.
         */
-       if (blk_mq_poll_hybrid_sleep(q, rq))
+       if (blk_mq_poll_hybrid_sleep(q, hctx, rq))
                return true;
 
        hctx->poll_considered++;
 
 
 static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
 {
-       return queue_var_show(q->poll_nsec / 1000, page);
+       int val;
+
+       /*
+        * Report in usec; the -1 sentinel (hybrid sleep disabled) is
+        * passed through unscaled so it round-trips with the store side.
+        */
+       if (q->poll_nsec == -1)
+               val = -1;
+       else
+               val = q->poll_nsec / 1000;
+
+       return sprintf(page, "%d\n", val);
 }
 
 static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
                                size_t count)
 {
-       unsigned long poll_usec;
-       ssize_t ret;
+       int err, val;
 
        if (!q->mq_ops || !q->mq_ops->poll)
                return -EINVAL;
 
+       /* Parse as signed so userspace can write -1 to disable hybrid sleep */
-       ret = queue_var_store(&poll_usec, page, count);
-       if (ret < 0)
-               return ret;
+       err = kstrtoint(page, 10, &val);
+       if (err < 0)
+               return err;
 
+       /*
+        * Input is in usec; we store nanoseconds. NOTE(review): negative
+        * values other than -1, and values large enough that val * 1000
+        * overflows int, are stored unchecked -- confirm this is intended.
+        */
-       q->poll_nsec = poll_usec * 1000;
-       return ret;
+       if (val == -1)
+               q->poll_nsec = -1;
+       else
+               q->poll_nsec = val * 1000;
+
+       return count;
 }
 
 static ssize_t queue_poll_show(struct request_queue *q, char *page)