 module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
 MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
+
+static int ring_avail_percent_lowater = 10;
+module_param(ring_avail_percent_lowater, int, S_IRUGO);
+MODULE_PARM_DESC(ring_avail_percent_lowater,
+               "Select a channel if available ring size > this in percent");
+
 /*
  * Timeout in seconds for all devices managed by this driver.
  */
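The new ring_avail_percent_lowater parameter is a low-water mark, expressed as the percentage of outbound ring buffer space still free, below which a channel is treated as too busy to pick. The actual test later in the patch uses hv_get_avail_to_write_percent(); the following is only a minimal sketch of the idea, with an illustrative struct rather than the driver's real ring buffer layout:

#include <linux/types.h>

/* Illustration only: free-space percentage of a generic ring buffer. */
struct example_ring {
        u32 data_size;                  /* total bytes in the data area (assumed field) */
        u32 bytes_avail_to_write;       /* bytes currently free to write (assumed field) */
};

static inline u32 example_avail_to_write_percent(const struct example_ring *r)
{
        return r->data_size ? r->bytes_avail_to_write * 100 / r->data_size : 0;
}

/*
 * A channel qualifies only while more than ring_avail_percent_lowater
 * percent of its outbound ring is still writable:
 *
 *      if (example_avail_to_write_percent(ring) > ring_avail_percent_lowater)
 *              ... channel may be selected ...
 */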
         * Mask of CPUs bound to subchannels.
         */
        struct cpumask alloced_cpus;
+       /*
+        * Pre-allocated struct cpumask for each hardware queue.
+        * struct cpumask is used when selecting the outgoing channel. It is
+        * a big structure, up to 1 Kbyte when CONFIG_MAXSMP=y (NR_CPUS=8192).
+        * Pre-allocate it to avoid allocation on the kernel stack.
+        */
+       struct cpumask *cpumask_chns;
        /* Used for vsc/vsp channel reset process */
        struct storvsc_cmd_request init_request;
        struct storvsc_cmd_request reset_request;
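For scale, the size noted in the comment works out as follows: struct cpumask holds one bit per possible CPU, so with CONFIG_MAXSMP (NR_CPUS = 8192) each mask is 8192 / 8 = 1024 bytes, too large to keep on the I/O path's kernel stack. A small standalone check of that arithmetic, not part of the patch:

#include <linux/cpumask.h>
#include <linux/build_bug.h>

/*
 * struct cpumask is a bitmap of NR_CPUS bits rounded up to unsigned longs;
 * with NR_CPUS = 8192 that is 1024 bytes per mask, hence one pre-allocated
 * mask per hardware queue instead of stack variables.
 */
static inline void cpumask_footprint_check(void)
{
        BUILD_BUG_ON(sizeof(struct cpumask) !=
                     BITS_TO_LONGS(NR_CPUS) * sizeof(unsigned long));
}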
        if (stor_device->stor_chns == NULL)
                return -ENOMEM;
 
+       stor_device->cpumask_chns = kcalloc(num_possible_cpus(),
+                       sizeof(struct cpumask), GFP_KERNEL);
+       if (stor_device->cpumask_chns == NULL) {
+               kfree(stor_device->stor_chns);
+               return -ENOMEM;
+       }
+
        stor_device->stor_chns[device->channel->target_cpu] = device->channel;
        cpumask_set_cpu(device->channel->target_cpu,
                        &stor_device->alloced_cpus);
        vmbus_close(device->channel);
 
        kfree(stor_device->stor_chns);
+       kfree(stor_device->cpumask_chns);
        kfree(stor_device);
        return 0;
 }
 {
        u16 slot = 0;
        u16 hash_qnum;
-       struct cpumask alloced_mask;
+       struct cpumask *alloced_mask = &stor_device->cpumask_chns[q_num];
        int num_channels, tgt_cpu;
 
        if (stor_device->num_sc == 0)
         * III. Mapping is persistent.
         */
 
-       cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+       cpumask_and(alloced_mask, &stor_device->alloced_cpus,
                    cpumask_of_node(cpu_to_node(q_num)));
 
-       num_channels = cpumask_weight(&alloced_mask);
+       num_channels = cpumask_weight(alloced_mask);
        if (num_channels == 0)
                return stor_device->device->channel;
 
        while (hash_qnum >= num_channels)
                hash_qnum -= num_channels;
 
-       for_each_cpu(tgt_cpu, &alloced_mask) {
+       for_each_cpu(tgt_cpu, alloced_mask) {
                if (slot == hash_qnum)
                        break;
                slot++;
 {
        struct storvsc_device *stor_device;
        struct vstor_packet *vstor_packet;
-       struct vmbus_channel *outgoing_channel;
+       struct vmbus_channel *outgoing_channel, *channel;
        int ret = 0;
-       struct cpumask alloced_mask;
+       struct cpumask *alloced_mask;
        int tgt_cpu;
 
        vstor_packet = &request->vstor_packet;
        /*
          * Select an appropriate channel to send the request out.
         */
-
        if (stor_device->stor_chns[q_num] != NULL) {
                outgoing_channel = stor_device->stor_chns[q_num];
-               if (outgoing_channel->target_cpu == smp_processor_id()) {
+               if (outgoing_channel->target_cpu == q_num) {
                        /*
                         * Ideally, we want to pick a different channel if
                         * available on the same NUMA node.
                         */
-                       cpumask_and(&alloced_mask, &stor_device->alloced_cpus,
+                       alloced_mask = &stor_device->cpumask_chns[q_num];
+                       cpumask_and(alloced_mask, &stor_device->alloced_cpus,
                                    cpumask_of_node(cpu_to_node(q_num)));
-                       for_each_cpu_wrap(tgt_cpu, &alloced_mask,
-                                       outgoing_channel->target_cpu + 1) {
-                               if (tgt_cpu != outgoing_channel->target_cpu) {
-                                       outgoing_channel =
-                                       stor_device->stor_chns[tgt_cpu];
-                                       break;
+
+                       for_each_cpu_wrap(tgt_cpu, alloced_mask, q_num + 1) {
+                               if (tgt_cpu == q_num)
+                                       continue;
+                               channel = stor_device->stor_chns[tgt_cpu];
+                               if (hv_get_avail_to_write_percent(
+                                                       &channel->outbound)
+                                               > ring_avail_percent_lowater) {
+                                       outgoing_channel = channel;
+                                       goto found_channel;
+                               }
+                       }
+
+                       /*
+                        * All the other channels on the same NUMA node are
+                        * busy. Try to use the channel on the current CPU
+                        */
+                       if (hv_get_avail_to_write_percent(
+                                               &outgoing_channel->outbound)
+                                       > ring_avail_percent_lowater)
+                               goto found_channel;
+
+                       /*
+                        * If we reach here, all the channels on the current
+                        * NUMA node are busy. Try to find a channel in
+                        * other NUMA nodes
+                        */
+                       cpumask_andnot(alloced_mask, &stor_device->alloced_cpus,
+                                       cpumask_of_node(cpu_to_node(q_num)));
+
+                       for_each_cpu(tgt_cpu, alloced_mask) {
+                               channel = stor_device->stor_chns[tgt_cpu];
+                               if (hv_get_avail_to_write_percent(
+                                                       &channel->outbound)
+                                               > ring_avail_percent_lowater) {
+                                       outgoing_channel = channel;
+                                       goto found_channel;
                                }
                        }
                }
        } else {
                outgoing_channel = get_og_chn(stor_device, q_num);
        }
 
-
+found_channel:
        vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
 
        vstor_packet->vm_srb.length = (sizeof(struct vmscsi_request) -
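The same-node scan above relies on for_each_cpu_wrap(), which walks the set bits of a mask starting just past a given CPU and wraps back to the lowest bit, so successive look-ups probe different channels instead of always the first one on the node. A minimal standalone sketch of that round-robin pattern; the is_usable() predicate is hypothetical and stands in for the ring-space test:

#include <linux/cpumask.h>

/*
 * Return the first CPU in @mask, scanning from @start + 1 with wrap-around,
 * that passes @is_usable(); return nr_cpu_ids if none does. @start itself
 * is skipped so that a different CPU is preferred, mirroring the logic in
 * storvsc_do_io() above.
 */
static int pick_cpu_round_robin(const struct cpumask *mask, int start,
                                bool (*is_usable)(int cpu))
{
        int cpu;

        for_each_cpu_wrap(cpu, mask, start + 1) {
                if (cpu == start)
                        continue;
                if (is_usable(cpu))
                        return cpu;
        }
        return nr_cpu_ids;
}

Starting at q_num + 1 rather than q_num means the queue's own channel is only reused once every sibling on the node has dropped below the low-water mark, which is exactly what the fallback blocks in the hunk above implement.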
                max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel);
        }
 
-       scsi_driver.can_queue = (max_outstanding_req_per_channel *
-                                (max_sub_channels + 1));
+       scsi_driver.can_queue = max_outstanding_req_per_channel *
+                               (max_sub_channels + 1) *
+                               (100 - ring_avail_percent_lowater) / 100;
 
        host = scsi_host_alloc(&scsi_driver,
                               sizeof(struct hv_host_device));
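As a worked example of the new can_queue formula, with made-up but plausible figures (none of these constants come from the patch): 16 possible CPUs and storvsc_vcpus_per_sub_channel = 4 give max_sub_channels = 4, and with max_outstanding_req_per_channel = 768 and the default ring_avail_percent_lowater = 10:

        /* Illustrative numbers only, not values taken from the patch. */
        can_queue = 768 * (4 + 1) * (100 - 10) / 100;   /* = 3456, was 3840 */

so the advertised queue depth shrinks by the same 10% of ring space the selection logic now refuses to fill.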
 
 err_out1:
        kfree(stor_device->stor_chns);
+       kfree(stor_device->cpumask_chns);
        kfree(stor_device);
 
 err_out0: