www.infradead.org Git - users/hch/misc.git/commitdiff
RFC: block: allow write streams on partitions block-write-streams
author Christoph Hellwig <hch@lst.de>
Mon, 11 Nov 2024 10:03:46 +0000 (11:03 +0100)
committer Christoph Hellwig <hch@lst.de>
Tue, 19 Nov 2024 11:18:03 +0000 (12:18 +0100)
By default assign all write streams to partition 1, and add a hacky
sysfs file that distributes them equally across all partitions.

This is implemented by storing the number of per-partition write
streams in struct block_device, as well as their offset into the global
ones, and then remapping the write streams in the I/O submission
path.

The sysfs interface is hacky and undocumented; better suggestions are
welcome from actual users of write streams on partitions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
block/bdev.c
block/blk-core.c
block/genhd.c
block/partitions/core.c
include/linux/blk_types.h
include/linux/blkdev.h

index c23245f1fdfe331cda0e52b5c74ebc3fc3ec4f0a..f3549a8cdb3f718c138c3c891a9737f73d1237df 100644 (file)
@@ -440,6 +440,15 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
                return NULL;
        }
        bdev->bd_disk = disk;
+
+       /*
+        * Assign all write streams to the first partition by default.
+        */
+       if (partno == 1) {
+               bdev->bd_part_write_stream_start = 0;
+               bdev->bd_part_write_streams = bdev_max_write_streams(bdev);
+       }
+
        return bdev;
 }
 
index 666efe8fa202065fd417a0f18b367e95c209fc26..9654937f9b2d38b514b4d4641948e90d1766c070 100644 (file)
@@ -574,6 +574,8 @@ static int blk_partition_remap(struct bio *bio)
                return -EIO;
        if (bio_sectors(bio)) {
                bio->bi_iter.bi_sector += p->bd_start_sect;
+               if (bio->bi_write_stream)
+                       bio->bi_write_stream += p->bd_part_write_stream_start;
                trace_block_bio_remap(bio, p->bd_dev,
                                      bio->bi_iter.bi_sector -
                                      p->bd_start_sect);
index 79230c109fca036816667f4cf0c9839fa120c562..3156c70522b6879fb0ab02fc4fb8000454cd3eee 100644 (file)
@@ -1070,6 +1070,54 @@ static ssize_t partscan_show(struct device *dev,
        return sysfs_emit(buf, "%u\n", disk_has_partscan(dev_to_disk(dev)));
 }
 
+/*
+ * Show handler for the "distribute_write_streams" disk attribute.
+ *
+ * There is no state to report yet, so this always emits the constant
+ * string "0\n" and returns whatever sysfs_emit() returns.
+ */
+static ssize_t disk_distribute_write_streams_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       int len;
+
+       /* Open question: should this report the per-partition ranges? */
+       len = sysfs_emit(buf, "0\n");
+       return len;
+}
+
+/*
+ * Store handler for the "distribute_write_streams" disk attribute.
+ *
+ * Any write splits the disk's write streams evenly across all entries of
+ * disk->part_tbl from index 1 upwards; the written value itself is
+ * ignored.  Each partition gets max_write_streams / nr_partitions
+ * streams, laid out back to back starting at offset 0.  Streams left over
+ * by the integer division are not assigned to any partition.
+ *
+ * Returns @count on success (also when no partitions are found), -EINVAL
+ * if the queue limits report no write streams, or -EBUSY if disk->part0
+ * has a non-zero opener count.
+ */
+static ssize_t disk_distribute_write_streams_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct gendisk *disk = dev_to_disk(dev);
+       unsigned short total_write_streams =
+               disk->queue->limits.max_write_streams;
+       unsigned short part_write_streams, part_write_stream_start = 0;
+       unsigned long nr_partitions = 0, idx;
+       struct block_device *part;
+       ssize_t ret = count;
+
+       if (!total_write_streams)
+               return -EINVAL;
+
+       mutex_lock(&disk->open_mutex);
+       /* Do not change the mapping while part0 has openers. */
+       if (atomic_read(&disk->part0->bd_openers)) {
+               ret = -EBUSY;
+               goto out_unlock;
+       }
+
+       /* Index 0 is skipped; only entries from index 1 on are counted. */
+       xa_for_each_start(&disk->part_tbl, idx, part, 1)
+               nr_partitions++;
+       if (!nr_partitions)
+               goto out_unlock;
+
+       part_write_streams = total_write_streams / nr_partitions;
+       xa_for_each_start(&disk->part_tbl, idx, part, 1) {
+               part->bd_part_write_streams = part_write_streams;
+               part->bd_part_write_stream_start = part_write_stream_start;
+               /*
+                * Log before advancing the offset so the message shows the
+                * start of this partition's range, and use the table index
+                * itself as the partition number.
+                */
+               dev_info(dev,
+                       "assigning %u write streams at %u to partition %lu\n",
+                       part_write_streams, part_write_stream_start, idx);
+               part_write_stream_start += part_write_streams;
+       }
+out_unlock:
+       mutex_unlock(&disk->open_mutex);
+       return ret;
+}
+
 static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
@@ -1084,6 +1132,9 @@ static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
 static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
 static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
 static DEVICE_ATTR(partscan, 0444, partscan_show, NULL);
+static DEVICE_ATTR(distribute_write_streams, 0644,
+       disk_distribute_write_streams_show,
+       disk_distribute_write_streams_store);
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 ssize_t part_fail_show(struct device *dev,
@@ -1135,6 +1186,7 @@ static struct attribute *disk_attrs[] = {
        &dev_attr_events_poll_msecs.attr,
        &dev_attr_diskseq.attr,
        &dev_attr_partscan.attr,
+       &dev_attr_distribute_write_streams.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
        &dev_attr_fail.attr,
 #endif
index 815ed33caa1b86d22dccf2bf1593f5c85aad7597..a27dbb5589ce66b5152297d7cc5c8a6b8551db39 100644 (file)
@@ -245,8 +245,10 @@ static const struct attribute_group *part_attr_groups[] = {
 
 /*
  * Resolves the partition's block_device from @dev once, then calls
  * put_disk() on its bd_disk and bdev_drop() on the partition itself.
  * NOTE(review): presumably installed as the partition device's release
  * hook -- confirm at the registration site.
  */
 static void part_release(struct device *dev)
 {
-       put_disk(dev_to_bdev(dev)->bd_disk);
-       bdev_drop(dev_to_bdev(dev));
+       struct block_device *part = dev_to_bdev(dev);
+
+       put_disk(part->bd_disk);
+       bdev_drop(part);
 }
 
 static int part_uevent(const struct device *dev, struct kobj_uevent_env *env)
index 4ca3449ce9c95aa5e3f2c7414871200556d3bdfd..02a3d58e814f5bb24c071050f6f3d32b556849d1 100644 (file)
@@ -74,6 +74,13 @@ struct block_device {
 #ifdef CONFIG_SECURITY
        void                    *bd_security;
 #endif
+
+       /*
+        * Allow assigning write streams to partitions.
+        */
+       unsigned short          bd_part_write_streams;
+       unsigned short          bd_part_write_stream_start;
+
        /*
         * keep this out-of-line as it's both big and not needed in the fast
         * path
index 9fda66530d9a5232b6e3b46ecb98747c46af0ff0..bb0921e642fb5d1320eb54d03e3c3334e836be88 100644 (file)
@@ -1242,7 +1242,7 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
 /*
  * Number of write streams available on @bdev: a partition reports its own
  * bd_part_write_streams, anything else reports the max_write_streams
  * value from bdev_limits().
  */
 static inline unsigned short bdev_max_write_streams(struct block_device *bdev)
 {
        if (bdev_is_partition(bdev))
-               return 0;
+               return bdev->bd_part_write_streams;
        return bdev_limits(bdev)->max_write_streams;
 }