From ccb9868ab7f4b253440b8723a3487b8b9a16d371 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 4 Dec 2024 15:04:50 +0000 Subject: [PATCH 01/16] blktrace: remove redundant return at end of function A recent change added return 0 before an existing return statement at the end of function blk_trace_setup. The final return is now redundant, so remove it. Fixes: 64d124798244 ("blktrace: move copy_[to|from]_user() out of ->debugfs_lock") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20241204150450.399005-1-colin.i.king@gmail.com Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 18c81e6aa496..3679a6d18934 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -639,8 +639,6 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, return -EFAULT; } return 0; - - return ret; } EXPORT_SYMBOL_GPL(blk_trace_setup); -- 2.51.0 From 53328a3671e965051fdbd2be9be34a0bdc4e7a74 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Beno=C3=AEt=20du=20Garreau?= Date: Wed, 4 Dec 2024 09:38:39 +0100 Subject: [PATCH 02/16] block: rnull: Initialize the module in place MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Using `InPlaceModule` avoids an allocation and an indirection. Signed-off-by: Benoît du Garreau Acked-by: Andreas Hindborg Link: https://lore.kernel.org/r/20241204-rnull_in_place-v1-1-efe3eafac9fb@dugarreau.fr Signed-off-by: Jens Axboe --- drivers/block/rnull.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs index 9cca05dcf772..ddf3629d8894 100644 --- a/drivers/block/rnull.rs +++ b/drivers/block/rnull.rs @@ -32,25 +32,31 @@ module! { license: "GPL v2", } +#[pin_data] struct NullBlkModule { - _disk: Pin>>>, + #[pin] + _disk: Mutex>, } -impl kernel::Module for NullBlkModule { - fn init(_module: &'static ThisModule) -> Result { +impl kernel::InPlaceModule for NullBlkModule { + fn init(_module: &'static ThisModule) -> impl PinInit { pr_info!("Rust null_blk loaded\n"); - let tagset = Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?; - let disk = gen_disk::GenDiskBuilder::new() - .capacity_sectors(4096 << 11) - .logical_block_size(4096)? - .physical_block_size(4096)? - .rotational(false) - .build(format_args!("rnullb{}", 0), tagset)?; + // Use a immediately-called closure as a stable `try` block + let disk = /* try */ (|| { + let tagset = Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?; - let disk = KBox::pin_init(new_mutex!(disk, "nullb:disk"), flags::GFP_KERNEL)?; + gen_disk::GenDiskBuilder::new() + .capacity_sectors(4096 << 11) + .logical_block_size(4096)? + .physical_block_size(4096)? + .rotational(false) + .build(format_args!("rnullb{}", 0), tagset) + })(); - Ok(Self { _disk: disk }) + try_pin_init!(Self { + _disk <- new_mutex!(disk?, "nullb:disk"), + }) } } -- 2.51.0 From 0e20669a91306540ce76710c12201a73b1c3612a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Dec 2024 16:08:47 +0000 Subject: [PATCH 03/16] null_blk: Remove accesses to page->index Use page->private to store the index instead of page->index. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20241216160849.31739-1-willy@infradead.org Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 32bd232cceef..7b674187c096 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -907,7 +907,7 @@ static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx, if (radix_tree_insert(root, idx, t_page)) { null_free_page(t_page); t_page = radix_tree_lookup(root, idx); - WARN_ON(!t_page || t_page->page->index != idx); + WARN_ON(!t_page || t_page->page->private != idx); } else if (is_cache) nullb->dev->curr_cache += PAGE_SIZE; @@ -930,7 +930,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache) (void **)t_pages, pos, FREE_BATCH); for (i = 0; i < nr_pages; i++) { - pos = t_pages[i]->page->index; + pos = t_pages[i]->page->private; ret = radix_tree_delete_item(root, pos, t_pages[i]); WARN_ON(ret != t_pages[i]); null_free_page(ret); @@ -956,7 +956,7 @@ static struct nullb_page *__null_lookup_page(struct nullb *nullb, root = is_cache ? &nullb->dev->cache : &nullb->dev->data; t_page = radix_tree_lookup(root, idx); - WARN_ON(t_page && t_page->page->index != idx); + WARN_ON(t_page && t_page->page->private != idx); if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap))) return t_page; @@ -999,7 +999,7 @@ static struct nullb_page *null_insert_page(struct nullb *nullb, spin_lock_irq(&nullb->lock); idx = sector >> PAGE_SECTORS_SHIFT; - t_page->page->index = idx; + t_page->page->private = idx; t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache); radix_tree_preload_end(); @@ -1019,7 +1019,7 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) struct nullb_page *t_page, *ret; void *dst, *src; - idx = c_page->page->index; + idx = c_page->page->private; t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true); @@ -1078,7 +1078,7 @@ again: * avoid race, we don't allow page free */ for (i = 0; i < nr_pages; i++) { - nullb->cache_flush_pos = c_pages[i]->page->index; + nullb->cache_flush_pos = c_pages[i]->page->private; /* * We found the page which is being flushed to disk by other * threads -- 2.51.0 From fea4952df0eeec4e1a295ebaac9f61c0065fae87 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 2 Dec 2024 15:00:09 +0100 Subject: [PATCH 04/16] driver core: bus: add irq_get_affinity callback to bus_type Introducing a callback in struct bus_type so that a subsystem can hook up the getters directly. This approach avoids exposing random getters in any subsystems APIs. Acked-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Acked-by: Greg Kroah-Hartman Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20241202-refactor-blk-affinity-helpers-v6-1-27211e9c2cd5@kernel.org Signed-off-by: Jens Axboe --- include/linux/device/bus.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index cdc4757217f9..b18658bce2c3 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -48,6 +48,7 @@ struct fwnode_handle; * will never get called until they do. * @remove: Called when a device removed from this bus. * @shutdown: Called at shut-down time to quiesce the device. + * @irq_get_affinity: Get IRQ affinity mask for the device on this bus. * * @online: Called to put the device back online (after offlining it). * @offline: Called to put the device offline for hot-removal. May fail. @@ -87,6 +88,8 @@ struct bus_type { void (*sync_state)(struct device *dev); void (*remove)(struct device *dev); void (*shutdown)(struct device *dev); + const struct cpumask *(*irq_get_affinity)(struct device *dev, + unsigned int irq_vec); int (*online)(struct device *dev); int (*offline)(struct device *dev); -- 2.51.0 From 22d813bf00ba7f7a2e027dfc26f60c6ff525ba85 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 2 Dec 2024 15:00:10 +0100 Subject: [PATCH 05/16] PCI: hookup irq_get_affinity callback struct bus_type has a new callback for retrieving the IRQ affinity for a device. Hook this callback up for PCI based devices. Acked-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20241202-refactor-blk-affinity-helpers-v6-2-27211e9c2cd5@kernel.org Signed-off-by: Jens Axboe --- drivers/pci/pci-driver.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 35270172c833..f57ea36d125d 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1670,6 +1670,19 @@ static void pci_dma_cleanup(struct device *dev) iommu_device_unuse_default_domain(dev); } +/* + * pci_device_irq_get_affinity - get IRQ affinity mask for device + * @dev: ptr to dev structure + * @irq_vec: interrupt vector number + * + * Return the CPU affinity mask for @dev and @irq_vec. + */ +static const struct cpumask *pci_device_irq_get_affinity(struct device *dev, + unsigned int irq_vec) +{ + return pci_irq_get_affinity(to_pci_dev(dev), irq_vec); +} + const struct bus_type pci_bus_type = { .name = "pci", .match = pci_bus_match, @@ -1677,6 +1690,7 @@ const struct bus_type pci_bus_type = { .probe = pci_device_probe, .remove = pci_device_remove, .shutdown = pci_device_shutdown, + .irq_get_affinity = pci_device_irq_get_affinity, .dev_groups = pci_dev_groups, .bus_groups = pci_bus_groups, .drv_groups = pci_drv_groups, -- 2.51.0 From c7f63c5d13925c97a4ae9908bd933ab197872161 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 2 Dec 2024 15:00:11 +0100 Subject: [PATCH 06/16] virtio: hookup irq_get_affinity callback struct bus_type has a new callback for retrieving the IRQ affinity for a device. Hook this callback up for virtio based devices. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20241202-refactor-blk-affinity-helpers-v6-3-27211e9c2cd5@kernel.org Signed-off-by: Jens Axboe --- drivers/virtio/virtio.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index b9095751e43b..b10ed9f5b543 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -377,6 +377,24 @@ static void virtio_dev_remove(struct device *_d) of_node_put(dev->dev.of_node); } +/* + * virtio_irq_get_affinity - get IRQ affinity mask for device + * @_d: ptr to dev structure + * @irq_vec: interrupt vector number + * + * Return the CPU affinity mask for @_d and @irq_vec. + */ +static const struct cpumask *virtio_irq_get_affinity(struct device *_d, + unsigned int irq_vec) +{ + struct virtio_device *dev = dev_to_virtio(_d); + + if (!dev->config->get_vq_affinity) + return NULL; + + return dev->config->get_vq_affinity(dev, irq_vec); +} + static const struct bus_type virtio_bus = { .name = "virtio", .match = virtio_dev_match, @@ -384,6 +402,7 @@ static const struct bus_type virtio_bus = { .uevent = virtio_uevent, .probe = virtio_dev_probe, .remove = virtio_dev_remove, + .irq_get_affinity = virtio_irq_get_affinity, }; int __register_virtio_driver(struct virtio_driver *driver, struct module *owner) -- 2.51.0 From 1452e9b470c903fc4137a448e9f5767e92d68229 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 2 Dec 2024 15:00:12 +0100 Subject: [PATCH 07/16] blk-mq: introduce blk_mq_map_hw_queues blk_mq_pci_map_queues and blk_mq_virtio_map_queues will create a CPU to hardware queue mapping based on affinity information. These two function share common code and only differ on how the affinity information is retrieved. Also, those functions are located in the block subsystem where it doesn't really fit in. They are virtio and pci subsystem specific. Thus introduce provide a generic mapping function which uses the irq_get_affinity callback from bus_type. Originally idea from Ming Lei Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20241202-refactor-blk-affinity-helpers-v6-4-27211e9c2cd5@kernel.org Signed-off-by: Jens Axboe --- block/blk-mq-cpumap.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/blk-mq.h | 2 ++ 2 files changed, 39 insertions(+) diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 9638b25fd521..ad8d6a363f24 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "blk.h" #include "blk-mq.h" @@ -54,3 +55,39 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index) return NUMA_NO_NODE; } + +/** + * blk_mq_map_hw_queues - Create CPU to hardware queue mapping + * @qmap: CPU to hardware queue map + * @dev: The device to map queues + * @offset: Queue offset to use for the device + * + * Create a CPU to hardware queue mapping in @qmap. The struct bus_type + * irq_get_affinity callback will be used to retrieve the affinity. + */ +void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, + struct device *dev, unsigned int offset) + +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + if (!dev->bus->irq_get_affinity) + goto fallback; + + for (queue = 0; queue < qmap->nr_queues; queue++) { + mask = dev->bus->irq_get_affinity(dev, queue + offset); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + qmap->mq_map[cpu] = qmap->queue_offset + queue; + } + + return; + +fallback: + WARN_ON_ONCE(qmap->nr_queues > 1); + blk_mq_clear_mq_map(qmap); +} +EXPORT_SYMBOL_GPL(blk_mq_map_hw_queues); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c596e0e4cb75..769eab6247d4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -921,6 +921,8 @@ void blk_mq_unfreeze_queue_non_owner(struct request_queue *q); void blk_freeze_queue_start_non_owner(struct request_queue *q); void blk_mq_map_queues(struct blk_mq_queue_map *qmap); +void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, + struct device *dev, unsigned int offset); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); -- 2.51.0 From bd326a5ad6397ccfc67af862606be107c15a43e6 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 2 Dec 2024 15:00:13 +0100 Subject: [PATCH 08/16] scsi: replace blk_mq_pci_map_queues with blk_mq_map_hw_queues Replace all users of blk_mq_pci_map_queues with the more generic blk_mq_map_hw_queues. This in preparation to retire blk_mq_pci_map_queues. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Reviewed-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20241202-refactor-blk-affinity-helpers-v6-5-27211e9c2cd5@kernel.org Signed-off-by: Jens Axboe --- drivers/scsi/fnic/fnic_main.c | 3 +-- drivers/scsi/hisi_sas/hisi_sas.h | 1 - drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 4 ++-- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +-- drivers/scsi/mpi3mr/mpi3mr.h | 1 - drivers/scsi/mpi3mr/mpi3mr_os.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 3 +-- drivers/scsi/pm8001/pm8001_init.c | 2 +- drivers/scsi/pm8001/pm8001_sas.h | 1 - drivers/scsi/qla2xxx/qla_nvme.c | 3 +-- drivers/scsi/qla2xxx/qla_os.c | 4 ++-- drivers/scsi/smartpqi/smartpqi_init.c | 7 +++---- 12 files changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index adec0df24bc4..1cb517f731f4 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -601,7 +600,7 @@ void fnic_mq_map_queues_cpus(struct Scsi_Host *host) return; } - blk_mq_pci_map_queues(qmap, l_pdev, FNIC_PCI_OFFSET); + blk_mq_map_hw_queues(qmap, &l_pdev->dev, FNIC_PCI_OFFSET); } static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index a44768bceb9a..4101447bb8eb 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 5db931663ae4..79129c977704 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -3328,8 +3328,8 @@ static void hisi_sas_map_queues(struct Scsi_Host *shost) if (i == HCTX_TYPE_POLL) blk_mq_map_queues(qmap); else - blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev, - BASE_VECTORS_V3_HW); + blk_mq_map_hw_queues(qmap, hisi_hba->dev, + BASE_VECTORS_V3_HW); qoff += qmap->nr_queues; } } diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 50f1dcb6d584..49abd7dd75a7 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -3193,7 +3192,7 @@ static void megasas_map_queues(struct Scsi_Host *shost) map = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; map->nr_queues = instance->msix_vectors - offset; map->queue_offset = 0; - blk_mq_pci_map_queues(map, instance->pdev, offset); + blk_mq_map_hw_queues(map, &instance->pdev->dev, offset); qoff += map->nr_queues; offset += map->nr_queues; diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 0c3e1ac076b5..0d72b5f1b69d 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 1bef88130d0c..1e8735538b23 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -4042,7 +4042,7 @@ static void mpi3mr_map_queues(struct Scsi_Host *shost) */ map->queue_offset = qoff; if (i != HCTX_TYPE_POLL) - blk_mq_pci_map_queues(map, mrioc->pdev, offset); + blk_mq_map_hw_queues(map, &mrioc->pdev->dev, offset); else blk_mq_map_queues(map); diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index f2a55aa5fe65..9599d7a50028 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include "mpt3sas_base.h" @@ -11890,7 +11889,7 @@ static void scsih_map_queues(struct Scsi_Host *shost) */ map->queue_offset = qoff; if (i != HCTX_TYPE_POLL) - blk_mq_pci_map_queues(map, ioc->pdev, offset); + blk_mq_map_hw_queues(map, &ioc->pdev->dev, offset); else blk_mq_map_queues(map); diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index f8c81e53e93f..2a7822fd613e 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -105,7 +105,7 @@ static void pm8001_map_queues(struct Scsi_Host *shost) struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; if (pm8001_ha->number_of_intr > 1) { - blk_mq_pci_map_queues(qmap, pm8001_ha->pdev, 1); + blk_mq_map_hw_queues(qmap, &pm8001_ha->pdev->dev, 1); return; } diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index 42c7b3f7afbf..d3bd8683f344 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -56,7 +56,6 @@ #include #include #include -#include #include "pm8001_defs.h" #define DRV_NAME "pm80xx" diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 8f4cc136a9c9..8ee2e337c9e1 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -8,7 +8,6 @@ #include #include #include -#include #include static struct nvme_fc_port_template qla_nvme_fc_transport; @@ -841,7 +840,7 @@ static void qla_nvme_map_queues(struct nvme_fc_local_port *lport, { struct scsi_qla_host *vha = lport->private; - blk_mq_pci_map_queues(map, vha->hw->pdev, vha->irq_offset); + blk_mq_map_hw_queues(map, &vha->hw->pdev->dev, vha->irq_offset); } static void qla_nvme_localport_delete(struct nvme_fc_local_port *lport) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 7ab717ed7232..31535beaaa16 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -8071,7 +8070,8 @@ static void qla2xxx_map_queues(struct Scsi_Host *shost) if (USER_CTRL_IRQ(vha->hw) || !vha->hw->mqiobase) blk_mq_map_queues(qmap); else - blk_mq_pci_map_queues(qmap, vha->hw->pdev, vha->irq_offset); + blk_mq_map_hw_queues(qmap, &vha->hw->pdev->dev, + vha->irq_offset); } struct scsi_host_template qla2xxx_driver_template = { diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 870f37b70546..04fb24d77e9b 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -6547,10 +6546,10 @@ static void pqi_map_queues(struct Scsi_Host *shost) struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); if (!ctrl_info->disable_managed_interrupts) - return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], - ctrl_info->pci_dev, 0); + blk_mq_map_hw_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + &ctrl_info->pci_dev->dev, 0); else - return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]); + blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]); } static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device) -- 2.51.0 From 4425f6492a511dd12ccad924ae8c8e802c172418 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 2 Dec 2024 15:00:14 +0100 Subject: [PATCH 09/16] nvme: replace blk_mq_pci_map_queues with blk_mq_map_hw_queues Replace all users of blk_mq_pci_map_queues with the more generic blk_mq_map_hw_queues. This in preparation to retire blk_mq_pci_map_queues. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20241202-refactor-blk-affinity-helpers-v6-6-27211e9c2cd5@kernel.org Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 1 - drivers/nvme/host/pci.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b81af7919e94..094be164ffdc 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -16,7 +16,6 @@ #include #include "fc.h" #include -#include /* *************************** Data Structures/Defines ****************** */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 1a5ba80f1811..709328a67f91 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -463,7 +462,7 @@ static void nvme_pci_map_queues(struct blk_mq_tag_set *set) */ map->queue_offset = qoff; if (i != HCTX_TYPE_POLL && offset) - blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset); + blk_mq_map_hw_queues(map, dev->dev, offset); else blk_mq_map_queues(map); qoff += map->nr_queues; -- 2.51.0 From a5665c3d150c9876981e9a31161f42dac6a0d95c Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 2 Dec 2024 15:00:15 +0100 Subject: [PATCH 10/16] virtio: blk/scsi: replace blk_mq_virtio_map_queues with blk_mq_map_hw_queues Replace all users of blk_mq_virtio_map_queues with the more generic blk_mq_map_hw_queues. This in preparation to retire blk_mq_virtio_map_queues. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20241202-refactor-blk-affinity-helpers-v6-7-27211e9c2cd5@kernel.org Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 4 ++-- drivers/scsi/virtio_scsi.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3efe378f1386..ed514ff46dc8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -1181,7 +1180,8 @@ static void virtblk_map_queues(struct blk_mq_tag_set *set) if (i == HCTX_TYPE_POLL) blk_mq_map_queues(&set->map[i]); else - blk_mq_virtio_map_queues(&set->map[i], vblk->vdev, 0); + blk_mq_map_hw_queues(&set->map[i], + &vblk->vdev->dev, 0); } } diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 8471f38b730e..60be1a0c6183 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -29,7 +29,6 @@ #include #include #include -#include #include "sd.h" @@ -746,7 +745,7 @@ static void virtscsi_map_queues(struct Scsi_Host *shost) if (i == HCTX_TYPE_POLL) blk_mq_map_queues(map); else - blk_mq_virtio_map_queues(map, vscsi->vdev, 2); + blk_mq_map_hw_queues(map, &vscsi->vdev->dev, 2); } } -- 2.51.0 From 9bc1e897a821f19ba3775bb013a8a6fb121c3ca1 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 2 Dec 2024 15:00:16 +0100 Subject: [PATCH 11/16] blk-mq: remove unused queue mapping helpers There are no users left of the pci and virtio queue mapping helpers. Thus remove them. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20241202-refactor-blk-affinity-helpers-v6-8-27211e9c2cd5@kernel.org Signed-off-by: Jens Axboe --- block/Makefile | 2 -- block/blk-mq-pci.c | 46 ----------------------------------- block/blk-mq-virtio.c | 46 ----------------------------------- include/linux/blk-mq-pci.h | 11 --------- include/linux/blk-mq-virtio.h | 11 --------- 5 files changed, 116 deletions(-) delete mode 100644 block/blk-mq-pci.c delete mode 100644 block/blk-mq-virtio.c delete mode 100644 include/linux/blk-mq-pci.h delete mode 100644 include/linux/blk-mq-virtio.h diff --git a/block/Makefile b/block/Makefile index ddfd21c1a9ff..33748123710b 100644 --- a/block/Makefile +++ b/block/Makefile @@ -27,8 +27,6 @@ bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o -obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o -obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c deleted file mode 100644 index d47b5c73c9eb..000000000000 --- a/block/blk-mq-pci.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2016 Christoph Hellwig. - */ -#include -#include -#include -#include -#include - -#include "blk-mq.h" - -/** - * blk_mq_pci_map_queues - provide a default queue mapping for PCI device - * @qmap: CPU to hardware queue map. - * @pdev: PCI device associated with @set. - * @offset: Offset to use for the pci irq vector - * - * This function assumes the PCI device @pdev has at least as many available - * interrupt vectors as @set has queues. It will then query the vector - * corresponding to each queue for it's affinity mask and built queue mapping - * that maps a queue to the CPUs that have irq affinity for the corresponding - * vector. - */ -void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, - int offset) -{ - const struct cpumask *mask; - unsigned int queue, cpu; - - for (queue = 0; queue < qmap->nr_queues; queue++) { - mask = pci_irq_get_affinity(pdev, queue + offset); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - qmap->mq_map[cpu] = qmap->queue_offset + queue; - } - - return; - -fallback: - WARN_ON_ONCE(qmap->nr_queues > 1); - blk_mq_clear_mq_map(qmap); -} -EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues); diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c deleted file mode 100644 index 68d0945c0b08..000000000000 --- a/block/blk-mq-virtio.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2016 Christoph Hellwig. - */ -#include -#include -#include -#include -#include "blk-mq.h" - -/** - * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device - * @qmap: CPU to hardware queue map. - * @vdev: virtio device to provide a mapping for. - * @first_vec: first interrupt vectors to use for queues (usually 0) - * - * This function assumes the virtio device @vdev has at least as many available - * interrupt vectors as @set has queues. It will then query the vector - * corresponding to each queue for it's affinity mask and built queue mapping - * that maps a queue to the CPUs that have irq affinity for the corresponding - * vector. - */ -void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, - struct virtio_device *vdev, int first_vec) -{ - const struct cpumask *mask; - unsigned int queue, cpu; - - if (!vdev->config->get_vq_affinity) - goto fallback; - - for (queue = 0; queue < qmap->nr_queues; queue++) { - mask = vdev->config->get_vq_affinity(vdev, first_vec + queue); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - qmap->mq_map[cpu] = qmap->queue_offset + queue; - } - - return; - -fallback: - blk_mq_map_queues(qmap); -} -EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues); diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h deleted file mode 100644 index ca544e1d3508..000000000000 --- a/include/linux/blk-mq-pci.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BLK_MQ_PCI_H -#define _LINUX_BLK_MQ_PCI_H - -struct blk_mq_queue_map; -struct pci_dev; - -void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, - int offset); - -#endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h deleted file mode 100644 index 13226e9b22dd..000000000000 --- a/include/linux/blk-mq-virtio.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BLK_MQ_VIRTIO_H -#define _LINUX_BLK_MQ_VIRTIO_H - -struct blk_mq_queue_map; -struct virtio_device; - -void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, - struct virtio_device *vdev, int first_vec); - -#endif /* _LINUX_BLK_MQ_VIRTIO_H */ -- 2.51.0 From cc76ace465d6977b47daa427379b7be1e0976f12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 19 Dec 2024 07:01:59 +0100 Subject: [PATCH 12/16] block: remove BLK_MQ_F_SHOULD_MERGE BLK_MQ_F_SHOULD_MERGE is set for all tag_sets except those that purely process passthrough commands (bsg-lib, ufs tmf, various nvme admin queues) and thus don't even check the flag. Remove it to simplify the driver interface. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241219060214.1928848-1-hch@lst.de Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 1 - block/blk-mq-debugfs.c | 1 - block/blk-mq-sched.c | 3 +-- drivers/block/amiflop.c | 1 - drivers/block/aoe/aoeblk.c | 1 - drivers/block/ataflop.c | 1 - drivers/block/floppy.c | 1 - drivers/block/loop.c | 3 +-- drivers/block/mtip32xx/mtip32xx.c | 1 - drivers/block/nbd.c | 3 +-- drivers/block/null_blk/main.c | 2 -- drivers/block/ps3disk.c | 3 +-- drivers/block/rbd.c | 1 - drivers/block/rnbd/rnbd-clt.c | 3 +-- drivers/block/sunvdc.c | 2 +- drivers/block/swim.c | 2 +- drivers/block/swim3.c | 3 +-- drivers/block/ublk_drv.c | 1 - drivers/block/virtio_blk.c | 1 - drivers/block/xen-blkfront.c | 1 - drivers/block/z2ram.c | 1 - drivers/cdrom/gdrom.c | 2 +- drivers/md/dm-rq.c | 2 +- drivers/memstick/core/ms_block.c | 3 +-- drivers/memstick/core/mspro_block.c | 3 +-- drivers/mmc/core/queue.c | 2 +- drivers/mtd/mtd_blkdevs.c | 2 +- drivers/mtd/ubi/block.c | 2 +- drivers/nvme/host/apple.c | 1 - drivers/nvme/host/core.c | 1 - drivers/s390/block/dasd_genhd.c | 1 - drivers/s390/block/scm_blk.c | 1 - drivers/scsi/scsi_lib.c | 1 - include/linux/blk-mq.h | 1 - 34 files changed, 15 insertions(+), 43 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 66c1a8835e36..0b1e61f72fb3 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -865,7 +865,6 @@ static int ubd_add(int n, char **error_out) ubd_dev->tag_set.ops = &ubd_mq_ops; ubd_dev->tag_set.queue_depth = 64; ubd_dev->tag_set.numa_node = NUMA_NO_NODE; - ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ubd_dev->tag_set.driver_data = ubd_dev; ubd_dev->tag_set.nr_hw_queues = 1; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 5463697a8442..4b6b20ccdb53 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -181,7 +181,6 @@ static const char *const alloc_policy_name[] = { #define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name static const char *const hctx_flag_name[] = { - HCTX_FLAG_NAME(SHOULD_MERGE), HCTX_FLAG_NAME(TAG_QUEUE_SHARED), HCTX_FLAG_NAME(STACKING), HCTX_FLAG_NAME(TAG_HCTX_SHARED), diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 451a2c1f1f32..7442ca27c2bf 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -351,8 +351,7 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, ctx = blk_mq_get_ctx(q); hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); type = hctx->type; - if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) || - list_empty_careful(&ctx->rq_lists[type])) + if (list_empty_careful(&ctx->rq_lists[type])) goto out_put; /* default per sw-queue merge */ diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 49ced65bef4c..9edd4468f755 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1819,7 +1819,6 @@ static int fd_alloc_drive(int drive) unit[drive].tag_set.nr_maps = 1; unit[drive].tag_set.queue_depth = 2; unit[drive].tag_set.numa_node = NUMA_NO_NODE; - unit[drive].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; if (blk_mq_alloc_tag_set(&unit[drive].tag_set)) goto out_cleanup_trackbuf; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 2028795ec61c..00b74a845328 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -368,7 +368,6 @@ aoeblk_gdalloc(void *vp) set->nr_hw_queues = 1; set->queue_depth = 128; set->numa_node = NUMA_NO_NODE; - set->flags = BLK_MQ_F_SHOULD_MERGE; err = blk_mq_alloc_tag_set(set); if (err) { pr_err("aoe: cannot allocate tag set for %ld.%d\n", diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 4ba98c6654be..110f9aca2667 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2088,7 +2088,6 @@ static int __init atari_floppy_init (void) unit[i].tag_set.nr_maps = 1; unit[i].tag_set.queue_depth = 2; unit[i].tag_set.numa_node = NUMA_NO_NODE; - unit[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ret = blk_mq_alloc_tag_set(&unit[i].tag_set); if (ret) goto err; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 3affb538b989..abf0486f0d4f 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4596,7 +4596,6 @@ static int __init do_floppy_init(void) tag_sets[drive].nr_maps = 1; tag_sets[drive].queue_depth = 2; tag_sets[drive].numa_node = NUMA_NO_NODE; - tag_sets[drive].flags = BLK_MQ_F_SHOULD_MERGE; err = blk_mq_alloc_tag_set(&tag_sets[drive]); if (err) goto out_put_disk; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 8f6761c27c68..836a53eef4b4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2023,8 +2023,7 @@ static int loop_add(int i) lo->tag_set.queue_depth = hw_queue_depth; lo->tag_set.numa_node = NUMA_NO_NODE; lo->tag_set.cmd_size = sizeof(struct loop_cmd); - lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING | - BLK_MQ_F_NO_SCHED_BY_DEFAULT; + lo->tag_set.flags = BLK_MQ_F_STACKING | BLK_MQ_F_NO_SCHED_BY_DEFAULT; lo->tag_set.driver_data = lo; err = blk_mq_alloc_tag_set(&lo->tag_set); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 43701b7b10a7..95361099a2dc 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3416,7 +3416,6 @@ static int mtip_block_initialize(struct driver_data *dd) dd->tags.reserved_tags = 1; dd->tags.cmd_size = sizeof(struct mtip_cmd); dd->tags.numa_node = dd->numa_node; - dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; dd->tags.driver_data = dd; dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b852050d8a96..b1a5af69a66d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1841,8 +1841,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) nbd->tag_set.queue_depth = 128; nbd->tag_set.numa_node = NUMA_NO_NODE; nbd->tag_set.cmd_size = sizeof(struct nbd_cmd); - nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | - BLK_MQ_F_BLOCKING; + nbd->tag_set.flags = BLK_MQ_F_BLOCKING; nbd->tag_set.driver_data = nbd; INIT_WORK(&nbd->remove_work, nbd_dev_remove_work); nbd->backend = NULL; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 7b674187c096..178e62cd9a9f 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1791,7 +1791,6 @@ static int null_init_global_tag_set(void) tag_set.nr_hw_queues = g_submit_queues; tag_set.queue_depth = g_hw_queue_depth; tag_set.numa_node = g_home_node; - tag_set.flags = BLK_MQ_F_SHOULD_MERGE; if (g_no_sched) tag_set.flags |= BLK_MQ_F_NO_SCHED; if (g_shared_tag_bitmap) @@ -1817,7 +1816,6 @@ static int null_setup_tagset(struct nullb *nullb) nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues; nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth; nullb->tag_set->numa_node = nullb->dev->home_node; - nullb->tag_set->flags = BLK_MQ_F_SHOULD_MERGE; if (nullb->dev->no_sched) nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED; if (nullb->dev->shared_tag_bitmap) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index ff45ed766469..68fed46c463e 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -434,8 +434,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) ps3disk_identify(dev); - error = blk_mq_alloc_sq_tag_set(&priv->tag_set, &ps3disk_mq_ops, 1, - BLK_MQ_F_SHOULD_MERGE); + error = blk_mq_alloc_sq_tag_set(&priv->tag_set, &ps3disk_mq_ops, 1, 0); if (error) goto fail_teardown; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ac421dbeeb11..5b393e4a1ddf 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4964,7 +4964,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) rbd_dev->tag_set.ops = &rbd_mq_ops; rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; rbd_dev->tag_set.numa_node = NUMA_NO_NODE; - rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; rbd_dev->tag_set.nr_hw_queues = num_present_cpus(); rbd_dev->tag_set.cmd_size = sizeof(struct rbd_img_request); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index c34695d2eea7..82467ecde7ec 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1209,8 +1209,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) tag_set->ops = &rnbd_mq_ops; tag_set->queue_depth = sess->queue_depth; tag_set->numa_node = NUMA_NO_NODE; - tag_set->flags = BLK_MQ_F_SHOULD_MERGE | - BLK_MQ_F_TAG_QUEUE_SHARED; + tag_set->flags = BLK_MQ_F_TAG_QUEUE_SHARED; tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE; /* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */ diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 2d38331ee667..88dcae6ec575 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -829,7 +829,7 @@ static int probe_disk(struct vdc_port *port) } err = blk_mq_alloc_sq_tag_set(&port->tag_set, &vdc_mq_ops, - VDC_TX_RING_SIZE, BLK_MQ_F_SHOULD_MERGE); + VDC_TX_RING_SIZE, 0); if (err) return err; diff --git a/drivers/block/swim.c b/drivers/block/swim.c index be4ac58afe41..eda33c5eb5e2 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -818,7 +818,7 @@ static int swim_floppy_init(struct swim_priv *swd) for (drive = 0; drive < swd->floppy_count; drive++) { err = blk_mq_alloc_sq_tag_set(&swd->unit[drive].tag_set, - &swim_mq_ops, 2, BLK_MQ_F_SHOULD_MERGE); + &swim_mq_ops, 2, 0); if (err) goto exit_put_disks; diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 90be1017f7bf..9914153b365b 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1208,8 +1208,7 @@ static int swim3_attach(struct macio_dev *mdev, fs = &floppy_states[floppy_count]; memset(fs, 0, sizeof(*fs)); - rc = blk_mq_alloc_sq_tag_set(&fs->tag_set, &swim3_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); + rc = blk_mq_alloc_sq_tag_set(&fs->tag_set, &swim3_mq_ops, 2, 0); if (rc) goto out_unregister; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d4aed12dd436..6c16cb798fdd 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2205,7 +2205,6 @@ static int ublk_add_tag_set(struct ublk_device *ub) ub->tag_set.queue_depth = ub->dev_info.queue_depth; ub->tag_set.numa_node = NUMA_NO_NODE; ub->tag_set.cmd_size = sizeof(struct ublk_rq_data); - ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ub->tag_set.driver_data = ub; return blk_mq_alloc_tag_set(&ub->tag_set); } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index ed514ff46dc8..71a7ffeafb32 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1481,7 +1481,6 @@ static int virtblk_probe(struct virtio_device *vdev) vblk->tag_set.ops = &virtio_mq_ops; vblk->tag_set.queue_depth = queue_depth; vblk->tag_set.numa_node = NUMA_NO_NODE; - vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; vblk->tag_set.cmd_size = sizeof(struct virtblk_req) + sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 59ce113b882a..edcd08a9dcef 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1131,7 +1131,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, } else info->tag_set.queue_depth = BLK_RING_SIZE(info); info->tag_set.numa_node = NUMA_NO_NODE; - info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; info->tag_set.cmd_size = sizeof(struct blkif_req); info->tag_set.driver_data = info; diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 4b7219be1bb8..8c1c7f4211eb 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -354,7 +354,6 @@ static int __init z2_init(void) tag_set.nr_maps = 1; tag_set.queue_depth = 16; tag_set.numa_node = NUMA_NO_NODE; - tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ret = blk_mq_alloc_tag_set(&tag_set); if (ret) goto out_unregister_blkdev; diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 64b097e830d4..85aceab5eac6 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -777,7 +777,7 @@ static int probe_gdrom(struct platform_device *devptr) probe_gdrom_setupcd(); err = blk_mq_alloc_sq_tag_set(&gd.tag_set, &gdrom_mq_ops, 1, - BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + BLK_MQ_F_BLOCKING); if (err) goto probe_fail_free_cd_info; diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 499f8cc8a39f..e23076f7ece2 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -547,7 +547,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) md->tag_set->ops = &dm_mq_ops; md->tag_set->queue_depth = dm_get_blk_mq_queue_depth(); md->tag_set->numa_node = md->numa_node_id; - md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING; + md->tag_set->flags = BLK_MQ_F_STACKING; md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues(); md->tag_set->driver_data = md; diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 20a2466bec23..5b617c1f6789 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2094,8 +2094,7 @@ static int msb_init_disk(struct memstick_dev *card) if (msb->disk_id < 0) return msb->disk_id; - rc = blk_mq_alloc_sq_tag_set(&msb->tag_set, &msb_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); + rc = blk_mq_alloc_sq_tag_set(&msb->tag_set, &msb_mq_ops, 2, 0); if (rc) goto out_release_id; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 13b317c56069..634d343b6bdb 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1139,8 +1139,7 @@ static int mspro_block_init_disk(struct memstick_dev *card) if (disk_id < 0) return disk_id; - rc = blk_mq_alloc_sq_tag_set(&msb->tag_set, &mspro_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); + rc = blk_mq_alloc_sq_tag_set(&msb->tag_set, &mspro_mq_ops, 2, 0); if (rc) goto out_release_id; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 4d6844261912..ab662f502fe7 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -441,7 +441,7 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, else mq->tag_set.queue_depth = MMC_QUEUE_DEPTH; mq->tag_set.numa_node = NUMA_NO_NODE; - mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + mq->tag_set.flags = BLK_MQ_F_BLOCKING; mq->tag_set.nr_hw_queues = 1; mq->tag_set.cmd_size = sizeof(struct mmc_queue_req); mq->tag_set.driver_data = mq; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 47ead84407cd..ee7e1d908986 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -329,7 +329,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) goto out_list_del; ret = blk_mq_alloc_sq_tag_set(new->tag_set, &mtd_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING); + BLK_MQ_F_BLOCKING); if (ret) goto out_kfree_tag_set; diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 60d0155be869..2836905f0152 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -383,7 +383,7 @@ int ubiblock_create(struct ubi_volume_info *vi) dev->tag_set.ops = &ubiblock_mq_ops; dev->tag_set.queue_depth = 64; dev->tag_set.numa_node = NUMA_NO_NODE; - dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + dev->tag_set.flags = BLK_MQ_F_BLOCKING; dev->tag_set.cmd_size = sizeof(struct ubiblock_pdu); dev->tag_set.driver_data = dev; dev->tag_set.nr_hw_queues = 1; diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 4319ab50c10d..83c60468542c 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1275,7 +1275,6 @@ static int apple_nvme_alloc_tagsets(struct apple_nvme *anv) anv->tagset.timeout = NVME_IO_TIMEOUT; anv->tagset.numa_node = NUMA_NO_NODE; anv->tagset.cmd_size = sizeof(struct apple_nvme_iod); - anv->tagset.flags = BLK_MQ_F_SHOULD_MERGE; anv->tagset.driver_data = &anv->ioq; ret = blk_mq_alloc_tag_set(&anv->tagset); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a970168a3014..42283d268500 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4639,7 +4639,6 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, /* Reserved for fabric connect */ set->reserved_tags = 1; set->numa_node = ctrl->numa_node; - set->flags = BLK_MQ_F_SHOULD_MERGE; if (ctrl->ops->flags & NVME_F_BLOCKING) set->flags |= BLK_MQ_F_BLOCKING; set->cmd_size = cmd_size; diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 6da47a65af61..28e92fad0ca1 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -56,7 +56,6 @@ int dasd_gendisk_alloc(struct dasd_block *block) block->tag_set.cmd_size = sizeof(struct dasd_ccw_req); block->tag_set.nr_hw_queues = nr_hw_queues; block->tag_set.queue_depth = queue_depth; - block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; block->tag_set.numa_node = NUMA_NO_NODE; rc = blk_mq_alloc_tag_set(&block->tag_set); if (rc) diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 3fcfe029db1b..91bbe9d2e5ac 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -461,7 +461,6 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) bdev->tag_set.cmd_size = sizeof(blk_status_t); bdev->tag_set.nr_hw_queues = nr_requests; bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests; - bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; bdev->tag_set.numa_node = NUMA_NO_NODE; ret = blk_mq_alloc_tag_set(&bdev->tag_set); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index adee6f60c966..5cf124e13097 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2065,7 +2065,6 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) tag_set->queue_depth = shost->can_queue; tag_set->cmd_size = cmd_size; tag_set->numa_node = dev_to_node(shost->dma_dev); - tag_set->flags = BLK_MQ_F_SHOULD_MERGE; tag_set->flags |= BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy); if (shost->queuecommand_may_block) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 769eab6247d4..7f6c482ebf54 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -668,7 +668,6 @@ struct blk_mq_ops { /* Keep hctx_flag_name[] in sync with the definitions below */ enum { - BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for -- 2.51.0 From 31d813a3b8cbde2d09ba4dee282ca29096541006 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Fri, 20 Dec 2024 10:37:57 +0100 Subject: [PATCH 13/16] rust: block: fix use of BLK_MQ_F_SHOULD_MERGE BLK_MQ_F_SHOULD_MERGE has was removed [1] and is now in effect by default. So remove the flag from tag sets of Rust block device drivers. Link: https://lore.kernel.org/r/20241219060214.1928848-1-hch@lst.de [1] Fixes: 9377b95cda73 ("block: remove BLK_MQ_F_SHOULD_MERGE") Signed-off-by: Andreas Hindborg Link: https://lore.kernel.org/r/20241220-merge-flag-fix-v1-1-41b7778dac06@kernel.org Signed-off-by: Jens Axboe --- rust/kernel/block/mq/tag_set.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/block/mq/tag_set.rs b/rust/kernel/block/mq/tag_set.rs index d7f175a05d99..00ddcc71dfa2 100644 --- a/rust/kernel/block/mq/tag_set.rs +++ b/rust/kernel/block/mq/tag_set.rs @@ -52,7 +52,7 @@ impl TagSet { numa_node: bindings::NUMA_NO_NODE, queue_depth: num_tags, cmd_size, - flags: bindings::BLK_MQ_F_SHOULD_MERGE, + flags: 0, driver_data: core::ptr::null_mut::(), nr_maps: num_maps, ..tag_set -- 2.51.0 From 48ea518d0072e29a7af304258a4cedb3e7eea308 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 17 Dec 2024 13:03:07 -0800 Subject: [PATCH 14/16] blk-zoned: Minimize #include directives Only include those header files that are necessary. Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241217210310.645966-2-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 84da1eadff64..1575b887fa38 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -11,12 +11,8 @@ */ #include -#include #include #include -#include -#include -#include #include #include #include -- 2.51.0 From cbac56e5237dbec9ae3d896e71f24e95e06eafa3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 17 Dec 2024 13:03:08 -0800 Subject: [PATCH 15/16] blk-zoned: Document locking assumptions Document which functions expect that their callers must hold a lock. Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241217210310.645966-3-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 1575b887fa38..954724a2e3c6 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -459,6 +459,8 @@ static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug) static inline bool disk_should_remove_zone_wplug(struct gendisk *disk, struct blk_zone_wplug *zwplug) { + lockdep_assert_held(&zwplug->lock); + /* If the zone write plug was already removed, we are done. */ if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) return false; @@ -913,6 +915,8 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, { struct gendisk *disk = bio->bi_bdev->bd_disk; + lockdep_assert_held(&zwplug->lock); + /* * If we lost track of the zone write pointer due to a write error, * the user must either execute a report zones, reset the zone or finish -- 2.51.0 From fa8555630b32da7c239a1e01e9eb1bb040be59ac Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 17 Dec 2024 13:03:09 -0800 Subject: [PATCH 16/16] blk-zoned: Improve the queue reference count strategy documentation For the blk_queue_exit() calls, document where the corresponding code can be found that increases q->q_usage_counter. Reviewed-by: Christoph Hellwig Reviewed-by: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241217210310.645966-4-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 954724a2e3c6..7876a6458022 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -582,6 +582,7 @@ static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug, bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING); bio_io_error(bio); disk_put_zone_wplug(zwplug); + /* Drop the reference taken by disk_zone_wplug_add_bio(() */ blk_queue_exit(q); } @@ -893,10 +894,7 @@ void blk_zone_write_plug_init_request(struct request *req) break; } - /* - * Drop the extra reference on the queue usage we got when - * plugging the BIO and advance the write pointer offset. - */ + /* Drop the reference taken by disk_zone_wplug_add_bio(). */ blk_queue_exit(q); zwplug->wp_offset += bio_sectors(bio); -- 2.51.0