This causes an issue on at least nvme-mpath, where my boot fails with:
WARNING: CPU: 354 PID: 2729 at block/blk-settings.c:75 blk_validate_limits+0x356/0x380
Modules linked in: tg3(+) nvme usbcore scsi_mod ptp i2c_piix4 libphy nvme_core crc32c_intel scsi_common usb_common pps_core i2c_smbus
CPU: 354 UID: 0 PID: 2729 Comm: kworker/u2061:1 Not tainted 6.12.0-rc6+ #181
Hardware name: Dell Inc. PowerEdge R7625/06444F, BIOS 1.8.3 04/02/2024
Workqueue: async async_run_entry_fn
RIP: 0010:blk_validate_limits+0x356/0x380
Code: f6 47 01 04 75 28 83 bf 94 00 00 00 00 75 39 83 bf 98 00 00 00 00 75 34 83 7f 68 00 75 32 31 c0 83 7f 5c 00 0f 84 9b fd ff ff <0f> 0b eb 13 0f 0b eb 0f 48 c7 c0 74 12 58 92 48 89 c7 e8 13 76 46
RSP: 0018:ffffa8a1dfb93b30 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff9232829c8388 RCX: 0000000000000088
RDX: 0000000000000080 RSI: 0000000000000200 RDI: ffffa8a1dfb93c38
RBP: 000000000000000c R08: 00000000ffffffff R09: 000000000000ffff
R10: 0000000000000000 R11: 0000000000000000 R12: ffff9232829b9000
R13: ffff9232829b9010 R14: ffffa8a1dfb93c38 R15: ffffa8a1dfb93c38
FS:  0000000000000000(0000) GS:ffff923867c80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055c1b92480a8 CR3: 0000002484ff0002 CR4: 0000000000370ef0
Call Trace:
<TASK>
? __warn+0xca/0x1a0
? blk_validate_limits+0x356/0x380
? report_bug+0x11a/0x1a0
? handle_bug+0x5e/0x90
? exc_invalid_op+0x16/0x40
? asm_exc_invalid_op+0x16/0x20
? blk_validate_limits+0x356/0x380
blk_alloc_queue+0x7a/0x250
__blk_alloc_disk+0x39/0x80
nvme_mpath_alloc_disk+0x13d/0x1b0 [nvme_core]
nvme_scan_ns+0xcc7/0x1010 [nvme_core]
async_run_entry_fn+0x27/0x120
process_scheduled_works+0x1a0/0x360
worker_thread+0x2bc/0x350
? pr_cont_work+0x1b0/0x1b0
kthread+0x111/0x120
? kthread_unuse_mm+0x90/0x90
ret_from_fork+0x30/0x40
? kthread_unuse_mm+0x90/0x90
ret_from_fork_asm+0x11/0x20
</TASK>
---[ end trace 0000000000000000 ]---
Presumably this is due to max_zone_append_sectors not being cleared to
zero, which makes blk_validate_zoned_limits() complain and fail.
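For context, blk-settings.c:75 sits in the non-zoned branch of
blk_validate_zoned_limits(), which requires every zoned-only limit to be
zero on a queue without BLK_FEAT_ZONED. A rough sketch of that check (from
memory, not the exact 6.12-rc6 source):

    static int blk_validate_zoned_limits(struct queue_limits *lim)
    {
            if (!(lim->features & BLK_FEAT_ZONED)) {
                    /* Zoned-only limits must be unset on a non-zoned queue. */
                    if (WARN_ON_ONCE(lim->max_open_zones) ||
                        WARN_ON_ONCE(lim->max_active_zones) ||
                        WARN_ON_ONCE(lim->zone_write_granularity) ||
                        WARN_ON_ONCE(lim->max_zone_append_sectors))
                            return -EINVAL;
                    return 0;
            }
            /* ... validation for actual zoned queues follows ... */
            return 0;
    }

A non-zoned queue that reaches this point with a stale non-zero
max_zone_append_sectors trips exactly this WARN, consistent with the
presumption above.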
This reverts commit 2a8f6153e1c2db06a537a5c9d61102eb591776f1.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
return BLK_STS_IOERR;
/* Make sure the BIO is small enough and will not get split */
- if (nr_sectors > q->limits.max_zone_append_sectors)
+ if (nr_sectors > queue_max_zone_append_sectors(q))
return BLK_STS_IOERR;
bio->bi_opf |= REQ_NOMERGE;
struct bio *bio_split_zone_append(struct bio *bio,
const struct queue_limits *lim, unsigned *nr_segs)
{
+ unsigned int max_sectors = queue_limits_max_zone_append_sectors(lim);
int split_sectors;
split_sectors = bio_split_rw_at(bio, lim, nr_segs,
- lim->max_zone_append_sectors << SECTOR_SHIFT);
+ max_sectors << SECTOR_SHIFT);
if (WARN_ON_ONCE(split_sectors > 0))
split_sectors = -EINVAL;
return bio_submit_split(bio, split_sectors);
if (lim->zone_write_granularity < lim->logical_block_size)
lim->zone_write_granularity = lim->logical_block_size;
- /*
- * The Zone Append size is limited by the maximum I/O size and the zone
- * size given that it can't span zones.
- *
- * If no max_hw_zone_append_sectors limit is provided, the block layer
- * will emulated it, else we're also bound by the hardware limit.
- */
- lim->max_zone_append_sectors =
- min_not_zero(lim->max_hw_zone_append_sectors,
- min(lim->chunk_sectors, lim->max_hw_sectors));
+ if (lim->max_zone_append_sectors) {
+ /*
+ * The Zone Append size is limited by the maximum I/O size
+ * and the zone size given that it can't span zones.
+ */
+ lim->max_zone_append_sectors =
+ min3(lim->max_hw_sectors,
+ lim->max_zone_append_sectors,
+ lim->chunk_sectors);
+ }
+
return 0;
}
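To make the restored semantics concrete, here is a standalone sketch with
hypothetical example values (min/min3 reimplemented for illustration; not
kernel code):

    #include <stdio.h>

    #define min(a, b)       ((a) < (b) ? (a) : (b))
    #define min3(a, b, c)   min(min((a), (b)), (c))

    int main(void)
    {
            unsigned int max_hw_sectors = 1024;             /* 512 KiB max I/O */
            unsigned int chunk_sectors = 524288;            /* 256 MiB zones */
            unsigned int max_zone_append_sectors = 65536;   /* driver-provided */

            /* Non-zero: cap by both the max I/O size and the zone size. */
            if (max_zone_append_sectors)
                    max_zone_append_sectors = min3(max_hw_sectors,
                                                   max_zone_append_sectors,
                                                   chunk_sectors);
            /* Zero is left alone: it selects zone append emulation. */
            printf("%u\n", max_zone_append_sectors);        /* prints 1024 */
            return 0;
    }

The key difference from the reverted code is the if: zero is preserved as
the "emulate in the block layer" sentinel instead of being overwritten
with a computed value.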
t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
b->max_write_zeroes_sectors);
- t->max_hw_zone_append_sectors = min(t->max_hw_zone_append_sectors,
- b->max_hw_zone_append_sectors);
+ t->max_zone_append_sectors = min(queue_limits_max_zone_append_sectors(t),
+ queue_limits_max_zone_append_sectors(b));
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
b->seg_boundary_mask);
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
-QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors)
#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
return ret;
}
+/*
+ * For zone append queue_max_zone_append_sectors does not just return the
+ * underlying queue limits, but actually contains a calculation. Because of
+ * that we can't simply use QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES here.
+ */
+static ssize_t queue_zone_append_max_show(struct gendisk *disk, char *page)
+{
+ return sprintf(page, "%llu\n",
+ (u64)queue_max_zone_append_sectors(disk->queue) <<
+ SECTOR_SHIFT);
+}
+
static ssize_t
queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count)
{
QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
-QUEUE_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
+QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
QUEUE_RO_ENTRY(queue_zoned, "zoned");
&queue_atomic_write_unit_max_entry.attr,
&queue_write_same_max_entry.attr,
&queue_max_write_zeroes_sectors_entry.attr,
- &queue_max_zone_append_sectors_entry.attr,
+ &queue_zone_append_max_entry.attr,
&queue_zone_write_granularity_entry.attr,
&queue_rotational_entry.attr,
&queue_zoned_entry.attr,
lim->features |= BLK_FEAT_ZONED;
lim->chunk_sectors = dev->zone_size_sects;
- lim->max_hw_zone_append_sectors = dev->zone_append_max_sectors;
+ lim->max_zone_append_sectors = dev->zone_append_max_sectors;
lim->max_open_zones = dev->zone_max_open;
lim->max_active_zones = dev->zone_max_active;
return 0;
lim.features |= BLK_FEAT_ZONED;
lim.max_active_zones = p->max_active_zones;
lim.max_open_zones = p->max_open_zones;
- lim.max_hw_zone_append_sectors = p->max_zone_append_sectors;
+ lim.max_zone_append_sectors = p->max_zone_append_sectors;
}
if (ub->params.basic.attrs & UBLK_ATTR_VOLATILE_CACHE) {
wg, v);
return -ENODEV;
}
- lim->max_hw_zone_append_sectors = v;
+ lim->max_zone_append_sectors = v;
dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
return 0;
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
} else {
set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
- lim->max_hw_zone_append_sectors = 0;
+ lim->max_zone_append_sectors = 0;
}
/*
if (!zlim.mapped_nr_seq_zones) {
lim->max_open_zones = 0;
lim->max_active_zones = 0;
- lim->max_hw_zone_append_sectors = 0;
+ lim->max_zone_append_sectors = 0;
lim->zone_write_granularity = 0;
lim->chunk_sectors = 0;
lim->features &= ~BLK_FEAT_ZONED;
if (head->ids.csi == NVME_CSI_ZNS)
lim.features |= BLK_FEAT_ZONED;
else
- lim.max_hw_zone_append_sectors = 0;
+ lim.max_zone_append_sectors = 0;
head->disk = blk_alloc_disk(&lim, ctrl->numa_node);
if (IS_ERR(head->disk))
lim->features |= BLK_FEAT_ZONED;
lim->max_open_zones = zi->max_open_zones;
lim->max_active_zones = zi->max_active_zones;
- lim->max_hw_zone_append_sectors = ns->ctrl->max_zone_append;
+ lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
lim->chunk_sectors = ns->head->zsze =
nvme_lba_to_sect(ns->head, zi->zone_size);
}
lim->max_open_zones = sdkp->zones_max_open;
lim->max_active_zones = 0;
lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks);
+ /* Enable block layer zone append emulation */
+ lim->max_zone_append_sectors = 0;
return 0;
unsigned int max_user_discard_sectors;
unsigned int max_secure_erase_sectors;
unsigned int max_write_zeroes_sectors;
- unsigned int max_hw_zone_append_sectors;
unsigned int max_zone_append_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
return q->limits.max_segment_size;
}
+static inline unsigned int
+queue_limits_max_zone_append_sectors(const struct queue_limits *l)
+{
+ unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);
+
+ return min_not_zero(l->max_zone_append_sectors, max_sectors);
+}
+
+static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
+{
+ if (!blk_queue_is_zoned(q))
+ return 0;
+
+ return queue_limits_max_zone_append_sectors(&q->limits);
+}
+
static inline bool queue_emulates_zone_append(struct request_queue *q)
{
- return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors;
+ return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
}
static inline bool bdev_emulates_zone_append(struct block_device *bdev)
static inline unsigned int
bdev_max_zone_append_sectors(struct block_device *bdev)
{
- return bdev_limits(bdev)->max_zone_append_sectors;
+ return queue_max_zone_append_sectors(bdev_get_queue(bdev));
}
static inline unsigned int bdev_max_segments(struct block_device *bdev)
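Taken together, the restored helpers make zero a sentinel in
max_zone_append_sectors: drivers that leave it unset get block layer
emulation, and the effective limit is computed on each read. A standalone
sketch of that fallback with hypothetical values (min_not_zero
reimplemented here to mirror the kernel macro's behavior):

    #include <stdio.h>

    #define min(a, b)       ((a) < (b) ? (a) : (b))
    /* Like min(), except a zero argument defers to the other one. */
    #define min_not_zero(a, b) ((a) ? ((b) ? min((a), (b)) : (a)) : (b))

    int main(void)
    {
            unsigned int chunk_sectors = 524288, max_hw_sectors = 2048;
            unsigned int cap = min(chunk_sectors, max_hw_sectors);

            /* Unset (emulated): readers still see the hardware/zone cap. */
            printf("%u\n", min_not_zero(0u, cap));          /* 2048 */
            /* A smaller driver-provided limit wins over the cap. */
            printf("%u\n", min_not_zero(1024u, cap));       /* 1024 */
            return 0;
    }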