www.infradead.org Git - users/willy/xarray.git/commitdiff
block: use chunk_sectors when evaluating stacked atomic write limits
author: John Garry <john.g.garry@oracle.com>
Fri, 11 Jul 2025 10:52:58 +0000 (10:52 +0000)
committer: Jens Axboe <axboe@kernel.dk>
Thu, 17 Jul 2025 12:01:16 +0000 (06:01 -0600)
The atomic write unit max value is limited by any stacked device stripe
size.

It is required that the atomic write unit is a power-of-2 factor of the
stripe size.

Currently we use the io_min limit to hold the stripe size, and check for
io_min <= SECTOR_SIZE when deciding if we have a striped stacked device.

Nilay reports that this causes a problem when the physical block size is
greater than SECTOR_SIZE [0].

Furthermore, io_min may be mutated when stacking devices, and this makes
it a poor candidate to hold the stripe size. Such an example (of when
io_min may change) would be when the io_min is less than the physical
block size.

Use chunk_sectors to hold the stripe size, which is more appropriate.

[0] https://lore.kernel.org/linux-block/888f3b1d-7817-4007-b3b3-1a2ea04df771@linux.ibm.com/T/#mecca17129f72811137d3c2f1e477634e77f06781

Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Tested-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20250711105258.3135198-7-john.g.garry@oracle.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/blk-settings.c

index 3425ae1b1f014654e59bab357cb54bb87cb98f7e..a6ac293f47e3473d964f732aac7538180e7e9c46 100644 (file)
@@ -595,41 +595,50 @@ static bool blk_stack_atomic_writes_boundary_head(struct queue_limits *t,
        return true;
 }
 
-
-/* Check stacking of first bottom device */
-static bool blk_stack_atomic_writes_head(struct queue_limits *t,
-                               struct queue_limits *b)
+static void blk_stack_atomic_writes_chunk_sectors(struct queue_limits *t)
 {
-       if (b->atomic_write_hw_boundary &&
-           !blk_stack_atomic_writes_boundary_head(t, b))
-               return false;
+       unsigned int chunk_bytes;
 
-       if (t->io_min <= SECTOR_SIZE) {
-               /* No chunk sectors, so use bottom device values directly */
-               t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
-               t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min;
-               t->atomic_write_hw_max = b->atomic_write_hw_max;
-               return true;
-       }
+       if (!t->chunk_sectors)
+               return;
+
+       /*
+        * If chunk sectors is so large that its value in bytes overflows
+        * UINT_MAX, then just shift it down so it definitely will fit.
+        * We don't support atomic writes of such a large size anyway.
+        */
+       if (check_shl_overflow(t->chunk_sectors, SECTOR_SHIFT, &chunk_bytes))
+               chunk_bytes = t->chunk_sectors;
 
        /*
         * Find values for limits which work for chunk size.
         * b->atomic_write_hw_unit_{min, max} may not be aligned with chunk
-        * size (t->io_min), as chunk size is not restricted to a power-of-2.
+        * size, as the chunk size is not restricted to a power-of-2.
         * So we need to find highest power-of-2 which works for the chunk
         * size.
-        * As an example scenario, we could have b->unit_max = 16K and
-        * t->io_min = 24K. For this case, reduce t->unit_max to a value
-        * aligned with both limits, i.e. 8K in this example.
+        * As an example scenario, we could have t->unit_max = 16K and
+        * t->chunk_sectors = 24KB. For this case, reduce t->unit_max to a
+        * value aligned with both limits, i.e. 8K in this example.
         */
-       t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
-       while (t->io_min % t->atomic_write_hw_unit_max)
-               t->atomic_write_hw_unit_max /= 2;
+       t->atomic_write_hw_unit_max = min(t->atomic_write_hw_unit_max,
+                                       max_pow_of_two_factor(chunk_bytes));
 
-       t->atomic_write_hw_unit_min = min(b->atomic_write_hw_unit_min,
+       t->atomic_write_hw_unit_min = min(t->atomic_write_hw_unit_min,
                                          t->atomic_write_hw_unit_max);
-       t->atomic_write_hw_max = min(b->atomic_write_hw_max, t->io_min);
+       t->atomic_write_hw_max = min(t->atomic_write_hw_max, chunk_bytes);
+}
+
+/* Check stacking of first bottom device */
+static bool blk_stack_atomic_writes_head(struct queue_limits *t,
+                               struct queue_limits *b)
+{
+       if (b->atomic_write_hw_boundary &&
+           !blk_stack_atomic_writes_boundary_head(t, b))
+               return false;
 
+       t->atomic_write_hw_unit_max = b->atomic_write_hw_unit_max;
+       t->atomic_write_hw_unit_min = b->atomic_write_hw_unit_min;
+       t->atomic_write_hw_max = b->atomic_write_hw_max;
        return true;
 }
 
@@ -657,6 +666,7 @@ static void blk_stack_atomic_writes_limits(struct queue_limits *t,
 
        if (!blk_stack_atomic_writes_head(t, b))
                goto unsupported;
+       blk_stack_atomic_writes_chunk_sectors(t);
        return;
 
 unsupported: