spin_lock(&mp->m_zone_list_lock);
clear_bit(RTG_F_OPEN, &rtg->rtg_flags);
+ if (mp->m_write_streams)
+ clear_bit(rtg->rtg_write_stream, mp->m_write_streams);
if (!list_empty(&rtg->rtg_entry)) {
/* empty list means this is the open GC zone */
mp->m_nr_open_zones--;
if (rtg->rtg_written == rtg->rtg_extents)
xfs_zone_mark_full(rtg);
+ /*
+ * (Ab)use the ctime field to log the write stream. This allows us to
+ * pick up where we left off after an unmount or power-fail event.
+ *
+ * As the inode core gets logged anyway, there is no extra cost to doing
+ * this every time an allocation is recorded.
+ */
+ VFS_I(rtg->rtg_inodes[XFS_RTGI_RMAP])->i_ctime_sec =
+ rtg->rtg_write_stream;
xfs_trans_log_inode(tp, rtg->rtg_inodes[XFS_RTGI_RMAP], XFS_ILOG_CORE);
xfs_rtgroup_put(rtg);
list_for_each_entry(rtg, &mp->m_free_zones, rtg_entry) {
ASSERT(rtg->rtg_write_pointer == 0);
- if (atomic_inc_not_zero(&rtg->rtg_group.xg_active_ref)) {
- list_del_init(&rtg->rtg_entry);
- atomic_dec(&mp->m_nr_free_zones);
- return rtg;
- }
+ if (atomic_inc_not_zero(&rtg->rtg_group.xg_active_ref))
+ goto found;
}
return NULL;
+found:
+ if (mp->m_write_streams) {
+ unsigned int stream;
+
+ /*
+ * find_first_zero_bit() returns the bitmap size, never a
+ * negative value, when no zero bit exists.
+ */
+ stream = find_first_zero_bit(mp->m_write_streams,
+ mp->m_max_open_zones);
+ if (stream >= mp->m_max_open_zones) {
+ xfs_warn(mp, "no available write streams");
+ /* drop the active reference taken above */
+ xfs_rtgroup_rele(rtg);
+ return NULL;
+ }
+ rtg->rtg_write_stream = stream;
+ set_bit(stream, mp->m_write_streams);
+ }
+
+ list_del_init(&rtg->rtg_entry);
+ atomic_dec(&mp->m_nr_free_zones);
+ return rtg;
}
/*
if (*is_seq)
rgbno = 0;
ioend->io_sector = xfs_rtb_to_daddr(mp, xfs_rgbno_to_rtb(rtg, rgbno));
+ ioend->io_bio.bi_write_hint = rtg->rtg_write_stream;
return XFS_FSB_TO_B(mp, count_fsb);
}
*/
if (rtg->rtg_written < rtg->rtg_extents)
return NULL;
+ if (mp->m_write_streams)
+ clear_bit(rtg->rtg_write_stream, mp->m_write_streams);
xfs_rtgroup_rele(rtg);
rtg = NULL;
}
return error;
}
+
+/*
+ * XXX: this also needs to resync the hardware state with ours and advance the
+ * write pointer to match the usable capacity (e.g. RUAMW in FDP) because in
+ * case of a power fail we might have already written data, but not recorded
+ * it in the rmap yet. That needs a block layer API first, though.
+ */
+static void
+xfs_recover_write_stream(
+ struct xfs_rtgroup *rtg)
+{
+ struct xfs_inode *rmapip = rtg->rtg_inodes[XFS_RTGI_RMAP];
+ struct xfs_mount *mp = rtg_mount(rtg);
+ time64_t stream = VFS_I(rmapip)->i_ctime_sec;
+
+ /*
+ * The stream id comes from the on-disk ctime field and thus cannot be
+ * trusted: validate it before using it as an index into the
+ * m_write_streams bitmap (m_max_open_zones bits).
+ */
+ if (stream < 0 || stream >= mp->m_max_open_zones) {
+ xfs_warn(mp, "invalid write stream %lld for zone %u",
+ (long long)stream, rtg_rgno(rtg));
+ rtg->rtg_write_stream = 0;
+ return;
+ }
+
+ rtg->rtg_write_stream = stream;
+ if (test_and_set_bit(rtg->rtg_write_stream, mp->m_write_streams)) {
+ xfs_warn(mp, "duplicate write stream %u for zone %u",
+ rtg->rtg_write_stream, rtg_rgno(rtg));
+ rtg->rtg_write_stream = 0;
+ }
+}
+
static int
xfs_init_zone(
struct xfs_rtgroup *rtg,
set_bit(RTG_F_OPEN, &rtg->rtg_flags);
*available += (rtg->rtg_extents - rtg->rtg_write_pointer);
*freedblocks += (rtg->rtg_write_pointer) - used;
+ if (mp->m_write_streams)
+ xfs_recover_write_stream(rtg);
} else if (used < rtg->rtg_extents) {
/* zone fully written, but has freed blocks */
xfs_group_set_mark(&rtg->rtg_group, XFS_RTG_RECLAIMABLE);
struct xfs_mount *mp)
{
struct xfs_buftarg *bt = mp->m_rtdev_targp;
- unsigned int bdev_open_zones;
+ unsigned int bdev_open_zones = 0;
int64_t available = 0, freedblocks = 0;
+ struct queue_limits *lim = bdev_limits(bt->bt_bdev);
struct xfs_rtgroup *rtg = NULL;
int error;
* Note: To debug the open zone management code, force max_open to
* 1 here.
*/
- bdev_open_zones = bdev_max_open_zones(bt->bt_bdev);
+ if (bdev_is_zoned(bt->bt_bdev))
+ bdev_open_zones = lim->max_open_zones;
+ else if (lim->features & BLK_FEAT_PLACEMENT_HINTS)
+ bdev_open_zones = lim->max_write_hints;
+
if (bdev_open_zones && !mp->m_max_open_zones)
mp->m_max_open_zones = bdev_open_zones;
if (mp->m_max_open_zones) {
mp->m_sb.sb_rgcount, xfs_get_zone_info_cb, mp);
if (error < 0)
return error;
+ } else if (lim->features & BLK_FEAT_PLACEMENT_HINTS) {
+ /*
+ * XXX: This won't cope with the per-partition restriction
+ * bitmap which I have no idea how to use correctly.
+ */
+ mp->m_write_streams = bitmap_zalloc(mp->m_max_open_zones,
+ GFP_KERNEL);
+ if (!mp->m_write_streams)
+ return -ENOMEM;
}
mp->m_zone_gc_thread = kthread_create(xfs_zoned_gcd, mp,
rtg = NULL;
while ((rtg = xfs_rtgroup_next(mp, rtg)))
list_del_init(&rtg->rtg_entry);
+ bitmap_free(mp->m_write_streams);
return error;
}
kthread_stop(mp->m_zone_gc_thread);
while ((rtg = xfs_rtgroup_next(mp, rtg)))
list_del_init(&rtg->rtg_entry);
+ bitmap_free(mp->m_write_streams);
}