From: Trond Myklebust Date: Tue, 11 Aug 2020 17:36:32 +0000 (-0400) Subject: NFS: Fix flexfiles read failover X-Git-Tag: iomap-folio-5.17-old~3372^2 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=563c53e73b8b6ec842828736f77e633f7b0911e9;p=users%2Fwilly%2Flinux.git NFS: Fix flexfiles read failover The current mirrored read failover code is correctly resetting the mirror index between failed reads, however it is not able to actually flip the RPC call over to the next RPC client. The end result is that we keep resending the RPC call to the same client over and over. The fix is to use the pnfs_read_resend_pnfs() mechanism to schedule a new RPC call, but we need to add the ability to pass in a mirror index so that we always retry the next mirror in the list. Fixes: 166bd5b889ac ("pNFS/flexfiles: Fix layoutstats handling during read failovers") Signed-off-by: Trond Myklebust --- diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b26173d72735..965145592750 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -790,6 +790,19 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx); } +static struct nfs4_pnfs_ds * +ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx) +{ + struct pnfs_layout_segment *lseg = pgio->pg_lseg; + struct nfs4_pnfs_ds *ds; + + ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx, + best_idx); + if (ds || !pgio->pg_mirror_idx) + return ds; + return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx); +} + static void ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, @@ -840,7 +853,7 @@ retry: goto out_nolseg; } - ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); + ds = ff_layout_get_ds_for_read(pgio, &ds_idx); if (!ds) { if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg)) goto out_mds; @@ -1022,11 +1035,24 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) } } +static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) +{ + u32 idx = hdr->pgio_mirror_idx + 1; + int new_idx = 0; + + if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx)) + ff_layout_send_layouterror(hdr->lseg); + else + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); + pnfs_read_resend_pnfs(hdr, new_idx); +} + static void ff_layout_reset_read(struct nfs_pgio_header *hdr) { struct rpc_task *task = &hdr->task; pnfs_layoutcommit_inode(hdr->inode, false); + pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " @@ -1228,6 +1254,12 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, break; case NFS4ERR_NXIO: ff_layout_mark_ds_unreachable(lseg, idx); + /* + * Don't return the layout if this is a read and we still + * have layouts to try + */ + if (opnum == OP_READ) + break; /* Fallthrough */ default: pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, @@ -1241,7 +1273,6 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, static int ff_layout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { - int new_idx = hdr->pgio_mirror_idx; int err; if (task->tk_status < 0) { @@ -1261,10 +1292,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task, clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { case -NFS4ERR_RESET_TO_PNFS: - if (ff_layout_choose_best_ds_for_read(hdr->lseg, - hdr->pgio_mirror_idx + 1, - &new_idx)) - goto out_layouterror; set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: @@ -1275,10 +1302,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task, } return 0; -out_layouterror: - ff_layout_read_record_layoutstats_done(task, hdr); - ff_layout_send_layouterror(hdr->lseg); - hdr->pgio_mirror_idx = new_idx; out_eagain: rpc_restart_call_prepare(task); return -EAGAIN; @@ -1405,10 +1428,9 @@ static void ff_layout_read_release(void *data) struct nfs_pgio_header *hdr = data; ff_layout_read_record_layoutstats_done(&hdr->task, hdr); - if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) { - ff_layout_send_layouterror(hdr->lseg); - pnfs_read_resend_pnfs(hdr); - } else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) + ff_layout_resend_pnfs_read(hdr); + else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) ff_layout_reset_read(hdr); pnfs_generic_rw_release(data); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b5baf36d4de5..40332c758d84 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2939,7 +2939,8 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr, } /* Resend all requests through pnfs. */ -void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) +void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr, + unsigned int mirror_idx) { struct nfs_pageio_descriptor pgio; @@ -2950,6 +2951,7 @@ void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); + pgio.pg_mirror_idx = mirror_idx; hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); } } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8e0ada581b92..2661c44c62db 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -311,7 +311,7 @@ int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); void pnfs_ld_write_done(struct nfs_pgio_header *); void pnfs_ld_read_done(struct nfs_pgio_header *); -void pnfs_read_resend_pnfs(struct nfs_pgio_header *); +void pnfs_read_resend_pnfs(struct nfs_pgio_header *, unsigned int mirror_idx); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos,