From 33344e0f7eaa2efbf9fcc55557d02e8603aa7012 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 7 Apr 2019 13:59:08 -0400 Subject: pNFS: Add tracking to limit the number of pNFS retries When the client is reading or writing using pNFS, and hits an error on the DS, then it typically sends a LAYOUTERROR and/or LAYOUTRETURN to the MDS, before redirtying the failed pages, and going for a new round of reads/writebacks. The problem is that if the server has no way to fix the DS, then we may need a way to interrupt this loop after a set number of attempts have been made. This patch adds an optional module parameter that allows the admin to specify how many times to retry the read/writeback process before failing with a fatal error. The default behaviour is to retry forever. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 7 +++++++ fs/nfs/flexfilelayout/flexfilelayout.c | 8 ++++++++ fs/nfs/pagelist.c | 14 +++++++++++++- fs/nfs/write.c | 5 +++++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2d301a1a73e2..2436bd92bc00 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -663,6 +663,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) } list_for_each_entry_safe(req, tmp, &reqs, wb_list) { + /* Bump the transmission count */ + req->wb_nio++; if (!nfs_pageio_add_request(&desc, req)) { nfs_list_move_request(req, &failed); spin_lock(&cinfo.inode->i_lock); @@ -703,6 +705,11 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { + /* + * Despite the reboot, the write was successful, + * so reset wb_nio. + */ + req->wb_nio = 0; /* Note the rewrite will go through mds */ nfs_mark_request_commit(req, NULL, &cinfo, 0); } else diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 6673d4ff5a2a..9fdbcfd3e39d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -28,6 +28,8 @@ #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) #define FF_LAYOUTRETURN_MAXERR 20 +static unsigned short io_maxretrans; + static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, struct nfs_pgio_header *hdr); static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, @@ -925,6 +927,7 @@ retry: pgm = &pgio->pg_mirrors[0]; pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; + pgio->pg_maxretrans = io_maxretrans; return; out_nolseg: if (pgio->pg_error < 0) @@ -992,6 +995,7 @@ retry: pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; } + pgio->pg_maxretrans = io_maxretrans; return; out_mds: @@ -2515,3 +2519,7 @@ MODULE_DESCRIPTION("The NFSv4 flexfile layout driver"); module_init(nfs4flexfilelayout_init); module_exit(nfs4flexfilelayout_exit); + +module_param(io_maxretrans, ushort, 0644); +MODULE_PARM_DESC(io_maxretrans, "The number of times the NFSv4.1 client " + "retries an I/O request before returning an error. "); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b8301c40dd78..4a31284f411e 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -16,8 +16,8 @@ #include #include #include -#include #include +#include #include #include @@ -327,6 +327,7 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, req->wb_bytes = count; req->wb_context = get_nfs_open_context(ctx); kref_init(&req->wb_kref); + req->wb_nio = 0; return req; } @@ -370,6 +371,7 @@ nfs_create_subreq(struct nfs_page *req, struct nfs_page *last, nfs_lock_request(ret); ret->wb_index = req->wb_index; nfs_page_group_init(ret, last); + ret->wb_nio = req->wb_nio; } return ret; } @@ -724,6 +726,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_mirrors_dynamic = NULL; desc->pg_mirrors = desc->pg_mirrors_static; nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); + desc->pg_maxretrans = 0; } /** @@ -983,6 +986,15 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, return 0; mirror->pg_base = req->wb_pgbase; } + + if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) { + if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR) + desc->pg_error = -ETIMEDOUT; + else + desc->pg_error = -EIO; + return 0; + } + if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; nfs_list_move_request(req, &mirror->pg_list); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b9bcbd06a628..294604784f70 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1009,6 +1009,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto remove_req; } if (nfs_write_need_commit(hdr)) { + /* Reset wb_nio, since the write was successful. */ + req->wb_nio = 0; memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, hdr->pgio_mirror_idx); @@ -1142,6 +1144,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = end - req->wb_offset; else req->wb_bytes = rqend - req->wb_offset; + req->wb_nio = 0; return req; out_flushme: /* @@ -1416,6 +1419,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, */ static void nfs_redirty_request(struct nfs_page *req) { + /* Bump the transmission count */ + req->wb_nio++; nfs_mark_request_dirty(req); set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); nfs_end_page_writeback(req); -- cgit v1.2.3