From 8efc4bbe84a8bdd26e848ed93a8900fad1b44ca2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Jul 2022 14:12:18 -0400 Subject: nfs: add new nfs_direct_req tracepoint events Add some new tracepoints to the DIO write code. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 45 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4eb2a8380a28..ad40e81857ee 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -60,44 +60,12 @@ #include "iostat.h" #include "pnfs.h" #include "fscache.h" +#include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_VFS static struct kmem_cache *nfs_direct_cachep; -struct nfs_direct_req { - struct kref kref; /* release manager */ - - /* I/O parameters */ - struct nfs_open_context *ctx; /* file open context info */ - struct nfs_lock_context *l_ctx; /* Lock context info */ - struct kiocb * iocb; /* controlling i/o request */ - struct inode * inode; /* target file of i/o */ - - /* completion state */ - atomic_t io_count; /* i/os we're waiting for */ - spinlock_t lock; /* protect completion state */ - - loff_t io_start; /* Start offset for I/O */ - ssize_t count, /* bytes actually processed */ - max_count, /* max expected count */ - bytes_left, /* bytes left to be sent */ - error; /* any reported error */ - struct completion completion; /* wait for i/o completion */ - - /* commit state */ - struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */ - struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ - struct work_struct work; - int flags; - /* for write */ -#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ -#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ - /* for read */ -#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ -#define NFS_ODIRECT_DONE INT_MAX /* write verification failed */ -}; - static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; static void nfs_direct_write_complete(struct nfs_direct_req *dreq); @@ -595,6 +563,8 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) struct nfs_page *req; int status = data->task.tk_status; + trace_nfs_direct_commit_complete(dreq); + if (status < 0) { /* Errors in commit are fatal */ dreq->error = status; @@ -631,6 +601,8 @@ static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, { struct nfs_direct_req *dreq = cinfo->dreq; + trace_nfs_direct_resched_write(dreq); + spin_lock(&dreq->lock); if (dreq->flags != NFS_ODIRECT_DONE) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; @@ -695,6 +667,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) static void nfs_direct_write_complete(struct nfs_direct_req *dreq) { + trace_nfs_direct_write_complete(dreq); queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */ } @@ -705,6 +678,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) struct nfs_page *req = nfs_list_entry(hdr->pages.next); int flags = NFS_ODIRECT_DONE; + trace_nfs_direct_write_completion(dreq); + nfs_init_cinfo_from_dreq(&cinfo, dreq); spin_lock(&dreq->lock); @@ -759,6 +734,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; + trace_nfs_direct_write_reschedule_io(dreq); + spin_lock(&dreq->lock); if (dreq->error == 0) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; @@ -799,6 +776,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); + trace_nfs_direct_write_schedule_iovec(dreq); + nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; -- cgit v1.2.3 From 55051c0ced7d322a169f8603d306ee6ec079f8ae Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Jul 2022 14:12:19 -0400 Subject: nfs: always check dreq->error after a commit When the client gets back a short DIO write, it will then attempt to issue another write to finish the DIO request. If that write then fails (as is often the case in an -ENOSPC situation), then we still may need to issue a COMMIT if the earlier short write was unstable. If that COMMIT then succeeds, then we don't want the client to reschedule the write requests, and to instead just return a short write. Otherwise, we can end up looping over the same DIO write forever. Always consult dreq->error after a successful RPC, even when the flag state is not NFS_ODIRECT_DONE. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2028370 Reported-by: Boyang Xue Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index ad40e81857ee..a47d13296194 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -571,8 +571,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) dreq->max_count = 0; dreq->count = 0; dreq->flags = NFS_ODIRECT_DONE; - } else if (dreq->flags == NFS_ODIRECT_DONE) + } else { status = dreq->error; + } nfs_init_cinfo_from_dreq(&cinfo, dreq); -- cgit v1.2.3 From 69d966510d9f5de81588b37d23a9ee8ccc477b23 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Jul 2022 14:12:20 -0400 Subject: nfs: only issue commit in DIO codepath if we have uncommitted data Currently, we try to determine whether to issue a commit based on nfs_write_need_commit which looks at the current verifier. In the case where we got a short write and then tried to follow it up with one that failed, the verifier can't be trusted. What we really want to know is whether the pgio request had any successful writes that came back as UNSTABLE. Add a new flag to the pgio request, and use that to indicate that we've had a successful unstable write. Only issue a commit if that flag is set. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- fs/nfs/write.c | 48 ++++++++++++++++++++++++++++++------------------ include/linux/nfs_xdr.h | 1 + 3 files changed, 32 insertions(+), 19 deletions(-) (limited to 'fs/nfs/direct.c') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index a47d13296194..86df66bb14c5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -690,7 +690,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } nfs_direct_count_bytes(dreq, hdr); - if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) { + if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) { if (!dreq->flags) dreq->flags = NFS_ODIRECT_DO_COMMIT; flags = dreq->flags; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1c706465d090..16d166bc4099 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1576,25 +1576,37 @@ static int nfs_writeback_done(struct rpc_task *task, nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); trace_nfs_writeback_done(task, hdr); - if (hdr->res.verf->committed < hdr->args.stable && - task->tk_status >= 0) { - /* We tried a write call, but the server did not - * commit data to stable storage even though we - * requested it. - * Note: There is a known bug in Tru64 < 5.0 in which - * the server reports NFS_DATA_SYNC, but performs - * NFS_FILE_SYNC. We therefore implement this checking - * as a dprintk() in order to avoid filling syslog. - */ - static unsigned long complain; + if (task->tk_status >= 0) { + enum nfs3_stable_how committed = hdr->res.verf->committed; + + if (committed == NFS_UNSTABLE) { + /* + * We have some uncommitted data on the server at + * this point, so ensure that we keep track of that + * fact irrespective of what later writes do. + */ + set_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags); + } - /* Note this will print the MDS for a DS write */ - if (time_before(complain, jiffies)) { - dprintk("NFS: faulty NFS server %s:" - " (committed = %d) != (stable = %d)\n", - NFS_SERVER(inode)->nfs_client->cl_hostname, - hdr->res.verf->committed, hdr->args.stable); - complain = jiffies + 300 * HZ; + if (committed < hdr->args.stable) { + /* We tried a write call, but the server did not + * commit data to stable storage even though we + * requested it. + * Note: There is a known bug in Tru64 < 5.0 in which + * the server reports NFS_DATA_SYNC, but performs + * NFS_FILE_SYNC. We therefore implement this checking + * as a dprintk() in order to avoid filling syslog. + */ + static unsigned long complain; + + /* Note this will print the MDS for a DS write */ + if (time_before(complain, jiffies)) { + dprintk("NFS: faulty NFS server %s:" + " (committed = %d) != (stable = %d)\n", + NFS_SERVER(inode)->nfs_client->cl_hostname, + committed, hdr->args.stable); + complain = jiffies + 300 * HZ; + } } } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0e3aa0f5f324..e86cf6642d21 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1600,6 +1600,7 @@ enum { NFS_IOHDR_STAT, NFS_IOHDR_RESEND_PNFS, NFS_IOHDR_RESEND_MDS, + NFS_IOHDR_UNSTABLE_WRITES, }; struct nfs_io_completion; -- cgit v1.2.3