From cea9ba7239dcc84175041174304c6cdeae3226e5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:27:00 -0400 Subject: NFS: Do not report EINTR/ERESTARTSYS as mapping errors If the attempt to flush data was interrupted due to a local signal, then just requeue the writes back for I/O. Fixes: 6fbda89b257f ("NFS: Replace custom error reporting mechanism with generic one") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f00d45cf80ef..e437db1791ba 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1444,7 +1444,7 @@ static void nfs_async_write_error(struct list_head *head, int error) while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - if (nfs_error_is_fatal(error)) + if (nfs_error_is_fatal_on_server(error)) nfs_write_error(req, error); else nfs_redirty_request(req); -- cgit v1.2.3 From 9641d9bc9b75f11f70646f5c6ee9f5f519a1012e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:27:01 -0400 Subject: NFS: fsync() should report filesystem errors over EINTR/ERESTARTSYS If the commit to disk is interrupted, we should still first check for filesystem errors so that we can report them in preference to the error due to the signal. Fixes: 2197e9b06c22 ("NFS: Fix up fsync() when the server rebooted") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 150b7fa8f0a7..7c380e555224 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -204,15 +204,16 @@ static int nfs_file_fsync_commit(struct file *file, int datasync) { struct inode *inode = file_inode(file); - int ret; + int ret, ret2; dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); ret = nfs_commit_inode(inode, FLUSH_SYNC); - if (ret < 0) - return ret; - return file_check_and_advance_wb_err(file); + ret2 = file_check_and_advance_wb_err(file); + if (ret2 < 0) + return ret2; + return ret; } int -- cgit v1.2.3 From e6005436f6cc9ed13288f936903f0151e5543485 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:27:02 -0400 Subject: NFS: Don't report ENOSPC write errors twice Any errors reported by the write() system call need to be cleared from the file descriptor's error tracking. The current call to nfs_wb_all() causes the error to be reported, but since it doesn't call file_check_and_advance_wb_err(), we can end up reporting the same error a second time when the application calls fsync(). Note that since Linux 4.13, the rule is that EIO may be reported for write(), but it must be reported by a subsequent fsync(), so let's just drop reporting it in write. The check for nfs_ctx_key_to_expire() is just a duplicate to the one already in nfs_write_end(), so let's drop that too. Reported-by: ChenXiaoSong Fixes: ce368536dd61 ("nfs: nfs_file_write() should check for writeback errors") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7c380e555224..87e4cd5e8fe2 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -598,18 +598,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = { .page_mkwrite = nfs_vm_page_mkwrite, }; -static int nfs_need_check_write(struct file *filp, struct inode *inode, - int error) -{ - struct nfs_open_context *ctx; - - ctx = nfs_file_open_context(filp); - if (nfs_error_is_fatal_on_server(error) || - nfs_ctx_key_to_expire(ctx, inode)) - return 1; - return 0; -} - ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; @@ -637,7 +625,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) { result = nfs_revalidate_file_size(inode, file); if (result) - goto out; + return result; } nfs_clear_invalid_mapping(file->f_mapping); @@ -656,6 +644,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) written = result; iocb->ki_pos += written; + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); if (mntflags & NFS_MOUNT_WRITE_EAGER) { result = filemap_fdatawrite_range(file->f_mapping, @@ -673,17 +662,22 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) } result = generic_write_sync(iocb, written); if (result < 0) - goto out; + return result; +out: /* Return error values */ error = filemap_check_wb_err(file->f_mapping, since); - if (nfs_need_check_write(file, inode, error)) { - int err = nfs_wb_all(inode); - if (err < 0) - result = err; + switch (error) { + default: + break; + case -EDQUOT: + case -EFBIG: + case -ENOSPC: + nfs_wb_all(inode); + error = file_check_and_advance_wb_err(file); + if (error < 0) + result = error; } - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); -out: return result; out_swapfile: -- cgit v1.2.3 From d95b26650e86175e4a97698d89bc1626cd1df0c6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:27:03 -0400 Subject: NFS: Do not report flush errors in nfs_write_end() If we do flush cached writebacks in nfs_write_end() due to the imminent expiration of an RPCSEC_GSS session, then we should defer reporting any resulting errors until the calls to file_check_and_advance_wb_err() in nfs_file_write() and nfs_file_fsync(). Fixes: 6fbda89b257f ("NFS: Replace custom error reporting mechanism with generic one") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 87e4cd5e8fe2..3f17748eaf29 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -386,11 +386,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, return status; NFS_I(mapping->host)->write_io += copied; - if (nfs_ctx_key_to_expire(ctx, mapping->host)) { - status = nfs_wb_all(mapping->host); - if (status < 0) - return status; - } + if (nfs_ctx_key_to_expire(ctx, mapping->host)) + nfs_wb_all(mapping->host); return copied; } -- cgit v1.2.3 From c5e483b77cc2edb318da152abe07e33006b975fd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:27:04 -0400 Subject: NFS: Don't report errors from nfs_pageio_complete() more than once Since errors from nfs_pageio_complete() are already being reported through nfs_async_write_error(), we should not be returning them to the callers of do_writepages() as well. They will end up being reported through the generic mechanism instead. Fixes: 6fbda89b257f ("NFS: Replace custom error reporting mechanism with generic one") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e437db1791ba..4925d11849cd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -681,11 +681,7 @@ static int nfs_writepage_locked(struct page *page, err = nfs_do_writepage(page, wbc, &pgio); pgio.pg_error = 0; nfs_pageio_complete(&pgio); - if (err < 0) - return err; - if (nfs_error_is_fatal(pgio.pg_error)) - return pgio.pg_error; - return 0; + return err; } int nfs_writepage(struct page *page, struct writeback_control *wbc) @@ -747,9 +743,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) if (err < 0) goto out_err; - err = pgio.pg_error; - if (nfs_error_is_fatal(err)) - goto out_err; return 0; out_err: return err; -- cgit v1.2.3 From 452284407c18d8a522c3039339b1860afa0025a8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:08:10 -0400 Subject: NFS: Memory allocation failures are not server fatal errors We need to filter out ENOMEM in nfs_error_is_fatal_on_server(), because running out of memory on our client is not a server error. Reported-by: Olga Kornievskaia Fixes: 2dc23afffbca ("NFS: ENOMEM should also be a fatal error.") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7eefa16ed381..8f8cd6e2d4db 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -841,6 +841,7 @@ static inline bool nfs_error_is_fatal_on_server(int err) case 0: case -ERESTARTSYS: case -EINTR: + case -ENOMEM: return false; } return nfs_error_is_fatal(err); -- cgit v1.2.3 From 3764a17e31d579cf9b4bd0a69894b577e8d75702 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:08:11 -0400 Subject: NFSv4/pNFS: Do not fail I/O when we fail to allocate the pNFS layout Commit 587f03deb69b caused pnfs_update_layout() to stop returning ENOMEM when the memory allocation fails, and hence causes it to fall back to trying to do I/O through the MDS. There is no guarantee that this will fare any better. If we're failing the pNFS layout allocation, then we should just redirty the page and retry later. Reported-by: Olga Kornievskaia Fixes: 587f03deb69b ("pnfs: refactor send_layoutget") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 856c962273c7..68a87be3e6f9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2000,6 +2000,7 @@ lookup_again: lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { spin_unlock(&ino->i_lock); + lseg = ERR_PTR(-ENOMEM); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_NOMEM); goto out; @@ -2128,6 +2129,7 @@ lookup_again: lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); if (!lgp) { + lseg = ERR_PTR(-ENOMEM); trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, PNFS_UPDATE_LAYOUT_NOMEM); nfs_layoutget_end(lo); -- cgit v1.2.3 From c6fd3511c3397dd9cbc6dc5d105bbedb69bf4061 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:08:12 -0400 Subject: NFS: Further fixes to the writeback error handling When we handle an error by redirtying the page, we're not corrupting the mapping, so we don't want the error to be recorded in the mapping. If the caller has specified a sync_mode of WB_SYNC_NONE, we can just return AOP_WRITEPAGE_ACTIVATE. However if we're dealing with WB_SYNC_ALL, we need to ensure that retries happen when the errors are non-fatal. Reported-by: Olga Kornievskaia Fixes: 8fc75bed96bb ("NFS: Fix up return value on fatal errors in nfs_page_async_flush()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4925d11849cd..2f41659e232e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -603,8 +603,9 @@ static void nfs_write_error(struct nfs_page *req, int error) * Find an associated nfs write request, and prepare to flush it out * May return an error if the user signalled nfs_wait_on_request(). */ -static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page) +static int nfs_page_async_flush(struct page *page, + struct writeback_control *wbc, + struct nfs_pageio_descriptor *pgio) { struct nfs_page *req; int ret = 0; @@ -630,11 +631,11 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, /* * Remove the problematic req upon fatal errors on the server */ - if (nfs_error_is_fatal(ret)) { - if (nfs_error_is_fatal_on_server(ret)) - goto out_launder; - } else - ret = -EAGAIN; + if (nfs_error_is_fatal_on_server(ret)) + goto out_launder; + if (wbc->sync_mode == WB_SYNC_NONE) + ret = AOP_WRITEPAGE_ACTIVATE; + redirty_page_for_writepage(wbc, page); nfs_redirty_request(req); pgio->pg_error = 0; } else @@ -650,15 +651,8 @@ out_launder: static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - int ret; - nfs_pageio_cond_complete(pgio, page_index(page)); - ret = nfs_page_async_flush(pgio, page); - if (ret == -EAGAIN) { - redirty_page_for_writepage(wbc, page); - ret = AOP_WRITEPAGE_ACTIVATE; - } - return ret; + return nfs_page_async_flush(page, wbc, pgio); } /* @@ -733,12 +727,15 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) priority = wb_priority(wbc); } - nfs_pageio_init_write(&pgio, inode, priority, false, - &nfs_async_write_completion_ops); - pgio.pg_io_completion = ioc; - err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); - pgio.pg_error = 0; - nfs_pageio_complete(&pgio); + do { + nfs_pageio_init_write(&pgio, inode, priority, false, + &nfs_async_write_completion_ops); + pgio.pg_io_completion = ioc; + err = write_cache_pages(mapping, wbc, nfs_writepages_callback, + &pgio); + pgio.pg_error = 0; + nfs_pageio_complete(&pgio); + } while (err < 0 && !nfs_error_is_fatal(err)); nfs_io_completion_put(ioc); if (err < 0) -- cgit v1.2.3 From 126966ddedb60bb7bf9f3b341e26ca8ef1019efc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:08:13 -0400 Subject: pNFS/files: Fall back to I/O through the MDS on non-fatal layout errors Only report the error when the server is returning a fatal error, such as ESTALE, EIO, etc... Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 76deddab0a8f..2b2661582bbe 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -839,7 +839,12 @@ fl_pnfs_update_layout(struct inode *ino, lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode, gfp_flags); - if (IS_ERR_OR_NULL(lseg)) + if (IS_ERR(lseg)) { + /* Fall back to MDS on recoverable errors */ + if (!nfs_error_is_fatal_on_server(PTR_ERR(lseg))) + lseg = NULL; + goto out; + } else if (!lseg) goto out; lo = NFS_I(ino)->layout; -- cgit v1.2.3 From 6949493884fe88500de4af182588e071cf1544ee Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:08:14 -0400 Subject: NFSv4: Don't hold the layoutget locks across multiple RPC calls When doing layoutget as part of the open() compound, we have to be careful to release the layout locks before we can call any further RPC calls, such as setattr(). The reason is that those calls could trigger a recall, which could deadlock. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a79f66432bd3..bf3ba541b9fb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3098,6 +3098,10 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, } out: + if (opendata->lgp) { + nfs4_lgopen_release(opendata->lgp); + opendata->lgp = NULL; + } if (!opendata->cancelled) nfs4_sequence_free_slot(&opendata->o_res.seq_res); return ret; -- cgit v1.2.3 From 7b8b44eb7710e34a1ea5278392417993a549fabf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:36:58 -0400 Subject: NFSv4: Specify the type of ACL to cache When caching a NFSv4 ACL, we want to specify whether we are caching an NFSv4.0 type acl, the NFSv4.1 dacl or the NFSv4.1 sacl. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 59 +++++++++++++++++++++++++++++++++---------------- include/linux/nfs4.h | 2 ++ include/linux/nfs_xdr.h | 7 ++++++ 3 files changed, 49 insertions(+), 19 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bf3ba541b9fb..e6c830d4db0b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5772,9 +5772,17 @@ static int nfs4_proc_renew(struct nfs_client *clp, const struct cred *cred) return 0; } -static inline int nfs4_server_supports_acls(struct nfs_server *server) +static bool nfs4_server_supports_acls(const struct nfs_server *server, + enum nfs4_acl_type type) { - return server->caps & NFS_CAP_ACLS; + switch (type) { + default: + return server->attr_bitmask[0] & FATTR4_WORD0_ACL; + case NFS4ACL_DACL: + return server->attr_bitmask[1] & FATTR4_WORD1_DACL; + case NFS4ACL_SACL: + return server->attr_bitmask[1] & FATTR4_WORD1_SACL; + } } /* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that @@ -5813,6 +5821,7 @@ unwind: } struct nfs4_cached_acl { + enum nfs4_acl_type type; int cached; size_t len; char data[]; @@ -5833,7 +5842,8 @@ static void nfs4_zap_acl_attr(struct inode *inode) nfs4_set_cached_acl(inode, NULL); } -static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen) +static ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, + size_t buflen, enum nfs4_acl_type type) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_cached_acl *acl; @@ -5843,6 +5853,8 @@ static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_ acl = nfsi->nfs4_acl; if (acl == NULL) goto out; + if (acl->type != type) + goto out; if (buf == NULL) /* user is just asking for length */ goto out_len; if (acl->cached == 0) @@ -5858,7 +5870,9 @@ out: return ret; } -static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) +static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, + size_t pgbase, size_t acl_len, + enum nfs4_acl_type type) { struct nfs4_cached_acl *acl; size_t buflen = sizeof(*acl) + acl_len; @@ -5875,6 +5889,7 @@ static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size goto out; acl->cached = 0; } + acl->type = type; acl->len = acl_len; out: nfs4_set_cached_acl(inode, acl); @@ -5890,7 +5905,8 @@ out: * length. The next getxattr call will then produce another round trip to * the server, this time with the input buf of the required size. */ -static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, + size_t buflen, enum nfs4_acl_type type) { struct page **pages; struct nfs_getaclargs args = { @@ -5947,7 +5963,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu ret = -ERANGE; goto out_free; } - nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len); + nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len, + type); if (buf) { if (res.acl_len > buflen) { ret = -ERANGE; @@ -5967,14 +5984,15 @@ out_free: return ret; } -static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, + size_t buflen, enum nfs4_acl_type type) { struct nfs4_exception exception = { .interruptible = true, }; ssize_t ret; do { - ret = __nfs4_get_acl_uncached(inode, buf, buflen); + ret = __nfs4_get_acl_uncached(inode, buf, buflen, type); trace_nfs4_get_acl(inode, ret); if (ret >= 0) break; @@ -5983,27 +6001,29 @@ static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bufl return ret; } -static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen, + enum nfs4_acl_type type) { struct nfs_server *server = NFS_SERVER(inode); int ret; - if (!nfs4_server_supports_acls(server)) + if (!nfs4_server_supports_acls(server, type)) return -EOPNOTSUPP; ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); if (ret < 0) return ret; if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - ret = nfs4_read_cached_acl(inode, buf, buflen); + ret = nfs4_read_cached_acl(inode, buf, buflen, type); if (ret != -ENOENT) /* -ENOENT is returned if there is no ACL or if there is an ACL * but no cached acl data, just the acl length */ return ret; - return nfs4_get_acl_uncached(inode, buf, buflen); + return nfs4_get_acl_uncached(inode, buf, buflen, type); } -static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, + size_t buflen, enum nfs4_acl_type type) { struct nfs_server *server = NFS_SERVER(inode); struct page *pages[NFS4ACL_MAXPAGES]; @@ -6024,7 +6044,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl /* You can't remove system.nfs4_acl: */ if (buflen == 0) return -EINVAL; - if (!nfs4_server_supports_acls(server)) + if (!nfs4_server_supports_acls(server, type)) return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; @@ -6055,12 +6075,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return ret; } -static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, + size_t buflen, enum nfs4_acl_type type) { struct nfs4_exception exception = { }; int err; do { - err = __nfs4_proc_set_acl(inode, buf, buflen); + err = __nfs4_proc_set_acl(inode, buf, buflen, type); trace_nfs4_set_acl(inode, err); if (err == -NFS4ERR_BADOWNER || err == -NFS4ERR_BADNAME) { /* @@ -7659,19 +7680,19 @@ static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, const char *key, const void *buf, size_t buflen, int flags) { - return nfs4_proc_set_acl(inode, buf, buflen); + return nfs4_proc_set_acl(inode, buf, buflen, NFS4ACL_ACL); } static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *key, void *buf, size_t buflen) { - return nfs4_proc_get_acl(inode, buf, buflen); + return nfs4_proc_get_acl(inode, buf, buflen, NFS4ACL_ACL); } static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry) { - return nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))); + return nfs4_server_supports_acls(NFS_SB(dentry->d_sb), NFS4ACL_ACL); } #ifdef CONFIG_NFS_V4_SECURITY_LABEL diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5662d8be04eb..8d04b6a5964c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -451,6 +451,8 @@ enum lock_type4 { #define FATTR4_WORD1_TIME_MODIFY (1UL << 21) #define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) #define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) +#define FATTR4_WORD1_DACL (1UL << 26) +#define FATTR4_WORD1_SACL (1UL << 27) #define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) #define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2863e5a69c6a..13d068c57d8d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -800,6 +800,13 @@ struct nfs_setattrargs { const struct nfs4_label *label; }; +enum nfs4_acl_type { + NFS4ACL_NONE = 0, + NFS4ACL_ACL, + NFS4ACL_DACL, + NFS4ACL_SACL, +}; + struct nfs_setaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; -- cgit v1.2.3 From db145db021abf75aa1a5810e631425055cfbed47 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:36:59 -0400 Subject: NFSv4: Add encoders/decoders for the NFSv4.1 dacl and sacl attributes Add the ability to set or retrieve the acl using the NFSv4.1 'dacl' and 'sacl' attributes to the NFSv4 xdr encoders/decoders. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 +++-- fs/nfs/nfs4xdr.c | 95 ++++++++++++++++++++++++++++++++----------------- include/linux/nfs_xdr.h | 3 ++ 3 files changed, 71 insertions(+), 36 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e6c830d4db0b..b2ddbaf32a95 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5911,9 +5911,11 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, struct page **pages; struct nfs_getaclargs args = { .fh = NFS_FH(inode), + .acl_type = type, .acl_len = buflen, }; struct nfs_getaclres res = { + .acl_type = type, .acl_len = buflen, }; struct rpc_message msg = { @@ -6028,9 +6030,10 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, struct nfs_server *server = NFS_SERVER(inode); struct page *pages[NFS4ACL_MAXPAGES]; struct nfs_setaclargs arg = { - .fh = NFS_FH(inode), - .acl_pages = pages, - .acl_len = buflen, + .fh = NFS_FH(inode), + .acl_type = type, + .acl_len = buflen, + .acl_pages = pages, }; struct nfs_setaclres res; struct rpc_message msg = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 86a5f6516928..0b41e00e754f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1680,19 +1680,35 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr); } -static void -encode_setacl(struct xdr_stream *xdr, const struct nfs_setaclargs *arg, - struct compound_hdr *hdr) +static void nfs4_acltype_to_bitmap(enum nfs4_acl_type type, __u32 bitmap[2]) { - __be32 *p; + switch (type) { + default: + bitmap[0] = FATTR4_WORD0_ACL; + bitmap[1] = 0; + break; + case NFS4ACL_DACL: + bitmap[0] = 0; + bitmap[1] = FATTR4_WORD1_DACL; + break; + case NFS4ACL_SACL: + bitmap[0] = 0; + bitmap[1] = FATTR4_WORD1_SACL; + } +} + +static void encode_setacl(struct xdr_stream *xdr, + const struct nfs_setaclargs *arg, + struct compound_hdr *hdr) +{ + __u32 bitmap[2]; + + nfs4_acltype_to_bitmap(arg->acl_type, bitmap); encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr); encode_nfs4_stateid(xdr, &zero_stateid); - p = reserve_space(xdr, 2*4); - *p++ = cpu_to_be32(1); - *p = cpu_to_be32(FATTR4_WORD0_ACL); - p = reserve_space(xdr, 4); - *p = cpu_to_be32(arg->acl_len); + xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap)); + encode_uint32(xdr, arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len); } @@ -2587,11 +2603,11 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - const __u32 nfs4_acl_bitmap[1] = { - [0] = FATTR4_WORD0_ACL, - }; + __u32 nfs4_acl_bitmap[2]; uint32_t replen; + nfs4_acltype_to_bitmap(args->acl_type, nfs4_acl_bitmap); + encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); @@ -5386,7 +5402,7 @@ decode_restorefh(struct xdr_stream *xdr) } static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, - struct nfs_getaclres *res) + struct nfs_getaclres *res, enum nfs4_acl_type type) { unsigned int savep; uint32_t attrlen, @@ -5404,26 +5420,39 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) goto out; - if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) - return -EIO; - if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { - - /* The bitmap (xdr len + bitmaps) and the attr xdr len words - * are stored with the acl data to handle the problem of - * variable length bitmaps.*/ - res->acl_data_offset = xdr_page_pos(xdr); - res->acl_len = attrlen; - - /* Check for receive buffer overflow */ - if (res->acl_len > xdr_stream_remaining(xdr) || - res->acl_len + res->acl_data_offset > xdr->buf->page_len) { - res->acl_flags |= NFS4_ACL_TRUNC; - dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", - attrlen, xdr_stream_remaining(xdr)); - } - } else - status = -EOPNOTSUPP; + switch (type) { + default: + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (!(bitmap[0] & FATTR4_WORD0_ACL)) + return -EOPNOTSUPP; + break; + case NFS4ACL_DACL: + if (unlikely(bitmap[0] || bitmap[1] & (FATTR4_WORD1_DACL - 1U))) + return -EIO; + if (!(bitmap[1] & FATTR4_WORD1_DACL)) + return -EOPNOTSUPP; + break; + case NFS4ACL_SACL: + if (unlikely(bitmap[0] || bitmap[1] & (FATTR4_WORD1_SACL - 1U))) + return -EIO; + if (!(bitmap[1] & FATTR4_WORD1_SACL)) + return -EOPNOTSUPP; + } + /* The bitmap (xdr len + bitmaps) and the attr xdr len words + * are stored with the acl data to handle the problem of + * variable length bitmaps.*/ + res->acl_data_offset = xdr_page_pos(xdr); + res->acl_len = attrlen; + + /* Check for receive buffer overflow */ + if (res->acl_len > xdr_stream_remaining(xdr) || + res->acl_len + res->acl_data_offset > xdr->buf->page_len) { + res->acl_flags |= NFS4_ACL_TRUNC; + dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", + attrlen, xdr_stream_remaining(xdr)); + } out: return status; } @@ -6486,7 +6515,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getacl(xdr, rqstp, res); + status = decode_getacl(xdr, rqstp, res, res->acl_type); out: return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 13d068c57d8d..4a8ba84f848e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -810,6 +810,7 @@ enum nfs4_acl_type { struct nfs_setaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; + enum nfs4_acl_type acl_type; size_t acl_len; struct page ** acl_pages; }; @@ -821,6 +822,7 @@ struct nfs_setaclres { struct nfs_getaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; + enum nfs4_acl_type acl_type; size_t acl_len; struct page ** acl_pages; }; @@ -829,6 +831,7 @@ struct nfs_getaclargs { #define NFS4_ACL_TRUNC 0x0001 /* ACL was truncated */ struct nfs_getaclres { struct nfs4_sequence_res seq_res; + enum nfs4_acl_type acl_type; size_t acl_len; size_t acl_data_offset; int acl_flags; -- cgit v1.2.3 From 71342db05722907f30505afa4d845cae0f276d23 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 May 2022 10:37:00 -0400 Subject: NFSv4.1: Enable access to the NFSv4.1 'dacl' and 'sacl' attributes Enable access to the NFSv4 acl via the NFSv4.1 'dacl' and 'sacl' attributes. This allows the server to authenticate the DACL and the SACL operations separately, since reading and/or editing the SACL is usually considered to be a privileged operation. It also allows the propagation of automatic inheritance information that was not supported by the NFSv4.0 'acl' attribute. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2ddbaf32a95..0dfdbb406f96 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7698,6 +7698,55 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry) return nfs4_server_supports_acls(NFS_SB(dentry->d_sb), NFS4ACL_ACL); } +#if defined(CONFIG_NFS_V4_1) +#define XATTR_NAME_NFSV4_DACL "system.nfs4_dacl" + +static int nfs4_xattr_set_nfs4_dacl(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) +{ + return nfs4_proc_set_acl(inode, buf, buflen, NFS4ACL_DACL); +} + +static int nfs4_xattr_get_nfs4_dacl(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) +{ + return nfs4_proc_get_acl(inode, buf, buflen, NFS4ACL_DACL); +} + +static bool nfs4_xattr_list_nfs4_dacl(struct dentry *dentry) +{ + return nfs4_server_supports_acls(NFS_SB(dentry->d_sb), NFS4ACL_DACL); +} + +#define XATTR_NAME_NFSV4_SACL "system.nfs4_sacl" + +static int nfs4_xattr_set_nfs4_sacl(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) +{ + return nfs4_proc_set_acl(inode, buf, buflen, NFS4ACL_SACL); +} + +static int nfs4_xattr_get_nfs4_sacl(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) +{ + return nfs4_proc_get_acl(inode, buf, buflen, NFS4ACL_SACL); +} + +static bool nfs4_xattr_list_nfs4_sacl(struct dentry *dentry) +{ + return nfs4_server_supports_acls(NFS_SB(dentry->d_sb), NFS4ACL_SACL); +} + +#endif + #ifdef CONFIG_NFS_V4_SECURITY_LABEL static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, @@ -10615,6 +10664,22 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { .set = nfs4_xattr_set_nfs4_acl, }; +#if defined(CONFIG_NFS_V4_1) +static const struct xattr_handler nfs4_xattr_nfs4_dacl_handler = { + .name = XATTR_NAME_NFSV4_DACL, + .list = nfs4_xattr_list_nfs4_dacl, + .get = nfs4_xattr_get_nfs4_dacl, + .set = nfs4_xattr_set_nfs4_dacl, +}; + +static const struct xattr_handler nfs4_xattr_nfs4_sacl_handler = { + .name = XATTR_NAME_NFSV4_SACL, + .list = nfs4_xattr_list_nfs4_sacl, + .get = nfs4_xattr_get_nfs4_sacl, + .set = nfs4_xattr_set_nfs4_sacl, +}; +#endif + #ifdef CONFIG_NFS_V4_2 static const struct xattr_handler nfs4_xattr_nfs4_user_handler = { .prefix = XATTR_USER_PREFIX, @@ -10625,6 +10690,10 @@ static const struct xattr_handler nfs4_xattr_nfs4_user_handler = { const struct xattr_handler *nfs4_xattr_handlers[] = { &nfs4_xattr_nfs4_acl_handler, +#if defined(CONFIG_NFS_V4_1) + &nfs4_xattr_nfs4_dacl_handler, + &nfs4_xattr_nfs4_sacl_handler, +#endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL &nfs4_xattr_nfs4_label_handler, #endif -- cgit v1.2.3 From 3e2910c7e23b55f6452b7130f97217ee0551a71a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 Mar 2022 10:59:51 +1100 Subject: NFS: Improve warning message when locks are lost. NFSv4 can lose locks if, for example there is a network partition for longer than the lease period. When this happens a warning message NFS: __nfs4_reclaim_open_state: Lock reclaim failed! is generated, possibly once for each lock (though rate limited). This is potentially misleading as is can be read as suggesting that lock reclaim was attempted. However the default behaviour is to not attempt to recover locks (except due to server report). This patch changes the reporting to produce at most one message for each attempt to recover all state from a given server. The message reports the server name and the number of locks lost if that number is non-zero. It reports that locks were lost and give no suggestion as to whether there was an attempt or not. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9e1c987c81e7..775c58def32c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1602,7 +1602,8 @@ static inline void nfs42_complete_copies(struct nfs4_state_owner *sp, #endif /* CONFIG_NFS_V4_2 */ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_state *state, - const struct nfs4_state_recovery_ops *ops) + const struct nfs4_state_recovery_ops *ops, + int *lost_locks) { struct nfs4_lock_state *lock; int status; @@ -1620,7 +1621,7 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st list_for_each_entry(lock, &state->lock_states, ls_locks) { trace_nfs4_state_lock_reclaim(state, lock); if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) - pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__); + *lost_locks += 1; } spin_unlock(&state->state_lock); } @@ -1630,7 +1631,9 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st return status; } -static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops) +static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, + const struct nfs4_state_recovery_ops *ops, + int *lost_locks) { struct nfs4_state *state; unsigned int loop = 0; @@ -1666,7 +1669,7 @@ restart: #endif /* CONFIG_NFS_V4_2 */ refcount_inc(&state->count); spin_unlock(&sp->so_lock); - status = __nfs4_reclaim_open_state(sp, state, ops); + status = __nfs4_reclaim_open_state(sp, state, ops, lost_locks); switch (status) { default: @@ -1909,6 +1912,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov struct rb_node *pos; LIST_HEAD(freeme); int status = 0; + int lost_locks = 0; restart: rcu_read_lock(); @@ -1928,8 +1932,11 @@ restart: spin_unlock(&clp->cl_lock); rcu_read_unlock(); - status = nfs4_reclaim_open_state(sp, ops); + status = nfs4_reclaim_open_state(sp, ops, &lost_locks); if (status < 0) { + if (lost_locks) + pr_warn("NFS: %s: lost %d locks\n", + clp->cl_hostname, lost_locks); set_bit(ops->owner_flag_bit, &sp->so_flags); nfs4_put_state_owner(sp); status = nfs4_recovery_handle_error(clp, status); @@ -1943,6 +1950,9 @@ restart: } rcu_read_unlock(); nfs4_free_state_owners(&freeme); + if (lost_locks) + pr_warn("NFS: %s: lost %d locks\n", + clp->cl_hostname, lost_locks); return 0; } -- cgit v1.2.3 From 9c4a5c75a62e83963083efd4eea5d5bd1583193c Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Wed, 4 May 2022 09:21:06 -0400 Subject: NFS: Pass i_size to fscache_unuse_cookie() when a file is released Pass updated i_size in fscache_unuse_cookie() when called from nfs_fscache_release_file(), which ensures the size of an fscache object gets written to the cache storage. Failing to do so results in unnessary reads from the NFS server, even when the data is cached, due to a cachefiles object coherency check failing with a trace similar to the following: cachefiles_coherency: o=0000000e BAD osiz B=afbb3 c=0 This problem can be reproduced as follows: #!/bin/bash v=4.2; NFS_SERVER=127.0.0.1 set -e; trap cleanup EXIT; rc=1 function cleanup { umount /mnt/nfs > /dev/null 2>&1 RC_STR="TEST PASS" [ $rc -eq 1 ] && RC_STR="TEST FAIL" echo "$RC_STR on $(uname -r) with NFSv$v and server $NFS_SERVER" } mount -o vers=$v,fsc $NFS_SERVER:/export /mnt/nfs rm -f /mnt/nfs/file1.bin > /dev/null 2>&1 dd if=/dev/zero of=/mnt/nfs/file1.bin bs=4096 count=1 > /dev/null 2>&1 echo 3 > /proc/sys/vm/drop_caches echo Read file 1st time from NFS server into fscache dd if=/mnt/nfs/file1.bin of=/dev/null > /dev/null 2>&1 umount /mnt/nfs && mount -o vers=$v,fsc $NFS_SERVER:/export /mnt/nfs echo 3 > /proc/sys/vm/drop_caches echo Read file 2nd time from fscache dd if=/mnt/nfs/file1.bin of=/dev/null > /dev/null 2>&1 echo Check mountstats for NFS read grep -q "READ: 0" /proc/self/mountstats # (1st number) == 0 [ $? -eq 0 ] && rc=0 Fixes: a6b5a28eb56c "nfs: Convert to new fscache volume/cookie API" Signed-off-by: Dave Wysochanski Tested-by: Daire Byrne Signed-off-by: Anna Schumaker --- fs/nfs/fscache.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index f73c09a9cf0a..e861d7bae305 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -231,11 +231,10 @@ void nfs_fscache_release_file(struct inode *inode, struct file *filp) { struct nfs_fscache_inode_auxdata auxdata; struct fscache_cookie *cookie = nfs_i_fscache(inode); + loff_t i_size = i_size_read(inode); - if (fscache_cookie_valid(cookie)) { - nfs_fscache_update_auxdata(&auxdata, inode); - fscache_unuse_cookie(cookie, &auxdata, NULL); - } + nfs_fscache_update_auxdata(&auxdata, inode); + fscache_unuse_cookie(cookie, &auxdata, &i_size); } /* -- cgit v1.2.3 From c3ed222745d9ad7b69299b349a64ba533c64a34f Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Sat, 14 May 2022 07:05:13 -0400 Subject: NFSv4: Fix free of uninitialized nfs4_label on referral lookup. Send along the already-allocated fattr along with nfs4_fs_locations, and drop the memcpy of fattr. We end up growing two more allocations, but this fixes up a crash as: PID: 790 TASK: ffff88811b43c000 CPU: 0 COMMAND: "ls" #0 [ffffc90000857920] panic at ffffffff81b9bfde #1 [ffffc900008579c0] do_trap at ffffffff81023a9b #2 [ffffc90000857a10] do_error_trap at ffffffff81023b78 #3 [ffffc90000857a58] exc_stack_segment at ffffffff81be1f45 #4 [ffffc90000857a80] asm_exc_stack_segment at ffffffff81c009de #5 [ffffc90000857b08] nfs_lookup at ffffffffa0302322 [nfs] #6 [ffffc90000857b70] __lookup_slow at ffffffff813a4a5f #7 [ffffc90000857c60] walk_component at ffffffff813a86c4 #8 [ffffc90000857cb8] path_lookupat at ffffffff813a9553 #9 [ffffc90000857cf0] filename_lookup at ffffffff813ab86b Suggested-by: Trond Myklebust Fixes: 9558a007dbc3 ("NFS: Remove the label from the nfs4_lookup_res struct") Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4namespace.c | 9 +++++++-- fs/nfs/nfs4proc.c | 15 +++++++-------- fs/nfs/nfs4state.c | 9 ++++++++- fs/nfs/nfs4xdr.c | 4 ++-- include/linux/nfs_xdr.h | 2 +- 5 files changed, 25 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 3680c8da510c..f2dbf904c598 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -417,6 +417,9 @@ static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client) fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); if (!fs_locations) goto out_free; + fs_locations->fattr = nfs_alloc_fattr(); + if (!fs_locations->fattr) + goto out_free_2; /* Get locations */ dentry = ctx->clone_data.dentry; @@ -427,14 +430,16 @@ static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client) err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page); dput(parent); if (err != 0) - goto out_free_2; + goto out_free_3; err = -ENOENT; if (fs_locations->nlocations <= 0 || fs_locations->fs_path.ncomponents <= 0) - goto out_free_2; + goto out_free_3; err = nfs_follow_referral(fc, fs_locations); +out_free_3: + kfree(fs_locations->fattr); out_free_2: kfree(fs_locations); out_free: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0dfdbb406f96..337609befd17 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4247,6 +4247,8 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir, if (locations == NULL) goto out; + locations->fattr = fattr; + status = nfs4_proc_fs_locations(client, dir, name, locations, page); if (status != 0) goto out; @@ -4256,17 +4258,14 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir, * referral. Cause us to drop into the exception handler, which * will kick off migration recovery. */ - if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { + if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &fattr->fsid)) { dprintk("%s: server did not return a different fsid for" " a referral at %s\n", __func__, name->name); status = -NFS4ERR_MOVED; goto out; } /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */ - nfs_fixup_referral_attributes(&locations->fattr); - - /* replace the lookup nfs_fattr with the locations nfs_fattr */ - memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); + nfs_fixup_referral_attributes(fattr); memset(fhandle, 0, sizeof(struct nfs_fh)); out: if (page) @@ -7979,7 +7978,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, else bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; - nfs_fattr_init(&fs_locations->fattr); + nfs_fattr_init(fs_locations->fattr); fs_locations->server = server; fs_locations->nlocations = 0; status = nfs4_call_sync(client, server, &msg, &args.seq_args, &res.seq_res, 0); @@ -8044,7 +8043,7 @@ static int _nfs40_proc_get_locations(struct nfs_server *server, unsigned long now = jiffies; int status; - nfs_fattr_init(&locations->fattr); + nfs_fattr_init(locations->fattr); locations->server = server; locations->nlocations = 0; @@ -8109,7 +8108,7 @@ static int _nfs41_proc_get_locations(struct nfs_server *server, }; int status; - nfs_fattr_init(&locations->fattr); + nfs_fattr_init(locations->fattr); locations->server = server; locations->nlocations = 0; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 775c58def32c..2540b35ec187 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2116,6 +2116,11 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred dprintk("<-- %s: no memory\n", __func__); goto out; } + locations->fattr = nfs_alloc_fattr(); + if (locations->fattr == NULL) { + dprintk("<-- %s: no memory\n", __func__); + goto out; + } inode = d_inode(server->super->s_root); result = nfs4_proc_get_locations(server, NFS_FH(inode), locations, @@ -2130,7 +2135,7 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred if (!locations->nlocations) goto out; - if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { + if (!(locations->fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { dprintk("<-- %s: No fs_locations data, migration skipped\n", __func__); goto out; @@ -2155,6 +2160,8 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred out: if (page != NULL) __free_page(page); + if (locations != NULL) + kfree(locations->fattr); kfree(locations); if (result) { pr_err("NFS: migration recovery failed (server %s)\n", diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0b41e00e754f..acfe5f4bda48 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7080,7 +7080,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, if (res->migration) { xdr_enter_page(xdr, PAGE_SIZE); status = decode_getfattr_generic(xdr, - &res->fs_locations->fattr, + res->fs_locations->fattr, NULL, res->fs_locations, res->fs_locations->server); if (status) @@ -7093,7 +7093,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, goto out; xdr_enter_page(xdr, PAGE_SIZE); status = decode_getfattr_generic(xdr, - &res->fs_locations->fattr, + res->fs_locations->fattr, NULL, res->fs_locations, res->fs_locations->server); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4a8ba84f848e..0e3aa0f5f324 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1222,7 +1222,7 @@ struct nfs4_fs_location { #define NFS4_FS_LOCATIONS_MAXENTRIES 10 struct nfs4_fs_locations { - struct nfs_fattr fattr; + struct nfs_fattr *fattr; const struct nfs_server *server; struct nfs4_pathname fs_path; int nlocations; -- cgit v1.2.3 From 118f09eda21d392e1eeb9f8a4bee044958cccf20 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 25 May 2022 12:12:59 -0400 Subject: NFSv4.1 mark qualified async operations as MOVEABLE tasks Mark async operations such as RENAME, REMOVE, COMMIT MOVEABLE for the nfsv4.1+ sessions. Fixes: 85e39feead948 ("NFSv4.1 identify and mark RPC tasks that can move between transports") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 26 ++++++++++++++------------ fs/nfs/pagelist.c | 3 +++ fs/nfs/unlink.c | 8 ++++++++ fs/nfs/write.c | 4 ++++ include/linux/nfs_fs_sb.h | 1 + 5 files changed, 30 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 337609befd17..c0fdcf8c0032 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1162,7 +1162,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, { unsigned short task_flags = 0; - if (server->nfs_client->cl_minorversion) + if (server->caps & NFS_CAP_MOVEABLE) task_flags = RPC_TASK_MOVEABLE; return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags); } @@ -2568,7 +2568,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, }; int status; - if (server->nfs_client->cl_minorversion) + if (nfs_server_capable(dir, NFS_CAP_MOVEABLE)) task_setup_data.flags |= RPC_TASK_MOVEABLE; kref_get(&data->kref); @@ -3737,7 +3737,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) }; int status = -ENOMEM; - if (server->nfs_client->cl_minorversion) + if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE)) task_setup_data.flags |= RPC_TASK_MOVEABLE; nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, @@ -4407,7 +4407,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, }; unsigned short task_flags = 0; - if (server->nfs_client->cl_minorversion) + if (nfs_server_capable(dir, NFS_CAP_MOVEABLE)) task_flags = RPC_TASK_MOVEABLE; /* Is this is an attribute revalidation, subject to softreval? */ @@ -6639,10 +6639,13 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs4_delegreturn_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, }; int status = 0; + if (nfs_server_capable(inode, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) return -ENOMEM; @@ -6956,10 +6959,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; - struct nfs_client *client = - NFS_SERVER(lsp->ls_state->inode)->nfs_client; - if (client->cl_minorversion) + if (nfs_server_capable(lsp->ls_state->inode, NFS_CAP_MOVEABLE)) task_setup_data.flags |= RPC_TASK_MOVEABLE; nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client, @@ -7230,9 +7231,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, }; int ret; - struct nfs_client *client = NFS_SERVER(state->inode)->nfs_client; - if (client->cl_minorversion) + if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE)) task_setup_data.flags |= RPC_TASK_MOVEABLE; data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), @@ -10467,7 +10467,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 - | NFS_CAP_LGOPEN, + | NFS_CAP_LGOPEN + | NFS_CAP_MOVEABLE, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, @@ -10502,7 +10503,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_LAYOUTSTATS | NFS_CAP_CLONE | NFS_CAP_LAYOUTERROR - | NFS_CAP_READ_PLUS, + | NFS_CAP_READ_PLUS + | NFS_CAP_MOVEABLE, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 9157dd19b8b4..317cedfa52bf 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -767,6 +767,9 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, .flags = RPC_TASK_ASYNC | flags, }; + if (nfs_server_capable(hdr->inode, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); dprintk("NFS: initiated pgio call " diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 6f325e10056c..9697cd5d2561 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -102,6 +102,10 @@ static void nfs_do_call_unlink(struct inode *inode, struct nfs_unlinkdata *data) }; struct rpc_task *task; struct inode *dir = d_inode(data->dentry->d_parent); + + if (nfs_server_capable(inode, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + nfs_sb_active(dir->i_sb); data->args.fh = NFS_FH(dir); nfs_fattr_init(data->res.dir_attr); @@ -344,6 +348,10 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, }; + if (nfs_server_capable(old_dir, NFS_CAP_MOVEABLE) && + nfs_server_capable(new_dir, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) return ERR_PTR(-ENOMEM); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2f41659e232e..1c706465d090 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1709,6 +1709,10 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, .flags = RPC_TASK_ASYNC | flags, .priority = priority, }; + + if (nfs_server_capable(data->inode, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + /* Set up the initial task struct. */ nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client); trace_nfs_initiate_commit(data); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 157d2bd6b241..ea2f7e6b1b0b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -287,4 +287,5 @@ struct nfs_server { #define NFS_CAP_XATTR (1U << 28) #define NFS_CAP_READ_PLUS (1U << 29) #define NFS_CAP_FS_LOCATIONS (1U << 30) +#define NFS_CAP_MOVEABLE (1U << 31) #endif -- cgit v1.2.3